opt -mtriple=riscv64 -mcpu=sifive-p870 -passes=slp-vectorizer -S -slp-revec -slp-threshold=-100
; Input pattern: two <8 x i8> loads from %in0 at byte offsets 0 and 16
; (i.e. two 8-byte chunks separated by a 16-byte stride), stored
; contiguously to %out0. With -slp-revec the SLP vectorizer merges the
; pair of vector loads into a single strided access.
define void @widened_strided_load(ptr %in0, ptr %out0) {
entry:
; Second source chunk starts 16 bytes after the first.
%in1 = getelementptr i8, ptr %in0, i64 16
%l0 = load <8 x i8>, ptr %in0, align 2
%l1 = load <8 x i8>, ptr %in1, align 2
; Destination chunks are adjacent (offsets 0 and 8).
%out1 = getelementptr i8, ptr %out0, i64 8
store <8 x i8> %l0, ptr %out0, align 2
store <8 x i8> %l1, ptr %out1, align 2
ret void
}
This currently lowers (incorrectly) to a basic element-wise strided load:
; Incorrect lowering: 16 x i8 elements with a 16-byte stride reads single
; bytes at offsets 0, 16, 32, ..., 240 — not the two contiguous 8-byte
; chunks at offsets 0 and 16 that the input loaded. The element count is
; right but the per-element stride scatters the accesses.
define void @widened_strided_load(ptr %in0, ptr %out0) #0 {
entry:
%0 = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr align 2 %in0, i64 16, <16 x i1> splat (i1 true), i32 16)
store <16 x i8> %0, ptr %out0, align 2
ret void
}
It should instead lower to a widened strided load of i64 elements, bitcast back to bytes:
; Expected lowering: widen each 8-byte chunk to one i64 element, so a
; 2 x i64 strided load with a 16-byte stride fetches exactly the bytes at
; offsets 0-7 and 16-23, then bitcasts back to <16 x i8> for the
; contiguous store — matching the original program's semantics.
define void @widened_strided_load(ptr %in0, ptr %out0) #0 {
entry:
%0 = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 2 %in0, i64 16, <2 x i1> splat (i1 true), i32 2)
%1 = bitcast <2 x i64> %0 to <16 x i8>
store <16 x i8> %1, ptr %out0, align 2
ret void
}
opt -mtriple=riscv64 -mcpu=sifive-p870 -passes=slp-vectorizer -S -slp-revec -slp-threshold=-100
incorrectly lowers to a basic strided load
should instead lower to a widened strided load