You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[InstCombine] Fold shuffled intrinsic operands with constant operands (#141300)
We currently pull shuffles through binops and intrinsics, which is an
important canonical form for VectorCombine to be able to scalarize
vector sequences. But while binops can be folded with a constant
operand, intrinsics currently require all operands to be shufflevectors.
This extends intrinsic folding to be in line with regular binops by
reusing the constant "unshuffling" logic.
As far as I can tell the list of currently folded intrinsics don't
require any special UB handling.
This change in combination with #138095 and #137823 fixes the following
C:
```c
void max(int *x, int *y, int n) {
for (int i = 0; i < n; i++)
x[i] += *y > 42 ? *y : 42;
}
```
Not using the splatted vector form on RISC-V with `-O3 -march=rva23u64`:
```asm
vmv.s.x v8, a4
li a4, 42
vmax.vx v10, v8, a4
vrgather.vi v8, v10, 0
.LBB0_9: # %vector.body
# =>This Inner Loop Header: Depth=1
vl2re32.v v10, (a5)
vadd.vv v10, v10, v8
vs2r.v v10, (a5)
```
i.e., it now generates
```asm
li a6, 42
max a6, a4, a6
.LBB0_9: # %vector.body
# =>This Inner Loop Header: Depth=1
vl2re32.v v8, (a5)
vadd.vx v8, v8, a6
vs2r.v v8, (a5)
```
; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[A:%.*]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[B:%.*]])
831
+
; CHECK-NEXT: [[R1:%.*]] = shufflevector <vscale x 2 x float> [[R]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
832
+
; CHECK-NEXT: ret <vscale x 2 x float> [[R1]]
833
+
;
834
+
%a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
835
+
%b = shufflevector <vscale x 2 x float> %y, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
836
+
%r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> %a, <vscale x 2 x float> splat (float42.0), <vscale x 2 x float> %b)
837
+
ret <vscale x 2 x float> %r
838
+
}
839
+
840
+
define <vscale x 2 x float> @fma_unary_shuffle_ops_2_const_scalable(<vscale x 2 x float> %x) {
; CHECK-NEXT: [[X:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[X1:%.*]])
843
+
; CHECK-NEXT: [[A:%.*]] = shufflevector <vscale x 2 x float> [[X]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
844
+
; CHECK-NEXT: ret <vscale x 2 x float> [[A]]
845
+
;
846
+
%a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
847
+
%r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float42.0), <vscale x 2 x float> splat (float42.0), <vscale x 2 x float> %a)
848
+
ret <vscale x 2 x float> %r
849
+
}
850
+
851
+
define <3 x float> @fma_unary_shuffle_ops_widening_1_const(<2 x float> %x, <2 x float> %y) {
; CHECK-NEXT: [[Y:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> [[Y1:%.*]])
959
+
; CHECK-NEXT: [[B:%.*]] = shufflevector <vscale x 2 x i32> [[Y]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
960
+
; CHECK-NEXT: ret <vscale x 2 x i32> [[B]]
961
+
;
962
+
%a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
963
+
%b = shufflevector <vscale x 2 x i32> %y, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
964
+
%r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i3242), <vscale x 2 x i32> %a, <vscale x 2 x i32> %b)
965
+
ret <vscale x 2 x i32> %r
966
+
}
967
+
968
+
define <vscale x 2 x i32> @fsh_unary_shuffle_ops_2_const_scalable(<vscale x 2 x i32> %x) {
; CHECK-NEXT: [[X:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X1:%.*]])
971
+
; CHECK-NEXT: [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
972
+
; CHECK-NEXT: ret <vscale x 2 x i32> [[A]]
973
+
;
974
+
%a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
975
+
%r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i3242), <vscale x 2 x i32> splat (i3242), <vscale x 2 x i32> %a)
976
+
ret <vscale x 2 x i32> %r
977
+
}
978
+
979
+
define <3 x i32> @fsh_unary_shuffle_ops_widening_1_const(<2 x i32> %x, <2 x i32> %y) {
0 commit comments