@@ -3815,91 +3815,25 @@ define dso_local i64 @idx_scalar_dec(ptr %a, ptr %b, i64 %ii, i64 %n) {
3815
3815
; NO-EVL-NEXT: [[CMP_NOT9:%.*]] = icmp eq i64 [[N:%.*]], 0
3816
3816
; NO-EVL-NEXT: br i1 [[CMP_NOT9]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
3817
3817
; NO-EVL: for.body.preheader:
3818
- ; NO-EVL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
3819
- ; NO-EVL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3820
- ; NO-EVL: vector.ph:
3821
- ; NO-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
3822
- ; NO-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3823
- ; NO-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[N]], [[N_VEC]]
3824
- ; NO-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
3825
- ; NO-EVL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
3826
- ; NO-EVL-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 -1, i64 -2, i64 -3>
3827
- ; NO-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
3828
- ; NO-EVL: vector.body:
3829
- ; NO-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3830
- ; NO-EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[CSA_MASK_SEL8:%.*]], [[VECTOR_BODY]] ]
3831
- ; NO-EVL-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3832
- ; NO-EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi <4 x i64> [ poison, [[VECTOR_PH]] ], [ [[CSA_DATA_SEL9:%.*]], [[VECTOR_BODY]] ]
3833
- ; NO-EVL-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 -4, i64 -4, i64 -4, i64 -4>
3834
- ; NO-EVL-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], <i64 -1, i64 -1, i64 -1, i64 -1>
3835
- ; NO-EVL-NEXT: [[TMP1:%.*]] = add <4 x i64> [[STEP_ADD]], <i64 -1, i64 -1, i64 -1, i64 -1>
3836
- ; NO-EVL-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
3837
- ; NO-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
3838
- ; NO-EVL-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0
3839
- ; NO-EVL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
3840
- ; NO-EVL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
3841
- ; NO-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 -3
3842
- ; NO-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -4
3843
- ; NO-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 -3
3844
- ; NO-EVL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8
3845
- ; NO-EVL-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3846
- ; NO-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
3847
- ; NO-EVL-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD2]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3848
- ; NO-EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]]
3849
- ; NO-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
3850
- ; NO-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
3851
- ; NO-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 -3
3852
- ; NO-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 -4
3853
- ; NO-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 -3
3854
- ; NO-EVL-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP13]], align 8
3855
- ; NO-EVL-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3856
- ; NO-EVL-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP15]], align 8
3857
- ; NO-EVL-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD6]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3858
- ; NO-EVL-NEXT: [[TMP16:%.*]] = icmp sgt <4 x i64> [[REVERSE]], [[REVERSE5]]
3859
- ; NO-EVL-NEXT: [[TMP17:%.*]] = icmp sgt <4 x i64> [[REVERSE3]], [[REVERSE7]]
3860
- ; NO-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP16]])
3861
- ; NO-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]])
3862
- ; NO-EVL-NEXT: [[CSA_MASK_SEL:%.*]] = select i1 [[TMP18]], <4 x i1> [[TMP16]], <4 x i1> [[CSA_MASK_PHI]]
3863
- ; NO-EVL-NEXT: [[CSA_MASK_SEL8]] = select i1 [[TMP19]], <4 x i1> [[TMP17]], <4 x i1> [[CSA_MASK_SEL]]
3864
- ; NO-EVL-NEXT: [[CSA_DATA_SEL:%.*]] = select i1 [[TMP18]], <4 x i64> [[VEC_IND]], <4 x i64> [[CSA_DATA_PHI]]
3865
- ; NO-EVL-NEXT: [[CSA_DATA_SEL9]] = select i1 [[TMP19]], <4 x i64> [[STEP_ADD]], <4 x i64> [[CSA_DATA_SEL]]
3866
- ; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
3867
- ; NO-EVL-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], <i64 -4, i64 -4, i64 -4, i64 -4>
3868
- ; NO-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3869
- ; NO-EVL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
3870
- ; NO-EVL: middle.block:
3871
- ; NO-EVL-NEXT: [[TMP21:%.*]] = select <4 x i1> [[CSA_MASK_SEL8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> zeroinitializer
3872
- ; NO-EVL-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP21]])
3873
- ; NO-EVL-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[CSA_MASK_SEL8]], i64 0
3874
- ; NO-EVL-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP22]], 0
3875
- ; NO-EVL-NEXT: [[TMP25:%.*]] = and i1 [[TMP23]], [[TMP24]]
3876
- ; NO-EVL-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 0, i32 -1
3877
- ; NO-EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement <4 x i64> [[CSA_DATA_SEL9]], i32 [[TMP26]]
3878
- ; NO-EVL-NEXT: [[TMP27:%.*]] = icmp sge i32 [[TMP26]], 0
3879
- ; NO-EVL-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i64 [[CSA_EXTRACT]], i64 [[II:%.*]]
3880
- ; NO-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3881
- ; NO-EVL-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3882
- ; NO-EVL: scalar.ph:
3883
- ; NO-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[N]], [[FOR_BODY_PREHEADER]] ]
3884
3818
; NO-EVL-NEXT: br label [[FOR_BODY:%.*]]
3885
3819
; NO-EVL: for.cond.cleanup.loopexit:
3886
- ; NO-EVL-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND:%.*]], [[FOR_BODY]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ]
3820
+ ; NO-EVL-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND:%.*]], [[FOR_BODY]] ]
3887
3821
; NO-EVL-NEXT: br label [[FOR_COND_CLEANUP]]
3888
3822
; NO-EVL: for.cond.cleanup:
3889
- ; NO-EVL-NEXT: [[IDX_0_LCSSA:%.*]] = phi i64 [ [[II]], [[ENTRY:%.*]] ], [ [[COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
3823
+ ; NO-EVL-NEXT: [[IDX_0_LCSSA:%.*]] = phi i64 [ [[II:%.* ]], [[ENTRY:%.*]] ], [ [[COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT:%.* ]] ]
3890
3824
; NO-EVL-NEXT: ret i64 [[IDX_0_LCSSA]]
3891
3825
; NO-EVL: for.body:
3892
- ; NO-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL ]], [[SCALAR_PH ]] ]
3893
- ; NO-EVL-NEXT: [[IDX_010:%.*]] = phi i64 [ [[COND]], [[FOR_BODY]] ], [ [[II]], [[SCALAR_PH ]] ]
3826
+ ; NO-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[N ]], [[FOR_BODY_PREHEADER ]] ]
3827
+ ; NO-EVL-NEXT: [[IDX_010:%.*]] = phi i64 [ [[COND]], [[FOR_BODY]] ], [ [[II]], [[FOR_BODY_PREHEADER ]] ]
3894
3828
; NO-EVL-NEXT: [[SUB]] = add i64 [[I_011]], -1
3895
- ; NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[SUB]]
3896
- ; NO-EVL-NEXT: [[TMP29 :%.*]] = load i64, ptr [[ARRAYIDX]], align 8
3897
- ; NO-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[SUB]]
3898
- ; NO-EVL-NEXT: [[TMP30 :%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
3899
- ; NO-EVL-NEXT: [[CMP3:%.*]] = icmp sgt i64 [[TMP29 ]], [[TMP30 ]]
3829
+ ; NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.* ]], i64 [[SUB]]
3830
+ ; NO-EVL-NEXT: [[TMP0 :%.*]] = load i64, ptr [[ARRAYIDX]], align 8
3831
+ ; NO-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B:%.* ]], i64 [[SUB]]
3832
+ ; NO-EVL-NEXT: [[TMP1 :%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
3833
+ ; NO-EVL-NEXT: [[CMP3:%.*]] = icmp sgt i64 [[TMP0 ]], [[TMP1 ]]
3900
3834
; NO-EVL-NEXT: [[COND]] = select i1 [[CMP3]], i64 [[I_011]], i64 [[IDX_010]]
3901
3835
; NO-EVL-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[SUB]], 0
3902
- ; NO-EVL-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
3836
+ ; NO-EVL-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
3903
3837
;
3904
3838
; DATA-LABEL: @idx_scalar_dec(
3905
3839
; DATA-NEXT: entry:
@@ -4037,7 +3971,7 @@ define i32 @simple_csa_int_select_neg_cond(i32 %N, ptr %data) {
4037
3971
; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
4038
3972
; NO-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
4039
3973
; NO-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4040
- ; NO-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18 :![0-9]+]]
3974
+ ; NO-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16 :![0-9]+]]
4041
3975
; NO-EVL: middle.block:
4042
3976
; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
4043
3977
; NO-EVL-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[CSA_MASK_SEL]], <vscale x 4 x i32> [[CSA_STEP]], <vscale x 4 x i32> zeroinitializer
@@ -4070,7 +4004,7 @@ define i32 @simple_csa_int_select_neg_cond(i32 %N, ptr %data) {
4070
4004
; NO-EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1_NOT]], i32 [[T_010]], i32 [[TMP28]]
4071
4005
; NO-EVL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
4072
4006
; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
4073
- ; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19 :![0-9]+]]
4007
+ ; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17 :![0-9]+]]
4074
4008
;
4075
4009
; DATA-LABEL: @simple_csa_int_select_neg_cond(
4076
4010
; DATA-NEXT: entry:
@@ -4207,7 +4141,7 @@ define ptr @simple_csa_ptr_select(i32 %N, ptr %data) {
4207
4141
; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
4208
4142
; NO-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
4209
4143
; NO-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4210
- ; NO-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20 :![0-9]+]]
4144
+ ; NO-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18 :![0-9]+]]
4211
4145
; NO-EVL: middle.block:
4212
4146
; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
4213
4147
; NO-EVL-NEXT: [[TMP19:%.*]] = select <vscale x 2 x i1> [[CSA_MASK_SEL]], <vscale x 2 x i32> [[CSA_STEP]], <vscale x 2 x i32> zeroinitializer
@@ -4241,7 +4175,7 @@ define ptr @simple_csa_ptr_select(i32 %N, ptr %data) {
4241
4175
; NO-EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], ptr [[TMP27]], ptr [[T_010]]
4242
4176
; NO-EVL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
4243
4177
; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
4244
- ; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21 :![0-9]+]]
4178
+ ; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19 :![0-9]+]]
4245
4179
;
4246
4180
; DATA-LABEL: @simple_csa_ptr_select(
4247
4181
; DATA-NEXT: entry:
0 commit comments