@@ -2971,7 +2971,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
2971
2971
fixupIVUsers (Entry.first , Entry.second ,
2972
2972
getOrCreateVectorTripCount (nullptr ),
2973
2973
IVEndValues[Entry.first ], LoopMiddleBlock, State);
2974
- IVEndValues[Entry.first ], LoopMiddleBlock, Plan, State);
2975
2974
}
2976
2975
2977
2976
for (Instruction *PI : PredicatedInstructions)
@@ -8705,13 +8704,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8705
8704
// directly, enabling more efficient codegen.
8706
8705
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe (Phi, *StartV);
8707
8706
} else if (Legal->isCSAPhi (Phi)) {
8708
- VPCSAState *State = Plan.getCSAStates ().find (Phi)->second ;
8709
- VPValue *InitData = State->getVPInitData ();
8707
+ VPValue *InitScalar = Plan.getOrAddLiveIn (
8708
+ Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8709
+
8710
+ // Don't build full CSA for VF=ElementCount::getFixed(1)
8711
+ bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8712
+ [&](ElementCount VF) { return VF.isScalar (); }, Range);
8713
+
8710
8714
// When the VF=getFixed(1), InitData is just InitScalar.
8711
- if (!InitData)
8712
- InitData = State->getVPInitScalar ();
8715
+ VPValue *InitData =
8716
+ IsScalarVF ? InitScalar
8717
+ : getVPValueOrAddLiveIn (PoisonValue::get (Phi->getType ()));
8713
8718
PhiRecipe = new VPCSAHeaderPHIRecipe (Phi, InitData);
8714
- State->setPhiRecipe (cast<VPCSAHeaderPHIRecipe>(PhiRecipe));
8715
8719
} else {
8716
8720
llvm_unreachable (
8717
8721
" can only widen reductions, fixed-order recurrences, and CSAs here" );
@@ -8752,13 +8756,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8752
8756
return CSADescriptor::isCSASelect (CSA.second , SI);
8753
8757
});
8754
8758
if (CSADescIt != Legal->getCSAs ().end ()) {
8755
- PHINode *CSAPhi = CSADescIt->first ;
8756
- VPCSAState *State = Plan.getCSAStates ().find (CSAPhi)->second ;
8757
- VPValue *VPDataPhi = State->getPhiRecipe ();
8758
- auto *R = new VPCSADataUpdateRecipe (
8759
- SI, {VPDataPhi, Operands[0 ], Operands[1 ], Operands[2 ]});
8760
- State->setDataUpdate (R);
8761
- return R;
8759
+ for (VPRecipeBase &R :
8760
+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
8761
+ if (auto PhiR = dyn_cast<VPCSAHeaderPHIRecipe>(&R)) {
8762
+ if (PhiR->getUnderlyingInstr () == CSADescIt->first ) {
8763
+ auto *R = new VPCSADataUpdateRecipe (
8764
+ SI, {PhiR, Operands[0 ], Operands[1 ], Operands[2 ]});
8765
+ PhiR->setDataUpdate (R);
8766
+ return R;
8767
+ }
8768
+ }
8769
+ }
8762
8770
}
8763
8771
8764
8772
return new VPWidenSelectRecipe (
@@ -8773,44 +8781,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8773
8781
return tryToWiden (Instr, Operands, VPBB);
8774
8782
}
8775
8783
8776
- // / Add CSA Recipes that can occur before each instruction in the input IR
8777
- // / is processed and introduced into VPlan.
8778
- static void
8779
- addCSAPreprocessRecipes (const LoopVectorizationLegality::CSAList &CSAs,
8780
- Loop *OrigLoop, VPBasicBlock *PreheaderVPBB,
8781
- VPBasicBlock *HeaderVPBB, DebugLoc DL, VFRange &Range,
8782
- VPlan &Plan, VPRecipeBuilder &Builder) {
8783
-
8784
- // Don't build full CSA for VF=ElementCount::getFixed(1)
8785
- bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8786
- [&](ElementCount VF) { return VF.isScalar (); }, Range);
8787
-
8788
- for (const auto &CSA : CSAs) {
8789
- VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8790
- CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8791
-
8792
- // Scalar VF builds the scalar version of the loop. In that case,
8793
- // no maintenence of mask nor extraction in middle block is needed.
8794
- if (IsScalarVF) {
8795
- VPCSAState *S = new VPCSAState (VPInitScalar);
8796
- Plan.addCSAState (CSA.first , S);
8797
- continue ;
8798
- }
8799
-
8800
- VPBuilder PHB (PreheaderVPBB);
8801
- auto *VPInitMask = Builder.getVPValueOrAddLiveIn (
8802
- ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8803
- auto *VPInitData =
8804
- Builder.getVPValueOrAddLiveIn (PoisonValue::get (CSA.first ->getType ()));
8805
-
8806
- VPBuilder HB (HeaderVPBB);
8807
- auto *VPMaskPhi = HB.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8808
-
8809
- auto *S = new VPCSAState (VPInitScalar, VPInitData, VPMaskPhi);
8810
- Plan.addCSAState (CSA.first , S);
8811
- }
8812
- }
8813
-
8814
8784
// / Add CSA Recipes that must occur after each instruction in the input IR
8815
8785
// / is processed and introduced into VPlan.
8816
8786
static void
@@ -8823,60 +8793,57 @@ addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
8823
8793
[&](ElementCount VF) { return VF.isScalar (); }, Range))
8824
8794
return ;
8825
8795
8796
+ VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
8826
8797
for (const auto &CSA : CSAs) {
8827
- VPCSAState *CSAState = Plan.getCSAStates ().find (CSA.first )->second ;
8828
- VPCSADataUpdateRecipe *VPDataUpdate = CSAState->getDataUpdate ();
8798
+ // Build the MaskPhi recipe.
8799
+ auto *VPInitMask = RecipeBuilder.getVPValueOrAddLiveIn (
8800
+ ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8801
+ VPBuilder B;
8802
+ B.setInsertPoint (Header, Header->getFirstNonPhi ());
8803
+ auto *VPMaskPhi = B.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8804
+ B.clearInsertionPoint ();
8829
8805
8830
- assert (VPDataUpdate &&
8831
- " VPDataUpdate must have been introduced prior to postprocess" );
8832
- assert (CSA.second .getCond () &&
8833
- " CSADescriptor must know how to describe the condition" );
8834
8806
auto GetVPValue = [&](Value *I) {
8835
8807
return RecipeBuilder.getRecipe (cast<Instruction>(I))->getVPSingleValue ();
8836
8808
};
8837
- VPValue *WidenedCond = GetVPValue (CSA. second . getCond ());
8838
- VPValue *VPInitScalar = CSAState-> getVPInitScalar ( );
8809
+ VPCSADataUpdateRecipe *VPDataUpdate = cast<VPCSADataUpdateRecipe>(
8810
+ cast<VPCSAHeaderPHIRecipe>( GetVPValue (CSA. first ))-> getVPNewData () );
8839
8811
8840
8812
// The CSA optimization wants to use a condition such that when it is
8841
8813
// true, a new value is assigned. However, it is possible that a true lane
8842
8814
// in WidenedCond corresponds to selection of the initial value instead.
8843
8815
// In that case, we must use the negation of WidenedCond.
8844
8816
// i.e. select cond new_val old_val versus select cond.not old_val new_val
8817
+ assert (CSA.second .getCond () &&
8818
+ " CSADescriptor must know how to describe the condition" );
8819
+ VPValue *WidenedCond = GetVPValue (CSA.second .getCond ());
8845
8820
VPValue *CondToUse = WidenedCond;
8846
- VPBuilder B;
8847
8821
if (cast<SelectInst>(CSA.second .getAssignment ())->getTrueValue () ==
8848
8822
CSA.first ) {
8849
8823
auto *VPNotCond = B.createNot (WidenedCond, DL);
8850
- VPNotCond->insertBefore (
8851
- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8824
+ VPNotCond->insertBefore (VPDataUpdate);
8852
8825
CondToUse = VPNotCond;
8853
8826
}
8854
8827
8855
- auto *VPAnyActive =
8856
- B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8857
- VPAnyActive->insertBefore (
8858
- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8828
+ auto *VPAnyActive = B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8829
+ VPAnyActive->insertBefore (VPDataUpdate);
8859
8830
8860
- auto *VPMaskSel = B.createCSAMaskSel (CondToUse, CSAState-> getVPMaskPhi () ,
8861
- VPAnyActive, DL, " csa.mask.sel" );
8831
+ auto *VPMaskSel = B.createCSAMaskSel (CondToUse, VPMaskPhi, VPAnyActive, DL ,
8832
+ " csa.mask.sel" );
8862
8833
VPMaskSel->insertAfter (VPAnyActive);
8834
+
8863
8835
VPDataUpdate->setVPNewMaskAndVPAnyActive (VPMaskSel, VPAnyActive);
8836
+ VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8837
+ CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8838
+ SmallVector<PHINode *> PhiToFix;
8839
+ for (User *U : VPDataUpdate->getUnderlyingValue ()->users ())
8840
+ if (auto *Phi = dyn_cast<PHINode>(U);
8841
+ Phi && Phi->getParent () == OrigLoop->getUniqueExitBlock ())
8842
+ PhiToFix.emplace_back (Phi);
8864
8843
VPCSAExtractScalarRecipe *ExtractScalarRecipe =
8865
- new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate});
8866
-
8844
+ new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate},
8845
+ PhiToFix);
8867
8846
MiddleVPBB->insert (ExtractScalarRecipe, MiddleVPBB->getFirstNonPhi ());
8868
-
8869
- // Update CSAState with new recipes
8870
- CSAState->setExtractScalarRecipe (ExtractScalarRecipe);
8871
- CSAState->setVPAnyActive (VPAnyActive);
8872
-
8873
- // Add live out for the CSA. We should be in LCSSA, so we are looking for
8874
- // Phi users in the unique exit block of the original updated value.
8875
- BasicBlock *OrigExit = OrigLoop->getUniqueExitBlock ();
8876
- assert (OrigExit && " Expected a single exit block" );
8877
- for (User *U :VPDataUpdate->getUnderlyingValue ()->users ())
8878
- if (auto *Phi = dyn_cast<PHINode>(U); Phi && Phi->getParent () == OrigExit)
8879
- Plan.addLiveOut (Phi, ExtractScalarRecipe);
8880
8847
}
8881
8848
}
8882
8849
@@ -9194,11 +9161,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9194
9161
9195
9162
VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
9196
9163
9197
- addCSAPreprocessRecipes (Legal->getCSAs (), OrigLoop, Plan->getPreheader (),
9198
- Plan->getVectorLoopRegion ()->getEntryBasicBlock (), DL,
9199
- Range, *Plan, RecipeBuilder);
9200
-
9201
-
9202
9164
// ---------------------------------------------------------------------------
9203
9165
// Pre-construction: record ingredients whose recipes we'll need to further
9204
9166
// process after constructing the initial VPlan.
0 commit comments