@@ -1224,6 +1224,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
12241224 return ;
12251225 }
12261226
1227+ uint64_t Idx;
1228+ if (match (&R, m_ExtractElement (m_BuildVector (), m_ConstantInt (Idx)))) {
1229+ auto *BuildVector = cast<VPInstruction>(R.getOperand (0 ));
1230+ Def->replaceAllUsesWith (BuildVector->getOperand (Idx));
1231+ return ;
1232+ }
1233+
12271234 if (auto *Phi = dyn_cast<VPPhi>(Def)) {
12281235 if (Phi->getNumOperands () == 1 )
12291236 Phi->replaceAllUsesWith (Phi->getOperand (0 ));
@@ -3780,7 +3787,7 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
37803787 BTC->replaceAllUsesWith (TCMO);
37813788}
37823789
3783- void VPlanTransforms::materializeBuildVectors (VPlan &Plan) {
3790+ void VPlanTransforms::materializePacksAndUnpacks (VPlan &Plan) {
37843791 if (Plan.hasScalarVFOnly ())
37853792 return ;
37863793
@@ -3828,6 +3835,50 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
38283835 });
38293836 }
38303837 }
3838+
3839+ // Create explicit VPInstructions to convert vectors to scalars. The current
3840+ // implementation is conservative - it may miss some cases that may or may not
3841+ // be vector values. TODO: introduce Unpacks speculatively - remove them later
3842+ // if they are known to operate on scalar values.
3843+ for (VPBasicBlock *VPBB : VPBBsInsideLoopRegion) {
3844+ for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
3845+ if (isa<VPReplicateRecipe, VPInstruction, VPScalarIVStepsRecipe,
3846+ VPDerivedIVRecipe, VPCanonicalIVPHIRecipe>(&R))
3847+ continue ;
3848+ for (VPValue *Def : R.definedValues ()) {
3849+ // Skip recipes that are single-scalar or only have their first lane
3850+ // used.
3851+ // TODO: The Defs skipped here may or may not be vector values.
3852+ // Introduce Unpacks, and remove them later, if they are guaranteed to
3853+ // produce scalar values.
3854+ if (vputils::isSingleScalar (Def) || vputils::onlyFirstLaneUsed (Def))
3855+ continue ;
3856+
3857+ // At the moment, we create unpacks only for scalar users outside
3858+ // replicate regions. Recipes inside replicate regions still extract the
3859+ // required lanes implicitly.
3860+ // TODO: Remove once replicate regions are unrolled completely.
3861+ auto IsCandidateUnpackUser = [Def](VPUser *U) {
3862+ VPRegionBlock *ParentRegion =
3863+ cast<VPRecipeBase>(U)->getParent ()->getParent ();
3864+ return U->usesScalars (Def) &&
3865+ (!ParentRegion || !ParentRegion->isReplicator ());
3866+ };
3867+ if (none_of (Def->users (), IsCandidateUnpackUser))
3868+ continue ;
3869+
3870+ auto *Unpack = new VPInstruction (VPInstruction::Unpack, {Def});
3871+ if (R.isPhi ())
3872+ Unpack->insertBefore (*VPBB, VPBB->getFirstNonPhi ());
3873+ else
3874+ Unpack->insertAfter (&R);
3875+ Def->replaceUsesWithIf (Unpack,
3876+ [&IsCandidateUnpackUser](VPUser &U, unsigned ) {
3877+ return IsCandidateUnpackUser (&U);
3878+ });
3879+ }
3880+ }
3881+ }
38313882}
38323883
38333884void VPlanTransforms::materializeVectorTripCount (VPlan &Plan,
0 commit comments