@@ -463,15 +463,16 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
463463 VPlanTransforms::removeDeadRecipes (Plan);
464464}
465465
466- // / Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
467- // / Def2LaneDefs to look up scalar definitions for operands of \RepR.
468- static VPReplicateRecipe *
466+ // / Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
467+ // / VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
468+ // / definitions for operands of \p DefR.
469+ static VPRecipeWithIRFlags *
469470cloneForLane (VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
470- VPReplicateRecipe *RepR , VPLane Lane,
471+ VPRecipeWithIRFlags *DefR , VPLane Lane,
471472 const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
472473 // Collect the operands at Lane, creating extracts as needed.
473474 SmallVector<VPValue *> NewOps;
474- for (VPValue *Op : RepR ->operands ()) {
475+ for (VPValue *Op : DefR ->operands ()) {
475476 // If Op is a definition that has been unrolled, directly use the clone for
476477 // the corresponding lane.
477478 auto LaneDefs = Def2LaneDefs.find (Op);
@@ -501,11 +502,24 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
501502 NewOps.push_back (Ext);
502503 }
503504
504- auto *New =
505- new VPReplicateRecipe (RepR->getUnderlyingInstr (), NewOps,
506- /* IsSingleScalar=*/ true , /* Mask=*/ nullptr , *RepR);
507- New->transferFlags (*RepR);
508- New->insertBefore (RepR);
505+ VPRecipeWithIRFlags *New;
506+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
507+ // TODO: have cloning of replicate recipes also provide the desired result
508+ // coupled with setting its operands to NewOps (deriving IsSingleScalar and
509+ // Mask from the operands?)
510+ New =
511+ new VPReplicateRecipe (RepR->getUnderlyingInstr (), NewOps,
512+ /* IsSingleScalar=*/ true , /* Mask=*/ nullptr , *RepR);
513+ } else {
514+ assert (isa<VPInstruction>(DefR) &&
515+ " DefR must be a VPReplicateRecipe or VPInstruction" );
516+ New = DefR->clone ();
517+ for (const auto &[Idx, Op] : enumerate(NewOps)) {
518+ New->setOperand (Idx, Op);
519+ }
520+ }
521+ New->transferFlags (*DefR);
522+ New->insertBefore (DefR);
509523 return New;
510524}
511525
@@ -530,34 +544,38 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
530544 SmallVector<VPRecipeBase *> ToRemove;
531545 for (VPBasicBlock *VPBB : VPBBsToUnroll) {
532546 for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
533- auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
534- if (!RepR || RepR->isSingleScalar ())
547+ if (!isa<VPInstruction, VPReplicateRecipe>(&R) ||
548+ (isa<VPReplicateRecipe>(&R) &&
549+ cast<VPReplicateRecipe>(&R)->isSingleScalar ()) ||
550+ (isa<VPInstruction>(&R) &&
551+ !cast<VPInstruction>(&R)->doesGeneratePerAllLanes ()))
535552 continue ;
536553
537- VPBuilder Builder (RepR);
538- if (RepR->getNumUsers () == 0 ) {
539- // Create single-scalar version of RepR for all lanes.
554+ auto *DefR = cast<VPRecipeWithIRFlags>(&R);
555+ VPBuilder Builder (DefR);
556+ if (DefR->getNumUsers () == 0 ) {
557+ // Create single-scalar version of DefR for all lanes.
540558 for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
541- cloneForLane (Plan, Builder, IdxTy, RepR , VPLane (I), Def2LaneDefs);
542- RepR ->eraseFromParent ();
559+ cloneForLane (Plan, Builder, IdxTy, DefR , VPLane (I), Def2LaneDefs);
560+ DefR ->eraseFromParent ();
543561 continue ;
544562 }
545- // / Create single-scalar version of RepR for all lanes.
563+ // / Create single-scalar version of DefR for all lanes.
546564 SmallVector<VPValue *> LaneDefs;
547565 for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
548566 LaneDefs.push_back (
549- cloneForLane (Plan, Builder, IdxTy, RepR , VPLane (I), Def2LaneDefs));
567+ cloneForLane (Plan, Builder, IdxTy, DefR , VPLane (I), Def2LaneDefs));
550568
551- Def2LaneDefs[RepR ] = LaneDefs;
569+ Def2LaneDefs[DefR ] = LaneDefs;
552570 // / Users that only demand the first lane can use the definition for lane
553571 // / 0.
554- RepR ->replaceUsesWithIf (LaneDefs[0 ], [RepR ](VPUser &U, unsigned ) {
555- return U.onlyFirstLaneUsed (RepR );
572+ DefR ->replaceUsesWithIf (LaneDefs[0 ], [DefR ](VPUser &U, unsigned ) {
573+ return U.onlyFirstLaneUsed (DefR );
556574 });
557575
558- // Update each build vector user that currently has RepR as its only
576+ // Update each build vector user that currently has DefR as its only
559577 // operand, to have all LaneDefs as its operands.
560- for (VPUser *U : to_vector (RepR ->users ())) {
578+ for (VPUser *U : to_vector (DefR ->users ())) {
561579 auto *VPI = dyn_cast<VPInstruction>(U);
562580 if (!VPI || (VPI->getOpcode () != VPInstruction::BuildVector &&
563581 VPI->getOpcode () != VPInstruction::BuildStructVector))
@@ -569,7 +587,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
569587 for (VPValue *LaneDef : drop_begin (LaneDefs))
570588 VPI->addOperand (LaneDef);
571589 }
572- ToRemove.push_back (RepR );
590+ ToRemove.push_back (DefR );
573591 }
574592 }
575593 for (auto *R : reverse (ToRemove))
0 commit comments