-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[LV] Vectorize maxnum/minnum w/o fast-math flags. #148239
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
42d1194
de2d6f2
8ebc7ff
bf65394
914a0b4
18312dc
bfe6853
e441974
ecc70b8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -84,6 +84,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { | |
return ResTy; | ||
} | ||
case Instruction::ICmp: | ||
case Instruction::FCmp: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can the treatment of FCmp here and below be patched independently, or is it tested only now when FCmp is introduced to check isNaN? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, only this patch introduces FCmp VPInstructions, so only tested now. |
||
case VPInstruction::ActiveLaneMask: | ||
assert(inferScalarType(R->getOperand(0)) == | ||
inferScalarType(R->getOperand(1)) && | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -652,3 +652,163 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond, | |||||||||||||
Term->addMetadata(LLVMContext::MD_prof, BranchWeights); | ||||||||||||||
} | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
bool VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan) { | ||||||||||||||
auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * { | ||||||||||||||
auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>( | ||||||||||||||
RedPhiR->getBackedgeValue()->getDefiningRecipe()); | ||||||||||||||
if (!MinMaxR) | ||||||||||||||
return nullptr; | ||||||||||||||
|
||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth checking/asserting intrinsic is min/max? |
||||||||||||||
auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxR); | ||||||||||||||
if (!isa<VPWidenIntrinsicRecipe>(MinMaxR) && | ||||||||||||||
!(RepR && isa<IntrinsicInst>(RepR->getUnderlyingInstr()))) | ||||||||||||||
return nullptr; | ||||||||||||||
|
||||||||||||||
#ifndef NDEBUG | ||||||||||||||
Intrinsic::ID RdxIntrinsicId = | ||||||||||||||
RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum | ||||||||||||||
: Intrinsic::minnum; | ||||||||||||||
assert((isa<VPWidenIntrinsicRecipe>(MinMaxR) && | ||||||||||||||
cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() == | ||||||||||||||
RdxIntrinsicId) || | ||||||||||||||
(RepR && | ||||||||||||||
cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID() == | ||||||||||||||
RdxIntrinsicId) && | ||||||||||||||
"Intrinsic did not match recurrence kind"); | ||||||||||||||
#endif | ||||||||||||||
|
||||||||||||||
if (MinMaxR->getOperand(0) == RedPhiR) | ||||||||||||||
return MinMaxR->getOperand(1); | ||||||||||||||
|
||||||||||||||
assert(MinMaxR->getOperand(1) == RedPhiR && | ||||||||||||||
"Reduction phi operand expected"); | ||||||||||||||
return MinMaxR->getOperand(0); | ||||||||||||||
}; | ||||||||||||||
|
||||||||||||||
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); | ||||||||||||||
VPReductionPHIRecipe *RedPhiR = nullptr; | ||||||||||||||
bool HasUnsupportedPhi = false; | ||||||||||||||
for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) { | ||||||||||||||
if (isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe>(&R)) | ||||||||||||||
continue; | ||||||||||||||
auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R); | ||||||||||||||
if (!Cur) { | ||||||||||||||
// TODO: Also support fixed-order recurrence phis. | ||||||||||||||
HasUnsupportedPhi = true; | ||||||||||||||
continue; | ||||||||||||||
} | ||||||||||||||
// For now, only a single reduction is supported. | ||||||||||||||
// TODO: Support multiple MaxNum/MinNum reductions and other reductions. | ||||||||||||||
if (RedPhiR) | ||||||||||||||
ayalz marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
return false; | ||||||||||||||
if (Cur->getRecurrenceKind() != RecurKind::FMaxNum && | ||||||||||||||
Cur->getRecurrenceKind() != RecurKind::FMinNum) { | ||||||||||||||
HasUnsupportedPhi = true; | ||||||||||||||
continue; | ||||||||||||||
} | ||||||||||||||
RedPhiR = Cur; | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
if (!RedPhiR) | ||||||||||||||
return true; | ||||||||||||||
|
||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. RedPhiR must be FMaxNum/FMinNum when set, added an assert |
||||||||||||||
// We won't be able to resume execution in the scalar tail, if there are | ||||||||||||||
// unsupported header phis or there is no scalar tail at all, due to | ||||||||||||||
// tail-folding. | ||||||||||||||
if (HasUnsupportedPhi || !Plan.hasScalarTail()) | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks. |
||||||||||||||
return false; | ||||||||||||||
|
||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved, thanks! |
||||||||||||||
VPValue *MinMaxOp = GetMinMaxCompareValue(RedPhiR); | ||||||||||||||
if (!MinMaxOp) | ||||||||||||||
return false; | ||||||||||||||
|
||||||||||||||
RecurKind RedPhiRK = RedPhiR->getRecurrenceKind(); | ||||||||||||||
assert((RedPhiRK == RecurKind::FMaxNum || RedPhiRK == RecurKind::FMinNum) && | ||||||||||||||
"unsupported reduction"); | ||||||||||||||
|
||||||||||||||
/// Check if the vector loop of \p Plan can early exit and restart | ||||||||||||||
/// execution of last vector iteration in the scalar loop. This requires all | ||||||||||||||
/// recipes up to early exit point be side-effect free as they are | ||||||||||||||
/// re-executed. Currently we check that the loop is free of any recipe that | ||||||||||||||
/// may write to memory. Expected to operate on an early VPlan w/o nested | ||||||||||||||
/// regions. | ||||||||||||||
for (VPBlockBase *VPB : vp_depth_first_shallow( | ||||||||||||||
Plan.getVectorLoopRegion()->getEntryBasicBlock())) { | ||||||||||||||
auto *VPBB = cast<VPBasicBlock>(VPB); | ||||||||||||||
for (auto &R : *VPBB) { | ||||||||||||||
if (R.mayWriteToMemory() && | ||||||||||||||
!match(&R, m_BranchOnCount(m_VPValue(), m_VPValue()))) | ||||||||||||||
return false; | ||||||||||||||
} | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock(); | ||||||||||||||
VPBuilder Builder(LatchVPBB->getTerminator()); | ||||||||||||||
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator()); | ||||||||||||||
assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount && | ||||||||||||||
"Unexpected terminator"); | ||||||||||||||
auto *IsLatchExitTaken = | ||||||||||||||
Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0), | ||||||||||||||
LatchExitingBranch->getOperand(1)); | ||||||||||||||
|
||||||||||||||
VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp); | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could have an isNaN opcode if its useful. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, would help merge checks when interleaving, but would be good to do as follow-up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fine. Would also help clarify the purpose of the recipe a bit. |
||||||||||||||
VPValue *AnyNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN}); | ||||||||||||||
auto *AnyExitTaken = | ||||||||||||||
Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken}); | ||||||||||||||
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken); | ||||||||||||||
LatchExitingBranch->eraseFromParent(); | ||||||||||||||
|
||||||||||||||
// If we exit early due to NaNs, compute the final reduction result based on | ||||||||||||||
// the reduction phi at the beginning of the last vector iteration. | ||||||||||||||
auto *RdxResult = find_singleton<VPSingleDefRecipe>( | ||||||||||||||
RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * { | ||||||||||||||
auto *VPI = dyn_cast<VPInstruction>(U); | ||||||||||||||
if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult) | ||||||||||||||
return VPI; | ||||||||||||||
return nullptr; | ||||||||||||||
}); | ||||||||||||||
|
||||||||||||||
auto *MiddleVPBB = Plan.getMiddleBlock(); | ||||||||||||||
Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin()); | ||||||||||||||
auto *NewSel = | ||||||||||||||
Builder.createSelect(AnyNaN, RedPhiR, RdxResult->getOperand(1)); | ||||||||||||||
RdxResult->setOperand(1, NewSel); | ||||||||||||||
|
||||||||||||||
auto *ScalarPH = Plan.getScalarPreheader(); | ||||||||||||||
// Update resume phis for inductions in the scalar preheader. If AnyNaN is | ||||||||||||||
// true, the resume from the start of the last vector iteration via the | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated, thanks |
||||||||||||||
// canonical IV, otherwise from the original value. | ||||||||||||||
for (auto &R : ScalarPH->phis()) { | ||||||||||||||
auto *ResumeR = cast<VPPhi>(&R); | ||||||||||||||
VPValue *VecV = ResumeR->getOperand(0); | ||||||||||||||
if (VecV == RdxResult) | ||||||||||||||
continue; | ||||||||||||||
if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) { | ||||||||||||||
if (DerivedIV->getNumUsers() == 1 && | ||||||||||||||
DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) { | ||||||||||||||
auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), | ||||||||||||||
&Plan.getVectorTripCount()); | ||||||||||||||
DerivedIV->moveAfter(&*Builder.getInsertPoint()); | ||||||||||||||
DerivedIV->setOperand(1, NewSel); | ||||||||||||||
continue; | ||||||||||||||
} | ||||||||||||||
} | ||||||||||||||
// Bail out and abandon the current, partially modified, VPlan if we | ||||||||||||||
// encounter resume phi that cannot be updated yet. | ||||||||||||||
if (VecV != &Plan.getVectorTripCount()) { | ||||||||||||||
LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with " | ||||||||||||||
"FMaxNum/FMinNum reduction.\n"); | ||||||||||||||
return false; | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Returning false now, after several recipes have been modified? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, in that case we abandon the transform (and the invalid VPlan wil be discarded). Alternative would be to first check separately before making changes and then updating phis separately. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth noting, at-least. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, added a debug message + comment! |
||||||||||||||
} | ||||||||||||||
auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV); | ||||||||||||||
ResumeR->setOperand(0, NewSel); | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
auto *MiddleTerm = MiddleVPBB->getTerminator(); | ||||||||||||||
Builder.setInsertPoint(MiddleTerm); | ||||||||||||||
VPValue *MiddleCond = MiddleTerm->getOperand(0); | ||||||||||||||
VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN)); | ||||||||||||||
MiddleTerm->setOperand(0, NewCond); | ||||||||||||||
return true; | ||||||||||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks!