Skip to content

Commit cff3418

Browse files
authored
Merge pull request #11963 from fhahn/pick-vplan-cost-improvements
[LV] Pick VPlan cost improvements rdar://163931465
2 parents 37e4e22 + 8cd7527 commit cff3418

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+4311
-929
lines changed

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -636,8 +636,12 @@ class ScalarEvolution {
636636
/// \p GEP The GEP. The indices contained in the GEP itself are ignored,
637637
/// instead we use IndexExprs.
638638
/// \p IndexExprs The expressions for the indices.
639-
LLVM_ABI const SCEV *
640-
getGEPExpr(GEPOperator *GEP, const SmallVectorImpl<const SCEV *> &IndexExprs);
639+
LLVM_ABI const SCEV *getGEPExpr(GEPOperator *GEP,
640+
ArrayRef<const SCEV *> IndexExprs);
641+
LLVM_ABI const SCEV *getGEPExpr(const SCEV *BaseExpr,
642+
ArrayRef<const SCEV *> IndexExprs,
643+
Type *SrcElementTy,
644+
GEPNoWrapFlags NW = GEPNoWrapFlags::none());
641645
LLVM_ABI const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW);
642646
LLVM_ABI const SCEV *getMinMaxExpr(SCEVTypes Kind,
643647
SmallVectorImpl<const SCEV *> &Operands);

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -961,12 +961,10 @@ class TargetTransformInfo {
961961
TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
962962
ArrayRef<Value *> VL = {}) const;
963963

964-
/// Estimate the overhead of scalarizing an instructions unique
965-
/// non-constant operands. The (potentially vector) types to use for each of
966-
/// argument are passes via Tys.
964+
/// Estimate the overhead of scalarizing operands with the given types. The
965+
/// (potentially vector) types to use for each argument are passed via Tys.
967966
LLVM_ABI InstructionCost getOperandsScalarizationOverhead(
968-
ArrayRef<const Value *> Args, ArrayRef<Type *> Tys,
969-
TTI::TargetCostKind CostKind) const;
967+
ArrayRef<Type *> Tys, TTI::TargetCostKind CostKind) const;
970968

971969
/// If target has efficient vector element load/store instructions, it can
972970
/// return true here so that insertion/extraction costs are not added to

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -459,8 +459,7 @@ class TargetTransformInfoImplBase {
459459
}
460460

461461
virtual InstructionCost
462-
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
463-
ArrayRef<Type *> Tys,
462+
getOperandsScalarizationOverhead(ArrayRef<Type *> Tys,
464463
TTI::TargetCostKind CostKind) const {
465464
return 0;
466465
}

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
#include "llvm/ADT/APInt.h"
2020
#include "llvm/ADT/BitVector.h"
21+
#include "llvm/ADT/STLExtras.h"
2122
#include "llvm/ADT/SmallPtrSet.h"
2223
#include "llvm/ADT/SmallVector.h"
2324
#include "llvm/Analysis/LoopInfo.h"
@@ -347,6 +348,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
347348
return Cost;
348349
}
349350

351+
/// Filter out constant and duplicated entries in \p Ops and return a vector
352+
/// containing the types from \p Tys corresponding to the remaining operands.
353+
static SmallVector<Type *, 4>
354+
filterConstantAndDuplicatedOperands(ArrayRef<const Value *> Ops,
355+
ArrayRef<Type *> Tys) {
356+
SmallPtrSet<const Value *, 4> UniqueOperands;
357+
SmallVector<Type *, 4> FilteredTys;
358+
for (const auto &[Op, Ty] : zip_equal(Ops, Tys)) {
359+
if (isa<Constant>(Op) || !UniqueOperands.insert(Op).second)
360+
continue;
361+
FilteredTys.push_back(Ty);
362+
}
363+
return FilteredTys;
364+
}
365+
350366
protected:
351367
explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
352368
: BaseT(DL) {}
@@ -935,29 +951,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
935951
CostKind);
936952
}
937953

938-
/// Estimate the overhead of scalarizing an instructions unique
939-
/// non-constant operands. The (potentially vector) types to use for each of
954+
/// Estimate the overhead of scalarizing an instruction's
955+
/// operands. The (potentially vector) types to use for each of
940956
/// the arguments are passed via Tys.
941957
InstructionCost getOperandsScalarizationOverhead(
942-
ArrayRef<const Value *> Args, ArrayRef<Type *> Tys,
943-
TTI::TargetCostKind CostKind) const override {
944-
assert(Args.size() == Tys.size() && "Expected matching Args and Tys");
945-
958+
ArrayRef<Type *> Tys, TTI::TargetCostKind CostKind) const override {
946959
InstructionCost Cost = 0;
947-
SmallPtrSet<const Value*, 4> UniqueOperands;
948-
for (int I = 0, E = Args.size(); I != E; I++) {
960+
for (Type *Ty : Tys) {
949961
// Disregard things like metadata arguments.
950-
const Value *A = Args[I];
951-
Type *Ty = Tys[I];
952962
if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
953963
!Ty->isPtrOrPtrVectorTy())
954964
continue;
955965

956-
if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
957-
if (auto *VecTy = dyn_cast<VectorType>(Ty))
958-
Cost += getScalarizationOverhead(VecTy, /*Insert*/ false,
959-
/*Extract*/ true, CostKind);
960-
}
966+
if (auto *VecTy = dyn_cast<VectorType>(Ty))
967+
Cost += getScalarizationOverhead(VecTy, /*Insert*/ false,
968+
/*Extract*/ true, CostKind);
961969
}
962970

963971
return Cost;
@@ -974,7 +982,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
974982
InstructionCost Cost = getScalarizationOverhead(
975983
RetTy, /*Insert*/ true, /*Extract*/ false, CostKind);
976984
if (!Args.empty())
977-
Cost += getOperandsScalarizationOverhead(Args, Tys, CostKind);
985+
Cost += getOperandsScalarizationOverhead(
986+
filterConstantAndDuplicatedOperands(Args, Tys), CostKind);
978987
else
979988
// When no information on arguments is provided, we add the cost
980989
// associated with one argument as a heuristic.
@@ -2158,8 +2167,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
21582167
/*Insert=*/true, /*Extract=*/false, CostKind);
21592168
}
21602169
}
2161-
ScalarizationCost +=
2162-
getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
2170+
ScalarizationCost += getOperandsScalarizationOverhead(
2171+
filterConstantAndDuplicatedOperands(Args, ICA.getArgTypes()),
2172+
CostKind);
21632173
}
21642174

21652175
IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3493,17 +3493,25 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
34933493
/// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
34943494
// We can currently only fold X%N if X is constant.
34953495
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
3496-
if (StartC && !DivInt.urem(StepInt) &&
3497-
getZeroExtendExpr(AR, ExtTy) ==
3498-
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
3499-
getZeroExtendExpr(Step, ExtTy),
3500-
AR->getLoop(), SCEV::FlagAnyWrap)) {
3496+
if (StartC && !DivInt.urem(StepInt)) {
35013497
const APInt &StartInt = StartC->getAPInt();
35023498
const APInt &StartRem = StartInt.urem(StepInt);
3503-
if (StartRem != 0) {
3504-
const SCEV *NewLHS =
3505-
getAddRecExpr(getConstant(StartInt - StartRem), Step,
3506-
AR->getLoop(), SCEV::FlagNW);
3499+
bool NoWrap =
3500+
getZeroExtendExpr(AR, ExtTy) ==
3501+
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
3502+
getZeroExtendExpr(Step, ExtTy), AR->getLoop(),
3503+
SCEV::FlagAnyWrap);
3504+
3505+
// With N <= C and both N, C as powers-of-2, the transformation
3506+
// {X,+,N}/C => {(X - X%N),+,N}/C preserves division results even
3507+
// if wrapping occurs, as the division results remain equivalent for
3508+
// all offsets in [(X - X%N), X).
3509+
bool CanFoldWithWrap = StepInt.ule(DivInt) && // N <= C
3510+
StepInt.isPowerOf2() && DivInt.isPowerOf2();
3511+
if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) {
3512+
const SCEV *NewLHS = getAddRecExpr(
3513+
getConstant(StartInt - StartRem), Step, AR->getLoop(),
3514+
NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
35073515
if (LHS != NewLHS) {
35083516
LHS = NewLHS;
35093517

@@ -3770,13 +3778,11 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
37703778
return getOrCreateAddRecExpr(Operands, L, Flags);
37713779
}
37723780

3773-
const SCEV *
3774-
ScalarEvolution::getGEPExpr(GEPOperator *GEP,
3775-
const SmallVectorImpl<const SCEV *> &IndexExprs) {
3781+
const SCEV *ScalarEvolution::getGEPExpr(GEPOperator *GEP,
3782+
ArrayRef<const SCEV *> IndexExprs) {
37763783
const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand());
37773784
// getSCEV(Base)->getType() has the same address space as Base->getType()
37783785
// because SCEV::getType() preserves the address space.
3779-
Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType());
37803786
GEPNoWrapFlags NW = GEP->getNoWrapFlags();
37813787
if (NW != GEPNoWrapFlags::none()) {
37823788
// We'd like to propagate flags from the IR to the corresponding SCEV nodes,
@@ -3789,13 +3795,20 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
37893795
NW = GEPNoWrapFlags::none();
37903796
}
37913797

3798+
return getGEPExpr(BaseExpr, IndexExprs, GEP->getSourceElementType(), NW);
3799+
}
3800+
3801+
const SCEV *ScalarEvolution::getGEPExpr(const SCEV *BaseExpr,
3802+
ArrayRef<const SCEV *> IndexExprs,
3803+
Type *SrcElementTy, GEPNoWrapFlags NW) {
37923804
SCEV::NoWrapFlags OffsetWrap = SCEV::FlagAnyWrap;
37933805
if (NW.hasNoUnsignedSignedWrap())
37943806
OffsetWrap = setFlags(OffsetWrap, SCEV::FlagNSW);
37953807
if (NW.hasNoUnsignedWrap())
37963808
OffsetWrap = setFlags(OffsetWrap, SCEV::FlagNUW);
37973809

3798-
Type *CurTy = GEP->getType();
3810+
Type *CurTy = BaseExpr->getType();
3811+
Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType());
37993812
bool FirstIter = true;
38003813
SmallVector<const SCEV *, 4> Offsets;
38013814
for (const SCEV *IndexExpr : IndexExprs) {
@@ -3814,7 +3827,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
38143827
if (FirstIter) {
38153828
assert(isa<PointerType>(CurTy) &&
38163829
"The first index of a GEP indexes a pointer");
3817-
CurTy = GEP->getSourceElementType();
3830+
CurTy = SrcElementTy;
38183831
FirstIter = false;
38193832
} else {
38203833
CurTy = GetElementPtrInst::getTypeAtIndex(CurTy, (uint64_t)0);

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -637,9 +637,8 @@ InstructionCost TargetTransformInfo::getScalarizationOverhead(
637637
}
638638

639639
InstructionCost TargetTransformInfo::getOperandsScalarizationOverhead(
640-
ArrayRef<const Value *> Args, ArrayRef<Type *> Tys,
641-
TTI::TargetCostKind CostKind) const {
642-
return TTIImpl->getOperandsScalarizationOverhead(Args, Tys, CostKind);
640+
ArrayRef<Type *> Tys, TTI::TargetCostKind CostKind) const {
641+
return TTIImpl->getOperandsScalarizationOverhead(Tys, CostKind);
643642
}
644643

645644
bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {

0 commit comments

Comments
 (0)