@@ -2148,6 +2148,24 @@ void VPCSAHeaderPHIRecipe::execute(VPTransformState &State) {
2148
2148
State.set (this , DataPhi, Part);
2149
2149
}
2150
2150
2151
+ InstructionCost VPCSAHeaderPHIRecipe::computeCost (ElementCount VF,
2152
+ VPCostContext &Ctx) const {
2153
+ if (VF.isScalar ())
2154
+ return 0 ;
2155
+
2156
+ InstructionCost C = 0 ;
2157
+ auto *VTy = VectorType::get (getUnderlyingValue ()->getType (), VF);
2158
+ const TargetTransformInfo &TTI = Ctx.TTI ;
2159
+
2160
+ // FIXME: These costs should be moved into VPInstruction::computeCost. We put
2161
+ // them here for now since there is no VPInstruction::computeCost support.
2162
+ // CSAInitMask
2163
+ C += TTI.getShuffleCost (TargetTransformInfo::SK_Broadcast, VTy);
2164
+ // CSAInitData
2165
+ C += TTI.getShuffleCost (TargetTransformInfo::SK_Broadcast, VTy);
2166
+ return C;
2167
+ }
2168
+
2151
2169
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2152
2170
void VPCSADataUpdateRecipe::print (raw_ostream &O, const Twine &Indent,
2153
2171
VPSlotTracker &SlotTracker) const {
@@ -2176,6 +2194,34 @@ void VPCSADataUpdateRecipe::execute(VPTransformState &State) {
2176
2194
}
2177
2195
}
2178
2196
2197
+ InstructionCost VPCSADataUpdateRecipe::computeCost (ElementCount VF,
2198
+ VPCostContext &Ctx) const {
2199
+ if (VF.isScalar ())
2200
+ return 0 ;
2201
+
2202
+ InstructionCost C = 0 ;
2203
+ auto *VTy = VectorType::get (getUnderlyingValue ()->getType (), VF);
2204
+ auto *MaskTy = VectorType::get (IntegerType::getInt1Ty (VTy->getContext ()), VF);
2205
+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2206
+ const TargetTransformInfo &TTI = Ctx.TTI ;
2207
+
2208
+ // Data Update
2209
+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy, CostKind);
2210
+
2211
+ // FIXME: These costs should be moved into VPInstruction::computeCost. We put
2212
+ // them here for now since they are related to updating the data and there is
2213
+ // no VPInstruction::computeCost support at the moment. CSAInitMask AnyActive
2214
+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy, CostKind);
2215
+ // vp.reduce.or
2216
+ C += TTI.getArithmeticReductionCost (Instruction::Or, VTy, std::nullopt,
2217
+ CostKind);
2218
+ // VPVLSel
2219
+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy, CostKind);
2220
+ // MaskUpdate
2221
+ C += TTI.getArithmeticInstrCost (Instruction::Select, MaskTy, CostKind);
2222
+ return C;
2223
+ }
2224
+
2179
2225
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2180
2226
void VPCSAExtractScalarRecipe::print (raw_ostream &O, const Twine &Indent,
2181
2227
VPSlotTracker &SlotTracker) const {
@@ -2236,6 +2282,60 @@ void VPCSAExtractScalarRecipe::execute(VPTransformState &State) {
2236
2282
State.set (this , ChooseFromVecOrInit, 0 , /* IsScalar=*/ true );
2237
2283
}
2238
2284
2285
+ InstructionCost
2286
+ VPCSAExtractScalarRecipe::computeCost (ElementCount VF,
2287
+ VPCostContext &Ctx) const {
2288
+ if (VF.isScalar ())
2289
+ return 0 ;
2290
+
2291
+ InstructionCost C = 0 ;
2292
+ auto *VTy = VectorType::get (getUnderlyingValue ()->getType (), VF);
2293
+ auto *Int32VTy =
2294
+ VectorType::get (IntegerType::getInt32Ty (VTy->getContext ()), VF);
2295
+ auto *MaskTy = VectorType::get (IntegerType::getInt1Ty (VTy->getContext ()), VF);
2296
+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2297
+ const TargetTransformInfo &TTI = Ctx.TTI ;
2298
+
2299
+ // StepVector
2300
+ ArrayRef<Value *> Args;
2301
+ IntrinsicCostAttributes CostAttrs (Intrinsic::stepvector, Int32VTy, Args);
2302
+ C += TTI.getIntrinsicInstrCost (CostAttrs, CostKind);
2303
+ // NegOneSplat
2304
+ C += TTI.getShuffleCost (TargetTransformInfo::SK_Broadcast, Int32VTy);
2305
+ // LastIdx
2306
+ if (usesEVL ()) {
2307
+ C += TTI.getMinMaxReductionCost (Intrinsic::smax, Int32VTy, FastMathFlags (),
2308
+ CostKind);
2309
+ } else {
2310
+ // ActiveLaneIdxs
2311
+ C += TTI.getArithmeticInstrCost (Instruction::Select,
2312
+ MaskTy->getScalarType (), CostKind);
2313
+ // MaybeLastIdx
2314
+ C += TTI.getMinMaxReductionCost (Intrinsic::smax, Int32VTy, FastMathFlags (),
2315
+ CostKind);
2316
+ // IsLaneZeroActive
2317
+ C += TTI.getArithmeticInstrCost (Instruction::ExtractElement, MaskTy,
2318
+ CostKind);
2319
+ // MaybeLastIdxEQZero
2320
+ C += TTI.getArithmeticInstrCost (Instruction::ICmp, MaskTy->getScalarType (),
2321
+ CostKind);
2322
+ // And
2323
+ C += TTI.getArithmeticInstrCost (Instruction::And, MaskTy->getScalarType (),
2324
+ CostKind);
2325
+ // LastIdx
2326
+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy->getScalarType (),
2327
+ CostKind);
2328
+ }
2329
+ // ExtractFromVec
2330
+ C += TTI.getArithmeticInstrCost (Instruction::ExtractElement, VTy, CostKind);
2331
+ // LastIdxGeZero
2332
+ C += TTI.getArithmeticInstrCost (Instruction::ICmp, Int32VTy, CostKind);
2333
+ // ChooseFromVecOrInit
2334
+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy->getScalarType (),
2335
+ CostKind);
2336
+ return C;
2337
+ }
2338
+
2239
2339
void VPBranchOnMaskRecipe::execute (VPTransformState &State) {
2240
2340
assert (State.Instance && " Branch on Mask works only on single instance." );
2241
2341
0 commit comments