diff --git a/cmake/llvm-version-imex.txt b/cmake/llvm-version-imex.txt index 33000613b..04e84c095 100644 --- a/cmake/llvm-version-imex.txt +++ b/cmake/llvm-version-imex.txt @@ -1 +1 @@ -add6b2f35f2bcf1f59a2ab2d5b3dab124fe0895a +7842374103b26933d71a8fe354cd4d8715d55b1c diff --git a/cmake/llvm-version.txt b/cmake/llvm-version.txt index 33000613b..04e84c095 100644 --- a/cmake/llvm-version.txt +++ b/cmake/llvm-version.txt @@ -1 +1 @@ -add6b2f35f2bcf1f59a2ab2d5b3dab124fe0895a +7842374103b26933d71a8fe354cd4d8715d55b1c diff --git a/include/gc/Dialect/LLVMIR/XeVMOps.td b/include/gc/Dialect/LLVMIR/XeVMOps.td index 4b4ee6814..c5fe511dd 100644 --- a/include/gc/Dialect/LLVMIR/XeVMOps.td +++ b/include/gc/Dialect/LLVMIR/XeVMOps.td @@ -70,7 +70,7 @@ def XeVM_L1StoreCacheControl : XeVM_StoreCacheControl<"L1">; def XeVM_L3StoreCacheControl : XeVM_StoreCacheControl<"L3">; def XeVM_BlockLoad2dOp : XeVM_Op<"blockload2d">, - Results<(outs FixedVectorOf<[XeVM_ElemType]>:$res)>, + Results<(outs FixedVectorOfRankAndType<[1,2,3], [XeVM_ElemType]>:$res)>, Arguments<(ins Arg:$ptr, I32:$base_width, @@ -137,7 +137,7 @@ def XeVM_BlockStore2dOp : XeVM_Op<"blockstore2d">, I32Attr:$tile_width, I32Attr:$tile_height, I32Attr:$v_blocks, - FixedVectorOf<[XeVM_ElemType]>:$stored_val, + FixedVectorOfRankAndType<[1, 2, 3], [XeVM_ElemType]>:$stored_val, DefaultValuedAttr:$l1_cache_control, DefaultValuedAttr:$l3_cache_control )> { @@ -243,7 +243,7 @@ def XeVM_PrecisionTypeAttr : I32EnumAttr<"PrecisionType", } def XeVM_DPASOp : XeVM_Op<"dpas">, - Results<(outs FixedVectorOf<[XeVM_MatrixElemType]>:$d)>, + Results<(outs FixedVectorOfRankAndType<[1], [XeVM_MatrixElemType]>:$d)>, Arguments<(ins FixedVectorOfRankAndType<[1], [XeVM_MatrixElemType]>:$c, FixedVectorOfRankAndType<[1], [XeVM_MatrixElemType]>:$a, diff --git a/include/gc/Transforms/Microkernel/BrgemmRuntimeUtils.h b/include/gc/Transforms/Microkernel/BrgemmRuntimeUtils.h index 0c92458ed..9c0ba87db 100644 --- 
a/include/gc/Transforms/Microkernel/BrgemmRuntimeUtils.h +++ b/include/gc/Transforms/Microkernel/BrgemmRuntimeUtils.h @@ -27,13 +27,13 @@ static inline int64_t getDnnlDataTypeVal(RewriterBase &rewriter, auto context = rewriter.getContext(); auto tattr = dyn_cast_or_null(attr); assert(tattr); - if (tattr == TypeAttr::get(FloatType::getF32(context))) { + if (tattr == TypeAttr::get(Float32Type::get(context))) { return static_cast(dnnl_f32); - } else if (tattr == TypeAttr::get(FloatType::getF64(context))) { + } else if (tattr == TypeAttr::get(Float64Type::get(context))) { return static_cast(dnnl_f64); - } else if (tattr == TypeAttr::get(FloatType::getBF16(context))) { + } else if (tattr == TypeAttr::get(BFloat16Type::get(context))) { return static_cast(dnnl_bf16); - } else if (tattr == TypeAttr::get(FloatType::getF16(context))) { + } else if (tattr == TypeAttr::get(Float16Type::get(context))) { return static_cast(dnnl_f16); } else if (tattr == TypeAttr::get( IntegerType::get(context, 32, IntegerType::Signed))) { diff --git a/include/gc/Transforms/Utils/StructuredOpMatcher.h b/include/gc/Transforms/Utils/StructuredOpMatcher.h index 66d398474..131888b1b 100644 --- a/include/gc/Transforms/Utils/StructuredOpMatcher.h +++ b/include/gc/Transforms/Utils/StructuredOpMatcher.h @@ -163,7 +163,7 @@ struct HasStaticStrides { SmallVector strides; if (auto memRefType = dyn_cast_or_null(operandType)) { int64_t offset; - if (failed(getStridesAndOffset(memRefType, strides, offset))) + if (failed(memRefType.getStridesAndOffset(strides, offset))) return false; if (llvm::any_of(strides, [](int64_t stride) { return stride == ShapedType::kDynamic; @@ -244,7 +244,8 @@ struct NumDpsInits { // Callable object to validate number of input operands for `op`. 
struct NumDpsInputs { NumDpsInputs() = delete; - explicit NumDpsInputs(std::function fun) : fun(std::move(fun)){}; + explicit NumDpsInputs(std::function fun) + : fun(std::move(fun)){}; bool operator()(Operation *op) { if (auto linalgOp = dyn_cast_or_null(op)) diff --git a/lib/gc/Dialect/Linalgx/Utils.cpp b/lib/gc/Dialect/Linalgx/Utils.cpp index fe9096fe7..73a1c9f93 100644 --- a/lib/gc/Dialect/Linalgx/Utils.cpp +++ b/lib/gc/Dialect/Linalgx/Utils.cpp @@ -385,7 +385,7 @@ bool isGenericAttrEquivalent(linalg::GenericOp op, ShapedType shapeA, DenseMap replaceMap; std::map iterMap; // get shape-to-loop map - AffineMap inverse = inversePermutation(concatAffineMaps(inMaps)); + AffineMap inverse = inversePermutation(concatAffineMaps(inMaps, context)); assert(inverse && "shape-to-loops map to be non-null"); assert(dimSize == inverse.getResults().size()); // renumber the dim id based on shape-to-loop map @@ -492,8 +492,10 @@ bool isGenericPackedMatmulOpImpl(linalg::GenericOp genericOp, return false; } // Check for packing - ValueRange inputs = genericOp.getDpsInputs(); - ValueRange outputs = genericOp.getDpsInits(); + auto inputsVec = genericOp.getDpsInputs(); + ValueRange inputs = inputsVec; + auto outputsVec = genericOp.getDpsInits(); + ValueRange outputs = outputsVec; auto shapeA = cast(inputs.front().getType()); auto shapeB = cast(inputs.back().getType()); auto shapeC = cast(outputs.back().getType()); diff --git a/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp b/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp index 785a5bc03..f8fc07bee 100644 --- a/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp +++ b/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp @@ -551,11 +551,11 @@ static LogicalResult verifyBrgemmDataTypes(ArrayAttr dtypes, auto context = op.getContext(); -#define FTAttr(t) TypeAttr::get(FloatType::get##t(context)) +#define FTAttr(t) TypeAttr::get(t::get(context)) #define ITAttr(s, w) TypeAttr::get(IntegerType::get(context, w, IntegerType::s)) SmallVector> validDataTypes = 
{ - {FTAttr(F32), FTAttr(F32)}, - {FTAttr(BF16), FTAttr(BF16)}, + {FTAttr(Float32Type), FTAttr(Float32Type)}, + {FTAttr(BFloat16Type), FTAttr(BFloat16Type)}, {ITAttr(Unsigned, 8), ITAttr(Signed, 8)}, {ITAttr(Signed, 8), ITAttr(Unsigned, 8)}, {ITAttr(Unsigned, 8), ITAttr(Unsigned, 8)}, diff --git a/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp b/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp index 2c48c214e..dfdf366d9 100644 --- a/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp +++ b/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp @@ -718,7 +718,7 @@ StringRef createStaticMain(OpBuilder &builder, ModuleOp &module, auto offsetPtr = constArgs.end(); constArgs.emplace_back(0); constArgs.append(shape.begin(), shape.end()); - if (failed(getStridesAndOffset(type, constArgs, *offsetPtr))) { + if (failed(type.getStridesAndOffset(constArgs, *offsetPtr))) { gcLogD("Failed to get strides and offset of arg", i, " of the function ", funcName.begin()); return {}; @@ -929,8 +929,9 @@ OclModuleBuilder::build(const OclRuntime::Ext &ext) { builder.getI64IntegerAttr(static_cast(wgSize))); TargetDeviceSpecInterface devSpec = TargetDeviceSpecAttr::get(ctx, dltiAttrs); - auto sysSpec = - TargetSystemSpecAttr::get(ctx, ArrayRef(std::pair(devStr, devSpec))); + DataLayoutEntryInterface dl = + DataLayoutEntryAttr::get(ctx, devStr, devSpec); + auto sysSpec = TargetSystemSpecAttr::get(ctx, ArrayRef(dl)); mod = mlirModule.clone(); mod.getOperation()->setAttr("#dlti.sys_spec", sysSpec); PassManager pm{ctx}; diff --git a/lib/gc/Transforms/DecomposeAggregatedOps.cpp b/lib/gc/Transforms/DecomposeAggregatedOps.cpp index a9cf889a9..3f84a8b3a 100644 --- a/lib/gc/Transforms/DecomposeAggregatedOps.cpp +++ b/lib/gc/Transforms/DecomposeAggregatedOps.cpp @@ -42,7 +42,7 @@ struct DecomposeAggregatedOps void runOnOperation() override { RewritePatternSet patterns(getOperation().getContext()); patterns.add(patterns.getContext()); - 
(void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); + (void)applyPatternsGreedily(getOperation(), std::move(patterns)); } }; diff --git a/lib/gc/Transforms/DecomposeTensorOperation.cpp b/lib/gc/Transforms/DecomposeTensorOperation.cpp index 3f4f4ecf9..758d97717 100644 --- a/lib/gc/Transforms/DecomposeTensorOperation.cpp +++ b/lib/gc/Transforms/DecomposeTensorOperation.cpp @@ -170,8 +170,7 @@ struct DecomposeTensorOperationPass patterns.add(patterns.getContext()); tensor::populateDecomposeTensorConcatPatterns(patterns); - if (failed(applyPatternsAndFoldGreedily(getOperation(), - std::move(patterns)))) { + if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) { return signalPassFailure(); } } diff --git a/lib/gc/Transforms/DeepTileContractionOp.cpp b/lib/gc/Transforms/DeepTileContractionOp.cpp index 21de7b778..c53138f44 100644 --- a/lib/gc/Transforms/DeepTileContractionOp.cpp +++ b/lib/gc/Transforms/DeepTileContractionOp.cpp @@ -405,7 +405,7 @@ generateOuterLoop(RewriterBase &b, linalg::LinalgOp linalgOp, // the extra copy generated by bufferization. So remove the dummy loop // at this early stage. 
if (!isDummyLoop(tilingResult->loops.back())) { - b.replaceOp(currentOp, tilingResult->replacements); + b.replaceOp(currentOp, tilingResult->mergeResult.replacements); currentOp = dyn_cast(tilingResult->tiledOps.back()); if (iteratorTypes[d] == mlir::utils::IteratorType::reduction) result.reductionLoops.push_back(tilingResult->loops.back()); @@ -477,7 +477,7 @@ generateOuterLoop(RewriterBase &b, linalg::LinalgOp linalgOp, b, cast(currentOp.getOperation()), tileOption); if (failed(tilingResult)) return failure(); - b.replaceOp(currentOp, tilingResult->replacements); + b.replaceOp(currentOp, tilingResult->mergeResult.replacements); currentOp = dyn_cast(tilingResult->tiledOps.back()); } } @@ -1029,8 +1029,7 @@ struct DeepTileContractionOp dialect->getCanonicalizationPatterns(patterns); for (RegisteredOperationName op : ctx.getRegisteredOperations()) op.getCanonicalizationPatterns(patterns, &ctx); - if (failed( - applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)))) + if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) return signalPassFailure(); } }; diff --git a/lib/gc/Transforms/FoldTensorOperation.cpp b/lib/gc/Transforms/FoldTensorOperation.cpp index e0bf23abb..abd84ab16 100644 --- a/lib/gc/Transforms/FoldTensorOperation.cpp +++ b/lib/gc/Transforms/FoldTensorOperation.cpp @@ -44,8 +44,7 @@ struct FoldTensorOperationPass // Use to remove useless tensor operation like extract or // insert slice. 
config.strictMode = GreedyRewriteStrictness::ExistingOps; - (void)applyPatternsAndFoldGreedily(getOperation(), std::move(pattern), - config); + (void)applyPatternsGreedily(getOperation(), std::move(pattern), config); } }; } // namespace diff --git a/lib/gc/Transforms/GPU/AllocsToSLM.cpp b/lib/gc/Transforms/GPU/AllocsToSLM.cpp index 46ec2a4ad..06c4dce6b 100644 --- a/lib/gc/Transforms/GPU/AllocsToSLM.cpp +++ b/lib/gc/Transforms/GPU/AllocsToSLM.cpp @@ -152,7 +152,7 @@ struct AllocsToSLM : public gc::impl::AllocsToSLMBase { RewritePatternSet patterns(ctx); patterns.add(patterns.getContext()); - (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); + (void)applyPatternsGreedily(getOperation(), std::move(patterns)); } }; diff --git a/lib/gc/Transforms/GPU/IMEX/LinalgToXeGPU.cpp b/lib/gc/Transforms/GPU/IMEX/LinalgToXeGPU.cpp index bc78fe937..8edeca784 100644 --- a/lib/gc/Transforms/GPU/IMEX/LinalgToXeGPU.cpp +++ b/lib/gc/Transforms/GPU/IMEX/LinalgToXeGPU.cpp @@ -2124,17 +2124,17 @@ struct LinalgToXeGPU : public gc::impl::LinalgToXeGPUBase { // Run GEMM pattern first to allow fusion with its consumers. RewritePatternSet gemmPatterns(&getContext()); populateLinalgGemmToXeGPUPatterns(gemmPatterns, options); - (void)applyPatternsAndFoldGreedily(getOperation(), std::move(gemmPatterns)); + (void)applyPatternsGreedily(getOperation(), std::move(gemmPatterns)); // Convert memory fill ops. RewritePatternSet fillPatterns(&getContext()); populateLinalgMemoryFillToXeGPUPatterns(fillPatterns, options); - (void)applyPatternsAndFoldGreedily(getOperation(), std::move(fillPatterns)); + (void)applyPatternsGreedily(getOperation(), std::move(fillPatterns)); // Convert other remaining ops. 
RewritePatternSet patterns(&getContext()); populateLinalgEltwiseToXeGPUPatterns(patterns, options); - (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); + (void)applyPatternsGreedily(getOperation(), std::move(patterns)); } }; diff --git a/lib/gc/Transforms/GPU/Pipeline.cpp b/lib/gc/Transforms/GPU/Pipeline.cpp index 5386fbf38..f90d9f562 100644 --- a/lib/gc/Transforms/GPU/Pipeline.cpp +++ b/lib/gc/Transforms/GPU/Pipeline.cpp @@ -154,7 +154,8 @@ void populateGPUPipeline(OpPassManager &pm, pm.addPass(createGpuKernelOutliningPass()); pm.addPass(createConvertXeVMToLLVMPass()); pm.addPass(createGpuXeVMAttachTarget()); - pm.addNestedPass(createConvertGpuOpsToLLVMSPVOps()); + pm.addNestedPass( + createConvertGpuOpsToLLVMSPVOps({.use64bitIndex = true})); pm.addNestedPass(createConvertIndexToLLVMPass()); pm.addNestedPass(createArithToLLVMConversionPass()); pm.addPass(createReconcileUnrealizedCastsPass()); diff --git a/lib/gc/Transforms/IterativeTilingAndFusion.cpp b/lib/gc/Transforms/IterativeTilingAndFusion.cpp index a486c29b0..d492e01e2 100644 --- a/lib/gc/Transforms/IterativeTilingAndFusion.cpp +++ b/lib/gc/Transforms/IterativeTilingAndFusion.cpp @@ -813,7 +813,7 @@ void iterativeTilingAndFusionUntilExhaustion( defaultTilingOfType(rewriter, op, isaOpTy, cfg); if (succeeded(tilingResult)) { tiledOps.insert(tilingResult->tiledOps[0]); - rewriter.replaceOp(op, tilingResult->replacements); + rewriter.replaceOp(op, tilingResult->mergeResult.replacements); break; } } diff --git a/lib/gc/Transforms/LowerToTileVector.cpp b/lib/gc/Transforms/LowerToTileVector.cpp index d105eaeb8..9690b2461 100644 --- a/lib/gc/Transforms/LowerToTileVector.cpp +++ b/lib/gc/Transforms/LowerToTileVector.cpp @@ -614,8 +614,7 @@ struct LowerToTileVectorPass // Init patterns use to remove useless tensor operation like extract or // insert slice. 
configInit.strictMode = GreedyRewriteStrictness::ExistingOps; - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patternsInit), - configInit); + (void)applyPatternsGreedily(funcOp, std::move(patternsInit), configInit); RewritePatternSet firstPatterns(ctx); // All the dynamic shape will reject to lower. @@ -623,8 +622,8 @@ struct LowerToTileVectorPass GreedyRewriteConfig configFirstPn; // We only apply the lowering pattern on existing operations configFirstPn.strictMode = GreedyRewriteStrictness::ExistingOps; - (void)applyPatternsAndFoldGreedily(funcOp, std::move(firstPatterns), - configFirstPn); + (void)applyPatternsGreedily(funcOp, std::move(firstPatterns), + configFirstPn); // Error case: // ``` // linalg.copy : <1x32xf32> @@ -649,10 +648,10 @@ struct LowerToTileVectorPass vector::populateVectorTransferPermutationMapLoweringPatterns(secondPattern); // Remove unnessary broadcast operation vector::populateSinkVectorOpsPatterns(secondPattern); - // Second fold (with the help of the `applyPatternsAndFoldGreedily` + // Second fold (with the help of the `applyPatternsGreedily` // function) can help us to eliminate redundant operation like consecutive // read and write. 
- (void)applyPatternsAndFoldGreedily(funcOp, std::move(secondPattern)); + (void)applyPatternsGreedily(funcOp, std::move(secondPattern)); // may need other patterns to reduce redundant operations } }; diff --git a/lib/gc/Transforms/MemRefToCPURuntime.cpp b/lib/gc/Transforms/MemRefToCPURuntime.cpp index d18506e54..2498ad83a 100644 --- a/lib/gc/Transforms/MemRefToCPURuntime.cpp +++ b/lib/gc/Transforms/MemRefToCPURuntime.cpp @@ -51,7 +51,7 @@ uint64_t getMemRefSizeInBytes(MemRefType memrefType) { if (!layout.isIdentity()) { int64_t offset; SmallVector strides; - if (failed(getStridesAndOffset(memrefType, strides, offset))) { + if (failed(memrefType.getStridesAndOffset(strides, offset))) { return UINT64_MAX; } diff --git a/lib/gc/Transforms/MergeNestedForall.cpp b/lib/gc/Transforms/MergeNestedForall.cpp index 07eb5ffbf..bd35e2e9d 100644 --- a/lib/gc/Transforms/MergeNestedForall.cpp +++ b/lib/gc/Transforms/MergeNestedForall.cpp @@ -82,8 +82,7 @@ struct MergeNestedForall patterns.add(patterns.getContext()); - if (failed( - applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)))) + if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) return signalPassFailure(); } }; diff --git a/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp b/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp index 0eabd6e1b..c312abe6f 100644 --- a/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp +++ b/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp @@ -391,7 +391,7 @@ class ConvertLinalgToMicrokernel patterns.add>( &getContext()); FrozenRewritePatternSet patternSet(std::move(patterns)); - if (failed(applyPatternsAndFoldGreedily(getOperation(), patternSet))) + if (failed(applyPatternsGreedily(getOperation(), patternSet))) signalPassFailure(); } }; diff --git a/lib/gc/Transforms/Microkernel/ConvertMicrokernelToDnnlFunc.cpp b/lib/gc/Transforms/Microkernel/ConvertMicrokernelToDnnlFunc.cpp index 647d8f784..8a5d97f0a 100644 --- 
a/lib/gc/Transforms/Microkernel/ConvertMicrokernelToDnnlFunc.cpp +++ b/lib/gc/Transforms/Microkernel/ConvertMicrokernelToDnnlFunc.cpp @@ -63,7 +63,7 @@ class ConvertBrgemmDispatchOpRewriter SmallVector operands; SmallVector operandTypes; IntegerType integer64 = IntegerType::get(rewriter.getContext(), 64); - FloatType float32 = FloatType::getF32(rewriter.getContext()); + FloatType float32 = Float32Type::get(rewriter.getContext()); // M, N, K, LDA, LDB, LDC, stride_a, stride_b // they are in the same order with BrgemmDispatchOp inputs @@ -215,7 +215,7 @@ class ConvertMicrokernelToDnnlFunc &getContext()); FrozenRewritePatternSet patternSet(std::move(patterns)); - if (failed(applyPatternsAndFoldGreedily(getOperation(), patternSet))) + if (failed(applyPatternsGreedily(getOperation(), patternSet))) signalPassFailure(); } }; diff --git a/lib/gc/Transforms/Microkernel/EarlyDispatchMicrokernel.cpp b/lib/gc/Transforms/Microkernel/EarlyDispatchMicrokernel.cpp index 2f66feee4..058d55357 100644 --- a/lib/gc/Transforms/Microkernel/EarlyDispatchMicrokernel.cpp +++ b/lib/gc/Transforms/Microkernel/EarlyDispatchMicrokernel.cpp @@ -205,8 +205,7 @@ class EarlyDispatchMicrokernel // Ignore newly created Ops GreedyRewriteConfig config; config.strictMode = GreedyRewriteStrictness::ExistingOps; - if (failed( - applyPatternsAndFoldGreedily(getOperation(), patternSet, config))) + if (failed(applyPatternsGreedily(getOperation(), patternSet, config))) signalPassFailure(); } }; diff --git a/lib/gc/Transforms/Microkernel/ExpandMicrokernel.cpp b/lib/gc/Transforms/Microkernel/ExpandMicrokernel.cpp index 9e58a76cf..164edb609 100644 --- a/lib/gc/Transforms/Microkernel/ExpandMicrokernel.cpp +++ b/lib/gc/Transforms/Microkernel/ExpandMicrokernel.cpp @@ -275,7 +275,7 @@ class ExpandMicrokernel patterns.add(&getContext()); FrozenRewritePatternSet patternSet(std::move(patterns)); - if (failed(applyPatternsAndFoldGreedily(getOperation(), patternSet))) + if (failed(applyPatternsGreedily(getOperation(), 
patternSet))) signalPassFailure(); } }; diff --git a/lib/gc/Transforms/Microkernel/MergeBranchMicrokernelContext.cpp b/lib/gc/Transforms/Microkernel/MergeBranchMicrokernelContext.cpp index 9865f5220..59554ef67 100644 --- a/lib/gc/Transforms/Microkernel/MergeBranchMicrokernelContext.cpp +++ b/lib/gc/Transforms/Microkernel/MergeBranchMicrokernelContext.cpp @@ -296,7 +296,7 @@ class MergeBranchMicrokernelContext patterns.add(&getContext(), dispatchAnalysis); FrozenRewritePatternSet patternSet(std::move(patterns)); - if (failed(applyPatternsAndFoldGreedily(getOperation(), patternSet))) { + if (failed(applyPatternsGreedily(getOperation(), patternSet))) { signalPassFailure(); } } diff --git a/lib/gc/Transforms/Microkernel/MicrokernelInvariantCodeMotion.cpp b/lib/gc/Transforms/Microkernel/MicrokernelInvariantCodeMotion.cpp index ad8a0631f..4363795ca 100644 --- a/lib/gc/Transforms/Microkernel/MicrokernelInvariantCodeMotion.cpp +++ b/lib/gc/Transforms/Microkernel/MicrokernelInvariantCodeMotion.cpp @@ -421,8 +421,7 @@ class MicrokernelInvariantCodeMotion // Ignore newly created Ops GreedyRewriteConfig config; config.strictMode = GreedyRewriteStrictness::ExistingOps; - if (failed( - applyPatternsAndFoldGreedily(getOperation(), patternSet, config))) { + if (failed(applyPatternsGreedily(getOperation(), patternSet, config))) { signalPassFailure(); } } diff --git a/lib/gc/Transforms/OneDNNGraphToLinalg.cpp b/lib/gc/Transforms/OneDNNGraphToLinalg.cpp index 5a75c37cd..138d3176d 100644 --- a/lib/gc/Transforms/OneDNNGraphToLinalg.cpp +++ b/lib/gc/Transforms/OneDNNGraphToLinalg.cpp @@ -515,8 +515,7 @@ struct ConvertOneDNNGraphToLinalg MatMulOpBatchFlatten // clang-format on >(ctx); - if (failed(applyPatternsAndFoldGreedily(getOperation(), - std::move(patternsPre)))) { + if (failed(applyPatternsGreedily(getOperation(), std::move(patternsPre)))) { signalPassFailure(); } // ========================================== diff --git a/lib/gc/Transforms/Utils/ValueUtils.cpp 
b/lib/gc/Transforms/Utils/ValueUtils.cpp index c6285df18..6db2fa5df 100644 --- a/lib/gc/Transforms/Utils/ValueUtils.cpp +++ b/lib/gc/Transforms/Utils/ValueUtils.cpp @@ -110,7 +110,7 @@ FailureOr> getStrides(Value value) { auto memrefType = cast(valueType); SmallVector strides; int64_t offset; - if (failed(getStridesAndOffset(memrefType, strides, offset))) + if (failed(memrefType.getStridesAndOffset(strides, offset))) return failure(); return strides; } diff --git a/src/dnnl/JsonParser.h b/src/dnnl/JsonParser.h index 6d9bc2893..9615219d8 100644 --- a/src/dnnl/JsonParser.h +++ b/src/dnnl/JsonParser.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions * and limitations under the License. 
- * * SPDX-License-Identifier: Apache-2.0 */ @@ -179,8 +178,8 @@ class JsonParser { GC_DTYPE("u8", b.getIntegerType(8, true)), GC_DTYPE("f64", b.getF64Type()), GC_DTYPE("boolean", b.getI1Type()), - GC_DTYPE("f8_e5m2", b.getFloat8E5M2Type()), - GC_DTYPE("f8_e4m3", b.getFloat8E4M3FNType()), + GC_DTYPE("f8_e5m2", mlir::Float8E5M2Type::get(b.getContext())), + GC_DTYPE("f8_e4m3", mlir::Float8E4M3FNType::get(b.getContext())), GC_DTYPE("s4", b.getIntegerType(4, false)), GC_DTYPE("u4", b.getIntegerType(4, true)), }; diff --git a/test/benchgc/src/benchgc/mlir/util.py b/test/benchgc/src/benchgc/mlir/util.py index 9ff5b8f45..26c2c1e50 100644 --- a/test/benchgc/src/benchgc/mlir/util.py +++ b/test/benchgc/src/benchgc/mlir/util.py @@ -187,12 +187,12 @@ def attach_dlti(flags: argparse.Namespace, module: ir.Module): dlti_template = f""" module attributes {{ dlti.target_system_spec = #dlti.target_system_spec< - "CPU": #dlti.target_device_spec< - #dlti.dl_entry<"L1_cache_size_in_bytes", {l1_data_cache_size} : ui32>, - #dlti.dl_entry<"L2_cache_size_in_bytes", {l2_cache_size} : ui64>, - #dlti.dl_entry<"L3_cache_size_in_bytes", {l3_cache_size} : ui64>, - #dlti.dl_entry<"num_threads", {num_threads} : i32>, - #dlti.dl_entry<"max_vector_width", {max_vector_width} : i64>> + "CPU" = #dlti.target_device_spec< + "L1_cache_size_in_bytes" = {l1_data_cache_size} : ui32, + "L2_cache_size_in_bytes" = {l2_cache_size} : ui64, + "L3_cache_size_in_bytes" = {l3_cache_size} : ui64, + "num_threads" = {num_threads} : i32, + "max_vector_width" = {max_vector_width} : i64> >}} {{}} """ with module.context: diff --git a/test/mlir/test/gc/Dialect/CPURuntime/cpu-runner/allocators.mlir b/test/mlir/test/gc/Dialect/CPURuntime/cpu-runner/allocators.mlir index 399467290..35666487a 100644 --- a/test/mlir/test/gc/Dialect/CPURuntime/cpu-runner/allocators.mlir +++ b/test/mlir/test/gc/Dialect/CPURuntime/cpu-runner/allocators.mlir @@ -1,3 +1,4 @@ +// UNSUPPORTED: target={{.*}} // RUN: gc-opt %s --finalize-memref-to-llvm
--convert-scf-to-cf --convert-cpuruntime-to-llvm --convert-func-to-llvm --reconcile-unrealized-casts | gc-cpu-runner -e main -entry-point-result=void -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | FileCheck %s module { diff --git a/test/mlir/test/gc/Dialect/CPURuntime/memref-to-cpuruntime.mlir b/test/mlir/test/gc/Dialect/CPURuntime/memref-to-cpuruntime.mlir index c32cb618e..b25562fc0 100644 --- a/test/mlir/test/gc/Dialect/CPURuntime/memref-to-cpuruntime.mlir +++ b/test/mlir/test/gc/Dialect/CPURuntime/memref-to-cpuruntime.mlir @@ -1,3 +1,4 @@ +// UNSUPPORTED: target={{.*}} // RUN: gc-opt --split-input-file --convert-memref-to-cpuruntime %s -verify-diagnostics | FileCheck %s func.func @alloca() { diff --git a/test/mlir/test/gc/Transforms/GPU/module-to-binary-xevm.mlir b/test/mlir/test/gc/Transforms/GPU/module-to-binary-xevm.mlir index 3b3f4a26e..444edcda4 100644 --- a/test/mlir/test/gc/Transforms/GPU/module-to-binary-xevm.mlir +++ b/test/mlir/test/gc/Transforms/GPU/module-to-binary-xevm.mlir @@ -1,4 +1,4 @@ -// RUN: gc-opt %s --gpu-to-llvm --convert-gpu-to-llvm-spv --gpu-module-to-binary | FileCheck %s +// RUN: gc-opt %s --gpu-to-llvm --convert-gpu-to-llvm-spv='use-64bit-index=true' --gpu-module-to-binary | FileCheck %s module attributes {gpu.container_module} { // CHECK-LABEL:gpu.binary @entry_kernel diff --git a/test/mlir/test/gc/Transforms/deepTileContractionNamedOp.mlir b/test/mlir/test/gc/Transforms/deepTileContractionNamedOp.mlir index 61848dcb7..ccb9ca418 100644 --- a/test/mlir/test/gc/Transforms/deepTileContractionNamedOp.mlir +++ b/test/mlir/test/gc/Transforms/deepTileContractionNamedOp.mlir @@ -150,12 +150,12 @@ func.func @matmul_2Dx4D_bf16(%arg0: tensor<4096x4096xbf16>, %arg1: tensor<128x12 module attributes { dlti.target_system_spec = #dlti.target_system_spec< - "CPU": #dlti.target_device_spec< - #dlti.dl_entry<"L1_cache_size_in_bytes", 49152 : i32>, - #dlti.dl_entry<"L2_cache_size_in_bytes", 2097152 : i32>, - 
#dlti.dl_entry<"L3_cache_size_in_bytes", 110100480 : i32>, - #dlti.dl_entry<"num_threads", 56 : i32>, - #dlti.dl_entry<"max_vector_width", 512 : i32>> + "CPU" = #dlti.target_device_spec< + "L1_cache_size_in_bytes" = 49152 : i32, + "L2_cache_size_in_bytes" = 2097152 : i32, + "L3_cache_size_in_bytes" = 110100480 : i32, + "num_threads" = 56 : i32, + "max_vector_width" = 512 : i32> >} { // CHECK: #[[mapA:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3 * 2 + d4)> // CHECK: #[[mapB:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2, d4)> diff --git a/test/mlir/test/gc/cpu-runner/GPU/xevm_block_dpas.mlir b/test/mlir/test/gc/cpu-runner/GPU/xevm_block_dpas.mlir index 3f28e68bc..282277d71 100644 --- a/test/mlir/test/gc/cpu-runner/GPU/xevm_block_dpas.mlir +++ b/test/mlir/test/gc/cpu-runner/GPU/xevm_block_dpas.mlir @@ -1,4 +1,4 @@ -// RUN: gc-opt %s --convert-xevm-to-llvm --xevm-attach-target --convert-scf-to-cf --convert-cf-to-llvm --convert-arith-to-llvm --convert-gpu-to-llvm-spv --gpu-to-llvm --reconcile-unrealized-casts --cse --gpu-module-to-binary | gc-cpu-runner -e main -entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s +// RUN: gc-opt %s --convert-xevm-to-llvm --xevm-attach-target --convert-scf-to-cf --convert-cf-to-llvm --convert-arith-to-llvm --convert-gpu-to-llvm-spv='use-64bit-index=true' --gpu-to-llvm --reconcile-unrealized-casts --cse --gpu-module-to-binary | gc-cpu-runner -e main -entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s module @gemm attributes {gpu.container_module} { gpu.module @kernel { diff --git a/test/mlir/test/gc/cpu-runner/GPU/xevm_block_load_store.mlir b/test/mlir/test/gc/cpu-runner/GPU/xevm_block_load_store.mlir index f4bb29f2a..c114673c3 100644 --- a/test/mlir/test/gc/cpu-runner/GPU/xevm_block_load_store.mlir +++ b/test/mlir/test/gc/cpu-runner/GPU/xevm_block_load_store.mlir @@ -1,4 +1,4 @@ -// RUN: gc-opt %s 
--convert-xevm-to-llvm --xevm-attach-target --convert-scf-to-cf --convert-cf-to-llvm --convert-arith-to-llvm --convert-gpu-to-llvm-spv --gpu-to-llvm --reconcile-unrealized-casts --cse --gpu-module-to-binary | gc-cpu-runner -e main -entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s +// RUN: gc-opt %s --convert-xevm-to-llvm --xevm-attach-target --convert-scf-to-cf --convert-cf-to-llvm --convert-arith-to-llvm --convert-gpu-to-llvm-spv='use-64bit-index=true' --gpu-to-llvm --reconcile-unrealized-casts --cse --gpu-module-to-binary | gc-cpu-runner -e main -entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s module @gemm attributes {gpu.container_module} { @@ -23,8 +23,9 @@ module @gemm attributes {gpu.container_module} { %loaded = xevm.blockload2d %src, %base_width, %base_height, %base_pitch, %x, %y {elem_size_in_bits=32, tile_width=16, tile_height=8, v_blocks=1, transpose=false, vnni_transform=false, l1_cache_control=Default, l3_cache_control=Default} : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32> %loaded_f32 = vector.bitcast %loaded : vector<8xi32> to vector<8xf32> %c0 = arith.constant 0 : i32 - %thread_x = gpu.thread_id x - %thread_x_i32 = arith.index_cast %thread_x : index to i32 + %thread_x = gpu.thread_id x + %thread_x_i64 = arith.index_cast %thread_x : index to i64 + %thread_x_i32 = llvm.trunc %thread_x_i64 : i64 to i32 %thread_x_f32 = arith.sitofp %thread_x_i32 : i32 to f32 %loaded_f32_modified = vector.insertelement %thread_x_f32, %loaded_f32[%c0 : i32] : vector<8xf32> %loaded_modified = vector.bitcast %loaded_f32_modified : vector<8xf32> to vector<8xi32> diff --git a/test/mlir/test/gc/cpu-runner/tid.mlir b/test/mlir/test/gc/cpu-runner/tid.mlir index aedcc0a20..ff0fcd451 100644 --- a/test/mlir/test/gc/cpu-runner/tid.mlir +++ b/test/mlir/test/gc/cpu-runner/tid.mlir @@ -1,3 +1,4 @@ +// UNSUPPORTED: target={{.*}} // RUN: gc-opt %s 
--convert-cpuruntime-to-llvm --convert-openmp-to-llvm --convert-func-to-llvm --convert-arith-to-llvm --convert-cf-to-llvm --reconcile-unrealized-casts | gc-cpu-runner -e main -entry-point-result=void | FileCheck %s module { func.func private @omp_get_thread_num() -> i32 diff --git a/test/mlir/unittests/Analysis/TargetDescriptionAnalysisTest.cpp b/test/mlir/unittests/Analysis/TargetDescriptionAnalysisTest.cpp index a3ba8261b..518c50526 100644 --- a/test/mlir/unittests/Analysis/TargetDescriptionAnalysisTest.cpp +++ b/test/mlir/unittests/Analysis/TargetDescriptionAnalysisTest.cpp @@ -26,12 +26,12 @@ using namespace mlir; static const char code1[] = R"mlir( module attributes { dlti.target_system_spec = #dlti.target_system_spec< -"CPU": #dlti.target_device_spec< - #dlti.dl_entry<"L1_cache_size_in_bytes", 49152 : ui32>, - #dlti.dl_entry<"L2_cache_size_in_bytes", 2097152 : ui64>, - #dlti.dl_entry<"L3_cache_size_in_bytes", "110100480">, - #dlti.dl_entry<"num_threads", 56 : i32>, - #dlti.dl_entry<"max_vector_width", 512 : i64>> +"CPU" = #dlti.target_device_spec< + "L1_cache_size_in_bytes" = 49152 : ui32, + "L2_cache_size_in_bytes" = 2097152 : ui64, + "L3_cache_size_in_bytes" = "110100480", + "num_threads" = 56 : i32, + "max_vector_width" = 512 : i64> >} {} )mlir"; @@ -56,9 +56,9 @@ TEST(TargetDescriptionAnalysis, CPUNormal) { static const char code2[] = R"mlir( module attributes { dlti.target_system_spec = #dlti.target_system_spec< -"CPU": #dlti.target_device_spec< - #dlti.dl_entry<"L1_cache_size_in_bytes", 49152 : ui32>, - #dlti.dl_entry<"L2_cache_size_in_bytes", 2097152 : ui32>> +"CPU" = #dlti.target_device_spec< + "L1_cache_size_in_bytes" = 49152 : ui32, + "L2_cache_size_in_bytes" = 2097152 : ui32> >} {} )mlir"; diff --git a/test/mlir/unittests/ExecutionEngine/IMEX/IMEXGpuOclRuntimeTest.cpp b/test/mlir/unittests/ExecutionEngine/IMEX/IMEXGpuOclRuntimeTest.cpp index d2d15d8a4..ba92536e7 100644 --- a/test/mlir/unittests/ExecutionEngine/IMEX/IMEXGpuOclRuntimeTest.cpp +++
b/test/mlir/unittests/ExecutionEngine/IMEX/IMEXGpuOclRuntimeTest.cpp @@ -62,7 +62,7 @@ module @test { )mlir"; constexpr char matmulAddStatic[] = R"mlir( -module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"max_work_group_size", 16 : i64>>>} { +module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" = #dlti.target_device_spec<"max_work_group_size" = 16 : i64>>} { func.func @entry(%arg0: memref<128x256xf16>, %arg1: memref<256x256xf16>, %arg2: memref<128x256xf16>) { %0 = bufferization.to_tensor %arg0 restrict : memref<128x256xf16> %1 = bufferization.to_tensor %arg1 restrict : memref<256x256xf16>