Commit

replace sysDesc with target info
zhczhong committed Aug 6, 2024

Verified: this commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent efc2d86 commit 51527c0
Showing 3 changed files with 24 additions and 75 deletions.
56 changes: 0 additions & 56 deletions include/gc/Analysis/MatmulConfigAnalysis.h
@@ -19,62 +19,6 @@ namespace gc {
 
 using namespace mlir;
 
-struct SystemDesc {
-  // get runtime OMP_NUM_THREADS
-  uint32_t getNumThreads() {
-    std::optional<Attribute> numThreads = layout.getDevicePropertyValue(
-        Builder(ctx).getStringAttr("CPU" /* device ID*/),
-        Builder(ctx).getStringAttr("num_threads"));
-    if (numThreads && isa<IntegerAttr>(*numThreads)) {
-      return dyn_cast<IntegerAttr>(*numThreads).getInt();
-    }
-    return 1;
-  }
-  // get cache size by cacheLevel
-  size_t getCacheSize(uint8_t cacheLevel) {
-    if (cacheLevel == 1) {
-      std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
-          Builder(ctx).getStringAttr("CPU" /* device ID*/),
-          Builder(ctx).getStringAttr("L1_cache_size_in_bytes"));
-      if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
-        return dyn_cast<IntegerAttr>(*cacheSize).getInt();
-      }
-    } else if (cacheLevel == 2) {
-      std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
-          Builder(ctx).getStringAttr("CPU" /* device ID*/),
-          Builder(ctx).getStringAttr("L2_cache_size_in_bytes"));
-      if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
-        return dyn_cast<IntegerAttr>(*cacheSize).getInt();
-      }
-    } else if (cacheLevel == 3) {
-      std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
-          Builder(ctx).getStringAttr("CPU" /* device ID*/),
-          Builder(ctx).getStringAttr("L3_cache_size_in_bytes"));
-      if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
-        return dyn_cast<IntegerAttr>(*cacheSize).getInt();
-      }
-    }
-    return 0;
-  }
-
-  // get the maximum vector length in bits
-  size_t getMaxVectorLength() {
-    std::optional<Attribute> maxVectorLength = layout.getDevicePropertyValue(
-        Builder(ctx).getStringAttr("CPU" /* device ID*/),
-        Builder(ctx).getStringAttr("max_vector_width"));
-    if (maxVectorLength && isa<IntegerAttr>(*maxVectorLength)) {
-      return dyn_cast<IntegerAttr>(*maxVectorLength).getInt();
-    }
-    return 512;
-  }
-
-  SystemDesc(ModuleOp m) : layout(m), ctx(m->getContext()) {}
-
-private:
-  DataLayout layout;
-  MLIRContext *ctx;
-};
-
 // The configuration for matmul tiling
 // TODO: support batch matmul
 struct MatmulConfig {
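
Note (not part of the commit): the deleted SystemDesc above queried device properties through the module's DataLayout; the CPUTargetDescriptionAnalysis that replaces it is constructed directly from an operation, as the .cpp changes below show. A minimal usage sketch, assuming only the getters visible at the call sites in this diff (getNumThreads, getCacheSize, getMaxVectorWidth) and the surrounding gc namespace; the queryTargetInfo helper is hypothetical:

    #include "gc/Analysis/TargetDescriptionAnalysis.h" // include added below

    // Hypothetical helper mirroring the migration in this commit.
    static void queryTargetInfo(mlir::Operation *root) {
      // Before: SystemDesc sysDesc(root->getParentOfType<ModuleOp>());
      gc::CPUTargetDescriptionAnalysis sysDesc(root); // after: built from any op
      uint32_t numThreads = sysDesc.getNumThreads();  // runtime OMP_NUM_THREADS
      size_t l2Bytes = sysDesc.getCacheSize(2);       // cache size by level
      size_t vecBits = sysDesc.getMaxVectorWidth();   // max vector width in bits
      (void)numThreads; (void)l2Bytes; (void)vecBits;
    }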
42 changes: 23 additions & 19 deletions lib/gc/Analysis/MatmulConfigAnalysis.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "gc/Analysis/MatmulConfigAnalysis.h"
+#include "gc/Analysis/TargetDescriptionAnalysis.h"
 #include <limits>
 #include <llvm/Support/Debug.h>
 
@@ -64,7 +65,8 @@ getCandidate(uint32_t num, uint32_t floor,
 }
 
 // check if the threads are valid
-bool validateThreads(ArrayRef<uint32_t> threads, SystemDesc &sysDesc) {
+bool validateThreads(ArrayRef<uint32_t> threads,
+                     CPUTargetDescriptionAnalysis &sysDesc) {
   uint32_t numThreads = sysDesc.getNumThreads();
   uint32_t actualThreads = 1U;
   for (uint32_t t : threads)
@@ -77,24 +79,25 @@ bool validateThreads(ArrayRef<uint32_t> threads, SystemDesc &sysDesc) {
 double vectorRegEfficiencyCost(linalg::LinalgOp &linalgOp,
                                ArrayRef<uint32_t> shape,
                                const MatmulConfig &config,
-                               SystemDesc &sysDesc) {
+                               CPUTargetDescriptionAnalysis &sysDesc) {
   size_t dtypeSize = DataLayout().getTypeSizeInBits(
       ShapeAdaptor(linalgOp.getDpsInputs()[1].getType()).getElementType());
-  size_t maxVectorLength = sysDesc.getMaxVectorLength() / dtypeSize;
+  size_t maxVectorWidth = sysDesc.getMaxVectorWidth() / dtypeSize;
   // TODO: take matrix register like amx into account
-  double cost = (maxVectorLength - config.innerMostMBlock % maxVectorLength) %
-                    maxVectorLength * 1.0 / config.innerMostMBlock +
-                (maxVectorLength - config.innerMostKBlock % maxVectorLength) %
-                    maxVectorLength * 1.0 / config.innerMostKBlock +
-                (maxVectorLength - config.innerMostNBlock % maxVectorLength) %
-                    maxVectorLength * 1.0 / config.innerMostNBlock;
+  double cost = (maxVectorWidth - config.innerMostMBlock % maxVectorWidth) %
+                    maxVectorWidth * 1.0 / config.innerMostMBlock +
+                (maxVectorWidth - config.innerMostKBlock % maxVectorWidth) %
+                    maxVectorWidth * 1.0 / config.innerMostKBlock +
+                (maxVectorWidth - config.innerMostNBlock % maxVectorWidth) %
+                    maxVectorWidth * 1.0 / config.innerMostNBlock;
   return cost;
 }
 
 // calculate the cost of the workload balance
 double workloadBalancedCost(linalg::LinalgOp &linalgOp,
                             ArrayRef<uint32_t> shape,
-                            const MatmulConfig &config, SystemDesc &sysDesc) {
+                            const MatmulConfig &config,
+                            CPUTargetDescriptionAnalysis &sysDesc) {
   if (shape.size() < 3) {
     // Has an invalid shape
     return 0;
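
Aside (not from the diff): apart from the maxVectorLength → maxVectorWidth rename, the cost computed in vectorRegEfficiencyCost above is unchanged. With the register width in elements written as W, and b ranging over the three innermost block sizes, it is

\[
\text{cost} \;=\; \sum_{b \,\in\, \{\texttt{innerMostMBlock},\,\texttt{innerMostKBlock},\,\texttt{innerMostNBlock}\}} \frac{(W - b \bmod W) \bmod W}{b},
\qquad W = \frac{\texttt{maxVectorWidth}}{\texttt{dtypeSize}}
\]

Each term is the padding needed to round a block up to a whole number of vector registers, relative to the block size; it vanishes when the block is a multiple of W.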
@@ -118,7 +121,7 @@ double workloadBalancedCost(linalg::LinalgOp &linalgOp,
 double memoryConsumptionOnThreadCost(linalg::LinalgOp &linalgOp,
                                      ArrayRef<uint32_t> shape,
                                      const MatmulConfig &config,
-                                     SystemDesc &sysDesc) {
+                                     CPUTargetDescriptionAnalysis &sysDesc) {
   if (shape.size() < 3) {
     // Has an invalid shape
     return 0;
@@ -141,7 +144,7 @@ double memoryConsumptionOnThreadCost(linalg::LinalgOp &linalgOp,
 double computationIntensityOnL2Cache(linalg::LinalgOp &linalgOp,
                                      ArrayRef<uint32_t> shape,
                                      const MatmulConfig &config,
-                                     SystemDesc &sysDesc) {
+                                     CPUTargetDescriptionAnalysis &sysDesc) {
   double fullLoadRatio = 0.7;
   uint32_t L2Cache = sysDesc.getCacheSize(2);
   size_t dtypeSize = DataLayout().getTypeSize(
@@ -157,16 +160,17 @@ double computationIntensityOnL2Cache(linalg::LinalgOp &linalgOp,
   return 1 / computationIntensity;
 }
 
-using CostModelFn =
-    std::function<double(linalg::LinalgOp &linalgOp, ArrayRef<uint32_t> shape,
-                         MatmulConfig cfg, SystemDesc &sysDesc)>;
+using CostModelFn = std::function<double(
+    linalg::LinalgOp &linalgOp, ArrayRef<uint32_t> shape, MatmulConfig cfg,
+    CPUTargetDescriptionAnalysis &sysDesc)>;
 
 // filter the config by the cost model
 std::vector<MatmulConfig>
 filterConfigByCostModel(ArrayRef<MatmulConfig> configs,
                         linalg::LinalgOp &linalgOp, ArrayRef<uint32_t> shape,
-                        SystemDesc &sysDesc, const CostModelFn &costModel,
-                        float preserveRatio = 0.5, float threshold = -1) {
+                        CPUTargetDescriptionAnalysis &sysDesc,
+                        const CostModelFn &costModel, float preserveRatio = 0.5,
+                        float threshold = -1) {
   std::vector<MatmulConfig> result;
   std::vector<float> costs;
   std::vector<size_t> idx;
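
Illustration (not part of the commit): with the updated CostModelFn alias, a custom cost model only needs to match the new signature to drop into filterConfigByCostModel. A sketch; everything except the names taken from this file (CostModelFn, MatmulConfig, CPUTargetDescriptionAnalysis, filterConfigByCostModel) is hypothetical:

    // Hypothetical cost model that ranks every candidate config equally.
    static double uniformCost(linalg::LinalgOp &linalgOp,
                              ArrayRef<uint32_t> shape, MatmulConfig cfg,
                              CPUTargetDescriptionAnalysis &sysDesc) {
      return 1.0;
    }
    // ... later, filter candidates with it, keeping half of them:
    std::vector<MatmulConfig> kept = filterConfigByCostModel(
        configs, linalgOp, shape, sysDesc, uniformCost, /*preserveRatio=*/0.5);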
@@ -196,7 +200,7 @@ filterConfigByCostModel(ArrayRef<MatmulConfig> configs,
 
 // prepare the config candidates
 std::vector<MatmulConfig>
-prepareConfigCandidates(Operation *root, SystemDesc &sysDesc,
+prepareConfigCandidates(Operation *root, CPUTargetDescriptionAnalysis &sysDesc,
                         ArrayRef<uint32_t> shape,
                         ArrayRef<uint32_t> givenInnermostBlock) {
   if (shape.size() < 3) {
@@ -347,7 +351,7 @@ bool readConfigFromAttrs(MatmulConfig &config, ArrayRef<NamedAttribute> attrs) {
 // previous matmul
 MatmulConfigAnalysis::MatmulConfigAnalysis(Operation *root) {
   if (auto linalgOp = dyn_cast<linalg::LinalgOp>(root)) {
-    SystemDesc sysDesc(root->getParentOfType<ModuleOp>());
+    CPUTargetDescriptionAnalysis sysDesc(root);
     SmallVector<SmallVector<DimType>> oprandDimType =
         *getOprandDimType(linalgOp);
     // get the origin M,N,K size
1 change: 1 addition & 0 deletions test/mlir/unittests/Analysis/CMakeLists.txt
@@ -3,5 +3,6 @@ add_mlir_unittest(GCAnalysisTests
 )
 target_link_libraries(GCAnalysisTests
   PRIVATE
     GcPasses
+    GcAnalysis
     GcJitWrapper)
