Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Experimental][Transform] Split Compute Intensive Op #154

Open
wants to merge 12 commits into
base: xurui/add_benchmark
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'xurui/add_benchmark' into zhangyan/test_pybind
  • Loading branch information
Zhang Yan committed Jul 1, 2024
commit 65ac7f01a9e5cdda4a8722b4cc8e1f626f33ec01
4 changes: 4 additions & 0 deletions lib/gc/Transforms/OneDNNGraphToLinalg.cpp
Original file line number Diff line number Diff line change
@@ -26,6 +26,8 @@
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

#include "llvm/Support/raw_ostream.h"
#include<iostream>
using namespace mlir::onednn_graph;

namespace mlir {
@@ -492,6 +494,8 @@ struct MatMulOpLowering : public OpRewritePattern<MatMulOp> {
/*outputs=*/outBias);
}

// Passing matmul configs to linalg.matmul
newOp->setAttrs(op->getAttrs());
rewriter.replaceOp(op, newOp);
return success();
}
1 change: 1 addition & 0 deletions lib/gc/Transforms/Pipeline.cpp
Original file line number Diff line number Diff line change
@@ -75,6 +75,7 @@ void populateBufferizationPasses(mlir::PassManager &pm) {
bufferization::LayoutMapOption::IdentityLayoutMap);
pm.addPass(bufferization::createOneShotBufferizePass(options));
pm.addPass(createCSEPass());

bufferization::BufferResultsToOutParamsOpts opt{};
opt.hoistStaticAllocs = true;
pm.addPass(bufferization::createBufferResultsToOutParamsPass(opt));
4 changes: 4 additions & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@ declare_mlir_python_sources(GcPythonSources.Common
ADD_TO_PARENT GcPythonSources
SOURCES
__init__.py
graph_compiler.py
dialects/__init__.py
# init hooks
_mlir_libs/_site_initialize_0.py
@@ -83,6 +84,8 @@ add_mlir_python_common_capi_library(GcPythonCAPI
GcPythonSources
MLIRPythonExtension.RegisterEverything
MLIRPythonSources.Core
MLIRPythonSources.Dialects.linalg
MLIRPythonSources.ExecutionEngine
)

################################################################################
@@ -96,6 +99,7 @@ add_mlir_python_modules(GcPythonModules
GcPythonSources
MLIRPythonExtension.RegisterEverything
MLIRPythonSources
MLIRPythonSources.ExecutionEngine
COMMON_CAPI_LINK_LIBS
GcPythonCAPI
)
48 changes: 48 additions & 0 deletions python/gc_mlir/graph_compiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# ===-- graph_compiler.py - DESC ------------------------------*- Python -*-===#
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===-----------------------------------------------------------------------===#

from gc_mlir import execution_engine
from gc_mlir import ir
from gc_mlir import passmanager
from typing import Sequence

__all__ = [
"GraphCompiler",
]


class GraphCompiler:
    """Compiles an MLIR module with a configurable pass pipeline and
    optionally JITs it into an ExecutionEngine.

    Attributes:
        pipeline: textual pass-pipeline description parsed by PassManager.
        shared_libs: paths of shared libraries loaded into the JIT engine.
        opt_level: LLVM JIT optimization level (0-3).
    """

    def __init__(
        self,
        pipeline: str = "any(gc-cpu-pipeline)",
        shared_libs: Sequence[str] = (),
        opt_level: int = 3,
    ):
        # Default changed from `[]` to `()` — an empty tuple avoids the
        # shared-mutable-default-argument pitfall and is accepted anywhere
        # a Sequence is.
        self.shared_libs = shared_libs
        self.pipeline = pipeline
        self.opt_level = opt_level

    def __call__(self, module: ir.Module, ir_printing: bool = False):
        """Alias for :meth:`compile`."""
        self.compile(module, ir_printing)

    def compile(self, module: ir.Module, ir_printing: bool = False):
        """Run the configured pass pipeline on *module* in place.

        Args:
            module: module to transform (mutated in place).
            ir_printing: if True, dump IR before/after each pass.
        """
        pm = passmanager.PassManager.parse(self.pipeline)
        if ir_printing:
            pm.enable_ir_printing()
        pm.run(module.operation)

    def jit(self, module: ir.Module) -> execution_engine.ExecutionEngine:
        """Wrap an already-lowered *module* in an ExecutionEngine."""
        return execution_engine.ExecutionEngine(
            module, opt_level=self.opt_level, shared_libs=self.shared_libs
        )

    def compile_and_jit(
        self, module: ir.Module, ir_printing: bool = False
    ) -> execution_engine.ExecutionEngine:
        """Compile *module* with the pipeline, then JIT it."""
        self.compile(module, ir_printing)
        return self.jit(module)
14 changes: 14 additions & 0 deletions tools/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Python Tools
## Pre-requisites
* Enable python binding
* Install the dependencies listed in `tools/requirements.txt`
* Set env
  * `PYTHONPATH=${BUILD_DIR}/python_packages/gc_mlir_core`
  * `LD_PRELOAD=path/to/libiomp5.so`
  * `MLIR_C_RUNNER_UTILS=${LLVM_INSTALL_DIR}/lib/libmlir_c_runner_utils.so`
  * `MLIR_RUNNER_UTILS=${LLVM_INSTALL_DIR}/lib/libmlir_runner_utils.so`


## Bench
## Tuning
TODO
110 changes: 110 additions & 0 deletions tools/bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
################################################################################
# Copyright (C) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
# SPDX-License-Identifier: Apache-2.0
################################################################################

import ctypes
import random
import timeit
from time import sleep
from typing import Sequence

import numpy as np
from gc_mlir import ir, runtime
from gc_mlir.dialects import arith, func, memref
from gc_mlir.graph_compiler import GraphCompiler
from utils import (
emit_benchmark_wrapped_main_func,
emit_nano_time,
get_kernel_func_from_module,
)


def py_timeit_bench(
    ir_module: ir.Module,
    entry_name: str,
    pipeline: str,
    mlir_args: list,
    shared_libs: Sequence,
    ir_printing=False,
    repeat_time=100,
    warm_up=20,
) -> "tuple[float, float]":
    """Benchmark an MLIR kernel using Python-side `timeit` timers.

    Args:
        ir_module: module containing the kernel entry function.
        entry_name: symbol name looked up in the execution engine.
        pipeline: pass-pipeline string handed to GraphCompiler.
        mlir_args: ctypes-compatible pointers passed to the kernel.
        shared_libs: shared libraries loaded into the execution engine.
        ir_printing: if True, dump IR while running the pipeline.
        repeat_time: number of timed invocations.
        warm_up: number of untimed warm-up invocations.

    Returns:
        (execute_cost, compile_cost): average execution time per run and
        total compile+JIT time, both in milliseconds.
        (Annotation fixed: the original declared ``-> float`` but returned
        a 2-tuple.)
    """
    compiler = GraphCompiler(
        pipeline,
        shared_libs,
    )
    compile_begin = timeit.default_timer()
    engine = compiler.compile_and_jit(ir_module, ir_printing=ir_printing)
    compile_cost = (timeit.default_timer() - compile_begin) * 1000

    # Renamed from `func` to avoid shadowing the `func` dialect imported
    # at module level.
    entry = engine.lookup(entry_name)
    packed_args = (ctypes.c_void_p * len(mlir_args))()
    for i, arg in enumerate(mlir_args):
        packed_args[i] = ctypes.cast(arg, ctypes.c_void_p)

    def run_bench():
        entry(packed_args)

    # Warm-up runs are executed but not measured.
    timeit.timeit(run_bench, number=warm_up)
    total_time = timeit.timeit(run_bench, number=repeat_time)
    execute_cost = total_time * 1000 / repeat_time
    return (execute_cost, compile_cost)


def mlir_wrapper_bench(
    ir_module: ir.Module,
    entry_name: str,
    pipeline: str,
    mlir_args: list,
    shared_libs: Sequence,
    ir_printing=False,
    repeat_time=100,
    warm_up=20,
) -> "tuple[float, float]":
    """Benchmark a kernel via an IR-emitted timing wrapper.

    A ``wrapped_main`` function that times the kernel with a nanosecond
    clock is appended to the module, so the measured time excludes
    Python call overhead.

    NOTE(review): the wrapper is inserted directly into the caller's
    module (no clone) — *ir_module* is mutated in place; confirm callers
    do not reuse the module afterwards.

    Returns:
        (execute_cost, compile_cost): average execution time per run and
        total compile+JIT time, both in milliseconds.
        (Annotation fixed: the original declared ``-> float`` but returned
        a 2-tuple.)
    """
    kernel_func = get_kernel_func_from_module(ir_module, entry_name)

    wrapper_module = ir_module
    with ir.InsertionPoint(wrapper_module.body):
        emit_benchmark_wrapped_main_func(kernel_func, emit_nano_time())
    compiler = GraphCompiler(
        pipeline,
        shared_libs,
    )
    compile_begin = timeit.default_timer()
    engine = compiler.compile_and_jit(wrapper_module, ir_printing=ir_printing)
    compile_cost = (timeit.default_timer() - compile_begin) * 1000

    # The wrapper writes the measured nanoseconds into this memref.
    np_timers_ns = np.array([0], dtype=np.int64)
    time_arg = ctypes.pointer(
        ctypes.pointer(runtime.get_ranked_memref_descriptor(np_timers_ns))
    )
    total_time = 0.0
    ns_to_ms_scale = 1e-6
    for i in range(repeat_time + warm_up):
        engine.invoke("wrapped_main", time_arg, *mlir_args)
        # Only iterations after the warm-up phase contribute to the average.
        if i >= warm_up:
            total_time += int(np_timers_ns[0]) * ns_to_ms_scale
    execute_cost = total_time / repeat_time
    return (execute_cost, compile_cost)


# Test-only stand-in for the real benchmark entry points.
def fake_bench() -> float:
    """Return a pseudo-random integral value in [1, 100] as a float."""
    sampled = random.randint(1, 100)
    return float(sampled)
82 changes: 82 additions & 0 deletions tools/config_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
################################################################################
# Copyright (C) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
# SPDX-License-Identifier: Apache-2.0
################################################################################

import math
from abc import ABC, abstractmethod
from typing import List

import mmh3


class ConfigFilter(ABC):
    """Interface for trackers that de-duplicate tuning configurations."""

    @abstractmethod
    def already_met(self, v: List[int]) -> bool:
        """Report whether config *v* has (possibly) been seen before."""

    @abstractmethod
    def add(self, v: List[int]):
        """Record config *v* as seen."""

    @abstractmethod
    def save(self):
        """Return a snapshot of the filter's internal state."""

    def load(self, data):
        """Restore filter state from *data* (default: no-op)."""


class BloomFilter(ConfigFilter):
    """Space-efficient probabilistic config filter.

    ``already_met`` can return a false positive (rate ~ *err_rate*) but
    never a false negative relative to what was ``add``-ed.
    """

    def __init__(self, num_samples: int, err_rate: float):
        # Standard Bloom-filter sizing:
        #   bits   m = -n * ln(p) / ln(2)^2
        #   hashes k = (m / n) * ln(2)
        self.num_bits = int(-(num_samples * math.log(err_rate)) / (math.log(2) ** 2))
        self.num_hashes = int((self.num_bits / num_samples) * math.log(2))
        self.bit_array = [0] * self.num_bits

    def already_met(self, v):
        """Return False iff at least one hash slot for *v* is unset."""
        # num_hashes is already an int; the redundant int() wrappers of the
        # original were dropped.
        for seed in range(self.num_hashes):
            hash_v = mmh3.hash(v, seed) % self.num_bits
            if self.bit_array[hash_v] == 0:
                return False
        return True

    def add(self, v):
        """Set every hash slot for *v*."""
        for seed in range(self.num_hashes):
            hash_v = mmh3.hash(v, seed) % self.num_bits
            self.bit_array[hash_v] = 1

    def save(self):
        """Return the raw bit array (not a copy)."""
        return self.bit_array

    def load(self, data):
        """Restore the bit array from *data*.

        Bug fix: the original used ``==`` (a no-op comparison whose result
        was discarded) instead of ``=``, so loading state silently did
        nothing.
        """
        self.bit_array = data


class HashSetFilter(ConfigFilter):
    """Exact (non-probabilistic) filter backed by a set of config tuples."""

    def __init__(self):
        self.data = set()

    def already_met(self, v: List[int]) -> bool:
        """True iff exactly this config was previously added."""
        return tuple(v) in self.data

    def add(self, v):
        """Record *v*; converted to a tuple so it is hashable."""
        key = tuple(v)
        self.data.add(key)

    def save(self):
        """Return the underlying set (not a copy)."""
        return self.data

    def load(self, data):
        """Replace the underlying set with *data*."""
        self.data = data
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.