Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Experimental][Transform] Split Compute Intensive Op #154

Open
wants to merge 12 commits into
base: xurui/add_benchmark
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'xurui/add_benchmark' into zhangyan/test_pybind
  • Loading branch information
Zhang Yan committed Jul 1, 2024
commit 65ac7f01a9e5cdda4a8722b4cc8e1f626f33ec01
4 changes: 4 additions & 0 deletions lib/gc/Transforms/OneDNNGraphToLinalg.cpp
Original file line number Diff line number Diff line change
@@ -26,6 +26,8 @@
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

#include "llvm/Support/raw_ostream.h"
#include<iostream>
using namespace mlir::onednn_graph;

namespace mlir {
@@ -492,6 +494,8 @@ struct MatMulOpLowering : public OpRewritePattern<MatMulOp> {
/*outputs=*/outBias);
}

// Passing matmul configs to linalg.matmul
newOp->setAttrs(op->getAttrs());
rewriter.replaceOp(op, newOp);
return success();
}
1 change: 1 addition & 0 deletions lib/gc/Transforms/Pipeline.cpp
Original file line number Diff line number Diff line change
@@ -75,6 +75,7 @@ void populateBufferizationPasses(mlir::PassManager &pm) {
bufferization::LayoutMapOption::IdentityLayoutMap);
pm.addPass(bufferization::createOneShotBufferizePass(options));
pm.addPass(createCSEPass());

bufferization::BufferResultsToOutParamsOpts opt{};
opt.hoistStaticAllocs = true;
pm.addPass(bufferization::createBufferResultsToOutParamsPass(opt));
4 changes: 4 additions & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@ declare_mlir_python_sources(GcPythonSources.Common
ADD_TO_PARENT GcPythonSources
SOURCES
__init__.py
graph_compiler.py
dialects/__init__.py
# init hooks
_mlir_libs/_site_initialize_0.py
@@ -83,6 +84,8 @@ add_mlir_python_common_capi_library(GcPythonCAPI
GcPythonSources
MLIRPythonExtension.RegisterEverything
MLIRPythonSources.Core
MLIRPythonSources.Dialects.linalg
MLIRPythonSources.ExecutionEngine
)

################################################################################
@@ -96,6 +99,7 @@ add_mlir_python_modules(GcPythonModules
GcPythonSources
MLIRPythonExtension.RegisterEverything
MLIRPythonSources
MLIRPythonSources.ExecutionEngine
COMMON_CAPI_LINK_LIBS
GcPythonCAPI
)
48 changes: 48 additions & 0 deletions python/gc_mlir/graph_compiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# ===-- graph_compiler.py - DESC ------------------------------*- Python -*-===#
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===-----------------------------------------------------------------------===#

from gc_mlir import execution_engine
from gc_mlir import ir
from gc_mlir import passmanager
from typing import Sequence

__all__ = [
"GraphCompiler",
]


class GraphCompiler:
    """Compiles an MLIR module with a configurable pass pipeline and
    optionally JITs it into an ExecutionEngine.

    Attributes:
        pipeline: textual pass-pipeline description parsed by PassManager.
        shared_libs: paths of shared libraries loaded into the JIT engine.
        opt_level: LLVM JIT optimization level (0-3).
    """

    def __init__(
        self,
        pipeline: str = "any(gc-cpu-pipeline)",
        shared_libs: Sequence[str] = (),
        opt_level: int = 3,
    ):
        # Default changed from `[]` to `()` — an empty tuple avoids the
        # shared-mutable-default-argument pitfall and is accepted anywhere
        # a Sequence is.
        self.shared_libs = shared_libs
        self.pipeline = pipeline
        self.opt_level = opt_level

    def __call__(self, module: ir.Module, ir_printing: bool = False):
        """Alias for :meth:`compile`."""
        self.compile(module, ir_printing)

    def compile(self, module: ir.Module, ir_printing: bool = False):
        """Run the configured pass pipeline on *module* in place.

        Args:
            module: module to transform (mutated in place).
            ir_printing: if True, dump IR before/after each pass.
        """
        pm = passmanager.PassManager.parse(self.pipeline)
        if ir_printing:
            pm.enable_ir_printing()
        pm.run(module.operation)

    def jit(self, module: ir.Module) -> execution_engine.ExecutionEngine:
        """Wrap an already-lowered *module* in an ExecutionEngine."""
        return execution_engine.ExecutionEngine(
            module, opt_level=self.opt_level, shared_libs=self.shared_libs
        )

    def compile_and_jit(
        self, module: ir.Module, ir_printing: bool = False
    ) -> execution_engine.ExecutionEngine:
        """Compile *module* with the pipeline, then JIT it."""
        self.compile(module, ir_printing)
        return self.jit(module)
14 changes: 14 additions & 0 deletions tools/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Python Tools
## Pre-requisites
* Enable python binding
* Install the dependencies listed in `tools/requirements.txt`
* Set env
  * `PYTHONPATH=${BUILD_DIR}/python_packages/gc_mlir_core`
  * `LD_PRELOAD=path/to/libiomp5.so`
  * `MLIR_C_RUNNER_UTILS=${LLVM_INSTALL_DIR}/lib/libmlir_c_runner_utils.so`
  * `MLIR_RUNNER_UTILS=${LLVM_INSTALL_DIR}/lib/libmlir_runner_utils.so`


## Bench
## Tuning
TODO
110 changes: 110 additions & 0 deletions tools/bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
################################################################################
# Copyright (C) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
# SPDX-License-Identifier: Apache-2.0
################################################################################

import ctypes
import random
import timeit
from time import sleep
from typing import Sequence

import numpy as np
from gc_mlir import ir, runtime
from gc_mlir.dialects import arith, func, memref
from gc_mlir.graph_compiler import GraphCompiler
from utils import (
emit_benchmark_wrapped_main_func,
emit_nano_time,
get_kernel_func_from_module,
)


def py_timeit_bench(
    ir_module: ir.Module,
    entry_name: str,
    pipeline: str,
    mlir_args: list,
    shared_libs: Sequence,
    ir_printing=False,
    repeat_time=100,
    warm_up=20,
) -> "tuple[float, float]":
    """Benchmark an MLIR kernel using Python-side `timeit` timers.

    Args:
        ir_module: module containing the kernel entry function.
        entry_name: symbol name looked up in the execution engine.
        pipeline: pass-pipeline string handed to GraphCompiler.
        mlir_args: ctypes-compatible pointers passed to the kernel.
        shared_libs: shared libraries loaded into the execution engine.
        ir_printing: if True, dump IR while running the pipeline.
        repeat_time: number of timed invocations.
        warm_up: number of untimed warm-up invocations.

    Returns:
        (execute_cost, compile_cost): average execution time per run and
        total compile+JIT time, both in milliseconds.
        (Annotation fixed: the original declared ``-> float`` but returned
        a 2-tuple.)
    """
    compiler = GraphCompiler(
        pipeline,
        shared_libs,
    )
    compile_begin = timeit.default_timer()
    engine = compiler.compile_and_jit(ir_module, ir_printing=ir_printing)
    compile_cost = (timeit.default_timer() - compile_begin) * 1000

    # Renamed from `func` to avoid shadowing the `func` dialect imported
    # at module level.
    entry = engine.lookup(entry_name)
    packed_args = (ctypes.c_void_p * len(mlir_args))()
    for i, arg in enumerate(mlir_args):
        packed_args[i] = ctypes.cast(arg, ctypes.c_void_p)

    def run_bench():
        entry(packed_args)

    # Warm-up runs are executed but not measured.
    timeit.timeit(run_bench, number=warm_up)
    total_time = timeit.timeit(run_bench, number=repeat_time)
    execute_cost = total_time * 1000 / repeat_time
    return (execute_cost, compile_cost)


def mlir_wrapper_bench(
    ir_module: ir.Module,
    entry_name: str,
    pipeline: str,
    mlir_args: list,
    shared_libs: Sequence,
    ir_printing=False,
    repeat_time=100,
    warm_up=20,
) -> "tuple[float, float]":
    """Benchmark a kernel via an IR-emitted timing wrapper.

    A ``wrapped_main`` function that times the kernel with a nanosecond
    clock is appended to the module, so the measured time excludes
    Python call overhead.

    NOTE(review): the wrapper is inserted directly into the caller's
    module (no clone) — *ir_module* is mutated in place; confirm callers
    do not reuse the module afterwards.

    Returns:
        (execute_cost, compile_cost): average execution time per run and
        total compile+JIT time, both in milliseconds.
        (Annotation fixed: the original declared ``-> float`` but returned
        a 2-tuple.)
    """
    kernel_func = get_kernel_func_from_module(ir_module, entry_name)

    wrapper_module = ir_module
    with ir.InsertionPoint(wrapper_module.body):
        emit_benchmark_wrapped_main_func(kernel_func, emit_nano_time())
    compiler = GraphCompiler(
        pipeline,
        shared_libs,
    )
    compile_begin = timeit.default_timer()
    engine = compiler.compile_and_jit(wrapper_module, ir_printing=ir_printing)
    compile_cost = (timeit.default_timer() - compile_begin) * 1000

    # The wrapper writes the measured nanoseconds into this memref.
    np_timers_ns = np.array([0], dtype=np.int64)
    time_arg = ctypes.pointer(
        ctypes.pointer(runtime.get_ranked_memref_descriptor(np_timers_ns))
    )
    total_time = 0.0
    ns_to_ms_scale = 1e-6
    for i in range(repeat_time + warm_up):
        engine.invoke("wrapped_main", time_arg, *mlir_args)
        # Only iterations after the warm-up phase contribute to the average.
        if i >= warm_up:
            total_time += int(np_timers_ns[0]) * ns_to_ms_scale
    execute_cost = total_time / repeat_time
    return (execute_cost, compile_cost)


# Test-only stand-in for the real benchmark entry points.
def fake_bench() -> float:
    """Return a pseudo-random integral value in [1, 100] as a float."""
    sampled = random.randint(1, 100)
    return float(sampled)
82 changes: 82 additions & 0 deletions tools/config_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
################################################################################
# Copyright (C) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
# SPDX-License-Identifier: Apache-2.0
################################################################################

import math
from abc import ABC, abstractmethod
from typing import List

import mmh3


class ConfigFilter(ABC):
    """Interface for trackers that de-duplicate tuning configurations."""

    @abstractmethod
    def already_met(self, v: List[int]) -> bool:
        """Report whether config *v* has (possibly) been seen before."""

    @abstractmethod
    def add(self, v: List[int]):
        """Record config *v* as seen."""

    @abstractmethod
    def save(self):
        """Return a snapshot of the filter's internal state."""

    def load(self, data):
        """Restore filter state from *data* (default: no-op)."""


class BloomFilter(ConfigFilter):
    """Space-efficient probabilistic config filter.

    ``already_met`` can return a false positive (rate ~ *err_rate*) but
    never a false negative relative to what was ``add``-ed.
    """

    def __init__(self, num_samples: int, err_rate: float):
        # Standard Bloom-filter sizing:
        #   bits   m = -n * ln(p) / ln(2)^2
        #   hashes k = (m / n) * ln(2)
        self.num_bits = int(-(num_samples * math.log(err_rate)) / (math.log(2) ** 2))
        self.num_hashes = int((self.num_bits / num_samples) * math.log(2))
        self.bit_array = [0] * self.num_bits

    def already_met(self, v):
        """Return False iff at least one hash slot for *v* is unset."""
        # num_hashes is already an int; the redundant int() wrappers of the
        # original were dropped.
        for seed in range(self.num_hashes):
            hash_v = mmh3.hash(v, seed) % self.num_bits
            if self.bit_array[hash_v] == 0:
                return False
        return True

    def add(self, v):
        """Set every hash slot for *v*."""
        for seed in range(self.num_hashes):
            hash_v = mmh3.hash(v, seed) % self.num_bits
            self.bit_array[hash_v] = 1

    def save(self):
        """Return the raw bit array (not a copy)."""
        return self.bit_array

    def load(self, data):
        """Restore the bit array from *data*.

        Bug fix: the original used ``==`` (a no-op comparison whose result
        was discarded) instead of ``=``, so loading state silently did
        nothing.
        """
        self.bit_array = data


class HashSetFilter(ConfigFilter):
    """Exact (non-probabilistic) filter backed by a set of config tuples."""

    def __init__(self):
        self.data = set()

    def already_met(self, v: List[int]) -> bool:
        """True iff exactly this config was previously added."""
        return tuple(v) in self.data

    def add(self, v):
        """Record *v*; converted to a tuple so it is hashable."""
        key = tuple(v)
        self.data.add(key)

    def save(self):
        """Return the underlying set (not a copy)."""
        return self.data

    def load(self, data):
        """Replace the underlying set with *data*."""
        self.data = data
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.