Implement permute_dims #12

Merged: 15 commits, Nov 4, 2024
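
This PR adds `permute_dims` to sharpy's array API (see the `sharpy/array_api.py` and `sharpy/__init__.py` diffs below) and exercises it in a new transpose benchmark. As a rough sketch of the intended usage — illustrative only, reusing the sharpy calls that `examples/transpose.py` relies on rather than quoting the PR — `permute_dims(x, axes)` returns an array with the dimensions of `x` reordered, so `[1, 0]` on a 2-D array is a transpose:

# Illustrative sketch, not part of the PR; it reuses the sharpy calls made in
# examples/transpose.py (init, arange, reshape, permute_dims, to_numpy, fini).
import numpy
import sharpy

sharpy.init(False)

a = sharpy.arange(0, 6, 1, dtype=sharpy.float64)
a = sharpy.reshape(a, (2, 3))
b = sharpy.permute_dims(a, [1, 0])  # shape (2, 3) -> (3, 2)

# The result matches numpy's transpose of the same data.
assert numpy.allclose(sharpy.to_numpy(b), sharpy.to_numpy(a).transpose())

sharpy.fini()
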
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -19,13 +19,13 @@ repos:
       - id: trailing-whitespace
         exclude: '.*\.patch'
   - repo: https://github.com/psf/black
-    rev: 24.3.0
+    rev: 24.8.0
     hooks:
       - id: black
         args: ["--line-length", "80"]
         language_version: python3
   - repo: https://github.com/PyCQA/bandit
-    rev: '1.7.8'
+    rev: '1.7.9'
     hooks:
       - id: bandit
         args: ["-c", ".bandit.yml"]
@@ -35,7 +35,7 @@ repos:
       - id: isort
         name: isort (python)
   - repo: https://github.com/pycqa/flake8
-    rev: 7.0.0
+    rev: 7.1.1
     hooks:
       - id: flake8
   - repo: https://github.com/pocc/pre-commit-hooks
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -149,6 +149,7 @@ include_directories(
   ${PROJECT_SOURCE_DIR}/third_party/bitsery/include
   ${MPI_INCLUDE_PATH}
   ${pybind11_INCLUDE_DIRS}
+  ${LLVM_INCLUDE_DIRS}
   ${MLIR_INCLUDE_DIRS}
   ${IMEX_INCLUDE_DIRS})

184 changes: 184 additions & 0 deletions examples/transpose.py
@@ -0,0 +1,184 @@
"""
Transpose benchmark

Matrix transpose benchmark for sharpy and numpy backends.

Examples:

# Run 1000 iterations of a 1000*1000 matrix on the sharpy backend
python transpose.py -r 1000 -c 1000 -b sharpy -i 1000

# MPI parallel run
mpiexec -n 3 python transpose.py -r 1000 -c 1000 -b sharpy -i 1000

"""

import argparse
import time as time_mod

import numpy

import sharpy

try:
    import mpi4py

    mpi4py.rc.finalize = False
    from mpi4py import MPI

    comm_rank = MPI.COMM_WORLD.Get_rank()
    comm = MPI.COMM_WORLD
except ImportError:
    comm_rank = 0
    comm = None


def info(s):
    if comm_rank == 0:
        print(s)


def sp_transpose(arr):
    brr = sharpy.permute_dims(arr, [1, 0])
    return brr


def np_transpose(arr):
    brr = arr.transpose()
    return brr.copy()


def initialize(np, row, col, dtype):
    arr = np.arange(0, row * col, 1, dtype=dtype)
    return np.reshape(arr, (row, col))


def run(row, col, backend, iterations, datatype):
    if backend == "sharpy":
        import sharpy as np
        from sharpy import fini, init, sync

        transpose = sp_transpose

        init(False)
    elif backend == "numpy":
        import numpy as np

        if comm is not None:
            assert (
                comm.Get_size() == 1
            ), "Numpy backend only supports serial execution."

        fini = sync = lambda x=None: None
        transpose = np_transpose
    else:
        raise ValueError(f'Unknown backend: "{backend}"')

    dtype = {
        "f32": np.float32,
        "f64": np.float64,
    }[datatype]

    info(f"Using backend: {backend}")
    info(f"Number of rows: {row}")
    info(f"Number of columns: {col}")
    info(f"Datatype: {datatype}")

    arr = initialize(np, row, col, dtype)
    sync()

    # verify against numpy
    if backend == "sharpy":
        brr = sp_transpose(arr)
        crr = np_transpose(sharpy.to_numpy(arr))
        assert numpy.allclose(sharpy.to_numpy(brr), crr)

    def eval():
        tic = time_mod.perf_counter()
        transpose(arr)
        sync()
        toc = time_mod.perf_counter()
        return toc - tic

    # warm-up run
    t_warm = eval()

    # evaluate
    info(f"Running {iterations} iterations")
    time_list = []
    for i in range(iterations):
        time_list.append(eval())

    # get max time over MPI ranks
    if comm is not None:
        t_warm = comm.allreduce(t_warm, MPI.MAX)
        time_list = comm.allreduce(time_list, MPI.MAX)

    t_min = numpy.min(time_list)
    t_max = numpy.max(time_list)
    t_med = numpy.median(time_list)
    init_overhead = t_warm - t_med
    if backend == "sharpy":
        info(f"Estimated initialization overhead: {init_overhead:.5f} s")
    info(f"Min. duration: {t_min:.5f} s")
    info(f"Max. duration: {t_max:.5f} s")
    info(f"Median duration: {t_med:.5f} s")

    fini()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Run transpose benchmark",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        "-r",
        "--row",
        type=int,
        default=10000,
        help="Number of rows.",
    )
    parser.add_argument(
        "-c",
        "--column",
        type=int,
        default=10000,
        help="Number of columns.",
    )

    parser.add_argument(
        "-b",
        "--backend",
        type=str,
        default="sharpy",
        choices=["sharpy", "numpy"],
        help="Backend to use.",
    )

    parser.add_argument(
        "-i",
        "--iterations",
        type=int,
        default=10,
        help="Number of iterations to run.",
    )

    parser.add_argument(
        "-d",
        "--datatype",
        type=str,
        default="f64",
        choices=["f32", "f64"],
        help="Datatype of the matrix elements.",
    )

    args = parser.parse_args()
    run(
        args.row,
        args.column,
        args.backend,
        args.iterations,
        args.datatype,
    )
2 changes: 1 addition & 1 deletion imex_version.txt
@@ -1 +1 @@
-5a7bb80ede5fe4fa8d56ee0dd77c4e5c1327fe09
+8ae485bbfb1303a414b375e25130fcaa4c02127a
6 changes: 5 additions & 1 deletion setup.py
@@ -1,3 +1,4 @@
+import multiprocessing
 import os
 import pathlib

@@ -44,7 +45,10 @@ def build_cmake(self, ext):
         os.chdir(str(build_temp))
         self.spawn(["cmake", str(cwd)] + cmake_args)
         if not self.dry_run:
-            self.spawn(["cmake", "--build", ".", "-j5"] + build_args)
+            self.spawn(
+                ["cmake", "--build", ".", f"-j{multiprocessing.cpu_count()}"]
+                + build_args
+            )
         # Troubleshooting: if fail on line above then delete all possible
         # temporary CMake files including "CMakeCache.txt" in top level dir.
         os.chdir(str(cwd))
4 changes: 4 additions & 0 deletions sharpy/__init__.py
@@ -130,6 +130,10 @@ def _validate_device(device):
         exec(
             f"{func} = lambda this, shape, cp=None: ndarray(_csp.ManipOp.reshape(this._t, shape, cp))"
         )
+    elif func == "permute_dims":
+        exec(
+            f"{func} = lambda this, axes: ndarray(_csp.ManipOp.permute_dims(this._t, axes))"
+        )

 for func in api.api_categories["ReduceOp"]:
     FUNC = func.upper()
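
For readability, the string built in the exec(...) call above expands, for func == "permute_dims", into a module-level binding roughly equivalent to the following — a sketch of the generated code, not a literal excerpt from the file; ndarray, _csp, and the _t attribute are the names already used in the surrounding diff:

# Approximate equivalent of what exec(...) defines for func == "permute_dims":
# unwrap the sharpy ndarray, forward to the native ManipOp, and rewrap the result.
def permute_dims(this, axes):
    return ndarray(_csp.ManipOp.permute_dims(this._t, axes))
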
1 change: 1 addition & 0 deletions sharpy/array_api.py
@@ -179,6 +179,7 @@
         "roll",  # (x, /, shift, *, axis=None)
         "squeeze",  # (x, /, axis)
         "stack",  # (arrays, /, *, axis=0)
+        "permute_dims",  # (x: array, /, axes: Tuple[int, ...]) → array
     ],
     "LinAlgOp": [
         "matmul",  # (x1, x2, /)
2 changes: 1 addition & 1 deletion src/EWBinOp.cpp
@@ -120,7 +120,7 @@ struct DeferredEWBinOp : public Deferred {
     auto av = dm.getDependent(builder, Registry::get(_a));
     auto bv = dm.getDependent(builder, Registry::get(_b));

-    auto aTyp = av.getType().cast<::imex::ndarray::NDArrayType>();
+    auto aTyp = ::mlir::cast<::imex::ndarray::NDArrayType>(av.getType());
     auto outElemType =
         ::imex::ndarray::toMLIR(builder, SHARPY::jit::getPTDType(_dtype));
     auto outTyp = aTyp.cloneWith(shape(), outElemType);
2 changes: 1 addition & 1 deletion src/EWUnyOp.cpp
@@ -105,7 +105,7 @@ struct DeferredEWUnyOp : public Deferred {
                      jit::DepManager &dm) override {
     auto av = dm.getDependent(builder, Registry::get(_a));

-    auto aTyp = av.getType().cast<::imex::ndarray::NDArrayType>();
+    auto aTyp = ::mlir::cast<::imex::ndarray::NDArrayType>(av.getType());
     auto outTyp = aTyp.cloneWith(shape(), aTyp.getElementType());

     auto ndOpId = sharpy(_op);
2 changes: 1 addition & 1 deletion src/IEWBinOp.cpp
@@ -71,7 +71,7 @@ struct DeferredIEWBinOp : public Deferred {
     auto av = dm.getDependent(builder, Registry::get(_a));
     auto bv = dm.getDependent(builder, Registry::get(_b));

-    auto aTyp = av.getType().cast<::imex::ndarray::NDArrayType>();
+    auto aTyp = ::mlir::cast<::imex::ndarray::NDArrayType>(av.getType());
     auto outTyp = aTyp.cloneWith(shape(), aTyp.getElementType());

     auto binop = builder.create<::imex::ndarray::EWBinOp>(