
Commit 76ce011

Merge pull request #11 from dionhaefner/bump-dependencies
Re-run all benchmarks + tweaks
2 parents e79007d + 20c8704 commit 76ce011


41 files changed: +3637 −2934 lines

README.md

Lines changed: 6 additions & 14 deletions
````diff
@@ -46,11 +46,10 @@ Two reasons:
 
 - [NumPy](https://numpy.org) (CPU only)
 - [Numba](https://numba.pydata.org) (CPU only)
+- [Aesara](https://github.com/aesara-devs/aesara) (CPU only)
 - [Jax](https://github.com/google/jax)
 - [Tensorflow](https://www.tensorflow.org)
 - [Pytorch](https://pytorch.org)
-- [Theano](http://deeplearning.net/software/theano/)
-- [Bohrium](http://www.bh107.org)
 - [CuPy](https://cupy.chainer.org/) (GPU only)
 
 (not every backend is available for every benchmark)
@@ -71,14 +70,14 @@ As a rule of thumb (from our experience with Veros), the performance of a Fortra
 For CPU:
 
 ```bash
-$ conda create -f environment-cpu.yml
+$ conda env create -f environment-cpu.yml
 $ conda activate pyhpc-bench-cpu
 ```
 
 GPU:
 
 ```bash
-$ conda create -f environment-gpu.yml
+$ conda env create -f environment-gpu.yml
 $ conda activate pyhpc-bench-gpu
 ```
 
@@ -113,7 +112,7 @@ Options:
   -s, --size INTEGER              Run benchmark for this array size
                                   (repeatable) [default: 4096, 16384, 65536,
                                   262144, 1048576, 4194304]
-  -b, --backend [numpy|bohrium|cupy|jax|theano|numba|pytorch|tensorflow]
+  -b, --backend [numpy|cupy|jax|aesara|numba|pytorch|tensorflow]
                                   Run benchmark with this backend (repeatable)
                                   [default: run all backends]
   -r, --repetitions INTEGER       Fixed number of iterations to run for each
@@ -143,19 +142,12 @@ $ taskset -c 0 python run.py benchmarks/<benchmark_name>
 Some backends use all available GPUs by default, some don't. If you have multiple GPUs, you can set the
 one to be used through `CUDA_VISIBLE_DEVICES`, so keep things fair.
 
-```bash
-$ conda activate pyhpc-bench-gpu
-$ export CUDA_VISIBLE_DEVICES="0"
-$ python run.py benchmarks/<benchmark_name> --gpu
-```
-
-Some backends are pretty greedy with allocating memory. For large problem sizes, it can be a good idea to
-only run one backend at a time (and NumPy for reference):
+Some backends are greedy with allocating memory. On GPU, you can only run one backend at a time (add NumPy for reference):
 
 ```bash
 $ conda activate pyhpc-bench-gpu
 $ export CUDA_VISIBLE_DEVICES="0"
-$ for backend in bohrium jax cupy pytorch tensorflow; do
+$ for backend in jax cupy pytorch tensorflow; do
 ...    python run benchmarks/<benchmark_name> --device gpu -b $backend -b numpy -s 10_000_000
 ... done
 ```
````
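The tightened wording above ("you can only run one backend at a time" on GPU) is enforced by a new `check_backend_conflicts` guard added in `backends.py` (next file). A minimal sketch of the behavior it adds, using only names introduced by this commit and assuming `backends.py` is importable from the repository root:

```python
# Sketch: what the new GPU backend-conflict guard enforces (see backends.py below).
from backends import BackendConflict, check_backend_conflicts

# numba, numpy, and aesara are treated as CPU-only, so they never conflict:
check_backend_conflicts(["jax", "numpy"], device="gpu")  # passes silently

try:
    # requesting two actual GPU backends at once is now an error
    check_backend_conflicts(["jax", "cupy"], device="gpu")
except BackendConflict as exc:
    print(exc)  # "Can only use one GPU backend at the same time (got: ...)"
```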

backends.py

Lines changed: 83 additions & 78 deletions
```diff
@@ -1,10 +1,9 @@
 import os
-import importlib
 
 import numpy
 
 
-def convert_to_numpy(arr, backend, device='cpu'):
+def convert_to_numpy(arr, backend, device="cpu"):
     """Converts an array or collection of arrays to np.ndarray"""
     if isinstance(arr, (list, tuple)):
         return [convert_to_numpy(subarr, backend, device) for subarr in arr]
@@ -14,34 +13,46 @@ def convert_to_numpy(arr, backend, device='cpu'):
         # we don't want subclasses to get passed through
         return arr
 
-    if backend == 'bohrium':
-        return arr.copy2numpy()
-
-    if backend == 'cupy':
+    if backend == "cupy":
         return arr.get()
 
-    if backend == 'jax':
+    if backend == "jax":
         return numpy.asarray(arr)
 
-    if backend == 'pytorch':
-        if device == 'gpu':
+    if backend == "pytorch":
+        if device == "gpu":
             return numpy.asarray(arr.cpu())
         else:
             return numpy.asarray(arr)
 
-    if backend == 'tensorflow':
+    if backend == "tensorflow":
         return numpy.asarray(arr)
 
-    if backend == 'theano':
+    if backend == "aesara":
         return numpy.asarray(arr)
 
-    raise RuntimeError(f'Got unexpected array / backend combination: {type(arr)} / {backend}')
+    raise RuntimeError(
+        f"Got unexpected array / backend combination: {type(arr)} / {backend}"
+    )
 
 
 class BackendNotSupported(Exception):
     pass
 
 
+class BackendConflict(Exception):
+    pass
+
+
+def check_backend_conflicts(backends, device):
+    if device == "gpu":
+        gpu_backends = set(backends) - {"numba", "numpy", "aesara"}
+        if len(gpu_backends) > 1:
+            raise BackendConflict(
+                f"Can only use one GPU backend at the same time (got: {gpu_backends})"
+            )
+
+
 class SetupContext:
     def __init__(self, f):
         self._f = f
@@ -57,11 +68,11 @@ def __enter__(self):
         self._f_iter = iter(self._f(*args, **kwargs))
 
         try:
-            next(self._f_iter)
+            module = next(self._f_iter)
         except Exception as e:
             raise BackendNotSupported(str(e)) from None
 
-        return self
+        return module
 
     def __exit__(self, *args, **kwargs):
         try:
@@ -76,126 +87,120 @@ def __exit__(self, *args, **kwargs):
 
 # setup function definitions
 
+
 @setup_function
-def setup_numpy(device='cpu'):
+def setup_numpy(device="cpu"):
+    import numpy
+
     os.environ.update(
-        OMP_NUM_THREADS='1',
+        OMP_NUM_THREADS="1",
     )
-    yield
+    yield numpy
 
 
 @setup_function
-def setup_bohrium(device='cpu'):
+def setup_aesara(device="cpu"):
     os.environ.update(
-        OMP_NUM_THREADS='1',
-        BH_STACK='opencl' if device == 'gpu' else 'openmp',
+        OMP_NUM_THREADS="1",
     )
-    try:
-        import bohrium  # noqa: F401
-        yield
-    finally:
-        # bohrium does things to numpy
-        importlib.reload(numpy)
+    if device == "gpu":
+        raise RuntimeError("aesara uses JAX on GPU")
 
+    import aesara
 
-@setup_function
-def setup_theano(device='cpu'):
-    os.environ.update(
-        OMP_NUM_THREADS='1',
-    )
-    if device == 'gpu':
-        os.environ.update(
-            THEANO_FLAGS='device=cuda',
-        )
-    import theano  # noqa: F401
-    yield
+    # clang needs this, aesara#127
+    aesara.config.gcc__cxxflags = "-Wno-c++11-narrowing"
+    yield aesara
 
 
 @setup_function
-def setup_numba(device='cpu'):
+def setup_numba(device="cpu"):
     os.environ.update(
-        OMP_NUM_THREADS='1',
+        OMP_NUM_THREADS="1",
    )
-    import numba  # noqa: F401
-    yield
+    import numba
+
+    yield numba
 
 
 @setup_function
-def setup_cupy(device='cpu'):
-    if device != 'gpu':
-        raise RuntimeError('cupy requires GPU mode')
-    import cupy  # noqa: F401
-    yield
+def setup_cupy(device="cpu"):
+    if device != "gpu":
+        raise RuntimeError("cupy requires GPU mode")
+    import cupy
+
+    yield cupy
 
 
 @setup_function
-def setup_jax(device='cpu'):
+def setup_jax(device="cpu"):
     os.environ.update(
         XLA_FLAGS=(
-            '--xla_cpu_multi_thread_eigen=false '
-            'intra_op_parallelism_threads=1 '
-            'inter_op_parallelism_threads=1 '
+            "--xla_cpu_multi_thread_eigen=false "
+            "intra_op_parallelism_threads=1 "
+            "inter_op_parallelism_threads=1 "
         ),
-        XLA_PYTHON_CLIENT_PREALLOCATE='false',
     )
 
-    if device in ('cpu', 'gpu'):
+    if device in ("cpu", "gpu"):
         os.environ.update(JAX_PLATFORM_NAME=device)
 
     import jax
     from jax.config import config
 
-    if device == 'tpu':
-        config.update('jax_xla_backend', 'tpu_driver')
-        config.update('jax_backend_target', os.environ.get('JAX_BACKEND_TARGET'))
+    if device == "tpu":
+        config.update("jax_xla_backend", "tpu_driver")
+        config.update("jax_backend_target", os.environ.get("JAX_BACKEND_TARGET"))
 
-    if device != 'tpu':
+    if device != "tpu":
         # use 64 bit floats (not supported on TPU)
-        config.update('jax_enable_x64', True)
+        config.update("jax_enable_x64", True)
 
-    if device == 'gpu':
+    if device == "gpu":
         assert len(jax.devices()) > 0
 
-    yield
+    yield jax
 
 
 @setup_function
-def setup_pytorch(device='cpu'):
+def setup_pytorch(device="cpu"):
     os.environ.update(
-        OMP_NUM_THREADS='1',
+        OMP_NUM_THREADS="1",
     )
     import torch
-    if device == 'gpu':
+
+    if device == "gpu":
         assert torch.cuda.is_available()
         assert torch.cuda.device_count() > 0
-    yield
+
+    yield torch
 
 
 @setup_function
-def setup_tensorflow(device='cpu'):
+def setup_tensorflow(device="cpu"):
     os.environ.update(
-        OMP_NUM_THREADS='1',
-        XLA_PYTHON_CLIENT_PREALLOCATE='false',
+        OMP_NUM_THREADS="1",
    )
     import tensorflow as tf
+
     tf.config.threading.set_inter_op_parallelism_threads(1)
     tf.config.threading.set_intra_op_parallelism_threads(1)
 
-    if device == 'gpu':
-        gpus = tf.config.experimental.list_physical_devices('GPU')
+    if device == "gpu":
+        gpus = tf.config.experimental.list_physical_devices("GPU")
         assert gpus
     else:
-        tf.config.experimental.set_visible_devices([], 'GPU')
-    yield
+        tf.config.experimental.set_visible_devices([], "GPU")
+
+    yield tf
 
 
 __backends__ = {
-    'numpy': setup_numpy,
-    'bohrium': setup_bohrium,
-    'cupy': setup_cupy,
-    'jax': setup_jax,
-    'theano': setup_theano,
-    'numba': setup_numba,
-    'pytorch': setup_pytorch,
-    'tensorflow': setup_tensorflow
+    "numpy": setup_numpy,
+    "cupy": setup_cupy,
+    "jax": setup_jax,
+    "aesara": setup_aesara,
+    "numba": setup_numba,
+    "pytorch": setup_pytorch,
+    "tensorflow": setup_tensorflow,
 }
```
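Beyond swapping Theano and Bohrium for Aesara and moving to double-quoted strings, the structural change in this file is that every setup generator now yields its backend module, and `SetupContext.__enter__` returns that module instead of the context object. A hedged usage sketch follows; the `setup_function` decorator and the call plumbing of `SetupContext` are not shown in this diff, so the exact calling convention is an assumption:

```python
# Hypothetical sketch of how run.py might consume the reworked setup functions.
# Assumes __backends__[name](device=...) produces a SetupContext whose
# __enter__ returns the module yielded by the setup generator (as of this commit).
from backends import __backends__, convert_to_numpy

with __backends__["numpy"](device="cpu") as np_module:
    arr = np_module.ones(10)  # np_module is the yielded module itself
    out = convert_to_numpy(arr, "numpy", device="cpu")
```

Returning the module from `__enter__` spares each benchmark a second import, while a missing or broken backend still surfaces uniformly as `BackendNotSupported`.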

benchmarks/equation_of_state/__init__.py

Lines changed: 14 additions & 14 deletions
```diff
@@ -5,12 +5,13 @@
 
 def generate_inputs(size):
     import numpy as np
+
     np.random.seed(17)
 
     shape = (
-        math.ceil(2 * size ** (1/3)),
-        math.ceil(2 * size ** (1/3)),
-        math.ceil(0.25 * size ** (1/3)),
+        math.ceil(2 * size ** (1 / 3)),
+        math.ceil(2 * size ** (1 / 3)),
+        math.ceil(0.25 * size ** (1 / 3)),
     )
 
     s = np.random.uniform(1e-2, 10, size=shape)
@@ -21,26 +22,25 @@ def generate_inputs(size):
 
 def try_import(backend):
     try:
-        return importlib.import_module(f'.eos_{backend}', __name__)
+        return importlib.import_module(f".eos_{backend}", __name__)
     except ImportError:
         return None
 
 
-def get_callable(backend, size, device='cpu'):
+def get_callable(backend, size, device="cpu"):
     backend_module = try_import(backend)
     inputs = generate_inputs(size)
-    if hasattr(backend_module, 'prepare_inputs'):
+    if hasattr(backend_module, "prepare_inputs"):
         inputs = backend_module.prepare_inputs(*inputs, device=device)
     return functools.partial(backend_module.run, *inputs, device=device)
 
 
 __implementations__ = (
-    'bohrium',
-    'cupy',
-    'jax',
-    'numba',
-    'numpy',
-    'pytorch',
-    'tensorflow',
-    'theano',
+    "aesara",
+    "cupy",
+    "jax",
+    "numba",
+    "numpy",
+    "pytorch",
+    "tensorflow",
 )
```
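For context, `get_callable` closes over the generated inputs and returns a zero-argument callable via `functools.partial`. A hypothetical driver sketch (not part of this commit) showing how the pieces fit together:

```python
# Hypothetical driver for the equation-of-state benchmark (illustration only).
from benchmarks.equation_of_state import __implementations__, get_callable

for backend in __implementations__:
    # try_import() inside get_callable returns None for backends that are
    # not installed, so a real driver would need to guard against that.
    run = get_callable(backend, size=4096, device="cpu")
    result = run()  # executes backend_module.run(*inputs, device="cpu")
```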
