
Commit 76ce011

Merge pull request #11 from dionhaefner/bump-dependencies
Re-run all benchmarks + tweaks
2 parents e79007d + 20c8704 commit 76ce011


41 files changed: +3637 −2934 lines

README.md

Lines changed: 6 additions & 14 deletions
````diff
@@ -46,11 +46,10 @@ Two reasons:
 
 - [NumPy](https://numpy.org) (CPU only)
 - [Numba](https://numba.pydata.org) (CPU only)
+- [Aesara](https://github.com/aesara-devs/aesara) (CPU only)
 - [Jax](https://github.com/google/jax)
 - [Tensorflow](https://www.tensorflow.org)
 - [Pytorch](https://pytorch.org)
-- [Theano](http://deeplearning.net/software/theano/)
-- [Bohrium](http://www.bh107.org)
 - [CuPy](https://cupy.chainer.org/) (GPU only)
 
 (not every backend is available for every benchmark)
@@ -71,14 +70,14 @@ As a rule of thumb (from our experience with Veros), the performance of a Fortra
 For CPU:
 
 ```bash
-$ conda create -f environment-cpu.yml
+$ conda env create -f environment-cpu.yml
 $ conda activate pyhpc-bench-cpu
 ```
 
 GPU:
 
 ```bash
-$ conda create -f environment-gpu.yml
+$ conda env create -f environment-gpu.yml
 $ conda activate pyhpc-bench-gpu
 ```
 
@@ -113,7 +112,7 @@ Options:
   -s, --size INTEGER              Run benchmark for this array size
                                   (repeatable) [default: 4096, 16384, 65536,
                                   262144, 1048576, 4194304]
-  -b, --backend [numpy|bohrium|cupy|jax|theano|numba|pytorch|tensorflow]
+  -b, --backend [numpy|cupy|jax|aesara|numba|pytorch|tensorflow]
                                   Run benchmark with this backend (repeatable)
                                   [default: run all backends]
   -r, --repetitions INTEGER       Fixed number of iterations to run for each
@@ -143,19 +142,12 @@ $ taskset -c 0 python run.py benchmarks/<benchmark_name>
 Some backends use all available GPUs by default, some don't. If you have multiple GPUs, you can set the
 one to be used through `CUDA_VISIBLE_DEVICES`, so keep things fair.
 
-```bash
-$ conda activate pyhpc-bench-gpu
-$ export CUDA_VISIBLE_DEVICES="0"
-$ python run.py benchmarks/<benchmark_name> --gpu
-```
-
-Some backends are pretty greedy with allocating memory. For large problem sizes, it can be a good idea to
-only run one backend at a time (and NumPy for reference):
+Some backends are greedy with allocating memory. On GPU, you can only run one backend at a time (add NumPy for reference):
 
 ```bash
 $ conda activate pyhpc-bench-gpu
 $ export CUDA_VISIBLE_DEVICES="0"
-$ for backend in bohrium jax cupy pytorch tensorflow; do
+$ for backend in jax cupy pytorch tensorflow; do
 ...    python run benchmarks/<benchmark_name> --device gpu -b $backend -b numpy -s 10_000_000
 ... done
 ```
````
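The tightened wording above ("you can only run one backend at a time" on GPU) is enforced by a new `check_backend_conflicts` guard added in `backends.py` (next file). A minimal sketch of the behavior it adds, using only names introduced by this commit and assuming `backends.py` is importable from the repository root:

```python
# Sketch: what the new GPU backend-conflict guard enforces (see backends.py below).
from backends import BackendConflict, check_backend_conflicts

# numba, numpy, and aesara are treated as CPU-only, so they never conflict:
check_backend_conflicts(["jax", "numpy"], device="gpu")  # passes silently

try:
    # requesting two actual GPU backends at once is now an error
    check_backend_conflicts(["jax", "cupy"], device="gpu")
except BackendConflict as exc:
    print(exc)  # "Can only use one GPU backend at the same time (got: ...)"
```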

backends.py

Lines changed: 83 additions & 78 deletions
```diff
@@ -1,10 +1,9 @@
 import os
-import importlib
 
 import numpy
 
 
-def convert_to_numpy(arr, backend, device='cpu'):
+def convert_to_numpy(arr, backend, device="cpu"):
     """Converts an array or collection of arrays to np.ndarray"""
     if isinstance(arr, (list, tuple)):
         return [convert_to_numpy(subarr, backend, device) for subarr in arr]
@@ -14,34 +13,46 @@ def convert_to_numpy(arr, backend, device='cpu'):
         # we don't want subclasses to get passed through
         return arr
 
-    if backend == 'bohrium':
-        return arr.copy2numpy()
-
-    if backend == 'cupy':
+    if backend == "cupy":
         return arr.get()
 
-    if backend == 'jax':
+    if backend == "jax":
         return numpy.asarray(arr)
 
-    if backend == 'pytorch':
-        if device == 'gpu':
+    if backend == "pytorch":
+        if device == "gpu":
             return numpy.asarray(arr.cpu())
         else:
             return numpy.asarray(arr)
 
-    if backend == 'tensorflow':
+    if backend == "tensorflow":
         return numpy.asarray(arr)
 
-    if backend == 'theano':
+    if backend == "aesara":
         return numpy.asarray(arr)
 
-    raise RuntimeError(f'Got unexpected array / backend combination: {type(arr)} / {backend}')
+    raise RuntimeError(
+        f"Got unexpected array / backend combination: {type(arr)} / {backend}"
+    )
 
 
 class BackendNotSupported(Exception):
     pass
 
 
+class BackendConflict(Exception):
+    pass
+
+
+def check_backend_conflicts(backends, device):
+    if device == "gpu":
+        gpu_backends = set(backends) - {"numba", "numpy", "aesara"}
+        if len(gpu_backends) > 1:
+            raise BackendConflict(
+                f"Can only use one GPU backend at the same time (got: {gpu_backends})"
+            )
+
+
 class SetupContext:
     def __init__(self, f):
         self._f = f
@@ -57,11 +68,11 @@ def __enter__(self):
         self._f_iter = iter(self._f(*args, **kwargs))
 
         try:
-            next(self._f_iter)
+            module = next(self._f_iter)
         except Exception as e:
             raise BackendNotSupported(str(e)) from None
 
-        return self
+        return module
 
     def __exit__(self, *args, **kwargs):
         try:
@@ -76,126 +87,120 @@ def __exit__(self, *args, **kwargs):
 
 # setup function definitions
 
+
 @setup_function
-def setup_numpy(device='cpu'):
+def setup_numpy(device="cpu"):
+    import numpy
+
     os.environ.update(
-        OMP_NUM_THREADS='1',
+        OMP_NUM_THREADS="1",
     )
-    yield
+    yield numpy
 
 
 @setup_function
-def setup_bohrium(device='cpu'):
+def setup_aesara(device="cpu"):
     os.environ.update(
-        OMP_NUM_THREADS='1',
-        BH_STACK='opencl' if device == 'gpu' else 'openmp',
+        OMP_NUM_THREADS="1",
     )
-    try:
-        import bohrium  # noqa: F401
-        yield
-    finally:
-        # bohrium does things to numpy
-        importlib.reload(numpy)
+    if device == "gpu":
+        raise RuntimeError("aesara uses JAX on GPU")
 
+    import aesara
 
-@setup_function
-def setup_theano(device='cpu'):
-    os.environ.update(
-        OMP_NUM_THREADS='1',
-    )
-    if device == 'gpu':
-        os.environ.update(
-            THEANO_FLAGS='device=cuda',
-        )
-    import theano  # noqa: F401
-    yield
+    # clang needs this, aesara#127
+    aesara.config.gcc__cxxflags = "-Wno-c++11-narrowing"
+    yield aesara
 
 
 @setup_function
-def setup_numba(device='cpu'):
+def setup_numba(device="cpu"):
     os.environ.update(
-        OMP_NUM_THREADS='1',
+        OMP_NUM_THREADS="1",
    )
-    import numba  # noqa: F401
-    yield
+    import numba
+
+    yield numba
 
 
 @setup_function
-def setup_cupy(device='cpu'):
-    if device != 'gpu':
-        raise RuntimeError('cupy requires GPU mode')
-    import cupy  # noqa: F401
-    yield
+def setup_cupy(device="cpu"):
+    if device != "gpu":
+        raise RuntimeError("cupy requires GPU mode")
+    import cupy
+
+    yield cupy
 
 
 @setup_function
-def setup_jax(device='cpu'):
+def setup_jax(device="cpu"):
     os.environ.update(
         XLA_FLAGS=(
-            '--xla_cpu_multi_thread_eigen=false '
-            'intra_op_parallelism_threads=1 '
-            'inter_op_parallelism_threads=1 '
+            "--xla_cpu_multi_thread_eigen=false "
+            "intra_op_parallelism_threads=1 "
+            "inter_op_parallelism_threads=1 "
         ),
-        XLA_PYTHON_CLIENT_PREALLOCATE='false',
     )
 
-    if device in ('cpu', 'gpu'):
+    if device in ("cpu", "gpu"):
         os.environ.update(JAX_PLATFORM_NAME=device)
 
     import jax
     from jax.config import config
 
-    if device == 'tpu':
-        config.update('jax_xla_backend', 'tpu_driver')
-        config.update('jax_backend_target', os.environ.get('JAX_BACKEND_TARGET'))
+    if device == "tpu":
+        config.update("jax_xla_backend", "tpu_driver")
+        config.update("jax_backend_target", os.environ.get("JAX_BACKEND_TARGET"))
 
-    if device != 'tpu':
+    if device != "tpu":
         # use 64 bit floats (not supported on TPU)
-        config.update('jax_enable_x64', True)
+        config.update("jax_enable_x64", True)
 
-    if device == 'gpu':
+    if device == "gpu":
         assert len(jax.devices()) > 0
 
-    yield
+    yield jax
 
 
 @setup_function
-def setup_pytorch(device='cpu'):
+def setup_pytorch(device="cpu"):
     os.environ.update(
-        OMP_NUM_THREADS='1',
+        OMP_NUM_THREADS="1",
     )
     import torch
-    if device == 'gpu':
+
+    if device == "gpu":
         assert torch.cuda.is_available()
         assert torch.cuda.device_count() > 0
-    yield
+
+    yield torch
 
 
 @setup_function
-def setup_tensorflow(device='cpu'):
+def setup_tensorflow(device="cpu"):
     os.environ.update(
-        OMP_NUM_THREADS='1',
-        XLA_PYTHON_CLIENT_PREALLOCATE='false',
+        OMP_NUM_THREADS="1",
    )
     import tensorflow as tf
+
     tf.config.threading.set_inter_op_parallelism_threads(1)
     tf.config.threading.set_intra_op_parallelism_threads(1)
 
-    if device == 'gpu':
-        gpus = tf.config.experimental.list_physical_devices('GPU')
+    if device == "gpu":
+        gpus = tf.config.experimental.list_physical_devices("GPU")
         assert gpus
     else:
-        tf.config.experimental.set_visible_devices([], 'GPU')
-    yield
+        tf.config.experimental.set_visible_devices([], "GPU")
+
+    yield tf
 
 
 __backends__ = {
-    'numpy': setup_numpy,
-    'bohrium': setup_bohrium,
-    'cupy': setup_cupy,
-    'jax': setup_jax,
-    'theano': setup_theano,
-    'numba': setup_numba,
-    'pytorch': setup_pytorch,
-    'tensorflow': setup_tensorflow
+    "numpy": setup_numpy,
+    "cupy": setup_cupy,
+    "jax": setup_jax,
+    "aesara": setup_aesara,
+    "numba": setup_numba,
+    "pytorch": setup_pytorch,
+    "tensorflow": setup_tensorflow,
 }
```
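Beyond swapping Theano and Bohrium for Aesara and moving to double-quoted strings, the structural change in this file is that every setup generator now yields its backend module, and `SetupContext.__enter__` returns that module instead of the context object. A hedged usage sketch follows; the `setup_function` decorator and the call plumbing of `SetupContext` are not shown in this diff, so the exact calling convention is an assumption:

```python
# Hypothetical sketch of how run.py might consume the reworked setup functions.
# Assumes __backends__[name](device=...) produces a SetupContext whose
# __enter__ returns the module yielded by the setup generator (as of this commit).
from backends import __backends__, convert_to_numpy

with __backends__["numpy"](device="cpu") as np_module:
    arr = np_module.ones(10)  # np_module is the yielded module itself
    out = convert_to_numpy(arr, "numpy", device="cpu")
```

Returning the module from `__enter__` spares each benchmark a second import, while a missing or broken backend still surfaces uniformly as `BackendNotSupported`.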

benchmarks/equation_of_state/__init__.py

Lines changed: 14 additions & 14 deletions
```diff
@@ -5,12 +5,13 @@
 
 def generate_inputs(size):
     import numpy as np
+
     np.random.seed(17)
 
     shape = (
-        math.ceil(2 * size ** (1/3)),
-        math.ceil(2 * size ** (1/3)),
-        math.ceil(0.25 * size ** (1/3)),
+        math.ceil(2 * size ** (1 / 3)),
+        math.ceil(2 * size ** (1 / 3)),
+        math.ceil(0.25 * size ** (1 / 3)),
     )
 
     s = np.random.uniform(1e-2, 10, size=shape)
@@ -21,26 +22,25 @@ def generate_inputs(size):
 
 def try_import(backend):
     try:
-        return importlib.import_module(f'.eos_{backend}', __name__)
+        return importlib.import_module(f".eos_{backend}", __name__)
     except ImportError:
         return None
 
 
-def get_callable(backend, size, device='cpu'):
+def get_callable(backend, size, device="cpu"):
     backend_module = try_import(backend)
     inputs = generate_inputs(size)
-    if hasattr(backend_module, 'prepare_inputs'):
+    if hasattr(backend_module, "prepare_inputs"):
         inputs = backend_module.prepare_inputs(*inputs, device=device)
     return functools.partial(backend_module.run, *inputs, device=device)
 
 
 __implementations__ = (
-    'bohrium',
-    'cupy',
-    'jax',
-    'numba',
-    'numpy',
-    'pytorch',
-    'tensorflow',
-    'theano',
+    "aesara",
+    "cupy",
+    "jax",
+    "numba",
+    "numpy",
+    "pytorch",
+    "tensorflow",
 )
```
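For context, `get_callable` closes over the generated inputs and returns a zero-argument callable via `functools.partial`. A hypothetical driver sketch (not part of this commit) showing how the pieces fit together:

```python
# Hypothetical driver for the equation-of-state benchmark (illustration only).
from benchmarks.equation_of_state import __implementations__, get_callable

for backend in __implementations__:
    # try_import() inside get_callable returns None for backends that are
    # not installed, so a real driver would need to guard against that.
    run = get_callable(backend, size=4096, device="cpu")
    result = run()  # executes backend_module.run(*inputs, device="cpu")
```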
