Skip to content

Implement NumPy's __array_function__ protocol for array methods that are not in the Array API Standard #643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions cubed/array_api/array_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,24 @@
120 # cubed doesn't have a config module like dask does so hard-code this for now
)

_HANDLED_FUNCTIONS = {}


def implements(*numpy_functions):
    """Build a decorator that registers an __array_function__ handler for cubed.Array.

    Each NumPy function given in ``numpy_functions`` is mapped to the decorated
    cubed function in ``_HANDLED_FUNCTIONS``.

    Note that this is **only** used for functions that are not defined in the
    Array API Standard.
    """

    def decorator(cubed_func):
        # Point every listed NumPy function at the same cubed implementation.
        _HANDLED_FUNCTIONS.update(
            (numpy_function, cubed_func) for numpy_function in numpy_functions
        )
        # Return the function itself (not a wrapper) so it stays directly callable.
        return cubed_func

    return decorator


class Array(CoreArray):
"""Chunked array backed by Zarr storage that conforms to the Python Array API standard."""
Expand All @@ -44,6 +62,12 @@ def __array__(self, dtype=None) -> np.ndarray:
x = np.array(x)
return x

def __array_function__(self, func, types, args, kwargs):
    """Dispatch a NumPy function call to its registered cubed implementation.

    Returns ``NotImplemented`` when ``func`` has no entry in
    ``_HANDLED_FUNCTIONS``; otherwise calls the registered function with
    ``args`` and ``kwargs`` and returns its result.
    """
    # Only dispatch to functions that are not defined in the Array API Standard
    try:
        handler = _HANDLED_FUNCTIONS[func]
    except KeyError:
        return NotImplemented
    return handler(*args, **kwargs)

def __repr__(self):
return f"cubed.Array<{self.name}, shape={self.shape}, dtype={self.dtype}, chunks={self.chunks}>"

Expand Down
17 changes: 15 additions & 2 deletions cubed/nan_functions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import numpy as np

from cubed.array_api.array_object import implements
from cubed.array_api.creation_functions import asarray
from cubed.array_api.dtypes import (
_numeric_dtypes,
_signed_integer_dtypes,
Expand All @@ -18,9 +20,10 @@
# https://github.com/data-apis/array-api/issues/621


def nanmean(x, /, *, axis=None, keepdims=False, split_every=None):
@implements(np.nanmean)
def nanmean(x, /, *, axis=None, dtype=None, keepdims=False, split_every=None):
"""Compute the arithmetic mean along the specified axis, ignoring NaNs."""
dtype = x.dtype
dtype = dtype or x.dtype
intermediate_dtype = [("n", nxp.int64), ("total", nxp.float64)]
return reduction(
x,
Expand Down Expand Up @@ -60,6 +63,7 @@ def _nannumel(x, **kwargs):
return nxp.sum(~(nxp.isnan(x)), **kwargs)


@implements(np.nansum)
def nansum(x, /, *, axis=None, dtype=None, keepdims=False, split_every=None):
"""Return the sum of array elements over a given axis treating NaNs as zero."""
if x.dtype not in _numeric_dtypes:
Expand All @@ -83,3 +87,12 @@ def nansum(x, /, *, axis=None, dtype=None, keepdims=False, split_every=None):
keepdims=keepdims,
split_every=split_every,
)


@implements(np.isclose)
def isclose(a, b, rtol=1.0e-5, atol=1.0e-8, equal_nan=False):
    """Compare ``a`` and ``b`` element-wise for equality within a tolerance.

    Both inputs are converted with ``nxp.asarray``, compared with
    ``nxp.isclose`` using the given ``rtol``, ``atol`` and ``equal_nan``, and
    the result is wrapped with ``asarray``.
    """
    # Note: this should only be used for testing small arrays since it
    # materializes arrays in memory
    operands = [nxp.asarray(operand) for operand in (a, b)]
    result = nxp.isclose(*operands, rtol=rtol, atol=atol, equal_nan=equal_nan)
    return asarray(result)
7 changes: 7 additions & 0 deletions cubed/pad.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import numpy as np

from cubed.array_api.array_object import implements
from cubed.array_api.manipulation_functions import concat

# TODO: refactor once pad is standardized:
# https://github.com/data-apis/array-api/issues/187


@implements(np.pad)
def pad(x, pad_width, mode=None, chunks=None):
"""Pad an array."""
if len(pad_width) != x.ndim:
Expand Down
2 changes: 1 addition & 1 deletion cubed/tests/runtime/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def deterministic_failure(path, timing_map, i, *, default_sleep=0.01, name=None)
else:
time.sleep(-timing_code)
raise RuntimeError(
f"Deliberately fail on invocation number {invocation_count+1} for input {i}"
f"Deliberately fail on invocation number {invocation_count + 1} for input {i}"
)


Expand Down
12 changes: 8 additions & 4 deletions cubed/tests/test_nan_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ def spec(tmp_path):
return cubed.Spec(tmp_path, allowed_mem=100000)


def test_nanmean(spec):
@pytest.mark.parametrize("namespace", [cubed, np])
def test_nanmean(spec, namespace):
a = xp.asarray([[1, 2, 3], [4, 5, 6], [7, 8, xp.nan]], chunks=(2, 2), spec=spec)
b = cubed.nanmean(a)
b = namespace.nanmean(a)
assert isinstance(b, cubed.Array)
assert_array_equal(
b.compute(), np.nanmean(np.array([[1, 2, 3], [4, 5, 6], [7, 8, np.nan]]))
)
Expand All @@ -26,9 +28,11 @@ def test_nanmean_allnan(spec):
assert_array_equal(b.compute(), np.nanmean(np.array([np.nan])))


def test_nansum(spec):
@pytest.mark.parametrize("namespace", [cubed, np])
def test_nansum(spec, namespace):
a = xp.asarray([[1, 2, 3], [4, 5, 6], [7, 8, xp.nan]], chunks=(2, 2), spec=spec)
b = cubed.nansum(a)
b = namespace.nansum(a)
assert isinstance(b, cubed.Array)
assert_array_equal(
b.compute(), np.nansum(np.array([[1, 2, 3], [4, 5, 6], [7, 8, np.nan]]))
)
Expand Down
8 changes: 6 additions & 2 deletions cubed/tests/test_pad.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@ def spec(tmp_path):
return cubed.Spec(tmp_path, allowed_mem=100000)


def test_pad(spec):
@pytest.mark.parametrize("namespace", [cubed, np])
def test_pad(spec, namespace):
an = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

a = xp.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], chunks=(2, 2), spec=spec)
b = cubed.pad(a, ((1, 0), (0, 0)), mode="symmetric")
# check that we can dispatch via the numpy namespace (via __array_function__)
# since pad is not yet a part of the Array API Standard
b = namespace.pad(a, ((1, 0), (0, 0)), mode="symmetric")
assert isinstance(b, cubed.Array)
assert b.chunks == ((2, 2), (2, 1))

assert_array_equal(b.compute(), np.pad(an, ((1, 0), (0, 0)), mode="symmetric"))
Loading