Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX-#6287: Fix handling of numpy array_function #6288

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions modin/numpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@


def where(condition, x=None, y=None):
x_specified = x is not None
y_specified = y is not None
if x_specified != y_specified:
raise ValueError("either both or neither of x and y should be given")
if condition is True:
return x
if condition is False:
Expand Down
37 changes: 33 additions & 4 deletions modin/numpy/arr.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,29 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
if isinstance(input, pd.Series):
input = input._query_compiler.to_numpy().flatten()
args += [input]
out_kwarg = kwargs.get("out", None)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
out_kwarg = kwargs.get("out", None)
out_kwarg = kwargs.pop("out", None)

if out_kwarg is not None:
# If `out` is a modin.numpy.array, `kwargs.get("out")` returns a 1-tuple
# whose only element is that array, so we need to unwrap it from the tuple.
out_kwarg = out_kwarg[0]
kwargs.pop("out", None)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should merge the .get and .pop like

out_kwarg = kwargs.pop("out", None)

I see no point of keeping them separated

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
kwargs.pop("out", None)

where_kwarg = kwargs.get("where", None)
if where_kwarg is not None:
if isinstance(where_kwarg, type(self)):
kwargs["where"] = where_kwarg._to_numpy()
output = self._to_numpy().__array_ufunc__(ufunc, method, *args, **kwargs)
if is_scalar(output):
return output
return array(output)
if out_kwarg is None:
return array(output)
else:
return fix_dtypes_and_determine_return(
array(output)._query_compiler,
len(output.shape),
dtype=kwargs.get("dtype", None),
out=out_kwarg,
where=True,
)
args = []
for input in inputs:
input = try_convert_from_interoperable_type(input)
Expand All @@ -414,16 +433,14 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
# If `out` is a modin.numpy.array, `kwargs.get("out")` returns a 1-tuple
# whose only element is that array, so we need to unwrap it from the tuple.
out_kwarg = out_kwarg[0]
where_kwarg = kwargs.get("where", True)
kwargs["out"] = None
kwargs["where"] = True
result = new_ufunc(*args, **kwargs)
return fix_dtypes_and_determine_return(
result,
out_ndim,
dtype=kwargs.get("dtype", None),
out=out_kwarg,
where=where_kwarg,
where=True,
)

def __array_function__(self, func, types, args, kwargs):
Expand All @@ -437,11 +454,17 @@ def __array_function__(self, func, types, args, kwargs):
modin_func = getattr(shaping, func_name)
elif hasattr(creation, func_name):
modin_func = getattr(creation, func_name)
if func_name == "where":
return self.where(*args[1:])
if modin_func is None:
return NotImplemented
return modin_func(*args, **kwargs)

def where(self, x=None, y=None):
x_specified = x is not None
y_specified = y is not None
if x_specified != y_specified:
raise ValueError("either both or neither of x and y should be given")
if not is_bool_dtype(self.dtype):
raise NotImplementedError(
"Modin currently only supports where on condition arrays with boolean dtype."
Expand Down Expand Up @@ -2600,3 +2623,9 @@ def _to_numpy(self):
if self._ndim == 1:
arr = arr.flatten()
return arr

def __array__(self, dtype=None):
arr = self._to_numpy()
if dtype is not None:
return arr.astype(dtype)
return arr
4 changes: 3 additions & 1 deletion modin/numpy/test/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,9 @@ def test_array_where():
):
warnings.filterwarnings("ignore", message="Distributing")
(modin_flat_arr <= 0).where()
with pytest.raises(ValueError, match="np.where requires x and y"):
with pytest.raises(
ValueError, match="either both or neither of x and y should be given"
):
(modin_flat_arr <= 0).where(x=["Should Fail."])
with pytest.warns(UserWarning, match="np.where not supported when both x and y"):
warnings.filterwarnings("ignore", message="Distributing")
Expand Down
78 changes: 77 additions & 1 deletion modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
from modin.error_message import ErrorMessage
from modin import pandas as pd
from modin.pandas.utils import is_scalar
from modin.config import IsExperimental
from modin.config import IsExperimental, ExperimentalNumPyAPI
from modin.logging import disable_logging, ClassLogger

# Similar to pandas, sentinel value to use as kwarg in place of None when None has
Expand Down Expand Up @@ -3426,6 +3426,80 @@ def __and__(self, other):
def __rand__(self, other):
return self._binary_op("__rand__", other, axis=0)

def __array_function__(self, func, types, args, kwargs):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

both array_ufunc and array_function will not work if the user passes in a Modin NumPy Array for out if we end up defaulting to NumPy. In the Modin NumPy API, I've fixed it so that we do copy in the values to a Modin NumPy Array, but I'm not sure what the solution should be for this, and so have defaulted to having it be unsupported.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see where this "unsupported" is placed in code. I think it should be an exception or something similar, shouldn't it?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bump

"""
Return the result of calling an array function on self.

Parameters
----------
func : Callable
The function to call.
types : list[types]
Types of arguments.
args : list
Arguments to pass to function.
kwargs : dict
Key word arguments to pass to function.

Returns
-------
arr : np.ndarray or modin.numpy.array
The result of calling the array function on self.
"""
out = self.to_numpy().__array_function__(func, types, args, kwargs)
if out is NotImplemented:
func_name = func.__name__
arr = self.__array__()
if ExperimentalNumPyAPI.get():
ErrorMessage.warn(
f"Attempted to use Experimental NumPy API for function {func_name} but failed. Defaulting to NumPy."
)
converted_args = []
for input in args:
if hasattr(input, "_query_compiler"):
input = input.__array__()
converted_args += [input]
where_kwarg = kwargs.get("where")
if where_kwarg is not None:
if hasattr(where_kwarg, "_query_compiler"):
kwargs["where"] = where_kwarg.__array__()
return func(arr, *converted_args[1:], **kwargs)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this seems to be losing converteg_args[0] (which was args[0]), feels weird

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can add a comment - but basically args[0] is self. Since we’re calling the function directly instead of calling the special method again, I’m passing in the converted self as the first argument, and only need to pass in any remaining args (after making sure to convert them)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a comment would be nice; also, if we're dropping converted_args[0], then we should probably be doing so in the upper loop and skip the call of args[0].__array__() altogether

return out

def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
"""
Return the result of calling ufunc `ufunc`.

Parameters
----------
ufunc : Callable
The ufunc that was called.
method : str
Which method of the ufunc was called.
*inputs : tuple
Tuple of inputs passed to the ufunc.
**kwargs : dict
Keyword arguments passed to the ufunc.

Returns
-------
arr : np.ndarray or modin.numpy.array
The result of calling the array function on self.
"""
if ExperimentalNumPyAPI.get():
return self.to_numpy().__array_ufunc__(ufunc, method, *inputs, **kwargs)
else:
# If we are not using our Experimental NumPy API, we need to convert
# all of the inputs to the ufunc to compatible types with NumPy - otherwise
# NumPy will not be able to find valid implementations for the ufunc.
arr = self.to_numpy()
args = []
for input in inputs:
if hasattr(input, "_query_compiler"):
input = input.__array__()
args += [input]
return arr.__array_ufunc__(ufunc, method, *args, **kwargs)

def __array__(self, dtype=None):
"""
Return the values as a NumPy array.
Expand All @@ -3441,6 +3515,8 @@ def __array__(self, dtype=None):
NumPy representation of Modin object.
"""
arr = self.to_numpy(dtype)
if ExperimentalNumPyAPI.get():
arr = arr._to_numpy()
return arr

def __copy__(self, deep=True):
Expand Down