-
Notifications
You must be signed in to change notification settings - Fork 655
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
FIX-#6287: Fix handling of numpy array_function #6288
base: master
Are you sure you want to change the base?
Changes from all commits
fd1c7c0
3baae48
3a72779
f7527f3
22147e9
66b36c7
5de028b
4f18ffe
074a52b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -397,10 +397,29 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): | |||
if isinstance(input, pd.Series): | ||||
input = input._query_compiler.to_numpy().flatten() | ||||
args += [input] | ||||
out_kwarg = kwargs.get("out", None) | ||||
if out_kwarg is not None: | ||||
# If `out` is a modin.numpy.array, `kwargs.get("out")` returns a 1-tuple | ||||
# whose only element is that array, so we need to unwrap it from the tuple. | ||||
out_kwarg = out_kwarg[0] | ||||
kwargs.pop("out", None) | ||||
Review comment: I think we should merge these two statements into a single `out_kwarg = kwargs.pop("out", None)`; I see no point in keeping them separate.
Reply: Sounds good!
Review comment:
Suggested change
|
||||
where_kwarg = kwargs.get("where", None) | ||||
if where_kwarg is not None: | ||||
if isinstance(where_kwarg, type(self)): | ||||
kwargs["where"] = where_kwarg._to_numpy() | ||||
output = self._to_numpy().__array_ufunc__(ufunc, method, *args, **kwargs) | ||||
if is_scalar(output): | ||||
return output | ||||
return array(output) | ||||
if out_kwarg is None: | ||||
return array(output) | ||||
else: | ||||
return fix_dtypes_and_determine_return( | ||||
array(output)._query_compiler, | ||||
len(output.shape), | ||||
dtype=kwargs.get("dtype", None), | ||||
out=out_kwarg, | ||||
where=True, | ||||
) | ||||
args = [] | ||||
for input in inputs: | ||||
input = try_convert_from_interoperable_type(input) | ||||
|
@@ -414,16 +433,14 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): | |||
# If `out` is a modin.numpy.array, `kwargs.get("out")` returns a 1-tuple | ||||
# whose only element is that array, so we need to unwrap it from the tuple. | ||||
out_kwarg = out_kwarg[0] | ||||
where_kwarg = kwargs.get("where", True) | ||||
kwargs["out"] = None | ||||
kwargs["where"] = True | ||||
result = new_ufunc(*args, **kwargs) | ||||
return fix_dtypes_and_determine_return( | ||||
result, | ||||
out_ndim, | ||||
dtype=kwargs.get("dtype", None), | ||||
out=out_kwarg, | ||||
where=where_kwarg, | ||||
where=True, | ||||
) | ||||
|
||||
def __array_function__(self, func, types, args, kwargs): | ||||
|
@@ -437,11 +454,17 @@ def __array_function__(self, func, types, args, kwargs): | |||
modin_func = getattr(shaping, func_name) | ||||
elif hasattr(creation, func_name): | ||||
modin_func = getattr(creation, func_name) | ||||
if func_name == "where": | ||||
return self.where(*args[1:]) | ||||
if modin_func is None: | ||||
return NotImplemented | ||||
return modin_func(*args, **kwargs) | ||||
|
||||
def where(self, x=None, y=None): | ||||
x_specified = x is not None | ||||
y_specified = y is not None | ||||
if x_specified != y_specified: | ||||
raise ValueError("either both or neither of x and y should be given") | ||||
if not is_bool_dtype(self.dtype): | ||||
raise NotImplementedError( | ||||
"Modin currently only supports where on condition arrays with boolean dtype." | ||||
|
@@ -2600,3 +2623,9 @@ def _to_numpy(self): | |||
if self._ndim == 1: | ||||
arr = arr.flatten() | ||||
return arr | ||||
|
||||
def __array__(self, dtype=None):
    """
    Return the materialized contents of this array, optionally cast to `dtype`.

    Parameters
    ----------
    dtype : optional
        When not None, the materialized array is converted with ``astype(dtype)``.

    Returns
    -------
    The object produced by ``self._to_numpy()``, cast to `dtype` if one was given.
    """
    materialized = self._to_numpy()
    return materialized if dtype is None else materialized.astype(dtype)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,7 +61,7 @@ | |
from modin.error_message import ErrorMessage | ||
from modin import pandas as pd | ||
from modin.pandas.utils import is_scalar | ||
from modin.config import IsExperimental | ||
from modin.config import IsExperimental, ExperimentalNumPyAPI | ||
from modin.logging import disable_logging, ClassLogger | ||
|
||
# Similar to pandas, sentinel value to use as kwarg in place of None when None has | ||
|
@@ -3426,6 +3426,80 @@ def __and__(self, other): | |
def __rand__(self, other): | ||
return self._binary_op("__rand__", other, axis=0) | ||
|
||
def __array_function__(self, func, types, args, kwargs): | ||
Review comment: Both `__array_ufunc__` and `__array_function__` will not work if the user passes in a Modin NumPy array for [argument name missing from the extracted text].
Reply: I don't see where this "unsupported" is placed in the code. I think it should be an exception or something similar, shouldn't it?
Reply: Bump.
||
""" | ||
Return the result of calling an array function on self. | ||
|
||
Parameters | ||
---------- | ||
func : Callable | ||
The function to call. | ||
types : list[types] | ||
Types of arguments. | ||
args : list | ||
Arguments to pass to function. | ||
kwargs : dict | ||
Key word arguments to pass to function. | ||
|
||
Returns | ||
------- | ||
arr : np.ndarray or modin.numpy.array | ||
The result of calling the array function on self. | ||
""" | ||
out = self.to_numpy().__array_function__(func, types, args, kwargs) | ||
if out is NotImplemented: | ||
func_name = func.__name__ | ||
arr = self.__array__() | ||
if ExperimentalNumPyAPI.get(): | ||
ErrorMessage.warn( | ||
f"Attempted to use Experimental NumPy API for function {func_name} but failed. Defaulting to NumPy." | ||
) | ||
converted_args = [] | ||
for input in args: | ||
if hasattr(input, "_query_compiler"): | ||
input = input.__array__() | ||
converted_args += [input] | ||
where_kwarg = kwargs.get("where") | ||
if where_kwarg is not None: | ||
if hasattr(where_kwarg, "_query_compiler"): | ||
kwargs["where"] = where_kwarg.__array__() | ||
return func(arr, *converted_args[1:], **kwargs) | ||
Review comment: This seems to be losing [inline code missing from the extracted text; judging by the reply, the first positional argument].
Reply: I can add a comment, but basically `args[0]` is `self`. Since we're calling the function directly instead of calling the special method again, I'm passing in the converted `self` as the first argument, and only need to pass in the remaining args (after making sure to convert them).
Reply: A comment would be nice; also, if we're dropping [remainder missing from the extracted text]
||
return out | ||
|
||
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """
    Return the result of calling ufunc `ufunc`.

    Parameters
    ----------
    ufunc : Callable
        The ufunc that was called.
    method : str
        Which method of the ufunc was called.
    *inputs : tuple
        Tuple of inputs passed to the ufunc.
    **kwargs : dict
        Keyword arguments passed to the ufunc.

    Returns
    -------
    arr : np.ndarray or modin.numpy.array
        The result of calling the ufunc.
    """
    if ExperimentalNumPyAPI.get():
        # Inputs are forwarded untouched; presumably `to_numpy()` yields a
        # modin.numpy.array in this mode, whose own `__array_ufunc__` handles
        # Modin inputs - TODO confirm.
        return self.to_numpy().__array_ufunc__(ufunc, method, *inputs, **kwargs)

    # If we are not using our Experimental NumPy API, we need to convert
    # all of the inputs to the ufunc to compatible types with NumPy - otherwise
    # NumPy will not be able to find valid implementations for the ufunc.
    arr = self.to_numpy()
    # Anything exposing `_query_compiler` is treated as a Modin object and is
    # materialized through `__array__`; every other operand passes through as is.
    # NOTE(review): only positional inputs are converted here; keyword
    # arguments are forwarded unchanged.
    converted_inputs = [
        operand.__array__() if hasattr(operand, "_query_compiler") else operand
        for operand in inputs
    ]
    return arr.__array_ufunc__(ufunc, method, *converted_inputs, **kwargs)
|
||
def __array__(self, dtype=None): | ||
""" | ||
Return the values as a NumPy array. | ||
|
@@ -3441,6 +3515,8 @@ def __array__(self, dtype=None): | |
NumPy representation of Modin object. | ||
""" | ||
arr = self.to_numpy(dtype) | ||
if ExperimentalNumPyAPI.get(): | ||
arr = arr._to_numpy() | ||
return arr | ||
|
||
def __copy__(self, deep=True): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.