Skip to content

Commit bb4c7b4

Browse files
FIX: handle dask ValueErrors in apply_ufunc (set allow_rechunk=True) (#4392)
* FIX: catch dask chunk mismatch ValueErrors, warn and set ``allow_rechunk=True`` * FIX: raise ValueError for the core dimension case * add pull request reference to whats-new.rst (internal changes) * add test for FutureWarning * WIP allow_rechunk * fix allow_rechunk * use new is_duck_dask_array()-check * use get instead pop * Small change to error message. Co-authored-by: dcherian <[email protected]>
1 parent 572a528 commit bb4c7b4

File tree

3 files changed

+23
-4
lines changed

3 files changed

+23
-4
lines changed

doc/whats-new.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ Documentation
9898
Internal Changes
9999
~~~~~~~~~~~~~~~~
100100
- Use :py:func:`dask.array.apply_gufunc` instead of :py:func:`dask.array.blockwise` in
101-
:py:func:`xarray.apply_ufunc` when using ``dask='parallelized'``. (:pull:`4060`, :pull:`4391`)
101+
:py:func:`xarray.apply_ufunc` when using ``dask='parallelized'``. (:pull:`4060`, :pull:`4391`, :pull:`4392`)
102102
- Fix ``pip install .`` when no ``.git`` directory exists; namely when the xarray source
103103
directory has been rsync'ed by PyCharm Professional for a remote deployment over SSH.
104104
By `Guido Imperiale <https://github.com/crusaderky>`_

xarray/core/computation.py

+20
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,26 @@ def apply_variable_ufunc(
625625
if dask_gufunc_kwargs is None:
626626
dask_gufunc_kwargs = {}
627627

628+
allow_rechunk = dask_gufunc_kwargs.get("allow_rechunk", None)
629+
if allow_rechunk is None:
630+
for n, (data, core_dims) in enumerate(
631+
zip(input_data, signature.input_core_dims)
632+
):
633+
if is_duck_dask_array(data):
634+
# core dimensions cannot span multiple chunks
635+
for axis, dim in enumerate(core_dims, start=-len(core_dims)):
636+
if len(data.chunks[axis]) != 1:
637+
raise ValueError(
638+
f"dimension {dim} on {n}th function argument to "
639+
"apply_ufunc with dask='parallelized' consists of "
640+
"multiple chunks, but is also a core dimension. To "
641+
"fix, either rechunk into a single dask array chunk along "
642+
f"this dimension, i.e., ``.chunk({dim}: -1)``, or "
643+
"pass ``allow_rechunk=True`` in ``dask_gufunc_kwargs`` "
644+
"but beware that this may significantly increase memory usage."
645+
)
646+
dask_gufunc_kwargs["allow_rechunk"] = True
647+
628648
output_sizes = dask_gufunc_kwargs.pop("output_sizes", {})
629649
if output_sizes:
630650
output_sizes_renamed = {}

xarray/tests/test_computation.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -695,8 +695,7 @@ def check(x, y):
695695
check(data_array, 0 * data_array)
696696
check(data_array, 0 * data_array[0])
697697
check(data_array[:, 0], 0 * data_array[0])
698-
with raises_regex(ValueError, "with different chunksize present"):
699-
check(data_array, 0 * data_array.compute())
698+
check(data_array, 0 * data_array.compute())
700699

701700

702701
@requires_dask
@@ -710,7 +709,7 @@ def test_apply_dask_parallelized_errors():
710709
with raises_regex(ValueError, "at least one input is an xarray object"):
711710
apply_ufunc(identity, array, dask="parallelized")
712711

713-
# formerly from _apply_blockwise, now from dask.array.apply_gufunc
712+
# formerly from _apply_blockwise, now from apply_variable_ufunc
714713
with raises_regex(ValueError, "consists of multiple chunks"):
715714
apply_ufunc(
716715
identity,

0 commit comments

Comments
 (0)