diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index f1b6d46fd95..f61aab69e0f 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -21,6 +21,7 @@ dependencies: - pip - scipy - seaborn + - sparse - toolz - rasterio - boto3 diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 0e28613323e..ba6477f34cc 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -260,8 +260,10 @@ def __init__( else: # try to fill in arguments from data if they weren't supplied if coords is None: - coords = getattr(data, 'coords', None) - if isinstance(data, pd.Series): + + if isinstance(data, DataArray): + coords = data.coords + elif isinstance(data, pd.Series): coords = [data.index] elif isinstance(data, pd.DataFrame): coords = [data.index, data.columns] @@ -269,6 +271,7 @@ def __init__( coords = [data] elif isinstance(data, pdcompat.Panel): coords = [data.items, data.major_axis, data.minor_axis] + if dims is None: dims = getattr(data, 'dims', getattr(coords, 'dims', None)) if name is None: diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index ac204df568f..f78ecb969a1 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -13,7 +13,7 @@ from . import dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast -from .pycompat import dask_array_type +from .pycompat import dask_array_type, sparse_array_type try: import dask.array as dask_array @@ -64,6 +64,7 @@ def fail_on_dask_array_input(values, msg=None, func_name=None): around = _dask_or_eager_func('around') isclose = _dask_or_eager_func('isclose') + if hasattr(np, 'isnat') and ( dask_array is None or hasattr(dask_array_type, '__array_ufunc__')): # np.isnat is available since NumPy 1.13, so __array_ufunc__ is always @@ -153,7 +154,11 @@ def trapz(y, x, axis): def asarray(data): - return data if isinstance(data, dask_array_type) else np.asarray(data) + return ( + data if (isinstance(data, dask_array_type) + or hasattr(data, '__array_function__')) + else np.asarray(data) + ) def as_shared_dtype(scalars_or_arrays): @@ -170,6 +175,9 @@ def as_shared_dtype(scalars_or_arrays): def as_like_arrays(*data): if all(isinstance(d, dask_array_type) for d in data): return data + elif any(isinstance(d, sparse_array_type) for d in data): + from sparse import COO + return tuple(COO(d) for d in data) else: return tuple(np.asarray(d) for d in data) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 9f39acde90b..00c813ece09 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -357,7 +357,10 @@ def set_numpy_options(*args, **kwargs): def short_array_repr(array): - array = np.asarray(array) + + if not hasattr(array, '__array_function__'): + array = np.asarray(array) + # default to lower precision so a full (abbreviated) line can fit on # one line with the default display_width options = { @@ -394,7 +397,7 @@ def short_data_repr(array): if isinstance(getattr(array, 'variable', array)._data, dask_array_type): return short_dask_repr(array) elif array._in_memory or array.size < 1e5: - return short_array_repr(array.values) + return short_array_repr(array.data) else: return u'[{} values with dtype={}]'.format(array.size, array.dtype) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 14f62c533da..aea5a5a3f4f 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -657,6 +657,9 @@ def as_indexable(array): return PandasIndexAdapter(array) if isinstance(array, dask_array_type): return DaskIndexingAdapter(array) + if hasattr(array, '__array_function__'): + return NdArrayLikeIndexingAdapter(array) + raise TypeError('Invalid array type: {}'.format(type(array))) @@ -1189,6 +1192,16 @@ def __setitem__(self, key, value): raise +class NdArrayLikeIndexingAdapter(NumpyIndexingAdapter): + def __init__(self, array): + if not hasattr(array, '__array_function__'): + raise TypeError( + 'NdArrayLikeIndexingAdapter must wrap an object that ' + 'implements the __array_function__ protocol' + ) + self.array = array + + class DaskIndexingAdapter(ExplicitlyIndexedNDArrayMixin): """Wrap a dask array to support explicit indexing.""" diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 856cfc4fe79..afef9a5e083 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -357,3 +357,18 @@ def moveaxis(a, source, destination): # https://github.com/numpy/numpy/issues/7370 # https://github.com/numpy/numpy-stubs/ DTypeLike = Union[np.dtype, str] + + +# from dask/array/utils.py +def _is_nep18_active(): + class A: + def __array_function__(self, *args, **kwargs): + return True + + try: + return np.concatenate([A()]) + except ValueError: + return False + + +IS_NEP18_ACTIVE = _is_nep18_active() diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index 16c5325565c..259f44f2862 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -8,3 +8,10 @@ dask_array_type = (dask.array.Array,) except ImportError: # pragma: no cover dask_array_type = () + +try: + # solely for isinstance checks + import sparse + sparse_array_type = (sparse.SparseArray,) +except ImportError: # pragma: no cover + sparse_array_type = () diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 60e0fe1e7d7..b3e19aebcbf 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -243,7 +243,9 @@ def is_scalar(value: Any) -> bool: return ( getattr(value, 'ndim', None) == 0 or isinstance(value, (str, bytes)) or not - isinstance(value, (Iterable, ) + dask_array_type)) + (isinstance(value, (Iterable, ) + dask_array_type) or + hasattr(value, '__array_function__')) + ) def is_valid_numpy_dtype(dtype: Any) -> bool: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 5c6a3ad0f30..3c9d85f13d7 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -17,6 +17,7 @@ as_indexable) from .options import _get_keep_attrs from .pycompat import dask_array_type, integer_types +from .npcompat import IS_NEP18_ACTIVE from .utils import ( OrderedSet, decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution) @@ -179,6 +180,18 @@ def as_compatible_data(data, fastpath=False): else: data = np.asarray(data) + if not isinstance(data, np.ndarray): + if hasattr(data, '__array_function__'): + if IS_NEP18_ACTIVE: + return data + else: + raise TypeError( + 'Got an NumPy-like array type providing the ' + '__array_function__ protocol but NEP18 is not enabled. ' + 'Check that numpy >= v1.16 and that the environment ' + 'variable "NUMPY_EXPERIMENTAL_ARRAY_FUNCTION" is set to ' + '"1"') + # validate whether the data is valid data types data = np.asarray(data) @@ -288,7 +301,7 @@ def _in_memory(self): @property def data(self): - if isinstance(self._data, dask_array_type): + if hasattr(self._data, '__array_function__'): return self._data else: return self.values @@ -320,7 +333,7 @@ def load(self, **kwargs): """ if isinstance(self._data, dask_array_type): self._data = as_compatible_data(self._data.compute(**kwargs)) - elif not isinstance(self._data, np.ndarray): + elif not hasattr(self._data, '__array_function__'): self._data = np.asarray(self._data) return self @@ -705,8 +718,8 @@ def __setitem__(self, key, value): if new_order: value = duck_array_ops.asarray(value) - value = value[(len(dims) - value.ndim) * (np.newaxis,) + - (Ellipsis,)] + value = value[(len(dims) - value.ndim) * (np.newaxis,) + + (Ellipsis,)] value = duck_array_ops.moveaxis( value, new_order, range(len(new_order))) @@ -805,7 +818,8 @@ def copy(self, deep=True, data=None): data = indexing.MemoryCachedArray(data.array) if deep: - if isinstance(data, dask_array_type): + if (hasattr(data, '__array_function__') + or isinstance(data, dask_array_type)): data = data.copy() elif not isinstance(data, PandasIndexAdapter): # pandas.Index is immutable @@ -1494,9 +1508,10 @@ def equals(self, other, equiv=duck_array_ops.array_equiv): """ other = getattr(other, 'variable', other) try: - return (self.dims == other.dims and - (self._data is other._data or - equiv(self.data, other.data))) + return ( + self.dims == other.dims and + (self._data is other._data or equiv(self.data, other.data)) + ) except (TypeError, AttributeError): return False @@ -1517,8 +1532,8 @@ def identical(self, other): """Like equals, but also checks attributes. """ try: - return (utils.dict_equiv(self.attrs, other.attrs) and - self.equals(other)) + return (utils.dict_equiv(self.attrs, other.attrs) + and self.equals(other)) except (TypeError, AttributeError): return False @@ -1959,8 +1974,8 @@ def equals(self, other, equiv=None): # otherwise use the native index equals, rather than looking at _data other = getattr(other, 'variable', other) try: - return (self.dims == other.dims and - self._data_equals(other)) + return (self.dims == other.dims + and self._data_equals(other)) except (TypeError, AttributeError): return False diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py new file mode 100644 index 00000000000..3aa407f72bc --- /dev/null +++ b/xarray/tests/test_sparse.py @@ -0,0 +1,689 @@ +from collections import OrderedDict +from contextlib import suppress +from distutils.version import LooseVersion +from textwrap import dedent +import pickle +import numpy as np +import pandas as pd + +from xarray import DataArray, Dataset, Variable +from xarray.tests import mock +from xarray.core.npcompat import IS_NEP18_ACTIVE +import xarray as xr +import xarray.ufuncs as xu + +from . import ( + assert_allclose, assert_array_equal, assert_equal, assert_frame_equal, + assert_identical, raises_regex) + +import pytest + +param = pytest.param +xfail = pytest.mark.xfail + +if not IS_NEP18_ACTIVE: + pytest.skip("NUMPY_EXPERIMENTAL_ARRAY_FUNCTION is not enabled", + allow_module_level=True) + +sparse = pytest.importorskip('sparse') +from sparse.utils import assert_eq as assert_sparse_eq # noqa +from sparse import COO, SparseArray # noqa + + +def make_ndarray(shape): + return np.arange(np.prod(shape)).reshape(shape) + + +def make_sparray(shape): + return sparse.random(shape, density=0.1, random_state=0) + + +def make_xrvar(dim_lengths): + return xr.Variable( + tuple(dim_lengths.keys()), + make_sparray(shape=tuple(dim_lengths.values()))) + + +def make_xrarray(dim_lengths, coords=None, name='test'): + if coords is None: + coords = {d: np.arange(n) for d, n in dim_lengths.items()} + return xr.DataArray( + make_sparray(shape=tuple(dim_lengths.values())), + dims=tuple(coords.keys()), + coords=coords, + name=name) + + +class do: + def __init__(self, meth, *args, **kwargs): + self.meth = meth + self.args = args + self.kwargs = kwargs + + def __call__(self, obj): + return getattr(obj, self.meth)(*self.args, **self.kwargs) + + def __repr__(self): + return 'obj.{}(*{}, **{})'.format(self.meth, self.args, self.kwargs) + + +@pytest.mark.parametrize("prop", [ + 'chunks', + 'data', + 'dims', + 'dtype', + 'encoding', + 'imag', + 'nbytes', + 'ndim', + param('values', marks=xfail(reason='Coercion to dense')) +]) +def test_variable_property(prop): + var = make_xrvar({'x': 10, 'y': 5}) + getattr(var, prop) + + +@pytest.mark.parametrize("func,sparse_output", [ + (do('all'), False), + (do('any'), False), + (do('astype', dtype=int), True), + (do('broadcast_equals', make_xrvar({'x': 10, 'y': 5})), False), + (do('clip', min=0, max=1), True), + (do('coarsen', windows={'x': 2}, func=np.sum), True), + (do('compute'), True), + (do('conj'), True), + (do('copy'), True), + (do('count'), False), + (do('equals', make_xrvar({'x': 10, 'y': 5})), False), + (do('get_axis_num', dim='x'), False), + (do('identical', other=make_xrvar({'x': 10, 'y': 5})), False), + (do('isel', x=slice(2, 4)), True), + (do('isnull'), True), + (do('load'), True), + (do('mean'), False), + (do('notnull'), True), + (do('roll'), True), + (do('round'), True), + (do('set_dims', dims=('x', 'y', 'z')), True), + (do('stack', dimensions={'flat': ('x', 'y')}), True), + (do('to_base_variable'), True), + (do('transpose'), True), + (do('unstack', dimensions={'x': {'x1': 5, 'x2': 2}}), True), + + param(do('argmax'), True, + marks=xfail(reason='Missing implementation for np.argmin')), + param(do('argmin'), True, + marks=xfail(reason='Missing implementation for np.argmax')), + param(do('argsort'), True, + marks=xfail(reason="'COO' object has no attribute 'argsort'")), + param(do('chunk', chunks=(5, 5)), True, + marks=xfail), + param(do('concat', variables=[make_xrvar({'x': 10, 'y': 5}), + make_xrvar({'x': 10, 'y': 5})]), True, + marks=xfail(reason='Coercion to dense')), + param(do('conjugate'), True, + marks=xfail(reason="'COO' object has no attribute 'conjugate'")), + param(do('cumprod'), True, + marks=xfail(reason='Missing implementation for np.nancumprod')), + param(do('cumsum'), True, + marks=xfail(reason='Missing implementation for np.nancumsum')), + param(do('fillna', 0), True, + marks=xfail(reason='Missing implementation for np.result_type')), + param(do('item', (1, 1)), False, + marks=xfail(reason="'COO' object has no attribute 'item'")), + param(do('max'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('median'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('min'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('no_conflicts', other=make_xrvar({'x': 10, 'y': 5})), True, + marks=xfail(reason='mixed sparse-dense operation')), + param(do('pad_with_fill_value', pad_widths={'x': (1, 1)}, fill_value=5), True, # noqa + marks=xfail(reason='Missing implementation for np.pad')), + param(do('prod'), False, + marks=xfail(reason='Missing implementation for np.result_type')), + param(do('quantile', q=0.5), True, + marks=xfail(reason='Missing implementation for np.nanpercentile')), + param(do('rank', dim='x'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('reduce', func=np.sum, dim='x'), True, + marks=xfail(reason='Coercion to dense')), + param(do('rolling_window', dim='x', window=2, window_dim='x_win'), True, + marks=xfail(reason='Missing implementation for np.pad')), + param(do('shift', x=2), True, + marks=xfail(reason='mixed sparse-dense operation')), + param(do('std'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('sum'), False, + marks=xfail(reason='Missing implementation for np.result_type')), + param(do('var'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('to_dict'), False, + marks=xfail(reason='Coercion to dense')), + param(do('where', cond=make_xrvar({'x': 10, 'y': 5}) > 0.5), True, + marks=xfail(reason='Coercion of dense to sparse when using sparse mask')), # noqa +], +ids=repr) +def test_variable_method(func, sparse_output): + var_s = make_xrvar({'x': 10, 'y': 5}) + var_d = xr.Variable(var_s.dims, var_s.data.todense()) + ret_s = func(var_s) + ret_d = func(var_d) + + if sparse_output: + assert isinstance(ret_s.data, SparseArray) + assert np.allclose(ret_s.data.todense(), ret_d.data, equal_nan=True) + else: + assert np.allclose(ret_s, ret_d, equal_nan=True) + + +@pytest.mark.parametrize("func,sparse_output", [ + (do('squeeze'), True), + + param(do('to_index'), False, + marks=xfail(reason='Coercion to dense')), + param(do('to_index_variable'), False, + marks=xfail(reason='Coercion to dense')), + param(do('searchsorted', 0.5), True, + marks=xfail(reason="'COO' object has no attribute 'searchsorted'")), +]) +def test_1d_variable_method(func, sparse_output): + var_s = make_xrvar({'x': 10}) + var_d = xr.Variable(var_s.dims, var_s.data.todense()) + ret_s = func(var_s) + ret_d = func(var_d) + + if sparse_output: + assert isinstance(ret_s.data, SparseArray) + assert np.allclose(ret_s.data.todense(), ret_d.data) + else: + assert np.allclose(ret_s, ret_d) + + +class TestSparseVariable: + @pytest.fixture(autouse=True) + def setUp(self): + self.data = sparse.random((4, 6), random_state=0, density=0.5) + self.var = xr.Variable(('x', 'y'), self.data) + + def test_unary_op(self): + assert_sparse_eq(-self.var.data, -self.data) + assert_sparse_eq(abs(self.var).data, abs(self.data)) + assert_sparse_eq(self.var.round().data, self.data.round()) + + def test_univariate_ufunc(self): + assert_sparse_eq(np.sin(self.data), xu.sin(self.var).data) + + def test_bivariate_ufunc(self): + assert_sparse_eq(np.maximum(self.data, 0), + xu.maximum(self.var, 0).data) + assert_sparse_eq(np.maximum(self.data, 0), + xu.maximum(0, self.var).data) + + def test_repr(self): + expected = dedent("""\ + + """) + assert expected == repr(self.var) + + def test_pickle(self): + v1 = self.var + v2 = pickle.loads(pickle.dumps(v1)) + assert_sparse_eq(v1.data, v2.data) + + @pytest.mark.xfail(reason="Missing implementation for np.result_type") + def test_missing_values(self): + a = np.array([0, 1, np.nan, 3]) + s = COO.from_numpy(a) + var_s = Variable('x', s) + assert np.all(var_s.fillna(2).data.todense() == np.arange(4)) + assert np.all(var_s.count() == 3) + + +@pytest.mark.parametrize("prop", [ + 'attrs', + 'chunks', + 'coords', + 'data', + 'dims', + 'dtype', + 'encoding', + 'imag', + 'indexes', + 'loc', + 'name', + 'nbytes', + 'ndim', + 'plot', + 'real', + 'shape', + 'size', + 'sizes', + 'str', + 'variable', +]) +def test_dataarray_property(prop): + arr = make_xrarray({'x': 10, 'y': 5}) + getattr(arr, prop) + + +@pytest.mark.parametrize("func,sparse_output", [ + (do('all'), False), + (do('any'), False), + (do('assign_attrs', {'foo': 'bar'}), True), + (do('assign_coords', x=make_xrarray({'x': 10}).x + 1), True), + (do('astype', int), True), + (do('broadcast_equals', make_xrarray({'x': 10, 'y': 5})), False), + (do('clip', min=0, max=1), True), + (do('compute'), True), + (do('conj'), True), + (do('copy'), True), + (do('count'), False), + (do('diff', 'x'), True), + (do('drop', 'x'), True), + (do('equals', make_xrarray({'x': 10, 'y': 5})), False), + (do('expand_dims', {'z': 2}, axis=2), True), + (do('get_axis_num', 'x'), False), + (do('get_index', 'x'), False), + (do('identical', make_xrarray({'x': 5, 'y': 5})), False), + (do('integrate', 'x'), True), + (do('isel', {'x': slice(0, 3), 'y': slice(2, 4)}), True), + (do('isnull'), True), + (do('load'), True), + (do('mean'), False), + (do('persist'), True), + (do('reindex', {'x': [1, 2, 3]}), True), + (do('rename', 'foo'), True), + (do('reorder_levels'), True), + (do('reset_coords', drop=True), True), + (do('reset_index', 'x'), True), + (do('round'), True), + (do('sel', x=[0, 1, 2]), True), + (do('shift'), True), + (do('sortby', 'x', ascending=False), True), + (do('stack', z={'x', 'y'}), True), + (do('transpose'), True), + + # TODO + # isel_points + # sel_points + # set_index + # swap_dims + + param(do('argmax'), True, + marks=xfail(reason='Missing implementation for np.argmax')), + param(do('argmin'), True, + marks=xfail(reason='Missing implementation for np.argmin')), + param(do('argsort'), True, + marks=xfail(reason="'COO' object has no attribute 'argsort'")), + param(do('bfill', dim='x'), False, + marks=xfail(reason='Missing implementation for np.flip')), + param(do('chunk', chunks=(5, 5)), False, + marks=xfail(reason='Coercion to dense')), + param(do('combine_first', make_xrarray({'x': 10, 'y': 5})), True, + marks=xfail(reason='mixed sparse-dense operation')), + param(do('conjugate'), False, + marks=xfail(reason="'COO' object has no attribute 'conjugate'")), + param(do('cumprod'), True, + marks=xfail(reason='Missing implementation for np.nancumprod')), + param(do('cumsum'), True, + marks=xfail(reason='Missing implementation for np.nancumsum')), + param(do('differentiate', 'x'), False, + marks=xfail(reason='Missing implementation for np.gradient')), + param(do('dot', make_xrarray({'x': 10, 'y': 5})), True, + marks=xfail(reason='Missing implementation for np.einsum')), + param(do('dropna', 'x'), False, + marks=xfail(reason='Coercion to dense')), + param(do('ffill', 'x'), False, + marks=xfail(reason='Coercion to dense via bottleneck.push')), + param(do('fillna', 0), True, + marks=xfail(reason='Missing implementation for np.result_type')), + param(do('interp', coords={'x': np.arange(10) + 0.5}), True, + marks=xfail(reason='Coercion to dense')), + param(do('interp_like', + make_xrarray({'x': 10, 'y': 5}, + coords={'x': np.arange(10) + 0.5, + 'y': np.arange(5) + 0.5})), True, + marks=xfail(reason='Indexing COO with more than one iterable index')), # noqa + param(do('interpolate_na', 'x'), True, + marks=xfail(reason='Coercion to dense')), + param(do('isin', [1, 2, 3]), False, + marks=xfail(reason='Missing implementation for np.isin')), + param(do('item', (1, 1)), False, + marks=xfail(reason="'COO' object has no attribute 'item'")), + param(do('max'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('median'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('min'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('notnull'), False, + marks=xfail(reason="'COO' object has no attribute 'notnull'")), + param(do('pipe', np.sum, axis=1), True, + marks=xfail(reason='Missing implementation for np.result_type')), + param(do('prod'), False, + marks=xfail(reason='Missing implementation for np.result_type')), + param(do('quantile', q=0.5), False, + marks=xfail(reason='Missing implementation for np.nanpercentile')), + param(do('rank', 'x'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('reduce', np.sum, dim='x'), False, + marks=xfail(reason='Coercion to dense')), + param(do('reindex_like', + make_xrarray({'x': 10, 'y': 5}, + coords={'x': np.arange(10) + 0.5, + 'y': np.arange(5) + 0.5})), + True, + marks=xfail(reason='Indexing COO with more than one iterable index')), # noqa + param(do('roll', x=2), True, + marks=xfail(reason='Missing implementation for np.result_type')), + param(do('sel', x=[0, 1, 2], y=[2, 3]), True, + marks=xfail(reason='Indexing COO with more than one iterable index')), # noqa + param(do('std'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('sum'), False, + marks=xfail(reason='Missing implementation for np.result_type')), + param(do('var'), False, + marks=xfail(reason='Coercion to dense via bottleneck')), + param(do('where', make_xrarray({'x': 10, 'y': 5}) > 0.5), False, + marks=xfail(reason='Conversion of dense to sparse when using sparse mask')), # noqa +], +ids=repr) +def test_dataarray_method(func, sparse_output): + arr_s = make_xrarray({'x': 10, 'y': 5}, + coords={'x': np.arange(10), 'y': np.arange(5)}) + arr_d = xr.DataArray( + arr_s.data.todense(), + coords=arr_s.coords, + dims=arr_s.dims) + ret_s = func(arr_s) + ret_d = func(arr_d) + + if sparse_output: + assert isinstance(ret_s.data, SparseArray) + assert np.allclose(ret_s.data.todense(), ret_d.data, equal_nan=True) + else: + assert np.allclose(ret_s, ret_d, equal_nan=True) + + +@pytest.mark.parametrize("func,sparse_output", [ + (do('squeeze'), True), + param(do('searchsorted', [1, 2, 3]), False, + marks=xfail(reason="'COO' object has no attribute 'searchsorted'")), +]) +def test_datarray_1d_method(func, sparse_output): + arr_s = make_xrarray({'x': 10}, coords={'x': np.arange(10)}) + arr_d = xr.DataArray( + arr_s.data.todense(), + coords=arr_s.coords, + dims=arr_s.dims) + ret_s = func(arr_s) + ret_d = func(arr_d) + + if sparse_output: + assert isinstance(ret_s.data, SparseArray) + assert np.allclose(ret_s.data.todense(), ret_d.data, equal_nan=True) + else: + assert np.allclose(ret_s, ret_d, equal_nan=True) + + +class TestSparseDataArrayAndDataset: + @pytest.fixture(autouse=True) + def setUp(self): + self.sp_ar = sparse.random((4, 6), random_state=0, density=0.5) + self.sp_xr = xr.DataArray(self.sp_ar, coords={'x': range(4)}, + dims=('x', 'y'), name='foo') + self.ds_ar = self.sp_ar.todense() + self.ds_xr = xr.DataArray(self.ds_ar, coords={'x': range(4)}, + dims=('x', 'y'), name='foo') + + @pytest.mark.xfail(reason='Missing implementation for np.result_type') + def test_to_dataset_roundtrip(self): + x = self.sp_xr + assert_equal(x, x.to_dataset('x').to_array('x')) + + def test_align(self): + a1 = xr.DataArray( + COO.from_numpy(np.arange(4)), + dims=['x'], + coords={'x': ['a', 'b', 'c', 'd']}) + b1 = xr.DataArray( + COO.from_numpy(np.arange(4)), + dims=['x'], + coords={'x': ['a', 'b', 'd', 'e']}) + a2, b2 = xr.align(a1, b1, join='inner') + assert isinstance(a2.data, sparse.SparseArray) + assert isinstance(b2.data, sparse.SparseArray) + assert np.all(a2.coords['x'].data == ['a', 'b', 'd']) + assert np.all(b2.coords['x'].data == ['a', 'b', 'd']) + + @pytest.mark.xfail( + reason="COO objects currently do not accept more than one " + "iterable index at a time") + def test_align_2d(self): + A1 = xr.DataArray(self.sp_ar, dims=['x', 'y'], coords={ + 'x': np.arange(self.sp_ar.shape[0]), + 'y': np.arange(self.sp_ar.shape[1]) + }) + + A2 = xr.DataArray(self.sp_ar, dims=['x', 'y'], coords={ + 'x': np.arange(1, self.sp_ar.shape[0] + 1), + 'y': np.arange(1, self.sp_ar.shape[1] + 1) + }) + + B1, B2 = xr.align(A1, A2, join='inner') + assert np.all(B1.coords['x'] == np.arange(1, self.sp_ar.shape[0])) + assert np.all(B1.coords['y'] == np.arange(1, self.sp_ar.shape[0])) + assert np.all(B1.coords['x'] == B2.coords['x']) + assert np.all(B1.coords['y'] == B2.coords['y']) + + @pytest.mark.xfail(reason="fill value leads to sparse-dense operation") + def test_align_outer(self): + a1 = xr.DataArray( + COO.from_numpy(np.arange(4)), + dims=['x'], + coords={'x': ['a', 'b', 'c', 'd']}) + b1 = xr.DataArray( + COO.from_numpy(np.arange(4)), + dims=['x'], + coords={'x': ['a', 'b', 'd', 'e']}) + a2, b2 = xr.align(a1, b1, join='outer') + assert isinstance(a2.data, sparse.SparseArray) + assert isinstance(b2.data, sparse.SparseArray) + assert np.all(a2.coords['x'].data == ['a', 'b', 'c', 'd']) + assert np.all(b2.coords['x'].data == ['a', 'b', 'c', 'd']) + + @pytest.mark.xfail(reason='Missing implementation for np.result_type') + def test_concat(self): + ds1 = xr.Dataset(data_vars={'d': self.sp_xr}) + ds2 = xr.Dataset(data_vars={'d': self.sp_xr}) + ds3 = xr.Dataset(data_vars={'d': self.sp_xr}) + out = xr.concat([ds1, ds2, ds3], dim='x') + assert_sparse_eq( + out['d'].data, + sparse.concatenate([self.sp_ar, self.sp_ar, self.sp_ar], axis=0) + ) + + out = xr.concat([self.sp_xr, self.sp_xr, self.sp_xr], dim='y') + assert_sparse_eq( + out.data, + sparse.concatenate([self.sp_ar, self.sp_ar, self.sp_ar], axis=1) + ) + + def test_stack(self): + arr = make_xrarray({'w': 2, 'x': 3, 'y': 4}) + stacked = arr.stack(z=('x', 'y')) + + z = pd.MultiIndex.from_product( + [np.arange(3), np.arange(4)], + names=['x', 'y']) + + expected = xr.DataArray( + arr.data.reshape((2, -1)), + {'w': [0, 1], 'z': z}, + dims=['w', 'z']) + + assert_equal(expected, stacked) + + roundtripped = stacked.unstack() + assert arr.identical(roundtripped) + + def test_ufuncs(self): + x = self.sp_xr + assert_equal(np.sin(x), xu.sin(x)) + + def test_dataarray_repr(self): + a = xr.DataArray( + COO.from_numpy(np.ones((4))), + dims=['x'], + coords={'y': ('x', COO.from_numpy(np.arange(4)))}) + expected = dedent("""\ + + + Coordinates: + y (x) int64 ... + Dimensions without coordinates: x""") + assert expected == repr(a) + + def test_dataset_repr(self): + ds = xr.Dataset( + data_vars={'a': ('x', COO.from_numpy(np.ones((4))))}, + coords={'y': ('x', COO.from_numpy(np.arange(4)))}) + expected = dedent("""\ + + Dimensions: (x: 4) + Coordinates: + y (x) int64 ... + Dimensions without coordinates: x + Data variables: + a (x) float64 ...""") + assert expected == repr(ds) + + def test_dataarray_pickle(self): + a1 = xr.DataArray( + COO.from_numpy(np.ones((4))), + dims=['x'], + coords={'y': ('x', COO.from_numpy(np.arange(4)))}) + a2 = pickle.loads(pickle.dumps(a1)) + assert_identical(a1, a2) + + def test_dataset_pickle(self): + ds1 = xr.Dataset( + data_vars={'a': ('x', COO.from_numpy(np.ones((4))))}, + coords={'y': ('x', COO.from_numpy(np.arange(4)))}) + ds2 = pickle.loads(pickle.dumps(ds1)) + assert_identical(ds1, ds2) + + def test_coarsen(self): + a1 = self.ds_xr + a2 = self.sp_xr + m1 = a1.coarsen(x=2, boundary='trim').mean() + m2 = a2.coarsen(x=2, boundary='trim').mean() + + assert isinstance(m2.data, sparse.SparseArray) + assert np.allclose(m1.data, m2.data.todense()) + + @pytest.mark.xfail(reason="No implementation of np.pad") + def test_rolling(self): + a1 = self.ds_xr + a2 = self.sp_xr + m1 = a1.rolling(x=2, center=True).mean() + m2 = a2.rolling(x=2, center=True).mean() + + assert isinstance(m2.data, sparse.SparseArray) + assert np.allclose(m1.data, m2.data.todense()) + + @pytest.mark.xfail(reason="Coercion to dense") + def test_rolling_exp(self): + a1 = self.ds_xr + a2 = self.sp_xr + m1 = a1.rolling_exp(x=2, center=True).mean() + m2 = a2.rolling_exp(x=2, center=True).mean() + + assert isinstance(m2.data, sparse.SparseArray) + assert np.allclose(m1.data, m2.data.todense()) + + @pytest.mark.xfail(reason="No implementation of np.einsum") + def test_dot(self): + a1 = self.xp_xr.dot(self.xp_xr[0]) + a2 = self.sp_ar.dot(self.sp_ar[0]) + assert_equal(a1, a2) + + @pytest.mark.xfail(reason="Groupby reductions produce dense output") + def test_groupby(self): + x1 = self.ds_xr + x2 = self.sp_xr + m1 = x1.groupby('x').mean(xr.ALL_DIMS) + m2 = x2.groupby('x').mean(xr.ALL_DIMS) + assert isinstance(m2.data, sparse.SparseArray) + assert np.allclose(m1.data, m2.data.todense()) + + @pytest.mark.xfail(reason="Groupby reductions produce dense output") + def test_groupby_first(self): + x = self.sp_xr.copy() + x.coords['ab'] = ('x', ['a', 'a', 'b', 'b']) + x.groupby('ab').first() + x.groupby('ab').first(skipna=False) + + @pytest.mark.xfail(reason="Groupby reductions produce dense output") + def test_groupby_bins(self): + x1 = self.ds_xr + x2 = self.sp_xr + m1 = x1.groupby_bins('x', bins=[0, 3, 7, 10]).sum() + m2 = x2.groupby_bins('x', bins=[0, 3, 7, 10]).sum() + assert isinstance(m2.data, sparse.SparseArray) + assert np.allclose(m1.data, m2.data.todense()) + + @pytest.mark.xfail(reason="Resample produces dense output") + def test_resample(self): + t1 = xr.DataArray(np.linspace(0, 11, num=12), + coords=[pd.date_range('15/12/1999', + periods=12, freq=pd.DateOffset(months=1))], + dims='time') + t2 = t1.copy() + t2.data = COO(t2.data) + m1 = t1.resample(time="QS-DEC").mean() + m2 = t2.resample(time="QS-DEC").mean() + assert isinstance(m2.data, sparse.SparseArray) + assert np.allclose(m1.data, m2.data.todense()) + + @pytest.mark.xfail + def test_reindex(self): + x1 = self.ds_xr + x2 = self.sp_xr + for kwargs in [{'x': [2, 3, 4]}, + {'x': [1, 100, 2, 101, 3]}, + {'x': [2.5, 3, 3.5], 'y': [2, 2.5, 3]}]: + m1 = x1.reindex(**kwargs) + m2 = x2.reindex(**kwargs) + assert np.allclose(m1, m2, equal_nan=True) + + @pytest.mark.xfail + def test_merge(self): + x = self.sp_xr + y = xr.merge([x, x.rename('bar')]).to_array() + assert isinstance(y, sparse.SparseArray) + + @pytest.mark.xfail + def test_where(self): + a = np.arange(10) + cond = a > 3 + xr.DataArray(a).where(cond) + + s = COO.from_numpy(a) + cond = s > 3 + xr.DataArray(s).where(cond) + + x = xr.DataArray(s) + cond = x > 3 + x.where(cond) + + +class TestSparseCoords: + @pytest.mark.xfail(reason="Coercion of coords to dense") + def test_sparse_coords(self): + xr.DataArray( + COO.from_numpy(np.arange(4)), + dims=['x'], + coords={'x': COO.from_numpy([1, 2, 3, 4])})