Skip to content

Commit 69f7386

Browse files
committed
Merge pull request #481 from jhamman/feature/isel_points
Add pointwise indexing via isel_points method
2 parents c1fe6c2 + 5ab9d4b commit 69f7386

File tree

7 files changed

+282
-2
lines changed

7 files changed

+282
-2
lines changed

doc/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ Indexing
9393
Dataset.loc
9494
Dataset.isel
9595
Dataset.sel
96+
Dataset.isel_points
9697
Dataset.squeeze
9798
Dataset.reindex
9899
Dataset.reindex_like
@@ -202,6 +203,7 @@ Indexing
202203
DataArray.loc
203204
DataArray.isel
204205
DataArray.sel
206+
DataArray.isel_points
205207
DataArray.squeeze
206208
DataArray.reindex
207209
DataArray.reindex_like

doc/indexing.rst

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ DataArray:
5757

5858
Positional indexing deviates from the NumPy when indexing with multiple
5959
arrays like ``arr[[0, 1], [0, 1]]``, as described in :ref:`indexing details`.
60+
See :ref:`pointwise indexing` and :py:meth:`~xray.Dataset.isel_points` for more on this functionality.
6061

6162
xray also supports label-based indexing, just like pandas. Because
6263
we use a :py:class:`pandas.Index` under the hood, label based indexing is very
@@ -122,7 +123,7 @@ __ http://legacy.python.org/dev/peps/pep-0472/
122123

123124
.. warning::
124125

125-
Do not try to assign values when using ``isel`` or ``sel``::
126+
Do not try to assign values when using ``isel``, ``isel_points`` or ``sel``::
126127

127128
# DO NOT do this
128129
arr.isel(space=0) = 0
@@ -134,6 +135,23 @@ __ http://legacy.python.org/dev/peps/pep-0472/
134135
# this is safe
135136
arr[dict(space=0)] = 0
136137

138+
.. _pointwise indexing:
139+
140+
Pointwise indexing
141+
------------------
142+
143+
xray pointwise indexing supports the indexing along multiple labeled dimensions
144+
using list-like objects. While :py:meth:`~xray.DataArray.isel` performs
145+
orthogonal indexing, the :py:meth:`~xray.DataArray.isel_points` method
146+
provides similar numpy indexing behavior as if you were using multiple lists to index an array (e.g. `arr[[0, 1], [0, 1]]` ):
147+
148+
.. ipython:: python
149+
150+
# index by integer array indices
151+
da = xray.DataArray(np.arange(56).reshape((7, 8)), dims=['x', 'y'])
152+
da
153+
da.isel_points(x=[0, 1, 6], y=[0, 1, 0])
154+
137155
Dataset indexing
138156
----------------
139157

@@ -145,13 +163,16 @@ simultaneously, returning a new dataset:
145163
ds = arr.to_dataset()
146164
ds.isel(space=[0], time=[0])
147165
ds.sel(time='2000-01-01')
166+
ds2 = da.to_dataset()
167+
ds2.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points')
148168
149169
Positional indexing on a dataset is not supported because the ordering of
150170
dimensions in a dataset is somewhat ambiguous (it can vary between different
151171
arrays). However, you can do normal indexing with labeled dimensions:
152172

153173
.. ipython:: python
154174
175+
155176
ds[dict(space=[0], time=[0])]
156177
ds.loc[dict(time='2000-01-01')]
157178

doc/whats-new.rst

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,36 @@ v0.5.3 (unreleased)
1717

1818
- Dataset variables are now written to netCDF files in order of appearance
1919
when using the netcdf4 backend (:issue:`479`).
20+
- Added :py:meth:`~xray.Dataset.isel_points` and :py:meth:`~xray.DataArray.isel_points` to support pointwise indexing of Datasets and DataArrays (:issue:`475`).
21+
22+
.. ipython::
23+
:verbatim:
24+
25+
In [1]: da = xray.DataArray(np.arange(56).reshape((7, 8)),
26+
dims=['x', 'y'])
27+
28+
In [2]: da
29+
Out[2]:
30+
<xray.DataArray (x: 7, y: 8)>
31+
array([[ 0, 1, 2, 3, 4, 5, 6, 7],
32+
[ 8, 9, 10, 11, 12, 13, 14, 15],
33+
[16, 17, 18, 19, 20, 21, 22, 23],
34+
[24, 25, 26, 27, 28, 29, 30, 31],
35+
[32, 33, 34, 35, 36, 37, 38, 39],
36+
[40, 41, 42, 43, 44, 45, 46, 47],
37+
[48, 49, 50, 51, 52, 53, 54, 55]])
38+
Coordinates:
39+
* x (x) int64 0 1 2 3 4 5 6
40+
* y (y) int64 0 1 2 3 4 5 6 7
41+
42+
In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points')
43+
Out[3]:
44+
<xray.DataArray (points: 3)>
45+
array([ 0, 9, 48])
46+
Coordinates:
47+
x (points) int64 0 1 6
48+
y (points) int64 0 1 0
49+
* points (points) int64 0 1 2
2050

2151

2252
Bug fixes

xray/core/dataarray.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,17 @@ def sel(self, method=None, **indexers):
551551
return self.isel(**indexing.remap_label_indexers(self, indexers,
552552
method=method))
553553

554+
def isel_points(self, dim='points', **indexers):
555+
"""Return a new DataArray whose dataset is given by pointwise integer
556+
indexing along the specified dimension(s).
557+
558+
See Also
559+
--------
560+
Dataset.isel_points
561+
"""
562+
ds = self._dataset.isel_points(dim=dim, **indexers)
563+
return self._with_replaced_dataset(ds)
564+
554565
def reindex_like(self, other, method=None, copy=True):
555566
"""Conform this object onto the indexes of another object, filling
556567
in missing values with NaN.

xray/core/dataset.py

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import functools
22
import warnings
3-
from collections import Mapping
3+
from collections import Mapping, Sequence
44
from numbers import Number
55

66
import numpy as np
@@ -21,6 +21,7 @@
2121
from .variable import as_variable, Variable, Coordinate, broadcast_variables
2222
from .pycompat import (iteritems, itervalues, basestring, OrderedDict,
2323
dask_array_type)
24+
from .combine import concat
2425

2526

2627
# list of attributes of pd.DatetimeIndex that are ndarrays of time info
@@ -1028,6 +1029,93 @@ def sel(self, method=None, **indexers):
10281029
return self.isel(**indexing.remap_label_indexers(self, indexers,
10291030
method=method))
10301031

1032+
def isel_points(self, dim='points', **indexers):
1033+
"""Returns a new dataset with each array indexed pointwise along the
1034+
specified dimension(s).
1035+
1036+
This method selects pointwise values from each array and is akin to
1037+
the NumPy indexing behavior of `arr[[0, 1], [0, 1]]`, except this
1038+
method does not require knowing the order of each array's dimensions.
1039+
1040+
Parameters
1041+
----------
1042+
dim : str or DataArray or pandas.Index or other list-like object, optional
1043+
Name of the dimension to concatenate along. If dim is provided as a
1044+
string, it must be a new dimension name, in which case it is added
1045+
along axis=0. If dim is provided as a DataArray or Index or
1046+
list-like object, its name, which must not be present in the
1047+
dataset, is used as the dimension to concatenate along and the
1048+
values are added as a coordinate.
1049+
**indexers : {dim: indexer, ...}
1050+
Keyword arguments with names matching dimensions and values given
1051+
by array-like objects. All indexers must be the same length and
1052+
1 dimensional.
1053+
1054+
Returns
1055+
-------
1056+
obj : Dataset
1057+
A new Dataset with the same contents as this dataset, except each
1058+
array and dimension is indexed by the appropriate indexers. With
1059+
pointwise indexing, the new Dataset will always be a copy of the
1060+
original.
1061+
1062+
See Also
1063+
--------
1064+
Dataset.sel
1065+
DataArray.isel
1066+
DataArray.sel
1067+
DataArray.isel_points
1068+
"""
1069+
indexer_dims = set(indexers)
1070+
1071+
def relevant_keys(mapping):
1072+
return [k for k, v in mapping.items()
1073+
if any(d in indexer_dims for d in v.dims)]
1074+
1075+
data_vars = relevant_keys(self.data_vars)
1076+
coords = relevant_keys(self.coords)
1077+
1078+
# all the indexers should be iterables
1079+
keys = indexers.keys()
1080+
indexers = [(k, np.asarray(v)) for k, v in iteritems(indexers)]
1081+
# Check that indexers are valid dims, integers, and 1D
1082+
for k, v in indexers:
1083+
if k not in self.dims:
1084+
raise ValueError("dimension %s does not exist" % k)
1085+
if v.dtype.kind != 'i':
1086+
raise TypeError('Indexers must be integers')
1087+
if v.ndim != 1:
1088+
raise ValueError('Indexers must be 1 dimensional')
1089+
1090+
# all the indexers should have the same length
1091+
lengths = set(len(v) for k, v in indexers)
1092+
if len(lengths) > 1:
1093+
raise ValueError('All indexers must be the same length')
1094+
1095+
# Existing dimensions are not valid choices for the dim argument
1096+
if isinstance(dim, basestring):
1097+
if dim in self.dims:
1098+
# dim is an invalid string
1099+
raise ValueError('Existing dimension names are not valid '
1100+
'choices for the dim argument in sel_points')
1101+
elif hasattr(dim, 'dims'):
1102+
# dim is a DataArray or Coordinate
1103+
if dim.name in self.dims:
1104+
# dim already exists
1105+
raise ValueError('Existing dimensions are not valid choices '
1106+
'for the dim argument in sel_points')
1107+
else:
1108+
# try to cast dim to DataArray with name = points
1109+
from .dataarray import DataArray
1110+
dim = DataArray(dim, dims='points', name='points')
1111+
1112+
# TODO: This would be sped up with vectorized indexing. This will
1113+
# require dask to support pointwise indexing as well.
1114+
return concat([self.isel(**d) for d in
1115+
[dict(zip(keys, inds)) for inds in
1116+
zip(*[v for k, v in indexers])]],
1117+
dim=dim, coords=coords, data_vars=data_vars)
1118+
10311119
def reindex_like(self, other, method=None, copy=True):
10321120
"""Conform this object onto the indexes of another object, filling
10331121
in missing values with NaN.

xray/test/test_dataarray.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,66 @@ def test_sel_method(self):
382382
actual = data.sel(x=[0.9, 1.9], method='backfill')
383383
self.assertDataArrayIdentical(expected, actual)
384384

385+
def test_isel_points_method(self):
386+
shape = (10, 5, 6)
387+
np_array = np.random.random(shape)
388+
da = DataArray(np_array, dims=['time', 'y', 'x'])
389+
y = [1, 3]
390+
x = [3, 0]
391+
392+
expected = da.values[:, y, x]
393+
394+
actual = da.isel_points(y=y, x=x, dim='test_coord')
395+
assert 'test_coord' in actual.coords
396+
assert actual.coords['test_coord'].shape == (len(y), )
397+
assert all(x in actual for x in ['time', 'x', 'y', 'test_coord'])
398+
assert actual.dims == ('test_coord', 'time')
399+
actual = da.isel_points(y=y, x=x)
400+
assert 'points' in actual.coords
401+
# Note that because xray always concatenates along the first dimension,
402+
# We must transpose the result to match the numpy style of
403+
# concatentation.
404+
np.testing.assert_equal(actual.T, expected)
405+
406+
# a few corner cases
407+
da.isel_points(time=[1, 2], x=[2, 2], y=[3, 4])
408+
np.testing.assert_allclose(
409+
da.isel_points(time=[1], x=[2], y=[4]).values.squeeze(),
410+
np_array[1, 4, 2].squeeze())
411+
da.isel_points(time=[1, 2])
412+
y = [-1, 0]
413+
x = [-2, 2]
414+
expected = da.values[:, y, x]
415+
actual = da.isel_points(x=x, y=y).values
416+
np.testing.assert_equal(actual.T, expected)
417+
418+
# test that the order of the indexers doesn't matter
419+
self.assertDataArrayIdentical(
420+
da.isel_points(y=y, x=x),
421+
da.isel_points(x=x, y=y))
422+
423+
# make sure we're raising errors in the right places
424+
with self.assertRaisesRegexp(ValueError,
425+
'All indexers must be the same length'):
426+
da.isel_points(y=[1, 2], x=[1, 2, 3])
427+
with self.assertRaisesRegexp(ValueError,
428+
'dimension bad_key does not exist'):
429+
da.isel_points(bad_key=[1, 2])
430+
with self.assertRaisesRegexp(TypeError, 'Indexers must be integers'):
431+
da.isel_points(y=[1.5, 2.2])
432+
with self.assertRaisesRegexp(TypeError, 'Indexers must be integers'):
433+
da.isel_points(x=[1, 2, 3], y=slice(3))
434+
with self.assertRaisesRegexp(ValueError,
435+
'Indexers must be 1 dimensional'):
436+
da.isel_points(y=1, x=2)
437+
with self.assertRaisesRegexp(ValueError,
438+
'Existing dimension names are not'):
439+
da.isel_points(y=[1, 2], x=[1, 2], dim='x')
440+
441+
# using non string dims
442+
actual = da.isel_points(y=[1, 2], x=[1, 2], dim=['A', 'B'])
443+
assert 'points' in actual.coords
444+
385445
def test_loc(self):
386446
self.ds['x'] = ('x', np.array(list('abcdefghij')))
387447
da = self.ds['foo']

xray/test/test_dataset.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,74 @@ def test_sel(self):
661661
self.assertDatasetEqual(data.isel(td=slice(1, 3)),
662662
data.sel(td=slice('1 days', '2 days')))
663663

664+
def test_isel_points(self):
665+
data = create_test_data()
666+
667+
pdim1 = [1, 2, 3]
668+
pdim2 = [4, 5, 1]
669+
pdim3 = [1, 2, 3]
670+
671+
actual = data.isel_points(dim1=pdim1, dim2=pdim2, dim3=pdim3,
672+
dim='test_coord')
673+
assert 'test_coord' in actual.coords
674+
assert actual.coords['test_coord'].shape == (len(pdim1), )
675+
676+
actual = data.isel_points(dim1=pdim1, dim2=pdim2)
677+
assert 'points' in actual.coords
678+
np.testing.assert_array_equal(pdim1, actual['dim1'])
679+
680+
# test that the order of the indexers doesn't matter
681+
self.assertDatasetIdentical(data.isel_points(dim1=pdim1, dim2=pdim2),
682+
data.isel_points(dim2=pdim2, dim1=pdim1))
683+
684+
# make sure we're raising errors in the right places
685+
with self.assertRaisesRegexp(ValueError,
686+
'All indexers must be the same length'):
687+
data.isel_points(dim1=[1, 2], dim2=[1, 2, 3])
688+
with self.assertRaisesRegexp(ValueError,
689+
'dimension bad_key does not exist'):
690+
data.isel_points(bad_key=[1, 2])
691+
with self.assertRaisesRegexp(TypeError, 'Indexers must be integers'):
692+
data.isel_points(dim1=[1.5, 2.2])
693+
with self.assertRaisesRegexp(TypeError, 'Indexers must be integers'):
694+
data.isel_points(dim1=[1, 2, 3], dim2=slice(3))
695+
with self.assertRaisesRegexp(ValueError,
696+
'Indexers must be 1 dimensional'):
697+
data.isel_points(dim1=1, dim2=2)
698+
with self.assertRaisesRegexp(ValueError,
699+
'Existing dimension names are not valid'):
700+
data.isel_points(dim1=[1, 2], dim2=[1, 2], dim='dim2')
701+
702+
# test to be sure we keep around variables that were not indexed
703+
ds = Dataset({'x': [1, 2, 3, 4], 'y': 0})
704+
actual = ds.isel_points(x=[0, 1, 2])
705+
self.assertDataArrayIdentical(ds['y'], actual['y'])
706+
707+
# tests using index or DataArray as a dim
708+
stations = Dataset()
709+
stations['station'] = ('station', ['A', 'B', 'C'])
710+
stations['dim1s'] = ('station', [1, 2, 3])
711+
stations['dim2s'] = ('station', [4, 5, 1])
712+
713+
actual = data.isel_points(dim1=stations['dim1s'],
714+
dim2=stations['dim2s'],
715+
dim=stations['station'])
716+
assert 'station' in actual.coords
717+
assert 'station' in actual.dims
718+
self.assertDataArrayIdentical(actual['station'].drop(['dim1', 'dim2']),
719+
stations['station'])
720+
721+
# make sure we get the default points coordinate when a list is passed
722+
actual = data.isel_points(dim1=stations['dim1s'],
723+
dim2=stations['dim2s'],
724+
dim=['A', 'B', 'C'])
725+
assert 'points' in actual.coords
726+
727+
# can pass a numpy array
728+
data.isel_points(dim1=stations['dim1s'],
729+
dim2=stations['dim2s'],
730+
dim=np.array([4, 5, 6]))
731+
664732
def test_sel_method(self):
665733
data = create_test_data()
666734

0 commit comments

Comments
 (0)