Skip to content

Commit 214d941

Browse files
TomNicholaspre-commit-ci[bot]dcherian
authored
Option to not auto-create index during expand_dims (#8960)
* test expand_dims doesn't create Index * add option to not create 1D index in expand_dims * refactor tests to consider data variables and coordinate variables separately * fix bug causing new test to fail * test index auto-creation when iterable passed as new coordinate values * make test for iterable pass * added kwarg to dataarray * whatsnew * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update tests to use private versions of assertions * create_1d_index->create_index * Update doc/whats-new.rst Co-authored-by: Deepak Cherian <[email protected]> * warn if create_index=True but no index created because dimension variable was a data var not a coord * add string marks in warning message --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian <[email protected]>
1 parent d7edbd7 commit 214d941

File tree

4 files changed

+87
-9
lines changed

4 files changed

+87
-9
lines changed

doc/whats-new.rst

+3
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ New Features
2929
for example, will retain the object. However, one cannot do operations that are not possible on the `ExtensionArray`
3030
then, such as broadcasting.
3131
By `Ilan Gold <https://github.com/ilan-gold>`_.
32+
- Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg
33+
``create_index=False``. (:pull:`8960`)
34+
By `Tom Nicholas <https://github.com/TomNicholas>`_.
3235

3336
Breaking changes
3437
~~~~~~~~~~~~~~~~

xarray/core/dataarray.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -2557,6 +2557,7 @@ def expand_dims(
25572557
self,
25582558
dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None,
25592559
axis: None | int | Sequence[int] = None,
2560+
create_index: bool = True,
25602561
**dim_kwargs: Any,
25612562
) -> Self:
25622563
"""Return a new object with an additional axis (or axes) inserted at
@@ -2566,6 +2567,9 @@ def expand_dims(
25662567
If dim is already a scalar coordinate, it will be promoted to a 1D
25672568
coordinate consisting of a single value.
25682569
2570+
The automatic creation of indexes to back new 1D coordinate variables
2571+
is controlled by the create_index kwarg.
2572+
25692573
Parameters
25702574
----------
25712575
dim : Hashable, sequence of Hashable, dict, or None, optional
@@ -2581,6 +2585,8 @@ def expand_dims(
25812585
multiple axes are inserted. In this case, dim arguments should be
25822586
same length list. If axis=None is passed, all the axes will be
25832587
inserted to the start of the result array.
2588+
create_index : bool, default is True
2589+
Whether to create new PandasIndex objects for any new 1D coordinate variables.
25842590
**dim_kwargs : int or sequence or ndarray
25852591
The keywords are arbitrary dimensions being inserted and the values
25862592
are either the lengths of the new dims (if int is given), or their
@@ -2644,7 +2650,7 @@ def expand_dims(
26442650
dim = {dim: 1}
26452651

26462652
dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims")
2647-
ds = self._to_temp_dataset().expand_dims(dim, axis)
2653+
ds = self._to_temp_dataset().expand_dims(dim, axis, create_index=create_index)
26482654
return self._from_temp_dataset(ds)
26492655

26502656
def set_index(

xarray/core/dataset.py

+31-8
Original file line numberDiff line numberDiff line change
@@ -4497,6 +4497,7 @@ def expand_dims(
44974497
self,
44984498
dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None,
44994499
axis: None | int | Sequence[int] = None,
4500+
create_index: bool = True,
45004501
**dim_kwargs: Any,
45014502
) -> Self:
45024503
"""Return a new object with an additional axis (or axes) inserted at
@@ -4506,6 +4507,9 @@ def expand_dims(
45064507
If dim is already a scalar coordinate, it will be promoted to a 1D
45074508
coordinate consisting of a single value.
45084509
4510+
The automatic creation of indexes to back new 1D coordinate variables
4511+
is controlled by the create_index kwarg.
4512+
45094513
Parameters
45104514
----------
45114515
dim : hashable, sequence of hashable, mapping, or None
@@ -4521,6 +4525,8 @@ def expand_dims(
45214525
multiple axes are inserted. In this case, dim arguments should be
45224526
same length list. If axis=None is passed, all the axes will be
45234527
inserted to the start of the result array.
4528+
create_index : bool, default is True
4529+
Whether to create new PandasIndex objects for any new 1D coordinate variables.
45244530
**dim_kwargs : int or sequence or ndarray
45254531
The keywords are arbitrary dimensions being inserted and the values
45264532
are either the lengths of the new dims (if int is given), or their
@@ -4640,9 +4646,14 @@ def expand_dims(
46404646
# save the coordinates to the variables dict, and set the
46414647
# value within the dim dict to the length of the iterable
46424648
# for later use.
4643-
index = PandasIndex(v, k)
4644-
indexes[k] = index
4645-
variables.update(index.create_variables())
4649+
4650+
if create_index:
4651+
index = PandasIndex(v, k)
4652+
indexes[k] = index
4653+
name_and_new_1d_var = index.create_variables()
4654+
else:
4655+
name_and_new_1d_var = {k: Variable(data=v, dims=k)}
4656+
variables.update(name_and_new_1d_var)
46464657
coord_names.add(k)
46474658
dim[k] = variables[k].size
46484659
elif isinstance(v, int):
@@ -4678,11 +4689,23 @@ def expand_dims(
46784689
variables[k] = v.set_dims(dict(all_dims))
46794690
else:
46804691
if k not in variables:
4681-
# If dims includes a label of a non-dimension coordinate,
4682-
# it will be promoted to a 1D coordinate with a single value.
4683-
index, index_vars = create_default_index_implicit(v.set_dims(k))
4684-
indexes[k] = index
4685-
variables.update(index_vars)
4692+
if k in coord_names and create_index:
4693+
# If dims includes a label of a non-dimension coordinate,
4694+
# it will be promoted to a 1D coordinate with a single value.
4695+
index, index_vars = create_default_index_implicit(v.set_dims(k))
4696+
indexes[k] = index
4697+
variables.update(index_vars)
4698+
else:
4699+
if create_index:
4700+
warnings.warn(
4701+
f"No index created for dimension {k} because variable {k} is not a coordinate. "
4702+
f"To create an index for {k}, please first call `.set_coords('{k}')` on this object.",
4703+
UserWarning,
4704+
)
4705+
4706+
# create 1D variable without creating a new index
4707+
new_1d_var = v.set_dims(k)
4708+
variables.update({k: new_1d_var})
46864709

46874710
return self._replace_with_new_dims(
46884711
variables, coord_names=coord_names, indexes=indexes

xarray/tests/test_dataset.py

+46
Original file line numberDiff line numberDiff line change
@@ -3430,6 +3430,52 @@ def test_expand_dims_kwargs_python36plus(self) -> None:
34303430
)
34313431
assert_identical(other_way_expected, other_way)
34323432

3433+
@pytest.mark.parametrize("create_index_flag", [True, False])
def test_expand_dims_create_index_data_variable(self, create_index_flag):
    """Expanding along a scalar *data* variable must never attach an index.

    Runs once with ``create_index=True`` and once with ``create_index=False``;
    in both cases the result is expected to carry no indexes.
    """
    scalar_ds = Dataset({"x": 0})

    if not create_index_flag:
        result = scalar_ds.expand_dims("x", create_index=create_index_flag)
    else:
        # An index was requested but "x" is a data variable, so the call
        # is expected to emit a warning.
        with pytest.warns(UserWarning, match="No index created"):
            result = scalar_ds.expand_dims("x", create_index=create_index_flag)

    # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
    indexed = Dataset({"x": ("x", [0])})
    unindexed = indexed.drop_indexes("x")
    expected = unindexed.reset_coords("x")

    assert_identical(result, expected, check_default_indexes=False)
    assert result.indexes == {}
3449+
3450+
def test_expand_dims_create_index_coordinate_variable(self):
    """A scalar coordinate gains an index on expansion unless ``create_index=False``."""
    scalar_coord_ds = Dataset(coords={"x": 0})

    # Default behaviour (create_index=True): compared against a dataset
    # built through the regular constructor.
    with_index = scalar_coord_ds.expand_dims("x")
    indexed_expected = Dataset({"x": ("x", [0])})
    assert_identical(with_index, indexed_expected)

    # Opting out: same 1D coordinate, but no index is expected.
    without_index = scalar_coord_ds.expand_dims("x", create_index=False)

    # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
    unindexed_expected = Dataset(coords={"x": ("x", [0])}).drop_indexes("x")

    assert_identical(without_index, unindexed_expected, check_default_indexes=False)
    assert without_index.indexes == {}
3464+
3465+
def test_expand_dims_create_index_from_iterable(self) -> None:
    """Passing an iterable of coordinate values creates an index only when asked.

    With the default ``create_index=True`` the new ``x`` coordinate is compared
    against a dataset built through the regular constructor; with
    ``create_index=False`` the same coordinate values must be present and the
    result must have no indexes.
    """
    ds = Dataset(coords={"x": 0})

    # Default: the iterable becomes an indexed 1D coordinate.
    expanded = ds.expand_dims(x=[0, 1])
    expected = Dataset({"x": ("x", [0, 1])})
    assert_identical(expanded, expected)

    expanded_no_index = ds.expand_dims(x=[0, 1], create_index=False)

    # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
    expected = Dataset(coords={"x": ("x", [0, 1])}).drop_indexes("x")

    # Compare the *index-free* result here. Checking `expanded` again would
    # leave the values produced by the create_index=False path unverified.
    assert_identical(expanded_no_index, expected, check_default_indexes=False)
    assert expanded_no_index.indexes == {}
3478+
34333479
def test_expand_dims_non_nanosecond_conversion(self) -> None:
34343480
# Regression test for https://github.com/pydata/xarray/issues/7493#issuecomment-1953091000
34353481
with pytest.warns(UserWarning, match="non-nanosecond precision"):

0 commit comments

Comments
 (0)