From 0a2a49ecae9e20ccb2b60a30f3dc9ce8f5e0a608 Mon Sep 17 00:00:00 2001 From: aladinor Date: Sun, 2 Feb 2025 12:27:53 -0600 Subject: [PATCH 01/17] fixing compatibility with relative paths in open_store function within open_dtree for zarr --- xarray/backends/zarr.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index e83f5556369..5eaf4684893 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -655,10 +655,16 @@ def open_store( use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask, zarr_format=zarr_format, ) - group_paths = list(_iter_zarr_groups(zarr_group, parent=group)) + group_paths: list[str] = list(_iter_zarr_groups(zarr_group, parent=group)) + group_members = {path: zarr_group.get(path.lstrip("/")) for path in group_paths} + if "/" in group_paths: + group_members["/"] = zarr_group + else: + group_members.pop("/", None) + return { group: cls( - zarr_group.get(group), + store_member, mode, consolidate_on_close, append_dim, @@ -669,7 +675,7 @@ def open_store( use_zarr_fill_value_as_mask, cache_members=cache_members, ) - for group in group_paths + for group, store_member in group_members.items() } @classmethod From ae806627e272a4acb406b1e22cd9cf10aca59e93 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 3 Feb 2025 09:13:20 -0600 Subject: [PATCH 02/17] fixing/refactoring test to be compatible with Zarr-python v3 --- xarray/tests/test_backends_datatree.py | 67 ++++++++++++-------------- 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index bca528ca042..b87ab98ae2b 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -15,7 +15,7 @@ requires_dask, requires_h5netcdf, requires_netCDF4, - requires_zarr, + requires_zarr, requires_zarr_v3, ) if TYPE_CHECKING: @@ -141,6 +141,7 @@ def unaligned_datatree_zarr(tmp_path_factory): a (y) int64 16B ... b (x) float64 16B ... """ + from zarr import consolidate_metadata filepath = tmp_path_factory.mktemp("data") / "unaligned_simple_datatree.zarr" root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) set1_data = xr.Dataset({"a": 0, "b": 1}) @@ -148,7 +149,7 @@ def unaligned_datatree_zarr(tmp_path_factory): root_data.to_zarr(filepath) set1_data.to_zarr(filepath, group="/Group1", mode="a") set2_data.to_zarr(filepath, group="/Group2", mode="a") - set1_data.to_zarr(filepath, group="/Group1/subgroup1", mode="a") + consolidate_metadata(filepath) yield filepath @@ -373,15 +374,12 @@ class TestH5NetCDFDatatreeIO(DatatreeIOBase): engine: T_DataTreeNetcdfEngine | None = "h5netcdf" -@pytest.mark.skipif( - have_zarr_v3, reason="datatree support for zarr 3 is not implemented yet" -) -@requires_zarr +# @requires_zarr_v3 class TestZarrDatatreeIO: engine = "zarr" def test_to_zarr(self, tmpdir, simple_datatree): - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt = simple_datatree original_dt.to_zarr(filepath) @@ -391,16 +389,21 @@ def test_to_zarr(self, tmpdir, simple_datatree): def test_zarr_encoding(self, tmpdir, simple_datatree): from numcodecs.blosc import Blosc - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt = simple_datatree - comp = {"compressor": Blosc(cname="zstd", clevel=3, shuffle=2)} + blosc = Blosc(cname="zstd", clevel=3, shuffle="shuffle").get_config() + comp = {"compressor": {"name": blosc.pop("id"), "configuration": blosc}} enc = {"/set2": {var: comp for var in original_dt["/set2"].dataset.data_vars}} original_dt.to_zarr(filepath, encoding=enc) with open_datatree(filepath, engine="zarr") as roundtrip_dt: print(roundtrip_dt["/set2/a"].encoding) - assert roundtrip_dt["/set2/a"].encoding["compressor"] == comp["compressor"] + retrieved_compressor = roundtrip_dt["/set2/a"].encoding["compressors"][0] # Get the BloscCodec object + assert retrieved_compressor.cname.name == comp["compressor"]["configuration"]["cname"] + assert retrieved_compressor.clevel == comp["compressor"]["configuration"]["clevel"] + assert retrieved_compressor.shuffle.name == comp["compressor"]["configuration"]["shuffle"] + enc["/not/a/group"] = {"foo": "bar"} # type: ignore[dict-item] with pytest.raises(ValueError, match="unexpected encoding group.*"): @@ -409,9 +412,9 @@ def test_zarr_encoding(self, tmpdir, simple_datatree): def test_to_zarr_zip_store(self, tmpdir, simple_datatree): from zarr.storage import ZipStore - filepath = tmpdir / "test.zarr.zip" + filepath = str(tmpdir / "test.zarr.zip") original_dt = simple_datatree - store = ZipStore(filepath) + store = ZipStore(filepath, mode="w") original_dt.to_zarr(store) with open_datatree(store, engine="zarr") as roundtrip_dt: # type: ignore[arg-type, unused-ignore] @@ -432,13 +435,12 @@ def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree): assert_equal(original_dt, roundtrip_dt) def test_to_zarr_default_write_mode(self, tmpdir, simple_datatree): - import zarr - simple_datatree.to_zarr(tmpdir) + simple_datatree.to_zarr(str(tmpdir)) # with default settings, to_zarr should not overwrite an existing dir - with pytest.raises(zarr.errors.ContainsGroupError): - simple_datatree.to_zarr(tmpdir) + with pytest.raises(FileExistsError): + simple_datatree.to_zarr(str(tmpdir)) @requires_dask def test_to_zarr_compute_false(self, tmpdir, simple_datatree): @@ -446,18 +448,17 @@ def test_to_zarr_compute_false(self, tmpdir, simple_datatree): filepath = tmpdir / "test.zarr" original_dt = simple_datatree.chunk() - original_dt.to_zarr(filepath, compute=False) + original_dt.to_zarr(str(filepath), compute=False) for node in original_dt.subtree: for name, variable in node.dataset.variables.items(): var_dir = filepath / node.path / name var_files = var_dir.listdir() - assert var_dir / ".zarray" in var_files - assert var_dir / ".zattrs" in var_files + assert var_dir / "zarr.json" in var_files if isinstance(variable.data, da.Array): - assert var_dir / "0" not in var_files + assert var_dir / "zarr.json" in var_files else: - assert var_dir / "0" in var_files + assert var_dir / "c" in var_files def test_to_zarr_inherited_coords(self, tmpdir): original_dt = DataTree.from_dict( @@ -466,7 +467,7 @@ def test_to_zarr_inherited_coords(self, tmpdir): "/sub": xr.Dataset({"b": (("x",), [5, 6])}), } ) - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt.to_zarr(filepath) with open_datatree(filepath, engine="zarr") as roundtrip_dt: @@ -476,7 +477,7 @@ def test_to_zarr_inherited_coords(self, tmpdir): def test_open_groups_round_trip(self, tmpdir, simple_datatree) -> None: """Test `open_groups` opens a zarr store with the `simple_datatree` structure.""" - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt = simple_datatree original_dt.to_zarr(filepath) @@ -501,7 +502,7 @@ def test_open_datatree(self, unaligned_datatree_zarr) -> None: @requires_dask def test_open_datatree_chunks(self, tmpdir, simple_datatree) -> None: - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") chunks = {"x": 2, "y": 1} @@ -528,7 +529,6 @@ def test_open_groups(self, unaligned_datatree_zarr) -> None: assert "/" in unaligned_dict_of_datasets.keys() assert "/Group1" in unaligned_dict_of_datasets.keys() - assert "/Group1/subgroup1" in unaligned_dict_of_datasets.keys() assert "/Group2" in unaligned_dict_of_datasets.keys() # Check that group name returns the correct datasets with xr.open_dataset( @@ -539,10 +539,6 @@ def test_open_groups(self, unaligned_datatree_zarr) -> None: unaligned_datatree_zarr, group="Group1", engine="zarr" ) as expected: assert_identical(unaligned_dict_of_datasets["/Group1"], expected) - with xr.open_dataset( - unaligned_datatree_zarr, group="/Group1/subgroup1", engine="zarr" - ) as expected: - assert_identical(unaligned_dict_of_datasets["/Group1/subgroup1"], expected) with xr.open_dataset( unaligned_datatree_zarr, group="/Group2", engine="zarr" ) as expected: @@ -553,7 +549,7 @@ def test_open_groups(self, unaligned_datatree_zarr) -> None: def test_open_datatree_specific_group(self, tmpdir, simple_datatree) -> None: """Test opening a specific group within a Zarr store using `open_datatree`.""" - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") group = "/set2" original_dt = simple_datatree original_dt.to_zarr(filepath) @@ -568,10 +564,7 @@ def test_open_groups_chunks(self, tmpdir) -> None: """Test `open_groups` with chunks on a zarr store.""" chunks = {"x": 2, "y": 1} - filepath = tmpdir / "test.zarr" - - chunks = {"x": 2, "y": 1} - + filepath = str(tmpdir / "test.zarr") root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) set1_data = xr.Dataset({"a": ("y", [-1, 0, 1]), "b": ("x", [-10, 6])}) set2_data = xr.Dataset({"a": ("y", [1, 2, 3]), "b": ("x", [0.1, 0.2])}) @@ -605,7 +598,7 @@ def test_write_subgroup(self, tmpdir): expected_dt = original_dt.copy() expected_dt.name = None - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt.to_zarr(filepath) with open_datatree(filepath, engine="zarr") as roundtrip_dt: @@ -620,7 +613,7 @@ def test_write_inherited_coords_false(self, tmpdir): } ) - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt.to_zarr(filepath, write_inherited_coords=False) with open_datatree(filepath, engine="zarr") as roundtrip_dt: @@ -639,7 +632,7 @@ def test_write_inherited_coords_true(self, tmpdir): } ) - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt.to_zarr(filepath, write_inherited_coords=True) with open_datatree(filepath, engine="zarr") as roundtrip_dt: From 379db18f564361f29310b4c0d7a80f01383bfd32 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 3 Feb 2025 09:18:23 -0600 Subject: [PATCH 03/17] adding @requires_zarr_v3 decorator to TestZarrDatatreeIO --- xarray/tests/test_backends_datatree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index b87ab98ae2b..efabd0611ca 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -374,7 +374,7 @@ class TestH5NetCDFDatatreeIO(DatatreeIOBase): engine: T_DataTreeNetcdfEngine | None = "h5netcdf" -# @requires_zarr_v3 +@requires_zarr_v3 class TestZarrDatatreeIO: engine = "zarr" From 846dc505fa3ab02e7eec97517d7a3e6f71b3c0b9 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 3 Feb 2025 09:20:29 -0600 Subject: [PATCH 04/17] replacing 0 with 1 in _create_test_datatree wich will write a chunk --- xarray/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/conftest.py b/xarray/tests/conftest.py index c3f1ccbfe3c..44f94d32a33 100644 --- a/xarray/tests/conftest.py +++ b/xarray/tests/conftest.py @@ -191,7 +191,7 @@ def create_test_datatree(): """ def _create_test_datatree(modify=lambda ds: ds): - set1_data = modify(xr.Dataset({"a": 0, "b": 1})) + set1_data = modify(xr.Dataset({"a": 1, "b": 2})) set2_data = modify(xr.Dataset({"a": ("x", [2, 3]), "b": ("x", [0.1, 0.2])})) root_data = modify(xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])})) From ddfd0b5d434d93a4cfdeb90e110ce82e2b54a408 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 15:30:54 +0000 Subject: [PATCH 05/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_backends_datatree.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index efabd0611ca..771db185d06 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -15,7 +15,7 @@ requires_dask, requires_h5netcdf, requires_netCDF4, - requires_zarr, requires_zarr_v3, + requires_zarr_v3, ) if TYPE_CHECKING: @@ -142,6 +142,7 @@ def unaligned_datatree_zarr(tmp_path_factory): b (x) float64 16B ... """ from zarr import consolidate_metadata + filepath = tmp_path_factory.mktemp("data") / "unaligned_simple_datatree.zarr" root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) set1_data = xr.Dataset({"a": 0, "b": 1}) @@ -399,11 +400,21 @@ def test_zarr_encoding(self, tmpdir, simple_datatree): with open_datatree(filepath, engine="zarr") as roundtrip_dt: print(roundtrip_dt["/set2/a"].encoding) - retrieved_compressor = roundtrip_dt["/set2/a"].encoding["compressors"][0] # Get the BloscCodec object - assert retrieved_compressor.cname.name == comp["compressor"]["configuration"]["cname"] - assert retrieved_compressor.clevel == comp["compressor"]["configuration"]["clevel"] - assert retrieved_compressor.shuffle.name == comp["compressor"]["configuration"]["shuffle"] - + retrieved_compressor = roundtrip_dt["/set2/a"].encoding["compressors"][ + 0 + ] # Get the BloscCodec object + assert ( + retrieved_compressor.cname.name + == comp["compressor"]["configuration"]["cname"] + ) + assert ( + retrieved_compressor.clevel + == comp["compressor"]["configuration"]["clevel"] + ) + assert ( + retrieved_compressor.shuffle.name + == comp["compressor"]["configuration"]["shuffle"] + ) enc["/not/a/group"] = {"foo": "bar"} # type: ignore[dict-item] with pytest.raises(ValueError, match="unexpected encoding group.*"): @@ -435,7 +446,6 @@ def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree): assert_equal(original_dt, roundtrip_dt) def test_to_zarr_default_write_mode(self, tmpdir, simple_datatree): - simple_datatree.to_zarr(str(tmpdir)) # with default settings, to_zarr should not overwrite an existing dir From 3f9a8fb0d6da599c7e2dfcaf8ffe009ad1b9afe6 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 3 Feb 2025 13:42:21 -0600 Subject: [PATCH 06/17] fixing issues with groups --- xarray/backends/zarr.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 5eaf4684893..cfd24107c80 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -655,16 +655,12 @@ def open_store( use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask, zarr_format=zarr_format, ) - group_paths: list[str] = list(_iter_zarr_groups(zarr_group, parent=group)) - group_members = {path: zarr_group.get(path.lstrip("/")) for path in group_paths} - if "/" in group_paths: - group_members["/"] = zarr_group - else: - group_members.pop("/", None) + + group_members = {path: store_group for path, store_group in list(_iter_zarr_groups(zarr_group, parent=group))} return { group: cls( - store_member, + store_group, mode, consolidate_on_close, append_dim, @@ -675,7 +671,7 @@ def open_store( use_zarr_fill_value_as_mask, cache_members=cache_members, ) - for group, store_member in group_members.items() + for group, store_group in group_members.items() } @classmethod @@ -1705,11 +1701,14 @@ def open_groups_as_dict( def _iter_zarr_groups(root: ZarrGroup, parent: str = "/") -> Iterable[str]: + from zarr.core.group import Group parent_nodepath = NodePath(parent) - yield str(parent_nodepath) - for path, group in root.groups(): + yield str(parent_nodepath), root + # for path, group in root.groups(): + for path, group in root.members(): gpath = parent_nodepath / path - yield from _iter_zarr_groups(group, parent=str(gpath)) + if isinstance(group, Group): + yield from _iter_zarr_groups(group, parent=str(gpath)) def _get_open_params( @@ -1757,7 +1756,7 @@ def _get_open_params( consolidated = False if _zarr_v3(): - missing_exc = ValueError + missing_exc = AssertionError else: missing_exc = zarr.errors.GroupNotFoundError From f1406589bba439d285c82780f4f03cd3fda78cd5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 19:42:54 +0000 Subject: [PATCH 07/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/zarr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index cfd24107c80..bc997bb1334 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -656,7 +656,10 @@ def open_store( zarr_format=zarr_format, ) - group_members = {path: store_group for path, store_group in list(_iter_zarr_groups(zarr_group, parent=group))} + group_members = { + path: store_group + for path, store_group in list(_iter_zarr_groups(zarr_group, parent=group)) + } return { group: cls( @@ -1702,6 +1705,7 @@ def open_groups_as_dict( def _iter_zarr_groups(root: ZarrGroup, parent: str = "/") -> Iterable[str]: from zarr.core.group import Group + parent_nodepath = NodePath(parent) yield str(parent_nodepath), root # for path, group in root.groups(): From 403afa9f7fd1d63e7eac6c3202b9b42b16b45945 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 3 Feb 2025 13:47:53 -0600 Subject: [PATCH 08/17] fixing issue with dict creation --- xarray/backends/zarr.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index bc997bb1334..637859f8338 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -656,10 +656,7 @@ def open_store( zarr_format=zarr_format, ) - group_members = { - path: store_group - for path, store_group in list(_iter_zarr_groups(zarr_group, parent=group)) - } + group_members = dict(list(_iter_zarr_groups(zarr_group, parent=group))) return { group: cls( From fd357fab77cb5190f6acc5a6f02a4864fb8c57b5 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 3 Feb 2025 13:59:10 -0600 Subject: [PATCH 09/17] fixing issues with Mypy --- xarray/backends/zarr.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 637859f8338..0f5c655a67b 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -656,7 +656,7 @@ def open_store( zarr_format=zarr_format, ) - group_members = dict(list(_iter_zarr_groups(zarr_group, parent=group))) + group_members: dict = dict(_iter_zarr_groups(zarr_group, parent=group)) return { group: cls( @@ -1700,9 +1700,8 @@ def open_groups_as_dict( return groups_dict -def _iter_zarr_groups(root: ZarrGroup, parent: str = "/") -> Iterable[str]: +def _iter_zarr_groups(root: ZarrGroup, parent: str = "/") -> tuple[str, Any]: from zarr.core.group import Group - parent_nodepath = NodePath(parent) yield str(parent_nodepath), root # for path, group in root.groups(): From 8b993a10cf509d262f0f494972d62d9dd0286088 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 19:59:47 +0000 Subject: [PATCH 10/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/zarr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 0f5c655a67b..5b8b95d9c8a 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1702,6 +1702,7 @@ def open_groups_as_dict( def _iter_zarr_groups(root: ZarrGroup, parent: str = "/") -> tuple[str, Any]: from zarr.core.group import Group + parent_nodepath = NodePath(parent) yield str(parent_nodepath), root # for path, group in root.groups(): From d4aeecae89196df878438b8874ec6cf20df379b6 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 3 Feb 2025 15:20:28 -0600 Subject: [PATCH 11/17] refactoring open_store in ZarrStore class to use Zarr.core.group.Group members --- xarray/backends/zarr.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 5b8b95d9c8a..fe406701197 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd +import zarr.core.group from xarray import coding, conventions from xarray.backends.common import ( @@ -655,9 +656,13 @@ def open_store( use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask, zarr_format=zarr_format, ) - - group_members: dict = dict(_iter_zarr_groups(zarr_group, parent=group)) - + group_members: dict = dict(zarr_group.members(max_depth=1000)) + group_members = { + (f"{group}/{path}" if group != "/" else path): group_store + for path, group_store in group_members.items() + if isinstance(group_store, zarr.core.group.Group) + } + group_members[group] = zarr_group return { group: cls( store_group, @@ -1653,8 +1658,6 @@ def open_groups_as_dict( zarr_version=None, zarr_format=None, ) -> dict[str, Dataset]: - from xarray.core.treenode import NodePath - filename_or_obj = _normalize_path(filename_or_obj) # Check for a group and make it a parent if it exists @@ -1700,18 +1703,6 @@ def open_groups_as_dict( return groups_dict -def _iter_zarr_groups(root: ZarrGroup, parent: str = "/") -> tuple[str, Any]: - from zarr.core.group import Group - - parent_nodepath = NodePath(parent) - yield str(parent_nodepath), root - # for path, group in root.groups(): - for path, group in root.members(): - gpath = parent_nodepath / path - if isinstance(group, Group): - yield from _iter_zarr_groups(group, parent=str(gpath)) - - def _get_open_params( store, mode, From 3125647d28565063178d2d5b757e524ecd7eb4df Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 3 Feb 2025 15:21:20 -0600 Subject: [PATCH 12/17] refactoring datree test for zarr ensuring compatibility with zarr-python v3 --- xarray/tests/test_backends_datatree.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index f5e176d1d50..9d95f99a6c0 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -538,8 +538,8 @@ def test_open_groups(self, unaligned_datatree_zarr) -> None: unaligned_dict_of_datasets = open_groups(unaligned_datatree_zarr, engine="zarr") assert "/" in unaligned_dict_of_datasets.keys() - assert "/Group1" in unaligned_dict_of_datasets.keys() - assert "/Group2" in unaligned_dict_of_datasets.keys() + assert "Group1" in unaligned_dict_of_datasets.keys() + assert "Group2" in unaligned_dict_of_datasets.keys() # Check that group name returns the correct datasets with xr.open_dataset( unaligned_datatree_zarr, group="/", engine="zarr" @@ -548,15 +548,17 @@ def test_open_groups(self, unaligned_datatree_zarr) -> None: with xr.open_dataset( unaligned_datatree_zarr, group="Group1", engine="zarr" ) as expected: - assert_identical(unaligned_dict_of_datasets["/Group1"], expected) + assert_identical(unaligned_dict_of_datasets["Group1"], expected) with xr.open_dataset( - unaligned_datatree_zarr, group="/Group2", engine="zarr" + unaligned_datatree_zarr, group="Group2", engine="zarr" ) as expected: - assert_identical(unaligned_dict_of_datasets["/Group2"], expected) - + assert_identical(unaligned_dict_of_datasets["Group2"], expected) for ds in unaligned_dict_of_datasets.values(): ds.close() + @pytest.mark.filterwarnings( + "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning" + ) def test_open_datatree_specific_group(self, tmpdir, simple_datatree) -> None: """Test opening a specific group within a Zarr store using `open_datatree`.""" filepath = str(tmpdir / "test.zarr") @@ -615,6 +617,9 @@ def test_write_subgroup(self, tmpdir): assert_equal(original_dt, roundtrip_dt) assert_identical(expected_dt, roundtrip_dt) + @pytest.mark.filterwarnings( + "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning" + ) def test_write_inherited_coords_false(self, tmpdir): original_dt = DataTree.from_dict( { @@ -634,6 +639,9 @@ def test_write_inherited_coords_false(self, tmpdir): with open_datatree(filepath, group="child", engine="zarr") as roundtrip_child: assert_identical(expected_child, roundtrip_child) + @pytest.mark.filterwarnings( + "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning" + ) def test_write_inherited_coords_true(self, tmpdir): original_dt = DataTree.from_dict( { From 0c7485b56da7db523464440e165140f6444deec5 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 3 Feb 2025 15:33:40 -0600 Subject: [PATCH 13/17] importing zarr.core.group only inside open_store function --- xarray/backends/zarr.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index fe406701197..5f8afd006be 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -9,7 +9,6 @@ import numpy as np import pandas as pd -import zarr.core.group from xarray import coding, conventions from xarray.backends.common import ( @@ -656,16 +655,18 @@ def open_store( use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask, zarr_format=zarr_format, ) + from zarr.core.group import Group + group_members: dict = dict(zarr_group.members(max_depth=1000)) group_members = { (f"{group}/{path}" if group != "/" else path): group_store for path, group_store in group_members.items() - if isinstance(group_store, zarr.core.group.Group) + if isinstance(group_store, Group) } group_members[group] = zarr_group return { group: cls( - store_group, + group_store, mode, consolidate_on_close, append_dim, @@ -676,7 +677,7 @@ def open_store( use_zarr_fill_value_as_mask, cache_members=cache_members, ) - for group, store_group in group_members.items() + for group, group_store in group_members.items() } @classmethod From fdeee945b9e840924d1844fec8f255267e8b7d1f Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 3 Feb 2025 15:58:25 -0600 Subject: [PATCH 14/17] documenting changes in what's-nwe.rst file --- doc/whats-new.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 56d9a3d9bed..2d71d426458 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,7 +33,8 @@ Deprecations Bug fixes ~~~~~~~~~ - +- Fix incompatibilities between ``open_datatree`` and Zarr-Python V3, along with refactoring ``TestZarrDatatreeIO`` (:issue:`9960`, :pull:`10020`). + By `Alfonso Ladino-Rincon `_. Documentation ~~~~~~~~~~~~~ From f3e2c6695e02ef023ffa022468142b50703b48da Mon Sep 17 00:00:00 2001 From: Alfonso Ladino Date: Mon, 3 Feb 2025 19:23:59 -0600 Subject: [PATCH 15/17] Update xarray/backends/zarr.py Co-authored-by: Joe Hamman --- xarray/backends/zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 5f8afd006be..c6d9d527b9e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -655,7 +655,7 @@ def open_store( use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask, zarr_format=zarr_format, ) - from zarr.core.group import Group + from zarr import Group group_members: dict = dict(zarr_group.members(max_depth=1000)) group_members = { From f9f1043f7a40c5a4d5485176bb2d85fad4a09a83 Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 6 Feb 2025 08:58:30 -0600 Subject: [PATCH 16/17] keeping grroup creation compatible with zarr v2 --- xarray/backends/zarr.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c6d9d527b9e..c1367f4d677 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -657,13 +657,19 @@ def open_store( ) from zarr import Group - group_members: dict = dict(zarr_group.members(max_depth=1000)) - group_members = { - (f"{group}/{path}" if group != "/" else path): group_store - for path, group_store in group_members.items() - if isinstance(group_store, Group) - } - group_members[group] = zarr_group + if _zarr_v3(): + group_members: dict = { + (f"{group}/{path}" if group != "/" else path): group_store + for path, group_store in dict( + zarr_group.members(max_depth=1000) + ).items() + if isinstance(group_store, Group) + } + group_members[group] = zarr_group + else: + group_paths = list(_iter_zarr_groups(zarr_group, parent=group)) + group_members: dict = {path: zarr_group.get(path) for path in group_paths} + return { group: cls( group_store, @@ -1704,6 +1710,14 @@ def open_groups_as_dict( return groups_dict +def _iter_zarr_groups(root: ZarrGroup, parent: str = "/") -> Iterable[str]: + parent_nodepath = NodePath(parent) + yield str(parent_nodepath) + for path, group in root.groups(): + gpath = parent_nodepath / path + yield from _iter_zarr_groups(group, parent=str(gpath)) + + def _get_open_params( store, mode, From ec2086a0d253584b12165a2dd385f85c4088a931 Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 6 Feb 2025 09:33:43 -0600 Subject: [PATCH 17/17] fixing issue with mypy --- xarray/backends/zarr.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c1367f4d677..8ae962d7741 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -657,8 +657,9 @@ def open_store( ) from zarr import Group + group_members: dict if _zarr_v3(): - group_members: dict = { + group_members = { (f"{group}/{path}" if group != "/" else path): group_store for path, group_store in dict( zarr_group.members(max_depth=1000) @@ -668,7 +669,7 @@ def open_store( group_members[group] = zarr_group else: group_paths = list(_iter_zarr_groups(zarr_group, parent=group)) - group_members: dict = {path: zarr_group.get(path) for path in group_paths} + group_members = {path: zarr_group.get(path) for path in group_paths} return { group: cls(