From 0da74a45828b3576d7fc0bea26c1e2906dcc8821 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 24 Apr 2025 17:20:49 +0200 Subject: [PATCH 1/4] (fix): structured dtype consolidated metadata fill value --- src/zarr/core/group.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 3f8dad1740..3f4f15b9e9 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import base64 import itertools import json import logging @@ -358,7 +359,13 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: d[f"{k}/{ZATTRS_JSON}"] = _replace_special_floats(attrs) if "shape" in v: # it's an array - d[f"{k}/{ZARRAY_JSON}"] = _replace_special_floats(v) + if isinstance(v.get("fill_value", None), np.void): + v["fill_value"] = base64.standard_b64encode( + cast(bytes, v["fill_value"]) + ).decode("ascii") + else: + v = _replace_special_floats(v) + d[f"{k}/{ZARRAY_JSON}"] = v else: d[f"{k}/{ZGROUP_JSON}"] = { "zarr_format": self.zarr_format, From 8136b005328710e37d3e6705c2f98960e596b792 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 24 Apr 2025 17:21:01 +0200 Subject: [PATCH 2/4] (chore): relnote --- changes/2998.bugfix.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/2998.bugfix.md diff --git a/changes/2998.bugfix.md b/changes/2998.bugfix.md new file mode 100644 index 0000000000..7b94223122 --- /dev/null +++ b/changes/2998.bugfix.md @@ -0,0 +1 @@ +Fix structured `dtype` fill value serialization for consolidated metadata \ No newline at end of file From 8e23bf9b516e91e75fa6fa5dbb3478cc386e8ed9 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 24 Apr 2025 17:21:12 +0200 Subject: [PATCH 3/4] (chore): test --- tests/test_metadata/test_v2.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index 4600a977d4..07f06d218b 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -316,3 +316,16 @@ def test_zstd_checksum() -> None: arr.metadata.to_buffer_dict(default_buffer_prototype())[".zarray"].to_bytes() ) assert "checksum" not in metadata["compressor"] + + +def test_structured_dtype_fill_value_serialization(tmp_path): + group_path = tmp_path / "test.zarr" + root_group = zarr.open_group(group_path, mode="w", zarr_format=2) + root_group.create_array( + name="structured_headers", + shape=(100, 100), + chunks=(100, 100), + dtype=np.dtype([("foo", "i4"), ("bar", "i4")]), + ) + + zarr.consolidate_metadata(root_group.store, zarr_format=2) From feb367e5ba5bf2cfdd89aa1562054e4efa3f717a Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 25 Apr 2025 00:05:42 +0200 Subject: [PATCH 4/4] (fix): more robust testing --- tests/test_metadata/test_v2.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index 07f06d218b..08b9cb2507 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -318,14 +318,26 @@ def test_zstd_checksum() -> None: assert "checksum" not in metadata["compressor"] -def test_structured_dtype_fill_value_serialization(tmp_path): +@pytest.mark.parametrize( + "fill_value", [None, np.void((0, 0), np.dtype([("foo", "i4"), ("bar", "i4")]))] +) +def test_structured_dtype_fill_value_serialization(tmp_path, fill_value): group_path = tmp_path / "test.zarr" root_group = zarr.open_group(group_path, mode="w", zarr_format=2) + dtype = np.dtype([("foo", "i4"), ("bar", "i4")]) root_group.create_array( - name="structured_headers", + name="structured_dtype", shape=(100, 100), chunks=(100, 100), - dtype=np.dtype([("foo", "i4"), ("bar", "i4")]), + dtype=dtype, + fill_value=fill_value, ) zarr.consolidate_metadata(root_group.store, zarr_format=2) + root_group = zarr.open_group(group_path, mode="r") + assert ( + root_group.metadata.consolidated_metadata.to_dict()["metadata"]["structured_dtype"][ + "fill_value" + ] + == fill_value + )