zarr-developers · d-v-b · Apr 30, 2025 · Apr 24, 2025 · Apr 24, 2025 · Apr 24, 2025
diff --git a/changes/2998.bugfix.md b/changes/2998.bugfix.md
@@ -0,0 +1 @@
+Fix serialization of structured `dtype` fill values in consolidated metadata: `np.void` fill values are now base64-encoded.
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import asyncio
+import base64
 import itertools
 import json
 import logging
@@ -358,7 +359,13 @@
                     d[f"{k}/{ZATTRS_JSON}"] = _replace_special_floats(attrs)
                     if "shape" in v:
                         # it's an array
-                        d[f"{k}/{ZARRAY_JSON}"] = _replace_special_floats(v)
+                        if isinstance(v.get("fill_value", None), np.void):
+                            v["fill_value"] = base64.standard_b64encode(
+                                cast(bytes, v["fill_value"])
+                            ).decode("ascii")
+                        else:
+                            v = _replace_special_floats(v)
+                        d[f"{k}/{ZARRAY_JSON}"] = v
                     else:
                         d[f"{k}/{ZGROUP_JSON}"] = {
                             "zarr_format": self.zarr_format,

diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py
@@ -316,3 +316,28 @@ def test_zstd_checksum() -> None:
         arr.metadata.to_buffer_dict(default_buffer_prototype())[".zarray"].to_bytes()
     )
     assert "checksum" not in metadata["compressor"]
+
+
+@pytest.mark.parametrize(
+    "fill_value", [None, np.void((0, 0), np.dtype([("foo", "i4"), ("bar", "i4")]))]
+)
+def test_structured_dtype_fill_value_serialization(tmp_path, fill_value):
+    group_path = tmp_path / "test.zarr"
+    root_group = zarr.open_group(group_path, mode="w", zarr_format=2)
+    dtype = np.dtype([("foo", "i4"), ("bar", "i4")])
+    root_group.create_array(
+        name="structured_dtype",
+        shape=(100, 100),
+        chunks=(100, 100),
+        dtype=dtype,
+        fill_value=fill_value,
+    )
+
+    zarr.consolidate_metadata(root_group.store, zarr_format=2)
+    root_group = zarr.open_group(group_path, mode="r")
+    assert (
+        root_group.metadata.consolidated_metadata.to_dict()["metadata"]["structured_dtype"][
+            "fill_value"
+        ]
+        == fill_value
 try: 
     if isinstance(fill_value, list): 
         return np.array([tuple(fill_value)], dtype=dtype)[0] 
     elif isinstance(fill_value, tuple): 
         return np.array([fill_value], dtype=dtype)[0] 
     elif isinstance(fill_value, bytes): 
         return np.frombuffer(fill_value, dtype=dtype)[0] 
     elif isinstance(fill_value, str): 
         decoded = base64.standard_b64decode(fill_value) 
         return np.frombuffer(decoded, dtype=dtype)[0] 
     else: 
         return np.array(fill_value, dtype=dtype)[()] 
 try: 
     if isinstance(fill_value, list): 
         return np.array([tuple(fill_value)], dtype=dtype)[0] 
     elif isinstance(fill_value, tuple): 
         return np.array([fill_value], dtype=dtype)[0] 
     elif isinstance(fill_value, bytes): 
         return np.frombuffer(fill_value, dtype=dtype)[0] 
     elif isinstance(fill_value, str): 
         decoded = base64.standard_b64decode(fill_value) 
         return np.frombuffer(decoded, dtype=dtype)[0] 
     else: 
         return np.array(fill_value, dtype=dtype)[()] 
+    )
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Fix structured `dtype` fill value serialization for consolidated metadata