From 19675e4a7119a3d2015ef6aa3e47638cb97b3c7e Mon Sep 17 00:00:00 2001
From: Tom Nicholas
Date: Mon, 24 Mar 2025 15:34:31 -0400
Subject: [PATCH 1/2] add test

---
 tests/test_metadata/test_v3.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py
index a47cbf43bb..afbab2a57c 100644
--- a/tests/test_metadata/test_v3.py
+++ b/tests/test_metadata/test_v3.py
@@ -411,3 +411,20 @@ def test_dtypes(dtype_str: str) -> None:
     else:
         # return type for vlen types may vary depending on numpy version
         assert dt.byte_count is None
+
+
+def test_metadata_comparison_with_nan_fill_value():
+    # regression test for https://github.com/zarr-developers/zarr-python/issues/2929
+    metadata_dict = {
+        "zarr_format": 3,
+        "node_type": "array",
+        "shape": (1,),
+        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
+        "data_type": np.dtype("float32"),
+        "chunk_key_encoding": {"name": "default", "separator": "."},
+        "codecs": ({"name": "bytes", "configuration": {"endian": "little"}},),
+        "fill_value": np.float32("nan"),
+    }
+    metadata1 = ArrayV3Metadata.from_dict(metadata_dict)
+    metadata2 = ArrayV3Metadata.from_dict(metadata_dict)
+    assert metadata1 == metadata2

From affa01ca1eef7bbb355474fb3ee8f0c0c7d16785 Mon Sep 17 00:00:00 2001
From: Tom Nicholas
Date: Mon, 24 Mar 2025 15:37:08 -0400
Subject: [PATCH 2/2] override __eq__ method

---
 src/zarr/abc/metadata.py     | 42 +++++++++++++++++++++++++++++++++++++++++-
 src/zarr/core/metadata/v3.py |  2 +-
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/src/zarr/abc/metadata.py b/src/zarr/abc/metadata.py
index a56f986645..55cc0bae56 100644
--- a/src/zarr/abc/metadata.py
+++ b/src/zarr/abc/metadata.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from collections.abc import Sequence
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
     from typing import Self
@@ -10,6 +10,8 @@
 
 from dataclasses import dataclass, fields
 
+import numpy as np
+
 __all__ = ["Metadata"]
 
 
@@ -44,3 +46,41 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
         """
 
         return cls(**data)
+
+    def __eq__(self, other: Any) -> bool:
+        """
+        Check whether two metadata objects are identical.
+
+        The ``to_dict`` representations are compared. A ``fill_value`` entry, when
+        present, is compared separately so that NaN fill values of the same dtype
+        are treated as equal.
+        """
+        if not isinstance(other, type(self)):
+            # defer to the reflected comparison; Python falls back to False
+            return NotImplemented
+
+        metadata_dict1 = self.to_dict()
+        metadata_dict2 = other.to_dict()
+
+        # fill_value is a special case because NaN never compares equal to itself,
+        # see https://stackoverflow.com/a/10059796
+        # Not every Metadata subclass has a fill_value, so pop with a sentinel.
+        missing: Any = object()
+        fill_value1: Any = metadata_dict1.pop("fill_value", missing)
+        fill_value2: Any = metadata_dict2.pop("fill_value", missing)
+        try:
+            both_nan = bool(np.isnan(fill_value1) and np.isnan(fill_value2))
+        except TypeError:
+            # np.isnan rejects non-numeric fill values (e.g. None, str, bytes)
+            both_nan = False
+
+        if both_nan:
+            # NaN fill values match only when their dtype (or Python type) matches
+            dtype1 = getattr(fill_value1, "dtype", type(fill_value1))
+            dtype2 = getattr(fill_value2, "dtype", type(fill_value2))
+            fill_values_equal = dtype1 == dtype2
+        else:
+            fill_values_equal = fill_value1 == fill_value2
+
+        # the remaining entries are compared with plain dict equality
+        return bool(fill_values_equal) and metadata_dict1 == metadata_dict2
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index 9154762648..22606b1ae8 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -233,7 +233,7 @@ class ArrayV3MetadataDict(TypedDict):
     attributes: dict[str, JSON]
 
 
-@dataclass(frozen=True, kw_only=True)
+@dataclass(frozen=True, kw_only=True, eq=False)
 class ArrayV3Metadata(Metadata):
     shape: ChunkCoords
     data_type: DataType