From ba0416d01b5922ba22a75372eb2452120ec8071a Mon Sep 17 00:00:00 2001 From: norlandrhagen Date: Thu, 9 Oct 2025 11:45:02 -0600 Subject: [PATCH 1/5] remove ZARR_DEFAULT_FILL_VALUE hardcoded lookup --- virtualizarr/parsers/zarr.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/virtualizarr/parsers/zarr.py b/virtualizarr/parsers/zarr.py index e94c456d..e90bce4e 100644 --- a/virtualizarr/parsers/zarr.py +++ b/virtualizarr/parsers/zarr.py @@ -5,7 +5,6 @@ from pathlib import Path # noqa from typing import TYPE_CHECKING, Any, Hashable -import numpy as np from zarr.api.asynchronous import open_group as open_group_async from zarr.core.metadata import ArrayV3Metadata from zarr.storage import ObjectStore @@ -20,18 +19,6 @@ from virtualizarr.registry import ObjectStoreRegistry from virtualizarr.vendor.zarr.core.common import _concurrent_map -FillValueT = bool | str | float | int | list | None - -ZARR_DEFAULT_FILL_VALUE: dict[str, FillValueT] = { - # numpy dtypes's hierarchy lets us avoid checking for all the widths - # https://numpy.org/doc/stable/reference/arrays.scalars.html - np.dtype("bool").kind: False, - np.dtype("int").kind: 0, - np.dtype("float").kind: 0.0, - np.dtype("complex").kind: [0.0, 0.0], - np.dtype("datetime64").kind: 0, -} - if TYPE_CHECKING: import zarr @@ -77,12 +64,7 @@ async def build_chunk_manifest(zarr_array: zarr.AsyncArray, path: str) -> ChunkM def get_metadata(zarr_array: zarr.AsyncArray[Any]) -> ArrayV3Metadata: - fill_value = zarr_array.metadata.fill_value - if fill_value is not None: - fill_value = ZARR_DEFAULT_FILL_VALUE[zarr_array.metadata.fill_value.dtype.kind] - zarr_format = zarr_array.metadata.zarr_format - if zarr_format == 2: # TODO: Once we want to support V2, we will have to deconstruct the # zarr_array codecs etc. and reconstruct them with create_v3_array_metadata From 4a9d9c81c17a63a5c8dade3109235ac29a43d806 Mon Sep 17 00:00:00 2001 From: norlandrhagen Date: Tue, 14 Oct 2025 10:34:35 -0700 Subject: [PATCH 2/5] add regression test for fill_value in get_metadata + release notes --- conftest.py | 6 +++--- docs/releases.md | 13 +++++++++++++ virtualizarr/tests/test_parsers/test_zarr.py | 11 +++++++++-- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/conftest.py b/conftest.py index 870f339e..84e26152 100644 --- a/conftest.py +++ b/conftest.py @@ -72,12 +72,12 @@ def local_registry(): return ObjectStoreRegistry({"file://": LocalStore()}) -@pytest.fixture() -def zarr_store_scalar(tmpdir): +@pytest.fixture(params=["int8", "uint8", "float32"]) +def zarr_store_scalar(tmpdir, request): import zarr store = zarr.storage.MemoryStore() - zarr_store_scalar = zarr.create_array(store=store, shape=(), dtype="int8") + zarr_store_scalar = zarr.create_array(store=store, shape=(), dtype=request.param) zarr_store_scalar[()] = 42 return zarr_store_scalar diff --git a/docs/releases.md b/docs/releases.md index e4238b02..aa69a2db 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -1,5 +1,18 @@ # Release notes + + +## v2.1.3 (Upcoming) + + +### Bug fixes + +- `ZarrParser` no longer uses `ZARR_DEFAULT_FILL_VALUE` lookup to infer missing `fill_value`. + ([#666](https://github.com/zarr-developers/VirtualiZarr/pull/812)). + By [Raphael Hagen](https://github.com/norlandrhagen). + +### Internal changes + ## v2.1.2 (3rd September 2025) Patch release with minor bug fixes for the DMRPParser and Icechunk writing behavior. diff --git a/virtualizarr/tests/test_parsers/test_zarr.py b/virtualizarr/tests/test_parsers/test_zarr.py index b38f00ef..cbd7118d 100644 --- a/virtualizarr/tests/test_parsers/test_zarr.py +++ b/virtualizarr/tests/test_parsers/test_zarr.py @@ -1,11 +1,12 @@ import numpy as np import pytest +import zarr from obstore.store import LocalStore from virtualizarr import open_virtual_dataset from virtualizarr.manifests import ManifestArray from virtualizarr.parsers import ZarrParser -from virtualizarr.parsers.zarr import get_chunk_mapping_prefix +from virtualizarr.parsers.zarr import get_chunk_mapping_prefix, get_metadata from virtualizarr.registry import ObjectStoreRegistry @@ -105,7 +106,7 @@ def test_virtual_dataset_zarr_attrs(self, zarr_store): assert expected == actual -def test_scalar_get_chunk_mapping_prefix(zarr_store_scalar): +def test_scalar_get_chunk_mapping_prefix(zarr_store_scalar: zarr.Array): # Use a scalar zarr store with a /c/ representing the scalar: # https://zarr-specs.readthedocs.io/en/latest/v3/chunk-key-encodings/default/index.html#description @@ -118,3 +119,9 @@ def test_scalar_get_chunk_mapping_prefix(zarr_store_scalar): ) assert chunk_map["c"]["offset"] == 0 assert chunk_map["c"]["length"] == 10 + + +def test_get_metadata(zarr_store_scalar: zarr.Array): + # Check that the `get_metadata` function is assigning fill_values + zarr_array_metadata = get_metadata(zarr_array=zarr_store_scalar) + assert zarr_array_metadata.fill_value == zarr_store_scalar.metadata.fill_value From 59c820b41a10d19c8f31298bbefba9b87286f789 Mon Sep 17 00:00:00 2001 From: norlandrhagen Date: Tue, 14 Oct 2025 10:43:20 -0700 Subject: [PATCH 3/5] fix conftest --- conftest.py | 11 ++++++++--- virtualizarr/tests/test_parsers/test_zarr.py | 6 +++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/conftest.py b/conftest.py index 84e26152..5ff12b8c 100644 --- a/conftest.py +++ b/conftest.py @@ -10,6 +10,7 @@ import numpy as np import pytest import xarray as xr +import zarr from obstore.store import LocalStore from xarray.core.variable import Variable @@ -73,11 +74,15 @@ def local_registry(): @pytest.fixture(params=["int8", "uint8", "float32"]) -def zarr_store_scalar(tmpdir, request): - import zarr +def zarr_array_fill_value(request): + store = zarr.storage.MemoryStore() + return zarr.create_array(store=store, shape=(), dtype=request.param) + +@pytest.fixture() +def zarr_store_scalar(): store = zarr.storage.MemoryStore() - zarr_store_scalar = zarr.create_array(store=store, shape=(), dtype=request.param) + zarr_store_scalar = zarr.create_array(store=store, shape=(), dtype="int8") zarr_store_scalar[()] = 42 return zarr_store_scalar diff --git a/virtualizarr/tests/test_parsers/test_zarr.py b/virtualizarr/tests/test_parsers/test_zarr.py index cbd7118d..f7c44b17 100644 --- a/virtualizarr/tests/test_parsers/test_zarr.py +++ b/virtualizarr/tests/test_parsers/test_zarr.py @@ -121,7 +121,7 @@ def test_scalar_get_chunk_mapping_prefix(zarr_store_scalar: zarr.Array): assert chunk_map["c"]["length"] == 10 -def test_get_metadata(zarr_store_scalar: zarr.Array): +def test_get_metadata(zarr_array_fill_value: zarr.Array): # Check that the `get_metadata` function is assigning fill_values - zarr_array_metadata = get_metadata(zarr_array=zarr_store_scalar) - assert zarr_array_metadata.fill_value == zarr_store_scalar.metadata.fill_value + zarr_array_metadata = get_metadata(zarr_array=zarr_array_fill_value) + assert zarr_array_metadata.fill_value == zarr_array_fill_value.metadata.fill_value From 8ef951abcc076b7c07f3f8255f396357d3855707 Mon Sep 17 00:00:00 2001 From: norlandrhagen Date: Tue, 14 Oct 2025 11:31:43 -0700 Subject: [PATCH 4/5] mypy --- virtualizarr/parsers/zarr.py | 11 +++++++---- virtualizarr/tests/test_parsers/test_zarr.py | 6 ++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/virtualizarr/parsers/zarr.py b/virtualizarr/parsers/zarr.py index e90bce4e..9c484cda 100644 --- a/virtualizarr/parsers/zarr.py +++ b/virtualizarr/parsers/zarr.py @@ -5,6 +5,7 @@ from pathlib import Path # noqa from typing import TYPE_CHECKING, Any, Hashable +import zarr from zarr.api.asynchronous import open_group as open_group_async from zarr.core.metadata import ArrayV3Metadata from zarr.storage import ObjectStore @@ -22,8 +23,10 @@ if TYPE_CHECKING: import zarr +ZarrArrayType = zarr.AsyncArray | zarr.Array -async def get_chunk_mapping_prefix(zarr_array: zarr.AsyncArray, path: str) -> dict: + +async def get_chunk_mapping_prefix(zarr_array: ZarrArrayType, path: str) -> dict: """Create a dictionary to pass into ChunkManifest __init__""" # TODO: For when we want to support reading V2 we should parse the /c/ and "/" between chunks @@ -57,13 +60,13 @@ async def get_chunk_mapping_prefix(zarr_array: zarr.AsyncArray, path: str) -> di } -async def build_chunk_manifest(zarr_array: zarr.AsyncArray, path: str) -> ChunkManifest: +async def build_chunk_manifest(zarr_array: ZarrArrayType, path: str) -> ChunkManifest: """Build a ChunkManifest from a dictionary""" chunk_map = await get_chunk_mapping_prefix(zarr_array=zarr_array, path=path) return ChunkManifest(chunk_map) -def get_metadata(zarr_array: zarr.AsyncArray[Any]) -> ArrayV3Metadata: +def get_metadata(zarr_array: ZarrArrayType) -> ArrayV3Metadata: zarr_format = zarr_array.metadata.zarr_format if zarr_format == 2: # TODO: Once we want to support V2, we will have to deconstruct the @@ -71,7 +74,7 @@ def get_metadata(zarr_array: zarr.AsyncArray[Any]) -> ArrayV3Metadata: raise NotImplementedError("Reading Zarr V2 currently not supported.") elif zarr_format == 3: - return zarr_array.metadata + return zarr_array.metadata # type: ignore[return-value] else: raise NotImplementedError("Zarr format is not recognized as v2 or v3.") diff --git a/virtualizarr/tests/test_parsers/test_zarr.py b/virtualizarr/tests/test_parsers/test_zarr.py index f7c44b17..75050e3e 100644 --- a/virtualizarr/tests/test_parsers/test_zarr.py +++ b/virtualizarr/tests/test_parsers/test_zarr.py @@ -9,6 +9,8 @@ from virtualizarr.parsers.zarr import get_chunk_mapping_prefix, get_metadata from virtualizarr.registry import ObjectStoreRegistry +ZarrArrayType = zarr.AsyncArray | zarr.Array + @pytest.mark.parametrize( "zarr_store", @@ -106,7 +108,7 @@ def test_virtual_dataset_zarr_attrs(self, zarr_store): assert expected == actual -def test_scalar_get_chunk_mapping_prefix(zarr_store_scalar: zarr.Array): +def test_scalar_get_chunk_mapping_prefix(zarr_store_scalar: ZarrArrayType): # Use a scalar zarr store with a /c/ representing the scalar: # https://zarr-specs.readthedocs.io/en/latest/v3/chunk-key-encodings/default/index.html#description @@ -121,7 +123,7 @@ def test_scalar_get_chunk_mapping_prefix(zarr_store_scalar: zarr.Array): assert chunk_map["c"]["length"] == 10 -def test_get_metadata(zarr_array_fill_value: zarr.Array): +def test_get_metadata(zarr_array_fill_value: ZarrArrayType): # Check that the `get_metadata` function is assigning fill_values zarr_array_metadata = get_metadata(zarr_array=zarr_array_fill_value) assert zarr_array_metadata.fill_value == zarr_array_fill_value.metadata.fill_value From 842b6beb45fe2c374c24caa4c3ff8e6827df0cf1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 17:11:43 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/releases.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/releases.md b/docs/releases.md index 3daf1838..092622da 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -11,7 +11,7 @@ - `ZarrParser` no longer uses `ZARR_DEFAULT_FILL_VALUE` lookup to infer missing `fill_value`. ([#666](https://github.com/zarr-developers/VirtualiZarr/pull/812)). By [Raphael Hagen](https://github.com/norlandrhagen). - + ### Documentation ### Internal changes