Skip to content

Model saving and loading fails with Zarr v3 #242

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
nicrie opened this issue Jan 24, 2025 · 1 comment
Open

Model saving and loading fails with Zarr v3 #242

nicrie opened this issue Jan 24, 2025 · 1 comment
Labels
bug Something isn't working

Comments

@nicrie
Copy link
Contributor

nicrie commented Jan 24, 2025

Describe the bug
Model saving and loading no longer work after the release of Zarr version 3.

Reproducible Minimal Working Example

import xarray as xr
from xeofs.single import EOF
t2m = xr.tutorial.open_dataset("air_temperature")
pca = EOF(n_modes=50)
pca.fit(t2m, "time")

pca.save("pca", engine="zarr")  # <---- UserWarning

pca.load("pca", engine="zarr")  # <---- AttributeError
Error Log
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
File /home/nrieger/Projects/teleconnections.py:1
----> 1 pca.load("pca2/", engine="zarr")

File ~/miniconda3/envs/minimal/lib/python3.11/site-packages/xeofs/base_model.py:189, in BaseModel.load(cls, path, engine, **kwargs)
    165 @classmethod
    166 def load(
    167     cls,
   (...)
    170     **kwargs,
    171 ) -> Self:
    172     """Load a saved model.
    173 
    174     Parameters
   (...)
    187 
    188     """
--> 189     dt = open_model_tree(path, engine=engine, **kwargs)
    190     model = cls.deserialize(dt)
    191     return model

File ~/miniconda3/envs/minimal/lib/python3.11/site-packages/xeofs/utils/io.py:26, in open_model_tree(path, engine, **kwargs)
     24 if engine == "zarr" and "chunks" not in kwargs:
     25     kwargs["chunks"] = {}
---> 26 dt = xr.open_datatree(path, engine=engine, **kwargs)
     27 if engine in ["netcdf4", "h5netcdf"]:
     28     dt = _desanitize_attrs_nc(dt)

File ~/miniconda3/envs/minimal/lib/python3.11/site-packages/xarray/backends/api.py:1113, in open_datatree(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
   1101 decoders = _resolve_decoders_kwargs(
   1102     decode_cf,
   1103     open_backend_dataset_parameters=(),
   (...)
   1109     decode_coords=decode_coords,
   1110 )
   1111 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
-> 1113 backend_tree = backend.open_datatree(
   1114     filename_or_obj,
   1115     drop_variables=drop_variables,
   1116     **decoders,
   1117     **kwargs,
   1118 )
   1120 tree = _datatree_from_backend_datatree(
   1121     backend_tree,
   1122     filename_or_obj,
   (...)
   1132     **kwargs,
   1133 )
   1135 return tree

File ~/miniconda3/envs/minimal/lib/python3.11/site-packages/xarray/backends/zarr.py:1614, in ZarrBackendEntrypoint.open_datatree(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, zarr_version, zarr_format)
   1593 def open_datatree(
   1594     self,
   1595     filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
   (...)
   1611     zarr_format=None,
   1612 ) -> DataTree:
   1613     filename_or_obj = _normalize_path(filename_or_obj)
-> 1614     groups_dict = self.open_groups_as_dict(
   1615         filename_or_obj=filename_or_obj,
   1616         mask_and_scale=mask_and_scale,
   1617         decode_times=decode_times,
   1618         concat_characters=concat_characters,
   1619         decode_coords=decode_coords,
   1620         drop_variables=drop_variables,
   1621         use_cftime=use_cftime,
   1622         decode_timedelta=decode_timedelta,
   1623         group=group,
   1624         mode=mode,
   1625         synchronizer=synchronizer,
   1626         consolidated=consolidated,
   1627         chunk_store=chunk_store,
   1628         storage_options=storage_options,
   1629         zarr_version=zarr_version,
   1630         zarr_format=zarr_format,
   1631     )
   1633     return datatree_from_dict_with_io_cleanup(groups_dict)

File ~/miniconda3/envs/minimal/lib/python3.11/site-packages/xarray/backends/zarr.py:1665, in ZarrBackendEntrypoint.open_groups_as_dict(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, zarr_version, zarr_format)
   1662 else:
   1663     parent = str(NodePath("/"))
-> 1665 stores = ZarrStore.open_store(
   1666     filename_or_obj,
   1667     group=parent,
   1668     mode=mode,
   1669     synchronizer=synchronizer,
   1670     consolidated=consolidated,
   1671     consolidate_on_close=False,
   1672     chunk_store=chunk_store,
   1673     storage_options=storage_options,
   1674     zarr_version=zarr_version,
   1675     zarr_format=zarr_format,
   1676 )
   1678 groups_dict = {}
   1680 for path_group, store in stores.items():

File ~/miniconda3/envs/minimal/lib/python3.11/site-packages/xarray/backends/zarr.py:662, in ZarrStore.open_store(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, append_dim, write_region, safe_chunks, zarr_version, zarr_format, use_zarr_fill_value_as_mask, write_empty, cache_members)
    643 (
    644     zarr_group,
    645     consolidate_on_close,
   (...)
    659     zarr_format=zarr_format,
    660 )
    661 group_paths = list(_iter_zarr_groups(zarr_group, parent=group))
--> 662 return {
    663     group: cls(
    664         zarr_group.get(group),
    665         mode,
    666         consolidate_on_close,
    667         append_dim,
    668         write_region,
    669         safe_chunks,
    670         write_empty,
    671         close_store_on_close,
    672         use_zarr_fill_value_as_mask,
    673         cache_members=cache_members,
    674     )
    675     for group in group_paths
    676 }

File ~/miniconda3/envs/minimal/lib/python3.11/site-packages/xarray/backends/zarr.py:663, in <dictcomp>(.0)
    643 (
    644     zarr_group,
    645     consolidate_on_close,
   (...)
    659     zarr_format=zarr_format,
    660 )
    661 group_paths = list(_iter_zarr_groups(zarr_group, parent=group))
    662 return {
--> 663     group: cls(
    664         zarr_group.get(group),
    665         mode,
    666         consolidate_on_close,
    667         append_dim,
    668         write_region,
    669         safe_chunks,
    670         write_empty,
    671         close_store_on_close,
    672         use_zarr_fill_value_as_mask,
    673         cache_members=cache_members,
    674     )
    675     for group in group_paths
    676 }

File ~/miniconda3/envs/minimal/lib/python3.11/site-packages/xarray/backends/zarr.py:744, in ZarrStore.__init__(self, zarr_group, mode, consolidate_on_close, append_dim, write_region, safe_chunks, write_empty, close_store_on_close, use_zarr_fill_value_as_mask, cache_members)
    730 def __init__(
    731     self,
    732     zarr_group,
   (...)
    741     cache_members: bool = True,
    742 ):
    743     self.zarr_group = zarr_group
--> 744     self._read_only = self.zarr_group.read_only
    745     self._synchronizer = self.zarr_group.synchronizer
    746     self._group = self.zarr_group.path

AttributeError: 'NoneType' object has no attribute 'read_only'

Expected behavior
Saving and loading a model should complete without warnings or errors.

Desktop (please complete the following information):

  • OS: Ubuntu 22.04
  • xeofs version 3.0.4

Additional context
None.

@nicrie nicrie added the bug Something isn't working label Jan 24, 2025
@slevang
Copy link
Contributor

slevang commented Jan 24, 2025

Looks like this is the same issue on read noted upstream.

I also ran into an issue with the stacker setting self.dims_in = X.dims, which is a FrozenMappingWarningOnValuesAccess; it apparently is no longer cast to a dict automatically, so zarr can't serialize it to JSON. That's an easy one-line fix.

Then a million warnings about:

UserWarning: Object at preprocessor is not recognized as a component of a Zarr hierarchy.
# and
UserWarning: The codec `vlen-utf8` is currently not part in the Zarr format 3 specification. It may not be supported by other zarr implementations and may change in the future.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

2 participants