silx.io.open: Added basic support for zarr URLs #4350

Draft: wants to merge 3 commits into main
9 changes: 8 additions & 1 deletion src/silx/app/utils/parseutils.py
@@ -31,6 +31,7 @@
from collections.abc import Sequence
import glob
import logging
import urllib.parse
from typing import Any
from collections.abc import Generator, Iterable
from pathlib import Path
@@ -76,7 +77,7 @@ def to_bool(thing: Any, default: bool | None = None) -> bool:
def filenames_to_dataurls(
filenames: Iterable[str | Path],
slices: Sequence[int] = tuple(),
) -> Generator[object]:
) -> Generator["DataUrl" | str]:
"""Expand filenames and HDF5 data path in files input argument"""
# Imports here so they are performed after setting HDF5_USE_FILE_LOCKING and logging level
import silx.io
@@ -87,6 +88,12 @@ def filenames_to_dataurls(
extra_slices = tuple(slices)

for filename in filenames:
if isinstance(filename, str) and urllib.parse.urlparse(
filename
).scheme.startswith("zarr+"):
yield filename
continue

url = DataUrl(filename)

for file_path in sorted(silx.utils.files.expand_filenames([url.file_path()])):
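
For context (not part of the diff): a minimal sketch of the new behaviour, using a hypothetical bucket name. A zarr+ URL is yielded unchanged as a plain string instead of being parsed into a DataUrl, and no filename expansion happens for it.

from silx.app.utils.parseutils import filenames_to_dataurls

# The zarr+ scheme short-circuits DataUrl parsing and glob expansion
urls = list(filenames_to_dataurls(["zarr+s3://bucket/data.zarr"]))
assert urls == ["zarr+s3://bucket/data.zarr"]
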
3 changes: 2 additions & 1 deletion src/silx/app/view/main.py
@@ -32,6 +32,7 @@
import signal
import sys
import traceback
import urllib.parse
from silx.app.utils import parseutils


@@ -178,7 +179,7 @@ def exceptHook(type_, value, trace):
for url in parseutils.filenames_to_dataurls(options.files, options.slices):
# TODO: Would be nice to add a process widget and a cancel button
try:
window.appendFile(url.path())
window.appendFile(url if isinstance(url, str) else url.path())
except OSError as e:
_logger.error(e.args[0])
_logger.debug("Backtrace", exc_info=True)
1 change: 1 addition & 0 deletions src/silx/io/meson.build
@@ -18,6 +18,7 @@ py.install_sources([
'spech5.py',
'url.py',
'utils.py',
'zarrh5.py',
],
subdir: 'silx/io', # Folder relative to site-packages to install to
)
45 changes: 32 additions & 13 deletions src/silx/io/utils.py
@@ -32,6 +32,7 @@
import sys
import time
import logging
import urllib.parse
from collections.abc import Generator

import numpy
@@ -49,6 +50,12 @@
except ImportError as e:
h5pyd = None

try:
from .zarrh5 import ZarrH5
except ImportError as e:
ZarrH5 = None


logger = logging.getLogger(__name__)

NEXUS_HDF5_EXT = [".h5", ".nx5", ".nxs", ".hdf", ".hdf5", ".cxi"]
@@ -689,34 +696,46 @@ def open(filename):  # pylint:disable=redefined-builtin
:raises: IOError if the file can't be loaded or path can't be found
:rtype: h5py-like node
"""
url = DataUrl(filename)
url = urllib.parse.urlparse(filename)
if url.scheme.startswith("zarr+"):
if ZarrH5 is None:
raise IOError(
f"Zarr support is not available, please install zarr, cannot open: {filename}"
)
try:
return ZarrH5(filename)
except Exception as e:
raise IOError(f"Failed to open URL with zarr: {type(e)} {e}")

if url.scheme() in [None, "file", "silx"]:
data_url = DataUrl(filename)
if data_url.scheme() in [None, "file", "silx"]:
# That's a local file
if not url.is_valid():
if not data_url.is_valid():
raise OSError("URL '%s' is not valid" % filename)
h5_file = _open_local_file(url.file_path())
elif url.scheme() in ("http", "https"):
h5_file = _open_local_file(data_url.file_path())
elif data_url.scheme() in ("http", "https"):
return _open_url_with_h5pyd(filename)
else:
raise OSError(f"Unsupported URL scheme {url.scheme}: {filename}")
raise OSError(f"Unsupported URL scheme {data_url.scheme}: {filename}")

if url.data_path() in [None, "/", ""]: # The full file is requested
if url.data_slice():
if data_url.data_path() in [None, "/", ""]: # The full file is requested
if data_url.data_slice():
raise OSError(f"URL '{filename}' containing slicing is not supported")
return h5_file
else:
# Only a child is requested
if url.data_path() not in h5_file:
msg = f"File '{filename}' does not contain path '{url.data_path()}'."
if data_url.data_path() not in h5_file:
msg = f"File '{filename}' does not contain path '{data_url.data_path()}'."
raise OSError(msg)
node = h5_file[url.data_path()]
node = h5_file[data_url.data_path()]

if url.data_slice() is not None:
if data_url.data_slice() is not None:
from . import _sliceh5 # Lazy-import to avoid circular dependency

try:
return _sliceh5.DatasetSlice(node, url.data_slice(), attrs=node.attrs)
return _sliceh5.DatasetSlice(
node, data_url.data_slice(), attrs=node.attrs
)
except ValueError:
raise OSError(
f"URL {filename} contains slicing, but it is not a dataset"
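
For context (not part of the diff): a minimal usage sketch, assuming the zarr package is installed and using a hypothetical local store path (remote stores such as s3:// typically also need fsspec/s3fs).

import silx.io

# Prefix the store URL with "zarr+" so open() routes it to ZarrH5;
# an IOError is raised if zarr is missing or the store cannot be opened.
root = silx.io.open("zarr+file:///tmp/example.zarr")
print(list(root.keys()))  # browse the store with the h5py-like API
root.close()
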
151 changes: 151 additions & 0 deletions src/silx/io/zarrh5.py
@@ -0,0 +1,151 @@
# /*##########################################################################
# Copyright (C) 2025 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ############################################################################*/
"""
Provides a wrapper exposing `Zarr <https://zarr.readthedocs.io/>`_ groups and
arrays through an h5py-like API.

This is a preview feature.
"""
from __future__ import annotations

import logging
import urllib.parse
from typing import Generator
import numpy
from . import commonh5
import zarr


_logger = logging.getLogger(__name__)


def _children(group: zarr.Group) -> Generator[ZarrDataset | ZarrGroup]:
for name, item in group.items():
if isinstance(item, zarr.Group):
yield ZarrGroup(name, item)
elif isinstance(item, zarr.Array):
yield ZarrDataset(name, item)
else:
_logger.warning(f"Cannot map child {name}: Ignored")


class ZarrH5(commonh5.File):
"""Zarr client wrapper"""

def __init__(
self,
name: str,
mode: str | None = None,
attrs: dict | None = None,
) -> None:
assert mode in ("r", None)
if name.startswith("zarr+"):
name = name[5:]
full_url = urllib.parse.urlparse(name)
if full_url.fragment:
raise ValueError("URL fragment is not supported")

base_url = urllib.parse.urlunparse(
(full_url.scheme, full_url.netloc, full_url.path, "", "", "")
)

# quick&dirty storage_options parsing: it would need pydantic model
storage_options = {}
for key, values in urllib.parse.parse_qs(full_url.query).items():
value = values[-1]
if key == "use_ssl":
value = True if value.lower() == "true" else False
storage_options[key] = value
self.__group = zarr.open_group(base_url, storage_options=storage_options)

if attrs is None:
attrs = {}
super().__init__(
base_url.rstrip("/"), mode, attrs={**self.__group.attrs, **attrs}
)

for child in _children(self.__group):
self.add_node(child)

_logger.warning(
"Zarr support is a preview feature: This may change or be removed without notice."
)

def close(self) -> None:
super().close()
self.__group = None


class ZarrGroup(commonh5.LazyLoadableGroup):
"""Zarr Group wrapper"""

def __init__(
self,
name: str,
group: zarr.Group,
parent: ZarrH5 | ZarrGroup | None = None,
attrs: dict | None = None,
) -> None:
super().__init__(name, parent, attrs)
self.__group = group

def _create_child(self) -> None:
for child in _children(self.__group):
self.add_node(child)


class ZarrDataset(commonh5.Dataset):
"""Zarr Array wrapper"""

def __init__(
self,
name: str,
array: zarr.Array,
parent: ZarrH5 | ZarrGroup | None = None,
attrs: dict | None = None,
) -> None:
super().__init__(name, array, parent, attrs)

@property
def shape(self) -> tuple[int, ...]:
return self._get_data().shape

@property
def size(self) -> int:
return self._get_data().size

def __len__(self) -> int:
return len(self._get_data())

def __getitem__(self, item):
return self._get_data()[item]

@property
def value(self) -> numpy.ndarray:
return self._get_data()[()]

@property
def compression(self):
return self._get_data().compressor.codec_id

@property
def chunks(self):
return self._get_data().chunks
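
For context (not part of the new module): a standalone sketch of the query-string handling done in ZarrH5.__init__, with a hypothetical endpoint. Every query parameter is forwarded to zarr as a storage option; only use_ssl is converted to a boolean.

import urllib.parse

url = urllib.parse.urlparse(
    "s3://bucket/data.zarr?endpoint_url=http://localhost:9000&use_ssl=false"
)
storage_options = {}
for key, values in urllib.parse.parse_qs(url.query).items():
    value = values[-1]  # the last occurrence of a repeated key wins
    if key == "use_ssl":
        value = value.lower() == "true"
    storage_options[key] = value
# storage_options == {"endpoint_url": "http://localhost:9000", "use_ssl": False}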