diff --git a/docs/api/parsers/tiff.md b/docs/api/parsers/tiff.md new file mode 100644 index 00000000..ee3cafbd --- /dev/null +++ b/docs/api/parsers/tiff.md @@ -0,0 +1,3 @@ +# TIFF + +The [virtual-tiff](https://github.com/virtual-zarr/virtual-tiff) library provides a TIFF parser. See [their API documentation for details](https://virtual-tiff.readthedocs.io/en/latest/api/parser/). diff --git a/docs/faq.md b/docs/faq.md index 5d0c8840..44e44fbb 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -128,7 +128,7 @@ Users of Kerchunk may find the following comparison table useful, which shows wh | **Generation of references from archival files (1)** | | | | From a netCDF4/HDF5 file | `kerchunk.hdf.SingleHdf5ToZarr` | `open_virtual_dataset(..., parser=HDFParser())` | | From a netCDF3 file | `kerchunk.netCDF3.NetCDF3ToZarr` | `open_virtual_dataset(..., parser=NetCDF3Parser())`, via `kerchunk.netCDF3.NetCDF3ToZarr` | -| From a COG / tiff file | `kerchunk.tiff.tiff_to_zarr` | `open_virtual_dataset(..., parser=TIFFParser())`, via `kerchunk.tiff.tiff_to_zarr` or potentially `tifffile` (❌ Not yet implemented - see [issue #291](https://github.com/zarr-developers/VirtualiZarr/issues/291)) | +| From a COG / tiff file | `kerchunk.tiff.tiff_to_zarr` | `open_virtual_dataset(..., parser=VirtualTIFF())`, via [virtual_tiff](https://github.com/virtual-zarr/virtual-tiff) | | From a Zarr v2 store | `kerchunk.zarr.ZarrToZarr` | `open_virtual_dataset(..., parser=ZarrParser())` (❌ Not yet implemented - see [issue #262](https://github.com/zarr-developers/VirtualiZarr/issues/262)) | | From a Zarr v3 store | | `open_virtual_dataset(..., parser=ZarrParser())` | | From a GRIB2 file | `kerchunk.grib2.scan_grib` | `open_virtual_datatree(..., parser=GribParser())` (❌ Not yet implemented - see [issue #11](https://github.com/zarr-developers/VirtualiZarr/issues/11)) | diff --git a/mkdocs.yml b/mkdocs.yml index 385a3083..9343ae1b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -35,6 +35,7 @@ nav: - api/parsers/netcdf3.md - api/parsers/zarr.md - api/parsers/dmrpp.md + - api/parsers/tiff.md - api/serialization.md - api/developer.md - "migration_guide.md" diff --git a/pyproject.toml b/pyproject.toml index 26093378..9108b15a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,9 @@ fits = [ "kerchunk>=0.2.8", "astropy", ] +tiff = [ + "virtual-tiff", +] kerchunk_parquet = [ "virtualizarr[remote]", "fastparquet", @@ -71,6 +74,7 @@ all_parsers = [ "virtualizarr[netcdf3]", "virtualizarr[fits]", "virtualizarr[kerchunk_parquet]", + "virtualizarr[tiff]", ] # writers @@ -193,14 +197,14 @@ run-tests-html-cov = { cmd = "pytest -n auto --run-network-tests --verbose --cov [tool.pixi.environments] min-deps = ["dev", "test", "hdf", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs # Inherit from min-deps to get all the test commands, along with optional dependencies -test = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py313"] -test-py311 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py311"] # test against python 3.11 -test-py312 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py312"] # test against python 3.12 -minio = ["dev", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py312", "minio"] -minimum-versions = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "minimum-versions"] +test = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "py313"] +test-py311 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "py311"] # test against python 3.11 +test-py312 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "py312"] # test against python 3.12 +minio = ["dev", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "tiff", "py312", "minio"] +minimum-versions = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "tiff", "hdf5-lib", "minimum-versions"] upstream = ["dev", "test", "hdf", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev", "py313"] -all = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "all_parsers", "all_writers", "py313"] -docs = ["docs", "dev", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py313"] +all = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "all_parsers", "all_writers", "py313"] +docs = ["docs", "dev", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "py313"] # Define commands to run within the docs environment [tool.pixi.feature.docs.tasks] diff --git a/virtualizarr/tests/__init__.py b/virtualizarr/tests/__init__.py index 9089136f..c14f048c 100644 --- a/virtualizarr/tests/__init__.py +++ b/virtualizarr/tests/__init__.py @@ -41,3 +41,4 @@ def _importorskip( has_zarr_python, requires_zarr_python = _importorskip("zarr") has_dask, requires_dask = _importorskip("dask") has_obstore, requires_obstore = _importorskip("obstore") +has_tiff, requires_tiff = _importorskip("virtual_tiff") diff --git a/virtualizarr/tests/test_parsers/test_tiff.py b/virtualizarr/tests/test_parsers/test_tiff.py new file mode 100644 index 00000000..38ea85a0 --- /dev/null +++ b/virtualizarr/tests/test_parsers/test_tiff.py @@ -0,0 +1,24 @@ +import pytest +from obstore.store import S3Store +from xarray import Dataset + +from virtualizarr import open_virtual_dataset +from virtualizarr.registry import ObjectStoreRegistry +from virtualizarr.tests import requires_network, requires_tiff + +virtual_tiff = pytest.importorskip("virtual_tiff") + + +@requires_tiff +@requires_network +def test_virtual_tiff() -> None: + store = S3Store("sentinel-cogs", region="us-west-2", skip_signature=True) + registry = ObjectStoreRegistry({"s3://sentinel-cogs/": store}) + url = "s3://sentinel-cogs/sentinel-s2-l2a-cogs/12/S/UF/2022/6/S2B_12SUF_20220609_0_L2A/B04.tif" + parser = virtual_tiff.VirtualTIFF(ifd=0) + with open_virtual_dataset(url=url, parser=parser, registry=registry) as vds: + assert isinstance(vds, Dataset) + assert list(vds.variables) == ["0"] + var = vds["0"].variable + assert var.sizes == {"y": 10980, "x": 10980} + assert var.dtype == "