Add a Python (xarray / zarr-python) interoperability test for NCZarr.

What this patch does:
  * .github/workflows/run_tests_ubuntu.yml: run on pull requests, add a
    build-python-tools job and make the autotools one-off jobs wait for it.
  * cmake/dependencies.cmake: look for a Python interpreter when NCZarr and
    tests are enabled.
  * nczarr_test/CMakeLists.txt: install the helper package into
    <build>/venv and register the run_interop_python test.
  * nczarr_test/run_interop_python.sh: write one dataset as netCDF and as
    Zarr with xarray, then diff the ncdump output of both.
  * pyproject.toml, pythontools/: the helper package, exposed as the
    "testing-tools" console script.

Reviewer notes:
  * This patch was rebuilt from a whitespace-collapsed copy. The indentation
    of context lines is a best-effort reconstruction and the index hashes are
    carried over unchanged; verify with `git apply --check`.
  * Whitespace-only churn was removed from the workflow hunks.
  * NOTE(review): build-python-tools only checks out the tree and sets up
    Python in its own job; none of the jobs visible in this patch gain a
    Python setup step -- confirm the test jobs get an interpreter.

diff --git a/.github/workflows/run_tests_ubuntu.yml b/.github/workflows/run_tests_ubuntu.yml
index 6b78cb478b..c797c38216 100644
--- a/.github/workflows/run_tests_ubuntu.yml
+++ b/.github/workflows/run_tests_ubuntu.yml
@@ -4,7 +4,7 @@
 
 name: Run Ubuntu/Linux netCDF Tests
 
-on: [workflow_dispatch]
+on: [pull_request,workflow_dispatch]
 
 env:
   REMOTETESTDOWN: ${{ vars.REMOTETESTDOWN }}
@@ -14,7 +14,16 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
 
+  build-python-tools:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
   build-deps-serial:
 
     strategy:
@@ -243,7 +252,9 @@ jobs:
   ## Serial, Static
   nc-ac-tests-oneoff-serial-static:
 
-    needs: build-deps-serial
+    needs:
+      - build-deps-serial
+      - build-python-tools
 
     strategy:
       matrix:
@@ -332,7 +343,10 @@ jobs:
   ##
   nc-ac-tests-oneoff-parallel:
 
-    needs: build-deps-parallel
+    needs:
+      - build-deps-parallel
+      - build-python-tools
+
     runs-on: ubuntu-22.04
 
     strategy:
@@ -488,7 +502,7 @@ jobs:
         if: ${{ failure() }}
 
   ##
-  # Serial, Shared
+  # Serial, Static
   ##
   nc-cmake-tests-oneoff-serial-static:
 
diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake
index d06a276d73..c100dc65ef 100644
--- a/cmake/dependencies.cmake
+++ b/cmake/dependencies.cmake
@@ -510,3 +510,10 @@ endif()
 if (NETCDF_PACKAGE)
   find_program(NC_DPKG NAMES dpkg)
 endif()
+
+################################
+# Python for testing tools (pyproject.toml requires Python >= 3.10)
+################################
+if (NETCDF_ENABLE_NCZARR AND NETCDF_ENABLE_TESTS)
+  find_package (Python 3.10 COMPONENTS Interpreter)
+endif()
diff --git a/nczarr_test/CMakeLists.txt b/nczarr_test/CMakeLists.txt
index ad6fde8c8b..38a526bbac 100644
--- a/nczarr_test/CMakeLists.txt
+++ b/nczarr_test/CMakeLists.txt
@@ -198,6 +198,35 @@ IF(NETCDF_ENABLE_TESTS)
     # if(NOT ISCMAKE)
         add_sh_test(nczarr_test run_interop)
     # ENDIF()
+    # Python interoperability test. The helper package described by the
+    # top-level pyproject.toml is installed into a virtual environment in the
+    # build tree; run_interop_python.sh expects it at ${TOPBUILDDIR}/venv.
+    IF(NETCDF_ENABLE_TESTS AND NETCDF_ENABLE_NCZARR AND Python_EXECUTABLE)
+        set(NC_PYTHON_VENV_DIR "${CMAKE_BINARY_DIR}/venv")
+        set(NC_PYTHON_TOOLS_STAMP "${NC_PYTHON_VENV_DIR}/.testing-tools-installed")
+        # Stamp-file rule: the venv is (re)built only when the stamp is missing
+        # or the Python sources change, instead of on every build.
+        add_custom_command(
+            OUTPUT "${NC_PYTHON_TOOLS_STAMP}"
+            COMMAND "${Python_EXECUTABLE}" -m venv "${NC_PYTHON_VENV_DIR}"
+            COMMAND "${NC_PYTHON_VENV_DIR}/bin/python" -m pip install --upgrade pip
+            COMMAND "${NC_PYTHON_VENV_DIR}/bin/python" -m pip install .
+            COMMAND "${CMAKE_COMMAND}" -E touch "${NC_PYTHON_TOOLS_STAMP}"
+            DEPENDS "${CMAKE_SOURCE_DIR}/pyproject.toml"
+                    "${CMAKE_SOURCE_DIR}/pythontools/main.py"
+            WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+            COMMENT "Creating virtual environment and installing Python netcdf-c-testing-tools"
+            VERBATIM
+        )
+        add_custom_target(build_python_tools ALL DEPENDS "${NC_PYTHON_TOOLS_STAMP}")
+        add_sh_test(nczarr_test run_interop_python)
+    ENDIF()
+    # Convenience target that removes the virtual environment again.
+    add_custom_target(clean_python_tools
+        COMMAND "${CMAKE_COMMAND}" -E remove_directory "${CMAKE_BINARY_DIR}/venv"
+        COMMENT "Cleaning up Python virtual environment"
+        VERBATIM
+    )
 
     IF(USE_HDF5)
         add_sh_test(nczarr_test run_fillonlyz)
diff --git a/nczarr_test/run_interop_python.sh b/nczarr_test/run_interop_python.sh
new file mode 100755
index 0000000000..e3645172c7
--- /dev/null
+++ b/nczarr_test/run_interop_python.sh
@@ -0,0 +1,69 @@
+#!/bin/sh
+
+# Interoperability test: build a dataset with the Python tooling (xarray) and
+# check that ncdump prints the netCDF and Zarr copies identically.
+
+set -e
+
+if test "x$srcdir" = x ; then srcdir=`pwd`; fi
+. ../test_common.sh
+
+. "$srcdir/test_nczarr.sh"
+
+# Repeated after sourcing the helpers, presumably in case they change the
+# shell options -- TODO confirm.
+set -e
+
+# NOTE(review): not referenced below; kept in case the sourced helpers read it.
+metaonly="-h"
+
+s3isolate "testdir_interop_python"
+THISDIR=`pwd`
+cd "$ISOPATH"
+
+# Running python means activating the virtual environment
+VENVDIR="${TOPBUILDDIR}/venv"
+if [ -d "${VENVDIR}" ]; then
+    . "${VENVDIR}/bin/activate"
+    trap "deactivate" EXIT
+else
+    echo "Error: Virtual environment not found. Did you run the build step?" >&2
+    exit 1
+fi
+
+testxarray() {
+    # Create a dataset with xarray and convert it to both:
+    # 1. a netCDF file (.nc)
+    # 2. zarr
+    # Then compare the output of ncdump on both files
+    # to make sure they are the same
+
+    echo " o Running xarray Testcase: (${*})"
+    # Create files
+    testing-tools create xarray "$1"
+
+    ${NCDUMP} -n same "${PWD}/xarray-$1.nc" > nc.out
+    ${NCDUMP} -n same "file://${PWD}/xarray-$1.zarr/#mode=zarr,file" > zarr.out
+    diff -b nc.out zarr.out
+}
+
+# This shell script tests compatibility between
+# this implementation and other implementations
+# by means of files constructed by that other implementation
+
+testallcases() {
+zext=$1
+case "$zext" in
+    file)
+        testxarray "filename"
+        ;;
+    zip)
+        ;;
+    s3)
+        ;;
+    *) echo "unimplemented kind: $1" ; exit 1;;
+esac
+}
+
+
+testallcases file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000..82ff73f508
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,27 @@
+# Packaging metadata for the Python helpers in pythontools/ that the
+# nczarr_test interoperability test installs into a build-tree venv.
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "netcdf-c-testing-tools"
+version = "0.1.0"
+description = "Script to create Zarr datasets using zarr-python"
+# TODO(review): placeholder author details; replace before merging.
+authors = [{name = "Your Name", email = "your.email@example.com"}]
+license = {text = "MIT"}
+requires-python = ">= 3.10"
+dependencies = [
+    "numpy",
+    "zarr==2.18.2",
+    "xarray==2025.1.1",
+    "numcodecs",
+    "scipy",
+]
+
+[tool.setuptools]
+packages = ["pythontools"]
+
+[project.scripts]
+testing-tools = "pythontools.main:run"
diff --git a/pythontools/__init__.py b/pythontools/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/pythontools/main.py b/pythontools/main.py
new file mode 100644
index 0000000000..d0453e1f0a
--- /dev/null
+++ b/pythontools/main.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Script to create Zarr datasets using zarr-python.
+
+Installed as the ``testing-tools`` console script declared in pyproject.toml;
+nczarr_test/run_interop_python.sh runs ``testing-tools create xarray <name>``.
+"""
+import argparse
+from pathlib import Path
+from sys import version as python_version
+
+import numcodecs
+import numpy as np
+import xarray as xr
+import zarr
+
+
+def xarray_create_zarr_vs_netcdf(name):
+    """Write the same one-variable dataset to ``<name>.nc`` and ``<name>.zarr``.
+
+    The dataset holds a single scalar ``var`` equal to the largest int8
+    value (127).
+    """
+    ds = xr.Dataset({'var': np.int8(2**7 - 1)})
+    ds.to_netcdf(f'{name}.nc')
+    ds.to_zarr(f'{name}.zarr', mode='w', consolidated=True)
+
+
+def zarr_create_dataset(name):
+    """Create ``<name>.zarr`` with zarr-python and consolidate its metadata.
+
+    The store gets root attributes plus a ``scalars`` group and an ``arrays``
+    group with ``ints``, ``floats`` and ``misc`` sub-groups.
+    """
+    ds = zarr.open(f'{name}.zarr', 'w')
+    # Attributes
+    ds.attrs['Description'] = 'Zarr dataset created using zarr-python'
+    ds.attrs['python'] = python_version
+    ds.attrs['zarr-python'] = zarr.__version__
+    # NOTE(review): non-string attribute key; presumably deliberate - confirm.
+    ds.attrs[1] = 1
+
+    scalars = ds.create_group('scalars')
+    scalars.attrs['Description'] = "'Scalar' data types or unlimited arrays with size 1"
+
+    # Scalar-like arrays
+    scalars.ones('pi', shape=())
+    scalars.pi.fill_value = 3.1415
+    scalars.create(name='int-b', shape=(), dtype='uint8')
+    scalars.array(name='int-c', data=(3))
+    # Scalar with an explicit fill value
+    scalars.create(name='int-fillin', shape=(), fill_value=3.1415)
+
+    arrays = ds.create_group('arrays')
+    arrays.attrs['Description'] = 'Arrays groups by data type, int or float'
+
+    # Arrays
+    ints = arrays.create_group('ints')
+    # Describe the sub-group itself rather than overwriting the parent's text.
+    ints.attrs['Description'] = 'Integer arrays with different int sizes'
+    for t in ('int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'):
+        ints.create(name=f'{t}-2x3', shape=(2, 3), chunks=(2, 2), dtype=t)
+
+    floats = arrays.create_group('floats')
+    floats.attrs['Description'] = 'Float arrays with different float sizes'
+    for t in ('float32', 'float64'):
+        floats.create(name=f'{t}-2x3', shape=(2, 3), chunks=(2, 2), dtype=t)
+
+    # Misc
+    misc = arrays.create_group('misc')
+    misc.attrs['Description'] = 'Miscellaneous arrays, with other types'
+    misc['string'] = 'string'
+    misc['byte-string'] = b'string'
+
+    zarr.consolidate_metadata(ds.store)
+
+
+def argument_parser():
+    """Build the command-line parser for the ``testing-tools`` entry point."""
+    parser = argparse.ArgumentParser(description='Create Zarr datasets using python tooling')
+    parser.add_argument('operation', choices=('create',), help='Operation to perform')
+    # nargs='?' makes the declared default effective when the type is omitted.
+    parser.add_argument('type', nargs='?', default='zarr', choices=['zarr', 'xarray'],
+                        help='Use xarray or zarr to create the Zarr dataset')
+    parser.add_argument('filename', nargs='?', type=Path, default=f'python-{zarr.__version__}',
+                        help=f'Name of the Zarr dataset (defaults to `<type>-python-{zarr.__version__}.zarr`)')
+    # NOTE(review): accepted but not read anywhere in this module.
+    parser.add_argument('--consolidate', action='store_true', help='Consolidate metadata')
+    return parser
+
+
+def sanitize_filename(args):
+    """Derive the output base path from the parsed arguments.
+
+    A directory argument gets the default ``python-<zarr version>`` name
+    appended; the final name component is prefixed with ``<type>-``; trailing
+    ``.zarr``/``.nc`` suffixes are stripped because the writers add their own.
+    """
+    filename = Path(args.filename)
+    if filename.is_dir():
+        filename = filename / Path(f'python-{zarr.__version__}')
+    filename = filename.parent / Path(args.type + '-' + filename.name)
+    while filename.suffix in ('.zarr', '.nc'):
+        filename = filename.with_suffix('')
+    return filename
+
+
+def run():
+    """Console entry point: report tool versions, then create the dataset(s)."""
+    args = argument_parser().parse_args()
+
+    print(f"""Testing zarr-python compatibility using:
+    python = {python_version}
+    zarr = {zarr.__version__}
+    xarray = {xr.__version__}
+    numpy = {np.__version__}
+    numcodecs = {numcodecs.__version__}""")
+    filename = sanitize_filename(args)
+
+    if args.type == 'xarray':
+        xarray_create_zarr_vs_netcdf(filename)
+        print(f"Datasets {filename}.zarr and {filename}.nc created")
+    elif args.type == 'zarr':
+        zarr_create_dataset(filename)
+        print(f"Zarr dataset {filename}.zarr created")
+
+
+if __name__ == "__main__":
+    run()