From c68c6260a0d11fbaeb755cc6a8763a261a8256d3 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 15 Nov 2022 17:58:40 -0700 Subject: [PATCH 1/4] Update dependabot schedule to monthly --- .github/dependabot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 4398856..fb89901 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -3,9 +3,9 @@ updates: - package-ecosystem: pip directory: "/" schedule: - interval: weekly + interval: monthly - package-ecosystem: "github-actions" directory: "/" schedule: # Check for updates to GitHub Actions every weekday - interval: weekly + interval: monthly From cfa8ec8e4cda8d06f8a256ae2efe1ef3e55204c2 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 15 Nov 2022 18:27:22 -0700 Subject: [PATCH 2/4] Update pre-commit hooks --- pyproject.toml | 67 +++++++++++ .pre-commit-config.yaml | 27 +++-- docs/conf.py | 30 ++--- docs/quickstart.ipynb | 18 +-- setup.cfg | 20 ---- setup.py | 49 +------- tasks.py | 6 +- tests/test_core.py | 231 +++++++++++++++++++----------------- xarray_schema/__init__.py | 31 +++-- xarray_schema/base.py | 10 +- xarray_schema/components.py | 178 +++++++++++++++------------ xarray_schema/dataarray.py | 180 ++++++++++++++-------------- xarray_schema/dataset.py | 74 +++++++----- xarray_schema/types.py | 10 +- 14 files changed, 496 insertions(+), 435 deletions(-) create mode 100644 pyproject.toml diff --git a/ pyproject.toml b/ pyproject.toml new file mode 100644 index 0000000..a0465ed --- /dev/null +++ b/ pyproject.toml @@ -0,0 +1,67 @@ +[tool.black] +line-length = 100 +target-version = ['py39'] +skip-string-normalization = true + + +[build-system] +requires = [ + "setuptools>=61", + "setuptools-scm" +] +build-backend = "setuptools.build_meta" + + +[project] +name = "xarray-schema" +description = "Schema validation for Xarray objects" +readme = "README.md" +license = {text = "MIT"} +authors = [{name = "Xarray-schema Developers", email = "joe@carbonplan.org"}] +requires-python = ">=3.8" +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Topic :: Scientific/Engineering", +] +dynamic = ["version"] +dependencies = [ + "xarray>=0.16" +] +[project.optional-dependencies] + +dev = [ +"jsonschema", +"pytest", +"pytest-cov", +"mypy==0.982", +"types-jsonschema", +"types-setuptools==65.5.0.3" +] +[project.urls] +documentation = "https://xarray-schema.readthedocs.io/" +repository = "https://github.com/xarray-contrib/xarray-schema" + +[tool.setuptools.packages.find] +include = ["xarray_schema*"] + +[tool.setuptools_scm] +version_scheme = "post-release" +local_scheme = "dirty-tag" +fallback_version = "999" + +[tool.isort] +profile = "black" +skip_gitignore = true + + +[tool.pytest.ini_options] +log_cli = true +log_level = "INFO" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 89f50d0..1b3de67 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,6 @@ +ci: + autoupdate_schedule: monthly + repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.3.0 @@ -5,24 +8,31 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-docstring-first - - id: check-json - - id: check-yaml - - id: double-quote-string-fixer + - id: mixed-line-ending + + - repo: https://github.com/asottile/pyupgrade + rev: v3.2.2 + hooks: + - id: pyupgrade + args: + - "--py39-plus" - repo: https://github.com/psf/black rev: 22.10.0 hooks: - id: black - args: ["--line-length", "100", "--skip-string-normalization"] + - id: black-jupyter + + - repo: https://github.com/keewis/blackdoc + rev: v0.3.8 + hooks: + - id: blackdoc - repo: https://github.com/PyCQA/flake8 rev: 5.0.4 hooks: - id: flake8 - - repo: https://github.com/asottile/seed-isort-config - rev: v2.2.0 - hooks: - - id: seed-isort-config + - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: @@ -32,4 +42,3 @@ repos: rev: v3.0.0-alpha.4 hooks: - id: prettier - language_version: system diff --git a/docs/conf.py b/docs/conf.py index d1ed565..9767ea5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,9 +17,9 @@ # -- Project information ----------------------------------------------------- -project = 'xarray-schema' -copyright = '2022, CarbonPlan' -author = 'CarbonPlan' +project = "xarray-schema" +copyright = "2022, CarbonPlan" +author = "CarbonPlan" # -- General configuration --------------------------------------------------- @@ -28,23 +28,23 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx_rtd_theme', - 'sphinx.ext.autosummary', - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx_autodoc_typehints', - 'IPython.sphinxext.ipython_directive', - 'IPython.sphinxext.ipython_console_highlighting', - 'nbsphinx', + "sphinx_rtd_theme", + "sphinx.ext.autosummary", + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx_autodoc_typehints", + "IPython.sphinxext.ipython_directive", + "IPython.sphinxext.ipython_console_highlighting", + "nbsphinx", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- @@ -52,11 +52,11 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ['_static'] -autodoc_default_options = {'exclude-members': '__init__'} +autodoc_default_options = {"exclude-members": "__init__"} diff --git a/docs/quickstart.ipynb b/docs/quickstart.ipynb index 05c5d2c..8799d72 100644 --- a/docs/quickstart.ipynb +++ b/docs/quickstart.ipynb @@ -40,7 +40,7 @@ "metadata": {}, "outputs": [], "source": [ - "da = xr.DataArray(np.ones((4, 10), dtype='i4'), dims=['x', 't'], name='foo')" + "da = xr.DataArray(np.ones((4, 10), dtype=\"i4\"), dims=[\"x\", \"t\"], name=\"foo\")" ] }, { @@ -58,7 +58,7 @@ "metadata": {}, "outputs": [], "source": [ - "schema = DataArraySchema(dtype=np.integer, name='foo', shape=(4, None))" + "schema = DataArraySchema(dtype=np.integer, name=\"foo\", shape=(4, None))" ] }, { @@ -108,7 +108,7 @@ } ], "source": [ - "da2 = xr.DataArray(np.ones((4, 10), dtype='f4'), dims=['x', 't'], name='foo')\n", + "da2 = xr.DataArray(np.ones((4, 10), dtype=\"f4\"), dims=[\"x\", \"t\"], name=\"foo\")\n", "schema.validate(da2)" ] }, @@ -174,15 +174,17 @@ " ChunksSchema,\n", " ArrayTypeSchema,\n", " AttrSchema,\n", - " AttrsSchema\n", + " AttrsSchema,\n", ")\n", "\n", "# example constructions\n", - "dtype_schema = DTypeSchema('i4')\n", - "dims_schema = DimsSchema(('x', 'y', None)) # None is used as a wildcard\n", + "dtype_schema = DTypeSchema(\"i4\")\n", + "dims_schema = DimsSchema((\"x\", \"y\", None)) # None is used as a wildcard\n", "shape_schema = ShapeSchema((5, 10, None)) # None is used as a wildcard\n", - "name_schema = NameSchema('foo')\n", - "chunk_schema = ChunksSchema({'x': None, 'y': -1}) # None is used as a wildcard, -1 is used as\n", + "name_schema = NameSchema(\"foo\")\n", + "chunk_schema = ChunksSchema(\n", + " {\"x\": None, \"y\": -1}\n", + ") # None is used as a wildcard, -1 is used as\n", "ArrayTypeSchema = ArrayTypeSchema(np.ndarray)\n", "\n", "# Example usage\n", diff --git a/setup.cfg b/setup.cfg index 1dcc25e..2e70fb8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,25 +6,5 @@ max-line-length = 100 max-complexity = 18 select = B,C,E,F,W,T4,B9 -[isort] -known_first_party=xarray_schema -known_third_party=dask,invoke,jsonschema,numpy,pkg_resources,pytest,setuptools,xarray -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -line_length=100 -skip= - docs/source/conf.py - setup.py - -[tool:pytest] -log_cli = True -log_level = INFO - -[coverage:run] -omit = - tasks.py - [mypy] plugins = numpy.typing.mypy_plugin diff --git a/setup.py b/setup.py index cd4be04..088d7e4 100644 --- a/setup.py +++ b/setup.py @@ -1,49 +1,4 @@ #!/usr/bin/env python +from setuptools import setup -"""The setup script.""" - -from os.path import exists - -from setuptools import find_packages, setup - -with open('requirements.txt') as f: - install_requires = f.read().strip().split('\n') - -if exists('README.md'): - with open('README.md') as f: - long_description = f.read() -else: - long_description = '' - -CLASSIFIERS = [ - 'Development Status :: 3 - Alpha', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', - 'Intended Audience :: Science/Research', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Topic :: Scientific/Engineering', -] - -setup( - name='xarray-schema', - description='Schema validation for Xarray objects', - long_description=long_description, - long_description_content_type='text/markdown', - python_requires='>=3.8', - maintainer='Joe Hamman', - maintainer_email='joe@carbonplan.org', - classifiers=CLASSIFIERS, - url='https://github.com/xarray-contrib/xarray-schema', - packages=find_packages(exclude=('tests',)), - package_dir={'xarray_schema': 'xarray_schema'}, - include_package_data=True, - install_requires=install_requires, - license='MIT', - zip_safe=False, - keywords=['xarray', 'schema'], - use_scm_version={'version_scheme': 'post-release', 'local_scheme': 'dirty-tag'}, - setup_requires=['setuptools_scm', 'setuptools>=30.3.0'], -) +setup(use_scm_version={"fallback_version": "999"}) diff --git a/tasks.py b/tasks.py index e371fd4..726081b 100644 --- a/tasks.py +++ b/tasks.py @@ -1,9 +1,9 @@ from invoke import task # pragma: no cover -SRC_DIR = 'xarray_schema' # pragma: no cover -TEST_DIR = 'tests' # pragma: no cover +SRC_DIR = "xarray_schema" # pragma: no cover +TEST_DIR = "tests" # pragma: no cover @task # pragma: no cover def mypy(c): # pragma: no cover - c.run(f'mypy {SRC_DIR} {TEST_DIR}') # pragma: no cover + c.run(f"mypy {SRC_DIR} {TEST_DIR}") # pragma: no cover diff --git a/tests/test_core.py b/tests/test_core.py index 00b4621..11d4238 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import dask.array import jsonschema import numpy as np @@ -6,42 +8,36 @@ from xarray_schema import DataArraySchema, DatasetSchema from xarray_schema.base import SchemaError -from xarray_schema.components import ( - ArrayTypeSchema, - AttrSchema, - AttrsSchema, - ChunksSchema, - DimsSchema, - DTypeSchema, - NameSchema, - ShapeSchema, -) +from xarray_schema.components import (ArrayTypeSchema, AttrSchema, AttrsSchema, + ChunksSchema, DimsSchema, DTypeSchema, + NameSchema, ShapeSchema) from xarray_schema.dataarray import CoordsSchema @pytest.fixture def ds(): - ds = xr.Dataset( + return xr.Dataset( { - 'x': xr.DataArray(np.arange(4) - 2, dims='x'), - 'foo': xr.DataArray(np.ones(4, dtype='i4'), dims='x'), - 'bar': xr.DataArray(np.arange(8, dtype=np.float32).reshape(4, 2), dims=('x', 'y')), + "x": xr.DataArray(np.arange(4) - 2, dims="x"), + "foo": xr.DataArray(np.ones(4, dtype="i4"), dims="x"), + "bar": xr.DataArray( + np.arange(8, dtype=np.float32).reshape(4, 2), dims=("x", "y") + ), } ) - return ds @pytest.mark.parametrize( - 'component, schema_args, validate, json', + "component, schema_args, validate, json", [ - (DTypeSchema, np.integer, ['i4', 'int', np.int32], 'integer'), - (DTypeSchema, np.int64, ['i8', np.int64], '"), ( ArrayTypeSchema, @@ -50,44 +46,53 @@ def ds(): "", ), # schema_args for ChunksSchema include [chunks, dims, shape] - (ChunksSchema, True, [(((1, 1),), ('x',), (2,))], True), - (ChunksSchema, {'x': 2}, [(((2, 2),), ('x',), (4,))], {'x': 2}), - (ChunksSchema, {'x': (2, 2)}, [(((2, 2),), ('x',), (4,))], {'x': [2, 2]}), - (ChunksSchema, {'x': [2, 2]}, [(((2, 2),), ('x',), (4,))], {'x': [2, 2]}), - (ChunksSchema, {'x': 4}, [(((4,),), ('x',), (4,))], {'x': 4}), - (ChunksSchema, {'x': -1}, [(((4,),), ('x',), (4,))], {'x': -1}), - (ChunksSchema, {'x': (1, 2, 1)}, [(((1, 2, 1),), ('x',), (4,))], {'x': [1, 2, 1]}), + (ChunksSchema, True, [(((1, 1),), ("x",), (2,))], True), + (ChunksSchema, {"x": 2}, [(((2, 2),), ("x",), (4,))], {"x": 2}), + (ChunksSchema, {"x": (2, 2)}, [(((2, 2),), ("x",), (4,))], {"x": [2, 2]}), + (ChunksSchema, {"x": [2, 2]}, [(((2, 2),), ("x",), (4,))], {"x": [2, 2]}), + (ChunksSchema, {"x": 4}, [(((4,),), ("x",), (4,))], {"x": 4}), + (ChunksSchema, {"x": -1}, [(((4,),), ("x",), (4,))], {"x": -1}), ( ChunksSchema, - {'x': 2, 'y': -1}, - [(((2, 2), (10,)), ('x', 'y'), (4, 10))], - {'x': 2, 'y': -1}, + {"x": (1, 2, 1)}, + [(((1, 2, 1),), ("x",), (4,))], + {"x": [1, 2, 1]}, + ), + ( + ChunksSchema, + {"x": 2, "y": -1}, + [(((2, 2), (10,)), ("x", "y"), (4, 10))], + {"x": 2, "y": -1}, ), ( AttrsSchema, - {'foo': AttrSchema(value='bar')}, - [{'foo': 'bar'}], + {"foo": AttrSchema(value="bar")}, + [{"foo": "bar"}], { - 'allow_extra_keys': True, - 'require_all_keys': True, - 'attrs': {'foo': {'type': None, 'value': 'bar'}}, + "allow_extra_keys": True, + "require_all_keys": True, + "attrs": {"foo": {"type": None, "value": "bar"}}, }, ), ( AttrsSchema, - {'foo': AttrSchema(value=1)}, - [{'foo': 1}], + {"foo": AttrSchema(value=1)}, + [{"foo": 1}], { - 'allow_extra_keys': True, - 'require_all_keys': True, - 'attrs': {'foo': {'type': None, 'value': 1}}, + "allow_extra_keys": True, + "require_all_keys": True, + "attrs": {"foo": {"type": None, "value": 1}}, }, ), ( CoordsSchema, - {'x': DataArraySchema(name='x')}, - [{'x': xr.DataArray([0, 1], name='x')}], - {'coords': {'x': {'name': 'x'}}, 'allow_extra_keys': True, 'require_all_keys': True}, + {"x": DataArraySchema(name="x")}, + [{"x": xr.DataArray([0, 1], name="x")}], + { + "coords": {"x": {"name": "x"}}, + "allow_extra_keys": True, + "require_all_keys": True, + }, ), ], ) @@ -109,11 +114,11 @@ def test_component_schema(component, schema_args, validate, json): @pytest.mark.parametrize( - 'type, value, validate, json', + "type, value, validate, json", [ - (str, None, 'foo', {'type': str, 'value': None}), - (None, 'foo', 'foo', {'type': None, 'value': 'foo'}), - (str, 'foo', 'foo', {'type': str, 'value': 'foo'}), + (str, None, "foo", {"type": str, "value": None}), + (None, "foo", "foo", {"type": None, "value": "foo"}), + (str, "foo", "foo", {"type": str, "value": "foo"}), ], ) def test_attr_schema(type, value, validate, json): @@ -124,30 +129,40 @@ def test_attr_schema(type, value, validate, json): @pytest.mark.parametrize( - 'component, schema_args, validate, match', + "component, schema_args, validate, match", [ - (DTypeSchema, np.integer, np.float32, r'.*float.*'), - (DimsSchema, ('foo', 'bar'), ('foo',), r'.*length.*'), - (DimsSchema, ('foo', 'bar'), ('foo', 'baz'), r'.*mismatch.*'), - (ShapeSchema, (1, 2, None), (1, 2), r'.*number of dimensions.*'), - (ShapeSchema, (1, 4, 4), (1, 3, 4), r'.*mismatch.*'), - (NameSchema, 'foo', 'bar', r'.*name bar != foo.*'), - (ArrayTypeSchema, np.ndarray, 'bar', r'.*array_type.*'), + (DTypeSchema, np.integer, np.float32, r".*float.*"), + (DimsSchema, ("foo", "bar"), ("foo",), r".*length.*"), + (DimsSchema, ("foo", "bar"), ("foo", "baz"), r".*mismatch.*"), + (ShapeSchema, (1, 2, None), (1, 2), r".*number of dimensions.*"), + (ShapeSchema, (1, 4, 4), (1, 3, 4), r".*mismatch.*"), + (NameSchema, "foo", "bar", r".*name bar != foo.*"), + (ArrayTypeSchema, np.ndarray, "bar", r".*array_type.*"), # schema_args for ChunksSchema include [chunks, dims, shape] - (ChunksSchema, {'x': 3}, (((2, 2),), ('x',), (4,)), r'.*(3).*'), - (ChunksSchema, {'x': (2, 1)}, (((2, 2),), ('x',), (4,)), r'.*(2, 1).*'), - (ChunksSchema, {'x': (2, 1)}, (None, ('x',), (4,)), r'.*expected array to be chunked.*'), - (ChunksSchema, True, (None, ('x',), (4,)), r'.*expected array to be chunked.*'), + (ChunksSchema, {"x": 3}, (((2, 2),), ("x",), (4,)), r".*(3).*"), + (ChunksSchema, {"x": (2, 1)}, (((2, 2),), ("x",), (4,)), r".*(2, 1).*"), + ( + ChunksSchema, + {"x": (2, 1)}, + (None, ("x",), (4,)), + r".*expected array to be chunked.*", + ), + (ChunksSchema, True, (None, ("x",), (4,)), r".*expected array to be chunked.*"), ( ChunksSchema, False, - (((2, 2),), ('x',), (4,)), - r'.*expected unchunked array but it is chunked*', + (((2, 2),), ("x",), (4,)), + r".*expected unchunked array but it is chunked*", + ), + (ChunksSchema, {"x": -1}, (((1, 2, 1),), ("x",), (4,)), r".*did not match.*"), + (ChunksSchema, {"x": 2}, (((2, 3, 2),), ("x",), (7,)), r".*did not match.*"), + (ChunksSchema, {"x": 2}, (((2, 2, 3),), ("x",), (7,)), r".*did not match.*"), + ( + ChunksSchema, + {"x": 2, "y": -1}, + (((2, 2), (5, 5)), ("x", "y"), (4, 10)), + r".*(5).*", ), - (ChunksSchema, {'x': -1}, (((1, 2, 1),), ('x',), (4,)), r'.*did not match.*'), - (ChunksSchema, {'x': 2}, (((2, 3, 2),), ('x',), (7,)), r'.*did not match.*'), - (ChunksSchema, {'x': 2}, (((2, 2, 3),), ('x',), (7,)), r'.*did not match.*'), - (ChunksSchema, {'x': 2, 'y': -1}, (((2, 2), (5, 5)), ('x', 'y'), (4, 10)), r'.*(5).*'), ], ) def test_component_raises_schema_error(component, schema_args, validate, match): @@ -160,39 +175,39 @@ def test_component_raises_schema_error(component, schema_args, validate, match): def test_chunks_schema_raises_for_invalid_chunks(): - with pytest.raises(ValueError, match=r'.*int.*'): + with pytest.raises(ValueError, match=r".*int.*"): schema = ChunksSchema(chunks=2) - schema.validate(((2, 2),), ('x',), (4,)) + schema.validate(((2, 2),), ("x",), (4,)) def test_unknown_array_type_raises(): - with pytest.raises(ValueError, match=r'.*unknown array_type.*'): - _ = ArrayTypeSchema.from_json('foo.array') + with pytest.raises(ValueError, match=r".*unknown array_type.*"): + _ = ArrayTypeSchema.from_json("foo.array") def test_dataarray_empty_constructor(): - da = xr.DataArray(np.ones(4, dtype='i4')) + da = xr.DataArray(np.ones(4, dtype="i4")) da_schema = DataArraySchema() - assert hasattr(da_schema, 'validate') + assert hasattr(da_schema, "validate") jsonschema.validate(da_schema.json, da_schema._json_schema) assert da_schema.json == {} da_schema.validate(da) @pytest.mark.parametrize( - 'kind, component, schema_args', + "kind, component, schema_args", [ - ('dtype', DTypeSchema, 'i4'), - ('dims', DimsSchema, ('x', None)), - ('shape', ShapeSchema, (2, None)), - ('name', NameSchema, 'foo'), - ('array_type', ArrayTypeSchema, np.ndarray), - ('chunks', ChunksSchema, False), + ("dtype", DTypeSchema, "i4"), + ("dims", DimsSchema, ("x", None)), + ("shape", ShapeSchema, (2, None)), + ("name", NameSchema, "foo"), + ("array_type", ArrayTypeSchema, np.ndarray), + ("chunks", ChunksSchema, False), ], ) def test_dataarray_component_constructors(kind, component, schema_args): - da = xr.DataArray(np.zeros((2, 4), dtype='i4'), dims=('x', 'y'), name='foo') + da = xr.DataArray(np.zeros((2, 4), dtype="i4"), dims=("x", "y"), name="foo") comp_schema = component(schema_args) schema = DataArraySchema(**{kind: schema_args}) assert comp_schema.json == getattr(schema, kind).json @@ -210,13 +225,13 @@ def test_dataarray_component_constructors(kind, component, schema_args): def test_dataarray_schema_validate_raises_for_invalid_input_type(): ds = xr.Dataset() schema = DataArraySchema() - with pytest.raises(ValueError, match='Input must be a xarray.DataArray'): + with pytest.raises(ValueError, match="Input must be a xarray.DataArray"): schema.validate(ds) def test_dataset_empty_constructor(): ds_schema = DatasetSchema() - assert hasattr(ds_schema, 'validate') + assert hasattr(ds_schema, "validate") jsonschema.validate(ds_schema.json, ds_schema._json_schema) ds_schema.json == {} @@ -225,22 +240,22 @@ def test_dataset_example(ds): ds_schema = DatasetSchema( { - 'foo': DataArraySchema(name='foo', dtype=np.int32, dims=['x']), - 'bar': DataArraySchema(name='bar', dtype=np.floating, dims=['x', 'y']), + "foo": DataArraySchema(name="foo", dtype=np.int32, dims=["x"]), + "bar": DataArraySchema(name="bar", dtype=np.floating, dims=["x", "y"]), } ) jsonschema.validate(ds_schema.json, ds_schema._json_schema) - assert list(ds_schema.json['data_vars'].keys()) == ['foo', 'bar'] + assert list(ds_schema.json["data_vars"].keys()) == ["foo", "bar"] ds_schema.validate(ds) - ds['foo'] = ds.foo.astype('float32') - with pytest.raises(SchemaError, match='dtype'): + ds["foo"] = ds.foo.astype("float32") + with pytest.raises(SchemaError, match="dtype"): ds_schema.validate(ds) - ds = ds.drop_vars('foo') - with pytest.raises(SchemaError, match='variable foo'): + ds = ds.drop_vars("foo") + with pytest.raises(SchemaError, match="variable foo"): ds_schema.validate(ds) # json roundtrip @@ -251,12 +266,12 @@ def test_dataset_example(ds): def test_checks_ds(ds): def check_foo(ds): - assert 'foo' in ds + assert "foo" in ds ds_schema = DatasetSchema(checks=[check_foo]) ds_schema.validate(ds) - ds = ds.drop_vars('foo') + ds = ds.drop_vars("foo") with pytest.raises(AssertionError): ds_schema.validate(ds) @@ -269,14 +284,16 @@ def check_foo(ds): def test_dataset_with_attrs_schema(): - name = 'name' - expected_value = 'expected_value' - actual_value = 'actual_value' + name = "name" + expected_value = "expected_value" + actual_value = "actual_value" ds = xr.Dataset(attrs={name: actual_value}) ds_schema = DatasetSchema(attrs={name: AttrSchema(value=expected_value)}) jsonschema.validate(ds_schema.json, ds_schema._json_schema) - ds_schema_2 = DatasetSchema(attrs=AttrsSchema({name: AttrSchema(value=expected_value)})) + ds_schema_2 = DatasetSchema( + attrs=AttrsSchema({name: AttrSchema(value=expected_value)}) + ) jsonschema.validate(ds_schema_2.json, ds_schema_2._json_schema) with pytest.raises(SchemaError): ds_schema.validate(ds) @@ -285,10 +302,10 @@ def test_dataset_with_attrs_schema(): def test_attrs_extra_key(): - name = 'name' - value = 'value_2' - name_2 = 'name_2' - value_2 = 'value_2' + name = "name" + value = "value_2" + name_2 = "name_2" + value_2 = "value_2" ds = xr.Dataset(attrs={name: value}) ds_schema = DatasetSchema( attrs=AttrsSchema( @@ -308,10 +325,10 @@ def test_attrs_extra_key(): def test_attrs_missing_key(): - name = 'name' - value = 'value_2' - name_2 = 'name_2' - value_2 = 'value_2' + name = "name" + value = "value_2" + name_2 = "name_2" + value_2 = "value_2" ds = xr.Dataset(attrs={name: value, name_2: value_2}) ds_schema = DatasetSchema( attrs=AttrsSchema(attrs={name: AttrSchema(value=value)}, allow_extra_keys=False) @@ -321,13 +338,13 @@ def test_attrs_missing_key(): def test_checks_da(ds): - da = ds['foo'] + da = ds["foo"] def check_foo(da): - assert da.name == 'foo' + assert da.name == "foo" def check_bar(da): - assert da.name == 'bar' + assert da.name == "bar" schema = DataArraySchema(checks=[check_foo]) schema.validate(da) diff --git a/xarray_schema/__init__.py b/xarray_schema/__init__.py index 28873d8..e28376c 100644 --- a/xarray_schema/__init__.py +++ b/xarray_schema/__init__.py @@ -1,21 +1,18 @@ -from pkg_resources import DistributionNotFound, get_distribution +#!/usr/bin/env python3 +# flake8: noqa -from .base import SchemaError # noqa: F401 -from .components import ( # noqa: F401 - ArrayTypeSchema, - AttrSchema, - AttrsSchema, - ChunksSchema, - DimsSchema, - DTypeSchema, - NameSchema, - ShapeSchema, -) -from .dataarray import CoordsSchema, DataArraySchema # noqa: F401 -from .dataset import DatasetSchema # noqa: F401 +from importlib.metadata import PackageNotFoundError as _PackageNotFoundError +from importlib.metadata import version as _version + +from .base import SchemaError +from .components import (ArrayTypeSchema, AttrSchema, AttrsSchema, + ChunksSchema, DimsSchema, DTypeSchema, NameSchema, + ShapeSchema) +from .dataarray import CoordsSchema, DataArraySchema +from .dataset import DatasetSchema try: - __version__ = get_distribution(__name__).version -except DistributionNotFound: # noqa: F401; pragma: no cover + __version__ = _version(__name__) +except _PackageNotFoundError: # package is not installed - pass + __version__ = "unknown" diff --git a/xarray_schema/base.py b/xarray_schema/base.py index 94d4447..7a4ad38 100644 --- a/xarray_schema/base.py +++ b/xarray_schema/base.py @@ -1,17 +1,17 @@ import json from abc import abstractmethod -from typing import Any, Dict +from typing import Any class SchemaError(Exception): - '''Custom Schema Error''' + """Custom Schema Error""" pass class BaseSchema: - _json_schema: Dict[str, Any] + _json_schema: dict[str, Any] # def validate(self, obj: Any) -> None: # pass @@ -22,7 +22,7 @@ def json(self) -> Any: # pragma: no cover pass def to_json(self, **dumps_kws) -> str: - '''Generate a JSON string representation of this schema + """Generate a JSON string representation of this schema Parameters ---------- @@ -32,7 +32,7 @@ def to_json(self, **dumps_kws) -> str: Returns ------- str - ''' + """ return json.dumps(self.json, **dumps_kws) @classmethod diff --git a/xarray_schema/components.py b/xarray_schema/components.py index d212ddc..05d3ae1 100644 --- a/xarray_schema/components.py +++ b/xarray_schema/components.py @@ -1,5 +1,7 @@ -from collections.abc import Iterable -from typing import Any, Dict, Hashable, Mapping, Optional, Tuple, Union +from __future__ import annotations + +from collections.abc import Hashable, Iterable, Mapping +from typing import Any import numpy as np @@ -8,7 +10,7 @@ class DTypeSchema(BaseSchema): - '''Datatype schema + """Datatype schema Parameters ---------- @@ -18,34 +20,46 @@ class DTypeSchema(BaseSchema): Raises ------ SchemaError - ''' + """ - _json_schema = {'type': 'string'} + _json_schema = {"type": "string"} def __init__(self, dtype: DTypeLike) -> None: - if dtype in [np.floating, np.integer, np.signedinteger, np.unsignedinteger, np.generic]: + if dtype in [ + np.floating, + np.integer, + np.signedinteger, + np.unsignedinteger, + np.generic, + ]: self.dtype = dtype else: self.dtype = np.dtype(dtype) @classmethod def from_json(cls, obj: str): - if obj in ['floating', 'integer', 'signedinteger', 'unsignedinteger', 'generic']: + if obj in [ + "floating", + "integer", + "signedinteger", + "unsignedinteger", + "generic", + ]: dtype = getattr(np, obj) else: dtype = obj return cls(dtype) def validate(self, dtype: DTypeLike) -> None: - '''Validate dtype + """Validate dtype Parameters ---------- dtype : Any Dtype of the DataArray. - ''' + """ if not np.issubdtype(dtype, self.dtype): - raise SchemaError(f'dtype {dtype} != {self.dtype}') + raise SchemaError(f"dtype {dtype} != {self.dtype}") @property def json(self) -> str: @@ -53,11 +67,11 @@ def json(self) -> str: return self.dtype.str else: # fallbacks - return str(getattr(self.dtype, '__name__', str(self.dtype))) + return str(getattr(self.dtype, "__name__", str(self.dtype))) class DimsSchema(BaseSchema): - '''Dimensions schema + """Dimensions schema Parameters ---------- @@ -67,9 +81,9 @@ class DimsSchema(BaseSchema): Raises ------ SchemaError - ''' + """ - _json_schema = {'type': 'array', 'items': {'type': ['string', 'null']}} + _json_schema = {"type": "array", "items": {"type": ["string", "null"]}} def __init__(self, dims: DimsT) -> None: self.dims = dims @@ -79,19 +93,21 @@ def from_json(cls, obj: DimsT): return cls(obj) def validate(self, dims: tuple) -> None: - '''Validate dimensions + """Validate dimensions Parameters ---------- dims : Tuple[Union[str, None]] Dimensions of the DataArray. `None` may be used as a wildcard value. - ''' + """ if len(self.dims) != len(dims): - raise SchemaError(f'length of dims does not match: {len(dims)} != {len(self.dims)}') + raise SchemaError( + f"length of dims does not match: {len(dims)} != {len(self.dims)}" + ) for i, (actual, expected) in enumerate(zip(dims, self.dims)): if expected is not None and actual != expected: - raise SchemaError(f'dim mismatch in axis {i}: {actual} != {expected}') + raise SchemaError(f"dim mismatch in axis {i}: {actual} != {expected}") @property def json(self) -> list: @@ -99,7 +115,7 @@ def json(self) -> list: class ShapeSchema(BaseSchema): - '''Shape schema + """Shape schema Parameters ---------- @@ -109,9 +125,9 @@ class ShapeSchema(BaseSchema): Raises ------ SchemaError - ''' + """ - _json_schema = {'type': 'array'} + _json_schema = {"type": "array"} def __init__(self, shape: ShapeT) -> None: self.shape = shape @@ -121,21 +137,21 @@ def from_json(cls, obj: ShapeT): return cls(obj) def validate(self, shape: tuple) -> None: - '''Validate shape + """Validate shape Parameters ---------- shape : ShapeT Shape of the DataArray. `None` may be used as a wildcard value. - ''' + """ if len(self.shape) != len(shape): raise SchemaError( - f'number of dimensions in shape ({len(shape)}) != da.ndim ({len(self.shape)})' + f"number of dimensions in shape ({len(shape)}) != da.ndim ({len(self.shape)})" ) for i, (actual, expected) in enumerate(zip(shape, self.shape)): if expected is not None and actual != expected: - raise SchemaError(f'shape mismatch in axis {i}: {actual} != {expected}') + raise SchemaError(f"shape mismatch in axis {i}: {actual} != {expected}") @property def json(self) -> list: @@ -143,7 +159,7 @@ def json(self) -> list: class NameSchema(BaseSchema): - '''Name schema + """Name schema Parameters ---------- @@ -153,9 +169,9 @@ class NameSchema(BaseSchema): Raises ------ SchemaError - ''' + """ - _json_schema = {'type': 'string'} + _json_schema = {"type": "string"} def __init__(self, name: str) -> None: self.name = name @@ -165,18 +181,18 @@ def from_json(cls, obj: str): return cls(obj) def validate(self, name: Hashable) -> None: - '''Validate name + """Validate name Parameters ---------- name : str, optional Name of the DataArray. Currently requires an exact string match. - ''' + """ # TODO: support regular expressions # - http://json-schema.org/understanding-json-schema/reference/regular_expressions.html # - https://docs.python.org/3.9/library/re.html if self.name != name: - raise SchemaError(f'name {name} != {self.name}') + raise SchemaError(f"name {name} != {self.name}") @property def json(self) -> str: @@ -184,7 +200,7 @@ def json(self) -> str: class ChunksSchema(BaseSchema): - '''Chunks schema + """Chunks schema Parameters ---------- @@ -195,9 +211,9 @@ class ChunksSchema(BaseSchema): Raises ------ SchemaError - ''' + """ - _json_schema = {'type': ['boolean', 'object']} + _json_schema = {"type": ["boolean", "object"]} def __init__(self, chunks: ChunksT) -> None: self.chunks = chunks @@ -207,9 +223,12 @@ def from_json(cls, obj: dict): return cls(obj) # TODO: this will likely need input validation. def validate( - self, chunks: Optional[Tuple[Tuple[int, ...], ...]], dims: Tuple, shape: Tuple[int, ...] + self, + chunks: tuple[tuple[int, ...], ...] | None, + dims: tuple, + shape: tuple[int, ...], ) -> None: - '''Validate chunks + """Validate chunks Parameters ---------- @@ -219,16 +238,16 @@ def validate( Dimension keys from array. shape : tuple of int Shape of array. - ''' + """ if isinstance(self.chunks, bool): if self.chunks and not chunks: - raise SchemaError('expected array to be chunked but it is not') + raise SchemaError("expected array to be chunked but it is not") elif not self.chunks and chunks: - raise SchemaError('expected unchunked array but it is chunked') + raise SchemaError("expected unchunked array but it is chunked") elif isinstance(self.chunks, dict): if chunks is None: - raise SchemaError('expected array to be chunked but it is not') + raise SchemaError("expected array to be chunked but it is not") dim_chunks = dict(zip(dims, chunks)) dim_sizes = dict(zip(dims, shape)) # check whether chunk sizes are regular because we assume the first chunk to be representative below @@ -239,17 +258,17 @@ def validate( ec = dim_sizes[key] ac = dim_chunks[key] if any([a != ec for a in ac[:-1]]) or ac[-1] > ec: - raise SchemaError(f'{key} chunks did not match: {ac} != {ec}') + raise SchemaError(f"{key} chunks did not match: {ac} != {ec}") else: # assumes ec is an iterable ac = dim_chunks[key] if ec is not None and tuple(ac) != tuple(ec): - raise SchemaError(f'{key} chunks did not match: {ac} != {ec}') + raise SchemaError(f"{key} chunks did not match: {ac} != {ec}") else: - raise ValueError(f'got unknown chunks type: {type(self.chunks)}') + raise ValueError(f"got unknown chunks type: {type(self.chunks)}") @property - def json(self) -> Union[bool, Dict[str, Any]]: + def json(self) -> bool | dict[str, Any]: if isinstance(self.chunks, bool): return self.chunks else: @@ -263,7 +282,7 @@ def json(self) -> Union[bool, Dict[str, Any]]: class ArrayTypeSchema(BaseSchema): - '''Array type schema + """Array type schema Parameters ---------- @@ -273,9 +292,9 @@ class ArrayTypeSchema(BaseSchema): Raises ------ SchemaError - ''' + """ - _json_schema = {'type': 'string'} + _json_schema = {"type": "string"} def __init__(self, array_type: Any) -> None: self.array_type = array_type @@ -291,19 +310,19 @@ def from_json(cls, obj: str): elif obj == "": array_type = np.ndarray else: - raise ValueError(f'unknown array_type: {obj}') + raise ValueError(f"unknown array_type: {obj}") return cls(array_type) def validate(self, array: Any) -> None: - '''Validate array_type + """Validate array_type Parameters ---------- array : array_like array_type of the DataArray. `None` may be used as a wildcard value. - ''' + """ if not isinstance(array, self.array_type): - raise SchemaError(f'array_type {type(array)} != {self.array_type}') + raise SchemaError(f"array_type {type(array)} != {self.array_type}") @property def json(self) -> str: @@ -311,7 +330,7 @@ def json(self) -> str: class AttrSchema(BaseSchema): - '''Attribute schema + """Attribute schema Parameters ---------- @@ -323,9 +342,12 @@ class AttrSchema(BaseSchema): Raises ------ SchemaError - ''' + """ - _json_schema = {'type': 'string', 'value': ['string', 'number', 'array', 'boolean', 'null']} + _json_schema = { + "type": "string", + "value": ["string", "number", "array", "boolean", "null"], + } def __init__(self, type: Any = None, value: Any = None): self.type = type @@ -336,28 +358,28 @@ def from_json(cls, obj: str): return cls(obj) def validate(self, attr: Any): - '''Validate attrs + """Validate attrs Parameters ---------- attr : any attribute, `None` may be used as a wildcard value. - ''' + """ if self.type is not None: if not isinstance(attr, self.type): - SchemaError(f'attrs {attr} is not of type {self.type}') + SchemaError(f"attrs {attr} is not of type {self.type}") if self.value is not None: if self.value is not None and self.value != attr: - raise SchemaError(f'name {attr} != {self.value}') + raise SchemaError(f"name {attr} != {self.value}") @property def json(self) -> dict: - return {'type': self.type, 'value': self.value} + return {"type": self.type, "value": self.value} class AttrsSchema(BaseSchema): - '''Attributes schema + """Attributes schema Parameters ---------- @@ -371,16 +393,16 @@ class AttrsSchema(BaseSchema): Raises ------ SchemaError - ''' + """ _json_schema = { - 'type': 'object', - 'properties': { - 'require_all_keys': { - 'type': 'boolean' + "type": "object", + "properties": { + "require_all_keys": { + "type": "boolean" }, # Question: is this the same as JSON's additionalProperties? - 'allow_extra_keys': {'type': 'boolean'}, - 'attrs': {'type': 'object'}, + "allow_extra_keys": {"type": "boolean"}, + "attrs": {"type": "object"}, }, } @@ -397,44 +419,44 @@ def __init__( @classmethod def from_json(cls, obj: dict): attrs = {} - for key, val in obj['attrs'].items(): + for key, val in obj["attrs"].items(): attrs[key] = AttrSchema(**val) return cls( attrs, - require_all_keys=obj['require_all_keys'], - allow_extra_keys=obj['allow_extra_keys'], + require_all_keys=obj["require_all_keys"], + allow_extra_keys=obj["allow_extra_keys"], ) def validate(self, attrs: Any) -> None: - '''Validate attrs + """Validate attrs Parameters ---------- attrs : dict_like attrs dict, `None` may be used as a wildcard value. - ''' + """ if self.require_all_keys: missing_keys = set(self.attrs) - set(attrs) if missing_keys: - raise SchemaError(f'attrs has missing keys: {missing_keys}') + raise SchemaError(f"attrs has missing keys: {missing_keys}") if not self.allow_extra_keys: extra_keys = set(attrs) - set(self.attrs) if extra_keys: - raise SchemaError(f'attrs has extra keys: {extra_keys}') + raise SchemaError(f"attrs has extra keys: {extra_keys}") for key, attr_schema in self.attrs.items(): if key not in attrs: - raise SchemaError(f'key {key} not in attrs') + raise SchemaError(f"key {key} not in attrs") else: attr_schema.validate(attrs[key]) @property def json(self) -> dict: obj = { - 'require_all_keys': self.require_all_keys, - 'allow_extra_keys': self.allow_extra_keys, - 'attrs': {k: v.json for k, v in self.attrs.items()}, + "require_all_keys": self.require_all_keys, + "allow_extra_keys": self.allow_extra_keys, + "attrs": {k: v.json for k, v in self.attrs.items()}, } return obj diff --git a/xarray_schema/dataarray.py b/xarray_schema/dataarray.py index 4a71e62..9e1aac2 100644 --- a/xarray_schema/dataarray.py +++ b/xarray_schema/dataarray.py @@ -1,24 +1,19 @@ from __future__ import annotations -from typing import Any, Callable, Dict, Hashable, List, Mapping, Optional, Union +import contextlib +from collections.abc import Hashable, Mapping +from typing import Any, Callable import xarray as xr from .base import BaseSchema, SchemaError -from .components import ( - ArrayTypeSchema, - AttrsSchema, - ChunksSchema, - DimsSchema, - DTypeSchema, - NameSchema, - ShapeSchema, -) +from .components import (ArrayTypeSchema, AttrsSchema, ChunksSchema, + DimsSchema, DTypeSchema, NameSchema, ShapeSchema) from .types import ChunksT, DimsT, DTypeLike, ShapeT class DataArraySchema(BaseSchema): - '''A light-weight xarray.DataArray validator + """A light-weight xarray.DataArray validator Parameters ---------- @@ -37,31 +32,40 @@ class DataArraySchema(BaseSchema): Type of the underlying data in a DataArray (e.g. `numpy.ndarray`), by default None checks : List[Callable], optional List of callables that take and return a DataArray, by default None - ''' - - _json_schema = {'type': 'object'} - _schema_slots = ['dtype', 'dims', 'shape', 'coords', 'name', 'chunks', 'attrs', 'array_type'] - - _dtype: Union[DTypeSchema, None] - _shape: Union[ShapeSchema, None] - _dims: Union[DimsSchema, None] - _name: Union[NameSchema, None] - _coords: Union[Any, None] - _chunks: Union[ChunksSchema, None] - _attrs: Union[AttrsSchema, None] - _array_type: Union[ArrayTypeSchema, None] + """ + + _json_schema = {"type": "object"} + _schema_slots = [ + "dtype", + "dims", + "shape", + "coords", + "name", + "chunks", + "attrs", + "array_type", + ] + + _dtype: DTypeSchema | None + _shape: ShapeSchema | None + _dims: DimsSchema | None + _name: NameSchema | None + _coords: Any | None + _chunks: ChunksSchema | None + _attrs: AttrsSchema | None + _array_type: ArrayTypeSchema | None def __init__( self, - dtype: Union[DTypeLike, DTypeSchema] = None, - shape: Union[ShapeT, ShapeSchema] = None, - dims: Union[DimsT, DimsSchema] = None, - name: Union[str, NameSchema] = None, - coords: Dict[str, Any] = None, - chunks: Union[ChunksT, ChunksSchema] = None, + dtype: DTypeLike | DTypeSchema = None, + shape: ShapeT | ShapeSchema = None, + dims: DimsT | DimsSchema = None, + name: str | NameSchema = None, + coords: dict[str, Any] = None, + chunks: ChunksT | ChunksSchema = None, array_type: Any = None, attrs: Mapping[str, Any] = None, - checks: List[Callable] = None, + checks: list[Callable] = None, ) -> None: # see https://github.com/python/mypy/issues/3004 self.dtype = dtype # type: ignore @@ -75,18 +79,18 @@ def __init__( self.checks = checks # type: ignore @property - def dtype(self) -> Union[DTypeSchema, None]: + def dtype(self) -> DTypeSchema | None: return self._dtype @dtype.setter - def dtype(self, value: Union[DTypeSchema, DTypeLike, None]): + def dtype(self, value: DTypeSchema | DTypeLike | None): if value is None or isinstance(value, DTypeSchema): self._dtype = value else: self._dtype = DTypeSchema(value) @property - def dims(self) -> Union[DimsSchema, None]: + def dims(self) -> DimsSchema | None: return self._dims @dims.setter @@ -97,18 +101,18 @@ def dims(self, value): self._dims = DimsSchema(value) @property - def shape(self) -> Optional[ShapeSchema]: + def shape(self) -> ShapeSchema | None: return self._shape @shape.setter - def shape(self, value: Union[ShapeSchema, ShapeT, None]): + def shape(self, value: ShapeSchema | ShapeT | None): if value is None or isinstance(value, ShapeSchema): self._shape = value else: self._shape = ShapeSchema(value) @property - def chunks(self) -> Optional[ChunksSchema]: + def chunks(self) -> ChunksSchema | None: return self._chunks @chunks.setter @@ -119,7 +123,7 @@ def chunks(self, value): self._chunks = ChunksSchema(value) @property - def name(self) -> Optional[NameSchema]: + def name(self) -> NameSchema | None: return self._name @name.setter @@ -130,7 +134,7 @@ def name(self, value): self._name = NameSchema(value) @property - def array_type(self) -> Optional[ArrayTypeSchema]: + def array_type(self) -> ArrayTypeSchema | None: return self._array_type @array_type.setter @@ -141,7 +145,7 @@ def array_type(self, value): self._array_type = ArrayTypeSchema(value) @property - def attrs(self) -> Optional[AttrsSchema]: + def attrs(self) -> AttrsSchema | None: return self._attrs @attrs.setter @@ -152,7 +156,7 @@ def attrs(self, value): self._attrs = AttrsSchema(value) @property - def coords(self) -> Optional[CoordsSchema]: + def coords(self) -> CoordsSchema | None: return self._coords @coords.setter @@ -163,20 +167,21 @@ def coords(self, value): self._coords = CoordsSchema(value) @property - def checks(self) -> List[Callable]: + def checks(self) -> list[Callable]: return self._checks @checks.setter def checks(self, value): - if value is not None: - if not all([callable(f) for f in value]): - raise ValueError('All checks must be callables') - self._checks = value - else: + if value is None: self._checks = [] + elif not all(callable(f) for f in value): + raise ValueError("All checks must be callables") + else: + self._checks = value + def validate(self, da: xr.DataArray) -> None: - '''Check if the DataArray complies with the Schema. + """Check if the DataArray complies with the Schema. Parameters ---------- @@ -191,9 +196,9 @@ def validate(self, da: xr.DataArray) -> None: Raises ------ SchemaError - ''' + """ if not isinstance(da, xr.DataArray): - raise ValueError('Input must be a xarray.DataArray') + raise ValueError("Input must be a xarray.DataArray") if self.dtype is not None: self.dtype.validate(da.dtype) @@ -226,38 +231,36 @@ def validate(self, da: xr.DataArray) -> None: def json(self) -> dict: obj = {} for slot in self._schema_slots: - try: + with contextlib.suppress(AttributeError): obj[slot] = getattr(self, slot).json - except AttributeError: - pass return obj @classmethod def from_json(cls, obj: dict): kwargs = {} - if 'dtype' in obj: - kwargs['dtype'] = DTypeSchema.from_json(obj['dtype']) - if 'shape' in obj: - kwargs['shape'] = ShapeSchema.from_json(obj['shape']) - if 'dims' in obj: - kwargs['dims'] = DimsSchema.from_json(obj['dims']) - if 'name' in obj: - kwargs['name'] = NameSchema.from_json(obj['name']) - if 'coords' in obj: - kwargs['coords'] = CoordsSchema.from_json(obj['coords']) - if 'chunks' in obj: - kwargs['chunks'] = ChunksSchema.from_json(obj['chunks']) - if 'array_type' in obj: - kwargs['array_type'] = ArrayTypeSchema.from_json(obj['array_type']) - if 'attrs' in obj: - kwargs['attrs'] = AttrsSchema.from_json(obj['attrs']) + if "dtype" in obj: + kwargs["dtype"] = DTypeSchema.from_json(obj["dtype"]) + if "shape" in obj: + kwargs["shape"] = ShapeSchema.from_json(obj["shape"]) + if "dims" in obj: + kwargs["dims"] = DimsSchema.from_json(obj["dims"]) + if "name" in obj: + kwargs["name"] = NameSchema.from_json(obj["name"]) + if "coords" in obj: + kwargs["coords"] = CoordsSchema.from_json(obj["coords"]) + if "chunks" in obj: + kwargs["chunks"] = ChunksSchema.from_json(obj["chunks"]) + if "array_type" in obj: + kwargs["array_type"] = ArrayTypeSchema.from_json(obj["array_type"]) + if "attrs" in obj: + kwargs["attrs"] = AttrsSchema.from_json(obj["attrs"]) return cls(**kwargs) class CoordsSchema(BaseSchema): - '''Schema container for Coordinates + """Schema container for Coordinates Parameters ---------- @@ -271,22 +274,22 @@ class CoordsSchema(BaseSchema): Raises ------ SchemaError - ''' + """ _json_schema = { - 'type': 'object', - 'properties': { - 'require_all_keys': { - 'type': 'boolean' + "type": "object", + "properties": { + "require_all_keys": { + "type": "boolean" }, # Question: is this the same as JSON's additionalProperties? - 'allow_extra_keys': {'type': 'boolean'}, - 'coords': {'type': 'object'}, + "allow_extra_keys": {"type": "boolean"}, + "coords": {"type": "object"}, }, } def __init__( self, - coords: Dict[Hashable, DataArraySchema], + coords: dict[Hashable, DataArraySchema], require_all_keys: bool = True, allow_extra_keys: bool = True, ) -> None: @@ -296,40 +299,39 @@ def __init__( @classmethod def from_json(cls, obj: dict): - coords = obj.pop('coords', {}) + coords = obj.pop("coords", {}) coords = {k: DataArraySchema(**v) for k, v in coords.items()} return cls(coords, **obj) def validate(self, coords: Any) -> None: - '''Validate coords + """Validate coords Parameters ---------- coords : dict_like coords of the DataArray. `None` may be used as a wildcard value. - ''' + """ if self.require_all_keys: missing_keys = set(self.coords) - set(coords) if missing_keys: - raise SchemaError(f'coords has missing keys: {missing_keys}') + raise SchemaError(f"coords has missing keys: {missing_keys}") if not self.allow_extra_keys: extra_keys = set(coords) - set(self.coords) if extra_keys: - raise SchemaError(f'coords has extra keys: {extra_keys}') + raise SchemaError(f"coords has extra keys: {extra_keys}") for key, da_schema in self.coords.items(): if key not in coords: - raise SchemaError(f'key {key} not in coords') + raise SchemaError(f"key {key} not in coords") else: da_schema.validate(coords[key]) @property def json(self) -> dict: - obj = { - 'require_all_keys': self.require_all_keys, - 'allow_extra_keys': self.allow_extra_keys, - 'coords': {k: v.json for k, v in self.coords.items()}, + return { + "require_all_keys": self.require_all_keys, + "allow_extra_keys": self.allow_extra_keys, + "coords": {k: v.json for k, v in self.coords.items()}, } - return obj diff --git a/xarray_schema/dataset.py b/xarray_schema/dataset.py index 97dc3b4..857bea0 100644 --- a/xarray_schema/dataset.py +++ b/xarray_schema/dataset.py @@ -1,4 +1,7 @@ -from typing import Any, Callable, Dict, Hashable, Iterable, Optional, Union +from __future__ import annotations + +from collections.abc import Hashable, Iterable +from typing import Any, Callable import xarray as xr @@ -8,7 +11,7 @@ class DatasetSchema(BaseSchema): - '''A light-weight xarray.Dataset validator + """A light-weight xarray.Dataset validator Parameters ---------- @@ -16,22 +19,22 @@ class DatasetSchema(BaseSchema): Per-variable DataArraySchema's, by default None checks : Iterable[Callable], optional Dataset wide checks, by default None - ''' + """ _json_schema = { - 'type': 'object', - 'properties': { - 'data_vars': {'type': 'object'}, - 'coords': {'type': 'object'}, - 'attrs': {'type': 'object'}, + "type": "object", + "properties": { + "data_vars": {"type": "object"}, + "coords": {"type": "object"}, + "attrs": {"type": "object"}, }, } def __init__( self, - data_vars: Optional[Dict[Hashable, Optional[DataArraySchema]]] = None, - coords: Union[CoordsSchema, Dict[Hashable, DataArraySchema], None] = None, - attrs: Union[AttrsSchema, Dict[Hashable, AttrSchema], None] = None, + data_vars: dict[Hashable, DataArraySchema | None] | None = None, + coords: CoordsSchema | dict[Hashable, DataArraySchema] | None = None, + attrs: AttrsSchema | dict[Hashable, AttrSchema] | None = None, checks: Iterable[Callable] = None, ) -> None: @@ -43,19 +46,23 @@ def __init__( @classmethod def from_json(cls, obj: dict): kwargs = {} - if 'data_vars' in obj: - kwargs['data_vars'] = { - k: DataArraySchema.from_json(v) for k, v in obj['data_vars'].items() + if "data_vars" in obj: + kwargs["data_vars"] = { + k: DataArraySchema.from_json(v) for k, v in obj["data_vars"].items() + } + if "coords" in obj: + kwargs["coords"] = { + k: CoordsSchema.from_json(v) for k, v in obj["coords"].items() + } + if "attrs" in obj: + kwargs["attrs"] = { + k: AttrsSchema.from_json(v) for k, v in obj["attrs"].items() } - if 'coords' in obj: - kwargs['coords'] = {k: CoordsSchema.from_json(v) for k, v in obj['coords'].items()} - if 'attrs' in obj: - kwargs['attrs'] = {k: AttrsSchema.from_json(v) for k, v in obj['attrs'].items()} return cls(**kwargs) def validate(self, ds: xr.Dataset) -> None: - '''Check if the Dataset complies with the Schema. + """Check if the Dataset complies with the Schema. Parameters ---------- @@ -70,18 +77,18 @@ def validate(self, ds: xr.Dataset) -> None: Raises ------ SchemaError - ''' + """ if self.data_vars is not None: for key, da_schema in self.data_vars.items(): if da_schema is not None: if key not in ds.data_vars: - raise SchemaError(f'data variable {key} not in ds') + raise SchemaError(f"data variable {key} not in ds") else: da_schema.validate(ds.data_vars[key]) if self.coords is not None: # pragma: no cover - raise NotImplementedError('coords schema not implemented yet') + raise NotImplementedError("coords schema not implemented yet") if self.attrs: self.attrs.validate(ds.attrs) @@ -91,22 +98,22 @@ def validate(self, ds: xr.Dataset) -> None: check(ds) @property - def attrs(self) -> Union[AttrsSchema, None]: + def attrs(self) -> AttrsSchema | None: return self._attrs @attrs.setter - def attrs(self, value: Union[AttrsSchema, Dict[Hashable, Any], None]): + def attrs(self, value: AttrsSchema | dict[Hashable, Any] | None): if value is None or isinstance(value, AttrsSchema): self._attrs = value else: self._attrs = AttrsSchema(value) @property - def data_vars(self) -> Optional[Dict[Hashable, Optional[DataArraySchema]]]: + def data_vars(self) -> dict[Hashable, DataArraySchema | None] | None: return self._data_vars # type: ignore @data_vars.setter - def data_vars(self, value: Optional[Dict[Hashable, Optional[DataArraySchema]]]): + def data_vars(self, value: dict[Hashable, DataArraySchema | None] | None): if isinstance(value, dict): self._data_vars = {} for k, v in value.items(): @@ -117,14 +124,14 @@ def data_vars(self, value: Optional[Dict[Hashable, Optional[DataArraySchema]]]): elif value is None: self._data_vars = None # type: ignore else: - raise ValueError('must set data_vars with a dict') + raise ValueError("must set data_vars with a dict") @property - def coords(self) -> Optional[CoordsSchema]: + def coords(self) -> CoordsSchema | None: return self._coords # type: ignore @coords.setter - def coords(self, value: Optional[Union[CoordsSchema, Dict[Hashable, DataArraySchema]]]): + def coords(self, value: CoordsSchema | dict[Hashable, DataArraySchema] | None): if value is None or isinstance(value, CoordsSchema): self._coords = value else: @@ -132,10 +139,13 @@ def coords(self, value: Optional[Union[CoordsSchema, Dict[Hashable, DataArraySch @property def json(self): - obj = {'data_vars': {}, 'attrs': self.attrs.json if self.attrs is not None else {}} + obj = { + "data_vars": {}, + "attrs": self.attrs.json if self.attrs is not None else {}, + } if self.data_vars: for key, var in self.data_vars.items(): - obj['data_vars'][key] = var.json + obj["data_vars"][key] = var.json if self.coords: - obj['coords'] = self.coords.json + obj["coords"] = self.coords.json return obj diff --git a/xarray_schema/types.py b/xarray_schema/types.py index bd76b89..e0cfe86 100644 --- a/xarray_schema/types.py +++ b/xarray_schema/types.py @@ -1,7 +1,7 @@ -from typing import Dict, Tuple, Union +from __future__ import annotations -from numpy.typing import DTypeLike # noqa: F401 +from typing import Union -DimsT = Tuple[Union[str, None]] -ShapeT = Tuple[Union[int, None]] -ChunksT = Union[bool, Dict[str, Union[int, None]]] +DimsT = tuple[Union[str, None]] +ShapeT = tuple[Union[int, None]] +ChunksT = Union[bool, dict[str, Union[int, None]]] From db108c6fd5b58a7cbf2a9d3e2c1696970bf716c4 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 15 Nov 2022 18:35:57 -0700 Subject: [PATCH 3/4] Update pre-commit hooks --- pyproject.toml | 10 ++++++---- .mypy.ini | 4 ---- setup.cfg | 10 ++++++++++ tests/test_core.py | 13 ++++++++++--- xarray_schema/__init__.py | 21 +++++++++++++-------- xarray_schema/dataarray.py | 11 +++++++++-- 6 files changed, 48 insertions(+), 21 deletions(-) delete mode 100644 .mypy.ini diff --git a/ pyproject.toml b/ pyproject.toml index a0465ed..5a9911a 100644 --- a/ pyproject.toml +++ b/ pyproject.toml @@ -57,11 +57,13 @@ version_scheme = "post-release" local_scheme = "dirty-tag" fallback_version = "999" -[tool.isort] -profile = "black" -skip_gitignore = true - [tool.pytest.ini_options] log_cli = true log_level = "INFO" + +[tool.mypy] +allow_redefinition = true +exclude = "properties|asv_bench|doc|tests|flycheck" +files = "xarray_schema/*.py" +show_error_codes = true diff --git a/.mypy.ini b/.mypy.ini deleted file mode 100644 index 54b16a3..0000000 --- a/.mypy.ini +++ /dev/null @@ -1,4 +0,0 @@ -[mypy] -show_column_numbers = true -show_error_codes = true -show_error_context = true diff --git a/setup.cfg b/setup.cfg index 2e70fb8..1799da2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,5 +6,15 @@ max-line-length = 100 max-complexity = 18 select = B,C,E,F,W,T4,B9 +[isort] +profile=black +multi_line_output=3 +include_trailing_comma=True +force_grid_wrap=0 +combine_as_imports=True +line_length=100 +known_first_party=xarray_schema + + [mypy] plugins = numpy.typing.mypy_plugin diff --git a/tests/test_core.py b/tests/test_core.py index 11d4238..2ffffcb 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -8,9 +8,16 @@ from xarray_schema import DataArraySchema, DatasetSchema from xarray_schema.base import SchemaError -from xarray_schema.components import (ArrayTypeSchema, AttrSchema, AttrsSchema, - ChunksSchema, DimsSchema, DTypeSchema, - NameSchema, ShapeSchema) +from xarray_schema.components import ( + ArrayTypeSchema, + AttrSchema, + AttrsSchema, + ChunksSchema, + DimsSchema, + DTypeSchema, + NameSchema, + ShapeSchema, +) from xarray_schema.dataarray import CoordsSchema diff --git a/xarray_schema/__init__.py b/xarray_schema/__init__.py index e28376c..448a676 100644 --- a/xarray_schema/__init__.py +++ b/xarray_schema/__init__.py @@ -1,18 +1,23 @@ #!/usr/bin/env python3 # flake8: noqa -from importlib.metadata import PackageNotFoundError as _PackageNotFoundError -from importlib.metadata import version as _version +from importlib.metadata import PackageNotFoundError as _PackageNotFoundError, version as _version from .base import SchemaError -from .components import (ArrayTypeSchema, AttrSchema, AttrsSchema, - ChunksSchema, DimsSchema, DTypeSchema, NameSchema, - ShapeSchema) +from .components import ( + ArrayTypeSchema, + AttrSchema, + AttrsSchema, + ChunksSchema, + DimsSchema, + DTypeSchema, + NameSchema, + ShapeSchema, +) from .dataarray import CoordsSchema, DataArraySchema from .dataset import DatasetSchema try: __version__ = _version(__name__) -except _PackageNotFoundError: - # package is not installed - __version__ = "unknown" +except _PackageNotFoundError: # pragma: no cover + __version__ = "unknown" # pragma: no cover diff --git a/xarray_schema/dataarray.py b/xarray_schema/dataarray.py index 9e1aac2..10d7127 100644 --- a/xarray_schema/dataarray.py +++ b/xarray_schema/dataarray.py @@ -7,8 +7,15 @@ import xarray as xr from .base import BaseSchema, SchemaError -from .components import (ArrayTypeSchema, AttrsSchema, ChunksSchema, - DimsSchema, DTypeSchema, NameSchema, ShapeSchema) +from .components import ( + ArrayTypeSchema, + AttrsSchema, + ChunksSchema, + DimsSchema, + DTypeSchema, + NameSchema, + ShapeSchema, +) from .types import ChunksT, DimsT, DTypeLike, ShapeT From a7d9805f3da26d0fa8220ade3f95dff50ceceead Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 15 Nov 2022 18:39:51 -0700 Subject: [PATCH 4/4] Update pre-commit hooks --- setup.cfg | 6 ------ xarray_schema/__init__.py | 3 ++- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/setup.cfg b/setup.cfg index 1799da2..3d5c6ea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,12 +8,6 @@ select = B,C,E,F,W,T4,B9 [isort] profile=black -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -line_length=100 -known_first_party=xarray_schema [mypy] diff --git a/xarray_schema/__init__.py b/xarray_schema/__init__.py index 448a676..b3045b6 100644 --- a/xarray_schema/__init__.py +++ b/xarray_schema/__init__.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 # flake8: noqa -from importlib.metadata import PackageNotFoundError as _PackageNotFoundError, version as _version +from importlib.metadata import PackageNotFoundError as _PackageNotFoundError +from importlib.metadata import version as _version from .base import SchemaError from .components import (