Skip to content

Commit

Permalink
Feat: Add new specific comparators (#60)
Browse files Browse the repository at this point in the history
  • Loading branch information
adrien-berchet authored Aug 29, 2024
1 parent d7f7d9f commit 40a64ff
Show file tree
Hide file tree
Showing 25 changed files with 843 additions and 67 deletions.
File renamed without changes.
4 changes: 2 additions & 2 deletions .github/workflows/publish-sdist.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.8
- name: Set up Python 3.10
uses: actions/setup-python@v5
with:
python-version: 3.8
python-version: "3.10"
- name: Build a wheel and a source tarball
run: |
pip install "setuptools>=42" build "setuptools_scm[toml]>=3.4"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/run-tox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
min_versions: ["min_versions", "latest_versions"]
exclude:
- min_versions: "min_versions"
include:
- python-version: "3.8"
- python-version: "3.9"
min_versions: "min_versions"

steps:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
default_language_version:
python: python3.8
python: python3.10
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ When you wish to contribute to the code base, please consider the following guid
or

```shell
tox -e py38 -e lint -e docs -e check-packaging
tox -e py310 -e lint -e docs -e check-packaging
```

* Commit your changes using a descriptive commit message.
Expand Down
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
global-exclude *.py[co] .DS_Store

include dir_content_diff/comparators/dependencies.json
12 changes: 6 additions & 6 deletions dir_content_diff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
"""
import copy
import importlib.metadata
import logging
import re
from pathlib import Path

Expand All @@ -14,13 +13,12 @@
from dir_content_diff.base_comparators import PdfComparator
from dir_content_diff.base_comparators import XmlComparator
from dir_content_diff.base_comparators import YamlComparator
from dir_content_diff.util import LOGGER
from dir_content_diff.util import diff_msg_formatter
from dir_content_diff.util import format_ext

__version__ = importlib.metadata.version("dir-content-diff")

L = logging.getLogger(__name__)


_DEFAULT_COMPARATORS = {
None: DefaultComparator(),
Expand Down Expand Up @@ -125,7 +123,7 @@ def compare_files(ref_file, comp_file, comparator, *args, return_raw_diffs=False
differences if they are different.
"""
# Get the compared file
L.debug("Compare: %s and %s", ref_file, comp_file)
LOGGER.debug("Compare: %s and %s", ref_file, comp_file)

try:
return comparator(ref_file, comp_file, *args, return_raw_diffs=return_raw_diffs, **kwargs)
Expand Down Expand Up @@ -173,7 +171,7 @@ def export_formatted_file(file, formatted_file, comparator, **kwargs):
"""
if hasattr(comparator, "save_capability") and comparator.save_capability:
# pylint: disable=protected-access
L.debug("Format: %s into %s", file, formatted_file)
LOGGER.debug("Format: %s into %s", file, formatted_file)
data = comparator.load(
file,
**kwargs.get(
Expand Down Expand Up @@ -204,7 +202,9 @@ def export_formatted_file(file, formatted_file, comparator, **kwargs):
),
)
else:
L.info("Skip formatting for '%s' because the comparator has no saving capability.", file)
LOGGER.info(
"Skip formatting for '%s' because the comparator has no saving capability.", file
)


def compare_trees(
Expand Down
1 change: 1 addition & 0 deletions dir_content_diff/comparators/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Sub-package of dir-content-diff for specific comparators."""
9 changes: 9 additions & 0 deletions dir_content_diff/comparators/dependencies.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"pandas": [
"pandas>=1.4",
"pyarrow>=11",
"tables>=3.7"
],
"morphio": ["morphio>=3.3.6", "morph_tool>=2.9"],
"voxcell": ["voxcell>=3.1.1"]
}
43 changes: 43 additions & 0 deletions dir_content_diff/comparators/morphio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""Extension module to process morphology files with MorphIO and morph-tool."""
from dir_content_diff import register_comparator
from dir_content_diff.base_comparators import BaseComparator
from dir_content_diff.util import import_error_message

try:
from morph_tool import diff
from morphio.mut import Morphology
except ImportError: # pragma: no cover
import_error_message(__name__)


class MorphologyComparator(BaseComparator):
    """Comparator dedicated to morphology files.

    Files are loaded with :class:`morphio.mut.Morphology` and compared with
    :func:`morph_tool.diff`.
    """

    def load(self, path, **kwargs):
        """Build a :class:`morphio.mut.Morphology` object from a morphology file.

        Args:
            path: The path to the morphology file.
            **kwargs: Forwarded to the :class:`morphio.mut.Morphology` constructor.

        Returns:
            The loaded morphology.
        """
        morphology = Morphology(path, **kwargs)
        return morphology

    def diff(self, ref, comp, *args, **kwargs):
        """Compare two loaded morphologies.

        Args:
            ref: The reference morphology.
            comp: The compared morphology.
            *args: See :func:`morph_tool.diff` for details.
            **kwargs: See :func:`morph_tool.diff` for details.

        Returns:
            bool or list: ``False`` if the result of :func:`morph_tool.diff` is falsy, otherwise
            a one-element list containing the ``info`` attribute of that result.
        """
        # Inside this method, ``diff`` resolves to the module-level function imported from
        # morph_tool, not to this method.
        result = diff(ref, comp, *args, **kwargs)
        return [result.info] if result else False


def register(force=False):
    """Register a morphology comparator for each supported file extension.

    Args:
        force: Forwarded unchanged to :func:`dir_content_diff.register_comparator`.
    """
    # A dedicated comparator instance is created for each extension.
    for extension in (".asc", ".h5", ".swc"):
        register_comparator(extension, MorphologyComparator(), force=force)
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Extension module to process files with Pandas."""
try:
import pandas as pd
except ImportError as exception: # pragma: no cover
raise ImportError("Could not import pandas package, please install it.") from exception

from dir_content_diff import register_comparator
from dir_content_diff.base_comparators import BaseComparator
from dir_content_diff.util import import_error_message

try:
import pandas as pd
except ImportError: # pragma: no cover
import_error_message(__name__)


class DataframeComparator(BaseComparator):
Expand Down
132 changes: 132 additions & 0 deletions dir_content_diff/comparators/voxcell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""Extension module to process files with Voxcell."""
from dir_content_diff import register_comparator
from dir_content_diff.base_comparators import BaseComparator
from dir_content_diff.comparators.pandas import DataframeComparator
from dir_content_diff.util import import_error_message

try:
import numpy as np
from voxcell import CellCollection
from voxcell import VoxelData
except ImportError: # pragma: no cover
import_error_message(__name__)


class NrrdComparator(BaseComparator):
    """Comparator for NRRD files.

    The files are loaded with :meth:`voxcell.VoxelData.load_nrrd` and only their voxel dimensions
    and raw data are compared.
    """

    def load(self, path, **kwargs):
        """Load a NRRD file with :meth:`voxcell.VoxelData.load_nrrd`.

        Args:
            path: The path to the NRRD file (converted to ``str`` before loading).
            **kwargs: Forwarded to :meth:`voxcell.VoxelData.load_nrrd`.

        Returns:
            The loaded object, whose ``voxel_dimensions`` and ``raw`` attributes are used by
            :meth:`diff`.
        """
        return VoxelData.load_nrrd(str(path), **kwargs)

    def save(self, data, path, **kwargs):
        """Save data to a NRRD file.

        Args:
            data: The object to save; its ``save_nrrd`` method is called.
            path: The path of the output file (converted to ``str`` before saving).
            **kwargs: Forwarded to ``data.save_nrrd``.

        Returns:
            The value returned by ``data.save_nrrd``.
        """
        return data.save_nrrd(str(path), **kwargs)

    def format_diff(self, difference, **kwargs):
        """Format one element difference.

        Args:
            difference: A ``(label, message)`` pair.
            **kwargs: Not used.

        Returns:
            str: The pair formatted as ``label: message`` on a new line.
        """
        k, v = difference
        return f"\n{k}: {v}"

    def sort(self, differences, **kwargs):
        """Do not sort the entries to keep voxel dimensions as first entry."""
        return differences

    @staticmethod
    def _record_mismatch(errors, label, ref_array, comp_array, args, precision, kwargs):
        """Compare two arrays and store the assertion message in ``errors[label]`` on mismatch.

        The comparison is exact when ``precision`` is ``None``, otherwise ``precision`` is passed
        as the ``decimal`` argument of :func:`numpy.testing.assert_array_almost_equal`.
        """
        try:
            if precision is not None:
                np.testing.assert_array_almost_equal(
                    ref_array, comp_array, *args, decimal=precision, **kwargs
                )
            else:
                np.testing.assert_array_equal(ref_array, comp_array, *args, **kwargs)
        except AssertionError as exception:
            errors[label] = exception.args[0]

    def diff(self, ref, comp, *args, precision=None, **kwargs):
        """Compare data from two NRRD files.

        Note: NRRD files can contain their creation date, so their hashes depend on this creation
        date, even if the actual data are the same. This comparator only compares the actual
        data in the files.

        Args:
            ref: The data loaded from the reference NRRD file.
            comp: The data loaded from the compared NRRD file.
            *args: Forwarded to the :mod:`numpy.testing` assertion function.
            precision (int): The desired precision, default is exact precision.
            **kwargs: Forwarded to the :mod:`numpy.testing` assertion function.

        Returns:
            bool or dict: ``False`` if the data are considered as equal, otherwise a dictionary
            mapping ``"Voxel dimensions"`` and/or ``"Internal raw data"`` to the message
            explaining why they are not considered as equal.
        """
        errors = {}
        # The voxel dimensions are checked first so they appear as first entry in the result.
        self._record_mismatch(
            errors,
            "Voxel dimensions",
            ref.voxel_dimensions,
            comp.voxel_dimensions,
            args,
            precision,
            kwargs,
        )
        self._record_mismatch(
            errors, "Internal raw data", ref.raw, comp.raw, args, precision, kwargs
        )
        return errors or False

    def report(self, ref_file, comp_file, formatted_differences, diff_args, diff_kwargs, **kwargs):
        """Create a report from the formatted differences.

        The ``precision`` entry is always passed to the parent implementation, with the value
        ``None`` when it was not given.

        Args:
            ref_file: The reference file.
            comp_file: The compared file.
            formatted_differences: The differences, already formatted.
            diff_args: The positional arguments used for the diff.
            diff_kwargs (dict): The keyword arguments used for the diff. This dictionary is not
                modified.
            **kwargs: Forwarded to the parent ``report`` method.

        Returns:
            The value returned by the parent ``report`` method.
        """
        # pylint: disable=arguments-differ
        # Work on a copy so the dictionary owned by the caller is left untouched.
        diff_kwargs = dict(diff_kwargs)
        diff_kwargs.setdefault("precision", None)
        return super().report(
            ref_file,
            comp_file,
            formatted_differences,
            diff_args,
            diff_kwargs,
            **kwargs,
        )


class Mvd3Comparator(DataframeComparator):
    """Comparator for MVD3 files.

    Note: MVD3 files can contain their creation date, so their hashes depend on this creation
    date, even if the data are the same.

    This comparator inherits from
    :class:`dir_content_diff.comparators.pandas.DataframeComparator`, read the doc of this
    comparator for details on args and kwargs.
    """

    def load(self, path, **kwargs):
        """Read a MVD3 file and convert it with ``as_dataframe()``.

        Args:
            path: The path to the MVD3 file.
            **kwargs: Forwarded to ``CellCollection.load_mvd3``.
        """
        cells = CellCollection.load_mvd3(path, **kwargs)
        return cells.as_dataframe()

    def save(self, data, path, **kwargs):
        """Write data to a MVD3 file.

        Args:
            data: The data passed to ``CellCollection.from_dataframe``.
            path: The path of the output file.
            **kwargs: Forwarded to ``save_mvd3``.
        """
        cells = CellCollection.from_dataframe(data)
        return cells.save_mvd3(path, **kwargs)


class CellCollectionComparator(DataframeComparator):
    """Comparator for any type of CellCollection file.

    This comparator inherits from
    :class:`dir_content_diff.comparators.pandas.DataframeComparator`, read the doc of this
    comparator for details on args and kwargs.
    """

    def load(self, path, **kwargs):
        """Read a CellCollection file and convert it with ``as_dataframe()``.

        Args:
            path: The path to the CellCollection file.
            **kwargs: Forwarded to ``CellCollection.load``.
        """
        cells = CellCollection.load(path, **kwargs)
        return cells.as_dataframe()

    def save(self, data, path, **kwargs):
        """Write data to a CellCollection file.

        Args:
            data: The data passed to ``CellCollection.from_dataframe``.
            path: The path of the output file.
            **kwargs: Forwarded to ``save``.
        """
        cells = CellCollection.from_dataframe(data)
        return cells.save(path, **kwargs)


def register(force=False):
    """Register Voxcell extensions.

    The ``.nrrd`` extension is associated with :class:`NrrdComparator` and the ``.mvd3``
    extension with :class:`Mvd3Comparator`.

    Args:
        force: Forwarded unchanged to :func:`dir_content_diff.register_comparator`. The default
            value keeps the behavior of a call without argument.
    """
    register_comparator(".nrrd", NrrdComparator(), force=force)
    register_comparator(".mvd3", Mvd3Comparator(), force=force)
50 changes: 50 additions & 0 deletions dir_content_diff/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
"""Some utils used by the ``dir-content-diff`` package."""
import importlib.resources
import json
import logging
import re
from pathlib import Path

LOGGER = logging.getLogger("dir-content-diff")
_ext_pattern = re.compile(r"\.?(.*)")


Expand Down Expand Up @@ -103,3 +108,48 @@ def format_kwargs(kwargs, name):
f"{kwargs_used}"
f"{reason_used}"
)


def _retrieve_dependencies():
    """Get the comparator dependencies.

    The dependencies are read from the ``comparators/dependencies.json`` file located in the
    ``dir_content_diff`` package directory.

    Returns:
        The object decoded from the JSON file.
    """
    try:
        # Package is installed or the cwd is the root of the project
        root_dir = importlib.resources.files("dir_content_diff")  # pylint: disable=no-member
    except ModuleNotFoundError:  # pragma: no cover
        # The package can not be resolved by name. This module is located directly in the package
        # directory, so its parent directory is used as the package root.
        root_dir = Path(__file__).parent
    deps_file = root_dir / "comparators" / "dependencies.json"
    # JSON files are read as UTF-8 regardless of the locale encoding.
    with deps_file.open(encoding="utf-8") as f:
        return json.load(f)


COMPARATOR_DEPENDENCIES = _retrieve_dependencies()


def import_error_message(name):
    """Log a warning listing the dependencies required by a comparator module.

    Args:
        name (str): The dotted name of the module. Only its last component is used to look up
            the dependencies in ``COMPARATOR_DEPENDENCIES``.

    Raises:
        KeyError: If the module has no entry in ``COMPARATOR_DEPENDENCIES``.
    """
    name = name.rsplit(".", 1)[-1]
    try:
        dependencies = COMPARATOR_DEPENDENCIES[name]
    except KeyError as exception:
        raise KeyError(
            f"The module {name} has no registered dependency, please add dependencies in the "
            "dependencies.json file"
        ) from exception

    # Adapt the wording of the message to the number of requirements.
    several = len(dependencies) > 1
    req_plural = "s are" if several else " is"
    if several:
        *leading, final = dependencies
        requirements = f"{', '.join(leading)} and {final}"
    else:
        requirements = str(dependencies[0])

    LOGGER.warning(
        f"Loading the {name} module without the required dependencies installed "
        f"(requirement{req_plural} the following: {requirements}). "
        "Will crash at runtime if the related functionalities are used. "
        f"These dependencies can be installed with 'pip install dir-content-diff[{name}]'."
    )
4 changes: 3 additions & 1 deletion docs/source/api_ref.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ This page presents the complete API documentation.

dir_content_diff
dir_content_diff.base_comparators
dir_content_diff.comparators.morphio
dir_content_diff.comparators.pandas
dir_content_diff.comparators.voxcell
dir_content_diff.util
dir_content_diff.pandas
dir_content_diff.pytest_plugin
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ build-backend = "setuptools.build_meta"
[tool.black]
line-length = 100
target-version = [
"py38",
"py39",
"py310",
"py311",
"py312",
]

[tool.pydocstyle]
Expand All @@ -33,3 +33,6 @@ force_single_line = true
testpaths = [
"tests",
]
markers = [
"comparators_missing_deps: marks tests for missing dependencies",
]
Loading

0 comments on commit 40a64ff

Please sign in to comment.