From 16341ef0df8eb0584da81792c0dc56ab194abac8 Mon Sep 17 00:00:00 2001
From: juaristi22
Date: Tue, 22 Jul 2025 12:29:05 +0200
Subject: [PATCH 1/6] evaluation of estimates within tolerance levels

---
 changelog_entry.yaml              |  4 ++
 src/microcalibrate/__init__.py    |  1 +
 src/microcalibrate/evalutation.py | 65 +++++++++++++++++++++++
 tests/test_evaluation.py          | 86 +++++++++++++++++++++++++++++++
 4 files changed, 156 insertions(+)
 create mode 100644 src/microcalibrate/evalutation.py
 create mode 100644 tests/test_evaluation.py

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29..e9461eb 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    added:
+      - A function to evaluate whether estimates are within desired tolerance levels.
diff --git a/src/microcalibrate/__init__.py b/src/microcalibrate/__init__.py
index e45111f..62f3112 100644
--- a/src/microcalibrate/__init__.py
+++ b/src/microcalibrate/__init__.py
@@ -1 +1,2 @@
 from .calibration import Calibration
+from .evalutation import evaluate_estimate_distance_to_targets
diff --git a/src/microcalibrate/evalutation.py b/src/microcalibrate/evalutation.py
new file mode 100644
index 0000000..703c2b6
--- /dev/null
+++ b/src/microcalibrate/evalutation.py
@@ -0,0 +1,65 @@
+import logging
+from typing import List, Optional
+
+import numpy as np
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+
+def evaluate_estimate_distance_to_targets(
+    targets: np.ndarray,
+    estimates: np.ndarray,
+    tolerances: np.ndarray,
+    target_names: Optional[List[str]] = None,
+    raise_on_error: Optional[bool] = False,
+):
+    """
+    Evaluate the distance between estimates and targets against tolerances.
+
+    Args:
+        targets (np.ndarray): The ground truth target values.
+        estimates (np.ndarray): The estimated values to compare against the targets.
+        tolerances (np.ndarray): The acceptable tolerance levels for each target.
+        target_names (Optional[List[str]]): The names of the targets for reporting.
+        raise_on_error (Optional[bool]): If True, raises an error if any estimate is outside its tolerance. Default is False.
+
+    Returns:
+        evals (pd.DataFrame): A DataFrame containing the evaluation results, including:
+            - target_names: Names of the targets (if provided).
+            - distances: The absolute differences between estimates and targets.
+            - tolerances: The tolerance levels for each target.
+            - within_tolerance: Boolean array indicating if each estimate is within its tolerance.
+    """
+    if targets.shape != estimates.shape or targets.shape != tolerances.shape:
+        raise ValueError(
+            "Targets, estimates, and tolerances must have the same shape."
+        )
+
+    distances = np.abs(estimates - targets)
+    within_tolerance = distances <= tolerances
+
+    evals = {
+        "target_names": target_names if target_names is not None else [],
+        "distances": distances,
+        "tolerances": tolerances,
+        "within_tolerance": within_tolerance,
+    }
+
+    if target_names is not None:
+        for i, target in enumerate(evals["within_tolerance"]):
+            if not target:
+                logger.warning(
+                    f"The estimate corresponding to {evals['target_names'][i]} is outside the tolerance: it had an error {evals['distances'][i]} larger than the tolerance {evals['tolerances'][i]}."
+                )
+        if raise_on_error:
+            raise ValueError(
+                f"{(~within_tolerance).sum()} targets are outside their tolerance levels."
+            )
+    else:
+        if raise_on_error:
+            raise ValueError(
+                f"{(~within_tolerance).sum()} targets are outside their tolerance levels."
+            )
+
+    return pd.DataFrame(evals)
diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py
new file mode 100644
index 0000000..dad09b5
--- /dev/null
+++ b/tests/test_evaluation.py
@@ -0,0 +1,86 @@
+"""
+Test the evaluation functionality for the calibration process.
+"""
+
+import pytest
+from src.microcalibrate.calibration import Calibration
+from src.microcalibrate.evalutation import (
+    evaluate_estimate_distance_to_targets,
+)
+import numpy as np
+import pandas as pd
+
+
+def test_evaluate_estimate_distance_to_targets() -> None:
+    """Test the evaluation of estimates against targets with tolerances."""
+
+    # Create a mock dataset with age and income
+    random_generator = np.random.default_rng(0)
+    data = pd.DataFrame(
+        {
+            "age": random_generator.integers(18, 70, size=100),
+            "income": random_generator.normal(40000, 50000, size=100),
+        }
+    )
+    weights = np.ones(len(data))
+    targets_matrix = pd.DataFrame(
+        {
+            "income_aged_20_30": (
+                (data["age"] >= 20) & (data["age"] <= 30)
+            ).astype(float)
+            * data["income"],
+            "income_aged_40_50": (
+                (data["age"] >= 40) & (data["age"] <= 50)
+            ).astype(float)
+            * data["income"],
+        }
+    )
+    targets = np.array(
+        [
+            (targets_matrix["income_aged_20_30"] * weights).sum() * 50,
+            (targets_matrix["income_aged_40_50"] * weights).sum() * 50,
+        ]
+    )
+
+    calibrator = Calibration(
+        estimate_matrix=targets_matrix,
+        weights=weights,
+        targets=targets,
+        noise_level=0.05,
+        epochs=50,
+        learning_rate=0.01,
+        dropout_rate=0,
+    )
+
+    performance_df = calibrator.calibrate()
+    final_estimates = calibrator.estimate()
+    tolerances = np.array([0.001, 0.005])
+
+    # Evaluate the estimates against the targets without raising an error
+    evals_df = evaluate_estimate_distance_to_targets(
+        targets=targets,
+        estimates=final_estimates,
+        tolerances=tolerances,
+        target_names=["Income Aged 20-30", "Income Aged 40-50"],
+        raise_on_error=False,
+    )
+
+    # Check that the evaluation DataFrame has the expected structure
+    assert set(evals_df.columns) == {
+        "target_names",
+        "distances",
+        "tolerances",
+        "within_tolerance",
+    }
+
+    # Evaluate the estimates against the targets raising an error
+    with pytest.raises(ValueError) as exc_info:
+        evals_df = evaluate_estimate_distance_to_targets(
+            targets=targets,
+            estimates=final_estimates,
+            tolerances=tolerances,
+            target_names=["Income Aged 20-30", "Income Aged 40-50"],
+            raise_on_error=True,
+        )
+
+    assert "targets are outside their tolerance levels" in str(exc_info.value)
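For reference, a minimal usage sketch of the helper added in this patch, with purely illustrative numbers (the function is re-exported from the package root by the __init__.py change above):

    import numpy as np
    from microcalibrate import evaluate_estimate_distance_to_targets

    # Two targets; the second estimate misses its tolerance, so a warning
    # is logged and within_tolerance is False for that row.
    targets = np.array([100.0, 200.0])
    estimates = np.array([100.5, 205.0])
    tolerances = np.array([1.0, 2.0])

    evals = evaluate_estimate_distance_to_targets(
        targets=targets,
        estimates=estimates,
        tolerances=tolerances,
        target_names=["target_a", "target_b"],
    )
    print(evals)
    #   target_names  distances  tolerances  within_tolerance
    # 0     target_a        0.5         1.0              True
    # 1     target_b        5.0         2.0             False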
From 4257a5ae25bf9e22dc661c5e91def436e20bc163 Mon Sep 17 00:00:00 2001
From: juaristi22
Date: Tue, 22 Jul 2025 12:31:09 +0200
Subject: [PATCH 2/6] fix dependency conflicts

---
 pyproject.toml | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ea6e46b..771ef3d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,40 +10,39 @@ authors = [
 requires-python = ">=3.11"
 dependencies = [
     "torch>=2.7.0",
-    "numpy>=1.26.0,<2.0.0",
-    "pandas>=2.2.0,<3.0.0",
-    "plotly>=5.24.0,<6.0.0",
-    "tqdm>=4.65.0,<5.0.0",
+    "numpy",
+    "pandas",
+    "tqdm",
 ]
 
 [project.optional-dependencies]
 dev = [
-    "pytest>=8.0.0,<9.0.0",
-    "pytest-cov>=6.0.0,<7.0.0",
-    "flake8>=6.0.0,<7.0.0",
-    "black>=23.0.0",
-    "isort>=5.9.0,<6.0.0",
-    "mypy>=1.0.0,<2.0.0",
-    "build>=1.0.0,<2.0.0",
+    "pytest",
+    "pytest-cov",
+    "flake8>=6.0.0",
+    "black",
+    "isort",
+    "mypy",
+    "build",
     "linecheck",
     "yaml-changelog>=0.1.7",
 ]
 
 docs = [
-    "sphinx>=5.0.0,<6.0.0",
-    "docutils>=0.17.0,<0.18.0",
-    "jupyter-book>=0.16.0",
+    "sphinx>=5.0.0",
+    "docutils>=0.17.0",
+    "jupyter-book>=0.15.0",
     "sphinx-book-theme>=1.0.0",
     "sphinx-copybutton>=0.5.0",
     "sphinx-design>=0.3.0",
-    "ipywidgets>=7.8.0,<8.0.0",
-    "plotly>=5.24.0,<6.0.0",
+    "ipywidgets>=7.8.0",
+    "plotly",
     "sphinx-argparse>=0.5.0",
     "sphinx-math-dollar>=1.2.1",
-    "myst-parser==0.18.1",
-    "myst-nb==0.17.2",
+    "myst-parser>=0.18.1",
+    "myst-nb>=0.17.2",
     "pyyaml",
-    "furo==2022.12.7",
+    "furo>=2022.12.7",
     "h5py>=3.1.0,<4.0.0",
 ]
 
@@ -67,4 +66,4 @@ policyengine-calibrate = "policyengine_calibrate:main"
 
 [build-system]
 requires = ["hatchling"]
-build-backend = "hatchling.build"
\ No newline at end of file
+build-backend = "hatchling.build"
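Since plotly is removed from the core dependencies above and now lives only in the docs extra, any remaining runtime import of it inside the package would need to become optional. A minimal guarded-import sketch, assuming such an import exists somewhere:

    try:
        import plotly  # available only when the docs extra is installed
    except ImportError:
        plotly = None  # plotting features degrade gracefully without it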
"jupyter-book>=0.16.0", + "sphinx>=5.0.0", + "docutils>=0.17.0", + "jupyter-book>=0.15.0", "sphinx-book-theme>=1.0.0", "sphinx-copybutton>=0.5.0", "sphinx-design>=0.3.0", - "ipywidgets>=7.8.0,<8.0.0", - "plotly>=5.24.0,<6.0.0", + "ipywidgets>=7.8.0", + "plotly", "sphinx-argparse>=0.5.0", "sphinx-math-dollar>=1.2.1", - "myst-parser==0.18.1", - "myst-nb==0.17.2", + "myst-parser>=0.18.1", + "myst-nb>=0.17.2", "pyyaml", - "furo==2022.12.7", + "furo>=2022.12.7", "h5py>=3.1.0,<4.0.0", ] @@ -67,4 +66,4 @@ policyengine-calibrate = "policyengine_calibrate:main" [build-system] requires = ["hatchling"] -build-backend = "hatchling.build" \ No newline at end of file +build-backend = "hatchling.build" From 4a6051b41296cc256774bcf36f51a9d2534aa651 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Thu, 24 Jul 2025 13:09:17 +0200 Subject: [PATCH 3/6] move publishing to after versioning --- .github/workflows/main.yml | 25 ------------------------- .github/workflows/versioning.yaml | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 76c3103..b835c71 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -59,28 +59,3 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} BRANCH: gh-pages # The branch the action should deploy to. FOLDER: docs/_build/html # The folder the action should deploy. - publish-to-pypi: - name: Publish to PyPI - needs: Test - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Fetch all history for all tags and branches - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install package - run: make install - - name: Build package - run: python -m build - - name: Publish a git tag - run: ".github/publish-git-tag.sh || true" - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI }} - skip-existing: true diff --git a/.github/workflows/versioning.yaml b/.github/workflows/versioning.yaml index 18bcb0b..2b1ad35 100644 --- a/.github/workflows/versioning.yaml +++ b/.github/workflows/versioning.yaml @@ -35,4 +35,29 @@ jobs: with: add: "." 
From d741e7a72c5a5beebfe31d16a169a8f36ad6b3a4 Mon Sep 17 00:00:00 2001
From: juaristi22
Date: Thu, 24 Jul 2025 13:32:03 +0200
Subject: [PATCH 4/6] switch condition

---
 .github/workflows/versioning.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/versioning.yaml b/.github/workflows/versioning.yaml
index 2b1ad35..f244e75 100644
--- a/.github/workflows/versioning.yaml
+++ b/.github/workflows/versioning.yaml
@@ -37,7 +37,7 @@ jobs:
           message: Update package version
   publish-to-pypi:
     name: Publish to PyPI
-    needs: Test
+    if: (github.event.head_commit.message == 'Update package version')
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code

From baeb94edfc089b26169aeb66c097dc82c0ddaf59 Mon Sep 17 00:00:00 2001
From: juaristi22
Date: Fri, 25 Jul 2025 16:19:09 +0200
Subject: [PATCH 5/6] add test for when all estimates are within tolerance

---
 src/microcalibrate/__init__.py                 |  2 +-
 .../{evalutation.py => evaluation.py}          |  0
 tests/test_evaluation.py                       | 22 ++++++++++++++++++++--
 3 files changed, 21 insertions(+), 3 deletions(-)
 rename src/microcalibrate/{evalutation.py => evaluation.py} (100%)

diff --git a/src/microcalibrate/__init__.py b/src/microcalibrate/__init__.py
index 62f3112..0b8f3fa 100644
--- a/src/microcalibrate/__init__.py
+++ b/src/microcalibrate/__init__.py
@@ -1,2 +1,2 @@
 from .calibration import Calibration
-from .evalutation import evaluate_estimate_distance_to_targets
+from .evaluation import evaluate_estimate_distance_to_targets
diff --git a/src/microcalibrate/evalutation.py b/src/microcalibrate/evaluation.py
similarity index 100%
rename from src/microcalibrate/evalutation.py
rename to src/microcalibrate/evaluation.py
diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py
index dad09b5..09f2e49 100644
--- a/tests/test_evaluation.py
+++ b/tests/test_evaluation.py
@@ -4,7 +4,7 @@
 
 import pytest
 from src.microcalibrate.calibration import Calibration
-from src.microcalibrate.evalutation import (
+from microcalibrate.evaluation import (
     evaluate_estimate_distance_to_targets,
 )
 import numpy as np
 import pandas as pd
 
 
 def test_evaluate_estimate_distance_to_targets() -> None:
-    """Test the evaluation of estimates against targets with tolerances."""
+    """Test the evaluation of estimates against targets with tolerances, for a case in which estimates are not within tolerance."""
 
     # Create a mock dataset with age and income
     random_generator = np.random.default_rng(0)
@@ -84,3 +84,21 @@
     )
 
     assert "targets are outside their tolerance levels" in str(exc_info.value)
+
+
+def test_all_within_tolerance():
+    """Tests a simple case where all estimates are within their tolerances."""
+    targets = np.array([10, 20, 30])
+    estimates = np.array([10.1, 19.8, 30.0])
+    tolerances = np.array([0.2, 0.3, 0.1])
+    target_names = ["A", "B", "C"]
"B", "C"] + + result_df = evaluate_estimate_distance_to_targets( + targets, estimates, tolerances, target_names + ) + + assert result_df["within_tolerance"].all() + assert result_df.shape == (3, 4) + np.testing.assert_array_almost_equal( + result_df["distances"], [0.1, 0.2, 0.0] + ) From d0bf4e853518d717c93d4419fa3477a887078e46 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Fri, 25 Jul 2025 16:35:02 +0200 Subject: [PATCH 6/6] minor changes --- src/microcalibrate/evaluation.py | 26 ++++++++++---------------- tests/test_evaluation.py | 4 +++- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/microcalibrate/evaluation.py b/src/microcalibrate/evaluation.py index 703c2b6..52bc30e 100644 --- a/src/microcalibrate/evaluation.py +++ b/src/microcalibrate/evaluation.py @@ -40,26 +40,20 @@ def evaluate_estimate_distance_to_targets( within_tolerance = distances <= tolerances evals = { - "target_names": target_names if target_names is not None else [], + "target_names": ( + target_names + if target_names is not None + else list(np.nan for _ in targets) + ), "distances": distances, "tolerances": tolerances, "within_tolerance": within_tolerance, } - if target_names is not None: - for i, target in enumerate(evals["within_tolerance"]): - if not target: - logger.warning( - f"The estimate corresponding to {evals['target_names'][i]} is outside the tolerance had an error {evals['distances'][i]} larger than the tolerance {evals['tolerances'][i]}." - ) - if raise_on_error: - raise ValueError( - f"{(~within_tolerance).sum()} targets are outside their tolerance levels." - ) - else: - if raise_on_error: - raise ValueError( - f"{(~within_tolerance).sum()} targets are outside their tolerance levels." - ) + num_outside_tolerance = (~within_tolerance).sum() + if raise_on_error and num_outside_tolerance > 0: + raise ValueError( + f"{num_outside_tolerance} target(s) are outside their tolerance levels." + ) return pd.DataFrame(evals) diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py index 09f2e49..1a7b371 100644 --- a/tests/test_evaluation.py +++ b/tests/test_evaluation.py @@ -83,7 +83,9 @@ def test_evaluate_estimate_distance_to_targets() -> None: raise_on_error=True, ) - assert "targets are outside their tolerance levels" in str(exc_info.value) + assert "target(s) are outside their tolerance levels" in str( + exc_info.value + ) def test_all_within_tolerance():