From 16341ef0df8eb0584da81792c0dc56ab194abac8 Mon Sep 17 00:00:00 2001
From: juaristi22
Date: Tue, 22 Jul 2025 12:29:05 +0200
Subject: [PATCH 1/6] evaluation of estimates within tolerance levels

---
 changelog_entry.yaml              |  4 ++
 src/microcalibrate/__init__.py    |  1 +
 src/microcalibrate/evalutation.py | 65 +++++++++++++++++++++++
 tests/test_evaluation.py          | 86 +++++++++++++++++++++++++++++++
 4 files changed, 156 insertions(+)
 create mode 100644 src/microcalibrate/evalutation.py
 create mode 100644 tests/test_evaluation.py

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29..e9461eb 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    added:
+      - A function to evaluate whether estimates are within desired tolerance levels.
diff --git a/src/microcalibrate/__init__.py b/src/microcalibrate/__init__.py
index e45111f..62f3112 100644
--- a/src/microcalibrate/__init__.py
+++ b/src/microcalibrate/__init__.py
@@ -1 +1,2 @@
 from .calibration import Calibration
+from .evalutation import evaluate_estimate_distance_to_targets
diff --git a/src/microcalibrate/evalutation.py b/src/microcalibrate/evalutation.py
new file mode 100644
index 0000000..703c2b6
--- /dev/null
+++ b/src/microcalibrate/evalutation.py
@@ -0,0 +1,65 @@
+import logging
+from typing import List, Optional
+
+import numpy as np
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+
+def evaluate_estimate_distance_to_targets(
+    targets: np.ndarray,
+    estimates: np.ndarray,
+    tolerances: np.ndarray,
+    target_names: Optional[List[str]] = None,
+    raise_on_error: Optional[bool] = False,
+):
+    """
+    Evaluate the distance between estimates and targets against tolerances.
+
+    Args:
+        targets (np.ndarray): The ground truth target values.
+        estimates (np.ndarray): The estimated values to compare against the targets.
+        tolerances (np.ndarray): The acceptable tolerance levels for each target.
+        target_names (Optional[List[str]]): The names of the targets for reporting.
+        raise_on_error (Optional[bool]): If True, raises an error if any estimate is outside its tolerance. Default is False.
+
+    Returns:
+        evals (pd.DataFrame): A DataFrame containing the evaluation results, including:
+            - target_names: Names of the targets (if provided).
+            - distances: The absolute differences between estimates and targets.
+            - tolerances: The tolerance levels for each target.
+            - within_tolerance: Boolean array indicating if each estimate is within its tolerance.
+    """
+    if targets.shape != estimates.shape or targets.shape != tolerances.shape:
+        raise ValueError(
+            "Targets, estimates, and tolerances must have the same shape."
+        )
+
+    distances = np.abs(estimates - targets)
+    within_tolerance = distances <= tolerances
+
+    evals = {
+        "target_names": target_names if target_names is not None else [],
+        "distances": distances,
+        "tolerances": tolerances,
+        "within_tolerance": within_tolerance,
+    }
+
+    if target_names is not None:
+        for i, target in enumerate(evals["within_tolerance"]):
+            if not target:
+                logger.warning(
+                    f"The estimate corresponding to {evals['target_names'][i]} is outside the tolerance: it had an error {evals['distances'][i]} larger than the tolerance {evals['tolerances'][i]}."
+                )
+        if raise_on_error:
+            raise ValueError(
+                f"{(~within_tolerance).sum()} targets are outside their tolerance levels."
+            )
+    else:
+        if raise_on_error:
+            raise ValueError(
+                f"{(~within_tolerance).sum()} targets are outside their tolerance levels."
+            )
+
+    return pd.DataFrame(evals)
diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py
new file mode 100644
index 0000000..dad09b5
--- /dev/null
+++ b/tests/test_evaluation.py
@@ -0,0 +1,86 @@
+"""
+Test the evaluation functionality for the calibration process.
+"""
+
+import pytest
+from src.microcalibrate.calibration import Calibration
+from src.microcalibrate.evalutation import (
+    evaluate_estimate_distance_to_targets,
+)
+import numpy as np
+import pandas as pd
+
+
+def test_evaluate_estimate_distance_to_targets() -> None:
+    """Test the evaluation of estimates against targets with tolerances."""
+
+    # Create a mock dataset with age and income
+    random_generator = np.random.default_rng(0)
+    data = pd.DataFrame(
+        {
+            "age": random_generator.integers(18, 70, size=100),
+            "income": random_generator.normal(40000, 50000, size=100),
+        }
+    )
+    weights = np.ones(len(data))
+    targets_matrix = pd.DataFrame(
+        {
+            "income_aged_20_30": (
+                (data["age"] >= 20) & (data["age"] <= 30)
+            ).astype(float)
+            * data["income"],
+            "income_aged_40_50": (
+                (data["age"] >= 40) & (data["age"] <= 50)
+            ).astype(float)
+            * data["income"],
+        }
+    )
+    targets = np.array(
+        [
+            (targets_matrix["income_aged_20_30"] * weights).sum() * 50,
+            (targets_matrix["income_aged_40_50"] * weights).sum() * 50,
+        ]
+    )
+
+    calibrator = Calibration(
+        estimate_matrix=targets_matrix,
+        weights=weights,
+        targets=targets,
+        noise_level=0.05,
+        epochs=50,
+        learning_rate=0.01,
+        dropout_rate=0,
+    )
+
+    performance_df = calibrator.calibrate()
+    final_estimates = calibrator.estimate()
+    tolerances = np.array([0.001, 0.005])
+
+    # Evaluate the estimates against the targets without raising an error
+    evals_df = evaluate_estimate_distance_to_targets(
+        targets=targets,
+        estimates=final_estimates,
+        tolerances=tolerances,
+        target_names=["Income Aged 20-30", "Income Aged 40-50"],
+        raise_on_error=False,
+    )
+
+    # Check that the evaluation DataFrame has the expected structure
+    assert set(evals_df.columns) == {
+        "target_names",
+        "distances",
+        "tolerances",
+        "within_tolerance",
+    }
+
+    # Evaluate the estimates against the targets raising an error
+    with pytest.raises(ValueError) as exc_info:
+        evals_df = evaluate_estimate_distance_to_targets(
+            targets=targets,
+            estimates=final_estimates,
+            tolerances=tolerances,
+            target_names=["Income Aged 20-30", "Income Aged 40-50"],
+            raise_on_error=True,
+        )
+
+    assert "targets are outside their tolerance levels" in str(exc_info.value)
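For reference, a minimal usage sketch of the helper added in this patch, with purely illustrative numbers (the function is re-exported from the package root by the __init__.py change above):

    import numpy as np
    from microcalibrate import evaluate_estimate_distance_to_targets

    # Two targets; the second estimate misses its tolerance, so a warning
    # is logged and within_tolerance is False for that row.
    targets = np.array([100.0, 200.0])
    estimates = np.array([100.5, 205.0])
    tolerances = np.array([1.0, 2.0])

    evals = evaluate_estimate_distance_to_targets(
        targets=targets,
        estimates=estimates,
        tolerances=tolerances,
        target_names=["target_a", "target_b"],
    )
    print(evals)
    #   target_names  distances  tolerances  within_tolerance
    # 0     target_a        0.5         1.0              True
    # 1     target_b        5.0         2.0             False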
From 4257a5ae25bf9e22dc661c5e91def436e20bc163 Mon Sep 17 00:00:00 2001
From: juaristi22
Date: Tue, 22 Jul 2025 12:31:09 +0200
Subject: [PATCH 2/6] fix dependency conflicts

---
 pyproject.toml | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ea6e46b..771ef3d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,40 +10,39 @@ authors = [
 requires-python = ">=3.11"
 dependencies = [
     "torch>=2.7.0",
-    "numpy>=1.26.0,<2.0.0",
-    "pandas>=2.2.0,<3.0.0",
-    "plotly>=5.24.0,<6.0.0",
-    "tqdm>=4.65.0,<5.0.0",
+    "numpy",
+    "pandas",
+    "tqdm",
 ]
 
 [project.optional-dependencies]
 dev = [
-    "pytest>=8.0.0,<9.0.0",
-    "pytest-cov>=6.0.0,<7.0.0",
-    "flake8>=6.0.0,<7.0.0",
-    "black>=23.0.0",
-    "isort>=5.9.0,<6.0.0",
-    "mypy>=1.0.0,<2.0.0",
-    "build>=1.0.0,<2.0.0",
+    "pytest",
+    "pytest-cov",
+    "flake8>=6.0.0",
+    "black",
+    "isort",
+    "mypy",
+    "build",
     "linecheck",
     "yaml-changelog>=0.1.7",
 ]
 
 docs = [
-    "sphinx>=5.0.0,<6.0.0",
-    "docutils>=0.17.0,<0.18.0",
-    "jupyter-book>=0.16.0",
+    "sphinx>=5.0.0",
+    "docutils>=0.17.0",
+    "jupyter-book>=0.15.0",
     "sphinx-book-theme>=1.0.0",
     "sphinx-copybutton>=0.5.0",
     "sphinx-design>=0.3.0",
-    "ipywidgets>=7.8.0,<8.0.0",
-    "plotly>=5.24.0,<6.0.0",
+    "ipywidgets>=7.8.0",
+    "plotly",
     "sphinx-argparse>=0.5.0",
     "sphinx-math-dollar>=1.2.1",
-    "myst-parser==0.18.1",
-    "myst-nb==0.17.2",
+    "myst-parser>=0.18.1",
+    "myst-nb>=0.17.2",
     "pyyaml",
-    "furo==2022.12.7",
+    "furo>=2022.12.7",
     "h5py>=3.1.0,<4.0.0",
 ]
 
@@ -67,4 +66,4 @@ policyengine-calibrate = "policyengine_calibrate:main"
 
 [build-system]
 requires = ["hatchling"]
-build-backend = "hatchling.build"
\ No newline at end of file
+build-backend = "hatchling.build"
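Since plotly is removed from the core dependencies above and now lives only in the docs extra, any remaining runtime import of it inside the package would need to become optional. A minimal guarded-import sketch, assuming such an import exists somewhere:

    try:
        import plotly  # available only when the docs extra is installed
    except ImportError:
        plotly = None  # plotting features degrade gracefully without it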
"jupyter-book>=0.16.0", + "sphinx>=5.0.0", + "docutils>=0.17.0", + "jupyter-book>=0.15.0", "sphinx-book-theme>=1.0.0", "sphinx-copybutton>=0.5.0", "sphinx-design>=0.3.0", - "ipywidgets>=7.8.0,<8.0.0", - "plotly>=5.24.0,<6.0.0", + "ipywidgets>=7.8.0", + "plotly", "sphinx-argparse>=0.5.0", "sphinx-math-dollar>=1.2.1", - "myst-parser==0.18.1", - "myst-nb==0.17.2", + "myst-parser>=0.18.1", + "myst-nb>=0.17.2", "pyyaml", - "furo==2022.12.7", + "furo>=2022.12.7", "h5py>=3.1.0,<4.0.0", ] @@ -67,4 +66,4 @@ policyengine-calibrate = "policyengine_calibrate:main" [build-system] requires = ["hatchling"] -build-backend = "hatchling.build" \ No newline at end of file +build-backend = "hatchling.build" From 4a6051b41296cc256774bcf36f51a9d2534aa651 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Thu, 24 Jul 2025 13:09:17 +0200 Subject: [PATCH 3/6] move publishing to after versioning --- .github/workflows/main.yml | 25 ------------------------- .github/workflows/versioning.yaml | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 76c3103..b835c71 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -59,28 +59,3 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} BRANCH: gh-pages # The branch the action should deploy to. FOLDER: docs/_build/html # The folder the action should deploy. - publish-to-pypi: - name: Publish to PyPI - needs: Test - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Fetch all history for all tags and branches - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install package - run: make install - - name: Build package - run: python -m build - - name: Publish a git tag - run: ".github/publish-git-tag.sh || true" - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI }} - skip-existing: true diff --git a/.github/workflows/versioning.yaml b/.github/workflows/versioning.yaml index 18bcb0b..2b1ad35 100644 --- a/.github/workflows/versioning.yaml +++ b/.github/workflows/versioning.yaml @@ -35,4 +35,29 @@ jobs: with: add: "." 
From d741e7a72c5a5beebfe31d16a169a8f36ad6b3a4 Mon Sep 17 00:00:00 2001
From: juaristi22
Date: Thu, 24 Jul 2025 13:32:03 +0200
Subject: [PATCH 4/6] switch condition

---
 .github/workflows/versioning.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/versioning.yaml b/.github/workflows/versioning.yaml
index 2b1ad35..f244e75 100644
--- a/.github/workflows/versioning.yaml
+++ b/.github/workflows/versioning.yaml
@@ -37,7 +37,7 @@ jobs:
           message: Update package version
   publish-to-pypi:
     name: Publish to PyPI
-    needs: Test
+    if: (github.event.head_commit.message == 'Update package version')
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code

From baeb94edfc089b26169aeb66c097dc82c0ddaf59 Mon Sep 17 00:00:00 2001
From: juaristi22
Date: Fri, 25 Jul 2025 16:19:09 +0200
Subject: [PATCH 5/6] add test for when all estimates are within tolerance

---
 src/microcalibrate/__init__.py                 |  2 +-
 .../{evalutation.py => evaluation.py}          |  0
 tests/test_evaluation.py                       | 22 ++++++++++++++++++++--
 3 files changed, 21 insertions(+), 3 deletions(-)
 rename src/microcalibrate/{evalutation.py => evaluation.py} (100%)

diff --git a/src/microcalibrate/__init__.py b/src/microcalibrate/__init__.py
index 62f3112..0b8f3fa 100644
--- a/src/microcalibrate/__init__.py
+++ b/src/microcalibrate/__init__.py
@@ -1,2 +1,2 @@
 from .calibration import Calibration
-from .evalutation import evaluate_estimate_distance_to_targets
+from .evaluation import evaluate_estimate_distance_to_targets
diff --git a/src/microcalibrate/evalutation.py b/src/microcalibrate/evaluation.py
similarity index 100%
rename from src/microcalibrate/evalutation.py
rename to src/microcalibrate/evaluation.py
diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py
index dad09b5..09f2e49 100644
--- a/tests/test_evaluation.py
+++ b/tests/test_evaluation.py
@@ -4,7 +4,7 @@
 
 import pytest
 from src.microcalibrate.calibration import Calibration
-from src.microcalibrate.evalutation import (
+from microcalibrate.evaluation import (
     evaluate_estimate_distance_to_targets,
 )
 import numpy as np
 import pandas as pd
 
 
 def test_evaluate_estimate_distance_to_targets() -> None:
-    """Test the evaluation of estimates against targets with tolerances."""
+    """Test the evaluation of estimates against targets with tolerances, for a case in which estimates are not within tolerance."""
 
     # Create a mock dataset with age and income
     random_generator = np.random.default_rng(0)
@@ -84,3 +84,21 @@
     )
 
     assert "targets are outside their tolerance levels" in str(exc_info.value)
+
+
+def test_all_within_tolerance():
+    """Tests a simple case where all estimates are within their tolerances."""
+    targets = np.array([10, 20, 30])
+    estimates = np.array([10.1, 19.8, 30.0])
+    tolerances = np.array([0.2, 0.3, 0.1])
+    target_names = ["A", "B", "C"]
"B", "C"] + + result_df = evaluate_estimate_distance_to_targets( + targets, estimates, tolerances, target_names + ) + + assert result_df["within_tolerance"].all() + assert result_df.shape == (3, 4) + np.testing.assert_array_almost_equal( + result_df["distances"], [0.1, 0.2, 0.0] + ) From d0bf4e853518d717c93d4419fa3477a887078e46 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Fri, 25 Jul 2025 16:35:02 +0200 Subject: [PATCH 6/6] minor changes --- src/microcalibrate/evaluation.py | 26 ++++++++++---------------- tests/test_evaluation.py | 4 +++- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/microcalibrate/evaluation.py b/src/microcalibrate/evaluation.py index 703c2b6..52bc30e 100644 --- a/src/microcalibrate/evaluation.py +++ b/src/microcalibrate/evaluation.py @@ -40,26 +40,20 @@ def evaluate_estimate_distance_to_targets( within_tolerance = distances <= tolerances evals = { - "target_names": target_names if target_names is not None else [], + "target_names": ( + target_names + if target_names is not None + else list(np.nan for _ in targets) + ), "distances": distances, "tolerances": tolerances, "within_tolerance": within_tolerance, } - if target_names is not None: - for i, target in enumerate(evals["within_tolerance"]): - if not target: - logger.warning( - f"The estimate corresponding to {evals['target_names'][i]} is outside the tolerance had an error {evals['distances'][i]} larger than the tolerance {evals['tolerances'][i]}." - ) - if raise_on_error: - raise ValueError( - f"{(~within_tolerance).sum()} targets are outside their tolerance levels." - ) - else: - if raise_on_error: - raise ValueError( - f"{(~within_tolerance).sum()} targets are outside their tolerance levels." - ) + num_outside_tolerance = (~within_tolerance).sum() + if raise_on_error and num_outside_tolerance > 0: + raise ValueError( + f"{num_outside_tolerance} target(s) are outside their tolerance levels." + ) return pd.DataFrame(evals) diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py index 09f2e49..1a7b371 100644 --- a/tests/test_evaluation.py +++ b/tests/test_evaluation.py @@ -83,7 +83,9 @@ def test_evaluate_estimate_distance_to_targets() -> None: raise_on_error=True, ) - assert "targets are outside their tolerance levels" in str(exc_info.value) + assert "target(s) are outside their tolerance levels" in str( + exc_info.value + ) def test_all_within_tolerance():