diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 76c3103..b835c71 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -59,28 +59,3 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           BRANCH: gh-pages # The branch the action should deploy to.
           FOLDER: docs/_build/html # The folder the action should deploy.
-  publish-to-pypi:
-    name: Publish to PyPI
-    needs: Test
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0 # Fetch all history for all tags and branches
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install package
-        run: make install
-      - name: Build package
-        run: python -m build
-      - name: Publish a git tag
-        run: ".github/publish-git-tag.sh || true"
-      - name: Publish to PyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
-        with:
-          user: __token__
-          password: ${{ secrets.PYPI }}
-          skip-existing: true
diff --git a/.github/workflows/versioning.yaml b/.github/workflows/versioning.yaml
index 18bcb0b..f244e75 100644
--- a/.github/workflows/versioning.yaml
+++ b/.github/workflows/versioning.yaml
@@ -35,4 +35,29 @@ jobs:
         with:
           add: "."
           message: Update package version
 
+  publish-to-pypi:
+    name: Publish to PyPI
+    if: (github.event.head_commit.message == 'Update package version')
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Fetch all history for all tags and branches
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install package
+        run: make install
+      - name: Build package
+        run: python -m build
+      - name: Publish a git tag
+        run: ".github/publish-git-tag.sh || true"
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI }}
+          skip-existing: true
\ No newline at end of file
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29..e9461eb 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    added:
+      - A function to evaluate whether estimates are within desired tolerance levels.
diff --git a/pyproject.toml b/pyproject.toml
index 87766c1..a3c5f62 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,4 +66,4 @@ policyengine-calibrate = "policyengine_calibrate:main"
 
 [build-system]
 requires = ["hatchling"]
-build-backend = "hatchling.build"
\ No newline at end of file
+build-backend = "hatchling.build"
diff --git a/src/microcalibrate/__init__.py b/src/microcalibrate/__init__.py
index e45111f..0b8f3fa 100644
--- a/src/microcalibrate/__init__.py
+++ b/src/microcalibrate/__init__.py
@@ -1 +1,2 @@
 from .calibration import Calibration
+from .evaluation import evaluate_estimate_distance_to_targets
diff --git a/src/microcalibrate/evaluation.py b/src/microcalibrate/evaluation.py
new file mode 100644
index 0000000..52bc30e
--- /dev/null
+++ b/src/microcalibrate/evaluation.py
@@ -0,0 +1,59 @@
+import logging
+from typing import List, Optional
+
+import numpy as np
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+
+def evaluate_estimate_distance_to_targets(
+    targets: np.ndarray,
+    estimates: np.ndarray,
+    tolerances: np.ndarray,
+    target_names: Optional[List[str]] = None,
+    raise_on_error: bool = False,
+) -> pd.DataFrame:
+    """
+    Evaluate the distance between estimates and targets against tolerances.
+
+    Args:
+        targets (np.ndarray): The ground truth target values.
+        estimates (np.ndarray): The estimated values to compare against the targets.
+        tolerances (np.ndarray): The acceptable tolerance levels for each target.
+        target_names (Optional[List[str]]): The names of the targets for reporting.
+        raise_on_error (bool): If True, raise a ValueError when any estimate is outside its tolerance. Defaults to False.
+
+    Returns:
+        evals (pd.DataFrame): A DataFrame containing the evaluation results, including:
+            - target_names: Names of the targets (if provided).
+            - distances: The absolute differences between estimates and targets.
+            - tolerances: The tolerance levels for each target.
+            - within_tolerance: Boolean array indicating if each estimate is within its tolerance.
+    """
+    if targets.shape != estimates.shape or targets.shape != tolerances.shape:
+        raise ValueError(
+            "Targets, estimates, and tolerances must have the same shape."
+        )
+
+    distances = np.abs(estimates - targets)
+    within_tolerance = distances <= tolerances
+
+    evals = {
+        "target_names": (
+            target_names
+            if target_names is not None
+            else [np.nan] * len(targets)
+        ),
+        "distances": distances,
+        "tolerances": tolerances,
+        "within_tolerance": within_tolerance,
+    }
+
+    num_outside_tolerance = (~within_tolerance).sum()
+    if raise_on_error and num_outside_tolerance > 0:
+        raise ValueError(
+            f"{num_outside_tolerance} target(s) are outside their tolerance levels."
+        )
+
+    return pd.DataFrame(evals)
diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py
new file mode 100644
index 0000000..1a7b371
--- /dev/null
+++ b/tests/test_evaluation.py
@@ -0,0 +1,106 @@
+"""
+Test the evaluation functionality for the calibration process.
+"""
+
+import pytest
+from microcalibrate.calibration import Calibration
+from microcalibrate.evaluation import (
+    evaluate_estimate_distance_to_targets,
+)
+import numpy as np
+import pandas as pd
+
+
+def test_evaluate_estimate_distance_to_targets() -> None:
+    """Test the evaluation of estimates against targets with tolerances, for a case in which the estimates are not within tolerance."""
+
+    # Create a mock dataset with age and income
+    random_generator = np.random.default_rng(0)
+    data = pd.DataFrame(
+        {
+            "age": random_generator.integers(18, 70, size=100),
+            "income": random_generator.normal(40000, 50000, size=100),
+        }
+    )
+    weights = np.ones(len(data))
+    targets_matrix = pd.DataFrame(
+        {
+            "income_aged_20_30": (
+                (data["age"] >= 20) & (data["age"] <= 30)
+            ).astype(float)
+            * data["income"],
+            "income_aged_40_50": (
+                (data["age"] >= 40) & (data["age"] <= 50)
+            ).astype(float)
+            * data["income"],
+        }
+    )
+    targets = np.array(
+        [
+            (targets_matrix["income_aged_20_30"] * weights).sum() * 50,
+            (targets_matrix["income_aged_40_50"] * weights).sum() * 50,
+        ]
+    )
+
+    calibrator = Calibration(
+        estimate_matrix=targets_matrix,
+        weights=weights,
+        targets=targets,
+        noise_level=0.05,
+        epochs=50,
+        learning_rate=0.01,
+        dropout_rate=0,
+    )
+
+    performance_df = calibrator.calibrate()
+    final_estimates = calibrator.estimate()
+    tolerances = np.array([0.001, 0.005])
+
+    # Evaluate the estimates against the targets without raising an error
+    evals_df = evaluate_estimate_distance_to_targets(
+        targets=targets,
+        estimates=final_estimates,
+        tolerances=tolerances,
+        target_names=["Income Aged 20-30", "Income Aged 40-50"],
+        raise_on_error=False,
+    )
+
+    # Check that the evaluation DataFrame has the expected structure
+    assert set(evals_df.columns) == {
+        "target_names",
+        "distances",
+        "tolerances",
+        "within_tolerance",
+    }
+
+    # Evaluate the estimates against the targets, raising an error on failure
+    with pytest.raises(ValueError) as exc_info:
+        evaluate_estimate_distance_to_targets(
+            targets=targets,
+            estimates=final_estimates,
+            tolerances=tolerances,
+            target_names=["Income Aged 20-30", "Income Aged 40-50"],
+            raise_on_error=True,
+        )
+
+    assert "target(s) are outside their tolerance levels" in str(
+        exc_info.value
+    )
+
+
+def test_all_within_tolerance() -> None:
+    """Tests a simple case where all estimates are within their tolerances."""
+    targets = np.array([10, 20, 30])
+    estimates = np.array([10.1, 19.8, 30.0])
+    tolerances = np.array([0.2, 0.3, 0.1])
+    target_names = ["A", "B", "C"]
+
+    result_df = evaluate_estimate_distance_to_targets(
+        targets, estimates, tolerances, target_names
+    )
+
+    assert result_df["within_tolerance"].all()
+    assert result_df.shape == (3, 4)
+    np.testing.assert_array_almost_equal(
+        result_df["distances"], [0.1, 0.2, 0.0]
+    )
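
A minimal usage sketch of the helper added in this diff, assuming the package is installed so that `microcalibrate` is importable at the top level (which the `__init__.py` change above enables); the values are illustrative, and note that tolerances are absolute differences in the same units as the targets:

import numpy as np

from microcalibrate import evaluate_estimate_distance_to_targets

# Illustrative targets, estimates, and absolute tolerances.
targets = np.array([100.0, 200.0])
estimates = np.array([100.4, 210.0])
tolerances = np.array([0.5, 5.0])

evals = evaluate_estimate_distance_to_targets(
    targets=targets,
    estimates=estimates,
    tolerances=tolerances,
    target_names=["A", "B"],
    raise_on_error=False,  # True would raise ValueError here, since "B" misses
)
print(evals)
# Expected output, approximately:
#   target_names  distances  tolerances  within_tolerance
# 0            A        0.4         0.5              True
# 1            B       10.0         5.0             False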