2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11"]
python-version: ["3.13"]

steps:
- name: Checkout repo
6 changes: 3 additions & 3 deletions .github/workflows/pr.yaml
@@ -11,7 +11,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
python-version: "3.13"
- name: Install uv
uses: astral-sh/setup-uv@v5
- name: Install relevant dependencies
@@ -24,7 +24,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest ]
python-version: ["3.11"]
python-version: ["3.13"]
fail-fast: false
runs-on: ${{ matrix.os }}
steps:
@@ -64,7 +64,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
python-version: "3.13"
- name: Install dependencies
run: |
uv pip install -e ".[dev,docs]" --system
2 changes: 1 addition & 1 deletion .github/workflows/versioning.yaml
@@ -25,7 +25,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: 3.11
python-version: 3.13
- name: Build changelog
run: pip install yaml-changelog && make changelog
- name: Preview changelog update
5 changes: 5 additions & 0 deletions changelog_entry.yaml
@@ -0,0 +1,5 @@
- bump: patch
changes:
changed:
- Moved to PolicyEngine's L0 package for the regularization implementation.
- Moved to Python 3.13.
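
For context on the L0 regularization mentioned above: sparsity comes from gating each record's weight with a hard-concrete variable that can collapse to exactly zero. The following is a rough conceptual sketch of that gate (after Louizos et al., 2018), not the l0-python package's actual code; names and defaults are illustrative.

import torch

def sample_hard_concrete(log_alpha, temperature=0.5, gamma=-0.1, zeta=1.1):
    # Draw a gate in [0, 1] that can land on exactly 0 or 1, which is what
    # makes an L0-style (count-of-nonzero-weights) penalty trainable.
    u = torch.rand_like(log_alpha).clamp(1e-6, 1 - 1e-6)
    s = torch.sigmoid((u.log() - (1 - u).log() + log_alpha) / temperature)
    s = s * (zeta - gamma) + gamma  # stretch the concrete sample to (gamma, zeta)
    return s.clamp(0.0, 1.0)        # hard clip so some gates become exact zeros

# Gates multiply the calibration weights, so a record whose gate collapses
# to zero drops out of the reweighted sample entirely.
log_alpha = torch.zeros(10_000, requires_grad=True)
sparse_weights = torch.ones(10_000) * sample_hard_concrete(log_alpha)
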
8 changes: 6 additions & 2 deletions pyproject.toml
@@ -7,12 +7,13 @@ authors = [
{ name = "Nikhil Woodruff", email = "[email protected]" },
{ name = "María Juaristi", email = "[email protected]" }
]
requires-python = ">=3.11"
requires-python = ">=3.13"
dependencies = [
"torch>=2.7.0",
"numpy",
"pandas",
"tqdm",
"l0-python",
]

[project.optional-dependencies]
@@ -53,13 +54,16 @@ include-package-data = true
[tool.setuptools.package-data]
"microcalibrate" = ["**/*"]

[tool.hatch.metadata]
allow-direct-references = true

[tool.isort]
profile = "black"
line_length = 79

[tool.black]
line-length = 79
target-version = ["py311"]
target-version = ["py313"]

[project.scripts]
policyengine-calibrate = "policyengine_calibrate:main"
23 changes: 12 additions & 11 deletions src/microcalibrate/calibration.py
@@ -6,8 +6,6 @@
import torch
from torch import Tensor

logger = logging.getLogger(__name__)


class Calibration:
def __init__(
@@ -62,6 +60,8 @@ def __init__(
else "mps" if torch.mps.is_available() else "cpu"
)

self.logger = logging.getLogger(__name__)

self.original_estimate_matrix = estimate_matrix
self.original_targets = targets
self.original_target_names = target_names
@@ -117,7 +117,7 @@ def __init__(
"Either estimate_function or estimate_matrix must be provided"
)
elif self.excluded_targets:
logger.warning(
self.logger.warning(
"You are passing an estimate function with excluded targets. "
"Make sure the function handles excluded targets correctly, as reweight() will handle the filtering."
)
Expand Down Expand Up @@ -156,6 +156,7 @@ def calibrate(self) -> None:
temperature=self.temperature,
sparse_learning_rate=self.sparse_learning_rate,
regularize_with_l0=self.regularize_with_l0,
logger=self.logger,
)

self.weights = new_weights
@@ -196,10 +197,10 @@ def exclude_targets(
else None
)

logger.info(
self.logger.info(
f"Excluded {len(excluded_indices)} targets from calibration: {self.excluded_targets}"
)
logger.info(f"Calibrating {len(targets_array)} targets")
self.logger.info(f"Calibrating {len(targets_array)} targets")
else:
targets_array = self.original_targets
target_names = self.original_target_names
@@ -301,7 +302,7 @@ def _assess_targets(
ValueError: If the targets do not match the expected format or values.
ValueError: If the targets are not compatible with each other.
"""
logger.info("Performing basic target assessment...")
self.logger.info("Performing basic target assessment...")

if targets.ndim != 1:
raise ValueError("Targets must be a 1D NumPy array.")
@@ -310,7 +311,7 @@
raise ValueError("Targets contain NaN values.")

if np.any(targets < 0):
logger.warning(
self.logger.warning(
"Some targets are negative. This may not make sense for totals."
)

@@ -340,13 +341,13 @@
)

if estimate_val == 0:
logger.warning(
self.logger.warning(
f"Column {target_name} has a zero estimate sum; using ε={eps} for comparison."
)

order_diff = np.log10(abs(ratio)) if ratio != 0 else np.inf
if order_diff > 1:
logger.warning(
self.logger.warning(
f"Target {target_name} ({target_val:.2e}) differs from initial estimate ({estimate_val:.2e}) "
f"by {order_diff:.2f} orders of magnitude."
)
@@ -364,7 +365,7 @@
/ estimate_matrix.shape[0]
)
if contribution_ratio < 0.01:
logger.warning(
self.logger.warning(
f"Target {target_name} is supported by only {contribution_ratio:.2%} "
f"of records in the loss matrix. This may make calibration unstable or ineffective."
)
@@ -421,7 +422,7 @@ def _get_linear_loss(metrics_matrix, target_vector, sparse=False):
for i in range(len(self.original_estimate_matrix.columns))
}

logger.info(
self.logger.info(
"Assessing analytical solution to the optimization problem for each target... \n"
"This evaluates how much each target complicates achieving calibration accuracy. The loss reported is the mean squared error of the least squares solution."
)
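
With this change, Calibration logs through an instance logger from logging.getLogger(__name__) (the "microcalibrate.calibration" logger) and hands it on to reweight(), rather than using a module-level logger. A minimal usage sketch for a calling script, assuming standard library logging configuration, would be:

import logging

# Enable INFO-level output so the target-assessment warnings and
# excluded-target messages shown in this diff are actually emitted.
logging.basicConfig(level=logging.INFO, format="%(name)s %(levelname)s: %(message)s")
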
76 changes: 75 additions & 1 deletion src/microcalibrate/evaluation.py
@@ -1,8 +1,9 @@
import logging
from typing import List, Optional
from typing import List, Optional, Union

import numpy as np
import pandas as pd
import torch

logger = logging.getLogger(__name__)

@@ -57,3 +58,76 @@ def evaluate_estimate_distance_to_targets(
)

return pd.DataFrame(evals)


def evaluate_sparse_weights(
optimised_weights: Union[torch.Tensor, np.ndarray],
estimate_matrix: Union[torch.Tensor, np.ndarray],
targets_array: Union[torch.Tensor, np.ndarray],
label: Optional[str] = "L0 Sparse Weights",
) -> float:
"""
Evaluate the performance of sparse weights against targets.

Args:
optimised_weights (torch.Tensor or np.ndarray): The optimised weights.
estimate_matrix (torch.Tensor or pd.DataFrame): The estimate matrix.
targets_array (torch.Tensor or np.ndarray): The target values.
label (str): A label for logging purposes.

Returns:
float: The percentage of estimates within 10% of the targets.
"""
# Convert all inputs to NumPy arrays right at the start
optimised_weights_np = (
optimised_weights.numpy()
if hasattr(optimised_weights, "numpy")
else np.asarray(optimised_weights)
)
estimate_matrix_np = (
estimate_matrix.numpy()
if hasattr(estimate_matrix, "numpy")
else np.asarray(estimate_matrix)
)
targets_array_np = (
targets_array.numpy()
if hasattr(targets_array, "numpy")
else np.asarray(targets_array)
)

logging.info(f"\n\n---{label}: reweighting quick diagnostics----\n")
logging.info(
f"{np.sum(optimised_weights_np == 0)} are zero, "
f"{np.sum(optimised_weights_np != 0)} weights are nonzero"
)

# All subsequent calculations use the guaranteed NumPy versions
estimate = optimised_weights_np @ estimate_matrix_np

rel_error = (
((estimate - targets_array_np) + 1) / (targets_array_np + 1)
) ** 2
within_10_percent_mask = np.abs(estimate - targets_array_np) <= (
0.10 * np.abs(targets_array_np)
)
percent_within_10 = np.mean(within_10_percent_mask) * 100
logging.info(
f"rel_error: min: {np.min(rel_error):.2f}\n"
f"max: {np.max(rel_error):.2f}\n"
f"mean: {np.mean(rel_error):.2f}\n"
f"median: {np.median(rel_error):.2f}\n"
f"Within 10% of target: {percent_within_10:.2f}%"
)
logging.info("Relative error over 100% for:")
for i in np.where(rel_error > 1)[0]:
# Keep this check, as Tensors won't have a .columns attribute
if hasattr(estimate_matrix, "columns"):
logging.info(f"target_name: {estimate_matrix.columns[i]}")
else:
logging.info(f"target_index: {i}")

logging.info(f"target_value: {targets_array_np[i]}")
logging.info(f"estimate_value: {estimate[i]}")
logging.info(f"has rel_error: {rel_error[i]:.2f}\n")
logging.info("---End of reweighting quick diagnostics------")
return percent_within_10
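
A toy call to the new helper, assuming plain NumPy inputs; shapes only need to satisfy weights @ estimate_matrix yielding one estimate per target, and all values below are illustrative:

import numpy as np
from microcalibrate.evaluation import evaluate_sparse_weights

rng = np.random.default_rng(0)
weights = np.array([1.2, 0.0, 0.0, 3.4, 2.1])   # two records zeroed out by the L0 gates
estimate_matrix = rng.random((5, 3))            # 5 records contributing to 3 targets
targets = weights @ estimate_matrix * 1.05      # targets 5% away from the initial estimates

pct_within_10 = evaluate_sparse_weights(weights, estimate_matrix, targets)
print(f"{pct_within_10:.1f}% of targets fall within 10% of their target value")
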
11 changes: 6 additions & 5 deletions src/microcalibrate/reweight.py
@@ -1,20 +1,17 @@
import logging
import os
from pathlib import Path
from typing import Callable, List, Optional, Union

import numpy as np
import pandas as pd
import torch
from l0 import HardConcrete
from torch import Tensor
from tqdm import tqdm

from .utils.l0 import HardConcrete
from .utils.log_performance import log_performance_over_epochs
from .utils.metrics import loss, pct_close

logger = logging.getLogger(__name__)


def reweight(
original_weights: np.ndarray,
@@ -35,6 +32,7 @@ def reweight(
excluded_target_data: Optional[dict] = None,
csv_path: Optional[str] = None,
device: Optional[str] = None,
logger: Optional[logging.Logger] = None,
) -> tuple[np.ndarray, Union[np.ndarray, None], pd.DataFrame]:
"""Reweight the original weights based on the loss matrix and targets.

@@ -57,6 +55,7 @@
excluded_target_data (Optional[dict]): Optional dictionary containing excluded target data with initial estimates and targets.
csv_path (Optional[str]): Optional path to save the performance metrics as a CSV file.
device (Optional[str]): Device to run the calibration on (e.g., 'cpu' or 'cuda'). If None, uses the default device.
logger (Optional[logging.Logger]): Logger for logging progress and metrics.

Returns:
np.ndarray: Reweighted weights.
@@ -197,7 +196,9 @@ def dropout_weights(weights: torch.Tensor, p: float) -> torch.Tensor:
device=device,
)
gates = HardConcrete(
len(original_weights), init_mean=init_mean, temperature=temperature
len(original_weights),
init_mean=init_mean,
temperature=temperature,
).to(device)
# NOTE: Results are pretty sensitive to learning rates;
# the optimizer breaks down somewhere near 0.005 and does better above 0.1
Expand Down
1 change: 0 additions & 1 deletion src/microcalibrate/utils/__init__.py
@@ -1,3 +1,2 @@
from .l0 import HardConcrete, evaluate_sparse_weights
from .log_performance import log_performance_over_epochs
from .metrics import loss, pct_close