diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index b835c71..be22f89 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.11"]
+        python-version: ["3.13"]
 
     steps:
       - name: Checkout repo
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index efd2780..621338b 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -11,7 +11,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: "3.11"
+          python-version: "3.13"
       - name: Install uv
         uses: astral-sh/setup-uv@v5
       - name: Install relevant dependencies
@@ -24,7 +24,7 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-latest ]
-        python-version: ["3.11"]
+        python-version: ["3.13"]
      fail-fast: false
     runs-on: ${{ matrix.os }}
     steps:
@@ -64,7 +64,7 @@ jobs:
       - name: Set up Python
        uses: actions/setup-python@v4
        with:
-          python-version: "3.11"
+          python-version: "3.13"
      - name: Install dependencies
       run: |
          uv pip install -e ".[dev,docs]" --system
diff --git a/.github/workflows/versioning.yaml b/.github/workflows/versioning.yaml
index f244e75..d2709e6 100644
--- a/.github/workflows/versioning.yaml
+++ b/.github/workflows/versioning.yaml
@@ -25,7 +25,7 @@ jobs:
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
-          python-version: 3.11
+          python-version: 3.13
      - name: Build changelog
        run: pip install yaml-changelog && make changelog
      - name: Preview changelog update
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29..53ffe63 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,5 @@
+- bump: patch
+  changes:
+    changed:
+      - Moved to PolicyEngine's L0 package for the regularization implementation.
+      - Moved to Python 3.13.
diff --git a/pyproject.toml b/pyproject.toml
index dedcd6b..cdd4199 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,12 +7,13 @@ authors = [
     { name = "Nikhil Woodruff", email = "nikhil.woodruff@outlook.com" },
     { name = "María Juaristi", email = "juaristi@uni.minerva.edu" }
 ]
-requires-python = ">=3.11"
+requires-python = ">=3.13"
 dependencies = [
     "torch>=2.7.0",
     "numpy",
     "pandas",
     "tqdm",
+    "l0-python",
 ]
 
 [project.optional-dependencies]
@@ -53,13 +54,16 @@ include-package-data = true
 [tool.setuptools.package-data]
 "microcalibrate" = ["**/*"]
 
+[tool.hatch.metadata]
+allow-direct-references = true
+
 [tool.isort]
 profile = "black"
 line_length = 79
 
 [tool.black]
 line-length = 79
-target-version = ["py311"]
+target-version = ["py313"]
 
 [project.scripts]
 policyengine-calibrate = "policyengine_calibrate:main"
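A quick way to sanity-check the bumped toolchain locally (an illustrative snippet, not part of the patch; the `l0` import name is taken from the `reweight.py` change below):

```python
# Verify the environment matches the new requirements before running the suite.
import sys

assert sys.version_info >= (3, 13), "microcalibrate now requires Python >= 3.13"

# 'l0-python' is the new runtime dependency; it is imported as 'l0' in reweight.py.
from l0 import HardConcrete  # noqa: F401
```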
diff --git a/src/microcalibrate/calibration.py b/src/microcalibrate/calibration.py
index 2258c42..a42d76a 100644
--- a/src/microcalibrate/calibration.py
+++ b/src/microcalibrate/calibration.py
@@ -6,8 +6,6 @@
 import torch
 from torch import Tensor
 
-logger = logging.getLogger(__name__)
-
 
 class Calibration:
     def __init__(
@@ -62,6 +60,8 @@ def __init__(
             else "mps" if torch.mps.is_available() else "cpu"
         )
 
+        self.logger = logging.getLogger(__name__)
+
         self.original_estimate_matrix = estimate_matrix
         self.original_targets = targets
         self.original_target_names = target_names
@@ -117,7 +117,7 @@ def __init__(
                 "Either estimate_function or estimate_matrix must be provided"
            )
         elif self.excluded_targets:
-            logger.warning(
+            self.logger.warning(
                 "You are passing an estimate function with excluded targets. "
                 "Make sure the function handles excluded targets correctly, as reweight() will handle the filtering."
             )
@@ -156,6 +156,7 @@ def calibrate(self) -> None:
             temperature=self.temperature,
             sparse_learning_rate=self.sparse_learning_rate,
             regularize_with_l0=self.regularize_with_l0,
+            logger=self.logger,
         )
 
         self.weights = new_weights
@@ -196,10 +197,10 @@ def exclude_targets(
                 else None
             )
 
-            logger.info(
+            self.logger.info(
                 f"Excluded {len(excluded_indices)} targets from calibration: {self.excluded_targets}"
             )
-            logger.info(f"Calibrating {len(targets_array)} targets")
+            self.logger.info(f"Calibrating {len(targets_array)} targets")
         else:
             targets_array = self.original_targets
             target_names = self.original_target_names
@@ -301,7 +302,7 @@ def _assess_targets(
             ValueError: If the targets do not match the expected format or values.
             ValueError: If the targets are not compatible with each other.
         """
-        logger.info("Performing basic target assessment...")
+        self.logger.info("Performing basic target assessment...")
 
         if targets.ndim != 1:
             raise ValueError("Targets must be a 1D NumPy array.")
@@ -310,7 +311,7 @@
             raise ValueError("Targets contain NaN values.")
 
         if np.any(targets < 0):
-            logger.warning(
+            self.logger.warning(
                 "Some targets are negative. This may not make sense for totals."
             )
@@ -340,13 +341,13 @@
             )
 
             if estimate_val == 0:
-                logger.warning(
+                self.logger.warning(
                     f"Column {target_name} has a zero estimate sum; using ε={eps} for comparison."
                 )
 
             order_diff = np.log10(abs(ratio)) if ratio != 0 else np.inf
             if order_diff > 1:
-                logger.warning(
+                self.logger.warning(
                     f"Target {target_name} ({target_val:.2e}) differs from initial estimate ({estimate_val:.2e}) "
                     f"by {order_diff:.2f} orders of magnitude."
                 )
@@ -364,7 +365,7 @@
                 / estimate_matrix.shape[0]
             )
             if contribution_ratio < 0.01:
-                logger.warning(
+                self.logger.warning(
                     f"Target {target_name} is supported by only {contribution_ratio:.2%} "
                     f"of records in the loss matrix. This may make calibration unstable or ineffective."
                 )
@@ -421,7 +422,7 @@ def _get_linear_loss(metrics_matrix, target_vector, sparse=False):
             for i in range(len(self.original_estimate_matrix.columns))
         }
 
-        logger.info(
+        self.logger.info(
             "Assessing analytical solution to the optimization problem for each target... \n"
             "This evaluates how much each target complicates achieving calibration accuracy. The loss reported is the mean squared error of the least squares solution."
         )
diff --git a/src/microcalibrate/evaluation.py b/src/microcalibrate/evaluation.py
index 52bc30e..e485194 100644
--- a/src/microcalibrate/evaluation.py
+++ b/src/microcalibrate/evaluation.py
@@ -1,8 +1,9 @@
 import logging
-from typing import List, Optional
+from typing import List, Optional, Union
 
 import numpy as np
 import pandas as pd
+import torch
 
 logger = logging.getLogger(__name__)
@@ -57,3 +58,76 @@ def evaluate_estimate_distance_to_targets(
     )
 
     return pd.DataFrame(evals)
+
+
+def evaluate_sparse_weights(
+    optimised_weights: Union[torch.Tensor, np.ndarray],
+    estimate_matrix: Union[torch.Tensor, np.ndarray],
+    targets_array: Union[torch.Tensor, np.ndarray],
+    label: Optional[str] = "L0 Sparse Weights",
+) -> float:
+    """
+    Evaluate the performance of sparse weights against targets.
+
+    Args:
+        optimised_weights (torch.Tensor or np.ndarray): The optimised weights.
+        estimate_matrix (torch.Tensor or np.ndarray): The estimate matrix.
+        targets_array (torch.Tensor or np.ndarray): The target values.
+        label (str): A label for logging purposes.
+
+    Returns:
+        float: The percentage of estimates within 10% of the targets.
+    """
+    # Convert all inputs to NumPy arrays right at the start
+    optimised_weights_np = (
+        optimised_weights.numpy()
+        if hasattr(optimised_weights, "numpy")
+        else np.asarray(optimised_weights)
+    )
+    estimate_matrix_np = (
+        estimate_matrix.numpy()
+        if hasattr(estimate_matrix, "numpy")
+        else np.asarray(estimate_matrix)
+    )
+    targets_array_np = (
+        targets_array.numpy()
+        if hasattr(targets_array, "numpy")
+        else np.asarray(targets_array)
+    )
+
+    logging.info(f"\n\n---{label}: reweighting quick diagnostics----\n")
+    logging.info(
+        f"{np.sum(optimised_weights_np == 0)} weights are zero, "
+        f"{np.sum(optimised_weights_np != 0)} weights are nonzero"
+    )
+
+    # All subsequent calculations use the guaranteed NumPy versions
+    estimate = optimised_weights_np @ estimate_matrix_np
+
+    rel_error = (
+        ((estimate - targets_array_np) + 1) / (targets_array_np + 1)
+    ) ** 2
+    within_10_percent_mask = np.abs(estimate - targets_array_np) <= (
+        0.10 * np.abs(targets_array_np)
+    )
+    percent_within_10 = np.mean(within_10_percent_mask) * 100
+    logging.info(
+        f"rel_error: min: {np.min(rel_error):.2f}\n"
+        f"max: {np.max(rel_error):.2f}\n"
+        f"mean: {np.mean(rel_error):.2f}\n"
+        f"median: {np.median(rel_error):.2f}\n"
+        f"Within 10% of target: {percent_within_10:.2f}%"
+    )
+    logging.info("Relative error over 100% for:")
+    for i in np.where(rel_error > 1)[0]:
+        # Keep this check, as Tensors won't have a .columns attribute
+        if hasattr(estimate_matrix, "columns"):
+            logging.info(f"target_name: {estimate_matrix.columns[i]}")
+        else:
+            logging.info(f"target_index: {i}")
+
+        logging.info(f"target_value: {targets_array_np[i]}")
+        logging.info(f"estimate_value: {estimate[i]}")
+        logging.info(f"has rel_error: {rel_error[i]:.2f}\n")
+    logging.info("---End of reweighting quick diagnostics------")
+    return percent_within_10
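A minimal usage sketch of the relocated `evaluate_sparse_weights` helper. The toy arrays are made up for illustration; by construction every estimate matches its target exactly, so the function should return 100.0:

```python
import numpy as np

from microcalibrate.evaluation import evaluate_sparse_weights

# Toy data: 3 records, 2 targets; one weight pruned to zero as L0 would do.
weights = np.array([0.0, 1.2, 3.4])
estimate_matrix = np.array(
    [
        [1.0, 0.0],
        [1.0, 2.0],
        [0.0, 1.0],
    ]
)
targets = weights @ estimate_matrix  # perfectly calibrated by construction

pct_within_10 = evaluate_sparse_weights(weights, estimate_matrix, targets)
print(pct_within_10)  # 100.0
```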
+ """ + # Convert all inputs to NumPy arrays right at the start + optimised_weights_np = ( + optimised_weights.numpy() + if hasattr(optimised_weights, "numpy") + else np.asarray(optimised_weights) + ) + estimate_matrix_np = ( + estimate_matrix.numpy() + if hasattr(estimate_matrix, "numpy") + else np.asarray(estimate_matrix) + ) + targets_array_np = ( + targets_array.numpy() + if hasattr(targets_array, "numpy") + else np.asarray(targets_array) + ) + + logging.info(f"\n\n---{label}: reweighting quick diagnostics----\n") + logging.info( + f"{np.sum(optimised_weights_np == 0)} are zero, " + f"{np.sum(optimised_weights_np != 0)} weights are nonzero" + ) + + # All subsequent calculations use the guaranteed NumPy versions + estimate = optimised_weights_np @ estimate_matrix_np + + rel_error = ( + ((estimate - targets_array_np) + 1) / (targets_array_np + 1) + ) ** 2 + within_10_percent_mask = np.abs(estimate - targets_array_np) <= ( + 0.10 * np.abs(targets_array_np) + ) + percent_within_10 = np.mean(within_10_percent_mask) * 100 + logging.info( + f"rel_error: min: {np.min(rel_error):.2f}\n" + f"max: {np.max(rel_error):.2f}\n" + f"mean: {np.mean(rel_error):.2f}\n" + f"median: {np.median(rel_error):.2f}\n" + f"Within 10% of target: {percent_within_10:.2f}%" + ) + logging.info("Relative error over 100% for:") + for i in np.where(rel_error > 1)[0]: + # Keep this check, as Tensors won't have a .columns attribute + if hasattr(estimate_matrix, "columns"): + logging.info(f"target_name: {estimate_matrix.columns[i]}") + else: + logging.info(f"target_index: {i}") + + logging.info(f"target_value: {targets_array_np[i]}") + logging.info(f"estimate_value: {estimate[i]}") + logging.info(f"has rel_error: {rel_error[i]:.2f}\n") + logging.info("---End of reweighting quick diagnostics------") + return percent_within_10 diff --git a/src/microcalibrate/reweight.py b/src/microcalibrate/reweight.py index d69c0d1..018ccba 100644 --- a/src/microcalibrate/reweight.py +++ b/src/microcalibrate/reweight.py @@ -1,20 +1,17 @@ import logging -import os from pathlib import Path from typing import Callable, List, Optional, Union import numpy as np import pandas as pd import torch +from l0 import HardConcrete from torch import Tensor from tqdm import tqdm -from .utils.l0 import HardConcrete from .utils.log_performance import log_performance_over_epochs from .utils.metrics import loss, pct_close -logger = logging.getLogger(__name__) - def reweight( original_weights: np.ndarray, @@ -35,6 +32,7 @@ def reweight( excluded_target_data: Optional[dict] = None, csv_path: Optional[str] = None, device: Optional[str] = None, + logger: Optional[logging.Logger] = None, ) -> tuple[np.ndarray, Union[np.ndarray, None], pd.DataFrame]: """Reweight the original weights based on the loss matrix and targets. @@ -57,6 +55,7 @@ def reweight( excluded_target_data (Optional[dict]): Optional dictionary containing excluded target data with initial estimates and targets. csv_path (Optional[str]): Optional path to save the performance metrics as a CSV file. device (Optional[str]): Device to run the calibration on (e.g., 'cpu' or 'cuda'). If None, uses the default device. + logger (Optional[logging.Logger]): Logger for logging progress and metrics. Returns: np.ndarray: Reweighted weights. 
diff --git a/src/microcalibrate/utils/__init__.py b/src/microcalibrate/utils/__init__.py
index f8877b8..0125483 100644
--- a/src/microcalibrate/utils/__init__.py
+++ b/src/microcalibrate/utils/__init__.py
@@ -1,3 +1,2 @@
-from .l0 import HardConcrete, evaluate_sparse_weights
 from .log_performance import log_performance_over_epochs
 from .metrics import loss, pct_close
diff --git a/src/microcalibrate/utils/l0.py b/src/microcalibrate/utils/l0.py
deleted file mode 100644
index a01242f..0000000
--- a/src/microcalibrate/utils/l0.py
+++ /dev/null
@@ -1,147 +0,0 @@
-import logging
-import math
-from typing import Optional, Union
-
-import numpy as np
-import torch
-import torch.nn as nn
-
-
-class HardConcrete(nn.Module):
-    """HardConcrete distribution for L0 regularization."""
-
-    def __init__(
-        self,
-        input_dim,
-        output_dim=None,
-        temperature=0.5,
-        stretch=0.1,
-        init_mean=0.5,
-    ):
-        super().__init__()
-        if output_dim is None:
-            self.gate_size = (input_dim,)
-        else:
-            self.gate_size = (input_dim, output_dim)
-        self.qz_logits = nn.Parameter(torch.zeros(self.gate_size))
-        self.temperature = temperature
-        self.stretch = stretch
-        self.gamma = -0.1
-        self.zeta = 1.1
-        self.init_mean = init_mean
-        self.reset_parameters()
-
-    def reset_parameters(self) -> None:
-        if self.init_mean is not None:
-            init_val = math.log(self.init_mean / (1 - self.init_mean))
-            self.qz_logits.data.fill_(init_val)
-
-    def forward(
-        self, input_shape: Optional[torch.Size] = None
-    ) -> torch.Tensor:
-        if self.training:
-            gates = self._sample_gates()
-        else:
-            gates = self._deterministic_gates()
-        if input_shape is not None and len(input_shape) > len(gates.shape):
-            gates = gates.unsqueeze(-1).unsqueeze(-1)
-        return gates
-
-    def _sample_gates(self) -> torch.Tensor:
-        u = torch.zeros_like(self.qz_logits).uniform_(1e-8, 1.0 - 1e-8)
-        s = torch.log(u) - torch.log(1 - u) + self.qz_logits
-        s = torch.sigmoid(s / self.temperature)
-        s = s * (self.zeta - self.gamma) + self.gamma
-        gates = torch.clamp(s, 0, 1)
-        return gates
-
-    def _deterministic_gates(self) -> torch.Tensor:
-        probs = torch.sigmoid(self.qz_logits)
-        gates = probs * (self.zeta - self.gamma) + self.gamma
-        return torch.clamp(gates, 0, 1)
-
-    def get_penalty(self) -> torch.Tensor:
-        logits_shifted = self.qz_logits - self.temperature * math.log(
-            -self.gamma / self.zeta
-        )
-        prob_active = torch.sigmoid(logits_shifted)
-        return prob_active.sum()
-
-    def get_active_prob(self) -> torch.Tensor:
-        logits_shifted = self.qz_logits - self.temperature * math.log(
-            -self.gamma / self.zeta
-        )
-        return torch.sigmoid(logits_shifted)
-
-
-def evaluate_sparse_weights(
-    optimised_weights: Union[torch.Tensor, np.ndarray],
-    estimate_matrix: Union[torch.Tensor, np.ndarray],
-    targets_array: Union[torch.Tensor, np.ndarray],
-    label: Optional[str] = "L0 Sparse Weights",
-) -> float:
-    """
-    Evaluate the performance of sparse weights against targets.
-
-    Args:
-        optimised_weights (torch.Tensor or np.ndarray): The optimised weights.
-        estimate_matrix (torch.Tensor or pd.DataFrame): The estimate matrix.
-        targets_array (torch.Tensor or np.ndarray): The target values.
-        label (str): A label for logging purposes.
-
-    Returns:
-        float: The percentage of estimates within 10% of the targets.
-    """
-    # Convert all inputs to NumPy arrays right at the start
-    optimised_weights_np = (
-        optimised_weights.numpy()
-        if hasattr(optimised_weights, "numpy")
-        else np.asarray(optimised_weights)
-    )
-    estimate_matrix_np = (
-        estimate_matrix.numpy()
-        if hasattr(estimate_matrix, "numpy")
-        else np.asarray(estimate_matrix)
-    )
-    targets_array_np = (
-        targets_array.numpy()
-        if hasattr(targets_array, "numpy")
-        else np.asarray(targets_array)
-    )
-
-    logging.info(f"\n\n---{label}: reweighting quick diagnostics----\n")
-    logging.info(
-        f"{np.sum(optimised_weights_np == 0)} are zero, "
-        f"{np.sum(optimised_weights_np != 0)} weights are nonzero"
-    )
-
-    # All subsequent calculations use the guaranteed NumPy versions
-    estimate = optimised_weights_np @ estimate_matrix_np
-
-    rel_error = (
-        ((estimate - targets_array_np) + 1) / (targets_array_np + 1)
-    ) ** 2
-    within_10_percent_mask = np.abs(estimate - targets_array_np) <= (
-        0.10 * np.abs(targets_array_np)
-    )
-    percent_within_10 = np.mean(within_10_percent_mask) * 100
-    logging.info(
-        f"rel_error: min: {np.min(rel_error):.2f}\n"
-        f"max: {np.max(rel_error):.2f}\n"
-        f"mean: {np.mean(rel_error):.2f}\n"
-        f"median: {np.median(rel_error):.2f}\n"
-        f"Within 10% of target: {percent_within_10:.2f}%"
-    )
-    logging.info("Relative error over 100% for:")
-    for i in np.where(rel_error > 1)[0]:
-        # Keep this check, as Tensors won't have a .columns attribute
-        if hasattr(estimate_matrix, "columns"):
-            logging.info(f"target_name: {estimate_matrix.columns[i]}")
-        else:
-            logging.info(f"target_index: {i}")
-
-        logging.info(f"target_value: {targets_array_np[i]}")
-        logging.info(f"estimate_value: {estimate[i]}")
-        logging.info(f"has rel_error: {rel_error[i]:.2f}\n")
-    logging.info("---End of reweighting quick diagnostics------")
-    return percent_within_10
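For reference, the deleted module implemented the hard concrete gate of Louizos et al. (2017), "Learning Sparse Neural Networks through L0 Regularization", which the external `l0` package is expected to reproduce. In the code's notation (`qz_logits` = log α, `temperature` = β, γ = −0.1, ζ = 1.1), training-time sampling and the expected-L0 penalty are:

```latex
s = \sigma\!\left(\frac{\log u - \log(1 - u) + \log\alpha}{\beta}\right),
  \qquad u \sim \mathcal{U}(0, 1), \\
\bar{s} = s\,(\zeta - \gamma) + \gamma,
  \qquad z = \min\!\bigl(1, \max(0, \bar{s})\bigr), \\
\mathbb{E}\bigl[\lVert z \rVert_0\bigr]
  = \sum_j \sigma\!\left(\log\alpha_j - \beta \log\frac{-\gamma}{\zeta}\right),
```

which is exactly what `_sample_gates` and `get_penalty` compute above.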
diff --git a/tests/test_regularization.py b/tests/test_regularization.py
index 34bf707..d074ea8 100644
--- a/tests/test_regularization.py
+++ b/tests/test_regularization.py
@@ -3,7 +3,7 @@
 """
 
 from microcalibrate.calibration import Calibration
-from microcalibrate.utils.l0 import evaluate_sparse_weights
+from microcalibrate.evaluation import evaluate_sparse_weights
 import logging
 import numpy as np
 import pandas as pd
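A hypothetical smoke test for the import move (not part of this PR): it checks that the helper now lives in `microcalibrate.evaluation` and that the old module path is gone.

```python
import pytest


def test_evaluate_sparse_weights_moved():
    from microcalibrate.evaluation import evaluate_sparse_weights

    assert callable(evaluate_sparse_weights)


def test_old_l0_module_removed():
    with pytest.raises(ModuleNotFoundError):
        import microcalibrate.utils.l0  # noqa: F401
```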