diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29..0d7336a 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    added:
+      - Add excluded_targets logic to handle holdout targets.
diff --git a/microcalibration-dashboard/src/components/CalibrationSummary.tsx b/microcalibration-dashboard/src/components/CalibrationSummary.tsx
index 24d6633..24b448c 100644
--- a/microcalibration-dashboard/src/components/CalibrationSummary.tsx
+++ b/microcalibration-dashboard/src/components/CalibrationSummary.tsx
@@ -39,6 +39,18 @@ export default function CalibrationSummary({ data }: CalibrationSummaryProps) {
     targetGroups.get(point.target_name)!.push(point);
   });
 
+  // Identify excluded targets (those with constant estimates across epochs)
+  const excludedTargets: string[] = [];
+  targetGroups.forEach((points, targetName) => {
+    if (points.length > 1) {
+      const estimates = points.map(p => p.estimate);
+      const isConstant = estimates.every(est => Math.abs(est - estimates[0]) < 1e-6);
+      if (isConstant) {
+        excludedTargets.push(targetName);
+      }
+    }
+  });
+
   // Calculate metrics for each target
   targetGroups.forEach((points, targetName) => {
     // Sort by epoch to get initial and final states
@@ -48,7 +60,8 @@
     if (initialPoint && finalPoint &&
         initialPoint.rel_abs_error !== undefined && finalPoint.rel_abs_error !== undefined &&
-        !isNaN(initialPoint.rel_abs_error) && !isNaN(finalPoint.rel_abs_error)) {
+        !isNaN(initialPoint.rel_abs_error) && !isNaN(finalPoint.rel_abs_error) &&
+        !excludedTargets.includes(targetName)) { // Skip excluded targets in improvement calculations
       const initialError = initialPoint.rel_abs_error;
       const finalError = finalPoint.rel_abs_error;
@@ -175,10 +188,36 @@
   return (
     <div className="…">
       <h2 className="…">Calibration progress summary</h2>
-      <p className="…">
+      <p className="…">
         Analysis of how calibration affected each target's accuracy from initial to final epoch
       </p>
 
+      {/* Excluded Targets Info */}
+      <div className="…">
+        <div className="…">
+          <div className="…">
+            <span className="…">Excluded targets</span>
+            <p className="…">
+              Targets excluded from calibration
+            </p>
+          </div>
+          <div className="…">
+            {excludedTargets.length > 0 ? (
+              <>
+                <div className="…">{excludedTargets.length}</div>
+                <div className="…">
+                  {excludedTargets.length <= 3
+                    ? excludedTargets.join(', ')
+                    : `${excludedTargets.slice(0, 3).join(', ')}, +${excludedTargets.length - 3} more`}
+                </div>
+              </>
+            ) : (
+              <div className="…">None</div>
+            )}
+          </div>
+        </div>
+      </div>
+
       {/* Summary Statistics */}
       {/* Improved Significantly */}
diff --git a/microcalibration-dashboard/src/components/ComparisonSummary.tsx b/microcalibration-dashboard/src/components/ComparisonSummary.tsx
index 8db4472..c739755 100644
--- a/microcalibration-dashboard/src/components/ComparisonSummary.tsx
+++ b/microcalibration-dashboard/src/components/ComparisonSummary.tsx
@@ -13,6 +13,7 @@ interface ComparisonSummaryProps {
 interface DatasetSummary {
   totalTargets: number;
   uniqueTargets: Set<string>;
+  excludedTargets: string[];
   epochs: number[];
   maxEpoch: number;
   minEpoch: number;
@@ -33,6 +34,28 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
     const uniqueTargets = new Set(data.map(d => d.target_name));
     const epochs = Array.from(new Set(data.map(d => d.epoch))).sort((a, b) => a - b);
 
+    // Identify excluded targets (those with constant estimates across epochs)
+    const excludedTargets: string[] = [];
+    const targetGroups = new Map();
+
+    // Group by target name
+    data.forEach(point => {
+      if (!targetGroups.has(point.target_name)) {
+        targetGroups.set(point.target_name, []);
+      }
+      targetGroups.get(point.target_name)!.push(point);
+    });
+
+    targetGroups.forEach((points, targetName) => {
+      if (points.length > 1) {
+        const estimates = points.map(p => p.estimate);
+        const isConstant = estimates.every(est => Math.abs(est - estimates[0]) < 1e-6);
+        if (isConstant) {
+          excludedTargets.push(targetName);
+        }
+      }
+    });
+
     // Get final epoch data for quality assessment
     const maxEpoch = Math.max(...epochs);
     const minEpoch = Math.min(...epochs);
@@ -62,7 +85,8 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
       if (initialTarget && finalTarget &&
           initialTarget.rel_abs_error !== undefined && finalTarget.rel_abs_error !== undefined &&
-          !isNaN(initialTarget.rel_abs_error) && !isNaN(finalTarget.rel_abs_error)) {
+          !isNaN(initialTarget.rel_abs_error) && !isNaN(finalTarget.rel_abs_error) &&
+          !excludedTargets.includes(targetName)) { // Skip excluded targets in progress calculations
         const improvement = initialTarget.rel_abs_error - finalTarget.rel_abs_error;
         const relativeImprovement = initialTarget.rel_abs_error > 0 ? improvement / initialTarget.rel_abs_error : 0;
@@ -76,6 +100,7 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
     return {
       totalTargets: uniqueTargets.size,
       uniqueTargets,
+      excludedTargets,
       epochs,
       maxEpoch,
       minEpoch,
@@ -94,6 +119,13 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
   const overlappingTargets = new Set([...firstSummary.uniqueTargets].filter(x => secondSummary.uniqueTargets.has(x)));
   const firstOnlyTargets = new Set([...firstSummary.uniqueTargets].filter(x => !secondSummary.uniqueTargets.has(x)));
   const secondOnlyTargets = new Set([...secondSummary.uniqueTargets].filter(x => !firstSummary.uniqueTargets.has(x)));
+
+  // Find excluded targets overlap
+  const firstExcluded = new Set(firstSummary.excludedTargets);
+  const secondExcluded = new Set(secondSummary.excludedTargets);
+  const overlappingExcluded = new Set([...firstExcluded].filter(x => secondExcluded.has(x)));
+  const firstOnlyExcluded = new Set([...firstExcluded].filter(x => !secondExcluded.has(x)));
+  const secondOnlyExcluded = new Set([...secondExcluded].filter(x => !firstExcluded.has(x)));
 
   return (
@@ -122,6 +154,12 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
               <span className="…">Avg final error:</span>
               <span className="…">{(firstSummary.avgFinalError * 100).toFixed(2)}%</span>
             </div>
+            <div className="…">
+              <span className="…">Excluded targets:</span>
+              <span className="…">
+                {firstSummary.excludedTargets.length > 0 ? firstSummary.excludedTargets.length : 'None'}
+              </span>
+            </div>
 
             {/* Quality distribution */}
@@ -181,6 +219,12 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
               <span className="…">Avg final error:</span>
               <span className="…">{(secondSummary.avgFinalError * 100).toFixed(2)}%</span>
             </div>
+            <div className="…">
+              <span className="…">Excluded targets:</span>
+              <span className="…">
+                {secondSummary.excludedTargets.length > 0 ? secondSummary.excludedTargets.length : 'None'}
+              </span>
+            </div>
 
             {/* Quality distribution */}
@@ -277,6 +321,69 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
         </div>
       </div>
 
+      {/* Excluded targets analysis */}
+      {(firstExcluded.size > 0 || secondExcluded.size > 0) && (
+        <div className="…">
+          <h3 className="…">
+            Excluded targets analysis
+          </h3>
+
+          <div className="…">
+            {/* Common excluded */}
+            <div className="…">
+              <div className="…">
+                <span className="…">Both excluded</span>
+              </div>
+              <div className="…">{overlappingExcluded.size}</div>
+              <div className="…">
+                {overlappingExcluded.size <= 3
+                  ? [...overlappingExcluded].join(', ') || 'None'
+                  : `${[...overlappingExcluded].slice(0, 3).join(', ')}, +${overlappingExcluded.size - 3} more`}
+              </div>
+            </div>
+
+            {/* First dataset excluded only */}
+            <div className="…">
+              <div className="…">
+                <span className="…">First excluded only</span>
+              </div>
+              <div className="…">{firstOnlyExcluded.size}</div>
+              <div className="…">
+                {firstOnlyExcluded.size <= 3
+                  ? [...firstOnlyExcluded].join(', ') || 'None'
+                  : `${[...firstOnlyExcluded].slice(0, 3).join(', ')}, +${firstOnlyExcluded.size - 3} more`}
+              </div>
+            </div>
+
+            {/* Second dataset excluded only */}
+            <div className="…">
+              <div className="…">
+                <span className="…">Second excluded only</span>
+              </div>
+              <div className="…">{secondOnlyExcluded.size}</div>
+              <div className="…">
+                {secondOnlyExcluded.size <= 3
+                  ? [...secondOnlyExcluded].join(', ') || 'None'
+                  : `${[...secondOnlyExcluded].slice(0, 3).join(', ')}, +${secondOnlyExcluded.size - 3} more`}
+              </div>
+            </div>
+          </div>
+
+          {/* Excluded targets summary */}
+          <div className="…">
+            <p className="…">
+              Excluded targets note: These targets were held constant during calibration and appear in logs with their initial estimates for reference.
+              {overlappingExcluded.size > 0 && ` ${overlappingExcluded.size} targets were excluded in both runs.`}
+              {firstOnlyExcluded.size > 0 && ` ${firstOnlyExcluded.size} targets were excluded only in the first run.`}
+              {secondOnlyExcluded.size > 0 && ` ${secondOnlyExcluded.size} targets were excluded only in the second run.`}
+            </p>
+          </div>
+        </div>
+      )}
     </div>
   );
 }
\ No newline at end of file
diff --git a/src/microcalibrate/calibration.py b/src/microcalibrate/calibration.py
index 44b1da1..5ad52d5 100644
--- a/src/microcalibrate/calibration.py
+++ b/src/microcalibrate/calibration.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Callable, Optional
+from typing import Callable, List, Optional
 
 import numpy as np
 import pandas as pd
@@ -22,6 +22,7 @@ def __init__(
         learning_rate: Optional[float] = 1e-3,
         dropout_rate: Optional[float] = 0.1,
         normalization_factor: Optional[torch.Tensor] = None,
+        excluded_targets: Optional[List[str]] = None,
         csv_path: Optional[str] = None,
         device: str = None,
     ):
@@ -38,12 +39,10 @@ def __init__(
             learning_rate (float): Optional learning rate for the optimizer. Defaults to 1e-3.
             dropout_rate (float): Optional probability of dropping weights during training. Defaults to 0.1.
             normalization_factor (Optional[torch.Tensor]): Optional normalization factor for the loss (handles multi-level geographical calibration). Defaults to None.
+            excluded_targets (Optional[List[str]]): Optional list of target names to exclude from calibration. Defaults to None.
             csv_path (str): Optional path to save performance logs as CSV. Defaults to None.
+            device (str): Optional device to run the calibration on. Defaults to None, which will use CUDA if available, otherwise MPS, otherwise CPU.
         """
-
-        self.estimate_function = estimate_function
-        self.target_names = target_names
-
         if device is not None:
             self.device = torch.device(device)
         else:
             self.device = torch.device(
                 "cuda"
                 if torch.cuda.is_available()
                 else "mps" if torch.mps.is_available() else "cpu"
             )
-
-        if self.estimate_function is None:
-            self.estimate_function = (
-                lambda weights: weights @ self.estimate_matrix_tensor
-            )
-        if estimate_matrix is not None:
-            self.estimate_matrix = estimate_matrix
-            self.estimate_matrix_tensor = torch.tensor(
-                estimate_matrix.values, dtype=torch.float32, device=self.device
-            )
-            self.target_names = estimate_matrix.columns.to_numpy()
-        else:
-            self.estimate_matrix = None
+        self.original_estimate_matrix = estimate_matrix
+        self.original_targets = targets
+        self.original_target_names = target_names
         self.weights = weights
-        self.targets = targets
+        self.excluded_targets = excluded_targets
+        self.estimate_function = estimate_function
         self.epochs = epochs
         self.noise_level = noise_level
         self.learning_rate = learning_rate
@@ -75,12 +65,54 @@ def __init__(
         self.csv_path = csv_path
         self.performance_df = None
 
+        self.estimate_matrix = None
+        self.targets = None
+        self.target_names = None
+        self.excluded_target_data = {}
+
+        # Set target names from estimate_matrix if not provided
+        if target_names is None and self.original_estimate_matrix is not None:
+            self.original_target_names = (
+                self.original_estimate_matrix.columns.to_numpy()
+            )
+
+        if self.excluded_targets is not None:
+            self.exclude_targets()
+        else:
+            self.targets = self.original_targets
+            self.target_names = self.original_target_names
+            if self.original_estimate_matrix is not None:
+                self.estimate_matrix = torch.tensor(
+                    self.original_estimate_matrix.values,
+                    dtype=torch.float32,
+                    device=self.device,
+                )
+            else:
+                self.estimate_matrix = None
+
+        if self.estimate_function is None:
+            if self.estimate_matrix is not None:
+                self.estimate_function = (
+                    lambda weights: weights @ self.estimate_matrix
+                )
+            else:
+                raise ValueError(
+                    "Either estimate_function or estimate_matrix must be provided"
+                )
+        elif self.excluded_targets:
+            logger.warning(
+                "You are passing an estimate function with excluded targets. "
+                "Make sure the function handles excluded targets correctly: its output must align with the filtered target vector passed to reweight()."
+            )
+
     def calibrate(self) -> None:
         """Calibrate the weights based on the estimate function and targets."""
         self._assess_targets(
             estimate_function=self.estimate_function,
-            estimate_matrix=self.estimate_matrix,
+            estimate_matrix=getattr(
+                self, "original_estimate_matrix", self.estimate_matrix
+            ),
             weights=self.weights,
             targets=self.targets,
             target_names=self.target_names,
@@ -98,6 +130,8 @@ def calibrate(self) -> None:
             learning_rate=self.learning_rate,
             dropout_rate=self.dropout_rate,
             normalization_factor=self.normalization_factor,
+            excluded_targets=self.excluded_targets,
+            excluded_target_data=self.excluded_target_data,
             csv_path=self.csv_path,
             device=self.device,
         )
@@ -106,6 +140,110 @@ def calibrate(self) -> None:
 
         return self.performance_df
 
+    def exclude_targets(
+        self, excluded_targets: Optional[List[str]] = None
+    ) -> None:
+        """Exclude specified targets from calibration.
+
+        Args:
+            excluded_targets (Optional[List[str]]): List of target names to exclude from calibration. If None, the original excluded_targets passed to the Calibration constructor will be excluded.
+        """
+        if excluded_targets is not None:
+            self.excluded_targets = excluded_targets
+        excluded_indices = []
+        self.excluded_target_data = {}
+        if self.excluded_targets and self.original_target_names is not None:
+            # Find indices of excluded targets
+            for i, name in enumerate(self.original_target_names):
+                if name in self.excluded_targets:
+                    excluded_indices.append(i)
+                    self.excluded_target_data[name] = {
+                        "target": self.original_targets[i],
+                        "index": i,
+                    }
+
+            # Remove excluded targets from calibration
+            calibration_mask = ~np.isin(
+                np.arange(len(self.original_target_names)), excluded_indices
+            )
+            targets_array = self.original_targets[calibration_mask]
+            target_names = (
+                self.original_target_names[calibration_mask]
+                if self.original_target_names is not None
+                else None
+            )
+
+            logger.info(
+                f"Excluded {len(excluded_indices)} targets from calibration: {self.excluded_targets}"
+            )
+            logger.info(f"Calibrating {len(targets_array)} targets")
+        else:
+            targets_array = self.original_targets
+            target_names = self.original_target_names
+
+        # Get initial estimates for excluded targets if needed
+        if self.excluded_targets:
+            initial_weights_tensor = torch.tensor(
+                self.weights, dtype=torch.float32, device=self.device
+            )
+            # Prefer the original matrix when available: it keeps the stored
+            # indices aligned with the full target set and refilters the
+            # estimate matrix correctly on repeated exclude_targets() calls.
+            if self.original_estimate_matrix is not None:
+                # Get initial estimates using the original full matrix
+                original_estimate_matrix_tensor = torch.tensor(
+                    self.original_estimate_matrix.values,
+                    dtype=torch.float32,
+                    device=self.device,
+                )
+                initial_estimates_all = (
+                    (initial_weights_tensor @ original_estimate_matrix_tensor)
+                    .detach()
+                    .cpu()
+                    .numpy()
+                )
+
+                # Filter estimate matrix for calibration
+                filtered_estimate_matrix = self.original_estimate_matrix.iloc[
+                    :, calibration_mask
+                ]
+                self.estimate_matrix = torch.tensor(
+                    filtered_estimate_matrix.values,
+                    dtype=torch.float32,
+                    device=self.device,
+                )
+            elif self.estimate_function is not None:
+                initial_estimates_all = (
+                    self.estimate_function(initial_weights_tensor)
+                    .detach()
+                    .cpu()
+                    .numpy()
+                )
+            else:
+                raise ValueError(
+                    "Either estimate_function or estimate_matrix must be provided"
+                )
+
+            # Store initial estimates for excluded targets
+            for name in self.excluded_targets:
+                if name in self.excluded_target_data:
+                    self.excluded_target_data[name]["initial_estimate"] = (
+                        initial_estimates_all[
+                            self.excluded_target_data[name]["index"]
+                        ]
+                    )
+
+        else:
+            if self.original_estimate_matrix is not None:
+                self.estimate_matrix = torch.tensor(
+                    self.original_estimate_matrix.values,
+                    dtype=torch.float32,
+                    device=self.device,
+                )
+            else:
+                self.estimate_matrix = None
+
+        # Set up final attributes
+        self.targets = targets_array
+        self.target_names = target_names
+
     def estimate(self) -> pd.Series:
         return pd.Series(
             index=self.target_names,
@@ -174,25 +312,39 @@ def _assess_targets(
         for i, (target_val, estimate_val, ratio) in enumerate(
             zip(targets, estimates, ratios)
         ):
+            target_name = (
+                target_names[i] if target_names is not None else f"target_{i}"
+            )
+
             if estimate_val == 0:
                 logger.warning(
-                    f"Column {target_names[i]} has a zero estimate sum; using ε={eps} for comparison."
+                    f"Column {target_name} has a zero estimate sum; using ε={eps} for comparison."
                 )
 
             order_diff = np.log10(abs(ratio)) if ratio != 0 else np.inf
             if order_diff > 1:
                 logger.warning(
-                    f"Target {target_names[i]} ({target_val:.2e}) differs from initial estimate ({estimate_val:.2e}) "
+                    f"Target {target_name} ({target_val:.2e}) differs from initial estimate ({estimate_val:.2e}) "
                     f"by {order_diff:.2f} orders of magnitude."
                 )
 
             if estimate_matrix is not None:
-                contributing_mask = estimate_matrix.iloc[:, i] != 0
-                contribution_ratio = (
-                    contributing_mask.sum() / estimate_matrix.shape[0]
-                )
+                # Check whether estimate_matrix is a DataFrame or a tensor
+                if hasattr(estimate_matrix, "iloc"):
+                    # It's a DataFrame
+                    contributing_mask = estimate_matrix.iloc[:, i] != 0
+                    contribution_ratio = (
+                        contributing_mask.sum() / estimate_matrix.shape[0]
+                    )
+                else:
+                    # It's a tensor
+                    contributing_mask = estimate_matrix[:, i] != 0
+                    contribution_ratio = (
+                        contributing_mask.sum().item()
+                        / estimate_matrix.shape[0]
+                    )
                 if contribution_ratio < 0.01:
                     logger.warning(
-                        f"Target {target_names[i]} is supported by only {contribution_ratio:.2%} "
+                        f"Target {target_name} is supported by only {contribution_ratio:.2%} "
                         f"of records in the loss matrix. This may make calibration unstable or ineffective."
                     )
diff --git a/src/microcalibrate/reweight.py b/src/microcalibrate/reweight.py
index e56ae71..1bc40f1 100644
--- a/src/microcalibrate/reweight.py
+++ b/src/microcalibrate/reweight.py
@@ -1,7 +1,7 @@
 import logging
 import os
 from pathlib import Path
-from typing import Callable, Optional
+from typing import Callable, List, Optional
 
 import numpy as np
 import pandas as pd
@@ -25,6 +25,8 @@ def reweight(
     noise_level: Optional[float] = 10.0,
     learning_rate: Optional[float] = 1e-3,
     normalization_factor: Optional[torch.Tensor] = None,
+    excluded_targets: Optional[List[str]] = None,
+    excluded_target_data: Optional[dict] = None,
     csv_path: Optional[str] = None,
     device: Optional[str] = None,
 ) -> tuple[np.ndarray, np.ndarray]:
@@ -40,6 +42,8 @@ def reweight(
         noise_level (float): Optional level of noise to add to the original weights.
         learning_rate (float): Optional learning rate for the optimizer.
         normalization_factor (Optional[torch.Tensor]): Optional normalization factor for the loss (handles multi-level geographical calibration).
+        excluded_targets (Optional[List[str]]): Optional list of target names excluded from calibration.
+        excluded_target_data (Optional[dict]): Optional dictionary of excluded-target data (original target values and initial estimates), keyed by target name.
         csv_path (Optional[str]): Optional path to save the performance metrics as a CSV file.
         device (Optional[str]): Device to run the calibration on (e.g., 'cpu' or 'cuda'). If None, uses the default device.
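For orientation, this is the shape of the `excluded_target_data` mapping that `reweight()` (and `log_performance_over_epochs()` below) receive from `Calibration.exclude_targets()`; the target name and numbers here are illustrative only, not values from this diff:

```python
# Keyed by excluded target name; built in Calibration.exclude_targets().
excluded_target_data = {
    "income_aged_20_30": {
        "target": 1.2e6,            # held-out target value
        "index": 0,                 # column index in the original estimate matrix
        "initial_estimate": 1.0e6,  # estimate under the uncalibrated weights
    },
}
```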
@@ -58,7 +62,12 @@ def reweight(
         f"std: {original_weights.std():.4f}"
     )
 
-    targets = torch.tensor(targets_array, dtype=torch.float32, device=device)
+    targets = torch.tensor(
+        targets_array,
+        dtype=torch.float32,
+        device=device,
+    )
+
     random_noise = np.random.random(original_weights.shape) * noise_level
     weights = torch.tensor(
         np.log(original_weights + random_noise),
@@ -147,7 +156,11 @@ def dropout_weights(weights: torch.Tensor, p: float) -> torch.Tensor:
     }
 
     performance_df = log_performance_over_epochs(
-        tracker_dict, targets, target_names
+        tracker_dict,
+        targets,
+        target_names,
+        excluded_targets,
+        excluded_target_data,
     )
 
     if csv_path:
diff --git a/src/microcalibrate/utils/log_performance.py b/src/microcalibrate/utils/log_performance.py
index ce67dce..07351da 100644
--- a/src/microcalibrate/utils/log_performance.py
+++ b/src/microcalibrate/utils/log_performance.py
@@ -9,14 +9,18 @@ def log_performance_over_epochs(
     tracked: Dict[str, List[Any]],
     targets: torch.Tensor,
     target_names: List[str],
+    excluded_targets: Optional[List[str]] = None,
+    excluded_target_data: Optional[Dict[str, Dict[str, Any]]] = None,
 ) -> pd.DataFrame:
     """
     Calculate the errors and performance metrics for the model for all the logged epochs.
 
     Args:
         tracked (Dict[str, List[Any]]): Dictionary containing lists of tracked metrics.
-        targets (torch.Tensor): Array of target values.
-        targets_names (List[str]): Array of target names.
+        targets (torch.Tensor): Array of target values for calibration targets only.
+        target_names (List[str]): Array of target names for calibration targets only.
+        excluded_targets (Optional[List[str]]): List of target names excluded from calibration that should still appear in the performance log.
+        excluded_target_data (Optional[Dict[str, Dict[str, Any]]]): Dictionary containing excluded target data with initial estimates and targets.
 
     Returns:
         performance_df: DataFrame containing the calculated errors and performance metrics.
@@ -61,6 +65,31 @@ def log_performance_over_epochs(
                 }
             )
 
+        # Add excluded targets with their initial estimates for each epoch
+        if excluded_targets and excluded_target_data:
+            for target_name in excluded_targets:
+                if target_name in excluded_target_data:
+                    target_data = excluded_target_data[target_name]
+                    target_val = target_data["target"]
+                    est_val = target_data["initial_estimate"]
+                    err = est_val - target_val
+
+                    rows.append(
+                        {
+                            **base,
+                            "target_name": target_name,
+                            "target": target_val,
+                            "estimate": est_val,
+                            "error": err,
+                            "abs_error": abs(err),
+                            "rel_abs_error": (
+                                abs(err) / abs(target_val)
+                                if target_val != 0
+                                else np.nan
+                            ),
+                        }
+                    )
+
     df = pd.DataFrame(rows)
 
     if target_names is None:
diff --git a/tests/test_calibration.py b/tests/test_calibration.py
index 7b0719e..31fec46 100644
--- a/tests/test_calibration.py
+++ b/tests/test_calibration.py
@@ -193,3 +193,80 @@ def test_calibration_warnings_system(caplog) -> None:
     assert (
         "Some targets are negative" in log_text
     ), "Negative target warning not emitted."
+
+
+def test_calibration_excluded_targets() -> None:
+    """Test that the calibration process works correctly with excluded targets."""
+
+    # Create a mock dataset with age and income
+    random_generator = np.random.default_rng(0)
+    data = pd.DataFrame(
+        {
+            "age": random_generator.integers(18, 70, size=100),
+            "income": random_generator.normal(40000, 50000, size=100),
+        }
+    )
+    weights = np.ones(len(data))
+    targets_matrix = pd.DataFrame(
+        {
+            "income_aged_20_30": (
+                (data["age"] >= 20) & (data["age"] < 30)
+            ).astype(float)
+            * data["income"],
+            "income_aged_30_40": (
+                (data["age"] >= 30) & (data["age"] < 40)
+            ).astype(float)
+            * data["income"],
+            "income_aged_40_50": (
+                (data["age"] >= 40) & (data["age"] < 50)
+            ).astype(float)
+            * data["income"],
+            "income_aged_50_60": (
+                (data["age"] >= 50) & (data["age"] < 60)
+            ).astype(float)
+            * data["income"],
+            "income_aged_60_70": (
+                (data["age"] >= 60) & (data["age"] <= 70)
+            ).astype(float)
+            * data["income"],
+        }
+    )
+    targets = np.array(
+        [
+            (targets_matrix["income_aged_20_30"] * weights).sum() * 1.2,
+            (targets_matrix["income_aged_30_40"] * weights).sum() * 1.2,
+            (targets_matrix["income_aged_40_50"] * weights).sum() * 1.2,
+            (targets_matrix["income_aged_50_60"] * weights).sum() * 1.2,
+            (targets_matrix["income_aged_60_70"] * weights).sum() * 1.2,
+        ]
+    )
+
+    excluded_targets = ["income_aged_20_30"]
+
+    calibrator = Calibration(
+        estimate_matrix=targets_matrix,
+        weights=weights,
+        targets=targets,
+        noise_level=0.05,
+        epochs=528,
+        learning_rate=0.01,
+        dropout_rate=0,
+        excluded_targets=excluded_targets,
+    )
+
+    first_performance_df = calibrator.calibrate()
+    first_calibration_estimates = calibrator.estimate()
+
+    assert len(first_calibration_estimates) == len(
+        np.array(calibrator.targets)
+    ), "Estimates should cover only the non-excluded calibration targets."
+
+    # Iteratively exclude a new target and recalibrate
+    new_target_to_exclude = ["income_aged_30_40"]
+    calibrator.exclude_targets(new_target_to_exclude)
+    second_performance_df = calibrator.calibrate()
+    second_calibration_estimates = calibrator.estimate()
+
+    assert (
+        new_target_to_exclude[0] not in calibrator.target_names
+    ), f"Target {new_target_to_exclude[0]} should be excluded from calibration."
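Taken together, the new API can be exercised as below; this is a minimal sketch mirroring the test above (the import path, dataset, and epoch count are assumptions for illustration, not part of this diff):

```python
import numpy as np
import pandas as pd

from microcalibrate.calibration import Calibration

rng = np.random.default_rng(0)
data = pd.DataFrame(
    {
        "age": rng.integers(18, 70, size=100),
        "income": rng.normal(40000, 5000, size=100),
    }
)
weights = np.ones(len(data))

# One column per target; column names double as target names.
estimate_matrix = pd.DataFrame(
    {
        "income_aged_20_30": ((data["age"] >= 20) & (data["age"] < 30)).astype(float)
        * data["income"],
        "income_aged_30_40": ((data["age"] >= 30) & (data["age"] < 40)).astype(float)
        * data["income"],
    }
)
targets = (estimate_matrix.mul(weights, axis=0).sum() * 1.2).to_numpy()

calibrator = Calibration(
    estimate_matrix=estimate_matrix,
    weights=weights,
    targets=targets,
    noise_level=0.05,
    epochs=100,
    learning_rate=0.01,
    dropout_rate=0,
    excluded_targets=["income_aged_20_30"],  # held out of the loss
)
performance = calibrator.calibrate()

# Held-out targets still appear in the log, pinned to their initial
# estimate at every epoch -- the constant series the dashboards detect.
held_out = performance[performance["target_name"] == "income_aged_20_30"]
assert held_out["estimate"].nunique() == 1

# Targets can also be excluded iteratively between runs:
calibrator.exclude_targets(["income_aged_30_40"])
calibrator.calibrate()
```

Because excluded targets are logged at their initial estimate for every epoch, the dashboard components recover them purely from the log by flagging estimate series that stay constant to within 1e-6.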