diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29..0d7336a 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    added:
+      - Add excluded_targets option to exclude holdout targets from calibration.
diff --git a/microcalibration-dashboard/src/components/CalibrationSummary.tsx b/microcalibration-dashboard/src/components/CalibrationSummary.tsx
index 24d6633..24b448c 100644
--- a/microcalibration-dashboard/src/components/CalibrationSummary.tsx
+++ b/microcalibration-dashboard/src/components/CalibrationSummary.tsx
@@ -39,6 +39,18 @@ export default function CalibrationSummary({ data }: CalibrationSummaryProps) {
targetGroups.get(point.target_name)!.push(point);
});
+ // Identify excluded targets (those with constant estimates across epochs)
+ const excludedTargets: string[] = [];
+ targetGroups.forEach((points, targetName) => {
+ if (points.length > 1) {
+ const estimates = points.map(p => p.estimate);
+ const isConstant = estimates.every(est => Math.abs(est - estimates[0]) < 1e-6);
+ if (isConstant) {
+ excludedTargets.push(targetName);
+ }
+ }
+ });
+
// Calculate metrics for each target
targetGroups.forEach((points, targetName) => {
// Sort by epoch to get initial and final states
@@ -48,7 +60,8 @@ export default function CalibrationSummary({ data }: CalibrationSummaryProps) {
if (initialPoint && finalPoint &&
initialPoint.rel_abs_error !== undefined && finalPoint.rel_abs_error !== undefined &&
- !isNaN(initialPoint.rel_abs_error) && !isNaN(finalPoint.rel_abs_error)) {
+ !isNaN(initialPoint.rel_abs_error) && !isNaN(finalPoint.rel_abs_error) &&
+        !excludedTargets.includes(targetName)) { // skip targets excluded from calibration in improvement stats
const initialError = initialPoint.rel_abs_error;
const finalError = finalPoint.rel_abs_error;
@@ -175,10 +188,36 @@ export default function CalibrationSummary({ data }: CalibrationSummaryProps) {
   return (
     <div>
       <h2>Calibration progress summary</h2>
       <p>Analysis of how calibration affected each target's accuracy from initial to final epoch</p>
+      {/* Excluded Targets Info */}
+      <div>
+        <h3>Excluded targets</h3>
+        <p>Targets excluded from calibration</p>
+        {excludedTargets.length > 0 ? (
+          <>
+            <span>{excludedTargets.length}</span>
+            <span>
+              {excludedTargets.length <= 3
+                ? excludedTargets.join(', ')
+                : `${excludedTargets.slice(0, 3).join(', ')}, +${excludedTargets.length - 3} more`}
+            </span>
+          </>
+        ) : (
+          <span>None</span>
+        )}
+      </div>
       {/* Summary Statistics */}
       {/* Improved Significantly */}
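Note on the detection logic above: the dashboard infers exclusion from the logs rather than from configuration, treating any target whose estimate never moves across epochs (within a 1e-6 tolerance) as excluded. For reference, a minimal pandas sketch of the same heuristic, assuming the log columns written by `log_performance_over_epochs` (`target_name`, `epoch`, `estimate`); the helper name is hypothetical:

```python
import pandas as pd

def find_excluded_targets(performance_df: pd.DataFrame, tol: float = 1e-6) -> list[str]:
    """Return target names whose estimates stay constant across all logged epochs."""
    excluded = []
    for name, group in performance_df.groupby("target_name"):
        estimates = group.sort_values("epoch")["estimate"].to_numpy()
        # Mirror the dashboard check: every estimate within tol of the first one.
        if len(estimates) > 1 and (abs(estimates - estimates[0]) < tol).all():
            excluded.append(str(name))
    return excluded
```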
diff --git a/microcalibration-dashboard/src/components/ComparisonSummary.tsx b/microcalibration-dashboard/src/components/ComparisonSummary.tsx
index 8db4472..c739755 100644
--- a/microcalibration-dashboard/src/components/ComparisonSummary.tsx
+++ b/microcalibration-dashboard/src/components/ComparisonSummary.tsx
@@ -13,6 +13,7 @@ interface ComparisonSummaryProps {
interface DatasetSummary {
totalTargets: number;
   uniqueTargets: Set<string>;
+ excludedTargets: string[];
epochs: number[];
maxEpoch: number;
minEpoch: number;
@@ -33,6 +34,28 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
const uniqueTargets = new Set(data.map(d => d.target_name));
const epochs = Array.from(new Set(data.map(d => d.epoch))).sort((a, b) => a - b);
+ // Identify excluded targets (those with constant estimates across epochs)
+ const excludedTargets: string[] = [];
+ const targetGroups = new Map();
+
+ // Group by target name
+ data.forEach(point => {
+ if (!targetGroups.has(point.target_name)) {
+ targetGroups.set(point.target_name, []);
+ }
+ targetGroups.get(point.target_name)!.push(point);
+ });
+
+ targetGroups.forEach((points, targetName) => {
+ if (points.length > 1) {
+ const estimates = points.map(p => p.estimate);
+ const isConstant = estimates.every(est => Math.abs(est - estimates[0]) < 1e-6);
+ if (isConstant) {
+ excludedTargets.push(targetName);
+ }
+ }
+ });
+
// Get final epoch data for quality assessment
const maxEpoch = Math.max(...epochs);
const minEpoch = Math.min(...epochs);
@@ -62,7 +85,8 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
if (initialTarget && finalTarget &&
initialTarget.rel_abs_error !== undefined && finalTarget.rel_abs_error !== undefined &&
- !isNaN(initialTarget.rel_abs_error) && !isNaN(finalTarget.rel_abs_error)) {
+ !isNaN(initialTarget.rel_abs_error) && !isNaN(finalTarget.rel_abs_error) &&
+        !excludedTargets.includes(targetName)) { // skip targets excluded from calibration in progress stats
const improvement = initialTarget.rel_abs_error - finalTarget.rel_abs_error;
const relativeImprovement = initialTarget.rel_abs_error > 0 ? improvement / initialTarget.rel_abs_error : 0;
@@ -76,6 +100,7 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
return {
totalTargets: uniqueTargets.size,
uniqueTargets,
+ excludedTargets,
epochs,
maxEpoch,
minEpoch,
@@ -94,6 +119,13 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
const overlappingTargets = new Set([...firstSummary.uniqueTargets].filter(x => secondSummary.uniqueTargets.has(x)));
const firstOnlyTargets = new Set([...firstSummary.uniqueTargets].filter(x => !secondSummary.uniqueTargets.has(x)));
const secondOnlyTargets = new Set([...secondSummary.uniqueTargets].filter(x => !firstSummary.uniqueTargets.has(x)));
+
+ // Find excluded targets overlap
+ const firstExcluded = new Set(firstSummary.excludedTargets);
+ const secondExcluded = new Set(secondSummary.excludedTargets);
+ const overlappingExcluded = new Set([...firstExcluded].filter(x => secondExcluded.has(x)));
+ const firstOnlyExcluded = new Set([...firstExcluded].filter(x => !secondExcluded.has(x)));
+ const secondOnlyExcluded = new Set([...secondExcluded].filter(x => !firstExcluded.has(x)));
return (
@@ -122,6 +154,12 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
           <span>Avg final error:</span>
           <span>{(firstSummary.avgFinalError * 100).toFixed(2)}%</span>
+          <div>
+            <span>Excluded targets:</span>
+            <span>
+              {firstSummary.excludedTargets.length > 0
+                ? firstSummary.excludedTargets.length
+                : 'None'}
+            </span>
+          </div>
           {/* Quality distribution */}
@@ -181,6 +219,12 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
           <span>Avg final error:</span>
           <span>{(secondSummary.avgFinalError * 100).toFixed(2)}%</span>
+          <div>
+            <span>Excluded targets:</span>
+            <span>
+              {secondSummary.excludedTargets.length > 0
+                ? secondSummary.excludedTargets.length
+                : 'None'}
+            </span>
+          </div>
           {/* Quality distribution */}
@@ -277,6 +321,69 @@ export default function ComparisonSummary({ firstData, secondData, firstName, se
+
+      {/* Excluded targets analysis */}
+      {(firstExcluded.size > 0 || secondExcluded.size > 0) && (
+        <div>
+          <h3>Excluded targets analysis</h3>
+
+          {/* Common excluded */}
+          <div>
+            <h4>Common excluded</h4>
+            <span>{overlappingExcluded.size}</span>
+            <span>
+              {overlappingExcluded.size <= 3
+                ? [...overlappingExcluded].join(', ') || 'None'
+                : `${[...overlappingExcluded].slice(0, 3).join(', ')}, +${overlappingExcluded.size - 3} more`}
+            </span>
+          </div>
+
+          {/* First dataset excluded only */}
+          <div>
+            <h4>First excluded only</h4>
+            <span>{firstOnlyExcluded.size}</span>
+            <span>
+              {firstOnlyExcluded.size <= 3
+                ? [...firstOnlyExcluded].join(', ') || 'None'
+                : `${[...firstOnlyExcluded].slice(0, 3).join(', ')}, +${firstOnlyExcluded.size - 3} more`}
+            </span>
+          </div>
+
+          {/* Second dataset excluded only */}
+          <div>
+            <h4>Second excluded only</h4>
+            <span>{secondOnlyExcluded.size}</span>
+            <span>
+              {secondOnlyExcluded.size <= 3
+                ? [...secondOnlyExcluded].join(', ') || 'None'
+                : `${[...secondOnlyExcluded].slice(0, 3).join(', ')}, +${secondOnlyExcluded.size - 3} more`}
+            </span>
+          </div>
+
+          {/* Excluded targets summary */}
+          <p>
+            Excluded targets note: These targets were held constant during calibration and appear in logs with their initial estimates for reference.
+            {overlappingExcluded.size > 0 && ` ${overlappingExcluded.size} targets were excluded in both runs.`}
+            {firstOnlyExcluded.size > 0 && ` ${firstOnlyExcluded.size} targets were excluded only in the first run.`}
+            {secondOnlyExcluded.size > 0 && ` ${secondOnlyExcluded.size} targets were excluded only in the second run.`}
+          </p>
+        </div>
+      )}
     </div>
   );
 }
\ No newline at end of file
diff --git a/src/microcalibrate/calibration.py b/src/microcalibrate/calibration.py
index 44b1da1..5ad52d5 100644
--- a/src/microcalibrate/calibration.py
+++ b/src/microcalibrate/calibration.py
@@ -1,5 +1,5 @@
import logging
-from typing import Callable, Optional
+from typing import Callable, List, Optional
import numpy as np
import pandas as pd
@@ -22,6 +22,7 @@ def __init__(
learning_rate: Optional[float] = 1e-3,
dropout_rate: Optional[float] = 0.1,
normalization_factor: Optional[torch.Tensor] = None,
+ excluded_targets: Optional[List[str]] = None,
csv_path: Optional[str] = None,
device: str = None,
):
@@ -38,12 +39,10 @@ def __init__(
learning_rate (float): Optional learning rate for the optimizer. Defaults to 1e-3.
dropout_rate (float): Optional probability of dropping weights during training. Defaults to 0.1.
normalization_factor (Optional[torch.Tensor]): Optional normalization factor for the loss (handles multi-level geographical calibration). Defaults to None.
+            excluded_targets (Optional[List[str]]): Optional list of target names to exclude from calibration. Defaults to None.
csv_path (str): Optional path to save performance logs as CSV. Defaults to None.
+ device (str): Optional device to run the calibration on. Defaults to None, which will use CUDA if available, otherwise MPS, otherwise CPU.
"""
-
- self.estimate_function = estimate_function
- self.target_names = target_names
-
if device is not None:
self.device = torch.device(device)
else:
@@ -52,21 +51,12 @@ def __init__(
if torch.cuda.is_available()
else "mps" if torch.mps.is_available() else "cpu"
)
-
- if self.estimate_function is None:
- self.estimate_function = (
- lambda weights: weights @ self.estimate_matrix_tensor
- )
- if estimate_matrix is not None:
- self.estimate_matrix = estimate_matrix
- self.estimate_matrix_tensor = torch.tensor(
- estimate_matrix.values, dtype=torch.float32, device=self.device
- )
- self.target_names = estimate_matrix.columns.to_numpy()
- else:
- self.estimate_matrix = None
+ self.original_estimate_matrix = estimate_matrix
+ self.original_targets = targets
+ self.original_target_names = target_names
self.weights = weights
- self.targets = targets
+ self.excluded_targets = excluded_targets
+ self.estimate_function = estimate_function
self.epochs = epochs
self.noise_level = noise_level
self.learning_rate = learning_rate
@@ -75,12 +65,54 @@ def __init__(
self.csv_path = csv_path
self.performance_df = None
+ self.estimate_matrix = None
+ self.targets = None
+ self.target_names = None
+ self.excluded_target_data = {}
+
+ # Set target names from estimate_matrix if not provided
+ if target_names is None and self.original_estimate_matrix is not None:
+ self.original_target_names = (
+ self.original_estimate_matrix.columns.to_numpy()
+ )
+
+ if self.excluded_targets is not None:
+ self.exclude_targets()
+ else:
+ self.targets = self.original_targets
+ self.target_names = self.original_target_names
+ if self.original_estimate_matrix is not None:
+ self.estimate_matrix = torch.tensor(
+ self.original_estimate_matrix.values,
+ dtype=torch.float32,
+ device=self.device,
+ )
+ else:
+ self.estimate_matrix = None
+
+ if self.estimate_function is None:
+ if self.estimate_matrix is not None:
+ self.estimate_function = (
+ lambda weights: weights @ self.estimate_matrix
+ )
+ else:
+ raise ValueError(
+ "Either estimate_function or estimate_matrix must be provided"
+ )
+ elif self.excluded_targets:
+ logger.warning(
+ "You are passing an estimate function with excluded targets. "
+ "Make sure the function handles excluded targets correctly, as reweight() will handle the filtering."
+ )
+
def calibrate(self) -> None:
"""Calibrate the weights based on the estimate function and targets."""
self._assess_targets(
estimate_function=self.estimate_function,
- estimate_matrix=self.estimate_matrix,
+ estimate_matrix=getattr(
+ self, "original_estimate_matrix", self.estimate_matrix
+ ),
weights=self.weights,
targets=self.targets,
target_names=self.target_names,
@@ -98,6 +130,8 @@ def calibrate(self) -> None:
learning_rate=self.learning_rate,
dropout_rate=self.dropout_rate,
normalization_factor=self.normalization_factor,
+ excluded_targets=self.excluded_targets,
+ excluded_target_data=self.excluded_target_data,
csv_path=self.csv_path,
device=self.device,
)
@@ -106,6 +140,110 @@ def calibrate(self) -> None:
return self.performance_df
+ def exclude_targets(
+ self, excluded_targets: Optional[List[str]] = None
+ ) -> None:
+ """Exclude specified targets from calibration.
+
+ Args:
+            excluded_targets (Optional[List[str]]): List of target names to exclude from calibration. If None, falls back to the excluded_targets passed to the constructor.
+ """
+ if excluded_targets is not None:
+ self.excluded_targets = excluded_targets
+ excluded_indices = []
+ self.excluded_target_data = {}
+ if self.excluded_targets and self.original_target_names is not None:
+ # Find indices of excluded targets
+ for i, name in enumerate(self.original_target_names):
+ if name in self.excluded_targets:
+ excluded_indices.append(i)
+ self.excluded_target_data[name] = {
+ "target": self.original_targets[i],
+ "index": i,
+ }
+
+ # Remove excluded targets from calibration
+ calibration_mask = ~np.isin(
+ np.arange(len(self.original_target_names)), excluded_indices
+ )
+ targets_array = self.original_targets[calibration_mask]
+ target_names = (
+ self.original_target_names[calibration_mask]
+ if self.original_target_names is not None
+ else None
+ )
+
+ logger.info(
+ f"Excluded {len(excluded_indices)} targets from calibration: {self.excluded_targets}"
+ )
+ logger.info(f"Calibrating {len(targets_array)} targets")
+ else:
+ targets_array = self.original_targets
+ target_names = self.original_target_names
+
+ # Get initial estimates for excluded targets if needed
+ if self.excluded_targets:
+ initial_weights_tensor = torch.tensor(
+ self.weights, dtype=torch.float32, device=self.device
+ )
+ if self.estimate_function is not None:
+ initial_estimates_all = (
+ self.estimate_function(initial_weights_tensor)
+ .detach()
+ .cpu()
+ .numpy()
+ )
+ elif self.original_estimate_matrix is not None:
+ # Get initial estimates using the original full matrix
+ original_estimate_matrix_tensor = torch.tensor(
+ self.original_estimate_matrix.values,
+ dtype=torch.float32,
+ device=self.device,
+ )
+ initial_estimates_all = (
+ (initial_weights_tensor @ original_estimate_matrix_tensor)
+ .detach()
+ .cpu()
+ .numpy()
+ )
+
+ # Filter estimate matrix for calibration
+ filtered_estimate_matrix = self.original_estimate_matrix.iloc[
+ :, calibration_mask
+ ]
+ self.estimate_matrix = torch.tensor(
+ filtered_estimate_matrix.values,
+ dtype=torch.float32,
+ device=self.device,
+ )
+ else:
+ raise ValueError(
+ "Either estimate_function or estimate_matrix must be provided"
+ )
+
+ # Store initial estimates for excluded targets
+ for name in self.excluded_targets:
+ if name in self.excluded_target_data:
+ self.excluded_target_data[name]["initial_estimate"] = (
+ initial_estimates_all[
+ self.excluded_target_data[name]["index"]
+ ]
+ )
+
+ else:
+ if self.original_estimate_matrix is not None:
+ self.estimate_matrix = torch.tensor(
+ self.original_estimate_matrix.values,
+ dtype=torch.float32,
+ device=self.device,
+ )
+ else:
+ self.estimate_matrix = None
+
+ # Set up final attributes
+ self.targets = targets_array
+ self.target_names = target_names
+
def estimate(self) -> pd.Series:
return pd.Series(
index=self.target_names,
@@ -174,25 +312,39 @@ def _assess_targets(
for i, (target_val, estimate_val, ratio) in enumerate(
zip(targets, estimates, ratios)
):
+ target_name = (
+ target_names[i] if target_names is not None else f"target_{i}"
+ )
+
if estimate_val == 0:
logger.warning(
- f"Column {target_names[i]} has a zero estimate sum; using ε={eps} for comparison."
+ f"Column {target_name} has a zero estimate sum; using ε={eps} for comparison."
)
order_diff = np.log10(abs(ratio)) if ratio != 0 else np.inf
if order_diff > 1:
logger.warning(
- f"Target {target_names[i]} ({target_val:.2e}) differs from initial estimate ({estimate_val:.2e}) "
+ f"Target {target_name} ({target_val:.2e}) differs from initial estimate ({estimate_val:.2e}) "
f"by {order_diff:.2f} orders of magnitude."
)
if estimate_matrix is not None:
- contributing_mask = estimate_matrix.iloc[:, i] != 0
- contribution_ratio = (
- contributing_mask.sum() / estimate_matrix.shape[0]
- )
+ # Check if estimate_matrix is a tensor or DataFrame
+ if hasattr(estimate_matrix, "iloc"):
+ # It's a DataFrame
+ contributing_mask = estimate_matrix.iloc[:, i] != 0
+ contribution_ratio = (
+ contributing_mask.sum() / estimate_matrix.shape[0]
+ )
+ else:
+ # It's a tensor
+ contributing_mask = estimate_matrix[:, i] != 0
+ contribution_ratio = (
+ contributing_mask.sum().item()
+ / estimate_matrix.shape[0]
+ )
if contribution_ratio < 0.01:
logger.warning(
- f"Target {target_names[i]} is supported by only {contribution_ratio:.2%} "
+ f"Target {target_name} is supported by only {contribution_ratio:.2%} "
f"of records in the loss matrix. This may make calibration unstable or ineffective."
)
diff --git a/src/microcalibrate/reweight.py b/src/microcalibrate/reweight.py
index e56ae71..1bc40f1 100644
--- a/src/microcalibrate/reweight.py
+++ b/src/microcalibrate/reweight.py
@@ -1,7 +1,7 @@
import logging
import os
from pathlib import Path
-from typing import Callable, Optional
+from typing import Callable, List, Optional
import numpy as np
import pandas as pd
@@ -25,6 +25,8 @@ def reweight(
noise_level: Optional[float] = 10.0,
learning_rate: Optional[float] = 1e-3,
normalization_factor: Optional[torch.Tensor] = None,
+    excluded_targets: Optional[List[str]] = None,
+ excluded_target_data: Optional[dict] = None,
csv_path: Optional[str] = None,
device: Optional[str] = None,
) -> tuple[np.ndarray, np.ndarray]:
@@ -40,6 +42,8 @@ def reweight(
noise_level (float): Optional level of noise to add to the original weights.
learning_rate (float): Optional learning rate for the optimizer.
normalization_factor (Optional[torch.Tensor]): Optional normalization factor for the loss (handles multi-level geographical calibration).
+        excluded_targets (Optional[List[str]]): Optional list of target names to exclude from calibration.
+ excluded_target_data (Optional[dict]): Optional dictionary containing excluded target data with initial estimates and targets.
csv_path (Optional[str]): Optional path to save the performance metrics as a CSV file.
device (Optional[str]): Device to run the calibration on (e.g., 'cpu' or 'cuda'). If None, uses the default device.
@@ -58,7 +62,12 @@ def reweight(
f"std: {original_weights.std():.4f}"
)
- targets = torch.tensor(targets_array, dtype=torch.float32, device=device)
+ targets = torch.tensor(
+ targets_array,
+ dtype=torch.float32,
+ device=device,
+ )
+
random_noise = np.random.random(original_weights.shape) * noise_level
weights = torch.tensor(
np.log(original_weights + random_noise),
@@ -147,7 +156,11 @@ def dropout_weights(weights: torch.Tensor, p: float) -> torch.Tensor:
}
performance_df = log_performance_over_epochs(
- tracker_dict, targets, target_names
+ tracker_dict,
+ targets,
+ target_names,
+ excluded_targets,
+ excluded_target_data,
)
if csv_path:
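For context, `reweight()` does not compute the exclusions itself; it receives `excluded_targets` and `excluded_target_data` from `Calibration` and only threads them through to logging. A minimal end-to-end sketch of the public flow with toy data (the target names, values, and hyperparameters below are illustrative, not from this PR):

```python
import numpy as np
import pandas as pd
from microcalibrate.calibration import Calibration

# Toy estimate matrix: columns double as target names.
estimate_matrix = pd.DataFrame(
    {
        "income_total": [1.0, 2.0, 3.0, 4.0, 5.0],
        "population": [1.0, 1.0, 1.0, 1.0, 1.0],
    }
)
weights = np.ones(len(estimate_matrix))
targets = np.array([18.0, 5.5])

calibrator = Calibration(
    estimate_matrix=estimate_matrix,
    weights=weights,
    targets=targets,
    epochs=100,
    learning_rate=0.01,
    dropout_rate=0,
    noise_level=0.05,
    excluded_targets=["population"],  # held constant, logged at its initial estimate
)
performance_df = calibrator.calibrate()

# Exclusions can be revised after construction: exclude_targets() refilters
# the estimate matrix and re-captures initial estimates for the holdouts.
calibrator.exclude_targets(["income_total"])
performance_df = calibrator.calibrate()
```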
diff --git a/src/microcalibrate/utils/log_performance.py b/src/microcalibrate/utils/log_performance.py
index ce67dce..07351da 100644
--- a/src/microcalibrate/utils/log_performance.py
+++ b/src/microcalibrate/utils/log_performance.py
@@ -9,14 +9,18 @@ def log_performance_over_epochs(
tracked: Dict[str, List[Any]],
targets: torch.Tensor,
target_names: List[str],
+ excluded_targets: Optional[List[str]] = None,
+ excluded_target_data: Optional[Dict[str, Dict[str, Any]]] = None,
) -> pd.DataFrame:
"""
Calculate the errors and performance metrics for the model for all the logged epochs.
Args:
tracked (Dict[str, List[Any]]): Dictionary containing lists of tracked metrics.
- targets (torch.Tensor): Array of target values.
- targets_names (List[str]): Array of target names.
+ targets (torch.Tensor): Array of target values for calibration targets only.
+ target_names (List[str]): Array of target names for calibration targets only.
+        excluded_targets (Optional[List[str]]): List of target names excluded from calibration that should still appear in the performance log.
+ excluded_target_data (Optional[Dict[str, Dict[str, Any]]]): Dictionary containing excluded target data with initial estimates and targets.
Returns:
performance_df: DataFrame containing the calculated errors and performance metrics.
@@ -61,6 +65,31 @@ def log_performance_over_epochs(
}
)
+ # Add excluded targets with their initial estimates for each epoch
+ if excluded_targets and excluded_target_data:
+ for target_name in excluded_targets:
+ if target_name in excluded_target_data:
+ target_data = excluded_target_data[target_name]
+ target_val = target_data["target"]
+ est_val = target_data["initial_estimate"]
+ err = est_val - target_val
+
+ rows.append(
+ {
+ **base,
+ "target_name": target_name,
+ "target": target_val,
+ "estimate": est_val,
+ "error": err,
+ "abs_error": abs(err),
+ "rel_abs_error": (
+ abs(err) / abs(target_val)
+ if target_val != 0
+ else np.nan
+ ),
+ }
+ )
+
df = pd.DataFrame(rows)
if target_names is None:
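The `excluded_target_data` mapping consumed here is assembled in `Calibration.exclude_targets()`. Its shape, with illustrative values (the keys are the ones this PR reads; the numbers are made up):

```python
# Shape of excluded_target_data as built in Calibration.exclude_targets().
excluded_target_data = {
    "population": {
        "target": 5.5,            # original target value
        "index": 1,               # column index in the original estimate matrix
        "initial_estimate": 5.0,  # estimate under the starting weights
    },
}
```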
diff --git a/tests/test_calibration.py b/tests/test_calibration.py
index 7b0719e..31fec46 100644
--- a/tests/test_calibration.py
+++ b/tests/test_calibration.py
@@ -193,3 +193,80 @@ def test_calibration_warnings_system(caplog) -> None:
assert (
"Some targets are negative" in log_text
), "Negative target warning not emitted."
+
+
+def test_calibration_excluded_targets() -> None:
+ """Test the calibration process works correctly with excluded targets."""
+
+ # Create a mock dataset with age and income
+ random_generator = np.random.default_rng(0)
+ data = pd.DataFrame(
+ {
+ "age": random_generator.integers(18, 70, size=100),
+ "income": random_generator.normal(40000, 50000, size=100),
+ }
+ )
+ weights = np.ones(len(data))
+ targets_matrix = pd.DataFrame(
+ {
+ "income_aged_20_30": (
+ (data["age"] >= 20) & (data["age"] < 30)
+ ).astype(float)
+ * data["income"],
+ "income_aged_30_40": (
+ (data["age"] >= 30) & (data["age"] < 40)
+ ).astype(float)
+ * data["income"],
+ "income_aged_40_50": (
+ (data["age"] >= 40) & (data["age"] < 50)
+ ).astype(float)
+ * data["income"],
+ "income_aged_50_60": (
+ (data["age"] >= 50) & (data["age"] < 60)
+ ).astype(float)
+ * data["income"],
+ "income_aged_60_70": (
+ (data["age"] >= 60) & (data["age"] <= 70)
+ ).astype(float)
+ * data["income"],
+ }
+ )
+ targets = np.array(
+ [
+ (targets_matrix["income_aged_20_30"] * weights).sum() * 1.2,
+ (targets_matrix["income_aged_30_40"] * weights).sum() * 1.2,
+ (targets_matrix["income_aged_40_50"] * weights).sum() * 1.2,
+ (targets_matrix["income_aged_50_60"] * weights).sum() * 1.2,
+ (targets_matrix["income_aged_60_70"] * weights).sum() * 1.2,
+ ]
+ )
+
+ excluded_targets = ["income_aged_20_30"]
+
+ calibrator = Calibration(
+ estimate_matrix=targets_matrix,
+ weights=weights,
+ targets=targets,
+ noise_level=0.05,
+ epochs=528,
+ learning_rate=0.01,
+ dropout_rate=0,
+ excluded_targets=excluded_targets,
+ )
+
+ first_performance_df = calibrator.calibrate()
+ first_calibration_estimates = calibrator.estimate()
+
+    assert len(first_calibration_estimates) == len(
+        np.array(calibrator.targets)
+    ), "Estimates should cover only the non-excluded calibration targets."
+
+ # iteratively exclude new targets and calibrate
+ new_target_to_exclude = ["income_aged_30_40"]
+ calibrator.exclude_targets(new_target_to_exclude)
+ second_performance_df = calibrator.calibrate()
+ second_calibration_estimates = calibrator.estimate()
+
+ assert (
+ new_target_to_exclude[0] not in calibrator.target_names
+ ), f"Target {new_target_to_exclude[0]} should be excluded from calibration."
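A quick way to sanity-check the logging behavior this test exercises: holdout rows should appear at every epoch with an unchanging estimate. A short follow-on sketch using `first_performance_df` from the test above, assuming the log also carries an `epoch` column from the tracked metrics:

```python
# Excluded targets are logged each epoch at their initial estimate,
# so their estimates should never move across epochs.
held = first_performance_df[
    first_performance_df["target_name"] == "income_aged_20_30"
]
assert held["estimate"].nunique() == 1, "holdout estimate should not move"
print(held[["epoch", "target", "estimate", "rel_abs_error"]].head())
```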