From 4f01de8ab0d49a78e6292f29aab2a741f613c2f1 Mon Sep 17 00:00:00 2001 From: Luisa Date: Fri, 27 Feb 2026 10:38:03 -0700 Subject: [PATCH 1/7] tests --- .../ultra/unit/test_ultra_l1b_culling.py | 193 +++++++++++-- imap_processing/ultra/constants.py | 4 + imap_processing/ultra/l1b/extendedspin.py | 41 ++- .../ultra/l1b/ultra_l1b_culling.py | 271 +++++++++++++++--- 4 files changed, 437 insertions(+), 72 deletions(-) diff --git a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py index 2fbba6c56..cfa9da551 100644 --- a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py +++ b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py @@ -27,6 +27,7 @@ flag_low_voltage, flag_rates, flag_scattering, + flag_statistical_outliers, get_binned_energy_ranges, get_binned_spins_edges, get_de_rejection_mask, @@ -34,6 +35,7 @@ get_energy_histogram, get_energy_range_flags, get_n_sigma, + get_poisson_stats, get_pulses_per_spin, get_spin_data, get_valid_earth_angle_events, @@ -644,10 +646,10 @@ def test_flag_high_energy(): energy_range_edges = np.array([3, 5, 7, 18, 25]) # Example energy bin edges # Spin bin 1 (events 0-3) 4 events fall within the culling energy bin # - This is above all the energy thresholds except the second one (10), - # so it should be flagged for all energy ranges except flag #2 + # so it should be flagged for all energy ranges except flag the 2nd # Spin bin 2 (events 4-7) only 1 event falls within the culling energy bin # - This is above the lowest energy threshold (1) but below the rest, so - # it should only be flagged with the last energy range flag (#3) + # it should only be flagged at the last energy range # Spin bin 3 (events 8-11) No events fall within the culling energy bin, # so it should not be flagged for any energy range energy = np.array([17, 16, 12, 15, 5, 8, 4, 6, 4, 1, 22, 20]) @@ -666,26 +668,31 @@ def test_flag_high_energy(): spin_tbin_edges = np.arange( start=0, stop=len(energy) + 1, step=4 ) # create spin bins of 4 seconds - energy_range_flags = get_energy_range_flags(energy_range_edges) quality_flags = flag_high_energy( de_dataset, spin_tbin_edges, energy_range_edges, - energy_range_flags, + None, cull_thresholds, 90, ) # check shape - assert len(quality_flags) == len(spin_tbin_edges) - 1 + np.testing.assert_array_equal( + quality_flags.shape, (len(energy_range_edges) - 1, len(spin_tbin_edges) - 1) + ) # Assert that the first spin bin is flagged for high energy for all energy ranges # except the second one - assert quality_flags[0] == (2**0 | 2**2 | 2**3) - # Assert that the second spin bin is only flagged for high energy for the last - # energy range - assert quality_flags[1] == 2**3 - # Assert that the third spin bin is not flagged for any energy range - assert quality_flags[2] == 0 + quality_flags[0, 0] = True + quality_flags[1, 0] = False + quality_flags[2, 0] = True + quality_flags[3, 0] = True + # # Assert that the second spin bin is only flagged for high energy for the last + # # energy range + quality_flags[3, 1] = True + assert not np.any(quality_flags[0:3, 1]) + # # Assert that the third spin bin is not flagged for any energy range + assert not np.any(quality_flags[:, 2]) @pytest.mark.external_test_data @@ -720,23 +727,155 @@ def test_validate_high_energy_cull(): intervals, _, _ = build_energy_bins() # Get the energy ranges energy_ranges = get_binned_energy_ranges(intervals) - flags = get_energy_range_flags(energy_ranges) e_flags = flag_high_energy( - de_ds, spin_tbin_edges, energy_ranges, flags, mock_thresholds - ) - # The ULTRA IT flags are shaped n_energy_ranges, spin_bin while the SDC - # is only spin_bin but different flags are set for different energy ranges, so we - # need to check each energy separately - # We also need to invert the expected flags since the ULTRA IT mask is True - # for good spins (counts below threshold) while the SDC quality flags are set - # for bad spins (counts exceed threshold). - for i in range(expected_qf.shape[0]): - np.testing.assert_array_equal( - (e_flags & 2**i) > 0, - ~expected_qf[i, :].astype(bool), - err_msg=f"High energy flag mismatch for energy range {i} with edges" - f" {energy_ranges[i]}-{energy_ranges[i + 1]}", - ) + de_ds, spin_tbin_edges, energy_ranges, None, mock_thresholds + ) + np.testing.assert_array_equal(e_flags, ~expected_qf.astype(bool)) + + +def test_flag_statistical_outliers(): + """Tests flag_statistical_outliers function.""" + energy_range_edges = np.array([3, 5, 7, 18, 25]) # Example energy bin edges + e_bin_3_value = 23 + n_spin_bins = 12 + spin_step = 7 + energy = np.full(spin_step * n_spin_bins, 0) + # Make sure there are at least 3 other bins with counts in the 3rd energy bin. + # This is to ensure that we have enough events in that energy bin to calculate + # the statistics. + energy[spin_step] = e_bin_3_value + energy[spin_step * 2] = e_bin_3_value + energy[spin_step * 3] = e_bin_3_value + # Make the last spin bin have higher counts. It should get flagged as an outlier. + energy[-spin_step:] = e_bin_3_value + + de_dataset = xr.Dataset( + { + "de_event_met": ("epoch", np.arange(len(energy))), + "energy_spacecraft": ("epoch", energy), + "quality_outliers": ("epoch", np.full(len(energy), 0)), + "quality_scattering": ("epoch", np.full(len(energy), 0)), + "ebin": ("epoch", np.full(len(energy), 10)), + } + ) + spin_tbin_edges = np.arange( + start=0, stop=len(energy) + 1, step=spin_step + ) # create spin bins of 4 seconds + quality_flags, convergence, iterations, std_diff = flag_statistical_outliers( + de_dataset, + spin_tbin_edges, + energy_range_edges, + np.zeros((len(energy_range_edges) - 1, len(spin_tbin_edges) - 1), dtype=bool), + combine_flags_across_energy_bins=True, + ) + + # check shape + np.testing.assert_array_equal( + quality_flags.shape, (len(energy_range_edges) - 1, len(spin_tbin_edges) - 1) + ) + # check that none of the flags are set except for the last spin bin + # since combine_flags_across_energy_bins is True, the entire last spin bin should + # be flagged even though only one energy bin had high counts + expected_flags = np.zeros( + (len(energy_range_edges) - 1, len(spin_tbin_edges) - 1), dtype=bool + ) + expected_flags[:, -1] = True + np.testing.assert_array_equal(quality_flags, expected_flags) + # all energy bins should have converged + # The first 2 didn't have enough events to calculate statistics, but they should + # still be marked as converged + assert np.all(convergence) + # Only the last energy bin should have had any iterations since it's the only one + # with enough events. + assert np.nonzero(iterations) == np.array([3]) + # Only the last energy bin should have a valid std_diff since it's the only one + # with enough events to calculate statistics. + assert np.nonzero(std_diff != -1) == np.array([3]) + + +def test_flag_statistical_outliers_invalid_events(): + """Tests flag_statistical_outliers function when there are no valid events.""" + energy_range_edges = np.array([3, 5, 7, 18, 25]) # Example energy bin edges + energy = np.arange(25) + de_dataset = xr.Dataset( + { + "de_event_met": ("epoch", np.arange(len(energy))), + "energy_spacecraft": ("epoch", energy), + "quality_outliers": ("epoch", np.full(len(energy), 0)), + "quality_scattering": ("epoch", np.full(len(energy), 0)), + "ebin": ("epoch", np.full(len(energy), 10)), + } + ) + spin_tbin_edges = np.arange( + start=0, stop=len(energy) + 1, step=5 + ) # create spin bins of 4 seconds + mask = np.ones((len(energy_range_edges) - 1, len(spin_tbin_edges) - 1), dtype=bool) + quality_flags, convergence, iterations, std_diff = flag_statistical_outliers( + de_dataset, + spin_tbin_edges, + energy_range_edges, + mask, + ) + # check that all flags are false since there are no valid events to flag as + np.testing.assert_array_equal( + quality_flags, np.zeros_like(quality_flags, dtype=bool) + ) + # check that all energy bins are marked as converged (no valid events is not a + # failure case for convergence since we just can't calculate statistics, but we + # still want to mark it as converged so that it doesn't get flagged as a failure + # in the logs) + assert np.all(convergence) + # check that there were no iterations + assert np.sum(iterations) == 0 + # Check that std_diff is all invalid (-1) + assert np.all(std_diff == -1) + + +@pytest.mark.external_test_data +def test_validate_stat_cull(): + """Validate that flag_statistical_outliers are correctly flagged""" + # read test data from csv files + # xspin = pd.read_csv(TEST_PATH / "extendedspin_test_data_repoint00047.csv") + # expected_qf = pd.read_csv( + # TEST_PATH / "validate_high_energy_culling_results_repoint00047.csv" + # ).to_numpy() + # de_df = pd.read_csv(TEST_PATH / "de_test_data_repoint00047.csv") + # de_ds = xr.Dataset( + # { + # "de_event_met": ("epoch", de_df.event_times.values), + # "energy_spacecraft": ("epoch", de_df.energy_spacecraft.values), + # "quality_outliers": ("epoch", de_df.quality_outliers.values), + # "quality_scattering": ("epoch", de_df.quality_scattering.values), + # "ebin": ("epoch", de_df.ebin.values), + # } + # ) + # # Use constants from the code to ensure consistency with the actual culling code + # spin_bin_size = UltraConstants.SPIN_BIN_SIZE + # spin_tbin_edges = get_binned_spins_edges( + # xspin.spin_number.values, + # xspin.spin_period.values, + # xspin.spin_start_time.values, + # spin_bin_size, + # ) + # intervals, _, _ = build_energy_bins() + # # Get the energy ranges + # energy_ranges = get_binned_energy_ranges(intervals) + pass + + +def test_get_poisson_stats(): + """Tests get_poisson_stats function.""" + counts = np.full(20, 0) + counts[-1] = 100 # Make the last bin have high counts + std_diff, outlier_mask = get_poisson_stats(counts) + # std_diff should be counts/sqrt(counts) = sqrt(counts) + assert ( + std_diff == np.std(counts) / np.sqrt(5) - 1 + ) # std_diff should be counts/sqrt(counts) = sqrt(counts) + assert ( + np.sum(outlier_mask) == 1 + ) # Only the last bin should be flagged as an outlier + assert outlier_mask[-1] def test_get_energy_range_flags(): diff --git a/imap_processing/ultra/constants.py b/imap_processing/ultra/constants.py index ce759f859..bb6c52b79 100644 --- a/imap_processing/ultra/constants.py +++ b/imap_processing/ultra/constants.py @@ -204,3 +204,7 @@ class UltraConstants: ) # Use the channel defined below to determine which spins are contaminated HIGH_ENERGY_CULL_CHANNEL = 4 + # Number of iterations to perform for statistical outlier culling. + STAT_CULLING_N_ITER = 5 + # Sigma threshold to use for statistical outlier culling. + STAT_CULLING_STD_THRESHOLD = 0.05 diff --git a/imap_processing/ultra/l1b/extendedspin.py b/imap_processing/ultra/l1b/extendedspin.py index a68f0fb6a..1da3a1572 100644 --- a/imap_processing/ultra/l1b/extendedspin.py +++ b/imap_processing/ultra/l1b/extendedspin.py @@ -4,7 +4,6 @@ import xarray as xr from numpy.typing import NDArray -from imap_processing.quality_flags import ImapRatesUltraFlags from imap_processing.ultra.constants import UltraConstants from imap_processing.ultra.l1b.ultra_l1b_culling import ( count_rejected_events_per_spin, @@ -15,6 +14,7 @@ flag_imap_instruments, flag_low_voltage, flag_rates, + flag_statistical_outliers, get_binned_energy_ranges, get_binned_spins_edges, get_energy_histogram, @@ -75,27 +75,32 @@ def calculate_extendedspin( spin, spin_period, spin_starttime, spin_bin_size ) voltage_qf = flag_low_voltage(spin_tbin_edges, status_dataset) - voltage_qf = expand_bin_flags_to_spins(len(spin), voltage_qf, spin_bin_size) # Get energy bins used at l1c intervals, _, _ = build_energy_bins() # Get the energy ranges energy_ranges = get_binned_energy_ranges(intervals) energy_bin_flags = get_energy_range_flags(energy_ranges) - # Calculate the high energy quality flags using the de dataset with low voltage - # events removed. Use the same spin and energy bins that - # were used for low voltage flags to maintain consistency in the flags. - valid_voltage_spins = spin[np.where(voltage_qf == 0)] - valid_de_spins = np.isin(de_dataset["spin"].values, valid_voltage_spins) - de_dataset_filtered = de_dataset.isel(epoch=valid_de_spins) + # Calculate the high energy quality flags energy_thresholds = UltraConstants.HIGH_ENERGY_CULL_THRESHOLDS high_energy_qf = flag_high_energy( - de_dataset_filtered, + de_dataset, spin_tbin_edges, energy_ranges, - energy_bin_flags, + voltage_qf, energy_thresholds, instrument_id, ) + # Combine high energy and voltage flags to use for statistical outlier flagging. + mask = ( + voltage_qf[np.newaxis, :] & high_energy_qf + ) # Shape (n_energy_bins, n_spins_bins) + stat_outliers_qf, _, _, _ = flag_statistical_outliers( + de_dataset, + spin_tbin_edges, + energy_ranges, + mask, + instrument_id, + ) # Get the number of pulses per spin. pulses = get_pulses_per_spin(aux_dataset, rates_dataset) @@ -133,7 +138,19 @@ def calculate_extendedspin( coin_per_spin[valid] = pulses.coin_per_spin[idx[valid]] # Expand binned quality flags to individual spins. + # high energy and statistical outlier flags are energy dependent + # Collapse them into a single flag array + high_energy_qf = np.bitwise_or.reduce( + high_energy_qf * energy_bin_flags[:, np.newaxis], axis=1 + ) + stat_outliers_qf = np.bitwise_or.reduce( + stat_outliers_qf * energy_bin_flags[:, np.newaxis], axis=1 + ) high_energy_qf = expand_bin_flags_to_spins(len(spin), high_energy_qf, spin_bin_size) + voltage_qf = expand_bin_flags_to_spins(len(spin), voltage_qf, spin_bin_size) + stat_outliers_qf = expand_bin_flags_to_spins( + len(spin), stat_outliers_qf, spin_bin_size + ) # account for rates spins which are not in the direct event spins extendedspin_dict["start_pulses_per_spin"] = start_per_spin extendedspin_dict["stop_pulses_per_spin"] = stop_per_spin @@ -146,9 +163,7 @@ def calculate_extendedspin( extendedspin_dict["quality_low_voltage"] = voltage_qf # shape (nspin,) # TODO calculate flags for high energy (SEPS) and statistics culling # Initialize these flags to NONE for now. - extendedspin_dict["quality_statistics"] = np.full_like( - voltage_qf, ImapRatesUltraFlags.NONE.value, np.uint16 - ) # shape (nspin,) + extendedspin_dict["quality_statistics"] = stat_outliers_qf # shape (nspin,) extendedspin_dict["quality_high_energy"] = high_energy_qf # shape (nspin,) # Add an array of flags for each energy bin. Shape: (n_energy_bins) extendedspin_dict["energy_range_flags"] = energy_bin_flags diff --git a/imap_processing/ultra/l1b/ultra_l1b_culling.py b/imap_processing/ultra/l1b/ultra_l1b_culling.py index 946364923..9130e5efe 100644 --- a/imap_processing/ultra/l1b/ultra_l1b_culling.py +++ b/imap_processing/ultra/l1b/ultra_l1b_culling.py @@ -543,7 +543,7 @@ def get_energy_and_spin_dependent_rejection_mask( goodtimes_dataset : xr.Dataset Dataset containing valid spins and energy bin flags. energy : np.ndarray - The particle energy. + The particle energy at each direct event. spin_number : np.ndarray Spin number at each direct event. @@ -677,6 +677,8 @@ def flag_low_voltage( # For each low voltage ind, flag the corresponding flag quality_flags[lv_spin_inds] = low_voltage_flag + # TODO add log summary. + return quality_flags @@ -684,7 +686,7 @@ def flag_high_energy( de_dataset: xr.Dataset, spin_tbin_edges: NDArray, energy_ranges: NDArray, - energy_range_flags: np.ndarray, + mask: NDArray = None, energy_thresholds: np.ndarray = UltraConstants.HIGH_ENERGY_CULL_THRESHOLDS, sensor_id: int = 90, ) -> NDArray: @@ -699,8 +701,9 @@ def flag_high_energy( Edges of the spin time bins. energy_ranges : numpy.ndarray Array of energy range edges. - energy_range_flags : numpy.ndarray - Array of quality flag values corresponding to each energy range. + mask : numpy.ndarray, optional + Mask indicating which events to consider for high energy flagging + (e.g., after low voltage culling). energy_thresholds : numpy.ndarray Array of count thresholds for flagging high energy events corresponding to each energy range. @@ -710,48 +713,252 @@ def flag_high_energy( Returns ------- quality_flags : numpy.ndarray - Quality flags. + Quality flags shaped (n_energy_bins, n_spin_bins). """ + # expand energy thresholds to have shape (n_energy_bins, 1) for comparison with + # the counts per spin + energy_thresholds = energy_thresholds[:, np.newaxis] # Shape (n_energy_bins, 1) cull_channel = UltraConstants.HIGH_ENERGY_CULL_CHANNEL - valid_events_per_energy = get_valid_events_per_energy_range( - de_dataset, energy_ranges, UltraConstants.EARTH_ANGLE_45_THRESHOLD, sensor_id - ) - # check to make sure the number of energy ranges matches the number of energy range - # flags - num_e_ranges = valid_events_per_energy.shape[0] - if num_e_ranges != len(energy_range_flags) or num_e_ranges != len( - energy_thresholds - ): - raise ValueError( - f"Number of energy ranges ({num_e_ranges}) does not match number of energy" - f" range flags ({len(energy_range_flags)}) or expected number of " - f"energy range thresholds ({len(energy_thresholds)})." - ) - if cull_channel >= num_e_ranges: + n_energy_bins = len(energy_thresholds) + if cull_channel >= n_energy_bins: raise ValueError( f"HIGH_ENERGY_CULL_CHANNEL ({cull_channel}) is out of bounds" - f" for {num_e_ranges} energy ranges." + f" for {n_energy_bins} energy ranges." ) # Initialize all spin bins to have no high energy flag spin_bin_size = len(spin_tbin_edges) - 1 - quality_flags = np.full( - spin_bin_size, ImapRatesUltraFlags.NONE.value, dtype=np.uint16 - ) + quality_flags = np.zeros((n_energy_bins, spin_bin_size), dtype=bool) # Get valid events and counts at each spin bin for the # designated culling channel. - cull_channel_events = valid_events_per_energy[cull_channel] - # get each valid event count per spin bin for the culling channel - cull_channel_counts = np.histogram( - de_dataset["de_event_met"].values[cull_channel_events], spin_tbin_edges - )[0] - # loop through each energy range - for flag, e_threshold in zip(energy_range_flags, energy_thresholds, strict=False): - quality_flags[cull_channel_counts >= e_threshold] |= flag + de_counts = get_valid_de_count_summary( + de_dataset, energy_ranges, spin_tbin_edges, sensor_id + ) + cull_channel_counts = de_counts[cull_channel] + # flag spins where the counts in the cull channel exceed the threshold for that + # energy range + flagged = ( + cull_channel_counts[np.newaxis, :] >= energy_thresholds + ) # (n_energy_bins, n_spin_bins) + + if mask is not None: + quality_flags[:, mask] = flagged[:, mask] + else: + quality_flags = flagged + # TODO add log summary. E.g Tim's hi goodtimes code return quality_flags +def flag_statistical_outliers( + de_dataset: xr.Dataset, + spin_tbin_edges: NDArray, + energy_ranges: NDArray, + mask: NDArray, + sensor_id: int = 90, + n_iterations: int = UltraConstants.STAT_CULLING_N_ITER, + std_threshold: float = UltraConstants.STAT_CULLING_STD_THRESHOLD, + combine_flags_across_energy_bins: bool = True, +) -> tuple[NDArray, NDArray, NDArray, NDArray]: + """ + Flag statistical outlier events based on count rates per spin. + + After low voltage and high energy spins have been flagged, there still appears to + be some time dependency in the signal. This algorithm identifies those outliers. + + Stat culling steps: + 1. Compute the count rate per spin bin and energy range. + 2. Flag spins and energy ranges where there are less than 3 bins with counts + 3. Check how close the data is to poisson stats: + - For a perfect poisson distribution, the standard deviation should be the + square root of the mean. So we calculate the difference between the + observed standard deviation and the expected standard deviation (sqrt of mean) + to see how well the data follows a Poisson distribution. + 4. Flag bins where the count is more than 3 standard deviations from the mean. This + will be a different quality flag array as step #3 + 5. If combine_flags_across_energy_bins is True, then if a spin bin is flagged in + any energy channel, flag it in all. Recalculate the convergence after combining + flags across energy bins. + + Parameters + ---------- + de_dataset : xr.Dataset + Direct event dataset. + spin_tbin_edges : numpy.ndarray + Edges of the spin time bins. + energy_ranges : numpy.ndarray + Array of energy range edges. + mask : numpy.ndarray + Mask indicating which events to consider for statistical outlier flagging. + This should be a 2d boolean array of shape (n_energy_bins, n_spin_bins) where + True indicates the spin bins to consider for outlier flagging for each energy + bin (e.g., after low voltage and high energy culling). + sensor_id : int + Sensor ID (e.g., 45 or 90). + n_iterations : int + Maximum number of iterations to perform for outlier flagging. + std_threshold : float + Threshold for standard deviation difference from Poisson stats to determine. + combine_flags_across_energy_bins : bool + Whether to link energy channels such that if a spin bin is flagged in any energy + channel, it is flagged in all energy channels. + + Returns + ------- + quality_stats : numpy.ndarray + Quality flags for statistical outliers, shaped (n_energy_bins, n_spin_bins). + convergence : numpy.ndarray + Boolean array of shape (n_energy_bins,) indicating whether the outlier flagging + converged for each energy bin. + iterations : numpy.ndarray + Array of shape (n_energy_bins,) indicating how many iterations were performed + for each energy bin. + std_diff : numpy.ndarray + Array of shape (n_energy_bins,) containing the final standard deviation + difference from Poisson stats for each energy bin. + """ + # Initialize all spin bins to have no high energy flag + spin_bin_size = len(spin_tbin_edges) - 1 + n_energy_bins = len(energy_ranges) - 1 + # make a copy of the mask to avoid modifying the original mask passed in + iter_mask = mask.copy() + quality_stats = np.zeros((n_energy_bins, spin_bin_size), dtype=bool) + # Initialize convergence array to keep track of poisson stats + convergence = np.full(n_energy_bins, False) + # Keep track of how many iterations we have done of flagging outliers and + # recalculating stats per energy bin + iterations = np.zeros(n_energy_bins) + # keep track of the standard deviation difference from poisson stats per energy bin + std_diff = np.zeros(n_energy_bins, dtype=float) + count_summary = get_valid_de_count_summary( + de_dataset, energy_ranges, spin_tbin_edges, sensor_id + ) # shape (n_energy_bins, n_spin_bins) + for e_idx in np.arange(n_energy_bins): # shape: (n_energy_bins,) + for it in range(n_iterations): + # only consider bins that are currently unflagged for this energy bin + counts = count_summary[e_idx, ~iter_mask[e_idx]] + # Step 1. check if any energy bins have less than 3 spin bins with counts. + # If so, flag all spins for that energy bin and skip to the next iteration + if np.sum(counts > 0) < 3: + quality_stats[e_idx] = False + convergence[e_idx] = True + std_diff[e_idx] = -1 + break + # Step 2. Check how close the data is to poisson stats + std_ratio, outlier_mask = get_poisson_stats(counts) + std_diff[e_idx] = std_ratio + # Step 3. Flag bins where the count is more than 3 standard deviations from + # the mean. + outlier_inds = np.where(~iter_mask[e_idx])[0][outlier_mask] + # Set the quality flag to True for the outlier inds + quality_stats[e_idx, outlier_inds] = True + # Also update the iter_mask to exclude the outlier bins for the next + # iteration + iter_mask[e_idx, outlier_inds] = True + iterations[e_idx] = it + 1 + # Check for convergence: if the standard deviation difference from + # poisson stats is below the threshold, then we can stop iterating for this + # energy bin + if std_ratio < std_threshold: + convergence[e_idx] = True + break + + if combine_flags_across_energy_bins: + # If a spin bin is flagged in any energy channel flag it in all energy channels + # Use np.any to check if a spin bin is flagged in any energy channel, + # then flag it in all energy channels + combined_mask = np.any(quality_stats, axis=0) # (n_spin_bins,) + quality_stats[:] = combined_mask # Broadcast to all energy bins + + # Recalculate convergence with the combined mask. + for e_idx in range(n_energy_bins): + if not convergence[e_idx]: + valid_inds = np.where(combined_mask)[0] + counts = count_summary[e_idx, valid_inds] + std_ratio, _ = get_poisson_stats(counts) + std_diff[e_idx] = std_ratio + if std_ratio < std_threshold: + convergence[e_idx] = True + + num_culled: int = np.sum(quality_stats) + logger.debug( + f"Statistical culling removed {num_culled} spin bins across {n_energy_bins}" + f" energy channels. Convergence: {convergence} after " + f"{iterations} iterations." + ) + + return quality_stats, convergence, iterations, std_diff + + +def get_poisson_stats(counts: NDArray) -> tuple[float, NDArray]: + """ + Calculate Poisson statistics for a given array of counts. + + For a perfect Poisson distribution, the standard deviation should equal + the square root of the mean. The std_ratio measures how far the observed + distribution deviates from this. + + Outliers are identified as bins where the counts deviate more than 3 + standard deviations from the mean. + + Parameters + ---------- + counts : numpy.ndarray + Array of counts per spin bin for a given energy range. + + Returns + ------- + std_ratio : float + Ratio of the observed standard deviation to the expected Poisson + standard deviation. + sub_mask : numpy.ndarray + Boolean array of the same length as counts. True where a bin is + a statistical outlier (more than 3 sigma from the mean). + """ + std = np.std(counts) + std_ratio = std / np.sqrt(np.mean(counts)) - 1 + sub_mask = np.abs((counts - np.mean(counts)) / std) > 3 + return std_ratio, sub_mask + + +def get_valid_de_count_summary( + de_dataset: xr.Dataset, + energy_ranges: NDArray, + spin_tbin_edges: NDArray, + sensor_id: int = 90, +) -> NDArray: + """ + Get a summary of valid counts per energy range and spin bin. + + Parameters + ---------- + de_dataset : xr.Dataset + Direct event dataset. + energy_ranges : numpy.ndarray + Array of energy range edges. + spin_tbin_edges : numpy.ndarray + Array of spin time bin edges. + sensor_id : int + Sensor ID (e.g., 45 or 90). + + Returns + ------- + counts : numpy.ndarray + A 2D array of counts per energy range and spin bin for valid events. + """ + valid_events = get_valid_events_per_energy_range( + de_dataset, energy_ranges, UltraConstants.EARTH_ANGLE_45_THRESHOLD, sensor_id + ) + counts = np.zeros((len(energy_ranges) - 1, len(spin_tbin_edges) - 1), dtype=float) + + for i in range(len(energy_ranges) - 1): + counts[i, :], _ = np.histogram( + de_dataset["de_event_met"].values[valid_events[i, :]], bins=spin_tbin_edges + ) + + return counts + + def get_valid_events_per_energy_range( de_dataset: xr.Dataset, energy_ranges: NDArray, earth_ang_45: float, sensor_id: int ) -> NDArray: From a6b7cd7fa71df11f74cc4cefd7c6e276949b620f Mon Sep 17 00:00:00 2001 From: Luisa Date: Fri, 27 Feb 2026 10:59:34 -0700 Subject: [PATCH 2/7] fix voltage flags --- imap_processing/ultra/l1b/extendedspin.py | 11 ++++++++--- imap_processing/ultra/l1b/ultra_l1b_culling.py | 9 ++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/imap_processing/ultra/l1b/extendedspin.py b/imap_processing/ultra/l1b/extendedspin.py index 1da3a1572..773d5266d 100644 --- a/imap_processing/ultra/l1b/extendedspin.py +++ b/imap_processing/ultra/l1b/extendedspin.py @@ -137,15 +137,20 @@ def calculate_extendedspin( stop_per_spin[valid] = pulses.stop_per_spin[idx[valid]] coin_per_spin[valid] = pulses.coin_per_spin[idx[valid]] - # Expand binned quality flags to individual spins. - # high energy and statistical outlier flags are energy dependent - # Collapse them into a single flag array + # high energy and statistical outlier flags are energy dependent boolean arrays + # with shape (n_energy_bins, n_spin_bins). We want to collapse the energy dimension + # using a bitwise OR to get a single boolean flag per spin. high_energy_qf = np.bitwise_or.reduce( high_energy_qf * energy_bin_flags[:, np.newaxis], axis=1 ) stat_outliers_qf = np.bitwise_or.reduce( stat_outliers_qf * energy_bin_flags[:, np.newaxis], axis=1 ) + # Low voltage flag is shape (n_spin_bins,) but we want to convert from a boolean + # to a bitwise flag to be consistent with the other flags, where each spin that + # is flagged will have the bitflag of all the energy flags combined. + voltage_qf = voltage_qf * np.bitwise_or.reduce(energy_bin_flags) + # Expand binned quality flags to individual spins. high_energy_qf = expand_bin_flags_to_spins(len(spin), high_energy_qf, spin_bin_size) voltage_qf = expand_bin_flags_to_spins(len(spin), voltage_qf, spin_bin_size) stat_outliers_qf = expand_bin_flags_to_spins( diff --git a/imap_processing/ultra/l1b/ultra_l1b_culling.py b/imap_processing/ultra/l1b/ultra_l1b_culling.py index 9130e5efe..ff6db9088 100644 --- a/imap_processing/ultra/l1b/ultra_l1b_culling.py +++ b/imap_processing/ultra/l1b/ultra_l1b_culling.py @@ -629,7 +629,6 @@ def flag_low_voltage( spin_tbin_edges: NDArray, status_dataset: xr.Dataset, voltage_threshold: float = UltraConstants.LOW_VOLTAGE_CULL_THRESHOLD, - low_voltage_flag: int = 65535, # default is max uint16 ) -> NDArray: """ Flag low voltage events. @@ -642,8 +641,6 @@ def flag_low_voltage( Status dataset containing voltage information. voltage_threshold : float Voltage threshold below which to flag low voltage events. - low_voltage_flag : int - The flag value to set for low voltage events. Returns ------- @@ -652,9 +649,7 @@ def flag_low_voltage( """ spin_bin_size = len(spin_tbin_edges) - 1 # initialize all spins to have no low voltage flag - quality_flags = np.full( - spin_bin_size, ImapRatesUltraFlags.NONE.value, dtype=np.uint16 - ) + quality_flags = np.zeros(spin_bin_size, dtype=bool) # Get the min voltage across both deflection plate at each epoch min_voltage = np.minimum( status_dataset["rightdeflection_v"].data, @@ -675,7 +670,7 @@ def flag_low_voltage( valid_bin_inds = (lv_spin_inds >= 0) & (lv_spin_inds < spin_bin_size) lv_spin_inds = lv_spin_inds[valid_bin_inds] # For each low voltage ind, flag the corresponding flag - quality_flags[lv_spin_inds] = low_voltage_flag + quality_flags[lv_spin_inds] = True # TODO add log summary. From 5d9220ca2f4487a31c3b900741a3d94fb0d96dfd Mon Sep 17 00:00:00 2001 From: Luisa Date: Fri, 27 Feb 2026 11:01:36 -0700 Subject: [PATCH 3/7] fix voltage qf tests --- .../tests/ultra/unit/test_ultra_l1b_culling.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py index cfa9da551..873350a5b 100644 --- a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py +++ b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py @@ -339,7 +339,6 @@ def test_flag_low_voltage(test_data): "leftdeflection_v": np.full(n_spins, 1.5), } ) - flagged = 65535 spins = np.arange(n_spins) spin_bin_size = 5 spin_period = np.full(n_spins, 15.0) @@ -355,14 +354,14 @@ def test_flag_low_voltage(test_data): # Check quality flag shape assert quality_flags.shape == (len(spin_tbin_edges) - 1,) # Check that every spin is flagged for low voltage - assert np.all(quality_flags == flagged) + assert np.all(quality_flags) # Set only the first spin to be below threshold mock_status_dataset["rightdeflection_v"].data[1:] += 5000 mock_status_dataset["leftdeflection_v"].data[1:] += 5000 quality_flags = flag_low_voltage(spin_tbin_edges, mock_status_dataset) # Check that only the first spin is flagged for low voltage - assert np.all(quality_flags[0] == flagged) + assert np.all(quality_flags[0]) # The rest should not be flagged assert np.all(quality_flags[1:] == 0) @@ -390,9 +389,7 @@ def test_flag_low_voltage_incomplete_bins(test_data): # check quality flag assert quality_flags.shape == (n_spins // spin_bin_size,) - # Check that every spin is flagged for low voltage - flagged = 65535 - assert np.all(quality_flags == flagged) + assert np.all(quality_flags) def test_expand_bin_flags_to_spins(caplog): @@ -481,9 +478,7 @@ def test_validate_voltage_cull(): xspin.spin_start_time.values, spin_bin_size, ) - lv_flags = flag_low_voltage( - spin_tbin_edges, status_ds, lv_threshold, low_voltage_flag=1 - ) + lv_flags = flag_low_voltage(spin_tbin_edges, status_ds, lv_threshold) assert np.array_equal(lv_flags, validation_low_voltage_qf) From 07a9882e5f81aa63ccd52949aa32cf5ba390b0f3 Mon Sep 17 00:00:00 2001 From: Luisa Date: Fri, 27 Feb 2026 11:38:01 -0700 Subject: [PATCH 4/7] remove comments --- .../ultra/unit/test_ultra_l1b_culling.py | 68 +++++-------------- .../ultra/l1b/ultra_l1b_culling.py | 28 ++++---- 2 files changed, 29 insertions(+), 67 deletions(-) diff --git a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py index 873350a5b..8a1cedf4e 100644 --- a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py +++ b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py @@ -731,18 +731,17 @@ def test_validate_high_energy_cull(): def test_flag_statistical_outliers(): """Tests flag_statistical_outliers function.""" energy_range_edges = np.array([3, 5, 7, 18, 25]) # Example energy bin edges - e_bin_3_value = 23 n_spin_bins = 12 spin_step = 7 energy = np.full(spin_step * n_spin_bins, 0) - # Make sure there are at least 3 other bins with counts in the 3rd energy bin. - # This is to ensure that we have enough events in that energy bin to calculate - # the statistics. - energy[spin_step] = e_bin_3_value - energy[spin_step * 2] = e_bin_3_value - energy[spin_step * 3] = e_bin_3_value + # Make sure there are at least 3 other bins with counts in each energy bin so that + # the statistics can be calculated. + energy[::spin_step] = 3 + energy[1::spin_step] = 5 + energy[2::spin_step] = 7 + energy[3::spin_step] = 18 # Make the last spin bin have higher counts. It should get flagged as an outlier. - energy[-spin_step:] = e_bin_3_value + energy[-spin_step:] = 23 de_dataset = xr.Dataset( { @@ -780,12 +779,12 @@ def test_flag_statistical_outliers(): # The first 2 didn't have enough events to calculate statistics, but they should # still be marked as converged assert np.all(convergence) - # Only the last energy bin should have had any iterations since it's the only one - # with enough events. - assert np.nonzero(iterations) == np.array([3]) - # Only the last energy bin should have a valid std_diff since it's the only one - # with enough events to calculate statistics. - assert np.nonzero(std_diff != -1) == np.array([3]) + # All energy bins should have iterated 1 time except the last one which should have + # iterated twice. + assert np.all(iterations[:-1] == 1) + assert iterations[-1] == 2 + # Check that all std_diff values were set (not zero) + assert np.all(std_diff != 0) def test_flag_statistical_outliers_invalid_events(): @@ -811,14 +810,13 @@ def test_flag_statistical_outliers_invalid_events(): energy_range_edges, mask, ) - # check that all flags are false since there are no valid events to flag as + # check that all flags are set because there are no valid events in any energy bin + # so it fails the stat outlier check by default. np.testing.assert_array_equal( - quality_flags, np.zeros_like(quality_flags, dtype=bool) + quality_flags, np.ones_like(quality_flags, dtype=bool) ) # check that all energy bins are marked as converged (no valid events is not a - # failure case for convergence since we just can't calculate statistics, but we - # still want to mark it as converged so that it doesn't get flagged as a failure - # in the logs) + # failure case for convergence since we just can't calculate statistics. assert np.all(convergence) # check that there were no iterations assert np.sum(iterations) == 0 @@ -826,38 +824,6 @@ def test_flag_statistical_outliers_invalid_events(): assert np.all(std_diff == -1) -@pytest.mark.external_test_data -def test_validate_stat_cull(): - """Validate that flag_statistical_outliers are correctly flagged""" - # read test data from csv files - # xspin = pd.read_csv(TEST_PATH / "extendedspin_test_data_repoint00047.csv") - # expected_qf = pd.read_csv( - # TEST_PATH / "validate_high_energy_culling_results_repoint00047.csv" - # ).to_numpy() - # de_df = pd.read_csv(TEST_PATH / "de_test_data_repoint00047.csv") - # de_ds = xr.Dataset( - # { - # "de_event_met": ("epoch", de_df.event_times.values), - # "energy_spacecraft": ("epoch", de_df.energy_spacecraft.values), - # "quality_outliers": ("epoch", de_df.quality_outliers.values), - # "quality_scattering": ("epoch", de_df.quality_scattering.values), - # "ebin": ("epoch", de_df.ebin.values), - # } - # ) - # # Use constants from the code to ensure consistency with the actual culling code - # spin_bin_size = UltraConstants.SPIN_BIN_SIZE - # spin_tbin_edges = get_binned_spins_edges( - # xspin.spin_number.values, - # xspin.spin_period.values, - # xspin.spin_start_time.values, - # spin_bin_size, - # ) - # intervals, _, _ = build_energy_bins() - # # Get the energy ranges - # energy_ranges = get_binned_energy_ranges(intervals) - pass - - def test_get_poisson_stats(): """Tests get_poisson_stats function.""" counts = np.full(20, 0) diff --git a/imap_processing/ultra/l1b/ultra_l1b_culling.py b/imap_processing/ultra/l1b/ultra_l1b_culling.py index ff6db9088..82e137005 100644 --- a/imap_processing/ultra/l1b/ultra_l1b_culling.py +++ b/imap_processing/ultra/l1b/ultra_l1b_culling.py @@ -645,7 +645,7 @@ def flag_low_voltage( Returns ------- quality_flags : NDArray - Quality flags. + Boolean quality flags shaped (n_spin_bins,). """ spin_bin_size = len(spin_tbin_edges) - 1 # initialize all spins to have no low voltage flag @@ -708,7 +708,7 @@ def flag_high_energy( Returns ------- quality_flags : numpy.ndarray - Quality flags shaped (n_energy_bins, n_spin_bins). + Boolean quality flags shaped (n_energy_bins, n_spin_bins). """ # expand energy thresholds to have shape (n_energy_bins, 1) for comparison with # the counts per spin @@ -761,19 +761,15 @@ def flag_statistical_outliers( After low voltage and high energy spins have been flagged, there still appears to be some time dependency in the signal. This algorithm identifies those outliers. - Stat culling steps: - 1. Compute the count rate per spin bin and energy range. - 2. Flag spins and energy ranges where there are less than 3 bins with counts - 3. Check how close the data is to poisson stats: - - For a perfect poisson distribution, the standard deviation should be the - square root of the mean. So we calculate the difference between the - observed standard deviation and the expected standard deviation (sqrt of mean) - to see how well the data follows a Poisson distribution. - 4. Flag bins where the count is more than 3 standard deviations from the mean. This - will be a different quality flag array as step #3 - 5. If combine_flags_across_energy_bins is True, then if a spin bin is flagged in - any energy channel, flag it in all. Recalculate the convergence after combining - flags across energy bins. + Iterative algorithm to identify areas consistent with Poisson statistics + For each pointing and energy, + 1. Flag where there are less than 3 bins with counts + 2. Calculate the mean (μ) and standard deviation (σ) of the counts in each bin. + 3. Find bins where the counts, c, yield |(c-μ)/σ|>3,  cull these bins + 4. Calculate ε=σ/√μ-1 + 5. If ε is less than a threshold value (0.05 for now) stop iterating + 6. If number of iterations exceeds threshold (5 for now), stop iterating + 7. Return to step 1 Parameters ---------- @@ -835,7 +831,7 @@ def flag_statistical_outliers( # Step 1. check if any energy bins have less than 3 spin bins with counts. # If so, flag all spins for that energy bin and skip to the next iteration if np.sum(counts > 0) < 3: - quality_stats[e_idx] = False + quality_stats[e_idx] = True convergence[e_idx] = True std_diff[e_idx] = -1 break From 5b910e8d2ae931409bedf151b2e86a078593dd22 Mon Sep 17 00:00:00 2001 From: Luisa Date: Fri, 27 Feb 2026 11:57:04 -0700 Subject: [PATCH 5/7] address pr comments --- .../tests/ultra/unit/test_ultra_l1b_culling.py | 14 +++++++------- imap_processing/ultra/l1b/extendedspin.py | 4 ++-- imap_processing/ultra/l1b/ultra_l1b_culling.py | 16 +++++++++------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py index 8a1cedf4e..f1601c63e 100644 --- a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py +++ b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py @@ -678,13 +678,13 @@ def test_flag_high_energy(): ) # Assert that the first spin bin is flagged for high energy for all energy ranges # except the second one - quality_flags[0, 0] = True - quality_flags[1, 0] = False - quality_flags[2, 0] = True - quality_flags[3, 0] = True + assert quality_flags[0, 0] + assert not quality_flags[1, 0] + assert quality_flags[2, 0] + assert quality_flags[3, 0] # # Assert that the second spin bin is only flagged for high energy for the last # # energy range - quality_flags[3, 1] = True + assert quality_flags[3, 1] assert not np.any(quality_flags[0:3, 1]) # # Assert that the third spin bin is not flagged for any energy range assert not np.any(quality_flags[:, 2]) @@ -754,7 +754,7 @@ def test_flag_statistical_outliers(): ) spin_tbin_edges = np.arange( start=0, stop=len(energy) + 1, step=spin_step - ) # create spin bins of 4 seconds + ) # create spin bins of 7 seconds quality_flags, convergence, iterations, std_diff = flag_statistical_outliers( de_dataset, spin_tbin_edges, @@ -802,7 +802,7 @@ def test_flag_statistical_outliers_invalid_events(): ) spin_tbin_edges = np.arange( start=0, stop=len(energy) + 1, step=5 - ) # create spin bins of 4 seconds + ) # create spin bins of 5 seconds mask = np.ones((len(energy_range_edges) - 1, len(spin_tbin_edges) - 1), dtype=bool) quality_flags, convergence, iterations, std_diff = flag_statistical_outliers( de_dataset, diff --git a/imap_processing/ultra/l1b/extendedspin.py b/imap_processing/ultra/l1b/extendedspin.py index 773d5266d..ae9b4ed25 100644 --- a/imap_processing/ultra/l1b/extendedspin.py +++ b/imap_processing/ultra/l1b/extendedspin.py @@ -141,10 +141,10 @@ def calculate_extendedspin( # with shape (n_energy_bins, n_spin_bins). We want to collapse the energy dimension # using a bitwise OR to get a single boolean flag per spin. high_energy_qf = np.bitwise_or.reduce( - high_energy_qf * energy_bin_flags[:, np.newaxis], axis=1 + high_energy_qf * energy_bin_flags[:, np.newaxis], axis=0 ) stat_outliers_qf = np.bitwise_or.reduce( - stat_outliers_qf * energy_bin_flags[:, np.newaxis], axis=1 + stat_outliers_qf * energy_bin_flags[:, np.newaxis], axis=0 ) # Low voltage flag is shape (n_spin_bins,) but we want to convert from a boolean # to a bitwise flag to be consistent with the other flags, where each spin that diff --git a/imap_processing/ultra/l1b/ultra_l1b_culling.py b/imap_processing/ultra/l1b/ultra_l1b_culling.py index 82e137005..c67c6a544 100644 --- a/imap_processing/ultra/l1b/ultra_l1b_culling.py +++ b/imap_processing/ultra/l1b/ultra_l1b_culling.py @@ -762,7 +762,7 @@ def flag_statistical_outliers( be some time dependency in the signal. This algorithm identifies those outliers. Iterative algorithm to identify areas consistent with Poisson statistics - For each pointing and energy, + For each energy range: 1. Flag where there are less than 3 bins with counts 2. Calculate the mean (μ) and standard deviation (σ) of the counts in each bin. 3. Find bins where the counts, c, yield |(c-μ)/σ|>3,  cull these bins @@ -782,14 +782,16 @@ def flag_statistical_outliers( mask : numpy.ndarray Mask indicating which events to consider for statistical outlier flagging. This should be a 2d boolean array of shape (n_energy_bins, n_spin_bins) where - True indicates the spin bins to consider for outlier flagging for each energy - bin (e.g., after low voltage and high energy culling). + True indicates the spin bins that have been flagged in previous steps (e.g., + after low voltage and high energy culling) and should be excluded from the + outlier flagging process. sensor_id : int Sensor ID (e.g., 45 or 90). n_iterations : int Maximum number of iterations to perform for outlier flagging. std_threshold : float - Threshold for standard deviation difference from Poisson stats to determine. + Threshold for standard deviation difference from Poisson stats to determine + convergence. combine_flags_across_energy_bins : bool Whether to link energy channels such that if a spin bin is flagged in any energy channel, it is flagged in all energy channels. @@ -808,7 +810,7 @@ def flag_statistical_outliers( Array of shape (n_energy_bins,) containing the final standard deviation difference from Poisson stats for each energy bin. """ - # Initialize all spin bins to have no high energy flag + # Initialize all spin bins to have no outlier flag spin_bin_size = len(spin_tbin_edges) - 1 n_energy_bins = len(energy_ranges) - 1 # make a copy of the mask to avoid modifying the original mask passed in @@ -864,8 +866,8 @@ def flag_statistical_outliers( # Recalculate convergence with the combined mask. for e_idx in range(n_energy_bins): if not convergence[e_idx]: - valid_inds = np.where(combined_mask)[0] - counts = count_summary[e_idx, valid_inds] + # Select counts that have not been flagged in any channel. + counts = count_summary[e_idx, ~combined_mask] std_ratio, _ = get_poisson_stats(counts) std_diff[e_idx] = std_ratio if std_ratio < std_threshold: From 7ebb697f81d4e724c17271415e2261926dafebc4 Mon Sep 17 00:00:00 2001 From: Luisa Date: Fri, 27 Feb 2026 15:51:08 -0700 Subject: [PATCH 6/7] pr comments --- .../tests/ultra/unit/test_ultra_l1b_culling.py | 2 +- imap_processing/ultra/l1b/extendedspin.py | 2 +- imap_processing/ultra/l1b/ultra_l1b_culling.py | 11 ++++++++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py index f1601c63e..a06b7b7f6 100644 --- a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py +++ b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py @@ -682,7 +682,7 @@ def test_flag_high_energy(): assert not quality_flags[1, 0] assert quality_flags[2, 0] assert quality_flags[3, 0] - # # Assert that the second spin bin is only flagged for high energy for the last + # Assert that the second spin bin is only flagged for high energy for the last # # energy range assert quality_flags[3, 1] assert not np.any(quality_flags[0:3, 1]) diff --git a/imap_processing/ultra/l1b/extendedspin.py b/imap_processing/ultra/l1b/extendedspin.py index ae9b4ed25..41eed8642 100644 --- a/imap_processing/ultra/l1b/extendedspin.py +++ b/imap_processing/ultra/l1b/extendedspin.py @@ -92,7 +92,7 @@ def calculate_extendedspin( ) # Combine high energy and voltage flags to use for statistical outlier flagging. mask = ( - voltage_qf[np.newaxis, :] & high_energy_qf + voltage_qf[np.newaxis, :] | high_energy_qf ) # Shape (n_energy_bins, n_spins_bins) stat_outliers_qf, _, _, _ = flag_statistical_outliers( de_dataset, diff --git a/imap_processing/ultra/l1b/ultra_l1b_culling.py b/imap_processing/ultra/l1b/ultra_l1b_culling.py index c67c6a544..a803de862 100644 --- a/imap_processing/ultra/l1b/ultra_l1b_culling.py +++ b/imap_processing/ultra/l1b/ultra_l1b_culling.py @@ -698,7 +698,8 @@ def flag_high_energy( Array of energy range edges. mask : numpy.ndarray, optional Mask indicating which events to consider for high energy flagging - (e.g., after low voltage culling). + (e.g., after low voltage culling). True indicates the spin bins that should + NOT be considered for high energy flagging. energy_thresholds : numpy.ndarray Array of count thresholds for flagging high energy events corresponding to each energy range. @@ -737,7 +738,7 @@ def flag_high_energy( ) # (n_energy_bins, n_spin_bins) if mask is not None: - quality_flags[:, mask] = flagged[:, mask] + quality_flags[:, ~mask] = flagged[:, ~mask] else: quality_flags = flagged # TODO add log summary. E.g Tim's hi goodtimes code @@ -826,7 +827,7 @@ def flag_statistical_outliers( count_summary = get_valid_de_count_summary( de_dataset, energy_ranges, spin_tbin_edges, sensor_id ) # shape (n_energy_bins, n_spin_bins) - for e_idx in np.arange(n_energy_bins): # shape: (n_energy_bins,) + for e_idx in np.arange(n_energy_bins): for it in range(n_iterations): # only consider bins that are currently unflagged for this energy bin counts = count_summary[e_idx, ~iter_mask[e_idx]] @@ -909,6 +910,10 @@ def get_poisson_stats(counts: NDArray) -> tuple[float, NDArray]: a statistical outlier (more than 3 sigma from the mean). """ std = np.std(counts) + if std == 0: + # If std is 0, then all counts are the same. In this case, we can consider + # there to be no outliers and the distribution to perfectly match Poisson + return 0, np.zeros_like(counts, dtype=bool) std_ratio = std / np.sqrt(np.mean(counts)) - 1 sub_mask = np.abs((counts - np.mean(counts)) / std) > 3 return std_ratio, sub_mask From 9a3de08880adf253c1cadf81d8381b11630ae345 Mon Sep 17 00:00:00 2001 From: Luisa Date: Mon, 2 Mar 2026 08:58:03 -0700 Subject: [PATCH 7/7] fix test --- imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py index a06b7b7f6..2277e489b 100644 --- a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py +++ b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py @@ -783,8 +783,9 @@ def test_flag_statistical_outliers(): # iterated twice. assert np.all(iterations[:-1] == 1) assert iterations[-1] == 2 - # Check that all std_diff values were set (not zero) - assert np.all(std_diff != 0) + # Check that all std_diff values were set (not zero) except the last one + assert np.all(std_diff[:-1] != 0) + assert std_diff[-1] == 0 def test_flag_statistical_outliers_invalid_events():