Skip to content

Commit 97817e4

Browse files
authored
ULTRA l1b culling updates after running files locally (#2816)
* updates from processing maps * Fix test
1 parent 9b032a3 commit 97817e4

4 files changed

Lines changed: 41 additions & 27 deletions

File tree

imap_processing/tests/external_test_data_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@
153153
("status_test_data_repoint00047.csv", "ultra/data/l1/"),
154154
("voltage_culling_results_repoint00047.csv", "ultra/data/l1/"),
155155
("validate_high_energy_culling_results_repoint00047_v2.csv", "ultra/data/l1/"),
156-
("validate_stat_culling_results_repoint00047.csv", "ultra/data/l1/"),
156+
("validate_stat_culling_results_repoint00047_v2.csv", "ultra/data/l1/"),
157157
("de_test_data_repoint00047.csv", "ultra/data/l1/"),
158158
("FM45_UltraFM45Extra_TV_Tests_2024-01-22T0930_20240122T093008.CCSDS", "ultra/data/l0/"),
159159
("ultra45_raw_sc_rawnrgevnt_19840122_00.csv", "ultra/data/l0/"),

imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ def test_get_valid_earth_angle_events(mock_spkezr):
490490
de_dps_velocity = np.random.random((12, 3))
491491
de_dataset = xr.Dataset(
492492
{
493-
"de_dps_velocity": (("epoch", "component"), de_dps_velocity),
493+
"velocity_dps_sc": (("epoch", "component"), de_dps_velocity),
494494
"event_times": ("epoch", np.arange(12)),
495495
}
496496
)
@@ -608,7 +608,7 @@ def test_get_valid_events_per_energy_range_ultra45(mock_spkezr):
608608

609609
de_dataset = xr.Dataset(
610610
{
611-
"de_dps_velocity": (("epoch", "component"), de_dps_velocity),
611+
"velocity_dps_sc": (("epoch", "component"), de_dps_velocity),
612612
"event_times": ("epoch", np.full(len(energy), 798033671)),
613613
"energy_spacecraft": ("epoch", energy),
614614
"quality_outliers": ("epoch", np.full(len(energy), 0)),
@@ -815,10 +815,10 @@ def test_flag_statistical_outliers_invalid_events():
815815
energy_range_edges,
816816
mask,
817817
)
818-
# check that all flags are set because there are no valid events in any energy bin
819-
# so it fails the stat outlier check by default.
818+
# check that no flags are set because there were no valid events to calculate
819+
# statistics on.
820820
np.testing.assert_array_equal(
821-
quality_flags, np.ones_like(quality_flags, dtype=bool)
821+
quality_flags, np.zeros_like(quality_flags, dtype=bool)
822822
)
823823
# check that all energy bins are marked as converged (no valid events is not a
824824
# failure case for convergence since we just can't calculate statistics).
@@ -850,7 +850,7 @@ def test_validate_stat_cull():
850850
# read test data from csv files
851851
xspin = pd.read_csv(TEST_PATH / "extendedspin_test_data_repoint00047.csv")
852852
results_df = pd.read_csv(
853-
TEST_PATH / "validate_stat_culling_results_repoint00047.csv"
853+
TEST_PATH / "validate_stat_culling_results_repoint00047_v2.csv"
854854
)
855855
de_df = pd.read_csv(TEST_PATH / "de_test_data_repoint00047.csv")
856856
de_ds = xr.Dataset(
@@ -873,7 +873,14 @@ def test_validate_stat_cull():
873873
intervals, _, _ = build_energy_bins()
874874
# Get the energy ranges
875875
energy_ranges = get_binned_energy_ranges(intervals)
876+
877+
# Create a mask of flagged events to test that the stat cull algorithm
878+
# properly ignores these. The test data was created using this exact mask as well.
876879
mask = np.zeros((len(energy_ranges) - 1, len(spin_tbin_edges) - 1), dtype=bool)
880+
mask[0:2, 0:2] = (
881+
True  # This will mark the first 2 energy bins and first 2 spin bins as flagged, so they are
882+
)
883+
# ignored in the statistics calculation and flagging.
877884
flags, con, it, std = flag_statistical_outliers(
878885
de_ds, spin_tbin_edges, energy_ranges, mask, 90
879886
)

imap_processing/ultra/l1b/extendedspin.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,5 +176,4 @@ def calculate_extendedspin(
176176
extendedspin_dict["energy_range_edges"] = np.array(energy_ranges)
177177

178178
extendedspin_dataset = create_dataset(extendedspin_dict, name, "l1b")
179-
180179
return extendedspin_dataset

imap_processing/ultra/l1b/ultra_l1b_culling.py

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -824,9 +824,17 @@ def flag_statistical_outliers(
824824
# Initialize all spin bins to have no outlier flag
825825
spin_bin_size = len(spin_tbin_edges) - 1
826826
n_energy_bins = len(energy_ranges) - 1
827-
# make a copy of the mask to avoid modifying the original mask passed in
828-
iter_mask = mask.copy()
827+
# make a copy of the current mask to avoid modifying the original mask passed in.
828+
# This contains flags from previous steps (e.g., after low voltage and high
829+
# energy culling) and will be updated iteratively to include the outlier flags as
830+
# well.
831+
curr_mask = mask.copy()
832+
# Initialize quality_stats to keep track of which bins are flagged as outliers for
833+
# each energy bin
829834
quality_stats = np.zeros((n_energy_bins, spin_bin_size), dtype=bool)
835+
# Initialize a mask to keep track of spin bins that have been flagged across all
836+
# energy bins
837+
all_channel_mask = np.zeros(spin_bin_size, dtype=bool)
830838
# Initialize convergence array to keep track of poisson stats
831839
convergence = np.full(n_energy_bins, False)
832840
# Keep track of how many iterations we have done of flagging outliers and
@@ -838,13 +846,14 @@ def flag_statistical_outliers(
838846
de_dataset, energy_ranges, spin_tbin_edges, sensor_id=sensor_id
839847
) # shape (n_energy_bins, n_spin_bins)
840848
for e_idx in np.arange(n_energy_bins):
849+
good_mask = ~curr_mask[e_idx] # spin bins that are not currently flagged
841850
for it in range(n_iterations):
842-
# only consider bins that are currently unflagged for this energy bin
843-
counts = count_summary[e_idx, ~iter_mask[e_idx]]
851+
counts = count_summary[e_idx, good_mask]
844852
# Step 1. check if any energy bins have less than 3 spin bins with counts.
845853
# If so, flag all spins for that energy bin and skip to the next iteration
846854
if np.sum(counts > 0) < 3:
847855
quality_stats[e_idx] = True
856+
curr_mask[e_idx] = True
848857
convergence[e_idx] = True
849858
std_diff[e_idx] = -1
850859
break
@@ -853,12 +862,11 @@ def flag_statistical_outliers(
853862
std_diff[e_idx] = std_ratio
854863
# Step 3. Flag bins where the count is more than 3 standard deviations from
855864
# the mean.
856-
outlier_inds = np.where(~iter_mask[e_idx])[0][outlier_mask]
865+
outlier_inds = np.where(good_mask)[0][outlier_mask]
857866
# Set the quality flag to True for the outlier inds
858867
quality_stats[e_idx, outlier_inds] = True
859-
# Also update the iter_mask to exclude the outlier bins for the next
860-
# iteration
861-
iter_mask[e_idx, outlier_inds] = True
868+
all_channel_mask[outlier_inds] = True
869+
good_mask[outlier_inds] = False
862870
iterations[e_idx] = it + 1
863871
# Check for convergence: if the standard deviation difference from
864872
# poisson stats is below the threshold, then we can stop iterating for this
@@ -868,17 +876,13 @@ def flag_statistical_outliers(
868876
break
869877

870878
if combine_flags_across_energy_bins:
871-
# If a spin bin is flagged in any energy channel flag it in all energy channels
872-
# Use np.any to check if a spin bin is flagged in any energy channel,
873-
# then flag it in all energy channels
874-
combined_mask = np.any(quality_stats, axis=0) # (n_spin_bins,)
875-
quality_stats[:] = combined_mask # Broadcast to all energy bins
876-
879+
# If true, then use the all_channel_mask for every energy channel.
880+
quality_stats[:] = all_channel_mask
877881
# Recalculate convergence with the combined mask.
878882
for e_idx in range(n_energy_bins):
879883
if not convergence[e_idx]:
880884
# Select counts that have not been flagged in any channel.
881-
counts = count_summary[e_idx, ~combined_mask]
885+
counts = count_summary[e_idx, ~all_channel_mask]
882886
std_ratio, _ = get_poisson_stats(counts)
883887
if std_ratio < std_threshold:
884888
convergence[e_idx] = True
@@ -1064,7 +1068,7 @@ def get_valid_earth_angle_events(
10641068
A boolean array indicating which events have Earth angle greater than the
10651069
specified threshold.
10661070
"""
1067-
de_dps_velocity = de_dataset_subset["de_dps_velocity"].values
1071+
velocity_dps_sc = de_dataset_subset["velocity_dps_sc"].values
10681072
# Use the mean event time to compute the Earth unit vector since the spacecraft
10691073
# position doesn't change significantly over the course of the energy bin.
10701074
et = np.mean(de_dataset_subset["event_times"].values)
@@ -1082,10 +1086,10 @@ def get_valid_earth_angle_events(
10821086
distance = np.linalg.norm(position)
10831087
earth_unit_vector = position / distance
10841088
# Calculate the magnitude of the velocity vector for each event
1085-
particle_mag = np.linalg.norm(de_dps_velocity, axis=1)
1089+
particle_mag = np.linalg.norm(velocity_dps_sc, axis=1)
10861090
# Normalize and flip to get where each particle is looking.
10871091
unit_look_dirs = (
1088-
-de_dps_velocity / particle_mag[:, np.newaxis]
1092+
-velocity_dps_sc / particle_mag[:, np.newaxis]
10891093
) # shape (n_events, 3)
10901094
# Get cos(theta) between each particle look direction and Earth direction
10911095
cos_sep = np.dot(unit_look_dirs, earth_unit_vector) # shape (n_events,)
@@ -1217,7 +1221,11 @@ def get_binned_spins_edges(
12171221
)
12181222
# Append the last start time plus the spin period to account for low times
12191223
# that occur after the last spin start time
1220-
spin_tbin_edges = np.append(spin_tbin_edges, spin_tbin_edges[-1] + spin_periods[-1])
1224+
last_spin_idx = min(n_spin_bins * spin_bin_size - 1, len(spins) - 1)
1225+
spin_tbin_edges = np.append(
1226+
spin_tbin_edges, spin_start_times[last_spin_idx] + spin_periods[last_spin_idx]
1227+
)
1228+
return spin_tbin_edges
12211229
return spin_tbin_edges
12221230

12231231

0 commit comments

Comments
 (0)