Skip to content

Commit 97817e4

Browse files
authored
ULTRA l1b culling updates after running files locally (#2816)
* updates from processing maps * Fix test
1 parent 9b032a3 commit 97817e4

4 files changed

Lines changed: 41 additions & 27 deletions

File tree

imap_processing/tests/external_test_data_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@
153153
("status_test_data_repoint00047.csv", "ultra/data/l1/"),
154154
("voltage_culling_results_repoint00047.csv", "ultra/data/l1/"),
155155
("validate_high_energy_culling_results_repoint00047_v2.csv", "ultra/data/l1/"),
156-
("validate_stat_culling_results_repoint00047.csv", "ultra/data/l1/"),
156+
("validate_stat_culling_results_repoint00047_v2.csv", "ultra/data/l1/"),
157157
("de_test_data_repoint00047.csv", "ultra/data/l1/"),
158158
("FM45_UltraFM45Extra_TV_Tests_2024-01-22T0930_20240122T093008.CCSDS", "ultra/data/l0/"),
159159
("ultra45_raw_sc_rawnrgevnt_19840122_00.csv", "ultra/data/l0/"),

imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ def test_get_valid_earth_angle_events(mock_spkezr):
490490
de_dps_velocity = np.random.random((12, 3))
491491
de_dataset = xr.Dataset(
492492
{
493-
"de_dps_velocity": (("epoch", "component"), de_dps_velocity),
493+
"velocity_dps_sc": (("epoch", "component"), de_dps_velocity),
494494
"event_times": ("epoch", np.arange(12)),
495495
}
496496
)
@@ -608,7 +608,7 @@ def test_get_valid_events_per_energy_range_ultra45(mock_spkezr):
608608

609609
de_dataset = xr.Dataset(
610610
{
611-
"de_dps_velocity": (("epoch", "component"), de_dps_velocity),
611+
"velocity_dps_sc": (("epoch", "component"), de_dps_velocity),
612612
"event_times": ("epoch", np.full(len(energy), 798033671)),
613613
"energy_spacecraft": ("epoch", energy),
614614
"quality_outliers": ("epoch", np.full(len(energy), 0)),
@@ -815,10 +815,10 @@ def test_flag_statistical_outliers_invalid_events():
815815
energy_range_edges,
816816
mask,
817817
)
818-
# check that all flags are set because there are no valid events in any energy bin
819-
# so it fails the stat outlier check by default.
818+
# check that no flags are set because there were no valid events to calculate
819+
# statistics on.
820820
np.testing.assert_array_equal(
821-
quality_flags, np.ones_like(quality_flags, dtype=bool)
821+
quality_flags, np.zeros_like(quality_flags, dtype=bool)
822822
)
823823
# check that all energy bins are marked as converged (no valid events is not a
824824
# failure case for convergence since we just can't calculate statistics).
@@ -850,7 +850,7 @@ def test_validate_stat_cull():
850850
# read test data from csv files
851851
xspin = pd.read_csv(TEST_PATH / "extendedspin_test_data_repoint00047.csv")
852852
results_df = pd.read_csv(
853-
TEST_PATH / "validate_stat_culling_results_repoint00047.csv"
853+
TEST_PATH / "validate_stat_culling_results_repoint00047_v2.csv"
854854
)
855855
de_df = pd.read_csv(TEST_PATH / "de_test_data_repoint00047.csv")
856856
de_ds = xr.Dataset(
@@ -873,7 +873,14 @@ def test_validate_stat_cull():
873873
intervals, _, _ = build_energy_bins()
874874
# Get the energy ranges
875875
energy_ranges = get_binned_energy_ranges(intervals)
876+
877+
# Create a mask of flagged events to test that the stat cull algorithm
878+
# properly ignores these. The test data was created using this exact mask as well.
876879
mask = np.zeros((len(energy_ranges) - 1, len(spin_tbin_edges) - 1), dtype=bool)
880+
mask[0:2, 0:2] = (
881+
True  # This will mark the first 2 energy bins and first 2 spin bins as flagged, so they are
882+
)
883+
# ignored in the statistics calculation and flagging.
877884
flags, con, it, std = flag_statistical_outliers(
878885
de_ds, spin_tbin_edges, energy_ranges, mask, 90
879886
)

imap_processing/ultra/l1b/extendedspin.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,5 +176,4 @@ def calculate_extendedspin(
176176
extendedspin_dict["energy_range_edges"] = np.array(energy_ranges)
177177

178178
extendedspin_dataset = create_dataset(extendedspin_dict, name, "l1b")
179-
180179
return extendedspin_dataset

imap_processing/ultra/l1b/ultra_l1b_culling.py

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -824,9 +824,17 @@ def flag_statistical_outliers(
824824
# Initialize all spin bins to have no outlier flag
825825
spin_bin_size = len(spin_tbin_edges) - 1
826826
n_energy_bins = len(energy_ranges) - 1
827-
# make a copy of the mask to avoid modifying the original mask passed in
828-
iter_mask = mask.copy()
827+
# make a copy of the current mask to avoid modifying the original mask passed in.
828+
# This contains flags from previous steps (e.g., after low voltage and high
829+
# energy culling) and will be updated iteratively to include the outlier flags as
830+
# well.
831+
curr_mask = mask.copy()
832+
# Initialize quality_stats to keep track of which bins are flagged as outliers for
833+
# each energy bin
829834
quality_stats = np.zeros((n_energy_bins, spin_bin_size), dtype=bool)
835+
# Initialize a mask to keep track of spin bins that have been flagged across all
836+
# energy bins
837+
all_channel_mask = np.zeros(spin_bin_size, dtype=bool)
830838
# Initialize convergence array to keep track of poisson stats
831839
convergence = np.full(n_energy_bins, False)
832840
# Keep track of how many iterations we have done of flagging outliers and
@@ -838,13 +846,14 @@ def flag_statistical_outliers(
838846
de_dataset, energy_ranges, spin_tbin_edges, sensor_id=sensor_id
839847
) # shape (n_energy_bins, n_spin_bins)
840848
for e_idx in np.arange(n_energy_bins):
849+
good_mask = ~curr_mask[e_idx] # spin bins that are not currently flagged
841850
for it in range(n_iterations):
842-
# only consider bins that are currently unflagged for this energy bin
843-
counts = count_summary[e_idx, ~iter_mask[e_idx]]
851+
counts = count_summary[e_idx, good_mask]
844852
# Step 1. check if any energy bins have less than 3 spin bins with counts.
845853
# If so, flag all spins for that energy bin and skip to the next iteration
846854
if np.sum(counts > 0) < 3:
847855
quality_stats[e_idx] = True
856+
curr_mask[e_idx] = True
848857
convergence[e_idx] = True
849858
std_diff[e_idx] = -1
850859
break
@@ -853,12 +862,11 @@ def flag_statistical_outliers(
853862
std_diff[e_idx] = std_ratio
854863
# Step 3. Flag bins where the count is more than 3 standard deviations from
855864
# the mean.
856-
outlier_inds = np.where(~iter_mask[e_idx])[0][outlier_mask]
865+
outlier_inds = np.where(good_mask)[0][outlier_mask]
857866
# Set the quality flag to True for the outlier inds
858867
quality_stats[e_idx, outlier_inds] = True
859-
# Also update the iter_mask to exclude the outlier bins for the next
860-
# iteration
861-
iter_mask[e_idx, outlier_inds] = True
868+
all_channel_mask[outlier_inds] = True
869+
good_mask[outlier_inds] = False
862870
iterations[e_idx] = it + 1
863871
# Check for convergence: if the standard deviation difference from
864872
# poisson stats is below the threshold, then we can stop iterating for this
@@ -868,17 +876,13 @@ def flag_statistical_outliers(
868876
break
869877

870878
if combine_flags_across_energy_bins:
871-
# If a spin bin is flagged in any energy channel flag it in all energy channels
872-
# Use np.any to check if a spin bin is flagged in any energy channel,
873-
# then flag it in all energy channels
874-
combined_mask = np.any(quality_stats, axis=0) # (n_spin_bins,)
875-
quality_stats[:] = combined_mask # Broadcast to all energy bins
876-
879+
# If true, then use the all_channel_mask for every energy channel.
880+
quality_stats[:] = all_channel_mask
877881
# Recalculate convergence with the combined mask.
878882
for e_idx in range(n_energy_bins):
879883
if not convergence[e_idx]:
880884
# Select counts that have not been flagged in any channel.
881-
counts = count_summary[e_idx, ~combined_mask]
885+
counts = count_summary[e_idx, ~all_channel_mask]
882886
std_ratio, _ = get_poisson_stats(counts)
883887
if std_ratio < std_threshold:
884888
convergence[e_idx] = True
@@ -1064,7 +1068,7 @@ def get_valid_earth_angle_events(
10641068
A boolean array indicating which events have Earth angle greater than the
10651069
specified threshold.
10661070
"""
1067-
de_dps_velocity = de_dataset_subset["de_dps_velocity"].values
1071+
velocity_dps_sc = de_dataset_subset["velocity_dps_sc"].values
10681072
# Use the mean event time to compute the Earth unit vector since the spacecraft
10691073
# position doesn't change significantly over the course of the energy bin.
10701074
et = np.mean(de_dataset_subset["event_times"].values)
@@ -1082,10 +1086,10 @@ def get_valid_earth_angle_events(
10821086
distance = np.linalg.norm(position)
10831087
earth_unit_vector = position / distance
10841088
# Calculate the magnitude of the velocity vector for each event
1085-
particle_mag = np.linalg.norm(de_dps_velocity, axis=1)
1089+
particle_mag = np.linalg.norm(velocity_dps_sc, axis=1)
10861090
# Normalize and flip to get where each particle is looking.
10871091
unit_look_dirs = (
1088-
-de_dps_velocity / particle_mag[:, np.newaxis]
1092+
-velocity_dps_sc / particle_mag[:, np.newaxis]
10891093
) # shape (n_events, 3)
10901094
# Get cos(theta) between each particle look direction and Earth direction
10911095
cos_sep = np.dot(unit_look_dirs, earth_unit_vector) # shape (n_events,)
@@ -1217,7 +1221,11 @@ def get_binned_spins_edges(
12171221
)
12181222
# Append the last start time plus the spin period to account for low times
12191223
# that occur after the last spin start time
1220-
spin_tbin_edges = np.append(spin_tbin_edges, spin_tbin_edges[-1] + spin_periods[-1])
1224+
last_spin_idx = min(n_spin_bins * spin_bin_size - 1, len(spins) - 1)
1225+
spin_tbin_edges = np.append(
1226+
spin_tbin_edges, spin_start_times[last_spin_idx] + spin_periods[last_spin_idx]
1227+
)
1228+
return spin_tbin_edges
12211229
return spin_tbin_edges
12221230

12231231

0 commit comments

Comments
 (0)