Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion imap_processing/tests/external_test_data_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@
("status_test_data_repoint00047.csv", "ultra/data/l1/"),
("voltage_culling_results_repoint00047.csv", "ultra/data/l1/"),
("validate_high_energy_culling_results_repoint00047_v2.csv", "ultra/data/l1/"),
("validate_stat_culling_results_repoint00047.csv", "ultra/data/l1/"),
("validate_stat_culling_results_repoint00047_v2.csv", "ultra/data/l1/"),
("de_test_data_repoint00047.csv", "ultra/data/l1/"),
("FM45_UltraFM45Extra_TV_Tests_2024-01-22T0930_20240122T093008.CCSDS", "ultra/data/l0/"),
("ultra45_raw_sc_rawnrgevnt_19840122_00.csv", "ultra/data/l0/"),
Expand Down
19 changes: 13 additions & 6 deletions imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def test_get_valid_earth_angle_events(mock_spkezr):
de_dps_velocity = np.random.random((12, 3))
de_dataset = xr.Dataset(
{
"de_dps_velocity": (("epoch", "component"), de_dps_velocity),
"velocity_dps_sc": (("epoch", "component"), de_dps_velocity),
"event_times": ("epoch", np.arange(12)),
}
)
Expand Down Expand Up @@ -608,7 +608,7 @@ def test_get_valid_events_per_energy_range_ultra45(mock_spkezr):

de_dataset = xr.Dataset(
{
"de_dps_velocity": (("epoch", "component"), de_dps_velocity),
"velocity_dps_sc": (("epoch", "component"), de_dps_velocity),
"event_times": ("epoch", np.full(len(energy), 798033671)),
"energy_spacecraft": ("epoch", energy),
"quality_outliers": ("epoch", np.full(len(energy), 0)),
Expand Down Expand Up @@ -815,10 +815,10 @@ def test_flag_statistical_outliers_invalid_events():
energy_range_edges,
mask,
)
# check that all flags are set because there are no valid events in any energy bin
# so it fails the stat outlier check by default.
# check that no flags are set because there were no valid events to calculate
# statistics on.
np.testing.assert_array_equal(
quality_flags, np.ones_like(quality_flags, dtype=bool)
quality_flags, np.zeros_like(quality_flags, dtype=bool)
)
# check that all energy bins are marked as converged (no valid events is not a
# failure case for convergence since we just can't calculate statistics.
Expand Down Expand Up @@ -850,7 +850,7 @@ def test_validate_stat_cull():
# read test data from csv files
xspin = pd.read_csv(TEST_PATH / "extendedspin_test_data_repoint00047.csv")
results_df = pd.read_csv(
TEST_PATH / "validate_stat_culling_results_repoint00047.csv"
TEST_PATH / "validate_stat_culling_results_repoint00047_v2.csv"
)
de_df = pd.read_csv(TEST_PATH / "de_test_data_repoint00047.csv")
de_ds = xr.Dataset(
Expand All @@ -873,7 +873,14 @@ def test_validate_stat_cull():
intervals, _, _ = build_energy_bins()
# Get the energy ranges
energy_ranges = get_binned_energy_ranges(intervals)

# Create a mask of flagged events to test that the stat cull algorithm
# properly ignores these. The test data was created using this exact mask as well.
mask = np.zeros((len(energy_ranges) - 1, len(spin_tbin_edges) - 1), dtype=bool)
mask[0:2, 0:2] = (
True # This will mark the first 2 energy bins and first 2 spin bins as flagged
)
# ignored in the statistics calculation and flagging.
flags, con, it, std = flag_statistical_outliers(
de_ds, spin_tbin_edges, energy_ranges, mask, 90
)
Expand Down
1 change: 0 additions & 1 deletion imap_processing/ultra/l1b/extendedspin.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,5 +176,4 @@ def calculate_extendedspin(
extendedspin_dict["energy_range_edges"] = np.array(energy_ranges)

extendedspin_dataset = create_dataset(extendedspin_dict, name, "l1b")

return extendedspin_dataset
46 changes: 27 additions & 19 deletions imap_processing/ultra/l1b/ultra_l1b_culling.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,9 +824,17 @@ def flag_statistical_outliers(
# Initialize all spin bins to have no outlier flag
spin_bin_size = len(spin_tbin_edges) - 1
n_energy_bins = len(energy_ranges) - 1
# make a copy of the mask to avoid modifying the original mask passed in
iter_mask = mask.copy()
# make a copy of the current mask to avoid modifying the original mask passed in.
# This contains flags from previous steps (e.g., after low voltage and high
# energy culling) and will be updated iteratively to include the outlier flags as
# well
curr_mask = mask.copy()
# Initialize quality_stats to keep track of which bins are flagged as outliers for
# each energy bin
quality_stats = np.zeros((n_energy_bins, spin_bin_size), dtype=bool)
# Initialize a mask to keep track of spin bins that have been flagged across all
# energy bins
all_channel_mask = np.zeros(spin_bin_size, dtype=bool)
# Initialize convergence array to keep track of poisson stats
convergence = np.full(n_energy_bins, False)
# Keep track of how many iterations we have done of flagging outliers and
Expand All @@ -838,13 +846,14 @@ def flag_statistical_outliers(
de_dataset, energy_ranges, spin_tbin_edges, sensor_id=sensor_id
) # shape (n_energy_bins, n_spin_bins)
for e_idx in np.arange(n_energy_bins):
good_mask = ~curr_mask[e_idx] # spin bins that are not currently flagged
for it in range(n_iterations):
# only consider bins that are currently unflagged for this energy bin
counts = count_summary[e_idx, ~iter_mask[e_idx]]
counts = count_summary[e_idx, good_mask]
# Step 1. check if any energy bins have less than 3 spin bins with counts.
# If so, flag all spins for that energy bin and skip to the next iteration
if np.sum(counts > 0) < 3:
quality_stats[e_idx] = True
curr_mask[e_idx] = True
convergence[e_idx] = True
std_diff[e_idx] = -1
break
Expand All @@ -853,12 +862,11 @@ def flag_statistical_outliers(
std_diff[e_idx] = std_ratio
# Step 3. Flag bins where the count is more than 3 standard deviations from
# the mean.
outlier_inds = np.where(~iter_mask[e_idx])[0][outlier_mask]
outlier_inds = np.where(good_mask)[0][outlier_mask]
# Set the quality flag to True for the outlier inds
quality_stats[e_idx, outlier_inds] = True
# Also update the iter_mask to exclude the outlier bins for the next
# iteration
iter_mask[e_idx, outlier_inds] = True
all_channel_mask[outlier_inds] = True
good_mask[outlier_inds] = False
iterations[e_idx] = it + 1
# Check for convergence: if the standard deviation difference from
# poisson stats is below the threshold, then we can stop iterating for this
Expand All @@ -868,17 +876,13 @@ def flag_statistical_outliers(
break

if combine_flags_across_energy_bins:
# If a spin bin is flagged in any energy channel flag it in all energy channels
# Use np.any to check if a spin bin is flagged in any energy channel,
# then flag it in all energy channels
combined_mask = np.any(quality_stats, axis=0) # (n_spin_bins,)
quality_stats[:] = combined_mask # Broadcast to all energy bins

# If true, then use the all_channel_mask for every energy channel.
quality_stats[:] = all_channel_mask
# Recalculate convergence with the combined mask.
for e_idx in range(n_energy_bins):
if not convergence[e_idx]:
# Select counts that have not been flagged in any channel.
counts = count_summary[e_idx, ~combined_mask]
counts = count_summary[e_idx, ~all_channel_mask]
std_ratio, _ = get_poisson_stats(counts)
if std_ratio < std_threshold:
convergence[e_idx] = True
Expand Down Expand Up @@ -1064,7 +1068,7 @@ def get_valid_earth_angle_events(
A boolean array indicating which events have Earth angle greater than the
specified threshold.
"""
de_dps_velocity = de_dataset_subset["de_dps_velocity"].values
velocity_dps_sc = de_dataset_subset["velocity_dps_sc"].values
# Use the mean event time to compute the Earth unit vector since the spacecraft
# position doesn't change significantly over the course of the energy bin.
et = np.mean(de_dataset_subset["event_times"].values)
Expand All @@ -1082,10 +1086,10 @@ def get_valid_earth_angle_events(
distance = np.linalg.norm(position)
earth_unit_vector = position / distance
# Calculate the magnitude of the velocity vector for each event
particle_mag = np.linalg.norm(de_dps_velocity, axis=1)
particle_mag = np.linalg.norm(velocity_dps_sc, axis=1)
# Normalize and flip to get where each particle is looking.
unit_look_dirs = (
-de_dps_velocity / particle_mag[:, np.newaxis]
-velocity_dps_sc / particle_mag[:, np.newaxis]
) # shape (n_events, 3)
# Get cos(theta) between each particle look direction and Earth direction
cos_sep = np.dot(unit_look_dirs, earth_unit_vector) # shape (n_events,)
Expand Down Expand Up @@ -1217,7 +1221,11 @@ def get_binned_spins_edges(
)
# Append the last start time plus the spin period to account for low times
# that occur after the last spin start time
spin_tbin_edges = np.append(spin_tbin_edges, spin_tbin_edges[-1] + spin_periods[-1])
last_spin_idx = min(n_spin_bins * spin_bin_size - 1, len(spins) - 1)
spin_tbin_edges = np.append(
spin_tbin_edges, spin_start_times[last_spin_idx] + spin_periods[last_spin_idx]
)
return spin_tbin_edges
return spin_tbin_edges


Expand Down