diff --git a/imap_processing/tests/external_test_data_config.py b/imap_processing/tests/external_test_data_config.py index 98a71415e..79c13668b 100644 --- a/imap_processing/tests/external_test_data_config.py +++ b/imap_processing/tests/external_test_data_config.py @@ -152,7 +152,7 @@ ("extendedspin_test_data_repoint00047.csv", "ultra/data/l1/"), ("status_test_data_repoint00047.csv", "ultra/data/l1/"), ("voltage_culling_results_repoint00047.csv", "ultra/data/l1/"), - ("validate_high_energy_culling_results_repoint00047.csv", "ultra/data/l1/"), + ("validate_high_energy_culling_results_repoint00047_v2.csv", "ultra/data/l1/"), ("de_test_data_repoint00047.csv", "ultra/data/l1/"), ("FM45_UltraFM45Extra_TV_Tests_2024-01-22T0930_20240122T093008.CCSDS", "ultra/data/l0/"), ("ultra45_raw_sc_rawnrgevnt_19840122_00.csv", "ultra/data/l0/"), diff --git a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py index 2277e489b..5757dc781 100644 --- a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py +++ b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py @@ -632,6 +632,10 @@ def test_get_valid_events_per_energy_range_ultra45(mock_spkezr): "imap_processing.ultra.l1b.ultra_l1b_culling.UltraConstants.HIGH_ENERGY_CULL_CHANNEL", 2, ) +@mock.patch( + "imap_processing.ultra.l1b.ultra_l1b_culling.UltraConstants.HIGH_ENERGY_COMBINED_SPIN_BIN_RADIUS", + None, +) def test_flag_high_energy(): """Tests flag_high_energy function.""" # 7-18 is the culling energy bin and shown in the mock.patch above @@ -695,11 +699,11 @@ def test_validate_high_energy_cull(): """Validate that high energy spins are correctly flagged""" # Mock thresholds to match the test data (I used fake ones to create more # complexity) - mock_thresholds = np.array([0.05, 1.5, 0.6, 119.2, 0.2, 0.25]) * 20 + mock_thresholds = np.array([0.05, 1.5, 0.6, 119.2, 0.2]) * 20 # read test data from csv files xspin = pd.read_csv(TEST_PATH / "extendedspin_test_data_repoint00047.csv") expected_qf = pd.read_csv( - TEST_PATH / "validate_high_energy_culling_results_repoint00047.csv" + TEST_PATH / "validate_high_energy_culling_results_repoint00047_v2.csv" ).to_numpy() de_df = pd.read_csv(TEST_PATH / "de_test_data_repoint00047.csv") de_ds = xr.Dataset( @@ -848,4 +852,13 @@ def test_get_energy_range_flags(): energy_ranges = get_binned_energy_ranges(intervals) flags = get_energy_range_flags(energy_ranges) - np.testing.assert_array_equal(flags, 2 ** np.arange(6)) + np.testing.assert_array_equal(flags, 2 ** np.arange(5)) + + +def test_get_binned_energy_ranges(): + """Tests get_binned_energy_ranges function.""" + intervals, _, _ = build_energy_bins() + energy_ranges = get_binned_energy_ranges(intervals) + + expected_energy_ranges = np.array([4.2, 9.4425, 21.2116, 47.2388, 105.202, 316.335]) + np.testing.assert_array_equal(energy_ranges, expected_energy_ranges) diff --git a/imap_processing/ultra/constants.py b/imap_processing/ultra/constants.py index bb6c52b79..1ce6a2c3b 100644 --- a/imap_processing/ultra/constants.py +++ b/imap_processing/ultra/constants.py @@ -191,19 +191,25 @@ class UltraConstants: # Number of energy bins to use in energy dependent culling N_CULL_EBINS = 8 # Bin to start culling at - BASE_CULL_EBIN = 4 + BASE_CULL_EBIN = 3 + # Maximum energy threshold in keV. When creating the energy ranges for culling, + # merge all energy bins above this threshold into one bin. + MAX_ENERGY_THRESHOLD = 100 # Angle threshold in radians for ULTRA 45 degree culling. - # This is only needed for ULTRA 45 since earth may be in the FOV. - EARTH_ANGLE_45_THRESHOLD = np.radians(15) + # This is only needed for ULTRA 45 since Earth may be in the FOV. + EARTH_ANGLE_45_THRESHOLD = np.radians(20) # An array of energy thresholds to use for culling. Each one corresponds to # the number of energy bins used. # n_bins=len(PSET_ENERGY_BIN_EDGES)[BASE_CULL_EBIN:] // N_CULL_EBINS # an error will be raised if this does not match n_bins - HIGH_ENERGY_CULL_THRESHOLDS = ( - np.array([2.0, 1.5, 0.6, 0.25, 0.25, 0.25]) * SPIN_BIN_SIZE - ) + HIGH_ENERGY_CULL_THRESHOLDS = np.array([2.0, 1.5, 0.6, 0.2, 0.2]) * SPIN_BIN_SIZE # Use the channel defined below to determine which spins are contaminated HIGH_ENERGY_CULL_CHANNEL = 4 + # For the high energy cull, we want to combine spin bins because an SEP event is + # expected to be over a longer time period. Low voltage and statistical culling + # will still be done on the original spin bins. The variable below defines the + # radius (in number of spin bins) to use when combining for the high energy cull. + HIGH_ENERGY_COMBINED_SPIN_BIN_RADIUS = 3 # Number of iterations to perform for statistical outlier culling. STAT_CULLING_N_ITER = 5 # Sigma threshold to use for statistical outlier culling. diff --git a/imap_processing/ultra/l1b/ultra_l1b_culling.py b/imap_processing/ultra/l1b/ultra_l1b_culling.py index a803de862..0d13b3c2b 100644 --- a/imap_processing/ultra/l1b/ultra_l1b_culling.py +++ b/imap_processing/ultra/l1b/ultra_l1b_culling.py @@ -7,6 +7,7 @@ import pandas as pd import spiceypy as sp import xarray as xr +from numpy.lib.stride_tricks import sliding_window_view from numpy.typing import NDArray from imap_processing.quality_flags import ( @@ -715,7 +716,12 @@ def flag_high_energy( # the counts per spin energy_thresholds = energy_thresholds[:, np.newaxis] # Shape (n_energy_bins, 1) cull_channel = UltraConstants.HIGH_ENERGY_CULL_CHANNEL - n_energy_bins = len(energy_thresholds) + n_energy_bins = len(energy_ranges) - 1 + if len(energy_thresholds) != n_energy_bins: + raise ValueError( + f"Length of energy_thresholds ({len(energy_thresholds)}) must match" + f" the number of energy bins ({n_energy_bins})." + ) if cull_channel >= n_energy_bins: raise ValueError( f"HIGH_ENERGY_CULL_CHANNEL ({cull_channel}) is out of bounds" @@ -728,7 +734,11 @@ def flag_high_energy( # Get valid events and counts at each spin bin for the # designated culling channel. de_counts = get_valid_de_count_summary( - de_dataset, energy_ranges, spin_tbin_edges, sensor_id + de_dataset, + energy_ranges, + spin_tbin_edges, + UltraConstants.HIGH_ENERGY_COMBINED_SPIN_BIN_RADIUS, + sensor_id, ) cull_channel_counts = de_counts[cull_channel] # flag spins where the counts in the cull channel exceed the threshold for that @@ -825,7 +835,7 @@ def flag_statistical_outliers( # keep track of the standard deviation difference from poisson stats per energy bin std_diff = np.zeros(n_energy_bins, dtype=float) count_summary = get_valid_de_count_summary( - de_dataset, energy_ranges, spin_tbin_edges, sensor_id + de_dataset, energy_ranges, spin_tbin_edges, sensor_id=sensor_id ) # shape (n_energy_bins, n_spin_bins) for e_idx in np.arange(n_energy_bins): for it in range(n_iterations): @@ -923,6 +933,7 @@ def get_valid_de_count_summary( de_dataset: xr.Dataset, energy_ranges: NDArray, spin_tbin_edges: NDArray, + combine_spin_bin_radius: int | None = None, sensor_id: int = 90, ) -> NDArray: """ @@ -936,6 +947,9 @@ def get_valid_de_count_summary( Array of energy range edges. spin_tbin_edges : numpy.ndarray Array of spin time bin edges. + combine_spin_bin_radius : int + If not None, average counts across this many spin bins x 2 to get a smoother + estimate of the counts per bin. sensor_id : int Sensor ID (e.g., 45 or 90). @@ -954,6 +968,17 @@ def get_valid_de_count_summary( de_dataset["de_event_met"].values[valid_events[i, :]], bins=spin_tbin_edges ) + if combine_spin_bin_radius is not None and combine_spin_bin_radius > 0: + # Pad array along the spin bin axis to ensure sliding_window_view returns + # an array of the correct shape. + counts_padded = np.pad( + counts, + ((0, 0), (combine_spin_bin_radius, combine_spin_bin_radius)), + mode="edge", + ) + window_size = combine_spin_bin_radius * 2 + 1 + windows = sliding_window_view(counts_padded, window_shape=window_size, axis=1) + counts = np.mean(windows, axis=-1) return counts @@ -1099,6 +1124,7 @@ def get_energy_range_flags(energy_ranges_edges: NDArray) -> NDArray: def get_binned_energy_ranges( energy_bin_edges: list[tuple[float, float]], + max_energy: int | None = UltraConstants.MAX_ENERGY_THRESHOLD, ) -> NDArray: """ Create L1C energy ranges by grouping energy bins. @@ -1107,6 +1133,8 @@ def get_binned_energy_ranges( ---------- energy_bin_edges : list[tuple[float, float]] List of (start, stop) tuples for each energy bin. + max_energy : int | None + Maximum energy to include in the energy ranges. If None, don't set a max. Returns ------- @@ -1128,6 +1156,27 @@ def get_binned_energy_ranges( energy_ranges = np.append( energy_starts, energy_bin_edges[last_group_end_ind - 1][1] ) + + if max_energy is not None: + # get the first index where the energy range exceeds the max energy + # exclude the last edge since it is the stop energy of the last range + max_reached_idx = np.where(energy_ranges[:-1] > max_energy)[0] + if np.any(energy_ranges[:-1] > max_energy): + max_reached_idx = max_reached_idx[0] + else: + # if no energy range exceeds the max energy, return the original energy + # ranges + return energy_ranges + # Merge all energy ranges above the max energy into a single range and set the + # stop + energy_ranges_lim = energy_ranges[ + : max_reached_idx + 2 + ].copy() # include the first edge above max energy and the last edge + # Set the last edge to be the max energy to make the last bin a "catch-all" for + # all energies above the max energy. + energy_ranges_lim[-1] = energy_ranges[-1] + energy_ranges = energy_ranges_lim + return energy_ranges