diff --git a/CITATION.cff b/CITATION.cff index 637c790..b22cc5d 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -37,8 +37,8 @@ authors: family-names: Vainio affiliation: University of Turku orcid: https://orcid.org/0000-0002-3298-2067 -version: 0.4.2 -date-released: 2025-12-17 +version: 0.4.3 +date-released: 2026-01-23 repository-code: https://github.com/serpentine-h2020/SEPpy license: BSD-3-Clause preferred-citation: diff --git a/seppy/__init__.py b/seppy/__init__.py index e2e1ede..3ce7fec 100644 --- a/seppy/__init__.py +++ b/seppy/__init__.py @@ -1,4 +1,7 @@ from .version import version as __version__ +from seppy.util import custom_warning # __all__ = [] # defines which functions, variables etc. will be loaded when running "from pyonset import *" + +custom_warning('Breaking changes in SEPpy v0.4.0: The metadata for SOHO/EPHIN, SOHO/ERNE, STEREO/SEPT, and Wind/3DP have changed! See https://github.com/serpentine-h2020/SEPpy/releases/tag/v0.4.0 for details. (You can ignore this if you do not invoke SEPpy manually.)') diff --git a/seppy/loader/bepi.py b/seppy/loader/bepi.py index 74c6b1e..8143095 100644 --- a/seppy/loader/bepi.py +++ b/seppy/loader/bepi.py @@ -158,7 +158,7 @@ def bepi_sixsp_l3_loader(startdate, enddate=None, resample=None, path=None, pos_ # optional resampling: if isinstance(resample, str): if len(df) > 0: - df = resample_df(df, resample, pos_timestamp=pos_timestamp) + df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc='auto', verbose=False) else: df = [] diff --git a/seppy/loader/juice.py b/seppy/loader/juice.py index c1ca78f..c27c8b1 100644 --- a/seppy/loader/juice.py +++ b/seppy/loader/juice.py @@ -142,7 +142,7 @@ def juice_radem_load(startdate, enddate, resample=None, path=None, pos_timestamp df['TIME_UTC'] = pd.to_datetime(df['TIME_UTC']) if resample: - df = resample_df(df, resample, pos_timestamp=pos_timestamp) + df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=[], verbose=False) energies_dict, metadata_dict = juice_radem_load_metadata(filename=downloaded_files[0]) diff --git a/seppy/loader/psp.py b/seppy/loader/psp.py index 23af90c..cec3a96 100644 --- a/seppy/loader/psp.py +++ b/seppy/loader/psp.py @@ -44,152 +44,6 @@ def _get_cdf_vars(cdf): return var_list -# def _cdf2df_3d_psp(cdf, index_key, dtimeindex=True, ignore=None, include=None): -# """ -# Converts a cdf file to a pandas dataframe. -# Note that this only works for 1 dimensional data, other data such as -# distribution functions or pitch angles will not work properly. -# Parameters -# ---------- -# cdf : cdf -# Opened CDF file. -# index_key : str -# The CDF key to use as the index in the output DataFrame. -# dtimeindex : bool -# If ``True``, the DataFrame index is parsed as a datetime. -# Default is ``True``. -# ignore : list -# In case a CDF file has columns that are unused / not required, then -# the column names can be passed as a list into the function. -# include : str, list -# If only specific columns of a CDF file are desired, then the column -# names can be passed as a list into the function. Should not be used -# with ``ignore``. -# Returns -# ------- -# df : :class:`pandas.DataFrame` -# Data frame with read in data. -# """ -# if include is not None: -# if ignore is not None: -# raise ValueError('ignore and include are incompatible keywords') -# if isinstance(include, str): -# include = [include] -# if index_key not in include: -# include.append(index_key) - -# # Extract index values -# index_info = cdf.varinq(index_key) -# if index_info['Last_Rec'] == -1: -# warnings.warn(f"No records present in CDF file {cdf.cdf_info()['CDF'].name}") -# return_df = pd.DataFrame() -# else: -# index = cdf.varget(index_key) -# try: -# # If there are multiple indexes, take the first one -# # TODO: this is just plain wrong, there should be a way to get all -# # the indexes out -# index = index[...][:, 0] -# except IndexError: -# pass - -# if dtimeindex: -# index = cdflib.epochs.CDFepoch.breakdown(index, to_np=True) -# index_df = pd.DataFrame({'year': index[:, 0], -# 'month': index[:, 1], -# 'day': index[:, 2], -# 'hour': index[:, 3], -# 'minute': index[:, 4], -# 'second': index[:, 5], -# 'ms': index[:, 6], -# }) -# # Not all CDFs store pass milliseconds -# try: -# index_df['us'] = index[:, 7] -# index_df['ns'] = index[:, 8] -# except IndexError: -# pass -# index = pd.DatetimeIndex(pd.to_datetime(index_df), name='Time') -# data_dict = {} -# npoints = len(index) - -# var_list = _get_cdf_vars(cdf) -# keys = {} -# # Get mapping from each attr to sub-variables -# for cdf_key in var_list: -# if ignore: -# if cdf_key in ignore: -# continue -# elif include: -# if cdf_key not in include: -# continue -# if cdf_key == 'Epoch': -# keys[cdf_key] = 'Time' -# else: -# keys[cdf_key] = cdf_key -# # Remove index key, as we have already used it to create the index -# keys.pop(index_key) -# # Remove keys for data that doesn't have the right shape to load in CDF -# # Mapping of keys to variable data -# vars = {} -# for cdf_key in keys.copy(): -# try: -# vars[cdf_key] = cdf.varget(cdf_key) -# except ValueError: -# vars[cdf_key] = '' -# for cdf_key in keys: -# var = vars[cdf_key] -# if type(var) is np.ndarray: -# key_shape = var.shape -# if len(key_shape) == 0 or key_shape[0] != npoints: -# vars.pop(cdf_key) -# else: -# vars.pop(cdf_key) - -# # Loop through each key and put data into the dataframe -# for cdf_key in vars: -# df_key = keys[cdf_key] -# # Get fill value for this key -# # First catch string FILLVAL's -# if type(cdf.varattsget(cdf_key)['FILLVAL']) is str: -# fillval = cdf.varattsget(cdf_key)['FILLVAL'] -# else: -# try: -# fillval = float(cdf.varattsget(cdf_key)['FILLVAL']) -# except KeyError: -# fillval = np.nan - -# if isinstance(df_key, list): -# for i, subkey in enumerate(df_key): -# data = vars[cdf_key][...][:, i] -# data = _fillval_nan(data, fillval) -# data_dict[subkey] = data -# else: -# # If ndims is 1, we just have a single column of data -# # If ndims is 2, have multiple columns of data under same key -# # If ndims is 3, have multiple columns of data under same key, with 2 sub_keys (e.g., energy and pitch-angle) -# key_shape = vars[cdf_key].shape -# ndims = len(key_shape) -# if ndims == 1: -# data = vars[cdf_key][...] -# data = _fillval_nan(data, fillval) -# data_dict[df_key] = data -# elif ndims == 2: -# for i in range(key_shape[1]): -# data = vars[cdf_key][...][:, i] -# data = _fillval_nan(data, fillval) -# data_dict[f'{df_key}_{i}'] = data -# elif ndims == 3: -# for i in range(key_shape[2]): -# for j in range(key_shape[1]): -# data = vars[cdf_key][...][:, j, i] -# data = _fillval_nan(data, fillval) -# data_dict[f'{df_key}_E{i}_P{j}'] = data -# return_df = pd.DataFrame(index=index, data=data_dict) - -# return return_df - - def psp_isois_load(dataset, startdate, enddate, epilo_channel='F', epilo_threshold=None, path=None, resample=None, all_columns=False): """ Downloads CDF files via SunPy/Fido from CDAWeb for ISOIS onboard PSP @@ -375,7 +229,13 @@ def psp_isois_load(dataset, startdate, enddate, epilo_channel='F', epilo_thresho energies_dict = '' if isinstance(resample, str): - df = resample_df(df=df, resample=resample, pos_timestamp="center", origin="start") + if dataset.upper() in ['PSP_ISOIS-EPILO_L2-PE']: + cols_unc = [] + keywords_unc = [] + elif dataset.upper() in ['PSP_ISOIS-EPIHI_L2-HET-RATES60', 'PSP_ISOIS-EPIHI_L2-LET1-RATES60', 'PSP_ISOIS-EPIHI_L2-LET2-RATES60', 'PSP_ISOIS-EPILO_L2-IC']: + cols_unc = 'auto' + keywords_unc=['unc', 'err', 'sigma', '_DELTA_'] # 'PSP_ISOIS-EPILO_L2-IC' has 'H_Flux_ChanP_DELTA_Exx_Pxx' uncertainty columns + df = resample_df(df=df, resample=resample, pos_timestamp="center", origin="start", cols_unc=cols_unc, verbose=False, keywords_unc=keywords_unc) except (RuntimeError, IndexError): print(f'Unable to obtain "{dataset}" data!') @@ -619,7 +479,7 @@ def _read_cdf_psp(fname, index_key, ignore_vars=[]): # Only for selected index_key: index = cdf.varget(index_key) - # TODO: use to_astropy_time() instead here when we drop pandas in timeseries + # use to_astropy_time() instead here when we drop pandas in timeseries index = CDFepoch.to_datetime(index) # df = pd.DataFrame(index=pd.DatetimeIndex(name=index_key, data=index)) # units = {} diff --git a/seppy/loader/soho.py b/seppy/loader/soho.py index 63acb5f..e2d85e1 100644 --- a/seppy/loader/soho.py +++ b/seppy/loader/soho.py @@ -140,7 +140,7 @@ def soho_load(dataset, startdate, enddate, path=None, resample=None, pos_timesta if os.path.exists(f) and os.path.getsize(f) == 0: os.remove(f) if not os.path.exists(f): - downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn) + _downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn) # downloaded_files = Fido.fetch(result, path=path, max_conn=max_conn) # use Fido.fetch(result, path='/ThisIs/MyPath/to/Data/{file}') to use a specific local folder for saving data files # downloaded_files.sort() @@ -148,10 +148,10 @@ def soho_load(dataset, startdate, enddate, path=None, resample=None, pos_timesta df = data.to_dataframe() metadata = _get_metadata(dataset, downloaded_files[0]) - if dataset.upper() == 'SOHO_ERNE-HED_L2-1MIN' or dataset.upper() == 'SOHO_ERNE-LED_L3I-1MIN': - custom_warning(f'The format of "channels_dict_df_p" in the metadata for {dataset} has been changed providing the full width of energy channels for DE (instead of the half)!') - elif dataset.upper() == 'SOHO_ERNE-LED_L2-1MIN': - custom_warning(f'The format of the metadata for {dataset} has been changed. The previous metadata is now provided in meta["energy_labels"]!') + # if dataset.upper() == 'SOHO_ERNE-HED_L2-1MIN' or dataset.upper() == 'SOHO_ERNE-LED_L3I-1MIN': + # custom_warning(f'The format of "channels_dict_df_p" in the metadata for {dataset} has been changed providing the full width of energy channels for DE (instead of the half)!') + # elif dataset.upper() == 'SOHO_ERNE-LED_L2-1MIN': + # custom_warning(f'The format of the metadata for {dataset} has been changed. The previous metadata is now provided in meta["energy_labels"]!') # remove this (i.e. following lines) when sunpy's read_cdf is updated, # and FILLVAL will be replaced directly, see @@ -178,7 +178,13 @@ def soho_load(dataset, startdate, enddate, path=None, resample=None, pos_timesta df.index = df.index-pd.Timedelta('7.5s') if isinstance(resample, str): - df = resample_df(df, resample, pos_timestamp=pos_timestamp) + if dataset.upper() in ['SOHO_ERNE-HED_L2-1MIN', 'SOHO_ERNE-LED_L2-1MIN']: + cols_unc = [] + keywords_unc = [] + elif dataset.upper() in ['SOHO_COSTEP-EPHIN_L3I-1MIN']: + cols_unc = 'auto' + keywords_unc = ['_sys_', '_stat_'] + df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=cols_unc, verbose=False, keywords_unc=keywords_unc) except (RuntimeError, IndexError): print(f'Unable to obtain "{dataset}" data!') downloaded_files = [] @@ -406,7 +412,7 @@ def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns= cs_he25 = '25 - 53 MeV/n' if max(fmodes)==2: # # warnings.warn('Careful: EPHIN ring off!') - custom_warning('SOHO/EPHIN ring is off! This means high risk of contaminated measurements!') + custom_warning('SOHO/EPHIN ring is off. This means high risk of contaminated measurements!') # failure mode D since 4 Oct 2017: # dates[-1].date() is enddate, used to catch cases when enddate is a string @@ -426,7 +432,7 @@ def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns= # optional resampling: if isinstance(resample, str): - df = resample_df(df, resample, pos_timestamp=pos_timestamp) + df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=[], verbose=False) else: df = [] diff --git a/seppy/loader/stereo.py b/seppy/loader/stereo.py index 86d58e1..f5189e1 100644 --- a/seppy/loader/stereo.py +++ b/seppy/loader/stereo.py @@ -17,7 +17,7 @@ from sunpy.net import attrs as a from sunpy.timeseries import TimeSeries -from seppy.util import custom_notification, custom_warning, resample_df +from seppy.util import resample_df # custom_notification, custom_warning # omit Pandas' PerformanceWarning warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning) @@ -212,9 +212,9 @@ def stereo_sept_loader(startdate, enddate, spacecraft, species, viewing, resampl # optional resampling: if isinstance(resample, str): - df = resample_df(df, resample, pos_timestamp=pos_timestamp) + df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc='auto', verbose=False) - custom_warning('The format of "channels_dict_df_X" in the the metadata for STEREO/SEPT has been changed providing "mean_E" in MeV (instead of keV)! The metadata is also now given as a dictionary containing the dataframe "channels_dict_df_X".') + # custom_warning('The format of "channels_dict_df_X" in the the metadata for STEREO/SEPT has been changed providing "mean_E" in MeV (instead of keV)! The metadata is also now given as a dictionary containing the dataframe "channels_dict_df_X".') else: df = [] @@ -416,7 +416,7 @@ def stereo_load(instrument, startdate, enddate, spacecraft='ahead', mag_coord='R if os.path.exists(f) and os.path.getsize(f) == 0: os.remove(f) if not os.path.exists(f): - downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn) + _downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn) # downloaded_files = Fido.fetch(result, path=path, max_conn=max_conn) data = TimeSeries(downloaded_files, concatenate=True) @@ -464,7 +464,11 @@ def stereo_load(instrument, startdate, enddate, spacecraft='ahead', mag_coord='R df.index = df.index-pd.Timedelta('30s') if isinstance(resample, str): - df = resample_df(df, resample, pos_timestamp=pos_timestamp) + if instrument.upper() in ['LET', 'MAG', 'MAGB', 'MAGPLASMA']: + cols_unc = [] + elif instrument.upper() in ['HET']: + cols_unc = 'auto' + df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=cols_unc, verbose=False) except (RuntimeError, IndexError): print(f'Unable to obtain "{dataset}" data for {startdate}-{enddate}!') downloaded_files = [] diff --git a/seppy/loader/wind.py b/seppy/loader/wind.py index e9b65fd..94931bb 100644 --- a/seppy/loader/wind.py +++ b/seppy/loader/wind.py @@ -14,7 +14,7 @@ from sunpy.net import Fido from sunpy.net import attrs as a -from seppy.util import custom_notification, custom_warning, resample_df +from seppy.util import resample_df # custom_notification, custom_warning logger = pooch.get_logger() @@ -79,7 +79,7 @@ def wind3dp_download_fido(dataset, startdate, enddate, path=None, max_conn=5): if os.path.exists(f) and os.path.getsize(f) == 0: os.remove(f) if not os.path.exists(f): - downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn) + _downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn) # downloaded_files = Fido.fetch(result, path=path, max_conn=max_conn) # downloaded_files.sort() except (RuntimeError, IndexError): @@ -218,7 +218,7 @@ def _wind3dp_load(files, resample="1min", threshold=None): df = pd.concat([df2, df], axis=1) if isinstance(resample, str): - df = resample_df(df=df, resample=resample, pos_timestamp="center", origin="start") + df = resample_df(df=df, resample=resample, pos_timestamp="center", origin="start", cols_unc=[]) return df # except: @@ -289,7 +289,7 @@ def wind3dp_load(dataset, startdate, enddate, resample="1min", multi_index=True, for col in ['mean_E', 'delta_e', 'DE', 'lower_E', 'upper_E']: energies[col] = energies[col]/1e6 # convert energies to MeV - custom_warning('Wind/3DP: The energy values in the metadata are now provided as MeV (previously eV)! Also DE is now 2*delta_e (previously just delta_e). Please adapt your scripts accordingly!') + # custom_warning('Wind/3DP: The energy values in the metadata are now provided as MeV (previously eV)! Also DE is now 2*delta_e (previously just delta_e). Please adapt your scripts accordingly!') meta = {'channels_dict_df': energies, 'ENERGY_UNITS': metacdf.varattsget('ENERGY')['UNITS'], @@ -301,7 +301,7 @@ def wind3dp_load(dataset, startdate, enddate, resample="1min", multi_index=True, try: meta['APPROX_ENERGY_LABELS'] = metacdf.varget('APPROX_ENERGY_LABELS') meta['FLUX_LABELS'] = metacdf.varget('FLUX_ENERGY_LABL') - except: + except ValueError: pass # create multi-index data frame of flux @@ -362,12 +362,12 @@ def _read_cdf_wind3dp(fname, ignore_vars=[]): cdf = cdflib.CDF(str(fname)) # Extract the time varying variables cdf_info = cdf.cdf_info() - meta = cdf.globalattsget() + # meta = cdf.globalattsget() if hasattr(cdflib, "__version__") and Version(cdflib.__version__) >= Version("1.0.0"): all_var_keys = cdf_info.rVariables + cdf_info.zVariables else: all_var_keys = cdf_info['rVariables'] + cdf_info['zVariables'] - var_attrs = {key: cdf.varattsget(key) for key in all_var_keys} + # var_attrs = {key: cdf.varattsget(key) for key in all_var_keys} # Get keys that depend on time # var_keys = [var for var in var_attrs if 'DEPEND_0' in var_attrs[var] and var_attrs[var]['DEPEND_0'] is not None] @@ -379,7 +379,7 @@ def _read_cdf_wind3dp(fname, ignore_vars=[]): # Manually define time index key for Wind/3DP cdf files, as they don't follow the standard time_index_keys = [var_keys.pop(var_keys.index('Epoch'))] - all_ts = [] + # all_ts = [] # For each time index, construct a GenericTimeSeries for index_key in time_index_keys: try: @@ -387,7 +387,7 @@ def _read_cdf_wind3dp(fname, ignore_vars=[]): except ValueError: # Empty index for cdflib >= 0.3.20 continue - # TODO: use to_astropy_time() instead here when we drop pandas in timeseries + # use to_astropy_time() instead here when we drop pandas in timeseries index = CDFepoch.to_datetime(index) df = pd.DataFrame(index=pd.DatetimeIndex(name=index_key, data=index)) # units = {} @@ -397,7 +397,7 @@ def _read_cdf_wind3dp(fname, ignore_vars=[]): if var_key in ignore_vars: continue # leave for-loop, skipping var_key - attrs = var_attrs[var_key] + # attrs = var_attrs[var_key] # Skip the following check for Wind/3DP cdf files, as they don't follow the standard # # If this variable doesn't depend on this index, continue # if attrs['DEPEND_0'] != index_key: diff --git a/seppy/tests/baseline/test_onset_Bepi_SIXS_L2_offline.png b/seppy/tests/baseline/test_onset_Bepi_SIXS_L2_offline.png index 9a88df0..6bfa3ae 100644 Binary files a/seppy/tests/baseline/test_onset_Bepi_SIXS_L2_offline.png and b/seppy/tests/baseline/test_onset_Bepi_SIXS_L2_offline.png differ diff --git a/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_0-electrons-channels0-5min.png b/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_0-electrons-channels0-5min.png index 3ab4c8f..76e0660 100644 Binary files a/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_0-electrons-channels0-5min.png and b/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_0-electrons-channels0-5min.png differ diff --git a/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_1-electrons-2-None.png b/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_1-electrons-2-None.png index 0d77165..9ba7956 100644 Binary files a/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_1-electrons-2-None.png and b/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_1-electrons-2-None.png differ diff --git a/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_2-protons-channels1-10min.png b/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_2-protons-channels1-10min.png index 6edc7e3..8410ea4 100644 Binary files a/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_2-protons-channels1-10min.png and b/seppy/tests/baseline/test_onset_Bepi_SIXS_L3_online_2-protons-channels1-10min.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_PSP_ISOIS_EPIHI_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_PSP_ISOIS_EPIHI_online.png index 4ea230e..2b4b1fe 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_PSP_ISOIS_EPIHI_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_PSP_ISOIS_EPIHI_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_PSP_ISOIS_EPILO_e_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_PSP_ISOIS_EPILO_e_online.png index 71d7572..e397fcf 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_PSP_ISOIS_EPILO_e_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_PSP_ISOIS_EPILO_e_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_SOHO_EPHIN_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_SOHO_EPHIN_online.png index 888fc01..7fe6592 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_SOHO_EPHIN_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_SOHO_EPHIN_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_SOHO_ERNE_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_SOHO_ERNE_online.png index 01a1880..e743364 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_SOHO_ERNE_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_SOHO_ERNE_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_EPT_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_EPT_online.png index 6843af8..c3b9808 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_EPT_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_EPT_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_HET_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_HET_online.png index 26fc30b..1694f18 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_HET_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_HET_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_STEP_ions_new_data_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_STEP_ions_new_data_online.png index 68a8f07..0c62ea7 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_STEP_ions_new_data_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_STEP_ions_new_data_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_STEP_ions_old_data_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_STEP_ions_old_data_online.png index 3ef6617..64d86de 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_STEP_ions_old_data_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_SOLO_STEP_ions_old_data_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOA_SEPT_e_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOA_SEPT_e_online.png index 2c53201..cc342fa 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOA_SEPT_e_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOA_SEPT_e_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOA_SEPT_p_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOA_SEPT_p_online.png index 21feac5..964f6af 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOA_SEPT_p_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOA_SEPT_p_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOB_HET_e_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOB_HET_e_online.png index 1bc5230..42c8b99 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOB_HET_e_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOB_HET_e_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOB_HET_p_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOB_HET_p_online.png index 63432a9..d4ee8af 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOB_HET_p_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_STEREOB_HET_p_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_Wind_3DP_e_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_Wind_3DP_e_online.png index 665fd20..552ef62 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_Wind_3DP_e_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_Wind_3DP_e_online.png differ diff --git a/seppy/tests/baseline/test_onset_spectrum_tsa_Wind_3DP_p_online.png b/seppy/tests/baseline/test_onset_spectrum_tsa_Wind_3DP_p_online.png index ee692d4..3359e3b 100644 Binary files a/seppy/tests/baseline/test_onset_spectrum_tsa_Wind_3DP_p_online.png and b/seppy/tests/baseline/test_onset_spectrum_tsa_Wind_3DP_p_online.png differ diff --git a/seppy/tests/baseline/test_onset_tsa_SOHO_ERNE_offline.png b/seppy/tests/baseline/test_onset_tsa_SOHO_ERNE_offline.png index 7e0a1b4..dbc89eb 100644 Binary files a/seppy/tests/baseline/test_onset_tsa_SOHO_ERNE_offline.png and b/seppy/tests/baseline/test_onset_tsa_SOHO_ERNE_offline.png differ diff --git a/seppy/tests/test_loader.py b/seppy/tests/test_loader.py index 1a4c555..bc2f9e9 100644 --- a/seppy/tests/test_loader.py +++ b/seppy/tests/test_loader.py @@ -51,7 +51,7 @@ def test_juice_radem_load_without_resample(): assert metadata['PROTONS']['FILLVAL'] == 4294967295 -def test_juice_radem_load_wit_resample(): +def test_juice_radem_load_with_resample(): df, energies, metadata = juice_radem_load(startdate=dt.datetime(2025, 1, 1), enddate=dt.datetime(2025, 1, 1), resample='1h', path=None, pos_timestamp="start") assert "TIME_OBT" not in df.columns assert pd.api.types.is_datetime64_any_dtype(df["TIME_UTC"]) @@ -60,6 +60,17 @@ def test_juice_radem_load_wit_resample(): assert df['PROTONS_5'].sum() == pytest.approx(np.float64(0.8500000000000001)) assert energies['LABEL_PROTONS'][0] == 'P-Stack_Bin_1' assert metadata['PROTONS']['FILLVAL'] == 4294967295 + assert df.keys().to_list() == ['CONFIGURATION_ID', 'CUSTOM_0', 'CUSTOM_1', 'CUSTOM_2', 'CUSTOM_3', + 'CUSTOM_4', 'CUSTOM_5', 'CUSTOM_6', 'CUSTOM_7', 'CUSTOM_8', 'CUSTOM_9', + 'CUSTOM_10', 'CUSTOM_11', 'DD_0', 'DD_1', 'DD_2', 'DD_3', 'DD_4', + 'DD_5', 'DD_6', 'DD_7', 'DD_8', 'DD_9', 'DD_10', 'DD_11', 'DD_12', + 'DD_13', 'DD_14', 'DD_15', 'DD_16', 'DD_17', 'DD_18', 'DD_19', 'DD_20', + 'DD_21', 'DD_22', 'DD_23', 'DD_24', 'DD_25', 'DD_26', 'DD_27', 'DD_28', + 'DD_29', 'DD_30', 'ELECTRONS_0', 'ELECTRONS_1', 'ELECTRONS_2', + 'ELECTRONS_3', 'ELECTRONS_4', 'ELECTRONS_5', 'ELECTRONS_6', + 'ELECTRONS_7', 'HEAVY_IONS_0', 'HEAVY_IONS_1', 'INTEGRATION_TIME', + 'PROTONS_0', 'PROTONS_1', 'PROTONS_2', 'PROTONS_3', 'PROTONS_4', + 'PROTONS_5', 'PROTONS_6', 'PROTONS_7', 'TIME_UTC'] def test_psp_load_online(): @@ -118,7 +129,7 @@ def test_psp_load_online(): # assert np.sum(np.isnan(df['B_H_Uncertainty_14'])) == 48 -def test_soho_ephin_load_online(): +def test_soho_ephin_l2_load_online(): df, meta = soho_load(dataset='SOHO_COSTEP-EPHIN_L2-1MIN', startdate="2021/04/16", enddate="2021/04/16", path=None, resample="2min", pos_timestamp='center') assert isinstance(df, pd.DataFrame) @@ -128,7 +139,7 @@ def test_soho_ephin_load_online(): assert np.sum(np.isnan(df['E1300'])) == 219 -def test_soho_ephin_load_offline(): +def test_soho_ephin_l2_load_offline(): fullpath = get_pkg_data_filename('data/test/epi21106.rl2', package='seppy') path = Path(fullpath).parent.as_posix() df, meta = soho_load(dataset='SOHO_COSTEP-EPHIN_L2-1MIN', startdate="2021/04/16", enddate="2021/04/16", @@ -140,7 +151,18 @@ def test_soho_ephin_load_offline(): assert np.sum(np.isnan(df['E1300'])) == 219 -def test_soho_erne_load_online(): +# deactivating testing of SOHO EPHIN L3I loading bc. it increases the total test duration by a factor of 3.5 (JG 2026/01/21) +# def test_soho_ephin_l3i_load_online(): +# df, meta = soho_load(dataset='SOHO_COSTEP-EPHIN_L3I-1MIN', startdate="2021/04/16", enddate="2021/04/16", +# path=None, resample="2min", pos_timestamp='center') +# assert isinstance(df, pd.DataFrame) +# assert df.shape == (262766, 26) +# assert meta['channels_dict_df_p']['ch_strings'].iloc[1] == '7.8 - 25 MeV' +# # Check that fillvals are replaced by NaN +# assert np.sum(np.isnan(df['P_int_0'])) == 26999 + + +def test_soho_erne_hed_load_online(): df, meta = soho_load(dataset='SOHO_ERNE-HED_L2-1MIN', startdate="2021/04/16", enddate="2021/04/17", path=None, resample="1min", pos_timestamp='center') assert isinstance(df, pd.DataFrame) @@ -149,6 +171,15 @@ def test_soho_erne_load_online(): assert df['PHC_9'].sum() == 1295.0 +def test_soho_erne_led_load_online(): + df, meta = soho_load(dataset='SOHO_ERNE-LED_L2-1MIN', startdate="2002/04/16", enddate="2002/04/17", + path=None, resample="1min", pos_timestamp='center') + assert isinstance(df, pd.DataFrame) + assert df.shape == (1438, 41) + assert meta['channels_dict_df_p']['ch_strings'].iloc[9] == '10 - 13 MeV' + assert df['PLC_9'].sum() == 155.0 + + def test_solo_mag_load_online(): df = mag_load("2021/07/12", "2021/07/13", level='l2', data_type='normal-1-minute', frame='rtn', path=None) assert isinstance(df, pd.DataFrame) diff --git a/seppy/tools/__init__.py b/seppy/tools/__init__.py index cdcd3ed..53a576f 100644 --- a/seppy/tools/__init__.py +++ b/seppy/tools/__init__.py @@ -18,7 +18,7 @@ from seppy.loader.soho import calc_av_en_flux_ERNE, soho_load from seppy.loader.stereo import calc_av_en_flux_SEPT, calc_av_en_flux_ST_HET, stereo_load from seppy.loader.wind import wind3dp_load -from seppy.util import bepi_sixs_load, calc_av_en_flux_sixs, custom_notification, custom_warning, flux2series, resample_df, k_parameter, k_legacy +from seppy.util import bepi_sixs_load, calc_av_en_flux_sixs, custom_notification, custom_warning, _flux2series, resample_df, k_parameter, k_legacy from solo_epd_loader import combine_channels as solo_epd_combine_channels from solo_epd_loader import epd_load @@ -1535,7 +1535,7 @@ def find_onset(self, viewing, bg_start=None, bg_length=None, background_range=No if resample_period is not None: - df_averaged = resample_df(df=df_flux, resample=resample_period) + df_averaged = resample_df(df=df_flux, resample=resample_period, cols_unc=[]) # TODO: this ignores all uncertainty columns, but they are not used anyway else: @@ -1777,7 +1777,7 @@ def combine_grids_and_ybins(grid, grid1, y_arr, y_arr1): # Resample only if requested if resample is not None: - particle_data = resample_df(df=particle_data, resample=resample) + particle_data = resample_df(df=particle_data, resample=resample, cols_unc=[]) # TODO: this ignores all uncertainty columns, but they are not used anyway if xlim is None: df = particle_data[:] @@ -1790,7 +1790,12 @@ def combine_grids_and_ybins(grid, grid1, y_arr, y_arr1): df = particle_data.loc[(particle_data.index >= (t_start-td)) & (particle_data.index <= (t_end+td))] # In practice this seeks the date on which the highest flux is observed - date_of_event = df.iloc[np.argmax(df[df.columns[0]])].name.date() + # Addendum JG 2026-01-23: returns the first date of the data if all data is NaN + if not df[df.columns[0]].isna().all(): + # Since pandas 3.0.0, this raises an ValueError if df consists only of NaNs. Older pandas versions would return -1 (!) in that case + date_of_event = df.iloc[np.argmax(df[df.columns[0]])].name.date() + else: + date_of_event = df.iloc[0].name.date() # Assert time and channel bins time = df.index @@ -1925,7 +1930,7 @@ def combine_grids_and_ybins(grid, grid1, y_arr, y_arr1): # Resample only if requested if resample is not None: - particle_data1 = resample_df(df=particle_data1, resample=resample) + particle_data1 = resample_df(df=particle_data1, resample=resample, cols_unc=[]) # TODO: this ignores all uncertainty columns, but they are not used anyway if xlim is None: df1 = particle_data1[:] @@ -2264,8 +2269,8 @@ def tsa_plot(self, view, selection=None, xlim=None, resample=None): for i, channel in enumerate(selected_channels): # construct series and its normalized counterpart - series = flux2series(dataframe[channel], dataframe.index, resample) - series_normalized = flux2series(series.values/np.nanmax(series.values), series.index, resample) + series = _flux2series(dataframe[channel], dataframe.index, resample) # TODO: this uses "cols_unc=[]" for resampling, which ignores all uncertainty columns! But they are not used here anyway for now. + series_normalized = _flux2series(series.values/np.nanmax(series.values), series.index) # deactivate resampling here be. series is already resampled in the line above # store all series to arrays for later referencing series_natural.append(series) diff --git a/seppy/tools/widgets.py b/seppy/tools/widgets.py index 4be480c..821fdf1 100644 --- a/seppy/tools/widgets.py +++ b/seppy/tools/widgets.py @@ -61,7 +61,7 @@ "None": None, "STEREO-A": ("ahead", "STEREO-A"), "STEREO-B": ("behind", "STEREO-B"), - # "WIND (Coming soon!)": ("wind", "WIND") # TODO: re-add when supported! + # "WIND (Coming soon!)": ("wind", "WIND") } spacecraft_drop = widgets.Dropdown(options=list_of_sc, diff --git a/seppy/tools/widgets_dynspec.py b/seppy/tools/widgets_dynspec.py index 5efc644..f044527 100644 --- a/seppy/tools/widgets_dynspec.py +++ b/seppy/tools/widgets_dynspec.py @@ -79,7 +79,7 @@ "None": None, "STEREO-A": ("ahead", "STEREO-A"), "STEREO-B": ("behind", "STEREO-B"), - # "WIND (Coming soon!)": ("wind", "WIND") # TODO: re-add when supported! + # "WIND (Coming soon!)": ("wind", "WIND") } spacecraft_drop = widgets.Dropdown(options=list_of_sc, diff --git a/seppy/tools/widgets_onset.py b/seppy/tools/widgets_onset.py index 09abceb..28f2d1a 100644 --- a/seppy/tools/widgets_onset.py +++ b/seppy/tools/widgets_onset.py @@ -79,7 +79,7 @@ "None": None, "STEREO-A": ("ahead", "STEREO-A"), "STEREO-B": ("behind", "STEREO-B"), - # "WIND (Coming soon!)": ("wind", "WIND") # TODO: re-add when supported! + # "WIND (Coming soon!)": ("wind", "WIND") } spacecraft_drop = widgets.Dropdown(options=list_of_sc, diff --git a/seppy/tools/widgets_tsa.py b/seppy/tools/widgets_tsa.py index 2faa72e..58ce992 100644 --- a/seppy/tools/widgets_tsa.py +++ b/seppy/tools/widgets_tsa.py @@ -79,7 +79,7 @@ "None": None, "STEREO-A": ("ahead", "STEREO-A"), "STEREO-B": ("behind", "STEREO-B"), - # "WIND (Coming soon!)": ("wind", "WIND") # TODO: re-add when supported! + # "WIND (Coming soon!)": ("wind", "WIND") } spacecraft_drop = widgets.Dropdown(options=list_of_sc, diff --git a/seppy/util/__init__.py b/seppy/util/__init__.py index 3f461ea..07c329c 100644 --- a/seppy/util/__init__.py +++ b/seppy/util/__init__.py @@ -136,7 +136,7 @@ def sqrt_sum_squares(series): Sqrt of sum of squares divided by number of samples """ - return np.sqrt(np.nansum(series**2)) / len(series) + return np.sqrt(np.nansum(series**2)) / series.count() def reduce_list_generic(original_list, placeholder="xx", seperator="_"): @@ -181,7 +181,7 @@ def reduce_list_generic(original_list, placeholder="xx", seperator="_"): return sorted(list(patterns)) -def resample_df(df, resample, pos_timestamp="center", origin="start", cols_unc='auto'): +def resample_df(df, resample, pos_timestamp="center", origin="start", cols_unc='auto', keywords_unc=['unc', 'err', 'sigma'], verbose=True): """ Resamples a Pandas Dataframe or Series to a new frequency. Note that this is just a simple wrapper around the pandas resample function that is @@ -203,16 +203,25 @@ def resample_df(df, resample, pos_timestamp="center", origin="start", cols_unc=' input dataframe/series ('start'), or at the start of the day ('start_day') cols_unc : list, default 'auto' - List of columns in the dataframe (or name of the series) that - contain uncertainties. These columns will be resampled using a - custom function (sqrt of the sum of squares divided by number of - samples in the bin) instead of just the arithmetic mean. It an empty - list is provided (i.e. []), all columns will bre resampled using the - arithmetic mean. If set to 'auto' (default), the function will try - to automatically detect columns with uncertainties based on their - names (looking for 'uncertainty', 'err', or 'sigma' in the column - name). Note that this automatic detection only works for single- - level column DataFrames and Series. + List of specific column names in the dataframe (or name of the + series) that contain uncertainties. These columns will be resampled + using a custom function (sqrt of the sum of squares divided by + number of samples in the bin) instead of just the arithmetic mean. + If an empty list is provided (i.e. []), all columns will be + resampled using the arithmetic mean. If set to 'auto' (default), the + function will try to automatically detect columns with uncertainties + based on their names (looking for the keywords provided in + keywords_unc, by default 'unc', 'err' or 'sigma'). Note that this + automatic detection only works for single-level column DataFrames + and Series. + keywords_unc : list, default ['unc', 'err', 'sigma'] + List of keywords to use for automatic detection of uncertainty. All + columns with these keywords in their name will be treated as + uncertainty columns when cols_unc is set to 'auto'. + verbose : bool, default True + If True, will print additional debug information, e.g., about + automatically detected uncertainty columns. Deactivate at own risk + and only if you know what you are doing! Returns ------- @@ -235,22 +244,25 @@ def resample_df(df, resample, pos_timestamp="center", origin="start", cols_unc=' if type(cols_unc) is str and cols_unc == 'auto': if isinstance(df, pd.DataFrame): if type(df.columns) is not pd.core.indexes.multi.MultiIndex: - cols_unc = [col for col in df.columns if 'uncertainty' in col.lower() or 'err' in col.lower() or 'sigma' in col.lower()] + # cols_unc = [col for col in df.columns if 'uncertainty' in col.lower() or 'err' in col.lower() or 'sigma' in col.lower()] + cols_unc = [col for col in df.columns if any(keyword.lower() in col.lower() for keyword in keywords_unc)] elif type(df.columns) is pd.core.indexes.multi.MultiIndex: cols_unc = [] custom_warning("\nResampling of MultiIndex DataFrames with uncertainty columns not implemented yet! Proceeding without uncertainty handling.\n") elif isinstance(df, pd.Series): try: - if 'unc' in df.name.lower() or 'error' in df.name.lower(): + # if 'unc' in df.name.lower() or 'error' in df.name.lower(): + if any(keyword.lower() in df.name.lower() for keyword in keywords_unc): cols_unc = [df.name] else: cols_unc = [] except AttributeError: cols_unc = [] if len(cols_unc) > 0 and cols_unc != 'auto': - custom_notification(f"Automatically detected columns with uncertainties: {reduce_list_generic(cols_unc)}. Please report this behaviour if you think it is wrong!") + if verbose: + custom_notification(f"Automatically detected columns with uncertainties: {reduce_list_generic(cols_unc)}. Please report this behaviour if you think it is wrong!") else: - custom_warning("\nNo columns with uncertainties automatically detected! You might need to provide them manually via the 'cols_unc' parameter. Please report this behaviour if you think it should work automatically.\n") + custom_warning("No columns with uncertainties automatically detected! You might need to provide them manually via the 'cols_unc' parameter. Please report this behaviour if you think it should work automatically.") # resampling try: @@ -284,7 +296,7 @@ def resample_df(df, resample, pos_timestamp="center", origin="start", cols_unc=' return df -def flux2series(flux, dates, cadence=None): +def _flux2series(flux, dates, cadence=None): """ Converts an array of observed particle flux + timestamps into a pandas series with the desired cadence. @@ -308,7 +320,7 @@ def flux2series(flux, dates, cadence=None): # if no cadence given, then just return the series with the original # time resolution if cadence is not None: - flux_series = resample_df(df=flux_series, resample=cadence, pos_timestamp="center", origin="start") + flux_series = resample_df(df=flux_series, resample=cadence, pos_timestamp="center", origin="start", cols_unc=[]) # TODO: this ignores all uncertainty columns, but they are not used here anyway return flux_series diff --git a/seppy/version.py b/seppy/version.py index 2f3d384..b8b47e1 100644 --- a/seppy/version.py +++ b/seppy/version.py @@ -14,4 +14,4 @@ ) del warnings - version = '0.4.2' + version = '0.4.3'