Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c61c7a4
don't try to find uncertainty columns when resampling wind/3dp (there…
jgieseler Jan 16, 2026
d8e5465
enhance resample_df function to support custom keywords for uncertain…
jgieseler Jan 19, 2026
92fdbf7
Combine warnings to reduce noise
jgieseler Jan 19, 2026
70ec7ea
Add verbose option to resample_df for additional debug information
jgieseler Jan 19, 2026
0f164c4
add explicit list of keys for juice radem test
jgieseler Jan 20, 2026
45e6357
add test for soho erne led
jgieseler Jan 20, 2026
789bec4
add test for SOHO_COSTEP-EPHIN_L3I-1MIN
jgieseler Jan 20, 2026
6a92bc6
handle uppercase column names in resample_df
jgieseler Jan 20, 2026
5a1aa20
use explicit call to resample function to handle uncertainties correc…
jgieseler Jan 20, 2026
ae5ea50
hide flux2series function because it's outdated and should only be use…
jgieseler Jan 20, 2026
b7e927f
deactivate special uncertainty resampling bc. they are not used in t…
jgieseler Jan 20, 2026
7295db6
fix bug introduced that skipped some resampling for ephin and sept
jgieseler Jan 20, 2026
491c73e
test deactivating test for SOHO_COSTEP-EPHIN_L3I-1MIN
jgieseler Jan 20, 2026
8535339
add comment
jgieseler Jan 21, 2026
d998e11
clean comments
jgieseler Jan 21, 2026
b68b3b7
adjust warning in resample_df
jgieseler Jan 21, 2026
fe31918
adjust calculation of uncertainties in resample_df to account for NaN…
jgieseler Jan 21, 2026
faf0c27
add comment
jgieseler Jan 22, 2026
ee6e433
disable uncertainty resampling in specific internal function
jgieseler Jan 22, 2026
5f479a5
deactivate duplicate resampling of series_normalized in tsa_plot()
jgieseler Jan 22, 2026
f2e274d
update test images
jgieseler Jan 22, 2026
33c0c5e
fix breaking bug with pandas 3.0.0
jgieseler Jan 23, 2026
b51bfba
update version number
jgieseler Jan 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ authors:
family-names: Vainio
affiliation: University of Turku
orcid: https://orcid.org/0000-0002-3298-2067
version: 0.4.2
date-released: 2025-12-17
version: 0.4.3
date-released: 2026-01-23
repository-code: https://github.com/serpentine-h2020/SEPpy
license: BSD-3-Clause
preferred-citation:
Expand Down
3 changes: 3 additions & 0 deletions seppy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@

from .version import version as __version__
from seppy.util import custom_warning

# __all__ = [] # defines which functions, variables etc. will be loaded when running "from pyonset import *"

custom_warning('Breaking changes in SEPpy v0.4.0: The metadata for SOHO/EPHIN, SOHO/ERNE, STEREO/SEPT, and Wind/3DP have changed! See https://github.com/serpentine-h2020/SEPpy/releases/tag/v0.4.0 for details. (You can ignore this if you do not invoke SEPpy manually.)')
2 changes: 1 addition & 1 deletion seppy/loader/bepi.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def bepi_sixsp_l3_loader(startdate, enddate=None, resample=None, path=None, pos_
# optional resampling:
if isinstance(resample, str):
if len(df) > 0:
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc='auto', verbose=False)
else:
df = []

Expand Down
2 changes: 1 addition & 1 deletion seppy/loader/juice.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def juice_radem_load(startdate, enddate, resample=None, path=None, pos_timestamp
df['TIME_UTC'] = pd.to_datetime(df['TIME_UTC'])

if resample:
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=[], verbose=False)

energies_dict, metadata_dict = juice_radem_load_metadata(filename=downloaded_files[0])

Expand Down
156 changes: 8 additions & 148 deletions seppy/loader/psp.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,152 +44,6 @@ def _get_cdf_vars(cdf):
return var_list


# def _cdf2df_3d_psp(cdf, index_key, dtimeindex=True, ignore=None, include=None):
# """
# Converts a cdf file to a pandas dataframe.
# Note that this only works for 1 dimensional data, other data such as
# distribution functions or pitch angles will not work properly.
# Parameters
# ----------
# cdf : cdf
# Opened CDF file.
# index_key : str
# The CDF key to use as the index in the output DataFrame.
# dtimeindex : bool
# If ``True``, the DataFrame index is parsed as a datetime.
# Default is ``True``.
# ignore : list
# In case a CDF file has columns that are unused / not required, then
# the column names can be passed as a list into the function.
# include : str, list
# If only specific columns of a CDF file are desired, then the column
# names can be passed as a list into the function. Should not be used
# with ``ignore``.
# Returns
# -------
# df : :class:`pandas.DataFrame`
# Data frame with read in data.
# """
# if include is not None:
# if ignore is not None:
# raise ValueError('ignore and include are incompatible keywords')
# if isinstance(include, str):
# include = [include]
# if index_key not in include:
# include.append(index_key)

# # Extract index values
# index_info = cdf.varinq(index_key)
# if index_info['Last_Rec'] == -1:
# warnings.warn(f"No records present in CDF file {cdf.cdf_info()['CDF'].name}")
# return_df = pd.DataFrame()
# else:
# index = cdf.varget(index_key)
# try:
# # If there are multiple indexes, take the first one
# # TODO: this is just plain wrong, there should be a way to get all
# # the indexes out
# index = index[...][:, 0]
# except IndexError:
# pass

# if dtimeindex:
# index = cdflib.epochs.CDFepoch.breakdown(index, to_np=True)
# index_df = pd.DataFrame({'year': index[:, 0],
# 'month': index[:, 1],
# 'day': index[:, 2],
# 'hour': index[:, 3],
# 'minute': index[:, 4],
# 'second': index[:, 5],
# 'ms': index[:, 6],
# })
# # Not all CDFs store pass milliseconds
# try:
# index_df['us'] = index[:, 7]
# index_df['ns'] = index[:, 8]
# except IndexError:
# pass
# index = pd.DatetimeIndex(pd.to_datetime(index_df), name='Time')
# data_dict = {}
# npoints = len(index)

# var_list = _get_cdf_vars(cdf)
# keys = {}
# # Get mapping from each attr to sub-variables
# for cdf_key in var_list:
# if ignore:
# if cdf_key in ignore:
# continue
# elif include:
# if cdf_key not in include:
# continue
# if cdf_key == 'Epoch':
# keys[cdf_key] = 'Time'
# else:
# keys[cdf_key] = cdf_key
# # Remove index key, as we have already used it to create the index
# keys.pop(index_key)
# # Remove keys for data that doesn't have the right shape to load in CDF
# # Mapping of keys to variable data
# vars = {}
# for cdf_key in keys.copy():
# try:
# vars[cdf_key] = cdf.varget(cdf_key)
# except ValueError:
# vars[cdf_key] = ''
# for cdf_key in keys:
# var = vars[cdf_key]
# if type(var) is np.ndarray:
# key_shape = var.shape
# if len(key_shape) == 0 or key_shape[0] != npoints:
# vars.pop(cdf_key)
# else:
# vars.pop(cdf_key)

# # Loop through each key and put data into the dataframe
# for cdf_key in vars:
# df_key = keys[cdf_key]
# # Get fill value for this key
# # First catch string FILLVAL's
# if type(cdf.varattsget(cdf_key)['FILLVAL']) is str:
# fillval = cdf.varattsget(cdf_key)['FILLVAL']
# else:
# try:
# fillval = float(cdf.varattsget(cdf_key)['FILLVAL'])
# except KeyError:
# fillval = np.nan

# if isinstance(df_key, list):
# for i, subkey in enumerate(df_key):
# data = vars[cdf_key][...][:, i]
# data = _fillval_nan(data, fillval)
# data_dict[subkey] = data
# else:
# # If ndims is 1, we just have a single column of data
# # If ndims is 2, have multiple columns of data under same key
# # If ndims is 3, have multiple columns of data under same key, with 2 sub_keys (e.g., energy and pitch-angle)
# key_shape = vars[cdf_key].shape
# ndims = len(key_shape)
# if ndims == 1:
# data = vars[cdf_key][...]
# data = _fillval_nan(data, fillval)
# data_dict[df_key] = data
# elif ndims == 2:
# for i in range(key_shape[1]):
# data = vars[cdf_key][...][:, i]
# data = _fillval_nan(data, fillval)
# data_dict[f'{df_key}_{i}'] = data
# elif ndims == 3:
# for i in range(key_shape[2]):
# for j in range(key_shape[1]):
# data = vars[cdf_key][...][:, j, i]
# data = _fillval_nan(data, fillval)
# data_dict[f'{df_key}_E{i}_P{j}'] = data
# return_df = pd.DataFrame(index=index, data=data_dict)

# return return_df


def psp_isois_load(dataset, startdate, enddate, epilo_channel='F', epilo_threshold=None, path=None, resample=None, all_columns=False):
"""
Downloads CDF files via SunPy/Fido from CDAWeb for ISOIS onboard PSP
Expand Down Expand Up @@ -375,7 +229,13 @@ def psp_isois_load(dataset, startdate, enddate, epilo_channel='F', epilo_thresho
energies_dict = ''

if isinstance(resample, str):
df = resample_df(df=df, resample=resample, pos_timestamp="center", origin="start")
if dataset.upper() in ['PSP_ISOIS-EPILO_L2-PE']:
cols_unc = []
keywords_unc = []
elif dataset.upper() in ['PSP_ISOIS-EPIHI_L2-HET-RATES60', 'PSP_ISOIS-EPIHI_L2-LET1-RATES60', 'PSP_ISOIS-EPIHI_L2-LET2-RATES60', 'PSP_ISOIS-EPILO_L2-IC']:
cols_unc = 'auto'
keywords_unc=['unc', 'err', 'sigma', '_DELTA_'] # 'PSP_ISOIS-EPILO_L2-IC' has 'H_Flux_ChanP_DELTA_Exx_Pxx' uncertainty columns
df = resample_df(df=df, resample=resample, pos_timestamp="center", origin="start", cols_unc=cols_unc, verbose=False, keywords_unc=keywords_unc)

except (RuntimeError, IndexError):
print(f'Unable to obtain "{dataset}" data!')
Expand Down Expand Up @@ -619,7 +479,7 @@ def _read_cdf_psp(fname, index_key, ignore_vars=[]):
# Only for selected index_key:
index = cdf.varget(index_key)

# TODO: use to_astropy_time() instead here when we drop pandas in timeseries
# use to_astropy_time() instead here when we drop pandas in timeseries
index = CDFepoch.to_datetime(index)
# df = pd.DataFrame(index=pd.DatetimeIndex(name=index_key, data=index))
# units = {}
Expand Down
22 changes: 14 additions & 8 deletions seppy/loader/soho.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,18 +140,18 @@ def soho_load(dataset, startdate, enddate, path=None, resample=None, pos_timesta
if os.path.exists(f) and os.path.getsize(f) == 0:
os.remove(f)
if not os.path.exists(f):
downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn)
_downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn)

# downloaded_files = Fido.fetch(result, path=path, max_conn=max_conn) # use Fido.fetch(result, path='/ThisIs/MyPath/to/Data/{file}') to use a specific local folder for saving data files
# downloaded_files.sort()
data = TimeSeries(downloaded_files, concatenate=True)
df = data.to_dataframe()

metadata = _get_metadata(dataset, downloaded_files[0])
if dataset.upper() == 'SOHO_ERNE-HED_L2-1MIN' or dataset.upper() == 'SOHO_ERNE-LED_L3I-1MIN':
custom_warning(f'The format of "channels_dict_df_p" in the metadata for {dataset} has been changed providing the full width of energy channels for DE (instead of the half)!')
elif dataset.upper() == 'SOHO_ERNE-LED_L2-1MIN':
custom_warning(f'The format of the metadata for {dataset} has been changed. The previous metadata is now provided in meta["energy_labels"]!')
# if dataset.upper() == 'SOHO_ERNE-HED_L2-1MIN' or dataset.upper() == 'SOHO_ERNE-LED_L3I-1MIN':
# custom_warning(f'The format of "channels_dict_df_p" in the metadata for {dataset} has been changed providing the full width of energy channels for DE (instead of the half)!')
# elif dataset.upper() == 'SOHO_ERNE-LED_L2-1MIN':
# custom_warning(f'The format of the metadata for {dataset} has been changed. The previous metadata is now provided in meta["energy_labels"]!')

# remove this (i.e. following lines) when sunpy's read_cdf is updated,
# and FILLVAL will be replaced directly, see
Expand All @@ -178,7 +178,13 @@ def soho_load(dataset, startdate, enddate, path=None, resample=None, pos_timesta
df.index = df.index-pd.Timedelta('7.5s')

if isinstance(resample, str):
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
if dataset.upper() in ['SOHO_ERNE-HED_L2-1MIN', 'SOHO_ERNE-LED_L2-1MIN']:
cols_unc = []
keywords_unc = []
elif dataset.upper() in ['SOHO_COSTEP-EPHIN_L3I-1MIN']:
cols_unc = 'auto'
keywords_unc = ['_sys_', '_stat_']
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=cols_unc, verbose=False, keywords_unc=keywords_unc)
except (RuntimeError, IndexError):
print(f'Unable to obtain "{dataset}" data!')
downloaded_files = []
Expand Down Expand Up @@ -406,7 +412,7 @@ def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns=
cs_he25 = '25 - 53 MeV/n'
if max(fmodes)==2:
# # warnings.warn('Careful: EPHIN ring off!')
custom_warning('SOHO/EPHIN ring is off! This means high risk of contaminated measurements!')
custom_warning('SOHO/EPHIN ring is off. This means high risk of contaminated measurements!')

# failure mode D since 4 Oct 2017:
# dates[-1].date() is enddate, used to catch cases when enddate is a string
Expand All @@ -426,7 +432,7 @@ def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns=

# optional resampling:
if isinstance(resample, str):
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=[], verbose=False)
else:
df = []

Expand Down
14 changes: 9 additions & 5 deletions seppy/loader/stereo.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from sunpy.net import attrs as a
from sunpy.timeseries import TimeSeries

from seppy.util import custom_notification, custom_warning, resample_df
from seppy.util import resample_df # custom_notification, custom_warning

# omit Pandas' PerformanceWarning
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
Expand Down Expand Up @@ -212,9 +212,9 @@ def stereo_sept_loader(startdate, enddate, spacecraft, species, viewing, resampl

# optional resampling:
if isinstance(resample, str):
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc='auto', verbose=False)

custom_warning('The format of "channels_dict_df_X" in the the metadata for STEREO/SEPT has been changed providing "mean_E" in MeV (instead of keV)! The metadata is also now given as a dictionary containing the dataframe "channels_dict_df_X".')
# custom_warning('The format of "channels_dict_df_X" in the the metadata for STEREO/SEPT has been changed providing "mean_E" in MeV (instead of keV)! The metadata is also now given as a dictionary containing the dataframe "channels_dict_df_X".')
else:
df = []

Expand Down Expand Up @@ -416,7 +416,7 @@ def stereo_load(instrument, startdate, enddate, spacecraft='ahead', mag_coord='R
if os.path.exists(f) and os.path.getsize(f) == 0:
os.remove(f)
if not os.path.exists(f):
downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn)
_downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn)

# downloaded_files = Fido.fetch(result, path=path, max_conn=max_conn)
data = TimeSeries(downloaded_files, concatenate=True)
Expand Down Expand Up @@ -464,7 +464,11 @@ def stereo_load(instrument, startdate, enddate, spacecraft='ahead', mag_coord='R
df.index = df.index-pd.Timedelta('30s')

if isinstance(resample, str):
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
if instrument.upper() in ['LET', 'MAG', 'MAGB', 'MAGPLASMA']:
cols_unc = []
elif instrument.upper() in ['HET']:
cols_unc = 'auto'
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=cols_unc, verbose=False)
except (RuntimeError, IndexError):
print(f'Unable to obtain "{dataset}" data for {startdate}-{enddate}!')
downloaded_files = []
Expand Down
Loading