Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c61c7a4
don't try to find uncertainty columns when resampling wind/3dp (there…
jgieseler Jan 16, 2026
d8e5465
enhance resample_df function to support custom keywords for uncertain…
jgieseler Jan 19, 2026
92fdbf7
Combine warnings to reduce noise
jgieseler Jan 19, 2026
70ec7ea
Add verbose option to resample_df for additional debug information
jgieseler Jan 19, 2026
0f164c4
add explicit list of keys for juice radem test
jgieseler Jan 20, 2026
45e6357
add test for soho erne led
jgieseler Jan 20, 2026
789bec4
add test for SOHO_COSTEP-EPHIN_L3I-1MIN
jgieseler Jan 20, 2026
6a92bc6
handle uppercase column names in resample_df
jgieseler Jan 20, 2026
5a1aa20
use explicit call to resample function to handle uncertainties correc…
jgieseler Jan 20, 2026
ae5ea50
hide flux2series function because it's outdated and should only be use…
jgieseler Jan 20, 2026
b7e927f
deactivate special uncertainty resampling bc. they are not used in t…
jgieseler Jan 20, 2026
7295db6
fix bug introduced that skipped some resampling for ephin and sept
jgieseler Jan 20, 2026
491c73e
test deactivating test for SOHO_COSTEP-EPHIN_L3I-1MIN
jgieseler Jan 20, 2026
8535339
add comment
jgieseler Jan 21, 2026
d998e11
clean comments
jgieseler Jan 21, 2026
b68b3b7
adjust warning in resample_df
jgieseler Jan 21, 2026
fe31918
adjust calculation of uncertainties in resample_df to account for NaN…
jgieseler Jan 21, 2026
faf0c27
add comment
jgieseler Jan 22, 2026
ee6e433
disable uncertainty resampling in specific internal function
jgieseler Jan 22, 2026
5f479a5
deactivate duplicate resampling of series_normalized in tsa_plot()
jgieseler Jan 22, 2026
f2e274d
update test images
jgieseler Jan 22, 2026
33c0c5e
fix breaking bug with pandas 3.0.0
jgieseler Jan 23, 2026
b51bfba
update version number
jgieseler Jan 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ authors:
family-names: Vainio
affiliation: University of Turku
orcid: https://orcid.org/0000-0002-3298-2067
version: 0.4.2
date-released: 2025-12-17
version: 0.4.3
date-released: 2026-01-23
repository-code: https://github.com/serpentine-h2020/SEPpy
license: BSD-3-Clause
preferred-citation:
Expand Down
3 changes: 3 additions & 0 deletions seppy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@

from .version import version as __version__
from seppy.util import custom_warning

# __all__ = [] # defines which functions, variables etc. will be loaded when running "from pyonset import *"

custom_warning('Breaking changes in SEPpy v0.4.0: The metadata for SOHO/EPHIN, SOHO/ERNE, STEREO/SEPT, and Wind/3DP have changed! See https://github.com/serpentine-h2020/SEPpy/releases/tag/v0.4.0 for details. (You can ignore this if you do not invoke SEPpy manually.)')
2 changes: 1 addition & 1 deletion seppy/loader/bepi.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def bepi_sixsp_l3_loader(startdate, enddate=None, resample=None, path=None, pos_
# optional resampling:
if isinstance(resample, str):
if len(df) > 0:
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc='auto', verbose=False)
else:
df = []

Expand Down
2 changes: 1 addition & 1 deletion seppy/loader/juice.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def juice_radem_load(startdate, enddate, resample=None, path=None, pos_timestamp
df['TIME_UTC'] = pd.to_datetime(df['TIME_UTC'])

if resample:
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=[], verbose=False)

energies_dict, metadata_dict = juice_radem_load_metadata(filename=downloaded_files[0])

Expand Down
156 changes: 8 additions & 148 deletions seppy/loader/psp.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,152 +44,6 @@ def _get_cdf_vars(cdf):
return var_list


# def _cdf2df_3d_psp(cdf, index_key, dtimeindex=True, ignore=None, include=None):
# """
# Converts a cdf file to a pandas dataframe.
# Note that this only works for 1 dimensional data, other data such as
# distribution functions or pitch angles will not work properly.
# Parameters
# ----------
# cdf : cdf
# Opened CDF file.
# index_key : str
# The CDF key to use as the index in the output DataFrame.
# dtimeindex : bool
# If ``True``, the DataFrame index is parsed as a datetime.
# Default is ``True``.
# ignore : list
# In case a CDF file has columns that are unused / not required, then
# the column names can be passed as a list into the function.
# include : str, list
# If only specific columns of a CDF file are desired, then the column
# names can be passed as a list into the function. Should not be used
# with ``ignore``.
# Returns
# -------
# df : :class:`pandas.DataFrame`
# Data frame with read in data.
# """
# if include is not None:
# if ignore is not None:
# raise ValueError('ignore and include are incompatible keywords')
# if isinstance(include, str):
# include = [include]
# if index_key not in include:
# include.append(index_key)

# # Extract index values
# index_info = cdf.varinq(index_key)
# if index_info['Last_Rec'] == -1:
# warnings.warn(f"No records present in CDF file {cdf.cdf_info()['CDF'].name}")
# return_df = pd.DataFrame()
# else:
# index = cdf.varget(index_key)
# try:
# # If there are multiple indexes, take the first one
# # TODO: this is just plain wrong, there should be a way to get all
# # the indexes out
# index = index[...][:, 0]
# except IndexError:
# pass

# if dtimeindex:
# index = cdflib.epochs.CDFepoch.breakdown(index, to_np=True)
# index_df = pd.DataFrame({'year': index[:, 0],
# 'month': index[:, 1],
# 'day': index[:, 2],
# 'hour': index[:, 3],
# 'minute': index[:, 4],
# 'second': index[:, 5],
# 'ms': index[:, 6],
# })
# # Not all CDFs store pass milliseconds
# try:
# index_df['us'] = index[:, 7]
# index_df['ns'] = index[:, 8]
# except IndexError:
# pass
# index = pd.DatetimeIndex(pd.to_datetime(index_df), name='Time')
# data_dict = {}
# npoints = len(index)

# var_list = _get_cdf_vars(cdf)
# keys = {}
# # Get mapping from each attr to sub-variables
# for cdf_key in var_list:
# if ignore:
# if cdf_key in ignore:
# continue
# elif include:
# if cdf_key not in include:
# continue
# if cdf_key == 'Epoch':
# keys[cdf_key] = 'Time'
# else:
# keys[cdf_key] = cdf_key
# # Remove index key, as we have already used it to create the index
# keys.pop(index_key)
# # Remove keys for data that doesn't have the right shape to load in CDF
# # Mapping of keys to variable data
# vars = {}
# for cdf_key in keys.copy():
# try:
# vars[cdf_key] = cdf.varget(cdf_key)
# except ValueError:
# vars[cdf_key] = ''
# for cdf_key in keys:
# var = vars[cdf_key]
# if type(var) is np.ndarray:
# key_shape = var.shape
# if len(key_shape) == 0 or key_shape[0] != npoints:
# vars.pop(cdf_key)
# else:
# vars.pop(cdf_key)

# # Loop through each key and put data into the dataframe
# for cdf_key in vars:
# df_key = keys[cdf_key]
# # Get fill value for this key
# # First catch string FILLVAL's
# if type(cdf.varattsget(cdf_key)['FILLVAL']) is str:
# fillval = cdf.varattsget(cdf_key)['FILLVAL']
# else:
# try:
# fillval = float(cdf.varattsget(cdf_key)['FILLVAL'])
# except KeyError:
# fillval = np.nan

# if isinstance(df_key, list):
# for i, subkey in enumerate(df_key):
# data = vars[cdf_key][...][:, i]
# data = _fillval_nan(data, fillval)
# data_dict[subkey] = data
# else:
# # If ndims is 1, we just have a single column of data
# # If ndims is 2, have multiple columns of data under same key
# # If ndims is 3, have multiple columns of data under same key, with 2 sub_keys (e.g., energy and pitch-angle)
# key_shape = vars[cdf_key].shape
# ndims = len(key_shape)
# if ndims == 1:
# data = vars[cdf_key][...]
# data = _fillval_nan(data, fillval)
# data_dict[df_key] = data
# elif ndims == 2:
# for i in range(key_shape[1]):
# data = vars[cdf_key][...][:, i]
# data = _fillval_nan(data, fillval)
# data_dict[f'{df_key}_{i}'] = data
# elif ndims == 3:
# for i in range(key_shape[2]):
# for j in range(key_shape[1]):
# data = vars[cdf_key][...][:, j, i]
# data = _fillval_nan(data, fillval)
# data_dict[f'{df_key}_E{i}_P{j}'] = data
# return_df = pd.DataFrame(index=index, data=data_dict)

# return return_df


def psp_isois_load(dataset, startdate, enddate, epilo_channel='F', epilo_threshold=None, path=None, resample=None, all_columns=False):
"""
Downloads CDF files via SunPy/Fido from CDAWeb for ISOIS onboard PSP
Expand Down Expand Up @@ -375,7 +229,13 @@ def psp_isois_load(dataset, startdate, enddate, epilo_channel='F', epilo_thresho
energies_dict = ''

if isinstance(resample, str):
df = resample_df(df=df, resample=resample, pos_timestamp="center", origin="start")
if dataset.upper() in ['PSP_ISOIS-EPILO_L2-PE']:
cols_unc = []
keywords_unc = []
elif dataset.upper() in ['PSP_ISOIS-EPIHI_L2-HET-RATES60', 'PSP_ISOIS-EPIHI_L2-LET1-RATES60', 'PSP_ISOIS-EPIHI_L2-LET2-RATES60', 'PSP_ISOIS-EPILO_L2-IC']:
cols_unc = 'auto'
keywords_unc=['unc', 'err', 'sigma', '_DELTA_'] # 'PSP_ISOIS-EPILO_L2-IC' has 'H_Flux_ChanP_DELTA_Exx_Pxx' uncertainty columns
df = resample_df(df=df, resample=resample, pos_timestamp="center", origin="start", cols_unc=cols_unc, verbose=False, keywords_unc=keywords_unc)

except (RuntimeError, IndexError):
print(f'Unable to obtain "{dataset}" data!')
Expand Down Expand Up @@ -619,7 +479,7 @@ def _read_cdf_psp(fname, index_key, ignore_vars=[]):
# Only for selected index_key:
index = cdf.varget(index_key)

# TODO: use to_astropy_time() instead here when we drop pandas in timeseries
# use to_astropy_time() instead here when we drop pandas in timeseries
index = CDFepoch.to_datetime(index)
# df = pd.DataFrame(index=pd.DatetimeIndex(name=index_key, data=index))
# units = {}
Expand Down
22 changes: 14 additions & 8 deletions seppy/loader/soho.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,18 +140,18 @@ def soho_load(dataset, startdate, enddate, path=None, resample=None, pos_timesta
if os.path.exists(f) and os.path.getsize(f) == 0:
os.remove(f)
if not os.path.exists(f):
downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn)
_downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn)

# downloaded_files = Fido.fetch(result, path=path, max_conn=max_conn) # use Fido.fetch(result, path='/ThisIs/MyPath/to/Data/{file}') to use a specific local folder for saving data files
# downloaded_files.sort()
data = TimeSeries(downloaded_files, concatenate=True)
df = data.to_dataframe()

metadata = _get_metadata(dataset, downloaded_files[0])
if dataset.upper() == 'SOHO_ERNE-HED_L2-1MIN' or dataset.upper() == 'SOHO_ERNE-LED_L3I-1MIN':
custom_warning(f'The format of "channels_dict_df_p" in the metadata for {dataset} has been changed providing the full width of energy channels for DE (instead of the half)!')
elif dataset.upper() == 'SOHO_ERNE-LED_L2-1MIN':
custom_warning(f'The format of the metadata for {dataset} has been changed. The previous metadata is now provided in meta["energy_labels"]!')
# if dataset.upper() == 'SOHO_ERNE-HED_L2-1MIN' or dataset.upper() == 'SOHO_ERNE-LED_L3I-1MIN':
# custom_warning(f'The format of "channels_dict_df_p" in the metadata for {dataset} has been changed providing the full width of energy channels for DE (instead of the half)!')
# elif dataset.upper() == 'SOHO_ERNE-LED_L2-1MIN':
# custom_warning(f'The format of the metadata for {dataset} has been changed. The previous metadata is now provided in meta["energy_labels"]!')

# remove this (i.e. following lines) when sunpy's read_cdf is updated,
# and FILLVAL will be replaced directly, see
Expand All @@ -178,7 +178,13 @@ def soho_load(dataset, startdate, enddate, path=None, resample=None, pos_timesta
df.index = df.index-pd.Timedelta('7.5s')

if isinstance(resample, str):
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
if dataset.upper() in ['SOHO_ERNE-HED_L2-1MIN', 'SOHO_ERNE-LED_L2-1MIN']:
cols_unc = []
keywords_unc = []
elif dataset.upper() in ['SOHO_COSTEP-EPHIN_L3I-1MIN']:
cols_unc = 'auto'
keywords_unc = ['_sys_', '_stat_']
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=cols_unc, verbose=False, keywords_unc=keywords_unc)
except (RuntimeError, IndexError):
print(f'Unable to obtain "{dataset}" data!')
downloaded_files = []
Expand Down Expand Up @@ -406,7 +412,7 @@ def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns=
cs_he25 = '25 - 53 MeV/n'
if max(fmodes)==2:
# # warnings.warn('Careful: EPHIN ring off!')
custom_warning('SOHO/EPHIN ring is off! This means high risk of contaminated measurements!')
custom_warning('SOHO/EPHIN ring is off. This means high risk of contaminated measurements!')

# failure mode D since 4 Oct 2017:
# dates[-1].date() is enddate, used to catch cases when enddate is a string
Expand All @@ -426,7 +432,7 @@ def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns=

# optional resampling:
if isinstance(resample, str):
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=[], verbose=False)
else:
df = []

Expand Down
14 changes: 9 additions & 5 deletions seppy/loader/stereo.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from sunpy.net import attrs as a
from sunpy.timeseries import TimeSeries

from seppy.util import custom_notification, custom_warning, resample_df
from seppy.util import resample_df # custom_notification, custom_warning

# omit Pandas' PerformanceWarning
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
Expand Down Expand Up @@ -212,9 +212,9 @@ def stereo_sept_loader(startdate, enddate, spacecraft, species, viewing, resampl

# optional resampling:
if isinstance(resample, str):
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc='auto', verbose=False)

custom_warning('The format of "channels_dict_df_X" in the the metadata for STEREO/SEPT has been changed providing "mean_E" in MeV (instead of keV)! The metadata is also now given as a dictionary containing the dataframe "channels_dict_df_X".')
# custom_warning('The format of "channels_dict_df_X" in the the metadata for STEREO/SEPT has been changed providing "mean_E" in MeV (instead of keV)! The metadata is also now given as a dictionary containing the dataframe "channels_dict_df_X".')
else:
df = []

Expand Down Expand Up @@ -416,7 +416,7 @@ def stereo_load(instrument, startdate, enddate, spacecraft='ahead', mag_coord='R
if os.path.exists(f) and os.path.getsize(f) == 0:
os.remove(f)
if not os.path.exists(f):
downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn)
_downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=max_conn)

# downloaded_files = Fido.fetch(result, path=path, max_conn=max_conn)
data = TimeSeries(downloaded_files, concatenate=True)
Expand Down Expand Up @@ -464,7 +464,11 @@ def stereo_load(instrument, startdate, enddate, spacecraft='ahead', mag_coord='R
df.index = df.index-pd.Timedelta('30s')

if isinstance(resample, str):
df = resample_df(df, resample, pos_timestamp=pos_timestamp)
if instrument.upper() in ['LET', 'MAG', 'MAGB', 'MAGPLASMA']:
cols_unc = []
elif instrument.upper() in ['HET']:
cols_unc = 'auto'
df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=cols_unc, verbose=False)
except (RuntimeError, IndexError):
print(f'Unable to obtain "{dataset}" data for {startdate}-{enddate}!')
downloaded_files = []
Expand Down
Loading