From e41aeb16823ac1a4581588f134b7855c1337fbda Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Tue, 24 Jun 2025 17:39:17 +0100 Subject: [PATCH 01/14] Added new functionality to split runs into categories, F1, F2, N1, and N2. Also padded inproperly sized submaps using np.pad. --- .../timeseries/drms_pipeline.py | 7 +- .../timeseries/sdo_processing.py | 97 +++++++++++++++++-- 2 files changed, 94 insertions(+), 10 deletions(-) diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 6827bb2b..d353aaeb 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -79,8 +79,8 @@ ) packed_maps = namedtuple("packed_maps", ["hmi_origin", "l2_map", "ar_num"]) hmi_origin_patch = crop_map(hmi_proc[0], center, patch_height, patch_width, date) - l2_hmi_packed = [[hmi_origin_patch, hmi_map, noaa_ar] for hmi_map in hmi_proc] - l2_aia_packed = [[hmi_origin_patch, aia_map, noaa_ar] for aia_map in aia_proc] + l2_hmi_packed = [[hmi_origin_patch, hmi_map, noaa_ar, center] for hmi_map in hmi_proc] + l2_aia_packed = [[hmi_origin_patch, aia_map, noaa_ar, center] for aia_map in aia_proc] # Went back to tuples because this was failing in a weird way - something to do with pickle and concurrent futures. Left for future debugging. # l2_hmi_packed = [packed_maps(hmi_origin_patch, hmi_map, noaa_ar) for hmi_map in hmi_proc] @@ -88,13 +88,14 @@ hmi_patch_paths = tqdm(executor.map(map_reproject, l2_hmi_packed), total=len(l2_hmi_packed)) aia_patch_paths = tqdm(executor.map(map_reproject, l2_aia_packed), total=len(l2_aia_packed)) - print(hmi_patch_paths) # For some reason, aia_proc becomes an empty list after this function call. home_table, aia_patch_paths, aia_quality, aia_time, hmi_patch_paths, hmi_quality, hmi_time = ( table_match(list(aia_patch_paths), list(hmi_patch_paths)) ) + + # This can probably be streamlined/functionalized to make the pipeline look better. batched_name = f"{config['paths']['data_folder']}/04_final" Path(f"{batched_name}/records").mkdir(parents=True, exist_ok=True) diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index e78ff1f5..e7256c36 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -51,7 +51,7 @@ ] -def read_data(hek_path: str, srs_path: str, size: int, duration: int): +def read_data(hek_path: str, srs_path: str, size: int, duration: int, flares: str): r""" Read and process data from a parquet file containing HEK catalogue information regarding flaring events. @@ -65,6 +65,9 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int): The size of the sample to be generated. (Generates 10% X, 30% M, 60% C by default) duration : `int` The duration of the data sample in hours. 
+ flares : `str` + Determines if runs provided are 'positive' (flares), 'negative' (no flares), or 'both' (50/50 split of both) + Returns ------- @@ -77,7 +80,8 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int): - The date of the run, used for reprojection - The coordinate of the noaa active region """ - + assert (flares == 'positive' or flares == 'negative' or flares == 'both') + # if flares == 'positive' or flares == 'both': table = Table.read(hek_path) noaa_num_df = table[table["noaa_number"] > 0] flares = noaa_num_df[noaa_num_df["event_type"] == "FL"] @@ -106,7 +110,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int): # c_flares = c_flares[c_flares["noaa_number"] == 11818] # c_flares = c_flares[c_flares["goes_class"] == "C1.6"] c_flares = c_flares[c_flares["tb_date"] == "2013-08-20"] - # 025925 combined = vstack([x_flares, m_flares, c_flares]) combined["c_coord"] = [ @@ -116,9 +119,31 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int): combined["start_time"] = [time - (duration + 1) * u.hour for time in combined["start_time"]] combined["start_time"].format = "fits" combined["end_time"] = combined["start_time"] + duration * u.hour - subset = combined["noaa_number", "goes_class", "start_time", "end_time", "frm_daterun", "c_coord"] - - return subset + if flares == 'positive' or flares == 'both': + combined['category'] = [f"F{flare_check(row['start_time'], row['end_time'], row['noaa_number'], combined)}" for row in combined] + srs_isolated = srs[srs['target_time'] > "2011-01-01"] + srs_isolated = srs_isolated[abs(srs_isolated["longitude"].value) <= 65] + ar_cat = [] + for ar in srs_isolated: + erl_time = Time(ar['target_time']) - (duration+1) * u.hour + cat = flare_check(erl_time, Time(ar['target_time']) - 1 * u.hour, ar['number'], combined) + ar_cat.append(cat) + srs_isolated['category'] = ar_cat + + + if flares == 'positive': + subset = combined["noaa_number", "goes_class", "start_time", "end_time", "frm_daterun", "c_coord", 'category'] + return subset + + # if flares == 'both': + # Combine both the flare samples and the non flare samples into subset and return + # return subset + + # if flares == 'negative': + # return only the non flare samples as subset + # return subset + + def change_time(time: str, shift: int): @@ -709,7 +734,8 @@ def map_reproject(sdo_packed): fits_path : `str` The path location of the saved submap. """ - hmi_origin, sdo_path, ar_num = sdo_packed + hmi_origin, sdo_path, ar_num, center = sdo_packed + lon = int(center.lon.value) # sdo_map = sunpy.map.Map(sdo_packed.l2_map) sdo_map = sunpy.map.Map(sdo_path) with propagate_with_solar_surface(): @@ -723,6 +749,8 @@ def map_reproject(sdo_packed): sdo_rpr.meta["quality"] = sdo_map.meta["quality"] sdo_rpr.meta["wavelnth"] = sdo_map.meta["wavelnth"] sdo_rpr.meta["date-obs"] = sdo_map.meta["date-obs"] + if sdo_rpr.dimensions[0].value > config["drms"]["patch_width"]: + sdo_rpr = pad_map(sdo_rpr, lon, config["drms"]["patch_width"]) save_compressed_map(sdo_rpr, fits_path, hdu_type=CompImageHDU, overwrite=True) return fits_path @@ -856,3 +884,58 @@ def l4_file_pack(aia_paths, hmi_paths, dir_path, rec, out_table, anim_path): shutil.copy(anim_path, f"{dir_path}/data/{rec}") out_table.write(f"{dir_path}/data/{rec}/{rec}.csv", overwrite=True) + +def pad_map(map, long, targ_width): + r""" + Pads the map data of a submap which is narrower than the specified submap width due to reaching the edge of the image array. 
+ + Parameters + ---------- + map : `sunpy.map` + The sunpy submap to be resized. + long : `float` or `int` + The longitude of the active region, used only to check which side of the disk it is located. + targ_width : `int` + The target width of the submaps within the data run. + + Returns + ---------- + new_map : `sunpy.map` + The new, padded submap. + """ + x_dim = map.dimensions[0].value + diff = int(targ_width - x_dim) + if long > 0: + data = np.pad(map.data, ((0,0),(diff,0)), constant_values=np.nan) + else: + data = np.pad(map.data, ((0,0),(0,diff)), constant_values=np.nan) + new_map = sunpy.map.Map(data,map.meta) + return new_map + +def flare_check(start, end, ar_num, table): + r""" + Checks a provided start time and end time, along with an active region number, and determines if a flare occurs within that period. + + Parameters + ---------- + start : `Astropy.Time` + The start time (duration - 1 hour) of a target run. + end : `Astropy.Time` + The end time (start_time - 1 hour) of a target run. + ar_num : `str` + The active region number of a target run. + table : `int` + The table of flare times used to check for flare events within run duration. + + Returns + ---------- + category : `int` + The category of the run. If no flare detected in run, category = 1, otherwise, category = 2. + """ + ar_table = table[table["noaa_number"] == ar_num] + ar_table = ar_table[Time(ar_table["start_time"]) < end] + ar_table = ar_table[Time(ar_table["start_time"]) > start] + category = 1 + if (len(ar_table) > 0): + category = 2 + return category \ No newline at end of file From 8c13daaf7b6dd8040ecceb8c1d7ce971ebcaa3b0 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Wed, 16 Jul 2025 14:03:12 +0100 Subject: [PATCH 02/14] Completed category splitting, updated padding method. 
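
The category labels combine the run type with a flare-incidence flag: "F"/"N" marks whether a run is anchored on a HEK flare or on a quiet SRS active region, and the trailing "1"/"2" comes from flare_check (1 when no further flare from that region falls inside the observation window, 2 when at least one does). A rough usage sketch of the updated read_data call follows; the paths and sizes are placeholders, not part of this change:

    # Illustrative sketch only - placeholder paths/sizes, assuming the read_data signature in this patch.
    from arccnet.data_generation.timeseries.sdo_processing import read_data

    runs = read_data(
        hek_path="hek_catalog.parq",             # HEK flare catalogue (parquet)
        srs_path="srs_processed_catalog.parq",   # parsed NOAA SRS catalogue (parquet)
        size=2,                                  # runs drawn per requested category
        duration=6,                              # hours of data per run
        long_lim=65,                             # reject regions beyond +/-65 deg longitude
        types=["F1", "F2", "N1", "N2"],          # categories to sample
    )
    quiet_clear = runs[runs["category"] == "N1"]  # e.g. keep only quiet-region runs with no flare in the window
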
--- .../timeseries/drms_pipeline.py | 16 +- .../timeseries/sdo_processing.py | 150 ++++++++++-------- 2 files changed, 95 insertions(+), 71 deletions(-) diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 6e26f9eb..58dabed9 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -39,21 +39,22 @@ starts = read_data( hek_path=Path(f"{data_path}/flare_files/hek_swpc_1996-01-01T00:00:00-2023-01-01T00:00:00_dev.parq"), srs_path=Path(f"{data_path}/flare_files/srs_processed_catalog.parq"), - size=10, + size=1, duration=6, + long_lim=65, + types=["F1", "F2", "N1", "N2"], ) cores = int(config["drms"]["cores"]) with ProcessPoolExecutor(cores) as executor: - for record in [starts[-1]]: - noaa_ar, fl_class, start, end, date, center = record + for record in starts: + noaa_ar, fl_class, start, end, date, center, category = record pointing_table = calibrate.util.get_pointing_table(source="jsoc", time_range=[start - 6 * u.hour, end]) - start_split = start.value.split("T")[0] - file_name = f"{start_split}_{fl_class}_{noaa_ar}" + file_name = f"{category}_{start_split}_{fl_class}_{noaa_ar}" patch_height = int(config["drms"]["patch_height"]) * u.pix patch_width = int(config["drms"]["patch_width"]) * u.pix try: - print(record["noaa_number"], record["goes_class"], record["start_time"]) + print(record["noaa_number"], record["goes_class"], record["start_time"], record["category"]) aia_maps, hmi_maps = drms_pipeline( start_t=start, end_t=end, @@ -93,9 +94,6 @@ table_match(list(aia_patch_paths), list(hmi_patch_paths)) ) - - - # This can probably be streamlined/functionalized to make the pipeline look better. batched_name = f"{config['paths']['data_folder']}/04_final" Path(f"{batched_name}/records").mkdir(parents=True, exist_ok=True) Path(f"{batched_name}/tars").mkdir(parents=True, exist_ok=True) diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 9d683cd0..96d01718 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -1,4 +1,3 @@ -# Define required libraries - check to see if Arccnet already has these as requirements. import os import sys import glob @@ -51,7 +50,33 @@ ] -def read_data(hek_path: str, srs_path: str, size: int, duration: int, flares: str): +def rand_select(table, size, types: list): + r""" + Randomly selects targets from provided table and list of data subsets. + + Parameters + ---------- + table : `Astropy.Table` + Provided full table of records. + size : `int` + The size of the returned subsets. + types: `list` + List of datatypes to include. + + Returns + ------- + comb_sample : `Astropy.Table` + Returned random subset containing size records from each data subset. + """ + selection = [] + for type in types: + subtable = table[table["category"] == type] + selection.append(subtable[sample(range(len(subtable)), k=int(size))]) + comb_sample = vstack(selection) + return comb_sample + + +def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: int, types: list): r""" Read and process data from a parquet file containing HEK catalogue information regarding flaring events. @@ -62,11 +87,15 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, flares: st srs_path : `str` The path to the parquet file containing parsed noaa srs active region information. 
size : `int` - The size of the sample to be generated. (Generates 10% X, 30% M, 60% C by default) + The size of each subsample to be generated. duration : `int` The duration of the data sample in hours. flares : `str` - Determines if runs provided are 'positive' (flares), 'negative' (no flares), or 'both' (50/50 split of both) + Determines if runs provided 'positive' (flares), 'negative' (no flares), or 'both' (50/50 split of both) + long_lim : `str` + The longitudinal limit of Active Regions which are accepted for target runs. + types : `list` + Types of data to include in final subsection, corresponds to flares vs non flares (F v N) and incidental and clear runs (1 v 2) Returns @@ -74,76 +103,71 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, flares: st `Astropy.Table` A table of tuples containing the following columns for each flare: - NOAA Active Region Number - - GOES Flare class (C,M,X classes) + - GOES Flare class (C,M,X classes or N for none) - Start time (Duration + 1 hours before event in FITS format) - End time (1 hour before flaring event start time) - The date of the run, used for reprojection - The coordinate of the noaa active region """ - assert (flares == 'positive' or flares == 'negative' or flares == 'both') - # if flares == 'positive' or flares == 'both': + table = Table.read(hek_path) + srs = Table.read(srs_path) + noaa_num_df = table[table["noaa_number"] > 0] flares = noaa_num_df[noaa_num_df["event_type"] == "FL"] flares = flares[flares["frm_daterun"] > "2011-01-01"] - srs = Table.read(srs_path) + flares = flares[ + [flare.startswith("C") or flare.startswith("M") or flare.startswith("X") for flare in flares["goes_class"]] + ] + srs = srs[srs["number"] > 0] + srs = srs[srs["target_time"] > "2011-01-01"] + srs = srs[abs(srs["longitude"].value) <= long_lim] srs = srs[~srs["filtered"]] srs["srs_date"] = srs["target_time"].value srs["srs_date"] = [date.split("T")[0] for date in srs["srs_date"]] + srs["srs_end_time"] = [(Time(time) + duration * u.hour) for time in srs["target_time"]] + print("Parsing Flares") + flares["start_time"] = [time - (duration + 1) * u.hour for time in flares["start_time"]] + flares["start_time"].format = "fits" flares["tb_date"] = flares["start_time"].value - flares["tb_date"] = [date.split(" ")[0] for date in flares["tb_date"]] - flares = join(flares, srs, keys_left="noaa_number", keys_right="number") + flares["tb_date"] = [date.split("T")[0] for date in flares["tb_date"]] + flares["end_time"] = flares["start_time"] + duration * u.hour + flares["category"] = [ + f"F{flare_check(row['start_time'], row['end_time'], row['noaa_number'], flares)}" for row in flares + ] - flares = flares[abs(flares["longitude"].value) <= 65] + flares = join(flares, srs, keys_left="noaa_number", keys_right="number") flares = flares[flares["tb_date"] == flares["srs_date"]] - x_flares = flares[[flare.startswith("X") for flare in flares["goes_class"]]] - # x_flares = x_flares[x_flares["noaa_number"] == 11158] - # x_flares = x_flares[x_flares["goes_class"] == "X2.2"] - # x_flares = x_flares[x_flares["tb_date"] == "2014-10-27"] - x_flares = x_flares[sample(range(len(x_flares)), k=int(0.1 * size))] - m_flares = flares[[flare.startswith("M") for flare in flares["goes_class"]]] - m_flares = m_flares[sample(range(len(m_flares)), k=int(0.3 * size))] - - c_flares = flares[[flare.startswith("C") for flare in flares["goes_class"]]] - # c_flares = c_flares[sample(range(len(c_flares)), k=int(0.6 * size))] - c_flares = c_flares[c_flares["noaa_number"] == 11818] 
- c_flares = c_flares[c_flares["goes_class"] == "C1.6"] - c_flares = c_flares[c_flares["tb_date"] == "2013-08-20"] - - combined = vstack([x_flares, m_flares, c_flares]) - combined["c_coord"] = [ + + flares["c_coord"] = [ SkyCoord(lon * u.deg, lat * u.deg, obstime=t_time, observer="earth", frame=frames.HeliographicStonyhurst) - for lat, lon, t_time in zip(combined["latitude"], combined["longitude"], combined["target_time"]) + for lat, lon, t_time in zip(flares["latitude"], flares["longitude"], flares["target_time"]) ] - combined["start_time"] = [time - (duration + 1) * u.hour for time in combined["start_time"]] - combined["start_time"].format = "fits" - combined["end_time"] = combined["start_time"] + duration * u.hour - if flares == 'positive' or flares == 'both': - combined['category'] = [f"F{flare_check(row['start_time'], row['end_time'], row['noaa_number'], combined)}" for row in combined] - srs_isolated = srs[srs['target_time'] > "2011-01-01"] - srs_isolated = srs_isolated[abs(srs_isolated["longitude"].value) <= 65] - ar_cat = [] - for ar in srs_isolated: - erl_time = Time(ar['target_time']) - (duration+1) * u.hour - cat = flare_check(erl_time, Time(ar['target_time']) - 1 * u.hour, ar['number'], combined) - ar_cat.append(cat) - srs_isolated['category'] = ar_cat - - - if flares == 'positive': - subset = combined["noaa_number", "goes_class", "start_time", "end_time", "frm_daterun", "c_coord", 'category'] - return subset - - # if flares == 'both': - # Combine both the flare samples and the non flare samples into subset and return - # return subset - - # if flares == 'negative': - # return only the non flare samples as subset - # return subset - - + srs["c_coord"] = [ + SkyCoord(lon * u.deg, lat * u.deg, obstime=t_time, observer="earth", frame=frames.HeliographicStonyhurst) + for lat, lon, t_time in zip(srs["latitude"], srs["longitude"], srs["target_time"]) + ] + print("Parsing Active Regions") + ar_cat = [] + for ar in srs: + erl_time = Time(ar["target_time"]) - (duration + 1) * u.hour + cat = f"N{flare_check(erl_time, Time(ar['target_time']) - 1 * u.hour, ar['number'], flares)}" + ar_cat.append(cat) + srs["category"] = ar_cat + srs["ar"] = "N" + + srs_exp = srs["number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category"] + flares_exp = flares["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] + srs_exp.rename_columns( + names=("number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category"), + new_names=("noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"), + ) + combined = vstack([flares_exp, srs_exp]) + + final = rand_select(combined, size, types) + subset = final["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] + return subset def change_time(time: str, shift: int): @@ -749,7 +773,7 @@ def map_reproject(sdo_packed): sdo_rpr.meta["quality"] = sdo_map.meta["quality"] sdo_rpr.meta["wavelnth"] = sdo_map.meta["wavelnth"] sdo_rpr.meta["date-obs"] = sdo_map.meta["date-obs"] - if sdo_rpr.dimensions[0].value > config["drms"]["patch_width"]: + if sdo_rpr.dimensions[0].value > int(config["drms"]["patch_width"]): sdo_rpr = pad_map(sdo_rpr, lon, config["drms"]["patch_width"]) save_compressed_map(sdo_rpr, fits_path, hdu_type=CompImageHDU, overwrite=True) @@ -825,6 +849,7 @@ def l4_file_pack(aia_paths, hmi_paths, dir_path, rec, out_table, anim_path): shutil.copy(anim_path, f"{dir_path}/data/{rec}") out_table.write(f"{dir_path}/data/{rec}/{rec}.csv", 
overwrite=True) + def pad_map(map, long, targ_width): r""" Pads the map data of a submap which is narrower than the specified submap width due to reaching the edge of the image array. @@ -846,12 +871,13 @@ def pad_map(map, long, targ_width): x_dim = map.dimensions[0].value diff = int(targ_width - x_dim) if long > 0: - data = np.pad(map.data, ((0,0),(diff,0)), constant_values=np.nan) + data = np.pad(map.data, ((0, 0), (diff, 0)), constant_values=np.nan) else: - data = np.pad(map.data, ((0,0),(0,diff)), constant_values=np.nan) - new_map = sunpy.map.Map(data,map.meta) + data = np.pad(map.data, ((0, 0), (0, diff)), constant_values=np.nan) + new_map = sunpy.map.Map(data, map.meta) return new_map + def flare_check(start, end, ar_num, table): r""" Checks a provided start time and end time, along with an active region number, and determines if a flare occurs within that period. @@ -876,6 +902,6 @@ def flare_check(start, end, ar_num, table): ar_table = ar_table[Time(ar_table["start_time"]) < end] ar_table = ar_table[Time(ar_table["start_time"]) > start] category = 1 - if (len(ar_table) > 0): + if len(ar_table) > 0: category = 2 - return category \ No newline at end of file + return category From 0d790fda6cd9a5dd81ddd15902dcf342aa817a3b Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Wed, 16 Jul 2025 17:36:34 +0100 Subject: [PATCH 03/14] formatting --- arccnet/data_generation/timeseries/sdo_processing.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 96d01718..becf9787 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -136,10 +136,8 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: flares["category"] = [ f"F{flare_check(row['start_time'], row['end_time'], row['noaa_number'], flares)}" for row in flares ] - flares = join(flares, srs, keys_left="noaa_number", keys_right="number") flares = flares[flares["tb_date"] == flares["srs_date"]] - flares["c_coord"] = [ SkyCoord(lon * u.deg, lat * u.deg, obstime=t_time, observer="earth", frame=frames.HeliographicStonyhurst) for lat, lon, t_time in zip(flares["latitude"], flares["longitude"], flares["target_time"]) @@ -156,7 +154,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: ar_cat.append(cat) srs["category"] = ar_cat srs["ar"] = "N" - srs_exp = srs["number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category"] flares_exp = flares["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] srs_exp.rename_columns( From 55aa12e003fed5c0890f07aa9d2f4160baf7c109 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Mon, 28 Jul 2025 15:36:15 +0100 Subject: [PATCH 04/14] Worked on feedback - added test functions which need access to data. Passes tests locally. 
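
For reference, a minimal sketch of the padding rule exercised by the new padding test; the array below is a synthetic stand-in for map.data and the 400-pixel target width is an arbitrary example, not a value taken from this change:

    # Sketch only: mirrors the np.pad call in pad_map on a synthetic array.
    import numpy as np

    patch = np.random.rand(400, 350)              # 50 columns short of a 400-pixel target width
    short_by = 400 - patch.shape[1]
    # Patch centre at positive Tx (solar west): NaN columns are prepended on the left ...
    padded = np.pad(patch, ((0, 0), (short_by, 0)), constant_values=np.nan)
    # ... otherwise they would be appended on the right instead:
    # padded = np.pad(patch, ((0, 0), (0, short_by)), constant_values=np.nan)
    assert padded.shape == (400, 400)
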
--- .../timeseries/sdo_processing.py | 165 +++++++++++++++--- 1 file changed, 138 insertions(+), 27 deletions(-) diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index becf9787..a8b9172e 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -33,6 +33,7 @@ reproj_log = logging.getLogger("reproject.common") reproj_log.setLevel("WARNING") os.environ["JSOC_EMAIL"] = "danielgass192@gmail.com" +data_path = config["paths"]["data_folder"] __all__ = [ "read_data", @@ -49,6 +50,30 @@ "l4_file_pack", ] +# def test_rand(): +# combined = read_data( +# hek_path=Path(f"{data_path}/flare_files/hek_swpc_1996-01-01T00:00:00-2023-01-01T00:00:00_dev.parq"), +# srs_path=Path(f"{data_path}/flare_files/srs_processed_catalog.parq"), +# size=1, +# duration=6, +# long_lim=65, +# types=["F1", "F2", "N1", "N2"], +# )[1] + +# types=["F1", "F2", "N1", "N2"] + +# # 1. test with full list of samples +# rand_comb_1 = rand_select(combined, 1, types) +# rand_comb_2 = rand_select(combined, 1, types) +# assert (rand_comb_1 != rand_comb_2) +# # 2. test with partial list of samples +# rand_comb_1 = rand_select(combined, 1, ["F1", "N1"]) + +# # 3. test with higher number of sizes +# rand_comb_1 = rand_select(combined, 3, types) +# rand_comb_2 = rand_select(combined, 3, types) +# assert (rand_comb_1 != rand_comb_2) + def rand_select(table, size, types: list): r""" @@ -109,7 +134,12 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: - The date of the run, used for reprojection - The coordinate of the noaa active region """ - + # table_name = f"{size}_{duration}_{long_lim}_{types}.parq".strip(' ') + # if os.path.exists(f"{data_path}/flare_files/{table_name}"): + # print("Run table cached - loading") + # combined = Table.read(table_name) + # else: + # print("No run table cached - creating new table") table = Table.read(hek_path) srs = Table.read(srs_path) @@ -133,9 +163,9 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: flares["tb_date"] = flares["start_time"].value flares["tb_date"] = [date.split("T")[0] for date in flares["tb_date"]] flares["end_time"] = flares["start_time"] + duration * u.hour - flares["category"] = [ - f"F{flare_check(row['start_time'], row['end_time'], row['noaa_number'], flares)}" for row in flares - ] + flare_splits = [flare_check(row["start_time"], row["end_time"], row["noaa_number"], flares) for row in flares] + flares["category"] = [f"F{flare[0]}" for flare in flare_splits] + flares["fl_count"] = [flare[1] for flare in flare_splits] flares = join(flares, srs, keys_left="noaa_number", keys_right="number") flares = flares[flares["tb_date"] == flares["srs_date"]] flares["c_coord"] = [ @@ -147,24 +177,30 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: for lat, lon, t_time in zip(srs["latitude"], srs["longitude"], srs["target_time"]) ] print("Parsing Active Regions") - ar_cat = [] + ar_cat, fl_cat = [], [] for ar in srs: erl_time = Time(ar["target_time"]) - (duration + 1) * u.hour - cat = f"N{flare_check(erl_time, Time(ar['target_time']) - 1 * u.hour, ar['number'], flares)}" - ar_cat.append(cat) + n_splits = flare_check(erl_time, Time(ar["target_time"]) - 1 * u.hour, ar["number"], flares) + ar_cat.append(f"N{n_splits[0]}") + fl_cat.append(n_splits[1]) srs["category"] = ar_cat + srs["n_fl_count"] = fl_cat srs["ar"] = "N" - srs_exp = srs["number", "ar", "target_time", 
"srs_end_time", "srs_date", "c_coord", "category"] - flares_exp = flares["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] + + srs_exp = srs["number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category", "n_fl_count"] + flares_exp = flares[ + "noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category", "fl_count" + ] srs_exp.rename_columns( - names=("number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category"), - new_names=("noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"), + names=("number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category", "n_fl_count"), + new_names=("noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category", "fl_count"), ) combined = vstack([flares_exp, srs_exp]) + # combined.write(f"{data_path}/flare_files/{table_name}", format='parquet') final = rand_select(combined, size, types) subset = final["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] - return subset + return subset, combined def change_time(time: str, shift: int): @@ -216,7 +252,7 @@ def match_files(aia_maps, hmi_maps, table): List of AIA maps. hmi_maps : `list` List of HMI maps. - table : `JSOCResponse` + table : `JSOC Response` AIA pointing table as provided by JSOC. Returns @@ -321,15 +357,15 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): """ client = drms.Client() duration = round((time_2 - time_1).to_value(u.hour)) - qstr_hmi = f"hmi.M_720s[{time_1.value}/{duration}h@{sample}m]" + "{magnetogram}" - hmi_query = client.query(ds=qstr_hmi, key=keys) + qstr_m_hmi = f"hmi.M_720s[{time_1.value}/{duration}h@{sample}m]" + "{magnetogram}" + hmi_query = client.query(ds=qstr_m_hmi, key=keys) good_result = hmi_query[hmi_query.QUALITY == 0] good_num = good_result["*recnum*"].values bad_result = hmi_query[hmi_query.QUALITY != 0] - qstrs_hmi = [f"hmi.M_720s[{time}]" + "{magnetogram}" for time in bad_result["T_REC"]] - hmi_values = [hmi_rec_find(qstr, keys) for qstr in qstrs_hmi] + qstrs_m_hmi = [f"hmi.M_720s[{time}]" + "{magnetogram}" for time in bad_result["T_REC"]] + hmi_values = [hmi_rec_find(qstr, keys, True) for qstr in qstrs_m_hmi] patched_num = [*hmi_values] joined_num = [*good_num, *patched_num] @@ -339,7 +375,25 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): hmi_query_full = client.query(ds=hmi_qstr, key=keys) hmi_result = client.export(hmi_qstr, method="url", protocol="fits", email=os.environ["JSOC_EMAIL"]) hmi_result.wait() - return hmi_query_full, hmi_result + ic_query_full, ic_result = hmi_continuum_export(hmi_query_full, keys) + return hmi_query_full, hmi_result, ic_query_full, ic_result + + +def hmi_continuum_export(hmi_query, keys): + client = drms.Client() + qstrs_ic = [f"hmi.Ic_720s[{time}]" + "{image}" for time in hmi_query["T_REC"]] + + ic_value = [hmi_rec_find(qstr, keys, 3, 12) for qstr in qstrs_ic] + unpacked_ic = list(itertools.chain.from_iterable(ic_value)) + joined_num = [str(num) for num in unpacked_ic] + ic_num_str = str(joined_num).strip("[]") + ic_comb_qstr = f"aia.lev1[! 
FSN in ({ic_num_str}) !]" + "{image_lev1}" + + ic_query_full = client.query(ds=ic_comb_qstr, key=keys) + + ic_result = client.export(ic_comb_qstr, method="url", protocol="fits", email=os.environ["JSOC_EMAIL"]) + ic_result.wait() + return ic_query_full, ic_result def aia_query_export(hmi_query, keys, wavelength): @@ -363,11 +417,11 @@ def aia_query_export(hmi_query, keys, wavelength): qstrs_euv = [f"aia.lev1_euv_12s[{time}][{wavelength}]" + "{image}" for time in hmi_query["T_REC"]] qstrs_uv = [f"aia.lev1_uv_24s[{time}]{[1600, 1700]}" + "{image}" for time in hmi_query["T_REC"]] qstrs_vis = [f"aia.lev1_vis_1h[{time}]{[4500]}" + "{image}" for time in hmi_query["T_REC"]] - euv_value = [aia_rec_find(qstr, keys, 3, 12) for qstr in qstrs_euv] uv_value = [aia_rec_find(qstr, keys, 2, 24) for qstr in qstrs_uv] vis_value = [aia_rec_find(qstr, keys, 1, 60) for qstr in qstrs_vis] unpacked_aia = list(itertools.chain(euv_value, uv_value, vis_value)) + unpacked_aia = list(itertools.chain(euv_value, uv_value)) unpacked_aia = [set for set in unpacked_aia if set is not None] unpacked_aia = list(itertools.chain.from_iterable(unpacked_aia)) joined_num = [str(num) for num in unpacked_aia] @@ -381,7 +435,7 @@ def aia_query_export(hmi_query, keys, wavelength): return aia_query_full, aia_result -def hmi_rec_find(qstr, keys): +def hmi_rec_find(qstr, keys, cont=False): r""" Find the HMI record number for a given query string. @@ -391,18 +445,26 @@ def hmi_rec_find(qstr, keys): A query string. keys : `list` List of keys to query. + cont : `bool` + indicates whether continuum is needed, searches for magnetogram if false. Returns ------- `int` The HMI record number. """ + seg = "{magnetogram}" + series = "hmi.M_720s" + if cont: + seg = "{image}" + series = "hmi.Ic_720s" + client = drms.Client() retries = 0 qry = client.query(ds=qstr, key=keys) time = sunpy.time.parse_time(qry["T_REC"].values[0]) while qry["QUALITY"].values[0] != 0 and retries <= 3: - qry = client.query(ds=f"hmi.M_720s[{time}]" + "{magnetogram}", key=keys) + qry = client.query(ds=f"{series}[{time}]" + seg, key=keys) time = change_time(time, 720) retries += 1 return qry["*recnum*"].values[0] @@ -770,7 +832,7 @@ def map_reproject(sdo_packed): sdo_rpr.meta["quality"] = sdo_map.meta["quality"] sdo_rpr.meta["wavelnth"] = sdo_map.meta["wavelnth"] sdo_rpr.meta["date-obs"] = sdo_map.meta["date-obs"] - if sdo_rpr.dimensions[0].value > int(config["drms"]["patch_width"]): + if sdo_rpr.dimensions[0].value < int(config["drms"]["patch_width"]): sdo_rpr = pad_map(sdo_rpr, lon, config["drms"]["patch_width"]) save_compressed_map(sdo_rpr, fits_path, hdu_type=CompImageHDU, overwrite=True) @@ -847,7 +909,20 @@ def l4_file_pack(aia_paths, hmi_paths, dir_path, rec, out_table, anim_path): out_table.write(f"{dir_path}/data/{rec}/{rec}.csv", overwrite=True) -def pad_map(map, long, targ_width): +# def test_padding(): +# map_width = int(config["drms"]["patch_width"]) +# # 1. test with undersized AR cutout on right side of map, check map size. +# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/8/20/SDO/AIA/11818_03_171.aia.lev1.2013-08-20T075936Z.73314087.image_lev1.fits') +# assert(int(pad_map(sdo_map, map_width).dimensions[0].value) == map_width) +# # 2. test with undersized AR cutout on left side of map, check map size. 
+# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/1/6/SDO/AIA/11653_03_1700.aia.lev1.2013-01-06T234743Z.60394065.image_lev1.fits') +# assert(int(pad_map(sdo_map, int(map_width)).dimensions[0].value) == map_width) +# # 3. test with different targ width, to check that it works with arbitrary widths. +# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/1/6/SDO/AIA/11653_03_1700.aia.lev1.2013-01-06T234743Z.60394065.image_lev1.fits') +# assert(int(pad_map(sdo_map, 900).dimensions[0].value) == 900) + + +def pad_map(map, targ_width): r""" Pads the map data of a submap which is narrower than the specified submap width due to reaching the edge of the image array. @@ -855,8 +930,6 @@ def pad_map(map, long, targ_width): ---------- map : `sunpy.map` The sunpy submap to be resized. - long : `float` or `int` - The longitude of the active region, used only to check which side of the disk it is located. targ_width : `int` The target width of the submaps within the data run. @@ -867,7 +940,7 @@ def pad_map(map, long, targ_width): """ x_dim = map.dimensions[0].value diff = int(targ_width - x_dim) - if long > 0: + if map.center.Tx > 0: data = np.pad(map.data, ((0, 0), (diff, 0)), constant_values=np.nan) else: data = np.pad(map.data, ((0, 0), (0, diff)), constant_values=np.nan) @@ -875,6 +948,41 @@ def pad_map(map, long, targ_width): return new_map +# def test_flare_check(): +# combined = read_data( +# hek_path=Path(f"{data_path}/flare_files/hek_swpc_1996-01-01T00:00:00-2023-01-01T00:00:00_dev.parq"), +# srs_path=Path(f"{data_path}/flare_files/srs_processed_catalog.parq"), +# size=1, +# duration=6, +# long_lim=65, +# types=["F1", "F2", "N1", "N2"], +# )[1] +# # 1. test a flare run known to contain flares +# flares = combined[combined['goes_class'] != 'N'] +# flare = flares[flares['goes_class'] == "C3.7"] +# flare = flare[flare["noaa_number"] == 12644] +# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 2 +# # 2. test a non flare run known to contain flares +# ar = combined[combined['goes_class'] == "N"] +# ar = ar[ar["noaa_number"] == 12038] +# ar = ar[ar['tb_date'] == '2014-04-23'] +# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour +# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 2 +# # 3. test a flare run without flares +# flares = combined[combined['goes_class'] != 'N'] +# flare = combined[combined['goes_class'] == "M1.6"] +# flare = flare[flare["noaa_number"] == 12192] +# print(flare) +# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 1 +# # 4. test a non flare run without flares +# ar = combined[combined['goes_class'] == "N"] +# ar = ar[ar["noaa_number"] == 12524] +# ar = ar[ar['tb_date'] == '2016-03-20'] +# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour +# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 1 +# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], combined)[0]) == 1 + + def flare_check(start, end, ar_num, table): r""" Checks a provided start time and end time, along with an active region number, and determines if a flare occurs within that period. 
@@ -899,6 +1007,9 @@ def flare_check(start, end, ar_num, table): ar_table = ar_table[Time(ar_table["start_time"]) < end] ar_table = ar_table[Time(ar_table["start_time"]) > start] category = 1 + flares = {"X": 0, "M": 0, "C": 0} if len(ar_table) > 0: category = 2 - return category + for cat in flares: + flares[cat] = int(len(ar_table[[flare.startswith(cat) for flare in ar_table["goes_class"]]])) + return category, flares From 595ad1a6c16d71ec1ae78d6a0f9500b7ba709ddd Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Thu, 7 Aug 2025 16:57:34 +0100 Subject: [PATCH 05/14] Refactored tests into separate file within testing folder, created test data. --- .../tests/data/ts_test_data.parq | Bin 0 -> 6617 bytes .../tests/test_sdo_processing.py | 78 +++++++++ .../timeseries/sdo_processing.py | 165 ++++++------------ 3 files changed, 136 insertions(+), 107 deletions(-) create mode 100644 arccnet/data_generation/tests/data/ts_test_data.parq create mode 100644 arccnet/data_generation/tests/test_sdo_processing.py diff --git a/arccnet/data_generation/tests/data/ts_test_data.parq b/arccnet/data_generation/tests/data/ts_test_data.parq new file mode 100644 index 0000000000000000000000000000000000000000..0ca12c92e81fbd001c0192d2a6e3facfe9e77e34 GIT binary patch literal 6617 zcmdT}du&_P8NWBZPMy>a$;dUf2*WMZ%}Siyd;N$Vb#t$sI&tba@AyHt)Nebz*LGq% zFH{K7L5Em{7$B$$Obkt|>nem$w=t+Y+6}Z-Py&Lkqo^y0F%4DxF%Ux?>^sMH{7A}5 zH)-2#&bjw|=X-y@^PMOi@Iwdg;iyy)`3gavgYZwr@ z5f*6}cLCvk21Fi!#X%S!0)b|MfE3;i;~pS1b3kBVu>}S)yF*u}-XiJ<{LB>IG(mgl z9c?f*(zSGLi&9$LQKDGzqaa#Wp~6<%p&FPyWUYLwT6&ym?Dn_xKGEiUcgtY2tGH2JJLyOaJC*K84AcYwp~xc+a;G1MsU?6kxIFC24-D5 zM@nV=hISn(rEDxMCCrjW!XP&h`Z|W{XXTBAv91wzK#HsD&3_o>t*yHDPOO=0*F$_x zdax9qk!U4dN?>hgz^3Mbd;k);dM|%Z-TFPLLDTxZ_q|bb=)32vbEI8mmEN=`@A@$b zePE3f@`MU~t_DdjZhYR#!oVcZ7eP8g^R--ode#!q4V9L7bcLgx$eo$wie z@f<8=N1;lnuv=mHVW6r%Univ&b1)I7V09h_0+S+FH%N@5tq9u60C^g%?nbMl=O2} z1xh&vWF-LH*gqQnw&%yDTrRb=Y4_5mpIJ`b zp*?+v_Td~^+j{zr*K*F!o*ek>$>Z{^vJ)1f2~_zloQ@=^-{&Nq!tPgc^+$HDJF@eS zmT&##2k^6Gc~)}s(~_G{=429O+gZzAr0eu^mYriH+5Mp9owMk9J(p^EX{zO=U*@Wz zgp&g?owF=|<%(&lqykel(hl4?Jou+W$M)aCJa)st!*`Eg`=Yt?z#5z&K-!Tqq_u!F zE&(Ye22oinkcP@$0X4)?!kT0!g@qOai2fXcKnZb3){p2_m_u|7I$$TNQHM4W?1;U9 zziQM0X;shx1{ZIulQP7A4|eyhp+5lFaTsIa5lG-?38VK)I!Sl5+6&{ZA-ZTU~=7&H!b6U zJLzkd56H)RFC5N&{snqo&7DBzTaH^+!Y_iKGXTG$j|hG%sjI*b8eQ-Dr=Iqtuez)8 z&I7lP-;$JmdKd7)`8-ueU!;nS?gPqH4`hyESuLZ}GRAtkL1xhESXG0}q-FG~2BMQ| zA*5z4bKPbc%w`pS8QL2NO2^O*jVz;OI8}p`)p9D6jODakQ;U+*G6pSUl5eIsEjdc) zKq>HX*EL*cmw^?7s)f>N$#MUeeICpW;|@GiYj{I<-x=k`1b-2yWv;1S=nRZ7Pgz6`P;8Xz9;taGXqZyKCteKHvs~w0|r7_n-0dpqMvRA!p0iq zx5NE>C8kXE{5ADL+olqE>Dz& z6QL@;auRSMGix(}iI6`NF4{meX%Gm}w`u z#oLr--x`hdtNeYeP*rVeu?Ixb(%A4Ap-E=@-FBVHY*}n-5`ezzE3G$ zq%`kcrTI`+Fj#r1UE->{JcH$hW{dG1l4$nOn#0f^{?qZ5o6AS{38h$~H0M>CGYT-b zvS(hRlYV)0ML-lWivxRNonon0^RilV=vst^OErKMniU__R)C|Zd>Q;$`(aY?EU7uU zLG$@~q`i2!S898@1U1Ep4uwY}RUEv5Q0-7&mGK9J@Pru7_$TK5qM-cAS~ze1%zP@` zRj*awlJJY+E_K=QZ&NSttK*4`QNO#yR_Y1mTJu%T$ zuZE}Setx9}Uff<`TkQv%$<%y1?yJG%bhv#KIxLn_t0T#2(Vyv3N8*`uffP?nq_csD z2-HoRHx-D`kHM2=RjBUmBg@{{urtMi7kxoZeD|_EcO~*%Q9hZ?tm0K_Ivk9rI9$mDD=lLNR=-JNmdfpo4?K~glZBb0K zS}XQv5pmn{z5{*Z!2=&?K&q@s^f`bgtlG*TQ;4yk3bXbj{Z`D+c-?(z-)Pbp=o!xr zOr|Eio?*eqndigUGuy5zc^lHyKZ@l0(NrzZcm?w=q=R>LgQy8>!AA40GDzNz?Dr1G zyyBbyHq$=0-Z&tJW_%u3#LLaaf;yx-%6AvzMMBOfEb~gHhG4&EI2P)$GhvTa7>Fh_ zj^0ex>t=<4xHTK#>@g2#U<2+xAsC;H`hX97G6Z|ZjE){58|wBlk)aBiM1UJ#@NllQ z*W+4n+N>ysSg2Q+h1^7gy5V`h+mNV|SpfUsi|FVxc)LHYAl#|AM3MIr0cRr=?Jczy2BOfa!4?I^EN+`16z?Mns3kl7}(La1k( 
z(P7&b3vg^K$U`mc=17nmhdkTUeosp9+0ByyZVqZ_Sn#kWqb<%y9X6{F5Qih79s%WI zyTgWPeAL4NZ;yQ;$eJTif56Z1d_ZS4I1~B&dI8VA5yKd&Q(h0q_4sCjA{z^JLrz73 z3CFAyS`L& zIA0J`2`e6E*?(03fjA!n>_JY=v*8g#{#=b1Mtz>Xl-E6%3X9{rhB$LNpwFdIf8=>vKWg(MxGhGZ&35B{DF8hvJ3BfM`_LHHpI41Pu`pKE zI~@0Ls3(u{9f-eUvS_ax+ru|UzJKQ}B86q=8ROT1x4d8z+l%sPbpIemehzOTC-e6m zUh+6e)jbh!^#wo@#p~d`g9Uv<{n+Wem6HG6Nb4%~gnVu<*thY0qXm823h`i9@>eT1 zsD~MPT=kuhHv=h)qWc}*_^}o4M_3fL^ ztR7fm_+uBEF@K)!M6ZBojscFc&`aVzkAnx(AP@e{B7KEt48OlgDAg(d^i(#S(FRi~ mt$%ttIjf!tPov*0)g03f|5W$FA1YS;AwF${Kjg!I>iiS3+1Wq< literal 0 HcmV?d00001 diff --git a/arccnet/data_generation/tests/test_sdo_processing.py b/arccnet/data_generation/tests/test_sdo_processing.py new file mode 100644 index 00000000..3478e01d --- /dev/null +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -0,0 +1,78 @@ +from pathlib import Path + +import sunpy.data.sample +import sunpy.map + +import astropy.units as u +from astropy.coordinates import SkyCoord +from astropy.table import Table +from astropy.time import Time + +from arccnet import config +from arccnet.data_generation.timeseries.sdo_processing import crop_map, pad_map, rand_select + +data_path = config["paths"]["data_folder"] +test_path = Path().resolve().parent + + +def test_rand(): + combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") + types = ["F1", "F2", "N1", "N2"] + + # 1. test with full list of samples + rand_comb_1 = rand_select(combined, 3, types) + rand_comb_2 = rand_select(combined, 3, types) + assert list(rand_comb_1) != list(rand_comb_2) + # 2. test with partial list of samples + rand_comb_1 = rand_select(combined, 3, ["F1", "N1"]) + rand_comb_2 = rand_select(combined, 3, ["F1", "N1"]) + assert list(rand_comb_1) != list(rand_comb_2) + # 3. test with higher number of sizes + rand_comb_1 = rand_select(combined, 6, types) + rand_comb_2 = rand_select(combined, 6, types) + assert list(rand_comb_1) != list(rand_comb_2) + + +def test_padding(): + map_width = int(config["drms"]["patch_width"]) + map_height = int(config["drms"]["patch_height"]) + aia_map = sunpy.map.Map(sunpy.data.sample.AIA_171_IMAGE) + # 1. test with undersized AR cutout on right side of map, check map size. + center = [-800, 0] * u.arcsec + center = SkyCoord(*center, frame=aia_map.coordinate_frame) + aia_smap = crop_map(aia_map, center, map_height * u.pix, (map_width - 100) * u.pix, Time(aia_map.date)) + assert int(pad_map(aia_smap, map_width).dimensions[0].value) == map_width + # 2. test with undersized AR cutout on left side of map, check map size. + center = [800, 0] * u.arcsec + center = SkyCoord(*center, frame=aia_map.coordinate_frame) + aia_smap = crop_map(aia_map, center, map_height * u.pix, (map_width - 100) * u.pix, Time(aia_map.date)) + assert int(pad_map(aia_smap, map_width).dimensions[0].value) == map_width + # 3. test with different targ width, to check that it works with arbitrary widths. + assert int(pad_map(aia_smap, 900).dimensions[0].value) == 900 + + +# This test won't work correctly unless we use the full flare catalogue from HEK, as we don't want to add large files to repo, skipping for now. +# def test_flare_check(): +# combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") +# flares = combined[combined['goes_class'] != 'N'] +# flare = flares[flares['goes_class'] == "C3.7"] +# flare = flare[flare["noaa_number"] == 12644] +# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 2 +# # 2. 
test a non flare run known to contain flares +# ar = combined[combined['goes_class'] == "N"] +# ar = ar[ar["noaa_number"] == 12038] +# ar = ar[ar['tb_date'] == '2014-04-23'] +# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour +# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 2 +# # 3. test a flare run without flares +# flares = combined[combined['goes_class'] != 'N'] +# flare = combined[combined['goes_class'] == "M1.6"] +# flare = flare[flare["noaa_number"] == 12192] +# print(flare) +# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 1 +# # 4. test a non flare run without flares +# ar = combined[combined['goes_class'] == "N"] +# ar = ar[ar["noaa_number"] == 12524] +# ar = ar[ar['tb_date'] == '2016-03-20'] +# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour +# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 1 diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index a8b9172e..481d76f3 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -50,30 +50,6 @@ "l4_file_pack", ] -# def test_rand(): -# combined = read_data( -# hek_path=Path(f"{data_path}/flare_files/hek_swpc_1996-01-01T00:00:00-2023-01-01T00:00:00_dev.parq"), -# srs_path=Path(f"{data_path}/flare_files/srs_processed_catalog.parq"), -# size=1, -# duration=6, -# long_lim=65, -# types=["F1", "F2", "N1", "N2"], -# )[1] - -# types=["F1", "F2", "N1", "N2"] - -# # 1. test with full list of samples -# rand_comb_1 = rand_select(combined, 1, types) -# rand_comb_2 = rand_select(combined, 1, types) -# assert (rand_comb_1 != rand_comb_2) -# # 2. test with partial list of samples -# rand_comb_1 = rand_select(combined, 1, ["F1", "N1"]) - -# # 3. test with higher number of sizes -# rand_comb_1 = rand_select(combined, 3, types) -# rand_comb_2 = rand_select(combined, 3, types) -# assert (rand_comb_1 != rand_comb_2) - def rand_select(table, size, types: list): r""" @@ -321,23 +297,28 @@ def drms_pipeline( aia_maps, hmi_maps : `tuple` A tuple containing the AIA maps and HMI maps. """ - # print(start_t) - hmi_query, hmi_export = hmi_query_export(start_t, end_t, hmi_keys, sample) + + hmi_query, hmi_export, ic_query, ic_export = hmi_query_export(start_t, end_t, hmi_keys, sample) aia_query, aia_export = aia_query_export(hmi_query, aia_keys, wavelengths) + # ic_query.rename(columns={"*recnum*":"FSN"}, inplace=True) + # img_query = pd.concat([aia_query, ic_query],ignore_index=True) + # img_urls = pd.concat([aia_export.urls, ic_export.urls], ignore_index=True) hmi_dls, hmi_exs = l1_file_save(hmi_export, hmi_query, path) + cnt_dls, cnt_exs = l1_file_save(ic_query, ic_export, path) aia_dls, aia_exs = l1_file_save(aia_export, aia_query, path) + img_exs = list(itertools.chain(aia_exs, cnt_exs)) hmi_maps = sunpy.map.Map(hmi_exs) hmi_maps = add_fnames(hmi_maps, hmi_exs) - aia_maps = sunpy.map.Map(aia_exs) - aia_maps = add_fnames(aia_maps, aia_exs) - return aia_maps, hmi_maps + img_maps = sunpy.map.Map(img_exs) + img_maps = add_fnames(img_maps, aia_exs) + return img_maps, hmi_maps def hmi_query_export(time_1, time_2, keys: list, sample: int): r""" - Query and export HMI data from the JSOC database. + Query and export HMI magnetogram data from the JSOC database. 
Parameters ---------- @@ -352,8 +333,8 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): Returns ------- - hmi_query_full, hmi_result : `tuple` - A tuple containing the query result (pandas df) and the export data response (drms export object). + hmi_query_full, hmi_result, ic_query_full, ic_export : `tuple` + A tuple containing the query results of the hmi mag and ic_no_limbdark (pandas df) and the export data response (drms export object). """ client = drms.Client() duration = round((time_2 - time_1).to_value(u.hour)) @@ -365,12 +346,13 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): bad_result = hmi_query[hmi_query.QUALITY != 0] qstrs_m_hmi = [f"hmi.M_720s[{time}]" + "{magnetogram}" for time in bad_result["T_REC"]] - hmi_values = [hmi_rec_find(qstr, keys, True) for qstr in qstrs_m_hmi] + hmi_values = [hmi_rec_find(qstr, keys, 3, 720) for qstr in qstrs_m_hmi] patched_num = [*hmi_values] joined_num = [*good_num, *patched_num] joined_num = [str(num) for num in joined_num] hmi_num_str = str(joined_num).strip("[]") + hmi_qstr = f"hmi.M_720s[! recnum in ({hmi_num_str}) !]" + "{magnetogram}" hmi_query_full = client.query(ds=hmi_qstr, key=keys) hmi_result = client.export(hmi_qstr, method="url", protocol="fits", email=os.environ["JSOC_EMAIL"]) @@ -380,14 +362,29 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): def hmi_continuum_export(hmi_query, keys): + r""" + Query and export HMI continuum data from the JSOC database. + + Parameters + ---------- + hmi_query : `drms query` + HMI query result containing target times. + keys : `list` + A list of keys to query. + + Returns + ------- + hmi_query_full, hmi_result, ic_query_full, ic_export : `tuple` + A tuple containing the query results of the hmi mag and ic_no_limbdark (pandas df) and the export data response (drms export object). + """ client = drms.Client() - qstrs_ic = [f"hmi.Ic_720s[{time}]" + "{image}" for time in hmi_query["T_REC"]] + qstrs_ic = [f"hmi.Ic_noLimbDark_720s[{time}]" + "{image}" for time in hmi_query["T_REC"]] ic_value = [hmi_rec_find(qstr, keys, 3, 12) for qstr in qstrs_ic] unpacked_ic = list(itertools.chain.from_iterable(ic_value)) joined_num = [str(num) for num in unpacked_ic] ic_num_str = str(joined_num).strip("[]") - ic_comb_qstr = f"aia.lev1[! FSN in ({ic_num_str}) !]" + "{image_lev1}" + ic_comb_qstr = f"hmi.Ic_noLimbDark_720s[! recnum in ({ic_num_str}) !]" + "{image}" ic_query_full = client.query(ds=ic_comb_qstr, key=keys) @@ -403,7 +400,7 @@ def aia_query_export(hmi_query, keys, wavelength): Parameters ---------- hmi_query : `drms query` - The HMI query result. + The HMI query result containing target times. keys : `list` List of keys to query. 
wavelength : `list` @@ -416,11 +413,11 @@ def aia_query_export(hmi_query, keys, wavelength): client = drms.Client() qstrs_euv = [f"aia.lev1_euv_12s[{time}][{wavelength}]" + "{image}" for time in hmi_query["T_REC"]] qstrs_uv = [f"aia.lev1_uv_24s[{time}]{[1600, 1700]}" + "{image}" for time in hmi_query["T_REC"]] - qstrs_vis = [f"aia.lev1_vis_1h[{time}]{[4500]}" + "{image}" for time in hmi_query["T_REC"]] + # qstrs_vis = [f"aia.lev1_vis_1h[{time}]{[4500]}" + "{image}" for time in hmi_query["T_REC"]] euv_value = [aia_rec_find(qstr, keys, 3, 12) for qstr in qstrs_euv] uv_value = [aia_rec_find(qstr, keys, 2, 24) for qstr in qstrs_uv] - vis_value = [aia_rec_find(qstr, keys, 1, 60) for qstr in qstrs_vis] - unpacked_aia = list(itertools.chain(euv_value, uv_value, vis_value)) + # vis_value = [aia_rec_find(qstr, keys, 1, 60) for qstr in qstrs_vis] + # unpacked_aia = list(itertools.chain(euv_value, uv_value, vis_value)) unpacked_aia = list(itertools.chain(euv_value, uv_value)) unpacked_aia = [set for set in unpacked_aia if set is not None] unpacked_aia = list(itertools.chain.from_iterable(unpacked_aia)) @@ -435,7 +432,7 @@ def aia_query_export(hmi_query, keys, wavelength): return aia_query_full, aia_result -def hmi_rec_find(qstr, keys, cont=False): +def hmi_rec_find(qstr, keys, retries, sample, cont=False): r""" Find the HMI record number for a given query string. @@ -445,9 +442,9 @@ def hmi_rec_find(qstr, keys, cont=False): A query string. keys : `list` List of keys to query. + resample: `int` cont : `bool` indicates whether continuum is needed, searches for magnetogram if false. - Returns ------- `int` @@ -456,17 +453,16 @@ def hmi_rec_find(qstr, keys, cont=False): seg = "{magnetogram}" series = "hmi.M_720s" if cont: - seg = "{image}" - series = "hmi.Ic_720s" - + seg = "{continuum}" + series = "hmi.Ic_noLimbDark_720s" client = drms.Client() - retries = 0 + count = 0 qry = client.query(ds=qstr, key=keys) time = sunpy.time.parse_time(qry["T_REC"].values[0]) - while qry["QUALITY"].values[0] != 0 and retries <= 3: + while qry["QUALITY"].values[0] != 0 and count <= retries: qry = client.query(ds=f"{series}[{time}]" + seg, key=keys) - time = change_time(time, 720) - retries += 1 + time = change_time(time, sample) + count += 1 return qry["*recnum*"].values[0] @@ -677,17 +673,20 @@ def aia_l2(packed_maps): Path to the processed AIA map. """ path = config["paths"]["data_folder"] - aia_map, hmi_match, table = packed_maps - time = aia_map.date.to_value("ymdhms") - year, month, day = time[0], time[1], time[2] - map_path = f"{path}/02_intermediate/{year}/{month}/{day}/SDO/{aia_map.nickname}" - proc_path = f"{map_path}/02_{aia_map.meta['fname']}" - if not os.path.exists(proc_path): - aia_map = aia_process(aia_map, table) - aia_map = aia_reproject(aia_map, hmi_match) - proc_path = l2_file_save(aia_map, path) - # This updates process status for tqdm more effectively. - sys.stdout.flush() + sdo_map, hmi_match, table = packed_maps + if sdo_map.nickname == "HMI": + proc_path = hmi_l2(sdo_map) + else: + time = sdo_map.date.to_value("ymdhms") + year, month, day = time[0], time[1], time[2] + map_path = f"{path}/02_intermediate/{year}/{month}/{day}/SDO/{sdo_map.nickname}" + proc_path = f"{map_path}/02_{sdo_map.meta['fname']}" + if not os.path.exists(proc_path): + sdo_map = aia_process(sdo_map, table) + sdo_map = aia_reproject(sdo_map, hmi_match) + proc_path = l2_file_save(sdo_map, path) + # This updates process status for tqdm more effectively. 
+ sys.stdout.flush() return proc_path @@ -909,19 +908,6 @@ def l4_file_pack(aia_paths, hmi_paths, dir_path, rec, out_table, anim_path): out_table.write(f"{dir_path}/data/{rec}/{rec}.csv", overwrite=True) -# def test_padding(): -# map_width = int(config["drms"]["patch_width"]) -# # 1. test with undersized AR cutout on right side of map, check map size. -# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/8/20/SDO/AIA/11818_03_171.aia.lev1.2013-08-20T075936Z.73314087.image_lev1.fits') -# assert(int(pad_map(sdo_map, map_width).dimensions[0].value) == map_width) -# # 2. test with undersized AR cutout on left side of map, check map size. -# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/1/6/SDO/AIA/11653_03_1700.aia.lev1.2013-01-06T234743Z.60394065.image_lev1.fits') -# assert(int(pad_map(sdo_map, int(map_width)).dimensions[0].value) == map_width) -# # 3. test with different targ width, to check that it works with arbitrary widths. -# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/1/6/SDO/AIA/11653_03_1700.aia.lev1.2013-01-06T234743Z.60394065.image_lev1.fits') -# assert(int(pad_map(sdo_map, 900).dimensions[0].value) == 900) - - def pad_map(map, targ_width): r""" Pads the map data of a submap which is narrower than the specified submap width due to reaching the edge of the image array. @@ -948,41 +934,6 @@ def pad_map(map, targ_width): return new_map -# def test_flare_check(): -# combined = read_data( -# hek_path=Path(f"{data_path}/flare_files/hek_swpc_1996-01-01T00:00:00-2023-01-01T00:00:00_dev.parq"), -# srs_path=Path(f"{data_path}/flare_files/srs_processed_catalog.parq"), -# size=1, -# duration=6, -# long_lim=65, -# types=["F1", "F2", "N1", "N2"], -# )[1] -# # 1. test a flare run known to contain flares -# flares = combined[combined['goes_class'] != 'N'] -# flare = flares[flares['goes_class'] == "C3.7"] -# flare = flare[flare["noaa_number"] == 12644] -# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 2 -# # 2. test a non flare run known to contain flares -# ar = combined[combined['goes_class'] == "N"] -# ar = ar[ar["noaa_number"] == 12038] -# ar = ar[ar['tb_date'] == '2014-04-23'] -# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour -# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 2 -# # 3. test a flare run without flares -# flares = combined[combined['goes_class'] != 'N'] -# flare = combined[combined['goes_class'] == "M1.6"] -# flare = flare[flare["noaa_number"] == 12192] -# print(flare) -# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 1 -# # 4. test a non flare run without flares -# ar = combined[combined['goes_class'] == "N"] -# ar = ar[ar["noaa_number"] == 12524] -# ar = ar[ar['tb_date'] == '2016-03-20'] -# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour -# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 1 -# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], combined)[0]) == 1 - - def flare_check(start, end, ar_num, table): r""" Checks a provided start time and end time, along with an active region number, and determines if a flare occurs within that period. 
From 91653c72a6b40e197043389b873f92173eec8754 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Fri, 8 Aug 2025 12:53:57 +0100 Subject: [PATCH 06/14] Added remote data flag to test padding, and updated test data so test_flare_check can work properly --- .../tests/data/ts_test_data.parq | Bin 6617 -> 6660 bytes .../tests/test_sdo_processing.py | 53 +++++++++--------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/arccnet/data_generation/tests/data/ts_test_data.parq b/arccnet/data_generation/tests/data/ts_test_data.parq index 0ca12c92e81fbd001c0192d2a6e3facfe9e77e34..c74c2b230d5a0ba4bf8fcafb420108e287cad941 100644 GIT binary patch delta 2672 zcmb7GeQX@X6@PQLb2sbTz1%@|bC+CdJA}=V++KF~&i0u`?8S*(o0?#d9p&Qy=SwA3 zR7$0mg0Sz3(x^mMs`?-bRse@0i-r~)np%;dUYnY@q9Hg=NNiAJtC3I$X_P-K)H-SD zo3))HTT#{3{pRD%dv9jm`_0UarS_)tjcXq^6+!8u%9N);X;R=0fME}Nc@)_Wls@>ujBjv+oke25yNA!Z?noz)2L?kAMFBW05BN4tW_fsK0E&bJ`n$HoRoGp{qZ;Sam;(@c71q_mm|6{j&Nc{GtTU4pgh`vsWHnantYjr+ z%uJdx6GN;bSw%xEolLv36lIVWnec!q#~~%hS>8+cac@r|92Oa z5Er=nh=6k$IAz(h9xQW2kO9iQS=a^P?-azY9_arFjtASN>wLZaRO#_HU_5wQCp93)qf--RwAUTw2q({DG!jn-?A2z2w5vrIG!2)gS!)`R*$&3%b0@O*|~=>BE8~phTFk zslO2>oQV~!BP&`v?8Y9twTHfGFJDFo`_Mj0N!_r0yd}`v5_sSKQEceR*w6)=(CWlU z-sygD>h%Yw2AyCpDVK(vzIO>p3^}1a1L~fEZu{yrLZs8SmBQ*T4hv&-%-+~cNPOJ> z<)zGlOPPy~M6>aa?XJoCS10Q~v0F%~)cm4-nUsbOylc0NkR$A6dyreAFj zS)5hFROv%jZE^R1*~8gb;34MWy+tdY)A_98d7VQmO!p%QWR_6K@4(hPTA^$iVwzOQ zh5{0>Lk@w1!VfqfhPbUuEZ!S+8R^i+g&Ag{T9yxRau?8uA+yK?D<}gLrQhI|oMkX3 zG6Vh(a=AOdMXur#R3*9M0&i)4bLWcAqpJcuU_D-CKL)JQ4cMqln1*4e4T&q$!mq24S<`;jX4x<^?<(wd^6Mz4H)-?#RR+Yz?08NYn< zxWmITM~qTriT1+uk)wEdg$AJjjS~3prmDA#_~EC<{l@M;Ec#v`&0}{}RIUrNWcy0U zl%Tm~n5J%|bfdCF2}+f^RTT}gjBYKBN!7YxMq{j6&tpaF<(On~NmB%?3f(lKF$t@* z6$HqjQI&3l+XPD|L8yqvAd}31gucz_N)imW^MP-N-+&ZpSK1chd}-YZzkve6y#0RCsGG<33*q)6k26*O5bdt!3`Gm876PlPL~!=6IT-aI(eHd4&{hPUY}%JoO}L!?y(=VvEmpzj>qGG zSgPgJ4!?R`&|dRu?DlVT@vIm}gPY<~HK!i-suw-luuuDk5X0rW?_n`f5@lCO)ECtt zSL`cU+GHvo)P2XVc2li8pq=-lcU++RQ2fv5AeunRf#LxyLRhW2XV!7KoKyEv^>aZx z6w+==MIt1MkpMsJs{^I0a_akj^*Gh~742`~xf9O0B3>$4?piHrvfQN}r0S%g^)J>A zE<(%s=jAr1=ypq$rN!%VYF|J->C-yPwf!+3tJ|e|o)h(ueT&O?aXH9p07?x2G*Bji(0OM?xQa>7M3DVGccm7Qm=R}%pzw-L(HZcw zMT?(q#{MJ}9Es*x%;z%LDNL?l_iNs%4T0J%p}Lck(xuaGd+6cO>z^(=zPQ}K`i_U& zzyri-n?w8@*NKW9r#G-h0G^_UqZsOxAfPy`G0Cv35!RrLgbmE{_s7B!%#ta($fAji z7sZ4migFscUl+V6&UtfSf?)<%d$|!8g26~QVY@cMQP^3Xm>$|$90hU84_(6=OscGN zK$*deP|;eonyYMSOeZT_Uq9yPcxkZu1zaOnvv;>Z4_JFQBes{+L4;_I2OV^D^IdzY zt8&mlX;{Gmkoh1vkd+`b1FwOs0eKhX1CWc5fW%&*9-3K@-JtSakRY1W$si3NG~;_X zmNZ*XpkB~UfKVby7MDgdsc8;1EdqEUmjm@>s&-MsAJFZ^Ze}uRK1i`+pf>Dh{KQ_x zCXQvB%X@%Xu*T4>ae4jU(S~2p58Qf+ji&% z5o<0H``}b)cEq|dnL7t#w#$U0!4H~yKE{|oZuJcSw%0twV7Avf%iuddvrZ(}oJ_7c zYBCdQ(V*2>7`L=A?me@2h1$15ebd6uVBd-eW@r*w-V75|ceY?&2hcKS{hWY&f{l2mq z0|yoa4t!)?B~4x=_W;3*^k`o>BmF~1ciR$e(;d^_e0gTszKMzUk|I4ihtVTYj2dQ9 znr7k}cF;^+Ll4?xh#yQnu?ID0DW2O+(>6pCI=F!vMi7PKHdCNG8c7vuTu2AC0%=Av z1)v)e1$fh(;~ro&^gnUsrc`;oL_7^tF>I`8GPVnP4npkK>zcCe|f|UgxxjVl$X98E)qvmP6 zx%TXK>+ehSJ7V?F`2+jStCYEVgU+$Pe$&qu7fDah5SlB(K$RQZL$#G#cVwDwS>;{X zzI^7A`RwnjE}yDiPP|Bp_t0sQbOCH=x~^&wRg1es4_Isa(L|3|*Hq1rJ*=)8a+22#)$j%UhN{I>Eh&sAhKe6V5%3EAj4_@u zH9SlclLI88;*-7}6k)Qy&eACOP<7u{9vV?O>4z$*8gZc!MVUr9iI@4>P!wmyL0Fuo z%NJ1$XLI6sS|Qzc77q}Zb{VLH#7MxX81>3|Y^WzzjpCKv$j~1vjjLJ$=BYsm+=w(^ z66$Qs#dv&e#L)40@Z=#0)wc|L?ei@pN(Oob&zFNe#3W)ijD8xi5W<@|fF&Gx<)gT**i$oYHqhSyQ60H-DS+e{#VY z8Stl|fXK@+>=qV#DZ>f0i%T&N(lKX++%F z$6@?a^A|3*eQd|lF033HtNdM=-^<@#y5mZ0VUW}_LCPqswC(+0ctK!)_rkvk_*eN4 D+4o$R diff --git a/arccnet/data_generation/tests/test_sdo_processing.py 
b/arccnet/data_generation/tests/test_sdo_processing.py index 3478e01d..519d11aa 100644 --- a/arccnet/data_generation/tests/test_sdo_processing.py +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -1,5 +1,6 @@ from pathlib import Path +import pytest import sunpy.data.sample import sunpy.map @@ -9,7 +10,7 @@ from astropy.time import Time from arccnet import config -from arccnet.data_generation.timeseries.sdo_processing import crop_map, pad_map, rand_select +from arccnet.data_generation.timeseries.sdo_processing import crop_map, flare_check, pad_map, rand_select data_path = config["paths"]["data_folder"] test_path = Path().resolve().parent @@ -33,6 +34,7 @@ def test_rand(): assert list(rand_comb_1) != list(rand_comb_2) +@pytest.mark.remote_data def test_padding(): map_width = int(config["drms"]["patch_width"]) map_height = int(config["drms"]["patch_height"]) @@ -51,28 +53,27 @@ def test_padding(): assert int(pad_map(aia_smap, 900).dimensions[0].value) == 900 -# This test won't work correctly unless we use the full flare catalogue from HEK, as we don't want to add large files to repo, skipping for now. -# def test_flare_check(): -# combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") -# flares = combined[combined['goes_class'] != 'N'] -# flare = flares[flares['goes_class'] == "C3.7"] -# flare = flare[flare["noaa_number"] == 12644] -# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 2 -# # 2. test a non flare run known to contain flares -# ar = combined[combined['goes_class'] == "N"] -# ar = ar[ar["noaa_number"] == 12038] -# ar = ar[ar['tb_date'] == '2014-04-23'] -# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour -# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 2 -# # 3. test a flare run without flares -# flares = combined[combined['goes_class'] != 'N'] -# flare = combined[combined['goes_class'] == "M1.6"] -# flare = flare[flare["noaa_number"] == 12192] -# print(flare) -# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 1 -# # 4. test a non flare run without flares -# ar = combined[combined['goes_class'] == "N"] -# ar = ar[ar["noaa_number"] == 12524] -# ar = ar[ar['tb_date'] == '2016-03-20'] -# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour -# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 1 +def test_flare_check(): + combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") + flares = combined[combined["goes_class"] != "N"] + flare = flares[flares["goes_class"] == "C3.7"] + flare = flare[flare["noaa_number"] == 12644] + assert (flare_check(flare["start_time"], flare["end_time"], flare["noaa_number"], flares)[0]) == 2 + # 2. test a non flare run known to contain flares + ar = combined[combined["goes_class"] == "N"] + ar = ar[ar["noaa_number"] == 12038] + ar = ar[ar["tb_date"] == "2014-04-23"] + erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour + assert (flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 2 + # 3. test a flare run without flares + flares = combined[combined["goes_class"] != "N"] + flare = combined[combined["goes_class"] == "M1.6"] + flare = flare[flare["noaa_number"] == 12192] + print(flare) + assert (flare_check(flare["start_time"], flare["end_time"], flare["noaa_number"], flares)[0]) == 1 + # 4. 
test a non flare run without flares + ar = combined[combined["goes_class"] == "N"] + ar = ar[ar["noaa_number"] == 12524] + ar = ar[ar["tb_date"] == "2016-03-20"] + erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour + assert (flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 1 From 777e72fec8ac654dd389b80c203beb5333903b33 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Fri, 8 Aug 2025 14:52:37 +0100 Subject: [PATCH 07/14] fixed path localisation for accessing testing data --- arccnet/data_generation/tests/test_sdo_processing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arccnet/data_generation/tests/test_sdo_processing.py b/arccnet/data_generation/tests/test_sdo_processing.py index 519d11aa..c409e173 100644 --- a/arccnet/data_generation/tests/test_sdo_processing.py +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -13,11 +13,11 @@ from arccnet.data_generation.timeseries.sdo_processing import crop_map, flare_check, pad_map, rand_select data_path = config["paths"]["data_folder"] -test_path = Path().resolve().parent +test_path = Path(__file__).resolve().parent def test_rand(): - combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.parq") types = ["F1", "F2", "N1", "N2"] # 1. test with full list of samples @@ -54,7 +54,7 @@ def test_padding(): def test_flare_check(): - combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.parq") flares = combined[combined["goes_class"] != "N"] flare = flares[flares["goes_class"] == "C3.7"] flare = flare[flare["noaa_number"] == 12644] From 0e4c290318811478c1d7c901256006eee6d69e2c Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Wed, 13 Aug 2025 11:43:04 +0100 Subject: [PATCH 08/14] Fixing continuum pipeline and integrating results with aia query/exports. --- .../tests/data/ts_test_data.parq | Bin 6660 -> 0 bytes .../tests/test_sdo_processing.py | 4 +- .../timeseries/drms_pipeline.py | 2 +- .../timeseries/sdo_processing.py | 54 +++++++++--------- arccnet/visualisation/data.py | 8 ++- 5 files changed, 35 insertions(+), 33 deletions(-) delete mode 100644 arccnet/data_generation/tests/data/ts_test_data.parq diff --git a/arccnet/data_generation/tests/data/ts_test_data.parq b/arccnet/data_generation/tests/data/ts_test_data.parq deleted file mode 100644 index c74c2b230d5a0ba4bf8fcafb420108e287cad941..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6660 zcmdT}du&_P89z7iO|BC=#L#POmaR^qZYXhb@3o!U8N<0w>e#9Ce%U50@hi4dzZyHw zk&+>X2CA4;wu)5+3Y#ikn^MFoB(}O`g-(n?2(Q94S`CSHtV8@!p{=m7?;P8S^Po`n z&vu*d-gCb5z0dD_$AT`ghi4`1VVb>EZEow43iO{Akf`FufT%4PRbF+1HkwY7$x;m4Us$yj9&mP z038K-0q7+VkaE%{Q~{HEAZ+deFax}_1|5XSDWJar{T-MIL0WV3M*<>`QJ?*?h;^|; zwd@dE$(FDsRVpUGqQY5`M~P}S;KEYvQ$_F|qDtgV8d;0Cs&s!%`Rk^uHaV{8?|>y4A*kCWihY)HvKmsiIgSGD4hg)P`8p%T$XtA#89vQIT?5T9 zd>t=mq?L@ZZU7ZYkTQ~XUS5Or@){81Xbm#UH8mhYj0trnNUMo$SxKt_ z)kv^)l>}UI;8AmXHgt&`q3Yr$6hw;FdFkN=bm9-_nTma~k8!vsXm+942?fI2e6FYp z^w8E{TSbaN--V*y4s-{EGLLufA)wRjeq*KW=sK9Gfpl1INXj6jV31#0 zDVd+b!cR&WX1?e+ZI{f`c)kUTb>op2l=kCU2G8(rp9Xpv=uMzAu%AU4dPIUsuuK(} zX#)OnAW)Fa&z`W-XC&-N@CcxjFu$e2WjPjmN6yq5PFY2qj32)15s+U|F?fU%#t2x? 
z1Ghjebajx8QoO#s^#!S$HNyLZ(2sE&9eTVSk6x^L`ktgCX#?0A)c4@R)SZF9CClU#7&vR;FbscYv94pNa_S!D-OK=WhphiKOCvoad&~0g&td7+jCPJX8hFj3 zA_zZjdF5sNoUxFQ=U&_ZlO=GJZe2JFm>lTG&sl=W_1smMM5yu`b(~(uHI%UxvId>7LtVj`biy~QWM&=5 ztE(8Z4&JmwUL_MSqfZ4NqmJj)RWkS(gbIXAptD2ADfUVP9aVvh>MCTaGa(t6=GEJ& z3dKIW@NLCoNKfr!_ih4r2A!aI3;}t%-0tsppzkcy;reMTA?_Hj9`2TMm}V$jv@|JfrEH3+CZ6* zpg0I?+tChc3D%O8CLJf}jI+p0_sJn}j$V?Vg5Y^}yF}orV~RdT5Oku`#AnL{6V9v? z^lLjWTvBBMmtR%EIA2hx?iGWZV>u_w5NNA5;bzkz&?Ov?N4D%+fBEsj2{@G#Q02(sT@*QsrPA76~t~tC6($`;j%@_A?5y3<;N23qs3b0 zN`3&TYvCjywg|zi5;eMY++OR%93AVGuMI&&8^EP-;y>G!i8PTz#C&q(yeb8iymPRw07>{|=jhfL&I+bHZ z;=`#4-)IcD+qQ4ZF`*?xB+I7ozSs3GN7GD#$!~Z*7Tm|@mi2|Nqc7z36BFqTqDqbj z{gG585pUG!bB4iTQ!x@vt2Gj}epJMfs02y$Qno3bYGuoJ(CX)A|aVZod!BHE7aTZug99&Af_ z`VtMk)`5x6vE-QB)f4py=9wUgtaby$|cRZaEpBeK zdzDVyCk)KGgpQQk)iLXAw&EPZ0ef@`Y7_G7duF_Y#`qeYS@1s-u$eh8_;tpTyW4x$ z`tqb87OrEg$J`v@LP5b8_6R)*++CnI{WbNzvcEuU+bFJ|XKikfgFhCX@aR{^=Z1VegMmqZe0R!GxJMD| zY{1@@^u+?^<`^FhwC-$hH17=i1U~E+VJ~dvQNJ(%b+)Cvu4L3>GmrU%Y1l*Ktcy1_ zG)KgcquCnu#d<~qtx;TyU5;kV6GJW@WLs^se%?F^`w#f(nepkZMrS-*UpL^{(`)R< z`;@f5H0Y1A}cbpQ{^U^zUg~S=-}j!ExIPeGnWw zqa9JGN7&aIZSV~Cv}9v9tn4|QFUV;HD=uz1e!Tyk5itzdgPNMBg1yGnru>H9~A{fU|5(9Ia*^u=z!KO!;F2V_sKB(kB=i94+n3 zvG|1!$N^(?fOF^ahI)qm@&D7huRm+kaE|kP4K;Ioz5QHocc#GZWWd!k>emlMJTabw zy^ZgR_*ZIdZ;QHx=&X0pJOOL7%X?<;7=yjWJ&X^u;vOh=;TQCo2=wPs-yrPCfY}Ir zE`|G}C^om_Hs4FyVhGx73+cBptkjBMBI&o;`=Re9=*Pg?X(RW6HGiKFIe3vvVNL&U z>93!xujjgU)YoJ2?tWrFYnlv+GBIWEiMRyZll#SbEMGsC_t!%Fk((pizq1~(!GiNl z@arH;ZtzL`g~SxTf3To9O_p$w+53)6MN*`-J&`Q6`9Krr>k!>tIeXphBbWcDzgcYS%h}tR%ZG%Ly;_Mw1H7Tft-TYnZlGi#e80n&Jk7cL5hl6)SXUo> zm0MTcOIR4+<77YK1wR%b6}JfWCv_l;#5{uxEvz5 z27QQc3fs#){p9-{q|zk4<9jB8X`Mfr)Op9p6H}VW;5hzmsS&t3_@~|n|E*%fKlEuO KLS69xfBpl7N6BLV diff --git a/arccnet/data_generation/tests/test_sdo_processing.py b/arccnet/data_generation/tests/test_sdo_processing.py index c409e173..7fb6fb72 100644 --- a/arccnet/data_generation/tests/test_sdo_processing.py +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -17,7 +17,7 @@ def test_rand(): - combined = Table.read(f"{test_path}/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.ecsv") types = ["F1", "F2", "N1", "N2"] # 1. 
test with full list of samples @@ -54,7 +54,7 @@ def test_padding(): def test_flare_check(): - combined = Table.read(f"{test_path}/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.ecsv") flares = combined[combined["goes_class"] != "N"] flare = flares[flares["goes_class"] == "C3.7"] flare = flare[flare["noaa_number"] == 12644] diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 58dabed9..978b0c18 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -43,7 +43,7 @@ duration=6, long_lim=65, types=["F1", "F2", "N1", "N2"], - ) + )[0] cores = int(config["drms"]["cores"]) with ProcessPoolExecutor(cores) as executor: for record in starts: diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 481d76f3..41d84155 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -1,4 +1,5 @@ import os +import re import sys import glob import shutil @@ -33,7 +34,6 @@ reproj_log = logging.getLogger("reproject.common") reproj_log.setLevel("WARNING") os.environ["JSOC_EMAIL"] = "danielgass192@gmail.com" -data_path = config["paths"]["data_folder"] __all__ = [ "read_data", @@ -110,12 +110,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: - The date of the run, used for reprojection - The coordinate of the noaa active region """ - # table_name = f"{size}_{duration}_{long_lim}_{types}.parq".strip(' ') - # if os.path.exists(f"{data_path}/flare_files/{table_name}"): - # print("Run table cached - loading") - # combined = Table.read(table_name) - # else: - # print("No run table cached - creating new table") table = Table.read(hek_path) srs = Table.read(srs_path) @@ -172,7 +166,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: new_names=("noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category", "fl_count"), ) combined = vstack([flares_exp, srs_exp]) - # combined.write(f"{data_path}/flare_files/{table_name}", format='parquet') final = rand_select(combined, size, types) subset = final["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] @@ -300,19 +293,16 @@ def drms_pipeline( hmi_query, hmi_export, ic_query, ic_export = hmi_query_export(start_t, end_t, hmi_keys, sample) aia_query, aia_export = aia_query_export(hmi_query, aia_keys, wavelengths) - # ic_query.rename(columns={"*recnum*":"FSN"}, inplace=True) - # img_query = pd.concat([aia_query, ic_query],ignore_index=True) - # img_urls = pd.concat([aia_export.urls, ic_export.urls], ignore_index=True) hmi_dls, hmi_exs = l1_file_save(hmi_export, hmi_query, path) - cnt_dls, cnt_exs = l1_file_save(ic_query, ic_export, path) + cnt_dls, cnt_exs = l1_file_save(ic_export, ic_query, path) aia_dls, aia_exs = l1_file_save(aia_export, aia_query, path) img_exs = list(itertools.chain(aia_exs, cnt_exs)) hmi_maps = sunpy.map.Map(hmi_exs) hmi_maps = add_fnames(hmi_maps, hmi_exs) img_maps = sunpy.map.Map(img_exs) - img_maps = add_fnames(img_maps, aia_exs) + img_maps = add_fnames(img_maps, img_exs) return img_maps, hmi_maps @@ -378,13 +368,11 @@ def hmi_continuum_export(hmi_query, keys): A tuple containing the query results of the hmi mag and ic_no_limbdark (pandas df) and the export data response (drms export object). 
""" client = drms.Client() - qstrs_ic = [f"hmi.Ic_noLimbDark_720s[{time}]" + "{image}" for time in hmi_query["T_REC"]] - - ic_value = [hmi_rec_find(qstr, keys, 3, 12) for qstr in qstrs_ic] - unpacked_ic = list(itertools.chain.from_iterable(ic_value)) - joined_num = [str(num) for num in unpacked_ic] + qstrs_ic = [f"hmi.Ic_noLimbDark_720s[{time}]{{continuum}}" for time in hmi_query["T_REC"]] + ic_value = [hmi_rec_find(qstr, keys, 3, 12, cont=True) for qstr in qstrs_ic] + joined_num = [str(num) for num in ic_value] ic_num_str = str(joined_num).strip("[]") - ic_comb_qstr = f"hmi.Ic_noLimbDark_720s[! recnum in ({ic_num_str}) !]" + "{image}" + ic_comb_qstr = f"hmi.Ic_noLimbDark_720s[! recnum in ({ic_num_str}) !]" + "{continuum}" ic_query_full = client.query(ds=ic_comb_qstr, key=keys) @@ -413,11 +401,8 @@ def aia_query_export(hmi_query, keys, wavelength): client = drms.Client() qstrs_euv = [f"aia.lev1_euv_12s[{time}][{wavelength}]" + "{image}" for time in hmi_query["T_REC"]] qstrs_uv = [f"aia.lev1_uv_24s[{time}]{[1600, 1700]}" + "{image}" for time in hmi_query["T_REC"]] - # qstrs_vis = [f"aia.lev1_vis_1h[{time}]{[4500]}" + "{image}" for time in hmi_query["T_REC"]] euv_value = [aia_rec_find(qstr, keys, 3, 12) for qstr in qstrs_euv] uv_value = [aia_rec_find(qstr, keys, 2, 24) for qstr in qstrs_uv] - # vis_value = [aia_rec_find(qstr, keys, 1, 60) for qstr in qstrs_vis] - # unpacked_aia = list(itertools.chain(euv_value, uv_value, vis_value)) unpacked_aia = list(itertools.chain(euv_value, uv_value)) unpacked_aia = [set for set in unpacked_aia if set is not None] unpacked_aia = list(itertools.chain.from_iterable(unpacked_aia)) @@ -457,12 +442,24 @@ def hmi_rec_find(qstr, keys, retries, sample, cont=False): series = "hmi.Ic_noLimbDark_720s" client = drms.Client() count = 0 + # print(qstr) qry = client.query(ds=qstr, key=keys) - time = sunpy.time.parse_time(qry["T_REC"].values[0]) + # print(qry) + if qry.empty: + time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) + qry["QUALITY"] = [10000] + # print(qry) + else: + time = sunpy.time.parse_time(qry["T_REC"].values[0]) + # print(qry["QUALITY"]) while qry["QUALITY"].values[0] != 0 and count <= retries: qry = client.query(ds=f"{series}[{time}]" + seg, key=keys) + if qry.empty: + time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) + qry["QUALITY"] = [10000] time = change_time(time, sample) count += 1 + qry["*recnum*"] = [0] return qry["*recnum*"].values[0] @@ -817,8 +814,6 @@ def map_reproject(sdo_packed): The path location of the saved submap. """ hmi_origin, sdo_path, ar_num, center = sdo_packed - lon = int(center.lon.value) - # sdo_map = sunpy.map.Map(sdo_packed.l2_map) sdo_map = sunpy.map.Map(sdo_path) with propagate_with_solar_surface(): sdo_rpr = sdo_map.reproject_to(hmi_origin.wcs) @@ -832,7 +827,7 @@ def map_reproject(sdo_packed): sdo_rpr.meta["wavelnth"] = sdo_map.meta["wavelnth"] sdo_rpr.meta["date-obs"] = sdo_map.meta["date-obs"] if sdo_rpr.dimensions[0].value < int(config["drms"]["patch_width"]): - sdo_rpr = pad_map(sdo_rpr, lon, config["drms"]["patch_width"]) + sdo_rpr = pad_map(sdo_rpr, config["drms"]["patch_width"]) save_compressed_map(sdo_rpr, fits_path, hdu_type=CompImageHDU, overwrite=True) return fits_path @@ -856,11 +851,12 @@ def vid_match(table, name, path): output_file : `str` A string containing the path of the completed mosaic animation. 
""" - # Check this carefully hmi_files = table["HMI files"].value + table["Wavelength"] = [int(wave) for wave in table["Wavelength"]] wvls = np.unique([table["Wavelength"].value]) + hmi_files = np.unique(hmi_files) - nrows, ncols = 4, 3 # define your subplot grid + nrows, ncols = 4, 3 # define subplot grid for file in range(len(hmi_files)): hmi = hmi_files[file] mosaic_plot(hmi, name, file, nrows, ncols, wvls, table, path) @@ -925,7 +921,9 @@ def pad_map(map, targ_width): The new, padded submap. """ x_dim = map.dimensions[0].value + targ_width = int(targ_width) diff = int(targ_width - x_dim) + # print(diff, map.dimensions, map.wavelength) if map.center.Tx > 0: data = np.pad(map.data, ((0, 0), (diff, 0)), constant_values=np.nan) else: diff --git a/arccnet/visualisation/data.py b/arccnet/visualisation/data.py index 79ce8c0c..1033fecb 100644 --- a/arccnet/visualisation/data.py +++ b/arccnet/visualisation/data.py @@ -522,15 +522,19 @@ def mosaic_plot(hmi, name, file, nrows, ncols, wvls, table, path): files = table[table["Wavelength"] == wv] files = files[files["HMI files"] == hmi] aia_files = files["AIA files"] + cmap = f"sdoaia{wv}" + if wv == 6173: + # keep yellow cmap + cmap = "sdoaia4500" try: aia_map = Map(aia_files.value[0]) ax = fig.add_subplot(nrows, ncols, i + 1) - ax.imshow(np.sqrt(aia_map.data), cmap=f"sdoaia{wv}") + ax.imshow(np.sqrt(aia_map.data), cmap=cmap) ax.text(0.05, 0.05, f"{wv} - {aia_map.date}", color="w", transform=ax.transAxes, fontsize=5) except IndexError: aia_map = np.zeros(hmi_map.data.shape) ax = fig.add_subplot(nrows, ncols, i + 1) - ax.imshow(np.sqrt(aia_map.data), cmap=f"sdoaia{wv}") + ax.imshow(np.sqrt(aia_map.data), cmap=cmap) ax.text(0.05, 0.05, f"{wv} - MISSING", color="w", transform=ax.transAxes, fontsize=5) else: From 0cf99ddbeb4140a9e19bd05320445651015c9909 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Wed, 13 Aug 2025 12:19:14 +0100 Subject: [PATCH 09/14] Fixed some formatting, removed unnecessary commented code, changed time series test data from .parq to .ecsv --- .../tests/data/ts_test_data.ecsv | 75 ++++++++++++++++++ .../tests/data/ts_test_data.parq | Bin 6660 -> 0 bytes .../tests/test_sdo_processing.py | 1 - .../timeseries/sdo_processing.py | 31 +++----- 4 files changed, 84 insertions(+), 23 deletions(-) create mode 100644 arccnet/data_generation/tests/data/ts_test_data.ecsv delete mode 100644 arccnet/data_generation/tests/data/ts_test_data.parq diff --git a/arccnet/data_generation/tests/data/ts_test_data.ecsv b/arccnet/data_generation/tests/data/ts_test_data.ecsv new file mode 100644 index 00000000..bb2c54cd --- /dev/null +++ b/arccnet/data_generation/tests/data/ts_test_data.ecsv @@ -0,0 +1,75 @@ +# %ECSV 1.0 +# --- +# datatype: +# - {name: noaa_number, datatype: int64} +# - {name: goes_class, datatype: string} +# - {name: start_time, datatype: string} +# - {name: end_time, datatype: string} +# - {name: tb_date, datatype: string} +# - {name: category, datatype: string} +# meta: !!omap +# - __serialized_columns__: +# end_time: +# __class__: astropy.time.core.Time +# format: fits +# in_subfmt: '*' +# out_subfmt: '*' +# precision: 3 +# scale: utc +# value: !astropy.table.SerializedColumn {name: end_time} +# start_time: +# __class__: astropy.time.core.Time +# format: fits +# in_subfmt: '*' +# out_subfmt: '*' +# precision: 3 +# scale: utc +# value: !astropy.table.SerializedColumn {name: start_time} +# schema: astropy-2.0 +noaa_number goes_class start_time end_time tb_date category +11165 N 
2011-03-06T00:00:00.000 2011-03-06T06:00:00.000 2011-03-06 N2 +11240 N 2011-06-27T00:00:00.000 2011-06-27T06:00:00.000 2011-06-27 N1 +11271 N 2011-08-26T00:00:00.000 2011-08-26T06:00:00.000 2011-08-26 N2 +11283 N 2011-09-01T00:00:00.000 2011-09-01T06:00:00.000 2011-09-01 N1 +11297 C1.5 2011-09-16T12:26:00.000 2011-09-16T18:26:00.000 2011-09-16 F1 +11339 X1.9 2011-11-03T13:16:00.000 2011-11-03T19:16:00.000 2011-11-03 F2 +11392 C2.6 2012-01-06T16:54:00.000 2012-01-06T22:54:00.000 2012-01-06 F1 +11476 N 2012-05-10T00:00:00.000 2012-05-10T06:00:00.000 2012-05-10 N2 +11476 N 2012-05-14T00:00:00.000 2012-05-14T06:00:00.000 2012-05-14 N2 +11479 C1.1 2012-05-18T18:15:00.000 2012-05-19T00:15:00.000 2012-05-18 F1 +11513 N 2012-07-07T00:00:00.000 2012-07-07T06:00:00.000 2012-07-07 N2 +11515 N 2012-07-05T00:00:00.000 2012-07-05T06:00:00.000 2012-07-05 N2 +11542 C1.6 2012-08-14T15:23:00.000 2012-08-14T21:23:00.000 2012-08-14 F1 +11615 C5.7 2012-11-17T20:55:00.000 2012-11-18T02:55:00.000 2012-11-17 F1 +11616 N 2012-11-16T00:00:00.000 2012-11-16T06:00:00.000 2012-11-16 N1 +11621 N 2012-12-01T00:00:00.000 2012-12-01T06:00:00.000 2012-12-01 N1 +11654 C1.5 2013-01-10T05:15:00.000 2013-01-10T11:15:00.000 2013-01-10 F2 +11726 C1.5 2013-04-23T08:04:00.000 2013-04-23T14:04:00.000 2013-04-23 F2 +11865 C4.4 2013-10-15T07:37:00.000 2013-10-15T13:37:00.000 2013-10-15 F2 +11877 C1.1 2013-10-21T07:33:00.000 2013-10-21T13:33:00.000 2013-10-21 F1 +11928 N 2013-12-18T00:00:00.000 2013-12-18T06:00:00.000 2013-12-18 N1 +12010 C1.3 2014-03-22T13:04:00.000 2014-03-22T19:04:00.000 2014-03-22 F1 +12038 C4.3 2014-04-22T17:48:00.000 2014-04-22T23:48:00.000 2014-04-22 F1 +12038 N 2014-04-23T00:00:00.000 2014-04-23T06:00:00.000 2014-04-23 N2 +12127 N 2014-07-30T00:00:00.000 2014-07-30T06:00:00.000 2014-07-30 N2 +12192 M1.6 2014-10-28T06:54:00.000 2014-10-28T12:54:00.000 2014-10-28 F1 +12205 N 2014-11-09T00:00:00.000 2014-11-09T06:00:00.000 2014-11-09 N2 +12208 C3.7 2014-11-12T13:36:00.000 2014-11-12T19:36:00.000 2014-11-12 F2 +12209 N 2014-11-23T00:00:00.000 2014-11-23T06:00:00.000 2014-11-23 N2 +12229 N 2014-12-11T00:00:00.000 2014-12-11T06:00:00.000 2014-12-11 N1 +12234 N 2014-12-16T00:00:00.000 2014-12-16T06:00:00.000 2014-12-16 N1 +12297 C1.0 2015-03-10T23:45:00.000 2015-03-11T05:45:00.000 2015-03-10 F2 +12302 C9.3 2015-03-18T00:40:00.000 2015-03-18T06:40:00.000 2015-03-18 F2 +12335 C1.0 2015-05-02T17:53:00.000 2015-05-02T23:53:00.000 2015-05-02 F2 +12384 N 2015-07-13T00:00:00.000 2015-07-13T06:00:00.000 2015-07-13 N1 +12524 N 2016-03-20T00:00:00.000 2016-03-20T06:00:00.000 2016-03-20 N1 +12524 C1.1 2016-03-22T19:59:00.000 2016-03-23T01:59:00.000 2016-03-22 F1 +12644 C3.7 2017-04-01T12:30:00.000 2017-04-01T18:30:00.000 2017-04-01 F2 +12644 M4.4 2017-04-01T14:35:00.000 2017-04-01T20:35:00.000 2017-04-01 F1 +12698 N 2018-02-05T00:00:00.000 2018-02-05T06:00:00.000 2018-02-05 N1 +12740 C1.0 2019-05-06T12:41:00.000 2019-05-06T18:41:00.000 2019-05-06 F1 +12779 C1.3 2020-10-29T08:53:00.000 2020-10-29T14:53:00.000 2020-10-29 F2 +12781 C1.0 2020-11-05T15:18:00.000 2020-11-05T21:18:00.000 2020-11-05 F2 +12911 N 2021-12-22T00:00:00.000 2021-12-22T06:00:00.000 2021-12-22 N1 +13038 N 2022-06-21T00:00:00.000 2022-06-21T06:00:00.000 2022-06-21 N2 +13109 C2.8 2022-09-23T01:47:00.000 2022-09-23T07:47:00.000 2022-09-23 F1 diff --git a/arccnet/data_generation/tests/data/ts_test_data.parq b/arccnet/data_generation/tests/data/ts_test_data.parq deleted file mode 100644 index 
c74c2b230d5a0ba4bf8fcafb420108e287cad941..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6660 zcmdT}du&_P89z7iO|BC=#L#POmaR^qZYXhb@3o!U8N<0w>e#9Ce%U50@hi4dzZyHw zk&+>X2CA4;wu)5+3Y#ikn^MFoB(}O`g-(n?2(Q94S`CSHtV8@!p{=m7?;P8S^Po`n z&vu*d-gCb5z0dD_$AT`ghi4`1VVb>EZEow43iO{Akf`FufT%4PRbF+1HkwY7$x;m4Us$yj9&mP z038K-0q7+VkaE%{Q~{HEAZ+deFax}_1|5XSDWJar{T-MIL0WV3M*<>`QJ?*?h;^|; zwd@dE$(FDsRVpUGqQY5`M~P}S;KEYvQ$_F|qDtgV8d;0Cs&s!%`Rk^uHaV{8?|>y4A*kCWihY)HvKmsiIgSGD4hg)P`8p%T$XtA#89vQIT?5T9 zd>t=mq?L@ZZU7ZYkTQ~XUS5Or@){81Xbm#UH8mhYj0trnNUMo$SxKt_ z)kv^)l>}UI;8AmXHgt&`q3Yr$6hw;FdFkN=bm9-_nTma~k8!vsXm+942?fI2e6FYp z^w8E{TSbaN--V*y4s-{EGLLufA)wRjeq*KW=sK9Gfpl1INXj6jV31#0 zDVd+b!cR&WX1?e+ZI{f`c)kUTb>op2l=kCU2G8(rp9Xpv=uMzAu%AU4dPIUsuuK(} zX#)OnAW)Fa&z`W-XC&-N@CcxjFu$e2WjPjmN6yq5PFY2qj32)15s+U|F?fU%#t2x? z1Ghjebajx8QoO#s^#!S$HNyLZ(2sE&9eTVSk6x^L`ktgCX#?0A)c4@R)SZF9CClU#7&vR;FbscYv94pNa_S!D-OK=WhphiKOCvoad&~0g&td7+jCPJX8hFj3 zA_zZjdF5sNoUxFQ=U&_ZlO=GJZe2JFm>lTG&sl=W_1smMM5yu`b(~(uHI%UxvId>7LtVj`biy~QWM&=5 ztE(8Z4&JmwUL_MSqfZ4NqmJj)RWkS(gbIXAptD2ADfUVP9aVvh>MCTaGa(t6=GEJ& z3dKIW@NLCoNKfr!_ih4r2A!aI3;}t%-0tsppzkcy;reMTA?_Hj9`2TMm}V$jv@|JfrEH3+CZ6* zpg0I?+tChc3D%O8CLJf}jI+p0_sJn}j$V?Vg5Y^}yF}orV~RdT5Oku`#AnL{6V9v? z^lLjWTvBBMmtR%EIA2hx?iGWZV>u_w5NNA5;bzkz&?Ov?N4D%+fBEsj2{@G#Q02(sT@*QsrPA76~t~tC6($`;j%@_A?5y3<;N23qs3b0 zN`3&TYvCjywg|zi5;eMY++OR%93AVGuMI&&8^EP-;y>G!i8PTz#C&q(yeb8iymPRw07>{|=jhfL&I+bHZ z;=`#4-)IcD+qQ4ZF`*?xB+I7ozSs3GN7GD#$!~Z*7Tm|@mi2|Nqc7z36BFqTqDqbj z{gG585pUG!bB4iTQ!x@vt2Gj}epJMfs02y$Qno3bYGuoJ(CX)A|aVZod!BHE7aTZug99&Af_ z`VtMk)`5x6vE-QB)f4py=9wUgtaby$|cRZaEpBeK zdzDVyCk)KGgpQQk)iLXAw&EPZ0ef@`Y7_G7duF_Y#`qeYS@1s-u$eh8_;tpTyW4x$ z`tqb87OrEg$J`v@LP5b8_6R)*++CnI{WbNzvcEuU+bFJ|XKikfgFhCX@aR{^=Z1VegMmqZe0R!GxJMD| zY{1@@^u+?^<`^FhwC-$hH17=i1U~E+VJ~dvQNJ(%b+)Cvu4L3>GmrU%Y1l*Ktcy1_ zG)KgcquCnu#d<~qtx;TyU5;kV6GJW@WLs^se%?F^`w#f(nepkZMrS-*UpL^{(`)R< z`;@f5H0Y1A}cbpQ{^U^zUg~S=-}j!ExIPeGnWw zqa9JGN7&aIZSV~Cv}9v9tn4|QFUV;HD=uz1e!Tyk5itzdgPNMBg1yGnru>H9~A{fU|5(9Ia*^u=z!KO!;F2V_sKB(kB=i94+n3 zvG|1!$N^(?fOF^ahI)qm@&D7huRm+kaE|kP4K;Ioz5QHocc#GZWWd!k>emlMJTabw zy^ZgR_*ZIdZ;QHx=&X0pJOOL7%X?<;7=yjWJ&X^u;vOh=;TQCo2=wPs-yrPCfY}Ir zE`|G}C^om_Hs4FyVhGx73+cBptkjBMBI&o;`=Re9=*Pg?X(RW6HGiKFIe3vvVNL&U z>93!xujjgU)YoJ2?tWrFYnlv+GBIWEiMRyZll#SbEMGsC_t!%Fk((pizq1~(!GiNl z@arH;ZtzL`g~SxTf3To9O_p$w+53)6MN*`-J&`Q6`9Krr>k!>tIeXphBbWcDzgcYS%h}tR%ZG%Ly;_Mw1H7Tft-TYnZlGi#e80n&Jk7cL5hl6)SXUo> zm0MTcOIR4+<77YK1wR%b6}JfWCv_l;#5{uxEvz5 z27QQc3fs#){p9-{q|zk4<9jB8X`Mfr)Op9p6H}VW;5hzmsS&t3_@~|n|E*%fKlEuO KLS69xfBpl7N6BLV diff --git a/arccnet/data_generation/tests/test_sdo_processing.py b/arccnet/data_generation/tests/test_sdo_processing.py index c409e173..8d19371d 100644 --- a/arccnet/data_generation/tests/test_sdo_processing.py +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -12,7 +12,6 @@ from arccnet import config from arccnet.data_generation.timeseries.sdo_processing import crop_map, flare_check, pad_map, rand_select -data_path = config["paths"]["data_folder"] test_path = Path(__file__).resolve().parent diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 481d76f3..4cdd3f45 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -110,12 +110,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: - The date of the run, used for reprojection - The coordinate of the noaa active region """ - # table_name = 
f"{size}_{duration}_{long_lim}_{types}.parq".strip(' ') - # if os.path.exists(f"{data_path}/flare_files/{table_name}"): - # print("Run table cached - loading") - # combined = Table.read(table_name) - # else: - # print("No run table cached - creating new table") table = Table.read(hek_path) srs = Table.read(srs_path) @@ -172,7 +166,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: new_names=("noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category", "fl_count"), ) combined = vstack([flares_exp, srs_exp]) - # combined.write(f"{data_path}/flare_files/{table_name}", format='parquet') final = rand_select(combined, size, types) subset = final["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] @@ -300,9 +293,6 @@ def drms_pipeline( hmi_query, hmi_export, ic_query, ic_export = hmi_query_export(start_t, end_t, hmi_keys, sample) aia_query, aia_export = aia_query_export(hmi_query, aia_keys, wavelengths) - # ic_query.rename(columns={"*recnum*":"FSN"}, inplace=True) - # img_query = pd.concat([aia_query, ic_query],ignore_index=True) - # img_urls = pd.concat([aia_export.urls, ic_export.urls], ignore_index=True) hmi_dls, hmi_exs = l1_file_save(hmi_export, hmi_query, path) cnt_dls, cnt_exs = l1_file_save(ic_query, ic_export, path) @@ -338,14 +328,14 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): """ client = drms.Client() duration = round((time_2 - time_1).to_value(u.hour)) - qstr_m_hmi = f"hmi.M_720s[{time_1.value}/{duration}h@{sample}m]" + "{magnetogram}" + qstr_m_hmi = f"hmi.M_720s[{time_1.value}/{duration}h@{sample}m]{{magnetogram}}" hmi_query = client.query(ds=qstr_m_hmi, key=keys) good_result = hmi_query[hmi_query.QUALITY == 0] good_num = good_result["*recnum*"].values bad_result = hmi_query[hmi_query.QUALITY != 0] - qstrs_m_hmi = [f"hmi.M_720s[{time}]" + "{magnetogram}" for time in bad_result["T_REC"]] + qstrs_m_hmi = [f"hmi.M_720s[{time}]{{magnetogram}}" for time in bad_result["T_REC"]] hmi_values = [hmi_rec_find(qstr, keys, 3, 720) for qstr in qstrs_m_hmi] patched_num = [*hmi_values] @@ -353,7 +343,7 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): joined_num = [str(num) for num in joined_num] hmi_num_str = str(joined_num).strip("[]") - hmi_qstr = f"hmi.M_720s[! recnum in ({hmi_num_str}) !]" + "{magnetogram}" + hmi_qstr = f"hmi.M_720s[! recnum in ({hmi_num_str}) !]{{magnetogram}}" hmi_query_full = client.query(ds=hmi_qstr, key=keys) hmi_result = client.export(hmi_qstr, method="url", protocol="fits", email=os.environ["JSOC_EMAIL"]) hmi_result.wait() @@ -378,13 +368,13 @@ def hmi_continuum_export(hmi_query, keys): A tuple containing the query results of the hmi mag and ic_no_limbdark (pandas df) and the export data response (drms export object). """ client = drms.Client() - qstrs_ic = [f"hmi.Ic_noLimbDark_720s[{time}]" + "{image}" for time in hmi_query["T_REC"]] + qstrs_ic = [f"hmi.Ic_noLimbDark_720s[{time}]{{continuum}}" for time in hmi_query["T_REC"]] ic_value = [hmi_rec_find(qstr, keys, 3, 12) for qstr in qstrs_ic] unpacked_ic = list(itertools.chain.from_iterable(ic_value)) joined_num = [str(num) for num in unpacked_ic] ic_num_str = str(joined_num).strip("[]") - ic_comb_qstr = f"hmi.Ic_noLimbDark_720s[! recnum in ({ic_num_str}) !]" + "{image}" + ic_comb_qstr = f"hmi.Ic_noLimbDark_720s[! 
recnum in ({ic_num_str}) !]{{continuum}}" ic_query_full = client.query(ds=ic_comb_qstr, key=keys) @@ -411,19 +401,16 @@ def aia_query_export(hmi_query, keys, wavelength): aia_query_full, aia_result (tuple): A tuple containing the query result and the export data response. """ client = drms.Client() - qstrs_euv = [f"aia.lev1_euv_12s[{time}][{wavelength}]" + "{image}" for time in hmi_query["T_REC"]] - qstrs_uv = [f"aia.lev1_uv_24s[{time}]{[1600, 1700]}" + "{image}" for time in hmi_query["T_REC"]] - # qstrs_vis = [f"aia.lev1_vis_1h[{time}]{[4500]}" + "{image}" for time in hmi_query["T_REC"]] + qstrs_euv = [f"aia.lev1_euv_12s[{time}][{wavelength}]{{image}}" for time in hmi_query["T_REC"]] + qstrs_uv = [f"aia.lev1_uv_24s[{time}]{[1600, 1700]}{{image}}" for time in hmi_query["T_REC"]] euv_value = [aia_rec_find(qstr, keys, 3, 12) for qstr in qstrs_euv] uv_value = [aia_rec_find(qstr, keys, 2, 24) for qstr in qstrs_uv] - # vis_value = [aia_rec_find(qstr, keys, 1, 60) for qstr in qstrs_vis] - # unpacked_aia = list(itertools.chain(euv_value, uv_value, vis_value)) unpacked_aia = list(itertools.chain(euv_value, uv_value)) unpacked_aia = [set for set in unpacked_aia if set is not None] unpacked_aia = list(itertools.chain.from_iterable(unpacked_aia)) joined_num = [str(num) for num in unpacked_aia] aia_num_str = str(joined_num).strip("[]") - aia_comb_qstr = f"aia.lev1[! FSN in ({aia_num_str}) !]" + "{image_lev1}" + aia_comb_qstr = f"aia.lev1[! FSN in ({aia_num_str}) !]{{image_lev1}}" aia_query_full = client.query(ds=aia_comb_qstr, key=keys) @@ -491,7 +478,7 @@ def aia_rec_find(qstr, keys, retries, time_add): if wvl == "4500": return qry["FSN"].values while qry["QUALITY"].values[0] != 0 and retry < retries: - qry = client.query(ds=f"{qstr_head}[{time}][{wvl}]" + "{image}", key=keys) + qry = client.query(ds=f"{qstr_head}[{time}][{wvl}]{{image}}", key=keys) time = change_time(time, time_add) retry += 1 if qry["QUALITY"].values[0] == 0: From 95119281de4e87f103afe0896a902dbbaf5b9df0 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Wed, 13 Aug 2025 12:27:44 +0100 Subject: [PATCH 10/14] updated file paths --- arccnet/data_generation/tests/test_sdo_processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arccnet/data_generation/tests/test_sdo_processing.py b/arccnet/data_generation/tests/test_sdo_processing.py index 8d19371d..e60e6e5b 100644 --- a/arccnet/data_generation/tests/test_sdo_processing.py +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -16,7 +16,7 @@ def test_rand(): - combined = Table.read(f"{test_path}/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.ecsv") types = ["F1", "F2", "N1", "N2"] # 1. test with full list of samples @@ -53,7 +53,7 @@ def test_padding(): def test_flare_check(): - combined = Table.read(f"{test_path}/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.ecsv") flares = combined[combined["goes_class"] != "N"] flare = flares[flares["goes_class"] == "C3.7"] flare = flare[flare["noaa_number"] == 12644] From e302bfa922265edf72ac2b2335c12e96c429ab2f Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Thu, 14 Aug 2025 16:10:16 +0100 Subject: [PATCH 11/14] Improved pipeline, added log for empty queries, WIP. 
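The empty-query logging introduced here boils down to an append-only text log of DRMS query strings that returned no rows, with duplicates skipped. A minimal sketch of that behaviour (the helper name and default log-file name are illustrative only, not the function added in this commit):

from pathlib import Path


def log_bad_query(qstr: str, log_file: str = "bad_queries.txt") -> None:
    """Record a DRMS query string that came back empty, skipping entries already logged."""
    path = Path(log_file)
    seen = path.read_text().splitlines() if path.exists() else []
    if qstr not in seen:
        with path.open("a") as fh:
            fh.write(qstr + "\n")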
--- .../timeseries/drms_pipeline.py | 7 +-- .../timeseries/sdo_processing.py | 47 +++++++++++++++++-- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 978b0c18..340bf5e5 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -29,9 +29,9 @@ # Logging settings here. drms_log = logging.getLogger("drms") - drms_log.setLevel("WARNING") + drms_log.setLevel("ERROR") reproj_log = logging.getLogger("reproject.common") - reproj_log.setLevel("WARNING") + reproj_log.setLevel("ERROR") # May need to find a more robust solution with filters/exceptions for this. astropy_log.setLevel("ERROR") data_path = config["paths"]["data_folder"] @@ -42,7 +42,8 @@ size=1, duration=6, long_lim=65, - types=["F1", "F2", "N1", "N2"], + # types=["F1", "F2", "N1", "N2"], + types=["F1"], )[0] cores = int(config["drms"]["cores"]) with ProcessPoolExecutor(cores) as executor: diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index cd9220b9..63108f66 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -30,9 +30,12 @@ from arccnet.data_generation.utils.utils import save_compressed_map from arccnet.visualisation.data import mosaic_animate, mosaic_plot +data_path = config["paths"]["data_folder"] +qry_log = "bad_queries" + warnings.simplefilter("ignore", RuntimeWarning) reproj_log = logging.getLogger("reproject.common") -reproj_log.setLevel("WARNING") +reproj_log.setLevel("ERROR") os.environ["JSOC_EMAIL"] = "danielgass192@gmail.com" __all__ = [ @@ -51,6 +54,32 @@ ] +def bad_qry(qry, data_path, name): + r""" + Logs bad drms queries to document for future reference. + + Parameters + ---------- + qry: `str` + Drms query which returns an empty dataframe. + data_path : `str` + Path to arccnet data. + name: `str` + Log filename. + """ + print(f"Bad Query Detected - {qry}") + print(data_path) + logging.warning(f"Bad Query Detected - {qry}") + f_name = f"{data_path}/logs/{name}.txt" + if not os.path.exists(f_name): + file = open(f"{data_path}/logs/{name}.txt", "x") + file = open(f"{data_path}/logs/{name}.txt", "+") + entries = [row for row in file] + if qry not in entries: + file.write(qry) + file.close() + + def rand_select(table, size, types: list): r""" Randomly selects targets from provided table and list of data subsets. 
@@ -156,6 +185,9 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: srs["category"] = ar_cat srs["n_fl_count"] = fl_cat srs["ar"] = "N" + flares = flares[flares["goes_class"] == "C1.1"] + flares = flares[flares["noaa_number"] == 11169] + flares = flares[flares["tb_date"] == "2011-03-15"] srs_exp = srs["number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category", "n_fl_count"] flares_exp = flares[ @@ -442,13 +474,14 @@ def hmi_rec_find(qstr, keys, retries, sample, cont=False): series = "hmi.Ic_noLimbDark_720s" client = drms.Client() count = 0 - # print(qstr) qry = client.query(ds=qstr, key=keys) - # print(qry) + print(qry) if qry.empty: + print("Bad Query - HMI") time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) qry["QUALITY"] = [10000] - # print(qry) + bad_qry(qry, data_path, qry_log) + else: time = sunpy.time.parse_time(qry["T_REC"].values[0]) # print(qry["QUALITY"]) @@ -457,9 +490,9 @@ def hmi_rec_find(qstr, keys, retries, sample, cont=False): if qry.empty: time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) qry["QUALITY"] = [10000] + bad_qry(qry, data_path, qry_log) time = change_time(time, sample) count += 1 - qry["*recnum*"] = [0] return qry["*recnum*"].values[0] @@ -483,6 +516,7 @@ def aia_rec_find(qstr, keys, retries, time_add): retry = 0 qry = client.query(ds=qstr, key=keys) qstr_head = qstr.split("[")[0] + print(qry) if not qry.empty: time, wvl = qry["T_REC"].values[0][0:-1], qry["WAVELNTH"].values[0] if wvl == "4500": @@ -493,6 +527,9 @@ def aia_rec_find(qstr, keys, retries, time_add): retry += 1 if qry["QUALITY"].values[0] == 0: return qry["FSN"].values + else: + print("Bad Query - AIA") + bad_qry(qry, data_path, qry_log) def l1_file_save(export, query, path): From 5d6e751ba307ff2469c852e2523c8f0d16d55699 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Fri, 15 Aug 2025 10:15:21 +0100 Subject: [PATCH 12/14] Removed print statements from bad_qry function. --- arccnet/data_generation/timeseries/sdo_processing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 63108f66..a0daac22 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -67,8 +67,6 @@ def bad_qry(qry, data_path, name): name: `str` Log filename. """ - print(f"Bad Query Detected - {qry}") - print(data_path) logging.warning(f"Bad Query Detected - {qry}") f_name = f"{data_path}/logs/{name}.txt" if not os.path.exists(f_name): From 78d279013c3d3244d7e87070ce8468ac231e7329 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Fri, 15 Aug 2025 13:40:14 +0100 Subject: [PATCH 13/14] Fixed formatting for pre-commit. 
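Aside from the formatting fix in this patch, the DRMS query helpers touched in the surrounding commits all follow the same retry pattern: run the query, check the QUALITY flag, and re-query at a shifted time up to a fixed number of attempts. A condensed stand-alone sketch of that loop with a stand-in fetch callable (the real helpers go through drms.Client, which is not reproduced here):

from datetime import datetime, timedelta


def query_with_retries(fetch, time: datetime, retries: int = 3, step_minutes: int = 12):
    """Call fetch(time) until it reports good quality or the retry budget is spent."""
    result = fetch(time)
    count = 0
    while result.get("QUALITY", 1) != 0 and count < retries:
        time = time + timedelta(minutes=step_minutes)  # nudge the requested time forward
        result = fetch(time)
        count += 1
    return result


# e.g. with a toy fetch that only succeeds on the second attempt
attempts = iter([{"QUALITY": 1}, {"QUALITY": 0}])
print(query_with_retries(lambda t: next(attempts), datetime(2013, 8, 20)))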
--- arccnet/data_generation/timeseries/drms_pipeline.py | 2 +- arccnet/data_generation/timeseries/sdo_processing.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 710f9529..989ab9ea 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -44,7 +44,7 @@ long_lim=65, types=["F1", "F2", "N1", "N2"], )[0] - + cores = int(config["drms"]["cores"]) with ProcessPoolExecutor(cores) as executor: for record in starts: diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index c87e0521..18efa867 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -78,6 +78,7 @@ def bad_qry(qry, data_path, name): file.write(qry) file.close() + def rand_select(table, size, types: list): r""" Randomly selects targets from provided table and list of data subsets. @@ -173,7 +174,7 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: SkyCoord(lon * u.deg, lat * u.deg, obstime=t_time, observer="earth", frame=frames.HeliographicStonyhurst) for lat, lon, t_time in zip(srs["latitude"], srs["longitude"], srs["target_time"]) ] - + print("Parsing Active Regions") ar_cat, fl_cat = [], [] for ar in srs: From ed52fa9e05050b7620b4f3bb91c189621a7822cc Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Tue, 19 Aug 2025 16:48:05 +0100 Subject: [PATCH 14/14] Replaced prints with logs, changed plot to use empty subplot, deleted frames once anim was plotted, and skip bad runs. --- .../timeseries/drms_pipeline.py | 7 ++++++- .../timeseries/sdo_processing.py | 20 +++++++++---------- arccnet/visualisation/data.py | 6 +++--- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 989ab9ea..19f5eec8 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -55,7 +55,9 @@ patch_height = int(config["drms"]["patch_height"]) * u.pix patch_width = int(config["drms"]["patch_width"]) * u.pix try: - print(record["noaa_number"], record["goes_class"], record["start_time"], record["category"]) + logging.info( + f"{record['noaa_number']} {record['goes_class']} {record['start_time']} {record['category']}" + ) aia_maps, hmi_maps = drms_pipeline( start_t=start, end_t=end, @@ -65,6 +67,9 @@ wavelengths=config["drms"]["wavelengths"], sample=config["drms"]["sample"], ) + if len(aia_maps != 60): + logging.warning("Bad Run detected - missing frames.") + continue hmi_proc = list( tqdm( diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 18efa867..79439797 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -55,7 +55,7 @@ ] -def bad_qry(qry, data_path, name): +def bad_query(qry, data_path, name): r""" Logs bad drms queries to document for future reference. @@ -64,7 +64,7 @@ def bad_qry(qry, data_path, name): qry: `str` Drms query which returns an empty dataframe. data_path : `str` - Path to arccnet data. + Path to arccnet level 04 data. name: `str` Log filename. 
""" @@ -155,7 +155,7 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: srs["srs_date"] = srs["target_time"].value srs["srs_date"] = [date.split("T")[0] for date in srs["srs_date"]] srs["srs_end_time"] = [(Time(time) + duration * u.hour) for time in srs["target_time"]] - print("Parsing Flares") + logging.info("Parsing Flares") flares["start_time"] = [time - (duration + 1) * u.hour for time in flares["start_time"]] flares["start_time"].format = "fits" flares["tb_date"] = flares["start_time"].value @@ -175,7 +175,7 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: for lat, lon, t_time in zip(srs["latitude"], srs["longitude"], srs["target_time"]) ] - print("Parsing Active Regions") + logging.info("Parsing Active Regions") ar_cat, fl_cat = [], [] for ar in srs: erl_time = Time(ar["target_time"]) - (duration + 1) * u.hour @@ -473,10 +473,10 @@ def hmi_rec_find(qstr, keys, retries, sample, cont=False): count = 0 qry = client.query(ds=qstr, key=keys) if qry.empty: - print("Bad Query - HMI") time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) qry["QUALITY"] = [10000] - bad_qry(qry, data_path, qry_log) + logging.warning("Bad Query - HMI") + bad_query(qry, data_path, qry_log) else: time = sunpy.time.parse_time(qry["T_REC"].values[0]) @@ -485,7 +485,8 @@ def hmi_rec_find(qstr, keys, retries, sample, cont=False): if qry.empty: time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) qry["QUALITY"] = [10000] - bad_qry(qry, data_path, qry_log) + logging.warning("Bad Query - HMI") + bad_query(qry, data_path, qry_log) time = change_time(time, sample) count += 1 return qry["*recnum*"].values[0] @@ -511,7 +512,6 @@ def aia_rec_find(qstr, keys, retries, time_add): retry = 0 qry = client.query(ds=qstr, key=keys) qstr_head = qstr.split("[")[0] - print(qry) if not qry.empty: time, wvl = qry["T_REC"].values[0][0:-1], qry["WAVELNTH"].values[0] if wvl == "4500": @@ -523,8 +523,8 @@ def aia_rec_find(qstr, keys, retries, time_add): if qry["QUALITY"].values[0] == 0: return qry["FSN"].values else: - print("Bad Query - AIA") - bad_qry(qry, data_path, qry_log) + logging.warning("Bad Query - AIA") + bad_query(qry, data_path, qry_log) def l1_file_save(export, query, path): diff --git a/arccnet/visualisation/data.py b/arccnet/visualisation/data.py index 1033fecb..9e314397 100644 --- a/arccnet/visualisation/data.py +++ b/arccnet/visualisation/data.py @@ -532,10 +532,8 @@ def mosaic_plot(hmi, name, file, nrows, ncols, wvls, table, path): ax.imshow(np.sqrt(aia_map.data), cmap=cmap) ax.text(0.05, 0.05, f"{wv} - {aia_map.date}", color="w", transform=ax.transAxes, fontsize=5) except IndexError: - aia_map = np.zeros(hmi_map.data.shape) ax = fig.add_subplot(nrows, ncols, i + 1) - ax.imshow(np.sqrt(aia_map.data), cmap=cmap) - ax.text(0.05, 0.05, f"{wv} - MISSING", color="w", transform=ax.transAxes, fontsize=5) + ax.text(0.05, 0.05, f"{wv} - MISSING", color="black", transform=ax.transAxes, fontsize=5) else: ax = fig.add_subplot(nrows, ncols, i + 1) @@ -585,6 +583,8 @@ def mosaic_animate(base_dir, name): for file in sorted_files: frame = cv2.imread(file) out.write(frame) + # Deletes frame after being added to animation to save data on unneeded frames. + os.remove(file) out.release() print(f"Video saved to {output_file}") return output_file