From e41aeb16823ac1a4581588f134b7855c1337fbda Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Tue, 24 Jun 2025 17:39:17 +0100 Subject: [PATCH 01/14] Added new functionality to split runs into categories, F1, F2, N1, and N2. Also padded inproperly sized submaps using np.pad. --- .../timeseries/drms_pipeline.py | 7 +- .../timeseries/sdo_processing.py | 97 +++++++++++++++++-- 2 files changed, 94 insertions(+), 10 deletions(-) diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 6827bb2b..d353aaeb 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -79,8 +79,8 @@ ) packed_maps = namedtuple("packed_maps", ["hmi_origin", "l2_map", "ar_num"]) hmi_origin_patch = crop_map(hmi_proc[0], center, patch_height, patch_width, date) - l2_hmi_packed = [[hmi_origin_patch, hmi_map, noaa_ar] for hmi_map in hmi_proc] - l2_aia_packed = [[hmi_origin_patch, aia_map, noaa_ar] for aia_map in aia_proc] + l2_hmi_packed = [[hmi_origin_patch, hmi_map, noaa_ar, center] for hmi_map in hmi_proc] + l2_aia_packed = [[hmi_origin_patch, aia_map, noaa_ar, center] for aia_map in aia_proc] # Went back to tuples because this was failing in a weird way - something to do with pickle and concurrent futures. Left for future debugging. # l2_hmi_packed = [packed_maps(hmi_origin_patch, hmi_map, noaa_ar) for hmi_map in hmi_proc] @@ -88,13 +88,14 @@ hmi_patch_paths = tqdm(executor.map(map_reproject, l2_hmi_packed), total=len(l2_hmi_packed)) aia_patch_paths = tqdm(executor.map(map_reproject, l2_aia_packed), total=len(l2_aia_packed)) - print(hmi_patch_paths) # For some reason, aia_proc becomes an empty list after this function call. home_table, aia_patch_paths, aia_quality, aia_time, hmi_patch_paths, hmi_quality, hmi_time = ( table_match(list(aia_patch_paths), list(hmi_patch_paths)) ) + + # This can probably be streamlined/functionalized to make the pipeline look better. batched_name = f"{config['paths']['data_folder']}/04_final" Path(f"{batched_name}/records").mkdir(parents=True, exist_ok=True) diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index e78ff1f5..e7256c36 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -51,7 +51,7 @@ ] -def read_data(hek_path: str, srs_path: str, size: int, duration: int): +def read_data(hek_path: str, srs_path: str, size: int, duration: int, flares: str): r""" Read and process data from a parquet file containing HEK catalogue information regarding flaring events. @@ -65,6 +65,9 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int): The size of the sample to be generated. (Generates 10% X, 30% M, 60% C by default) duration : `int` The duration of the data sample in hours. 
+ flares : `str` + Determines if runs provided are 'positive' (flares), 'negative' (no flares), or 'both' (50/50 split of both) + Returns ------- @@ -77,7 +80,8 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int): - The date of the run, used for reprojection - The coordinate of the noaa active region """ - + assert (flares == 'positive' or flares == 'negative' or flares == 'both') + # if flares == 'positive' or flares == 'both': table = Table.read(hek_path) noaa_num_df = table[table["noaa_number"] > 0] flares = noaa_num_df[noaa_num_df["event_type"] == "FL"] @@ -106,7 +110,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int): # c_flares = c_flares[c_flares["noaa_number"] == 11818] # c_flares = c_flares[c_flares["goes_class"] == "C1.6"] c_flares = c_flares[c_flares["tb_date"] == "2013-08-20"] - # 025925 combined = vstack([x_flares, m_flares, c_flares]) combined["c_coord"] = [ @@ -116,9 +119,31 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int): combined["start_time"] = [time - (duration + 1) * u.hour for time in combined["start_time"]] combined["start_time"].format = "fits" combined["end_time"] = combined["start_time"] + duration * u.hour - subset = combined["noaa_number", "goes_class", "start_time", "end_time", "frm_daterun", "c_coord"] - - return subset + if flares == 'positive' or flares == 'both': + combined['category'] = [f"F{flare_check(row['start_time'], row['end_time'], row['noaa_number'], combined)}" for row in combined] + srs_isolated = srs[srs['target_time'] > "2011-01-01"] + srs_isolated = srs_isolated[abs(srs_isolated["longitude"].value) <= 65] + ar_cat = [] + for ar in srs_isolated: + erl_time = Time(ar['target_time']) - (duration+1) * u.hour + cat = flare_check(erl_time, Time(ar['target_time']) - 1 * u.hour, ar['number'], combined) + ar_cat.append(cat) + srs_isolated['category'] = ar_cat + + + if flares == 'positive': + subset = combined["noaa_number", "goes_class", "start_time", "end_time", "frm_daterun", "c_coord", 'category'] + return subset + + # if flares == 'both': + # Combine both the flare samples and the non flare samples into subset and return + # return subset + + # if flares == 'negative': + # return only the non flare samples as subset + # return subset + + def change_time(time: str, shift: int): @@ -709,7 +734,8 @@ def map_reproject(sdo_packed): fits_path : `str` The path location of the saved submap. """ - hmi_origin, sdo_path, ar_num = sdo_packed + hmi_origin, sdo_path, ar_num, center = sdo_packed + lon = int(center.lon.value) # sdo_map = sunpy.map.Map(sdo_packed.l2_map) sdo_map = sunpy.map.Map(sdo_path) with propagate_with_solar_surface(): @@ -723,6 +749,8 @@ def map_reproject(sdo_packed): sdo_rpr.meta["quality"] = sdo_map.meta["quality"] sdo_rpr.meta["wavelnth"] = sdo_map.meta["wavelnth"] sdo_rpr.meta["date-obs"] = sdo_map.meta["date-obs"] + if sdo_rpr.dimensions[0].value > config["drms"]["patch_width"]: + sdo_rpr = pad_map(sdo_rpr, lon, config["drms"]["patch_width"]) save_compressed_map(sdo_rpr, fits_path, hdu_type=CompImageHDU, overwrite=True) return fits_path @@ -856,3 +884,58 @@ def l4_file_pack(aia_paths, hmi_paths, dir_path, rec, out_table, anim_path): shutil.copy(anim_path, f"{dir_path}/data/{rec}") out_table.write(f"{dir_path}/data/{rec}/{rec}.csv", overwrite=True) + +def pad_map(map, long, targ_width): + r""" + Pads the map data of a submap which is narrower than the specified submap width due to reaching the edge of the image array. 
+ + Parameters + ---------- + map : `sunpy.map` + The sunpy submap to be resized. + long : `float` or `int` + The longitude of the active region, used only to check which side of the disk it is located. + targ_width : `int` + The target width of the submaps within the data run. + + Returns + ---------- + new_map : `sunpy.map` + The new, padded submap. + """ + x_dim = map.dimensions[0].value + diff = int(targ_width - x_dim) + if long > 0: + data = np.pad(map.data, ((0,0),(diff,0)), constant_values=np.nan) + else: + data = np.pad(map.data, ((0,0),(0,diff)), constant_values=np.nan) + new_map = sunpy.map.Map(data,map.meta) + return new_map + +def flare_check(start, end, ar_num, table): + r""" + Checks a provided start time and end time, along with an active region number, and determines if a flare occurs within that period. + + Parameters + ---------- + start : `Astropy.Time` + The start time (duration - 1 hour) of a target run. + end : `Astropy.Time` + The end time (start_time - 1 hour) of a target run. + ar_num : `str` + The active region number of a target run. + table : `int` + The table of flare times used to check for flare events within run duration. + + Returns + ---------- + category : `int` + The category of the run. If no flare detected in run, category = 1, otherwise, category = 2. + """ + ar_table = table[table["noaa_number"] == ar_num] + ar_table = ar_table[Time(ar_table["start_time"]) < end] + ar_table = ar_table[Time(ar_table["start_time"]) > start] + category = 1 + if (len(ar_table) > 0): + category = 2 + return category \ No newline at end of file From 8c13daaf7b6dd8040ecceb8c1d7ce971ebcaa3b0 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Wed, 16 Jul 2025 14:03:12 +0100 Subject: [PATCH 02/14] Completed category splitting, updated padding method. 
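
The category labels combine the run type with a flare-incidence flag: "F"/"N" marks whether a run is anchored on a HEK flare or on a quiet SRS active region, and the trailing "1"/"2" comes from flare_check (1 when no further flare from that region falls inside the observation window, 2 when at least one does). A rough usage sketch of the updated read_data call follows; the paths and sizes are placeholders, not part of this change:

    # Illustrative sketch only - placeholder paths/sizes, assuming the read_data signature in this patch.
    from arccnet.data_generation.timeseries.sdo_processing import read_data

    runs = read_data(
        hek_path="hek_catalog.parq",             # HEK flare catalogue (parquet)
        srs_path="srs_processed_catalog.parq",   # parsed NOAA SRS catalogue (parquet)
        size=2,                                  # runs drawn per requested category
        duration=6,                              # hours of data per run
        long_lim=65,                             # reject regions beyond +/-65 deg longitude
        types=["F1", "F2", "N1", "N2"],          # categories to sample
    )
    quiet_clear = runs[runs["category"] == "N1"]  # e.g. keep only quiet-region runs with no flare in the window
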
--- .../timeseries/drms_pipeline.py | 16 +- .../timeseries/sdo_processing.py | 150 ++++++++++-------- 2 files changed, 95 insertions(+), 71 deletions(-) diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 6e26f9eb..58dabed9 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -39,21 +39,22 @@ starts = read_data( hek_path=Path(f"{data_path}/flare_files/hek_swpc_1996-01-01T00:00:00-2023-01-01T00:00:00_dev.parq"), srs_path=Path(f"{data_path}/flare_files/srs_processed_catalog.parq"), - size=10, + size=1, duration=6, + long_lim=65, + types=["F1", "F2", "N1", "N2"], ) cores = int(config["drms"]["cores"]) with ProcessPoolExecutor(cores) as executor: - for record in [starts[-1]]: - noaa_ar, fl_class, start, end, date, center = record + for record in starts: + noaa_ar, fl_class, start, end, date, center, category = record pointing_table = calibrate.util.get_pointing_table(source="jsoc", time_range=[start - 6 * u.hour, end]) - start_split = start.value.split("T")[0] - file_name = f"{start_split}_{fl_class}_{noaa_ar}" + file_name = f"{category}_{start_split}_{fl_class}_{noaa_ar}" patch_height = int(config["drms"]["patch_height"]) * u.pix patch_width = int(config["drms"]["patch_width"]) * u.pix try: - print(record["noaa_number"], record["goes_class"], record["start_time"]) + print(record["noaa_number"], record["goes_class"], record["start_time"], record["category"]) aia_maps, hmi_maps = drms_pipeline( start_t=start, end_t=end, @@ -93,9 +94,6 @@ table_match(list(aia_patch_paths), list(hmi_patch_paths)) ) - - - # This can probably be streamlined/functionalized to make the pipeline look better. batched_name = f"{config['paths']['data_folder']}/04_final" Path(f"{batched_name}/records").mkdir(parents=True, exist_ok=True) Path(f"{batched_name}/tars").mkdir(parents=True, exist_ok=True) diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 9d683cd0..96d01718 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -1,4 +1,3 @@ -# Define required libraries - check to see if Arccnet already has these as requirements. import os import sys import glob @@ -51,7 +50,33 @@ ] -def read_data(hek_path: str, srs_path: str, size: int, duration: int, flares: str): +def rand_select(table, size, types: list): + r""" + Randomly selects targets from provided table and list of data subsets. + + Parameters + ---------- + table : `Astropy.Table` + Provided full table of records. + size : `int` + The size of the returned subsets. + types: `list` + List of datatypes to include. + + Returns + ------- + comb_sample : `Astropy.Table` + Returned random subset containing size records from each data subset. + """ + selection = [] + for type in types: + subtable = table[table["category"] == type] + selection.append(subtable[sample(range(len(subtable)), k=int(size))]) + comb_sample = vstack(selection) + return comb_sample + + +def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: int, types: list): r""" Read and process data from a parquet file containing HEK catalogue information regarding flaring events. @@ -62,11 +87,15 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, flares: st srs_path : `str` The path to the parquet file containing parsed noaa srs active region information. 
size : `int` - The size of the sample to be generated. (Generates 10% X, 30% M, 60% C by default) + The size of each subsample to be generated. duration : `int` The duration of the data sample in hours. flares : `str` - Determines if runs provided are 'positive' (flares), 'negative' (no flares), or 'both' (50/50 split of both) + Determines if runs provided 'positive' (flares), 'negative' (no flares), or 'both' (50/50 split of both) + long_lim : `str` + The longitudinal limit of Active Regions which are accepted for target runs. + types : `list` + Types of data to include in final subsection, corresponds to flares vs non flares (F v N) and incidental and clear runs (1 v 2) Returns @@ -74,76 +103,71 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, flares: st `Astropy.Table` A table of tuples containing the following columns for each flare: - NOAA Active Region Number - - GOES Flare class (C,M,X classes) + - GOES Flare class (C,M,X classes or N for none) - Start time (Duration + 1 hours before event in FITS format) - End time (1 hour before flaring event start time) - The date of the run, used for reprojection - The coordinate of the noaa active region """ - assert (flares == 'positive' or flares == 'negative' or flares == 'both') - # if flares == 'positive' or flares == 'both': + table = Table.read(hek_path) + srs = Table.read(srs_path) + noaa_num_df = table[table["noaa_number"] > 0] flares = noaa_num_df[noaa_num_df["event_type"] == "FL"] flares = flares[flares["frm_daterun"] > "2011-01-01"] - srs = Table.read(srs_path) + flares = flares[ + [flare.startswith("C") or flare.startswith("M") or flare.startswith("X") for flare in flares["goes_class"]] + ] + srs = srs[srs["number"] > 0] + srs = srs[srs["target_time"] > "2011-01-01"] + srs = srs[abs(srs["longitude"].value) <= long_lim] srs = srs[~srs["filtered"]] srs["srs_date"] = srs["target_time"].value srs["srs_date"] = [date.split("T")[0] for date in srs["srs_date"]] + srs["srs_end_time"] = [(Time(time) + duration * u.hour) for time in srs["target_time"]] + print("Parsing Flares") + flares["start_time"] = [time - (duration + 1) * u.hour for time in flares["start_time"]] + flares["start_time"].format = "fits" flares["tb_date"] = flares["start_time"].value - flares["tb_date"] = [date.split(" ")[0] for date in flares["tb_date"]] - flares = join(flares, srs, keys_left="noaa_number", keys_right="number") + flares["tb_date"] = [date.split("T")[0] for date in flares["tb_date"]] + flares["end_time"] = flares["start_time"] + duration * u.hour + flares["category"] = [ + f"F{flare_check(row['start_time'], row['end_time'], row['noaa_number'], flares)}" for row in flares + ] - flares = flares[abs(flares["longitude"].value) <= 65] + flares = join(flares, srs, keys_left="noaa_number", keys_right="number") flares = flares[flares["tb_date"] == flares["srs_date"]] - x_flares = flares[[flare.startswith("X") for flare in flares["goes_class"]]] - # x_flares = x_flares[x_flares["noaa_number"] == 11158] - # x_flares = x_flares[x_flares["goes_class"] == "X2.2"] - # x_flares = x_flares[x_flares["tb_date"] == "2014-10-27"] - x_flares = x_flares[sample(range(len(x_flares)), k=int(0.1 * size))] - m_flares = flares[[flare.startswith("M") for flare in flares["goes_class"]]] - m_flares = m_flares[sample(range(len(m_flares)), k=int(0.3 * size))] - - c_flares = flares[[flare.startswith("C") for flare in flares["goes_class"]]] - # c_flares = c_flares[sample(range(len(c_flares)), k=int(0.6 * size))] - c_flares = c_flares[c_flares["noaa_number"] == 11818] 
- c_flares = c_flares[c_flares["goes_class"] == "C1.6"] - c_flares = c_flares[c_flares["tb_date"] == "2013-08-20"] - - combined = vstack([x_flares, m_flares, c_flares]) - combined["c_coord"] = [ + + flares["c_coord"] = [ SkyCoord(lon * u.deg, lat * u.deg, obstime=t_time, observer="earth", frame=frames.HeliographicStonyhurst) - for lat, lon, t_time in zip(combined["latitude"], combined["longitude"], combined["target_time"]) + for lat, lon, t_time in zip(flares["latitude"], flares["longitude"], flares["target_time"]) ] - combined["start_time"] = [time - (duration + 1) * u.hour for time in combined["start_time"]] - combined["start_time"].format = "fits" - combined["end_time"] = combined["start_time"] + duration * u.hour - if flares == 'positive' or flares == 'both': - combined['category'] = [f"F{flare_check(row['start_time'], row['end_time'], row['noaa_number'], combined)}" for row in combined] - srs_isolated = srs[srs['target_time'] > "2011-01-01"] - srs_isolated = srs_isolated[abs(srs_isolated["longitude"].value) <= 65] - ar_cat = [] - for ar in srs_isolated: - erl_time = Time(ar['target_time']) - (duration+1) * u.hour - cat = flare_check(erl_time, Time(ar['target_time']) - 1 * u.hour, ar['number'], combined) - ar_cat.append(cat) - srs_isolated['category'] = ar_cat - - - if flares == 'positive': - subset = combined["noaa_number", "goes_class", "start_time", "end_time", "frm_daterun", "c_coord", 'category'] - return subset - - # if flares == 'both': - # Combine both the flare samples and the non flare samples into subset and return - # return subset - - # if flares == 'negative': - # return only the non flare samples as subset - # return subset - - + srs["c_coord"] = [ + SkyCoord(lon * u.deg, lat * u.deg, obstime=t_time, observer="earth", frame=frames.HeliographicStonyhurst) + for lat, lon, t_time in zip(srs["latitude"], srs["longitude"], srs["target_time"]) + ] + print("Parsing Active Regions") + ar_cat = [] + for ar in srs: + erl_time = Time(ar["target_time"]) - (duration + 1) * u.hour + cat = f"N{flare_check(erl_time, Time(ar['target_time']) - 1 * u.hour, ar['number'], flares)}" + ar_cat.append(cat) + srs["category"] = ar_cat + srs["ar"] = "N" + + srs_exp = srs["number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category"] + flares_exp = flares["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] + srs_exp.rename_columns( + names=("number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category"), + new_names=("noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"), + ) + combined = vstack([flares_exp, srs_exp]) + + final = rand_select(combined, size, types) + subset = final["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] + return subset def change_time(time: str, shift: int): @@ -749,7 +773,7 @@ def map_reproject(sdo_packed): sdo_rpr.meta["quality"] = sdo_map.meta["quality"] sdo_rpr.meta["wavelnth"] = sdo_map.meta["wavelnth"] sdo_rpr.meta["date-obs"] = sdo_map.meta["date-obs"] - if sdo_rpr.dimensions[0].value > config["drms"]["patch_width"]: + if sdo_rpr.dimensions[0].value > int(config["drms"]["patch_width"]): sdo_rpr = pad_map(sdo_rpr, lon, config["drms"]["patch_width"]) save_compressed_map(sdo_rpr, fits_path, hdu_type=CompImageHDU, overwrite=True) @@ -825,6 +849,7 @@ def l4_file_pack(aia_paths, hmi_paths, dir_path, rec, out_table, anim_path): shutil.copy(anim_path, f"{dir_path}/data/{rec}") out_table.write(f"{dir_path}/data/{rec}/{rec}.csv", 
overwrite=True) + def pad_map(map, long, targ_width): r""" Pads the map data of a submap which is narrower than the specified submap width due to reaching the edge of the image array. @@ -846,12 +871,13 @@ def pad_map(map, long, targ_width): x_dim = map.dimensions[0].value diff = int(targ_width - x_dim) if long > 0: - data = np.pad(map.data, ((0,0),(diff,0)), constant_values=np.nan) + data = np.pad(map.data, ((0, 0), (diff, 0)), constant_values=np.nan) else: - data = np.pad(map.data, ((0,0),(0,diff)), constant_values=np.nan) - new_map = sunpy.map.Map(data,map.meta) + data = np.pad(map.data, ((0, 0), (0, diff)), constant_values=np.nan) + new_map = sunpy.map.Map(data, map.meta) return new_map + def flare_check(start, end, ar_num, table): r""" Checks a provided start time and end time, along with an active region number, and determines if a flare occurs within that period. @@ -876,6 +902,6 @@ def flare_check(start, end, ar_num, table): ar_table = ar_table[Time(ar_table["start_time"]) < end] ar_table = ar_table[Time(ar_table["start_time"]) > start] category = 1 - if (len(ar_table) > 0): + if len(ar_table) > 0: category = 2 - return category \ No newline at end of file + return category From 0d790fda6cd9a5dd81ddd15902dcf342aa817a3b Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Wed, 16 Jul 2025 17:36:34 +0100 Subject: [PATCH 03/14] formatting --- arccnet/data_generation/timeseries/sdo_processing.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 96d01718..becf9787 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -136,10 +136,8 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: flares["category"] = [ f"F{flare_check(row['start_time'], row['end_time'], row['noaa_number'], flares)}" for row in flares ] - flares = join(flares, srs, keys_left="noaa_number", keys_right="number") flares = flares[flares["tb_date"] == flares["srs_date"]] - flares["c_coord"] = [ SkyCoord(lon * u.deg, lat * u.deg, obstime=t_time, observer="earth", frame=frames.HeliographicStonyhurst) for lat, lon, t_time in zip(flares["latitude"], flares["longitude"], flares["target_time"]) @@ -156,7 +154,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: ar_cat.append(cat) srs["category"] = ar_cat srs["ar"] = "N" - srs_exp = srs["number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category"] flares_exp = flares["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] srs_exp.rename_columns( From 55aa12e003fed5c0890f07aa9d2f4160baf7c109 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Mon, 28 Jul 2025 15:36:15 +0100 Subject: [PATCH 04/14] Worked on feedback - added test functions which need access to data. Passes tests locally. 
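
For reference, a minimal sketch of the padding rule exercised by the new padding test; the array below is a synthetic stand-in for map.data and the 400-pixel target width is an arbitrary example, not a value taken from this change:

    # Sketch only: mirrors the np.pad call in pad_map on a synthetic array.
    import numpy as np

    patch = np.random.rand(400, 350)              # 50 columns short of a 400-pixel target width
    short_by = 400 - patch.shape[1]
    # Patch centre at positive Tx (solar west): NaN columns are prepended on the left ...
    padded = np.pad(patch, ((0, 0), (short_by, 0)), constant_values=np.nan)
    # ... otherwise they would be appended on the right instead:
    # padded = np.pad(patch, ((0, 0), (0, short_by)), constant_values=np.nan)
    assert padded.shape == (400, 400)
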
--- .../timeseries/sdo_processing.py | 165 +++++++++++++++--- 1 file changed, 138 insertions(+), 27 deletions(-) diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index becf9787..a8b9172e 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -33,6 +33,7 @@ reproj_log = logging.getLogger("reproject.common") reproj_log.setLevel("WARNING") os.environ["JSOC_EMAIL"] = "danielgass192@gmail.com" +data_path = config["paths"]["data_folder"] __all__ = [ "read_data", @@ -49,6 +50,30 @@ "l4_file_pack", ] +# def test_rand(): +# combined = read_data( +# hek_path=Path(f"{data_path}/flare_files/hek_swpc_1996-01-01T00:00:00-2023-01-01T00:00:00_dev.parq"), +# srs_path=Path(f"{data_path}/flare_files/srs_processed_catalog.parq"), +# size=1, +# duration=6, +# long_lim=65, +# types=["F1", "F2", "N1", "N2"], +# )[1] + +# types=["F1", "F2", "N1", "N2"] + +# # 1. test with full list of samples +# rand_comb_1 = rand_select(combined, 1, types) +# rand_comb_2 = rand_select(combined, 1, types) +# assert (rand_comb_1 != rand_comb_2) +# # 2. test with partial list of samples +# rand_comb_1 = rand_select(combined, 1, ["F1", "N1"]) + +# # 3. test with higher number of sizes +# rand_comb_1 = rand_select(combined, 3, types) +# rand_comb_2 = rand_select(combined, 3, types) +# assert (rand_comb_1 != rand_comb_2) + def rand_select(table, size, types: list): r""" @@ -109,7 +134,12 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: - The date of the run, used for reprojection - The coordinate of the noaa active region """ - + # table_name = f"{size}_{duration}_{long_lim}_{types}.parq".strip(' ') + # if os.path.exists(f"{data_path}/flare_files/{table_name}"): + # print("Run table cached - loading") + # combined = Table.read(table_name) + # else: + # print("No run table cached - creating new table") table = Table.read(hek_path) srs = Table.read(srs_path) @@ -133,9 +163,9 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: flares["tb_date"] = flares["start_time"].value flares["tb_date"] = [date.split("T")[0] for date in flares["tb_date"]] flares["end_time"] = flares["start_time"] + duration * u.hour - flares["category"] = [ - f"F{flare_check(row['start_time'], row['end_time'], row['noaa_number'], flares)}" for row in flares - ] + flare_splits = [flare_check(row["start_time"], row["end_time"], row["noaa_number"], flares) for row in flares] + flares["category"] = [f"F{flare[0]}" for flare in flare_splits] + flares["fl_count"] = [flare[1] for flare in flare_splits] flares = join(flares, srs, keys_left="noaa_number", keys_right="number") flares = flares[flares["tb_date"] == flares["srs_date"]] flares["c_coord"] = [ @@ -147,24 +177,30 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: for lat, lon, t_time in zip(srs["latitude"], srs["longitude"], srs["target_time"]) ] print("Parsing Active Regions") - ar_cat = [] + ar_cat, fl_cat = [], [] for ar in srs: erl_time = Time(ar["target_time"]) - (duration + 1) * u.hour - cat = f"N{flare_check(erl_time, Time(ar['target_time']) - 1 * u.hour, ar['number'], flares)}" - ar_cat.append(cat) + n_splits = flare_check(erl_time, Time(ar["target_time"]) - 1 * u.hour, ar["number"], flares) + ar_cat.append(f"N{n_splits[0]}") + fl_cat.append(n_splits[1]) srs["category"] = ar_cat + srs["n_fl_count"] = fl_cat srs["ar"] = "N" - srs_exp = srs["number", "ar", "target_time", 
"srs_end_time", "srs_date", "c_coord", "category"] - flares_exp = flares["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] + + srs_exp = srs["number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category", "n_fl_count"] + flares_exp = flares[ + "noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category", "fl_count" + ] srs_exp.rename_columns( - names=("number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category"), - new_names=("noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"), + names=("number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category", "n_fl_count"), + new_names=("noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category", "fl_count"), ) combined = vstack([flares_exp, srs_exp]) + # combined.write(f"{data_path}/flare_files/{table_name}", format='parquet') final = rand_select(combined, size, types) subset = final["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] - return subset + return subset, combined def change_time(time: str, shift: int): @@ -216,7 +252,7 @@ def match_files(aia_maps, hmi_maps, table): List of AIA maps. hmi_maps : `list` List of HMI maps. - table : `JSOCResponse` + table : `JSOC Response` AIA pointing table as provided by JSOC. Returns @@ -321,15 +357,15 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): """ client = drms.Client() duration = round((time_2 - time_1).to_value(u.hour)) - qstr_hmi = f"hmi.M_720s[{time_1.value}/{duration}h@{sample}m]" + "{magnetogram}" - hmi_query = client.query(ds=qstr_hmi, key=keys) + qstr_m_hmi = f"hmi.M_720s[{time_1.value}/{duration}h@{sample}m]" + "{magnetogram}" + hmi_query = client.query(ds=qstr_m_hmi, key=keys) good_result = hmi_query[hmi_query.QUALITY == 0] good_num = good_result["*recnum*"].values bad_result = hmi_query[hmi_query.QUALITY != 0] - qstrs_hmi = [f"hmi.M_720s[{time}]" + "{magnetogram}" for time in bad_result["T_REC"]] - hmi_values = [hmi_rec_find(qstr, keys) for qstr in qstrs_hmi] + qstrs_m_hmi = [f"hmi.M_720s[{time}]" + "{magnetogram}" for time in bad_result["T_REC"]] + hmi_values = [hmi_rec_find(qstr, keys, True) for qstr in qstrs_m_hmi] patched_num = [*hmi_values] joined_num = [*good_num, *patched_num] @@ -339,7 +375,25 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): hmi_query_full = client.query(ds=hmi_qstr, key=keys) hmi_result = client.export(hmi_qstr, method="url", protocol="fits", email=os.environ["JSOC_EMAIL"]) hmi_result.wait() - return hmi_query_full, hmi_result + ic_query_full, ic_result = hmi_continuum_export(hmi_query_full, keys) + return hmi_query_full, hmi_result, ic_query_full, ic_result + + +def hmi_continuum_export(hmi_query, keys): + client = drms.Client() + qstrs_ic = [f"hmi.Ic_720s[{time}]" + "{image}" for time in hmi_query["T_REC"]] + + ic_value = [hmi_rec_find(qstr, keys, 3, 12) for qstr in qstrs_ic] + unpacked_ic = list(itertools.chain.from_iterable(ic_value)) + joined_num = [str(num) for num in unpacked_ic] + ic_num_str = str(joined_num).strip("[]") + ic_comb_qstr = f"aia.lev1[! 
FSN in ({ic_num_str}) !]" + "{image_lev1}" + + ic_query_full = client.query(ds=ic_comb_qstr, key=keys) + + ic_result = client.export(ic_comb_qstr, method="url", protocol="fits", email=os.environ["JSOC_EMAIL"]) + ic_result.wait() + return ic_query_full, ic_result def aia_query_export(hmi_query, keys, wavelength): @@ -363,11 +417,11 @@ def aia_query_export(hmi_query, keys, wavelength): qstrs_euv = [f"aia.lev1_euv_12s[{time}][{wavelength}]" + "{image}" for time in hmi_query["T_REC"]] qstrs_uv = [f"aia.lev1_uv_24s[{time}]{[1600, 1700]}" + "{image}" for time in hmi_query["T_REC"]] qstrs_vis = [f"aia.lev1_vis_1h[{time}]{[4500]}" + "{image}" for time in hmi_query["T_REC"]] - euv_value = [aia_rec_find(qstr, keys, 3, 12) for qstr in qstrs_euv] uv_value = [aia_rec_find(qstr, keys, 2, 24) for qstr in qstrs_uv] vis_value = [aia_rec_find(qstr, keys, 1, 60) for qstr in qstrs_vis] unpacked_aia = list(itertools.chain(euv_value, uv_value, vis_value)) + unpacked_aia = list(itertools.chain(euv_value, uv_value)) unpacked_aia = [set for set in unpacked_aia if set is not None] unpacked_aia = list(itertools.chain.from_iterable(unpacked_aia)) joined_num = [str(num) for num in unpacked_aia] @@ -381,7 +435,7 @@ def aia_query_export(hmi_query, keys, wavelength): return aia_query_full, aia_result -def hmi_rec_find(qstr, keys): +def hmi_rec_find(qstr, keys, cont=False): r""" Find the HMI record number for a given query string. @@ -391,18 +445,26 @@ def hmi_rec_find(qstr, keys): A query string. keys : `list` List of keys to query. + cont : `bool` + indicates whether continuum is needed, searches for magnetogram if false. Returns ------- `int` The HMI record number. """ + seg = "{magnetogram}" + series = "hmi.M_720s" + if cont: + seg = "{image}" + series = "hmi.Ic_720s" + client = drms.Client() retries = 0 qry = client.query(ds=qstr, key=keys) time = sunpy.time.parse_time(qry["T_REC"].values[0]) while qry["QUALITY"].values[0] != 0 and retries <= 3: - qry = client.query(ds=f"hmi.M_720s[{time}]" + "{magnetogram}", key=keys) + qry = client.query(ds=f"{series}[{time}]" + seg, key=keys) time = change_time(time, 720) retries += 1 return qry["*recnum*"].values[0] @@ -770,7 +832,7 @@ def map_reproject(sdo_packed): sdo_rpr.meta["quality"] = sdo_map.meta["quality"] sdo_rpr.meta["wavelnth"] = sdo_map.meta["wavelnth"] sdo_rpr.meta["date-obs"] = sdo_map.meta["date-obs"] - if sdo_rpr.dimensions[0].value > int(config["drms"]["patch_width"]): + if sdo_rpr.dimensions[0].value < int(config["drms"]["patch_width"]): sdo_rpr = pad_map(sdo_rpr, lon, config["drms"]["patch_width"]) save_compressed_map(sdo_rpr, fits_path, hdu_type=CompImageHDU, overwrite=True) @@ -847,7 +909,20 @@ def l4_file_pack(aia_paths, hmi_paths, dir_path, rec, out_table, anim_path): out_table.write(f"{dir_path}/data/{rec}/{rec}.csv", overwrite=True) -def pad_map(map, long, targ_width): +# def test_padding(): +# map_width = int(config["drms"]["patch_width"]) +# # 1. test with undersized AR cutout on right side of map, check map size. +# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/8/20/SDO/AIA/11818_03_171.aia.lev1.2013-08-20T075936Z.73314087.image_lev1.fits') +# assert(int(pad_map(sdo_map, map_width).dimensions[0].value) == map_width) +# # 2. test with undersized AR cutout on left side of map, check map size. 
+# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/1/6/SDO/AIA/11653_03_1700.aia.lev1.2013-01-06T234743Z.60394065.image_lev1.fits') +# assert(int(pad_map(sdo_map, int(map_width)).dimensions[0].value) == map_width) +# # 3. test with different targ width, to check that it works with arbitrary widths. +# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/1/6/SDO/AIA/11653_03_1700.aia.lev1.2013-01-06T234743Z.60394065.image_lev1.fits') +# assert(int(pad_map(sdo_map, 900).dimensions[0].value) == 900) + + +def pad_map(map, targ_width): r""" Pads the map data of a submap which is narrower than the specified submap width due to reaching the edge of the image array. @@ -855,8 +930,6 @@ def pad_map(map, long, targ_width): ---------- map : `sunpy.map` The sunpy submap to be resized. - long : `float` or `int` - The longitude of the active region, used only to check which side of the disk it is located. targ_width : `int` The target width of the submaps within the data run. @@ -867,7 +940,7 @@ def pad_map(map, long, targ_width): """ x_dim = map.dimensions[0].value diff = int(targ_width - x_dim) - if long > 0: + if map.center.Tx > 0: data = np.pad(map.data, ((0, 0), (diff, 0)), constant_values=np.nan) else: data = np.pad(map.data, ((0, 0), (0, diff)), constant_values=np.nan) @@ -875,6 +948,41 @@ def pad_map(map, long, targ_width): return new_map +# def test_flare_check(): +# combined = read_data( +# hek_path=Path(f"{data_path}/flare_files/hek_swpc_1996-01-01T00:00:00-2023-01-01T00:00:00_dev.parq"), +# srs_path=Path(f"{data_path}/flare_files/srs_processed_catalog.parq"), +# size=1, +# duration=6, +# long_lim=65, +# types=["F1", "F2", "N1", "N2"], +# )[1] +# # 1. test a flare run known to contain flares +# flares = combined[combined['goes_class'] != 'N'] +# flare = flares[flares['goes_class'] == "C3.7"] +# flare = flare[flare["noaa_number"] == 12644] +# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 2 +# # 2. test a non flare run known to contain flares +# ar = combined[combined['goes_class'] == "N"] +# ar = ar[ar["noaa_number"] == 12038] +# ar = ar[ar['tb_date'] == '2014-04-23'] +# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour +# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 2 +# # 3. test a flare run without flares +# flares = combined[combined['goes_class'] != 'N'] +# flare = combined[combined['goes_class'] == "M1.6"] +# flare = flare[flare["noaa_number"] == 12192] +# print(flare) +# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 1 +# # 4. test a non flare run without flares +# ar = combined[combined['goes_class'] == "N"] +# ar = ar[ar["noaa_number"] == 12524] +# ar = ar[ar['tb_date'] == '2016-03-20'] +# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour +# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 1 +# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], combined)[0]) == 1 + + def flare_check(start, end, ar_num, table): r""" Checks a provided start time and end time, along with an active region number, and determines if a flare occurs within that period. 
@@ -899,6 +1007,9 @@ def flare_check(start, end, ar_num, table): ar_table = ar_table[Time(ar_table["start_time"]) < end] ar_table = ar_table[Time(ar_table["start_time"]) > start] category = 1 + flares = {"X": 0, "M": 0, "C": 0} if len(ar_table) > 0: category = 2 - return category + for cat in flares: + flares[cat] = int(len(ar_table[[flare.startswith(cat) for flare in ar_table["goes_class"]]])) + return category, flares From 595ad1a6c16d71ec1ae78d6a0f9500b7ba709ddd Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Thu, 7 Aug 2025 16:57:34 +0100 Subject: [PATCH 05/14] Refactored tests into separate file within testing folder, created test data. --- .../tests/data/ts_test_data.parq | Bin 0 -> 6617 bytes .../tests/test_sdo_processing.py | 78 +++++++++ .../timeseries/sdo_processing.py | 165 ++++++------------ 3 files changed, 136 insertions(+), 107 deletions(-) create mode 100644 arccnet/data_generation/tests/data/ts_test_data.parq create mode 100644 arccnet/data_generation/tests/test_sdo_processing.py diff --git a/arccnet/data_generation/tests/data/ts_test_data.parq b/arccnet/data_generation/tests/data/ts_test_data.parq new file mode 100644 index 0000000000000000000000000000000000000000..0ca12c92e81fbd001c0192d2a6e3facfe9e77e34 GIT binary patch literal 6617 zcmdT}du&_P8NWBZPMy>a$;dUf2*WMZ%}Siyd;N$Vb#t$sI&tba@AyHt)Nebz*LGq% zFH{K7L5Em{7$B$$Obkt|>nem$w=t+Y+6}Z-Py&Lkqo^y0F%4DxF%Ux?>^sMH{7A}5 zH)-2#&bjw|=X-y@^PMOi@Iwdg;iyy)`3gavgYZwr@ z5f*6}cLCvk21Fi!#X%S!0)b|MfE3;i;~pS1b3kBVu>}S)yF*u}-XiJ<{LB>IG(mgl z9c?f*(zSGLi&9$LQKDGzqaa#Wp~6<%p&FPyWUYLwT6&ym?Dn_xKGEiUcgtY2tGH2JJLyOaJC*K84AcYwp~xc+a;G1MsU?6kxIFC24-D5 zM@nV=hISn(rEDxMCCrjW!XP&h`Z|W{XXTBAv91wzK#HsD&3_o>t*yHDPOO=0*F$_x zdax9qk!U4dN?>hgz^3Mbd;k);dM|%Z-TFPLLDTxZ_q|bb=)32vbEI8mmEN=`@A@$b zePE3f@`MU~t_DdjZhYR#!oVcZ7eP8g^R--ode#!q4V9L7bcLgx$eo$wie z@f<8=N1;lnuv=mHVW6r%Univ&b1)I7V09h_0+S+FH%N@5tq9u60C^g%?nbMl=O2} z1xh&vWF-LH*gqQnw&%yDTrRb=Y4_5mpIJ`b zp*?+v_Td~^+j{zr*K*F!o*ek>$>Z{^vJ)1f2~_zloQ@=^-{&Nq!tPgc^+$HDJF@eS zmT&##2k^6Gc~)}s(~_G{=429O+gZzAr0eu^mYriH+5Mp9owMk9J(p^EX{zO=U*@Wz zgp&g?owF=|<%(&lqykel(hl4?Jou+W$M)aCJa)st!*`Eg`=Yt?z#5z&K-!Tqq_u!F zE&(Ye22oinkcP@$0X4)?!kT0!g@qOai2fXcKnZb3){p2_m_u|7I$$TNQHM4W?1;U9 zziQM0X;shx1{ZIulQP7A4|eyhp+5lFaTsIa5lG-?38VK)I!Sl5+6&{ZA-ZTU~=7&H!b6U zJLzkd56H)RFC5N&{snqo&7DBzTaH^+!Y_iKGXTG$j|hG%sjI*b8eQ-Dr=Iqtuez)8 z&I7lP-;$JmdKd7)`8-ueU!;nS?gPqH4`hyESuLZ}GRAtkL1xhESXG0}q-FG~2BMQ| zA*5z4bKPbc%w`pS8QL2NO2^O*jVz;OI8}p`)p9D6jODakQ;U+*G6pSUl5eIsEjdc) zKq>HX*EL*cmw^?7s)f>N$#MUeeICpW;|@GiYj{I<-x=k`1b-2yWv;1S=nRZ7Pgz6`P;8Xz9;taGXqZyKCteKHvs~w0|r7_n-0dpqMvRA!p0iq zx5NE>C8kXE{5ADL+olqE>Dz& z6QL@;auRSMGix(}iI6`NF4{meX%Gm}w`u z#oLr--x`hdtNeYeP*rVeu?Ixb(%A4Ap-E=@-FBVHY*}n-5`ezzE3G$ zq%`kcrTI`+Fj#r1UE->{JcH$hW{dG1l4$nOn#0f^{?qZ5o6AS{38h$~H0M>CGYT-b zvS(hRlYV)0ML-lWivxRNonon0^RilV=vst^OErKMniU__R)C|Zd>Q;$`(aY?EU7uU zLG$@~q`i2!S898@1U1Ep4uwY}RUEv5Q0-7&mGK9J@Pru7_$TK5qM-cAS~ze1%zP@` zRj*awlJJY+E_K=QZ&NSttK*4`QNO#yR_Y1mTJu%T$ zuZE}Setx9}Uff<`TkQv%$<%y1?yJG%bhv#KIxLn_t0T#2(Vyv3N8*`uffP?nq_csD z2-HoRHx-D`kHM2=RjBUmBg@{{urtMi7kxoZeD|_EcO~*%Q9hZ?tm0K_Ivk9rI9$mDD=lLNR=-JNmdfpo4?K~glZBb0K zS}XQv5pmn{z5{*Z!2=&?K&q@s^f`bgtlG*TQ;4yk3bXbj{Z`D+c-?(z-)Pbp=o!xr zOr|Eio?*eqndigUGuy5zc^lHyKZ@l0(NrzZcm?w=q=R>LgQy8>!AA40GDzNz?Dr1G zyyBbyHq$=0-Z&tJW_%u3#LLaaf;yx-%6AvzMMBOfEb~gHhG4&EI2P)$GhvTa7>Fh_ zj^0ex>t=<4xHTK#>@g2#U<2+xAsC;H`hX97G6Z|ZjE){58|wBlk)aBiM1UJ#@NllQ z*W+4n+N>ysSg2Q+h1^7gy5V`h+mNV|SpfUsi|FVxc)LHYAl#|AM3MIr0cRr=?Jczy2BOfa!4?I^EN+`16z?Mns3kl7}(La1k( 
z(P7&b3vg^K$U`mc=17nmhdkTUeosp9+0ByyZVqZ_Sn#kWqb<%y9X6{F5Qih79s%WI zyTgWPeAL4NZ;yQ;$eJTif56Z1d_ZS4I1~B&dI8VA5yKd&Q(h0q_4sCjA{z^JLrz73 z3CFAyS`L& zIA0J`2`e6E*?(03fjA!n>_JY=v*8g#{#=b1Mtz>Xl-E6%3X9{rhB$LNpwFdIf8=>vKWg(MxGhGZ&35B{DF8hvJ3BfM`_LHHpI41Pu`pKE zI~@0Ls3(u{9f-eUvS_ax+ru|UzJKQ}B86q=8ROT1x4d8z+l%sPbpIemehzOTC-e6m zUh+6e)jbh!^#wo@#p~d`g9Uv<{n+Wem6HG6Nb4%~gnVu<*thY0qXm823h`i9@>eT1 zsD~MPT=kuhHv=h)qWc}*_^}o4M_3fL^ ztR7fm_+uBEF@K)!M6ZBojscFc&`aVzkAnx(AP@e{B7KEt48OlgDAg(d^i(#S(FRi~ mt$%ttIjf!tPov*0)g03f|5W$FA1YS;AwF${Kjg!I>iiS3+1Wq< literal 0 HcmV?d00001 diff --git a/arccnet/data_generation/tests/test_sdo_processing.py b/arccnet/data_generation/tests/test_sdo_processing.py new file mode 100644 index 00000000..3478e01d --- /dev/null +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -0,0 +1,78 @@ +from pathlib import Path + +import sunpy.data.sample +import sunpy.map + +import astropy.units as u +from astropy.coordinates import SkyCoord +from astropy.table import Table +from astropy.time import Time + +from arccnet import config +from arccnet.data_generation.timeseries.sdo_processing import crop_map, pad_map, rand_select + +data_path = config["paths"]["data_folder"] +test_path = Path().resolve().parent + + +def test_rand(): + combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") + types = ["F1", "F2", "N1", "N2"] + + # 1. test with full list of samples + rand_comb_1 = rand_select(combined, 3, types) + rand_comb_2 = rand_select(combined, 3, types) + assert list(rand_comb_1) != list(rand_comb_2) + # 2. test with partial list of samples + rand_comb_1 = rand_select(combined, 3, ["F1", "N1"]) + rand_comb_2 = rand_select(combined, 3, ["F1", "N1"]) + assert list(rand_comb_1) != list(rand_comb_2) + # 3. test with higher number of sizes + rand_comb_1 = rand_select(combined, 6, types) + rand_comb_2 = rand_select(combined, 6, types) + assert list(rand_comb_1) != list(rand_comb_2) + + +def test_padding(): + map_width = int(config["drms"]["patch_width"]) + map_height = int(config["drms"]["patch_height"]) + aia_map = sunpy.map.Map(sunpy.data.sample.AIA_171_IMAGE) + # 1. test with undersized AR cutout on right side of map, check map size. + center = [-800, 0] * u.arcsec + center = SkyCoord(*center, frame=aia_map.coordinate_frame) + aia_smap = crop_map(aia_map, center, map_height * u.pix, (map_width - 100) * u.pix, Time(aia_map.date)) + assert int(pad_map(aia_smap, map_width).dimensions[0].value) == map_width + # 2. test with undersized AR cutout on left side of map, check map size. + center = [800, 0] * u.arcsec + center = SkyCoord(*center, frame=aia_map.coordinate_frame) + aia_smap = crop_map(aia_map, center, map_height * u.pix, (map_width - 100) * u.pix, Time(aia_map.date)) + assert int(pad_map(aia_smap, map_width).dimensions[0].value) == map_width + # 3. test with different targ width, to check that it works with arbitrary widths. + assert int(pad_map(aia_smap, 900).dimensions[0].value) == 900 + + +# This test won't work correctly unless we use the full flare catalogue from HEK, as we don't want to add large files to repo, skipping for now. +# def test_flare_check(): +# combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") +# flares = combined[combined['goes_class'] != 'N'] +# flare = flares[flares['goes_class'] == "C3.7"] +# flare = flare[flare["noaa_number"] == 12644] +# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 2 +# # 2. 
test a non flare run known to contain flares +# ar = combined[combined['goes_class'] == "N"] +# ar = ar[ar["noaa_number"] == 12038] +# ar = ar[ar['tb_date'] == '2014-04-23'] +# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour +# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 2 +# # 3. test a flare run without flares +# flares = combined[combined['goes_class'] != 'N'] +# flare = combined[combined['goes_class'] == "M1.6"] +# flare = flare[flare["noaa_number"] == 12192] +# print(flare) +# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 1 +# # 4. test a non flare run without flares +# ar = combined[combined['goes_class'] == "N"] +# ar = ar[ar["noaa_number"] == 12524] +# ar = ar[ar['tb_date'] == '2016-03-20'] +# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour +# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 1 diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index a8b9172e..481d76f3 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -50,30 +50,6 @@ "l4_file_pack", ] -# def test_rand(): -# combined = read_data( -# hek_path=Path(f"{data_path}/flare_files/hek_swpc_1996-01-01T00:00:00-2023-01-01T00:00:00_dev.parq"), -# srs_path=Path(f"{data_path}/flare_files/srs_processed_catalog.parq"), -# size=1, -# duration=6, -# long_lim=65, -# types=["F1", "F2", "N1", "N2"], -# )[1] - -# types=["F1", "F2", "N1", "N2"] - -# # 1. test with full list of samples -# rand_comb_1 = rand_select(combined, 1, types) -# rand_comb_2 = rand_select(combined, 1, types) -# assert (rand_comb_1 != rand_comb_2) -# # 2. test with partial list of samples -# rand_comb_1 = rand_select(combined, 1, ["F1", "N1"]) - -# # 3. test with higher number of sizes -# rand_comb_1 = rand_select(combined, 3, types) -# rand_comb_2 = rand_select(combined, 3, types) -# assert (rand_comb_1 != rand_comb_2) - def rand_select(table, size, types: list): r""" @@ -321,23 +297,28 @@ def drms_pipeline( aia_maps, hmi_maps : `tuple` A tuple containing the AIA maps and HMI maps. """ - # print(start_t) - hmi_query, hmi_export = hmi_query_export(start_t, end_t, hmi_keys, sample) + + hmi_query, hmi_export, ic_query, ic_export = hmi_query_export(start_t, end_t, hmi_keys, sample) aia_query, aia_export = aia_query_export(hmi_query, aia_keys, wavelengths) + # ic_query.rename(columns={"*recnum*":"FSN"}, inplace=True) + # img_query = pd.concat([aia_query, ic_query],ignore_index=True) + # img_urls = pd.concat([aia_export.urls, ic_export.urls], ignore_index=True) hmi_dls, hmi_exs = l1_file_save(hmi_export, hmi_query, path) + cnt_dls, cnt_exs = l1_file_save(ic_query, ic_export, path) aia_dls, aia_exs = l1_file_save(aia_export, aia_query, path) + img_exs = list(itertools.chain(aia_exs, cnt_exs)) hmi_maps = sunpy.map.Map(hmi_exs) hmi_maps = add_fnames(hmi_maps, hmi_exs) - aia_maps = sunpy.map.Map(aia_exs) - aia_maps = add_fnames(aia_maps, aia_exs) - return aia_maps, hmi_maps + img_maps = sunpy.map.Map(img_exs) + img_maps = add_fnames(img_maps, aia_exs) + return img_maps, hmi_maps def hmi_query_export(time_1, time_2, keys: list, sample: int): r""" - Query and export HMI data from the JSOC database. + Query and export HMI magnetogram data from the JSOC database. 
Parameters ---------- @@ -352,8 +333,8 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): Returns ------- - hmi_query_full, hmi_result : `tuple` - A tuple containing the query result (pandas df) and the export data response (drms export object). + hmi_query_full, hmi_result, ic_query_full, ic_export : `tuple` + A tuple containing the query results of the hmi mag and ic_no_limbdark (pandas df) and the export data response (drms export object). """ client = drms.Client() duration = round((time_2 - time_1).to_value(u.hour)) @@ -365,12 +346,13 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): bad_result = hmi_query[hmi_query.QUALITY != 0] qstrs_m_hmi = [f"hmi.M_720s[{time}]" + "{magnetogram}" for time in bad_result["T_REC"]] - hmi_values = [hmi_rec_find(qstr, keys, True) for qstr in qstrs_m_hmi] + hmi_values = [hmi_rec_find(qstr, keys, 3, 720) for qstr in qstrs_m_hmi] patched_num = [*hmi_values] joined_num = [*good_num, *patched_num] joined_num = [str(num) for num in joined_num] hmi_num_str = str(joined_num).strip("[]") + hmi_qstr = f"hmi.M_720s[! recnum in ({hmi_num_str}) !]" + "{magnetogram}" hmi_query_full = client.query(ds=hmi_qstr, key=keys) hmi_result = client.export(hmi_qstr, method="url", protocol="fits", email=os.environ["JSOC_EMAIL"]) @@ -380,14 +362,29 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): def hmi_continuum_export(hmi_query, keys): + r""" + Query and export HMI continuum data from the JSOC database. + + Parameters + ---------- + hmi_query : `drms query` + HMI query result containing target times. + keys : `list` + A list of keys to query. + + Returns + ------- + hmi_query_full, hmi_result, ic_query_full, ic_export : `tuple` + A tuple containing the query results of the hmi mag and ic_no_limbdark (pandas df) and the export data response (drms export object). + """ client = drms.Client() - qstrs_ic = [f"hmi.Ic_720s[{time}]" + "{image}" for time in hmi_query["T_REC"]] + qstrs_ic = [f"hmi.Ic_noLimbDark_720s[{time}]" + "{image}" for time in hmi_query["T_REC"]] ic_value = [hmi_rec_find(qstr, keys, 3, 12) for qstr in qstrs_ic] unpacked_ic = list(itertools.chain.from_iterable(ic_value)) joined_num = [str(num) for num in unpacked_ic] ic_num_str = str(joined_num).strip("[]") - ic_comb_qstr = f"aia.lev1[! FSN in ({ic_num_str}) !]" + "{image_lev1}" + ic_comb_qstr = f"hmi.Ic_noLimbDark_720s[! recnum in ({ic_num_str}) !]" + "{image}" ic_query_full = client.query(ds=ic_comb_qstr, key=keys) @@ -403,7 +400,7 @@ def aia_query_export(hmi_query, keys, wavelength): Parameters ---------- hmi_query : `drms query` - The HMI query result. + The HMI query result containing target times. keys : `list` List of keys to query. 
wavelength : `list` @@ -416,11 +413,11 @@ def aia_query_export(hmi_query, keys, wavelength): client = drms.Client() qstrs_euv = [f"aia.lev1_euv_12s[{time}][{wavelength}]" + "{image}" for time in hmi_query["T_REC"]] qstrs_uv = [f"aia.lev1_uv_24s[{time}]{[1600, 1700]}" + "{image}" for time in hmi_query["T_REC"]] - qstrs_vis = [f"aia.lev1_vis_1h[{time}]{[4500]}" + "{image}" for time in hmi_query["T_REC"]] + # qstrs_vis = [f"aia.lev1_vis_1h[{time}]{[4500]}" + "{image}" for time in hmi_query["T_REC"]] euv_value = [aia_rec_find(qstr, keys, 3, 12) for qstr in qstrs_euv] uv_value = [aia_rec_find(qstr, keys, 2, 24) for qstr in qstrs_uv] - vis_value = [aia_rec_find(qstr, keys, 1, 60) for qstr in qstrs_vis] - unpacked_aia = list(itertools.chain(euv_value, uv_value, vis_value)) + # vis_value = [aia_rec_find(qstr, keys, 1, 60) for qstr in qstrs_vis] + # unpacked_aia = list(itertools.chain(euv_value, uv_value, vis_value)) unpacked_aia = list(itertools.chain(euv_value, uv_value)) unpacked_aia = [set for set in unpacked_aia if set is not None] unpacked_aia = list(itertools.chain.from_iterable(unpacked_aia)) @@ -435,7 +432,7 @@ def aia_query_export(hmi_query, keys, wavelength): return aia_query_full, aia_result -def hmi_rec_find(qstr, keys, cont=False): +def hmi_rec_find(qstr, keys, retries, sample, cont=False): r""" Find the HMI record number for a given query string. @@ -445,9 +442,9 @@ def hmi_rec_find(qstr, keys, cont=False): A query string. keys : `list` List of keys to query. + resample: `int` cont : `bool` indicates whether continuum is needed, searches for magnetogram if false. - Returns ------- `int` @@ -456,17 +453,16 @@ def hmi_rec_find(qstr, keys, cont=False): seg = "{magnetogram}" series = "hmi.M_720s" if cont: - seg = "{image}" - series = "hmi.Ic_720s" - + seg = "{continuum}" + series = "hmi.Ic_noLimbDark_720s" client = drms.Client() - retries = 0 + count = 0 qry = client.query(ds=qstr, key=keys) time = sunpy.time.parse_time(qry["T_REC"].values[0]) - while qry["QUALITY"].values[0] != 0 and retries <= 3: + while qry["QUALITY"].values[0] != 0 and count <= retries: qry = client.query(ds=f"{series}[{time}]" + seg, key=keys) - time = change_time(time, 720) - retries += 1 + time = change_time(time, sample) + count += 1 return qry["*recnum*"].values[0] @@ -677,17 +673,20 @@ def aia_l2(packed_maps): Path to the processed AIA map. """ path = config["paths"]["data_folder"] - aia_map, hmi_match, table = packed_maps - time = aia_map.date.to_value("ymdhms") - year, month, day = time[0], time[1], time[2] - map_path = f"{path}/02_intermediate/{year}/{month}/{day}/SDO/{aia_map.nickname}" - proc_path = f"{map_path}/02_{aia_map.meta['fname']}" - if not os.path.exists(proc_path): - aia_map = aia_process(aia_map, table) - aia_map = aia_reproject(aia_map, hmi_match) - proc_path = l2_file_save(aia_map, path) - # This updates process status for tqdm more effectively. - sys.stdout.flush() + sdo_map, hmi_match, table = packed_maps + if sdo_map.nickname == "HMI": + proc_path = hmi_l2(sdo_map) + else: + time = sdo_map.date.to_value("ymdhms") + year, month, day = time[0], time[1], time[2] + map_path = f"{path}/02_intermediate/{year}/{month}/{day}/SDO/{sdo_map.nickname}" + proc_path = f"{map_path}/02_{sdo_map.meta['fname']}" + if not os.path.exists(proc_path): + sdo_map = aia_process(sdo_map, table) + sdo_map = aia_reproject(sdo_map, hmi_match) + proc_path = l2_file_save(sdo_map, path) + # This updates process status for tqdm more effectively. 
+ sys.stdout.flush() return proc_path @@ -909,19 +908,6 @@ def l4_file_pack(aia_paths, hmi_paths, dir_path, rec, out_table, anim_path): out_table.write(f"{dir_path}/data/{rec}/{rec}.csv", overwrite=True) -# def test_padding(): -# map_width = int(config["drms"]["patch_width"]) -# # 1. test with undersized AR cutout on right side of map, check map size. -# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/8/20/SDO/AIA/11818_03_171.aia.lev1.2013-08-20T075936Z.73314087.image_lev1.fits') -# assert(int(pad_map(sdo_map, map_width).dimensions[0].value) == map_width) -# # 2. test with undersized AR cutout on left side of map, check map size. -# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/1/6/SDO/AIA/11653_03_1700.aia.lev1.2013-01-06T234743Z.60394065.image_lev1.fits') -# assert(int(pad_map(sdo_map, int(map_width)).dimensions[0].value) == map_width) -# # 3. test with different targ width, to check that it works with arbitrary widths. -# sdo_map = sunpy.map.Map('/Users/danielgass/Library/Application Support/ARCCnet/arccnet/03_processed/2013/1/6/SDO/AIA/11653_03_1700.aia.lev1.2013-01-06T234743Z.60394065.image_lev1.fits') -# assert(int(pad_map(sdo_map, 900).dimensions[0].value) == 900) - - def pad_map(map, targ_width): r""" Pads the map data of a submap which is narrower than the specified submap width due to reaching the edge of the image array. @@ -948,41 +934,6 @@ def pad_map(map, targ_width): return new_map -# def test_flare_check(): -# combined = read_data( -# hek_path=Path(f"{data_path}/flare_files/hek_swpc_1996-01-01T00:00:00-2023-01-01T00:00:00_dev.parq"), -# srs_path=Path(f"{data_path}/flare_files/srs_processed_catalog.parq"), -# size=1, -# duration=6, -# long_lim=65, -# types=["F1", "F2", "N1", "N2"], -# )[1] -# # 1. test a flare run known to contain flares -# flares = combined[combined['goes_class'] != 'N'] -# flare = flares[flares['goes_class'] == "C3.7"] -# flare = flare[flare["noaa_number"] == 12644] -# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 2 -# # 2. test a non flare run known to contain flares -# ar = combined[combined['goes_class'] == "N"] -# ar = ar[ar["noaa_number"] == 12038] -# ar = ar[ar['tb_date'] == '2014-04-23'] -# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour -# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 2 -# # 3. test a flare run without flares -# flares = combined[combined['goes_class'] != 'N'] -# flare = combined[combined['goes_class'] == "M1.6"] -# flare = flare[flare["noaa_number"] == 12192] -# print(flare) -# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 1 -# # 4. test a non flare run without flares -# ar = combined[combined['goes_class'] == "N"] -# ar = ar[ar["noaa_number"] == 12524] -# ar = ar[ar['tb_date'] == '2016-03-20'] -# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour -# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 1 -# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], combined)[0]) == 1 - - def flare_check(start, end, ar_num, table): r""" Checks a provided start time and end time, along with an active region number, and determines if a flare occurs within that period. 
From 91653c72a6b40e197043389b873f92173eec8754 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Fri, 8 Aug 2025 12:53:57 +0100 Subject: [PATCH 06/14] Added remote data flag to test padding, and updated test data so test_flare_check can work properly --- .../tests/data/ts_test_data.parq | Bin 6617 -> 6660 bytes .../tests/test_sdo_processing.py | 53 +++++++++--------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/arccnet/data_generation/tests/data/ts_test_data.parq b/arccnet/data_generation/tests/data/ts_test_data.parq index 0ca12c92e81fbd001c0192d2a6e3facfe9e77e34..c74c2b230d5a0ba4bf8fcafb420108e287cad941 100644 GIT binary patch delta 2672 zcmb7GeQX@X6@PQLb2sbTz1%@|bC+CdJA}=V++KF~&i0u`?8S*(o0?#d9p&Qy=SwA3 zR7$0mg0Sz3(x^mMs`?-bRse@0i-r~)np%;dUYnY@q9Hg=NNiAJtC3I$X_P-K)H-SD zo3))HTT#{3{pRD%dv9jm`_0UarS_)tjcXq^6+!8u%9N);X;R=0fME}Nc@)_Wls@>ujBjv+oke25yNA!Z?noz)2L?kAMFBW05BN4tW_fsK0E&bJ`n$HoRoGp{qZ;Sam;(@c71q_mm|6{j&Nc{GtTU4pgh`vsWHnantYjr+ z%uJdx6GN;bSw%xEolLv36lIVWnec!q#~~%hS>8+cac@r|92Oa z5Er=nh=6k$IAz(h9xQW2kO9iQS=a^P?-azY9_arFjtASN>wLZaRO#_HU_5wQCp93)qf--RwAUTw2q({DG!jn-?A2z2w5vrIG!2)gS!)`R*$&3%b0@O*|~=>BE8~phTFk zslO2>oQV~!BP&`v?8Y9twTHfGFJDFo`_Mj0N!_r0yd}`v5_sSKQEceR*w6)=(CWlU z-sygD>h%Yw2AyCpDVK(vzIO>p3^}1a1L~fEZu{yrLZs8SmBQ*T4hv&-%-+~cNPOJ> z<)zGlOPPy~M6>aa?XJoCS10Q~v0F%~)cm4-nUsbOylc0NkR$A6dyreAFj zS)5hFROv%jZE^R1*~8gb;34MWy+tdY)A_98d7VQmO!p%QWR_6K@4(hPTA^$iVwzOQ zh5{0>Lk@w1!VfqfhPbUuEZ!S+8R^i+g&Ag{T9yxRau?8uA+yK?D<}gLrQhI|oMkX3 zG6Vh(a=AOdMXur#R3*9M0&i)4bLWcAqpJcuU_D-CKL)JQ4cMqln1*4e4T&q$!mq24S<`;jX4x<^?<(wd^6Mz4H)-?#RR+Yz?08NYn< zxWmITM~qTriT1+uk)wEdg$AJjjS~3prmDA#_~EC<{l@M;Ec#v`&0}{}RIUrNWcy0U zl%Tm~n5J%|bfdCF2}+f^RTT}gjBYKBN!7YxMq{j6&tpaF<(On~NmB%?3f(lKF$t@* z6$HqjQI&3l+XPD|L8yqvAd}31gucz_N)imW^MP-N-+&ZpSK1chd}-YZzkve6y#0RCsGG<33*q)6k26*O5bdt!3`Gm876PlPL~!=6IT-aI(eHd4&{hPUY}%JoO}L!?y(=VvEmpzj>qGG zSgPgJ4!?R`&|dRu?DlVT@vIm}gPY<~HK!i-suw-luuuDk5X0rW?_n`f5@lCO)ECtt zSL`cU+GHvo)P2XVc2li8pq=-lcU++RQ2fv5AeunRf#LxyLRhW2XV!7KoKyEv^>aZx z6w+==MIt1MkpMsJs{^I0a_akj^*Gh~742`~xf9O0B3>$4?piHrvfQN}r0S%g^)J>A zE<(%s=jAr1=ypq$rN!%VYF|J->C-yPwf!+3tJ|e|o)h(ueT&O?aXH9p07?x2G*Bji(0OM?xQa>7M3DVGccm7Qm=R}%pzw-L(HZcw zMT?(q#{MJ}9Es*x%;z%LDNL?l_iNs%4T0J%p}Lck(xuaGd+6cO>z^(=zPQ}K`i_U& zzyri-n?w8@*NKW9r#G-h0G^_UqZsOxAfPy`G0Cv35!RrLgbmE{_s7B!%#ta($fAji z7sZ4migFscUl+V6&UtfSf?)<%d$|!8g26~QVY@cMQP^3Xm>$|$90hU84_(6=OscGN zK$*deP|;eonyYMSOeZT_Uq9yPcxkZu1zaOnvv;>Z4_JFQBes{+L4;_I2OV^D^IdzY zt8&mlX;{Gmkoh1vkd+`b1FwOs0eKhX1CWc5fW%&*9-3K@-JtSakRY1W$si3NG~;_X zmNZ*XpkB~UfKVby7MDgdsc8;1EdqEUmjm@>s&-MsAJFZ^Ze}uRK1i`+pf>Dh{KQ_x zCXQvB%X@%Xu*T4>ae4jU(S~2p58Qf+ji&% z5o<0H``}b)cEq|dnL7t#w#$U0!4H~yKE{|oZuJcSw%0twV7Avf%iuddvrZ(}oJ_7c zYBCdQ(V*2>7`L=A?me@2h1$15ebd6uVBd-eW@r*w-V75|ceY?&2hcKS{hWY&f{l2mq z0|yoa4t!)?B~4x=_W;3*^k`o>BmF~1ciR$e(;d^_e0gTszKMzUk|I4ihtVTYj2dQ9 znr7k}cF;^+Ll4?xh#yQnu?ID0DW2O+(>6pCI=F!vMi7PKHdCNG8c7vuTu2AC0%=Av z1)v)e1$fh(;~ro&^gnUsrc`;oL_7^tF>I`8GPVnP4npkK>zcCe|f|UgxxjVl$X98E)qvmP6 zx%TXK>+ehSJ7V?F`2+jStCYEVgU+$Pe$&qu7fDah5SlB(K$RQZL$#G#cVwDwS>;{X zzI^7A`RwnjE}yDiPP|Bp_t0sQbOCH=x~^&wRg1es4_Isa(L|3|*Hq1rJ*=)8a+22#)$j%UhN{I>Eh&sAhKe6V5%3EAj4_@u zH9SlclLI88;*-7}6k)Qy&eACOP<7u{9vV?O>4z$*8gZc!MVUr9iI@4>P!wmyL0Fuo z%NJ1$XLI6sS|Qzc77q}Zb{VLH#7MxX81>3|Y^WzzjpCKv$j~1vjjLJ$=BYsm+=w(^ z66$Qs#dv&e#L)40@Z=#0)wc|L?ei@pN(Oob&zFNe#3W)ijD8xi5W<@|fF&Gx<)gT**i$oYHqhSyQ60H-DS+e{#VY z8Stl|fXK@+>=qV#DZ>f0i%T&N(lKX++%F z$6@?a^A|3*eQd|lF033HtNdM=-^<@#y5mZ0VUW}_LCPqswC(+0ctK!)_rkvk_*eN4 D+4o$R diff --git a/arccnet/data_generation/tests/test_sdo_processing.py 
b/arccnet/data_generation/tests/test_sdo_processing.py index 3478e01d..519d11aa 100644 --- a/arccnet/data_generation/tests/test_sdo_processing.py +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -1,5 +1,6 @@ from pathlib import Path +import pytest import sunpy.data.sample import sunpy.map @@ -9,7 +10,7 @@ from astropy.time import Time from arccnet import config -from arccnet.data_generation.timeseries.sdo_processing import crop_map, pad_map, rand_select +from arccnet.data_generation.timeseries.sdo_processing import crop_map, flare_check, pad_map, rand_select data_path = config["paths"]["data_folder"] test_path = Path().resolve().parent @@ -33,6 +34,7 @@ def test_rand(): assert list(rand_comb_1) != list(rand_comb_2) +@pytest.mark.remote_data def test_padding(): map_width = int(config["drms"]["patch_width"]) map_height = int(config["drms"]["patch_height"]) @@ -51,28 +53,27 @@ def test_padding(): assert int(pad_map(aia_smap, 900).dimensions[0].value) == 900 -# This test won't work correctly unless we use the full flare catalogue from HEK, as we don't want to add large files to repo, skipping for now. -# def test_flare_check(): -# combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") -# flares = combined[combined['goes_class'] != 'N'] -# flare = flares[flares['goes_class'] == "C3.7"] -# flare = flare[flare["noaa_number"] == 12644] -# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 2 -# # 2. test a non flare run known to contain flares -# ar = combined[combined['goes_class'] == "N"] -# ar = ar[ar["noaa_number"] == 12038] -# ar = ar[ar['tb_date'] == '2014-04-23'] -# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour -# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 2 -# # 3. test a flare run without flares -# flares = combined[combined['goes_class'] != 'N'] -# flare = combined[combined['goes_class'] == "M1.6"] -# flare = flare[flare["noaa_number"] == 12192] -# print(flare) -# assert(flare_check(flare['start_time'], flare['end_time'], flare['noaa_number'], flares)[0]) == 1 -# # 4. test a non flare run without flares -# ar = combined[combined['goes_class'] == "N"] -# ar = ar[ar["noaa_number"] == 12524] -# ar = ar[ar['tb_date'] == '2016-03-20'] -# erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour -# assert(flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 1 +def test_flare_check(): + combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") + flares = combined[combined["goes_class"] != "N"] + flare = flares[flares["goes_class"] == "C3.7"] + flare = flare[flare["noaa_number"] == 12644] + assert (flare_check(flare["start_time"], flare["end_time"], flare["noaa_number"], flares)[0]) == 2 + # 2. test a non flare run known to contain flares + ar = combined[combined["goes_class"] == "N"] + ar = ar[ar["noaa_number"] == 12038] + ar = ar[ar["tb_date"] == "2014-04-23"] + erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour + assert (flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 2 + # 3. test a flare run without flares + flares = combined[combined["goes_class"] != "N"] + flare = combined[combined["goes_class"] == "M1.6"] + flare = flare[flare["noaa_number"] == 12192] + print(flare) + assert (flare_check(flare["start_time"], flare["end_time"], flare["noaa_number"], flares)[0]) == 1 + # 4. 
test a non flare run without flares + ar = combined[combined["goes_class"] == "N"] + ar = ar[ar["noaa_number"] == 12524] + ar = ar[ar["tb_date"] == "2016-03-20"] + erl_time = Time(ar["start_time"]) - (6 + 1) * u.hour + assert (flare_check(erl_time, Time(ar["start_time"]) - 1 * u.hour, ar["noaa_number"], flares)[0]) == 1 From 777e72fec8ac654dd389b80c203beb5333903b33 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Fri, 8 Aug 2025 14:52:37 +0100 Subject: [PATCH 07/14] fixed path localisation for accessing testing data --- arccnet/data_generation/tests/test_sdo_processing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arccnet/data_generation/tests/test_sdo_processing.py b/arccnet/data_generation/tests/test_sdo_processing.py index 519d11aa..c409e173 100644 --- a/arccnet/data_generation/tests/test_sdo_processing.py +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -13,11 +13,11 @@ from arccnet.data_generation.timeseries.sdo_processing import crop_map, flare_check, pad_map, rand_select data_path = config["paths"]["data_folder"] -test_path = Path().resolve().parent +test_path = Path(__file__).resolve().parent def test_rand(): - combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.parq") types = ["F1", "F2", "N1", "N2"] # 1. test with full list of samples @@ -54,7 +54,7 @@ def test_padding(): def test_flare_check(): - combined = Table.read(f"{test_path}/tests/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.parq") flares = combined[combined["goes_class"] != "N"] flare = flares[flares["goes_class"] == "C3.7"] flare = flare[flare["noaa_number"] == 12644] From 0e4c290318811478c1d7c901256006eee6d69e2c Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Wed, 13 Aug 2025 11:43:04 +0100 Subject: [PATCH 08/14] Fixing continuum pipeline and integrating results with aia query/exports. --- .../tests/data/ts_test_data.parq | Bin 6660 -> 0 bytes .../tests/test_sdo_processing.py | 4 +- .../timeseries/drms_pipeline.py | 2 +- .../timeseries/sdo_processing.py | 54 +++++++++--------- arccnet/visualisation/data.py | 8 ++- 5 files changed, 35 insertions(+), 33 deletions(-) delete mode 100644 arccnet/data_generation/tests/data/ts_test_data.parq diff --git a/arccnet/data_generation/tests/data/ts_test_data.parq b/arccnet/data_generation/tests/data/ts_test_data.parq deleted file mode 100644 index c74c2b230d5a0ba4bf8fcafb420108e287cad941..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6660 zcmdT}du&_P89z7iO|BC=#L#POmaR^qZYXhb@3o!U8N<0w>e#9Ce%U50@hi4dzZyHw zk&+>X2CA4;wu)5+3Y#ikn^MFoB(}O`g-(n?2(Q94S`CSHtV8@!p{=m7?;P8S^Po`n z&vu*d-gCb5z0dD_$AT`ghi4`1VVb>EZEow43iO{Akf`FufT%4PRbF+1HkwY7$x;m4Us$yj9&mP z038K-0q7+VkaE%{Q~{HEAZ+deFax}_1|5XSDWJar{T-MIL0WV3M*<>`QJ?*?h;^|; zwd@dE$(FDsRVpUGqQY5`M~P}S;KEYvQ$_F|qDtgV8d;0Cs&s!%`Rk^uHaV{8?|>y4A*kCWihY)HvKmsiIgSGD4hg)P`8p%T$XtA#89vQIT?5T9 zd>t=mq?L@ZZU7ZYkTQ~XUS5Or@){81Xbm#UH8mhYj0trnNUMo$SxKt_ z)kv^)l>}UI;8AmXHgt&`q3Yr$6hw;FdFkN=bm9-_nTma~k8!vsXm+942?fI2e6FYp z^w8E{TSbaN--V*y4s-{EGLLufA)wRjeq*KW=sK9Gfpl1INXj6jV31#0 zDVd+b!cR&WX1?e+ZI{f`c)kUTb>op2l=kCU2G8(rp9Xpv=uMzAu%AU4dPIUsuuK(} zX#)OnAW)Fa&z`W-XC&-N@CcxjFu$e2WjPjmN6yq5PFY2qj32)15s+U|F?fU%#t2x? 
z1Ghjebajx8QoO#s^#!S$HNyLZ(2sE&9eTVSk6x^L`ktgCX#?0A)c4@R)SZF9CClU#7&vR;FbscYv94pNa_S!D-OK=WhphiKOCvoad&~0g&td7+jCPJX8hFj3 zA_zZjdF5sNoUxFQ=U&_ZlO=GJZe2JFm>lTG&sl=W_1smMM5yu`b(~(uHI%UxvId>7LtVj`biy~QWM&=5 ztE(8Z4&JmwUL_MSqfZ4NqmJj)RWkS(gbIXAptD2ADfUVP9aVvh>MCTaGa(t6=GEJ& z3dKIW@NLCoNKfr!_ih4r2A!aI3;}t%-0tsppzkcy;reMTA?_Hj9`2TMm}V$jv@|JfrEH3+CZ6* zpg0I?+tChc3D%O8CLJf}jI+p0_sJn}j$V?Vg5Y^}yF}orV~RdT5Oku`#AnL{6V9v? z^lLjWTvBBMmtR%EIA2hx?iGWZV>u_w5NNA5;bzkz&?Ov?N4D%+fBEsj2{@G#Q02(sT@*QsrPA76~t~tC6($`;j%@_A?5y3<;N23qs3b0 zN`3&TYvCjywg|zi5;eMY++OR%93AVGuMI&&8^EP-;y>G!i8PTz#C&q(yeb8iymPRw07>{|=jhfL&I+bHZ z;=`#4-)IcD+qQ4ZF`*?xB+I7ozSs3GN7GD#$!~Z*7Tm|@mi2|Nqc7z36BFqTqDqbj z{gG585pUG!bB4iTQ!x@vt2Gj}epJMfs02y$Qno3bYGuoJ(CX)A|aVZod!BHE7aTZug99&Af_ z`VtMk)`5x6vE-QB)f4py=9wUgtaby$|cRZaEpBeK zdzDVyCk)KGgpQQk)iLXAw&EPZ0ef@`Y7_G7duF_Y#`qeYS@1s-u$eh8_;tpTyW4x$ z`tqb87OrEg$J`v@LP5b8_6R)*++CnI{WbNzvcEuU+bFJ|XKikfgFhCX@aR{^=Z1VegMmqZe0R!GxJMD| zY{1@@^u+?^<`^FhwC-$hH17=i1U~E+VJ~dvQNJ(%b+)Cvu4L3>GmrU%Y1l*Ktcy1_ zG)KgcquCnu#d<~qtx;TyU5;kV6GJW@WLs^se%?F^`w#f(nepkZMrS-*UpL^{(`)R< z`;@f5H0Y1A}cbpQ{^U^zUg~S=-}j!ExIPeGnWw zqa9JGN7&aIZSV~Cv}9v9tn4|QFUV;HD=uz1e!Tyk5itzdgPNMBg1yGnru>H9~A{fU|5(9Ia*^u=z!KO!;F2V_sKB(kB=i94+n3 zvG|1!$N^(?fOF^ahI)qm@&D7huRm+kaE|kP4K;Ioz5QHocc#GZWWd!k>emlMJTabw zy^ZgR_*ZIdZ;QHx=&X0pJOOL7%X?<;7=yjWJ&X^u;vOh=;TQCo2=wPs-yrPCfY}Ir zE`|G}C^om_Hs4FyVhGx73+cBptkjBMBI&o;`=Re9=*Pg?X(RW6HGiKFIe3vvVNL&U z>93!xujjgU)YoJ2?tWrFYnlv+GBIWEiMRyZll#SbEMGsC_t!%Fk((pizq1~(!GiNl z@arH;ZtzL`g~SxTf3To9O_p$w+53)6MN*`-J&`Q6`9Krr>k!>tIeXphBbWcDzgcYS%h}tR%ZG%Ly;_Mw1H7Tft-TYnZlGi#e80n&Jk7cL5hl6)SXUo> zm0MTcOIR4+<77YK1wR%b6}JfWCv_l;#5{uxEvz5 z27QQc3fs#){p9-{q|zk4<9jB8X`Mfr)Op9p6H}VW;5hzmsS&t3_@~|n|E*%fKlEuO KLS69xfBpl7N6BLV diff --git a/arccnet/data_generation/tests/test_sdo_processing.py b/arccnet/data_generation/tests/test_sdo_processing.py index c409e173..7fb6fb72 100644 --- a/arccnet/data_generation/tests/test_sdo_processing.py +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -17,7 +17,7 @@ def test_rand(): - combined = Table.read(f"{test_path}/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.ecsv") types = ["F1", "F2", "N1", "N2"] # 1. 
test with full list of samples @@ -54,7 +54,7 @@ def test_padding(): def test_flare_check(): - combined = Table.read(f"{test_path}/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.ecsv") flares = combined[combined["goes_class"] != "N"] flare = flares[flares["goes_class"] == "C3.7"] flare = flare[flare["noaa_number"] == 12644] diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 58dabed9..978b0c18 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -43,7 +43,7 @@ duration=6, long_lim=65, types=["F1", "F2", "N1", "N2"], - ) + )[0] cores = int(config["drms"]["cores"]) with ProcessPoolExecutor(cores) as executor: for record in starts: diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 481d76f3..41d84155 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -1,4 +1,5 @@ import os +import re import sys import glob import shutil @@ -33,7 +34,6 @@ reproj_log = logging.getLogger("reproject.common") reproj_log.setLevel("WARNING") os.environ["JSOC_EMAIL"] = "danielgass192@gmail.com" -data_path = config["paths"]["data_folder"] __all__ = [ "read_data", @@ -110,12 +110,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: - The date of the run, used for reprojection - The coordinate of the noaa active region """ - # table_name = f"{size}_{duration}_{long_lim}_{types}.parq".strip(' ') - # if os.path.exists(f"{data_path}/flare_files/{table_name}"): - # print("Run table cached - loading") - # combined = Table.read(table_name) - # else: - # print("No run table cached - creating new table") table = Table.read(hek_path) srs = Table.read(srs_path) @@ -172,7 +166,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: new_names=("noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category", "fl_count"), ) combined = vstack([flares_exp, srs_exp]) - # combined.write(f"{data_path}/flare_files/{table_name}", format='parquet') final = rand_select(combined, size, types) subset = final["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] @@ -300,19 +293,16 @@ def drms_pipeline( hmi_query, hmi_export, ic_query, ic_export = hmi_query_export(start_t, end_t, hmi_keys, sample) aia_query, aia_export = aia_query_export(hmi_query, aia_keys, wavelengths) - # ic_query.rename(columns={"*recnum*":"FSN"}, inplace=True) - # img_query = pd.concat([aia_query, ic_query],ignore_index=True) - # img_urls = pd.concat([aia_export.urls, ic_export.urls], ignore_index=True) hmi_dls, hmi_exs = l1_file_save(hmi_export, hmi_query, path) - cnt_dls, cnt_exs = l1_file_save(ic_query, ic_export, path) + cnt_dls, cnt_exs = l1_file_save(ic_export, ic_query, path) aia_dls, aia_exs = l1_file_save(aia_export, aia_query, path) img_exs = list(itertools.chain(aia_exs, cnt_exs)) hmi_maps = sunpy.map.Map(hmi_exs) hmi_maps = add_fnames(hmi_maps, hmi_exs) img_maps = sunpy.map.Map(img_exs) - img_maps = add_fnames(img_maps, aia_exs) + img_maps = add_fnames(img_maps, img_exs) return img_maps, hmi_maps @@ -378,13 +368,11 @@ def hmi_continuum_export(hmi_query, keys): A tuple containing the query results of the hmi mag and ic_no_limbdark (pandas df) and the export data response (drms export object). 
""" client = drms.Client() - qstrs_ic = [f"hmi.Ic_noLimbDark_720s[{time}]" + "{image}" for time in hmi_query["T_REC"]] - - ic_value = [hmi_rec_find(qstr, keys, 3, 12) for qstr in qstrs_ic] - unpacked_ic = list(itertools.chain.from_iterable(ic_value)) - joined_num = [str(num) for num in unpacked_ic] + qstrs_ic = [f"hmi.Ic_noLimbDark_720s[{time}]{{continuum}}" for time in hmi_query["T_REC"]] + ic_value = [hmi_rec_find(qstr, keys, 3, 12, cont=True) for qstr in qstrs_ic] + joined_num = [str(num) for num in ic_value] ic_num_str = str(joined_num).strip("[]") - ic_comb_qstr = f"hmi.Ic_noLimbDark_720s[! recnum in ({ic_num_str}) !]" + "{image}" + ic_comb_qstr = f"hmi.Ic_noLimbDark_720s[! recnum in ({ic_num_str}) !]" + "{continuum}" ic_query_full = client.query(ds=ic_comb_qstr, key=keys) @@ -413,11 +401,8 @@ def aia_query_export(hmi_query, keys, wavelength): client = drms.Client() qstrs_euv = [f"aia.lev1_euv_12s[{time}][{wavelength}]" + "{image}" for time in hmi_query["T_REC"]] qstrs_uv = [f"aia.lev1_uv_24s[{time}]{[1600, 1700]}" + "{image}" for time in hmi_query["T_REC"]] - # qstrs_vis = [f"aia.lev1_vis_1h[{time}]{[4500]}" + "{image}" for time in hmi_query["T_REC"]] euv_value = [aia_rec_find(qstr, keys, 3, 12) for qstr in qstrs_euv] uv_value = [aia_rec_find(qstr, keys, 2, 24) for qstr in qstrs_uv] - # vis_value = [aia_rec_find(qstr, keys, 1, 60) for qstr in qstrs_vis] - # unpacked_aia = list(itertools.chain(euv_value, uv_value, vis_value)) unpacked_aia = list(itertools.chain(euv_value, uv_value)) unpacked_aia = [set for set in unpacked_aia if set is not None] unpacked_aia = list(itertools.chain.from_iterable(unpacked_aia)) @@ -457,12 +442,24 @@ def hmi_rec_find(qstr, keys, retries, sample, cont=False): series = "hmi.Ic_noLimbDark_720s" client = drms.Client() count = 0 + # print(qstr) qry = client.query(ds=qstr, key=keys) - time = sunpy.time.parse_time(qry["T_REC"].values[0]) + # print(qry) + if qry.empty: + time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) + qry["QUALITY"] = [10000] + # print(qry) + else: + time = sunpy.time.parse_time(qry["T_REC"].values[0]) + # print(qry["QUALITY"]) while qry["QUALITY"].values[0] != 0 and count <= retries: qry = client.query(ds=f"{series}[{time}]" + seg, key=keys) + if qry.empty: + time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) + qry["QUALITY"] = [10000] time = change_time(time, sample) count += 1 + qry["*recnum*"] = [0] return qry["*recnum*"].values[0] @@ -817,8 +814,6 @@ def map_reproject(sdo_packed): The path location of the saved submap. """ hmi_origin, sdo_path, ar_num, center = sdo_packed - lon = int(center.lon.value) - # sdo_map = sunpy.map.Map(sdo_packed.l2_map) sdo_map = sunpy.map.Map(sdo_path) with propagate_with_solar_surface(): sdo_rpr = sdo_map.reproject_to(hmi_origin.wcs) @@ -832,7 +827,7 @@ def map_reproject(sdo_packed): sdo_rpr.meta["wavelnth"] = sdo_map.meta["wavelnth"] sdo_rpr.meta["date-obs"] = sdo_map.meta["date-obs"] if sdo_rpr.dimensions[0].value < int(config["drms"]["patch_width"]): - sdo_rpr = pad_map(sdo_rpr, lon, config["drms"]["patch_width"]) + sdo_rpr = pad_map(sdo_rpr, config["drms"]["patch_width"]) save_compressed_map(sdo_rpr, fits_path, hdu_type=CompImageHDU, overwrite=True) return fits_path @@ -856,11 +851,12 @@ def vid_match(table, name, path): output_file : `str` A string containing the path of the completed mosaic animation. 
""" - # Check this carefully hmi_files = table["HMI files"].value + table["Wavelength"] = [int(wave) for wave in table["Wavelength"]] wvls = np.unique([table["Wavelength"].value]) + hmi_files = np.unique(hmi_files) - nrows, ncols = 4, 3 # define your subplot grid + nrows, ncols = 4, 3 # define subplot grid for file in range(len(hmi_files)): hmi = hmi_files[file] mosaic_plot(hmi, name, file, nrows, ncols, wvls, table, path) @@ -925,7 +921,9 @@ def pad_map(map, targ_width): The new, padded submap. """ x_dim = map.dimensions[0].value + targ_width = int(targ_width) diff = int(targ_width - x_dim) + # print(diff, map.dimensions, map.wavelength) if map.center.Tx > 0: data = np.pad(map.data, ((0, 0), (diff, 0)), constant_values=np.nan) else: diff --git a/arccnet/visualisation/data.py b/arccnet/visualisation/data.py index 79ce8c0c..1033fecb 100644 --- a/arccnet/visualisation/data.py +++ b/arccnet/visualisation/data.py @@ -522,15 +522,19 @@ def mosaic_plot(hmi, name, file, nrows, ncols, wvls, table, path): files = table[table["Wavelength"] == wv] files = files[files["HMI files"] == hmi] aia_files = files["AIA files"] + cmap = f"sdoaia{wv}" + if wv == 6173: + # keep yellow cmap + cmap = "sdoaia4500" try: aia_map = Map(aia_files.value[0]) ax = fig.add_subplot(nrows, ncols, i + 1) - ax.imshow(np.sqrt(aia_map.data), cmap=f"sdoaia{wv}") + ax.imshow(np.sqrt(aia_map.data), cmap=cmap) ax.text(0.05, 0.05, f"{wv} - {aia_map.date}", color="w", transform=ax.transAxes, fontsize=5) except IndexError: aia_map = np.zeros(hmi_map.data.shape) ax = fig.add_subplot(nrows, ncols, i + 1) - ax.imshow(np.sqrt(aia_map.data), cmap=f"sdoaia{wv}") + ax.imshow(np.sqrt(aia_map.data), cmap=cmap) ax.text(0.05, 0.05, f"{wv} - MISSING", color="w", transform=ax.transAxes, fontsize=5) else: From 0cf99ddbeb4140a9e19bd05320445651015c9909 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Wed, 13 Aug 2025 12:19:14 +0100 Subject: [PATCH 09/14] Fixed some formatting, removed unnecessary commented code, changed time series test data from .parq to .ecsv --- .../tests/data/ts_test_data.ecsv | 75 ++++++++++++++++++ .../tests/data/ts_test_data.parq | Bin 6660 -> 0 bytes .../tests/test_sdo_processing.py | 1 - .../timeseries/sdo_processing.py | 31 +++----- 4 files changed, 84 insertions(+), 23 deletions(-) create mode 100644 arccnet/data_generation/tests/data/ts_test_data.ecsv delete mode 100644 arccnet/data_generation/tests/data/ts_test_data.parq diff --git a/arccnet/data_generation/tests/data/ts_test_data.ecsv b/arccnet/data_generation/tests/data/ts_test_data.ecsv new file mode 100644 index 00000000..bb2c54cd --- /dev/null +++ b/arccnet/data_generation/tests/data/ts_test_data.ecsv @@ -0,0 +1,75 @@ +# %ECSV 1.0 +# --- +# datatype: +# - {name: noaa_number, datatype: int64} +# - {name: goes_class, datatype: string} +# - {name: start_time, datatype: string} +# - {name: end_time, datatype: string} +# - {name: tb_date, datatype: string} +# - {name: category, datatype: string} +# meta: !!omap +# - __serialized_columns__: +# end_time: +# __class__: astropy.time.core.Time +# format: fits +# in_subfmt: '*' +# out_subfmt: '*' +# precision: 3 +# scale: utc +# value: !astropy.table.SerializedColumn {name: end_time} +# start_time: +# __class__: astropy.time.core.Time +# format: fits +# in_subfmt: '*' +# out_subfmt: '*' +# precision: 3 +# scale: utc +# value: !astropy.table.SerializedColumn {name: start_time} +# schema: astropy-2.0 +noaa_number goes_class start_time end_time tb_date category +11165 N 
2011-03-06T00:00:00.000 2011-03-06T06:00:00.000 2011-03-06 N2 +11240 N 2011-06-27T00:00:00.000 2011-06-27T06:00:00.000 2011-06-27 N1 +11271 N 2011-08-26T00:00:00.000 2011-08-26T06:00:00.000 2011-08-26 N2 +11283 N 2011-09-01T00:00:00.000 2011-09-01T06:00:00.000 2011-09-01 N1 +11297 C1.5 2011-09-16T12:26:00.000 2011-09-16T18:26:00.000 2011-09-16 F1 +11339 X1.9 2011-11-03T13:16:00.000 2011-11-03T19:16:00.000 2011-11-03 F2 +11392 C2.6 2012-01-06T16:54:00.000 2012-01-06T22:54:00.000 2012-01-06 F1 +11476 N 2012-05-10T00:00:00.000 2012-05-10T06:00:00.000 2012-05-10 N2 +11476 N 2012-05-14T00:00:00.000 2012-05-14T06:00:00.000 2012-05-14 N2 +11479 C1.1 2012-05-18T18:15:00.000 2012-05-19T00:15:00.000 2012-05-18 F1 +11513 N 2012-07-07T00:00:00.000 2012-07-07T06:00:00.000 2012-07-07 N2 +11515 N 2012-07-05T00:00:00.000 2012-07-05T06:00:00.000 2012-07-05 N2 +11542 C1.6 2012-08-14T15:23:00.000 2012-08-14T21:23:00.000 2012-08-14 F1 +11615 C5.7 2012-11-17T20:55:00.000 2012-11-18T02:55:00.000 2012-11-17 F1 +11616 N 2012-11-16T00:00:00.000 2012-11-16T06:00:00.000 2012-11-16 N1 +11621 N 2012-12-01T00:00:00.000 2012-12-01T06:00:00.000 2012-12-01 N1 +11654 C1.5 2013-01-10T05:15:00.000 2013-01-10T11:15:00.000 2013-01-10 F2 +11726 C1.5 2013-04-23T08:04:00.000 2013-04-23T14:04:00.000 2013-04-23 F2 +11865 C4.4 2013-10-15T07:37:00.000 2013-10-15T13:37:00.000 2013-10-15 F2 +11877 C1.1 2013-10-21T07:33:00.000 2013-10-21T13:33:00.000 2013-10-21 F1 +11928 N 2013-12-18T00:00:00.000 2013-12-18T06:00:00.000 2013-12-18 N1 +12010 C1.3 2014-03-22T13:04:00.000 2014-03-22T19:04:00.000 2014-03-22 F1 +12038 C4.3 2014-04-22T17:48:00.000 2014-04-22T23:48:00.000 2014-04-22 F1 +12038 N 2014-04-23T00:00:00.000 2014-04-23T06:00:00.000 2014-04-23 N2 +12127 N 2014-07-30T00:00:00.000 2014-07-30T06:00:00.000 2014-07-30 N2 +12192 M1.6 2014-10-28T06:54:00.000 2014-10-28T12:54:00.000 2014-10-28 F1 +12205 N 2014-11-09T00:00:00.000 2014-11-09T06:00:00.000 2014-11-09 N2 +12208 C3.7 2014-11-12T13:36:00.000 2014-11-12T19:36:00.000 2014-11-12 F2 +12209 N 2014-11-23T00:00:00.000 2014-11-23T06:00:00.000 2014-11-23 N2 +12229 N 2014-12-11T00:00:00.000 2014-12-11T06:00:00.000 2014-12-11 N1 +12234 N 2014-12-16T00:00:00.000 2014-12-16T06:00:00.000 2014-12-16 N1 +12297 C1.0 2015-03-10T23:45:00.000 2015-03-11T05:45:00.000 2015-03-10 F2 +12302 C9.3 2015-03-18T00:40:00.000 2015-03-18T06:40:00.000 2015-03-18 F2 +12335 C1.0 2015-05-02T17:53:00.000 2015-05-02T23:53:00.000 2015-05-02 F2 +12384 N 2015-07-13T00:00:00.000 2015-07-13T06:00:00.000 2015-07-13 N1 +12524 N 2016-03-20T00:00:00.000 2016-03-20T06:00:00.000 2016-03-20 N1 +12524 C1.1 2016-03-22T19:59:00.000 2016-03-23T01:59:00.000 2016-03-22 F1 +12644 C3.7 2017-04-01T12:30:00.000 2017-04-01T18:30:00.000 2017-04-01 F2 +12644 M4.4 2017-04-01T14:35:00.000 2017-04-01T20:35:00.000 2017-04-01 F1 +12698 N 2018-02-05T00:00:00.000 2018-02-05T06:00:00.000 2018-02-05 N1 +12740 C1.0 2019-05-06T12:41:00.000 2019-05-06T18:41:00.000 2019-05-06 F1 +12779 C1.3 2020-10-29T08:53:00.000 2020-10-29T14:53:00.000 2020-10-29 F2 +12781 C1.0 2020-11-05T15:18:00.000 2020-11-05T21:18:00.000 2020-11-05 F2 +12911 N 2021-12-22T00:00:00.000 2021-12-22T06:00:00.000 2021-12-22 N1 +13038 N 2022-06-21T00:00:00.000 2022-06-21T06:00:00.000 2022-06-21 N2 +13109 C2.8 2022-09-23T01:47:00.000 2022-09-23T07:47:00.000 2022-09-23 F1 diff --git a/arccnet/data_generation/tests/data/ts_test_data.parq b/arccnet/data_generation/tests/data/ts_test_data.parq deleted file mode 100644 index 
c74c2b230d5a0ba4bf8fcafb420108e287cad941..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6660 zcmdT}du&_P89z7iO|BC=#L#POmaR^qZYXhb@3o!U8N<0w>e#9Ce%U50@hi4dzZyHw zk&+>X2CA4;wu)5+3Y#ikn^MFoB(}O`g-(n?2(Q94S`CSHtV8@!p{=m7?;P8S^Po`n z&vu*d-gCb5z0dD_$AT`ghi4`1VVb>EZEow43iO{Akf`FufT%4PRbF+1HkwY7$x;m4Us$yj9&mP z038K-0q7+VkaE%{Q~{HEAZ+deFax}_1|5XSDWJar{T-MIL0WV3M*<>`QJ?*?h;^|; zwd@dE$(FDsRVpUGqQY5`M~P}S;KEYvQ$_F|qDtgV8d;0Cs&s!%`Rk^uHaV{8?|>y4A*kCWihY)HvKmsiIgSGD4hg)P`8p%T$XtA#89vQIT?5T9 zd>t=mq?L@ZZU7ZYkTQ~XUS5Or@){81Xbm#UH8mhYj0trnNUMo$SxKt_ z)kv^)l>}UI;8AmXHgt&`q3Yr$6hw;FdFkN=bm9-_nTma~k8!vsXm+942?fI2e6FYp z^w8E{TSbaN--V*y4s-{EGLLufA)wRjeq*KW=sK9Gfpl1INXj6jV31#0 zDVd+b!cR&WX1?e+ZI{f`c)kUTb>op2l=kCU2G8(rp9Xpv=uMzAu%AU4dPIUsuuK(} zX#)OnAW)Fa&z`W-XC&-N@CcxjFu$e2WjPjmN6yq5PFY2qj32)15s+U|F?fU%#t2x? z1Ghjebajx8QoO#s^#!S$HNyLZ(2sE&9eTVSk6x^L`ktgCX#?0A)c4@R)SZF9CClU#7&vR;FbscYv94pNa_S!D-OK=WhphiKOCvoad&~0g&td7+jCPJX8hFj3 zA_zZjdF5sNoUxFQ=U&_ZlO=GJZe2JFm>lTG&sl=W_1smMM5yu`b(~(uHI%UxvId>7LtVj`biy~QWM&=5 ztE(8Z4&JmwUL_MSqfZ4NqmJj)RWkS(gbIXAptD2ADfUVP9aVvh>MCTaGa(t6=GEJ& z3dKIW@NLCoNKfr!_ih4r2A!aI3;}t%-0tsppzkcy;reMTA?_Hj9`2TMm}V$jv@|JfrEH3+CZ6* zpg0I?+tChc3D%O8CLJf}jI+p0_sJn}j$V?Vg5Y^}yF}orV~RdT5Oku`#AnL{6V9v? z^lLjWTvBBMmtR%EIA2hx?iGWZV>u_w5NNA5;bzkz&?Ov?N4D%+fBEsj2{@G#Q02(sT@*QsrPA76~t~tC6($`;j%@_A?5y3<;N23qs3b0 zN`3&TYvCjywg|zi5;eMY++OR%93AVGuMI&&8^EP-;y>G!i8PTz#C&q(yeb8iymPRw07>{|=jhfL&I+bHZ z;=`#4-)IcD+qQ4ZF`*?xB+I7ozSs3GN7GD#$!~Z*7Tm|@mi2|Nqc7z36BFqTqDqbj z{gG585pUG!bB4iTQ!x@vt2Gj}epJMfs02y$Qno3bYGuoJ(CX)A|aVZod!BHE7aTZug99&Af_ z`VtMk)`5x6vE-QB)f4py=9wUgtaby$|cRZaEpBeK zdzDVyCk)KGgpQQk)iLXAw&EPZ0ef@`Y7_G7duF_Y#`qeYS@1s-u$eh8_;tpTyW4x$ z`tqb87OrEg$J`v@LP5b8_6R)*++CnI{WbNzvcEuU+bFJ|XKikfgFhCX@aR{^=Z1VegMmqZe0R!GxJMD| zY{1@@^u+?^<`^FhwC-$hH17=i1U~E+VJ~dvQNJ(%b+)Cvu4L3>GmrU%Y1l*Ktcy1_ zG)KgcquCnu#d<~qtx;TyU5;kV6GJW@WLs^se%?F^`w#f(nepkZMrS-*UpL^{(`)R< z`;@f5H0Y1A}cbpQ{^U^zUg~S=-}j!ExIPeGnWw zqa9JGN7&aIZSV~Cv}9v9tn4|QFUV;HD=uz1e!Tyk5itzdgPNMBg1yGnru>H9~A{fU|5(9Ia*^u=z!KO!;F2V_sKB(kB=i94+n3 zvG|1!$N^(?fOF^ahI)qm@&D7huRm+kaE|kP4K;Ioz5QHocc#GZWWd!k>emlMJTabw zy^ZgR_*ZIdZ;QHx=&X0pJOOL7%X?<;7=yjWJ&X^u;vOh=;TQCo2=wPs-yrPCfY}Ir zE`|G}C^om_Hs4FyVhGx73+cBptkjBMBI&o;`=Re9=*Pg?X(RW6HGiKFIe3vvVNL&U z>93!xujjgU)YoJ2?tWrFYnlv+GBIWEiMRyZll#SbEMGsC_t!%Fk((pizq1~(!GiNl z@arH;ZtzL`g~SxTf3To9O_p$w+53)6MN*`-J&`Q6`9Krr>k!>tIeXphBbWcDzgcYS%h}tR%ZG%Ly;_Mw1H7Tft-TYnZlGi#e80n&Jk7cL5hl6)SXUo> zm0MTcOIR4+<77YK1wR%b6}JfWCv_l;#5{uxEvz5 z27QQc3fs#){p9-{q|zk4<9jB8X`Mfr)Op9p6H}VW;5hzmsS&t3_@~|n|E*%fKlEuO KLS69xfBpl7N6BLV diff --git a/arccnet/data_generation/tests/test_sdo_processing.py b/arccnet/data_generation/tests/test_sdo_processing.py index c409e173..8d19371d 100644 --- a/arccnet/data_generation/tests/test_sdo_processing.py +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -12,7 +12,6 @@ from arccnet import config from arccnet.data_generation.timeseries.sdo_processing import crop_map, flare_check, pad_map, rand_select -data_path = config["paths"]["data_folder"] test_path = Path(__file__).resolve().parent diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 481d76f3..4cdd3f45 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -110,12 +110,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: - The date of the run, used for reprojection - The coordinate of the noaa active region """ - # table_name = 
f"{size}_{duration}_{long_lim}_{types}.parq".strip(' ') - # if os.path.exists(f"{data_path}/flare_files/{table_name}"): - # print("Run table cached - loading") - # combined = Table.read(table_name) - # else: - # print("No run table cached - creating new table") table = Table.read(hek_path) srs = Table.read(srs_path) @@ -172,7 +166,6 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: new_names=("noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category", "fl_count"), ) combined = vstack([flares_exp, srs_exp]) - # combined.write(f"{data_path}/flare_files/{table_name}", format='parquet') final = rand_select(combined, size, types) subset = final["noaa_number", "goes_class", "start_time", "end_time", "tb_date", "c_coord", "category"] @@ -300,9 +293,6 @@ def drms_pipeline( hmi_query, hmi_export, ic_query, ic_export = hmi_query_export(start_t, end_t, hmi_keys, sample) aia_query, aia_export = aia_query_export(hmi_query, aia_keys, wavelengths) - # ic_query.rename(columns={"*recnum*":"FSN"}, inplace=True) - # img_query = pd.concat([aia_query, ic_query],ignore_index=True) - # img_urls = pd.concat([aia_export.urls, ic_export.urls], ignore_index=True) hmi_dls, hmi_exs = l1_file_save(hmi_export, hmi_query, path) cnt_dls, cnt_exs = l1_file_save(ic_query, ic_export, path) @@ -338,14 +328,14 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): """ client = drms.Client() duration = round((time_2 - time_1).to_value(u.hour)) - qstr_m_hmi = f"hmi.M_720s[{time_1.value}/{duration}h@{sample}m]" + "{magnetogram}" + qstr_m_hmi = f"hmi.M_720s[{time_1.value}/{duration}h@{sample}m]{{magnetogram}}" hmi_query = client.query(ds=qstr_m_hmi, key=keys) good_result = hmi_query[hmi_query.QUALITY == 0] good_num = good_result["*recnum*"].values bad_result = hmi_query[hmi_query.QUALITY != 0] - qstrs_m_hmi = [f"hmi.M_720s[{time}]" + "{magnetogram}" for time in bad_result["T_REC"]] + qstrs_m_hmi = [f"hmi.M_720s[{time}]{{magnetogram}}" for time in bad_result["T_REC"]] hmi_values = [hmi_rec_find(qstr, keys, 3, 720) for qstr in qstrs_m_hmi] patched_num = [*hmi_values] @@ -353,7 +343,7 @@ def hmi_query_export(time_1, time_2, keys: list, sample: int): joined_num = [str(num) for num in joined_num] hmi_num_str = str(joined_num).strip("[]") - hmi_qstr = f"hmi.M_720s[! recnum in ({hmi_num_str}) !]" + "{magnetogram}" + hmi_qstr = f"hmi.M_720s[! recnum in ({hmi_num_str}) !]{{magnetogram}}" hmi_query_full = client.query(ds=hmi_qstr, key=keys) hmi_result = client.export(hmi_qstr, method="url", protocol="fits", email=os.environ["JSOC_EMAIL"]) hmi_result.wait() @@ -378,13 +368,13 @@ def hmi_continuum_export(hmi_query, keys): A tuple containing the query results of the hmi mag and ic_no_limbdark (pandas df) and the export data response (drms export object). """ client = drms.Client() - qstrs_ic = [f"hmi.Ic_noLimbDark_720s[{time}]" + "{image}" for time in hmi_query["T_REC"]] + qstrs_ic = [f"hmi.Ic_noLimbDark_720s[{time}]{{continuum}}" for time in hmi_query["T_REC"]] ic_value = [hmi_rec_find(qstr, keys, 3, 12) for qstr in qstrs_ic] unpacked_ic = list(itertools.chain.from_iterable(ic_value)) joined_num = [str(num) for num in unpacked_ic] ic_num_str = str(joined_num).strip("[]") - ic_comb_qstr = f"hmi.Ic_noLimbDark_720s[! recnum in ({ic_num_str}) !]" + "{image}" + ic_comb_qstr = f"hmi.Ic_noLimbDark_720s[! 
recnum in ({ic_num_str}) !]{{continuum}}" ic_query_full = client.query(ds=ic_comb_qstr, key=keys) @@ -411,19 +401,16 @@ def aia_query_export(hmi_query, keys, wavelength): aia_query_full, aia_result (tuple): A tuple containing the query result and the export data response. """ client = drms.Client() - qstrs_euv = [f"aia.lev1_euv_12s[{time}][{wavelength}]" + "{image}" for time in hmi_query["T_REC"]] - qstrs_uv = [f"aia.lev1_uv_24s[{time}]{[1600, 1700]}" + "{image}" for time in hmi_query["T_REC"]] - # qstrs_vis = [f"aia.lev1_vis_1h[{time}]{[4500]}" + "{image}" for time in hmi_query["T_REC"]] + qstrs_euv = [f"aia.lev1_euv_12s[{time}][{wavelength}]{{image}}" for time in hmi_query["T_REC"]] + qstrs_uv = [f"aia.lev1_uv_24s[{time}]{[1600, 1700]}{{image}}" for time in hmi_query["T_REC"]] euv_value = [aia_rec_find(qstr, keys, 3, 12) for qstr in qstrs_euv] uv_value = [aia_rec_find(qstr, keys, 2, 24) for qstr in qstrs_uv] - # vis_value = [aia_rec_find(qstr, keys, 1, 60) for qstr in qstrs_vis] - # unpacked_aia = list(itertools.chain(euv_value, uv_value, vis_value)) unpacked_aia = list(itertools.chain(euv_value, uv_value)) unpacked_aia = [set for set in unpacked_aia if set is not None] unpacked_aia = list(itertools.chain.from_iterable(unpacked_aia)) joined_num = [str(num) for num in unpacked_aia] aia_num_str = str(joined_num).strip("[]") - aia_comb_qstr = f"aia.lev1[! FSN in ({aia_num_str}) !]" + "{image_lev1}" + aia_comb_qstr = f"aia.lev1[! FSN in ({aia_num_str}) !]{{image_lev1}}" aia_query_full = client.query(ds=aia_comb_qstr, key=keys) @@ -491,7 +478,7 @@ def aia_rec_find(qstr, keys, retries, time_add): if wvl == "4500": return qry["FSN"].values while qry["QUALITY"].values[0] != 0 and retry < retries: - qry = client.query(ds=f"{qstr_head}[{time}][{wvl}]" + "{image}", key=keys) + qry = client.query(ds=f"{qstr_head}[{time}][{wvl}]{{image}}", key=keys) time = change_time(time, time_add) retry += 1 if qry["QUALITY"].values[0] == 0: From 95119281de4e87f103afe0896a902dbbaf5b9df0 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Wed, 13 Aug 2025 12:27:44 +0100 Subject: [PATCH 10/14] updated file paths --- arccnet/data_generation/tests/test_sdo_processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arccnet/data_generation/tests/test_sdo_processing.py b/arccnet/data_generation/tests/test_sdo_processing.py index 8d19371d..e60e6e5b 100644 --- a/arccnet/data_generation/tests/test_sdo_processing.py +++ b/arccnet/data_generation/tests/test_sdo_processing.py @@ -16,7 +16,7 @@ def test_rand(): - combined = Table.read(f"{test_path}/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.ecsv") types = ["F1", "F2", "N1", "N2"] # 1. test with full list of samples @@ -53,7 +53,7 @@ def test_padding(): def test_flare_check(): - combined = Table.read(f"{test_path}/data/ts_test_data.parq") + combined = Table.read(f"{test_path}/data/ts_test_data.ecsv") flares = combined[combined["goes_class"] != "N"] flare = flares[flares["goes_class"] == "C3.7"] flare = flare[flare["noaa_number"] == 12644] From e302bfa922265edf72ac2b2335c12e96c429ab2f Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Thu, 14 Aug 2025 16:10:16 +0100 Subject: [PATCH 11/14] Improved pipeline, added log for empty queries, WIP. 
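The empty-query logging introduced here boils down to an append-only text log of DRMS query strings that returned no rows, with duplicates skipped. A minimal sketch of that behaviour (the helper name and default log-file name are illustrative only, not the function added in this commit):

from pathlib import Path


def log_bad_query(qstr: str, log_file: str = "bad_queries.txt") -> None:
    """Record a DRMS query string that came back empty, skipping entries already logged."""
    path = Path(log_file)
    seen = path.read_text().splitlines() if path.exists() else []
    if qstr not in seen:
        with path.open("a") as fh:
            fh.write(qstr + "\n")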
--- .../timeseries/drms_pipeline.py | 7 +-- .../timeseries/sdo_processing.py | 47 +++++++++++++++++-- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 978b0c18..340bf5e5 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -29,9 +29,9 @@ # Logging settings here. drms_log = logging.getLogger("drms") - drms_log.setLevel("WARNING") + drms_log.setLevel("ERROR") reproj_log = logging.getLogger("reproject.common") - reproj_log.setLevel("WARNING") + reproj_log.setLevel("ERROR") # May need to find a more robust solution with filters/exceptions for this. astropy_log.setLevel("ERROR") data_path = config["paths"]["data_folder"] @@ -42,7 +42,8 @@ size=1, duration=6, long_lim=65, - types=["F1", "F2", "N1", "N2"], + # types=["F1", "F2", "N1", "N2"], + types=["F1"], )[0] cores = int(config["drms"]["cores"]) with ProcessPoolExecutor(cores) as executor: diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index cd9220b9..63108f66 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -30,9 +30,12 @@ from arccnet.data_generation.utils.utils import save_compressed_map from arccnet.visualisation.data import mosaic_animate, mosaic_plot +data_path = config["paths"]["data_folder"] +qry_log = "bad_queries" + warnings.simplefilter("ignore", RuntimeWarning) reproj_log = logging.getLogger("reproject.common") -reproj_log.setLevel("WARNING") +reproj_log.setLevel("ERROR") os.environ["JSOC_EMAIL"] = "danielgass192@gmail.com" __all__ = [ @@ -51,6 +54,32 @@ ] +def bad_qry(qry, data_path, name): + r""" + Logs bad drms queries to document for future reference. + + Parameters + ---------- + qry: `str` + Drms query which returns an empty dataframe. + data_path : `str` + Path to arccnet data. + name: `str` + Log filename. + """ + print(f"Bad Query Detected - {qry}") + print(data_path) + logging.warning(f"Bad Query Detected - {qry}") + f_name = f"{data_path}/logs/{name}.txt" + if not os.path.exists(f_name): + file = open(f"{data_path}/logs/{name}.txt", "x") + file = open(f"{data_path}/logs/{name}.txt", "+") + entries = [row for row in file] + if qry not in entries: + file.write(qry) + file.close() + + def rand_select(table, size, types: list): r""" Randomly selects targets from provided table and list of data subsets. 
@@ -156,6 +185,9 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: srs["category"] = ar_cat srs["n_fl_count"] = fl_cat srs["ar"] = "N" + flares = flares[flares["goes_class"] == "C1.1"] + flares = flares[flares["noaa_number"] == 11169] + flares = flares[flares["tb_date"] == "2011-03-15"] srs_exp = srs["number", "ar", "target_time", "srs_end_time", "srs_date", "c_coord", "category", "n_fl_count"] flares_exp = flares[ @@ -442,13 +474,14 @@ def hmi_rec_find(qstr, keys, retries, sample, cont=False): series = "hmi.Ic_noLimbDark_720s" client = drms.Client() count = 0 - # print(qstr) qry = client.query(ds=qstr, key=keys) - # print(qry) + print(qry) if qry.empty: + print("Bad Query - HMI") time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) qry["QUALITY"] = [10000] - # print(qry) + bad_qry(qry, data_path, qry_log) + else: time = sunpy.time.parse_time(qry["T_REC"].values[0]) # print(qry["QUALITY"]) @@ -457,9 +490,9 @@ def hmi_rec_find(qstr, keys, retries, sample, cont=False): if qry.empty: time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) qry["QUALITY"] = [10000] + bad_qry(qry, data_path, qry_log) time = change_time(time, sample) count += 1 - qry["*recnum*"] = [0] return qry["*recnum*"].values[0] @@ -483,6 +516,7 @@ def aia_rec_find(qstr, keys, retries, time_add): retry = 0 qry = client.query(ds=qstr, key=keys) qstr_head = qstr.split("[")[0] + print(qry) if not qry.empty: time, wvl = qry["T_REC"].values[0][0:-1], qry["WAVELNTH"].values[0] if wvl == "4500": @@ -493,6 +527,9 @@ def aia_rec_find(qstr, keys, retries, time_add): retry += 1 if qry["QUALITY"].values[0] == 0: return qry["FSN"].values + else: + print("Bad Query - AIA") + bad_qry(qry, data_path, qry_log) def l1_file_save(export, query, path): From 5d6e751ba307ff2469c852e2523c8f0d16d55699 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Fri, 15 Aug 2025 10:15:21 +0100 Subject: [PATCH 12/14] Removed print statements from bad_qry function. --- arccnet/data_generation/timeseries/sdo_processing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 63108f66..a0daac22 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -67,8 +67,6 @@ def bad_qry(qry, data_path, name): name: `str` Log filename. """ - print(f"Bad Query Detected - {qry}") - print(data_path) logging.warning(f"Bad Query Detected - {qry}") f_name = f"{data_path}/logs/{name}.txt" if not os.path.exists(f_name): From 78d279013c3d3244d7e87070ce8468ac231e7329 Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Fri, 15 Aug 2025 13:40:14 +0100 Subject: [PATCH 13/14] Fixed formatting for pre-commit. 
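Aside from the formatting fix in this patch, the DRMS query helpers touched in the surrounding commits all follow the same retry pattern: run the query, check the QUALITY flag, and re-query at a shifted time up to a fixed number of attempts. A condensed stand-alone sketch of that loop with a stand-in fetch callable (the real helpers go through drms.Client, which is not reproduced here):

from datetime import datetime, timedelta


def query_with_retries(fetch, time: datetime, retries: int = 3, step_minutes: int = 12):
    """Call fetch(time) until it reports good quality or the retry budget is spent."""
    result = fetch(time)
    count = 0
    while result.get("QUALITY", 1) != 0 and count < retries:
        time = time + timedelta(minutes=step_minutes)  # nudge the requested time forward
        result = fetch(time)
        count += 1
    return result


# e.g. with a toy fetch that only succeeds on the second attempt
attempts = iter([{"QUALITY": 1}, {"QUALITY": 0}])
print(query_with_retries(lambda t: next(attempts), datetime(2013, 8, 20)))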
--- arccnet/data_generation/timeseries/drms_pipeline.py | 2 +- arccnet/data_generation/timeseries/sdo_processing.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 710f9529..989ab9ea 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -44,7 +44,7 @@ long_lim=65, types=["F1", "F2", "N1", "N2"], )[0] - + cores = int(config["drms"]["cores"]) with ProcessPoolExecutor(cores) as executor: for record in starts: diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index c87e0521..18efa867 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -78,6 +78,7 @@ def bad_qry(qry, data_path, name): file.write(qry) file.close() + def rand_select(table, size, types: list): r""" Randomly selects targets from provided table and list of data subsets. @@ -173,7 +174,7 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: SkyCoord(lon * u.deg, lat * u.deg, obstime=t_time, observer="earth", frame=frames.HeliographicStonyhurst) for lat, lon, t_time in zip(srs["latitude"], srs["longitude"], srs["target_time"]) ] - + print("Parsing Active Regions") ar_cat, fl_cat = [], [] for ar in srs: From ed52fa9e05050b7620b4f3bb91c189621a7822cc Mon Sep 17 00:00:00 2001 From: DanGass <55432146+DanGass@users.noreply.github.com> Date: Tue, 19 Aug 2025 16:48:05 +0100 Subject: [PATCH 14/14] Replaced prints with logs, changed plot to use empty subplot, deleted frames once anim was plotted, and skip bad runs. --- .../timeseries/drms_pipeline.py | 7 ++++++- .../timeseries/sdo_processing.py | 20 +++++++++---------- arccnet/visualisation/data.py | 6 +++--- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/arccnet/data_generation/timeseries/drms_pipeline.py b/arccnet/data_generation/timeseries/drms_pipeline.py index 989ab9ea..19f5eec8 100644 --- a/arccnet/data_generation/timeseries/drms_pipeline.py +++ b/arccnet/data_generation/timeseries/drms_pipeline.py @@ -55,7 +55,9 @@ patch_height = int(config["drms"]["patch_height"]) * u.pix patch_width = int(config["drms"]["patch_width"]) * u.pix try: - print(record["noaa_number"], record["goes_class"], record["start_time"], record["category"]) + logging.info( + f"{record['noaa_number']} {record['goes_class']} {record['start_time']} {record['category']}" + ) aia_maps, hmi_maps = drms_pipeline( start_t=start, end_t=end, @@ -65,6 +67,9 @@ wavelengths=config["drms"]["wavelengths"], sample=config["drms"]["sample"], ) + if len(aia_maps != 60): + logging.warning("Bad Run detected - missing frames.") + continue hmi_proc = list( tqdm( diff --git a/arccnet/data_generation/timeseries/sdo_processing.py b/arccnet/data_generation/timeseries/sdo_processing.py index 18efa867..79439797 100644 --- a/arccnet/data_generation/timeseries/sdo_processing.py +++ b/arccnet/data_generation/timeseries/sdo_processing.py @@ -55,7 +55,7 @@ ] -def bad_qry(qry, data_path, name): +def bad_query(qry, data_path, name): r""" Logs bad drms queries to document for future reference. @@ -64,7 +64,7 @@ def bad_qry(qry, data_path, name): qry: `str` Drms query which returns an empty dataframe. data_path : `str` - Path to arccnet data. + Path to arccnet level 04 data. name: `str` Log filename. 
""" @@ -155,7 +155,7 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: srs["srs_date"] = srs["target_time"].value srs["srs_date"] = [date.split("T")[0] for date in srs["srs_date"]] srs["srs_end_time"] = [(Time(time) + duration * u.hour) for time in srs["target_time"]] - print("Parsing Flares") + logging.info("Parsing Flares") flares["start_time"] = [time - (duration + 1) * u.hour for time in flares["start_time"]] flares["start_time"].format = "fits" flares["tb_date"] = flares["start_time"].value @@ -175,7 +175,7 @@ def read_data(hek_path: str, srs_path: str, size: int, duration: int, long_lim: for lat, lon, t_time in zip(srs["latitude"], srs["longitude"], srs["target_time"]) ] - print("Parsing Active Regions") + logging.info("Parsing Active Regions") ar_cat, fl_cat = [], [] for ar in srs: erl_time = Time(ar["target_time"]) - (duration + 1) * u.hour @@ -473,10 +473,10 @@ def hmi_rec_find(qstr, keys, retries, sample, cont=False): count = 0 qry = client.query(ds=qstr, key=keys) if qry.empty: - print("Bad Query - HMI") time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) qry["QUALITY"] = [10000] - bad_qry(qry, data_path, qry_log) + logging.warning("Bad Query - HMI") + bad_query(qry, data_path, qry_log) else: time = sunpy.time.parse_time(qry["T_REC"].values[0]) @@ -485,7 +485,8 @@ def hmi_rec_find(qstr, keys, retries, sample, cont=False): if qry.empty: time = sunpy.time.parse_time(re.search(r"\[(.*?)\]", qstr).group(1)) qry["QUALITY"] = [10000] - bad_qry(qry, data_path, qry_log) + logging.warning("Bad Query - HMI") + bad_query(qry, data_path, qry_log) time = change_time(time, sample) count += 1 return qry["*recnum*"].values[0] @@ -511,7 +512,6 @@ def aia_rec_find(qstr, keys, retries, time_add): retry = 0 qry = client.query(ds=qstr, key=keys) qstr_head = qstr.split("[")[0] - print(qry) if not qry.empty: time, wvl = qry["T_REC"].values[0][0:-1], qry["WAVELNTH"].values[0] if wvl == "4500": @@ -523,8 +523,8 @@ def aia_rec_find(qstr, keys, retries, time_add): if qry["QUALITY"].values[0] == 0: return qry["FSN"].values else: - print("Bad Query - AIA") - bad_qry(qry, data_path, qry_log) + logging.warning("Bad Query - AIA") + bad_query(qry, data_path, qry_log) def l1_file_save(export, query, path): diff --git a/arccnet/visualisation/data.py b/arccnet/visualisation/data.py index 1033fecb..9e314397 100644 --- a/arccnet/visualisation/data.py +++ b/arccnet/visualisation/data.py @@ -532,10 +532,8 @@ def mosaic_plot(hmi, name, file, nrows, ncols, wvls, table, path): ax.imshow(np.sqrt(aia_map.data), cmap=cmap) ax.text(0.05, 0.05, f"{wv} - {aia_map.date}", color="w", transform=ax.transAxes, fontsize=5) except IndexError: - aia_map = np.zeros(hmi_map.data.shape) ax = fig.add_subplot(nrows, ncols, i + 1) - ax.imshow(np.sqrt(aia_map.data), cmap=cmap) - ax.text(0.05, 0.05, f"{wv} - MISSING", color="w", transform=ax.transAxes, fontsize=5) + ax.text(0.05, 0.05, f"{wv} - MISSING", color="black", transform=ax.transAxes, fontsize=5) else: ax = fig.add_subplot(nrows, ncols, i + 1) @@ -585,6 +583,8 @@ def mosaic_animate(base_dir, name): for file in sorted_files: frame = cv2.imread(file) out.write(frame) + # Deletes frame after being added to animation to save data on unneeded frames. + os.remove(file) out.release() print(f"Video saved to {output_file}") return output_file