97 changes: 97 additions & 0 deletions arccnet/catalogs/datasets.py
@@ -0,0 +1,97 @@
import numpy as np

from astropy.table import QTable

from arccnet.data_generation.utils.utils import grouped_stratified_split

# CLI to tar and compress
# tar -cf - fits quicklook cutout-magnetic-catalog-v20231027.parq cutout-mcintosh-catalog-v20231027.parq | pigz > arccnet-cutout-dataset-v20231027.tar.gz
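# (pigz is a parallel gzip; the archive bundles the fits/ and quicklook/ directories
# together with the two parquet catalogs written below)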


def create_cutout_classification_datasets(cutout_catalog_path, random_seed=42):
    # Set the seed so the splits are reproducible
    np.random.seed(random_seed)

    cutout_cat = QTable.read(cutout_catalog_path)

    filtered_cat = cutout_cat[~(cutout_cat["filtered_mdi"] | cutout_cat["filtered_hmi"])]

    # Only 1D columns can be converted to pandas
    cols = [name for name in filtered_cat.colnames if len(filtered_cat[name].shape) <= 1]

    filtered_cat_df = filtered_cat[cols].to_pandas()

    # Add new QS (quiet Sun) and PLG (plage) classes to both the magnetic (Hale) and
    # McIntosh classifications
    filtered_cat_df.loc[filtered_cat_df.region_type == "QS", "magnetic_class"] = "QS"
    filtered_cat_df.loc[filtered_cat_df.region_type == "QS", "mcintosh_class"] = "QS"

    filtered_cat_df.loc[filtered_cat_df.region_type == "IA", "magnetic_class"] = "PLG"  # plage
    filtered_cat_df.loc[filtered_cat_df.region_type == "IA", "mcintosh_class"] = "PLG"

    # Only the local FITS files (not the source files) are tarred up, so rewrite the
    # paths to be relative to that folder
    def make_paths_relative(cat, col):
        relpaths = ["/".join(p.split("/")[-2:]) for p in cat[col][~cat[col].mask]]
        cat[col][~cat[col].mask] = relpaths
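        # e.g. "/data/arccnet/fits/cutout_12345.fits" -> "fits/cutout_12345.fits"
        # (hypothetical path; only the last two path components are kept)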

    make_paths_relative(filtered_cat, "path_image_cutout_hmi")
    make_paths_relative(filtered_cat, "quicklook_path_hmi")
    make_paths_relative(filtered_cat, "path_image_cutout_mdi")
    make_paths_relative(filtered_cat, "quicklook_path_mdi")

    # Make the train / test split
    mag_train, mag_test = grouped_stratified_split(
        filtered_cat_df, group_col="number", class_col="magnetic_class", train_size=0.8, test_size=0.2
    )
    hale_train, hale_test = grouped_stratified_split(
        filtered_cat_df, group_col="number", class_col="mcintosh_class", train_size=0.8, test_size=0.2
    )
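    # Grouping on NOAA number keeps all cutouts of a given active region in the same
    # split (no train/test leakage), while stratification preserves the class balance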

    mag_train_cat = filtered_cat[mag_train]
    # mag_test_cat = filtered_cat[mag_test]

    hale_train_cat = filtered_cat[hale_train]
    # hale_test_cat = filtered_cat[hale_test]

    mag_train_cat.write("cutout-magnetic-catalog-v20231027.parq")
    hale_train_cat.write("cutout-mcintosh-catalog-v20231027.parq")


def create_detection_datasets(detection_catalog, random_seed=42):
    # Set the seed so the splits are reproducible
    np.random.seed(random_seed)

    extraction_cat = QTable.read(detection_catalog)
    extraction_cat.rename_column("NOAA", "number")

    # Multi-dimensional columns can't be converted to pandas, so drop them first
    cols = [name for name in extraction_cat.colnames if name not in ("top_right_cutout", "bottom_left_cutout")]

    extraction_df = extraction_cat[cols].to_pandas()

    # Only the local FITS files (not the source files) are tarred up, so rewrite the
    # paths to be relative to that folder
    # def make_paths_relative(cat, col):
    #     relpaths = ["/".join(p.split("/")[-2:]) for p in cat[col]]
    #     cat[col] = relpaths
    #
    # make_paths_relative(extraction_cat, "processed_path")

    # Make the train / test split
    mag_train, mag_test = grouped_stratified_split(extraction_df, group_col="number", class_col="magnetic_class")
    hale_train, hale_test = grouped_stratified_split(extraction_df, group_col="number", class_col="mcintosh_class")

    mag_train_cat = extraction_cat[mag_train]
    # mag_test_cat = extraction_cat[mag_test]

    hale_train_cat = extraction_cat[hale_train]
    # hale_test_cat = extraction_cat[hale_test]

    mag_train_cat.write("cutout-magnetic-catalog-v20231027.parq")
    hale_train_cat.write("cutout-mcintosh-catalog-v20231027.parq")
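
# Example usage (hypothetical input paths):
# create_cutout_classification_datasets("data/cutout-catalog-v20231027.parq")
# create_detection_datasets("data/detection-catalog-v20231027.parq")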


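Note: grouped_stratified_split is imported from arccnet.data_generation.utils.utils and is not shown in this diff. A minimal sketch of the behaviour relied on above, built on scikit-learn's StratifiedGroupKFold (an assumption for illustration; the actual arccnet implementation may differ):

import numpy as np
from sklearn.model_selection import StratifiedGroupKFold


def grouped_stratified_split_sketch(df, group_col, class_col, n_splits=5, random_seed=42):
    """Return boolean train/test masks (~80/20 for n_splits=5) with no group in both sets."""
    sgkf = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=random_seed)
    # Take the first fold: one fold becomes the test set, the rest the train set
    train_idx, test_idx = next(sgkf.split(df, y=df[class_col], groups=df[group_col]))
    train = np.zeros(len(df), dtype=bool)
    test = np.zeros(len(df), dtype=bool)
    train[train_idx] = True
    test[test_idx] = True
    return train, test

Boolean masks like these can index an astropy QTable directly, matching how the split results are used above.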
13 changes: 6 additions & 7 deletions arccnet/catalogs/flares/helio.py
@@ -7,7 +7,6 @@
from sunpy.net.helio import HECResponse
from sunpy.time import TimeRange

-import astropy.units as u
from astropy.table import QTable, vstack
from astropy.time import Time

@@ -87,12 +86,12 @@ def search(

        # There is a hard server-side limit of 20,000 records, and making many small queries
        # will get blocked, so use different window sizes for different time periods; for
        # 2022 onwards use 15-day intervals
-        years = [tr.end.to_datetime().year for tr in windows]
-        if 2022 in years:
-            first_2022 = years.index(2022)
-            tr_2022_plus = TimeRange(windows[first_2022].start, windows[first_2022 + 1].end)
-            new_windows = tr_2022_plus.window(5 * u.day, 5 * u.day)
-            windows = windows[: first_2022 - 1] + new_windows + windows[first_2022 + 1 :]
+        # years = [tr.end.to_datetime().year for tr in windows]
+        # if 2022 in years:
+        #     first_2022 = years.index(2022)
+        #     tr_2022_plus = TimeRange(windows[first_2022].start, windows[first_2022 + 1].end)
+        #     new_windows = tr_2022_plus.window(5 * u.day, 5 * u.day)
+        #     windows = windows[: first_2022 - 1] + new_windows + windows[first_2022 + 1 :]

        flares = []
        for i, window in enumerate(windows):
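For reference, sunpy's TimeRange.window (used in the disabled block above) splits a time range into equally spaced sub-ranges; a minimal, illustrative sketch with made-up dates:

import astropy.units as u
from sunpy.time import TimeRange

tr = TimeRange("2022-01-01", "2022-03-01")
# Contiguous 5-day windows: the step (cadence) and the window length are both 5 days
windows = tr.window(5 * u.day, 5 * u.day)
print(len(windows), windows[0])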