This repository was archived by the owner on Jun 11, 2024. It is now read-only.

Commit 3f358da

move notebooks to notebooks folder
Merge of 2 parents: 7480c15 + ed1125f

File tree: 6 files changed (+66, -28 lines)

nwp/excarta/merge_excarta.py

Lines changed: 8 additions & 9 deletions
@@ -1,21 +1,20 @@
 # import libs
-import xarray as xr
-import pandas as pd
-import numpy as np
-import datetime
 import os
-import pathlib as Path
-from datetime import datetime
-import zarr
-import ocf_blosc2
+
+import xarray as xr
+
 
 
 def merge_zarr_files(zarr_path, merged_zarr_path):
     # Collect paths of Zarr files in the specified directory
     zarr_files = [
+<<<<<<< HEAD
         os.path.join(zarr_path, file)
         for file in os.listdir(zarr_path)
         if file.endswith(".zarr")
+=======
+        os.path.join(zarr_path, file) for file in os.listdir(zarr_path) if file.endswith(".zarr")
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     ]
 
     print("1")
@@ -30,7 +29,7 @@ def merge_zarr_files(zarr_path, merged_zarr_path):
 
     # Iterate over the remaining Zarr files and merge them into the initial dataset
     for file in zarr_files[1:]:
-        ds = xr.open_zarr(file)
+        xr.open_zarr(file)
         print(file)
 
         # ds_filt = ds.sel(x=slice(*x_range), y=slice(*y_range))
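
Note that this hunk commits unresolved conflict markers into the file, and the HEAD side of the loop discards the opened dataset (`xr.open_zarr(file)` is no longer bound to `ds`), so the loop no longer merges anything. For reference, a minimal sketch of a working merge, assuming the per-file datasets share an `init_time` dimension; this is not the repo's code:

# Hypothetical sketch: concatenate per-file Zarr stores into one output store.
import os

import xarray as xr


def merge_zarr_files(zarr_path, merged_zarr_path):
    zarr_files = sorted(
        os.path.join(zarr_path, f) for f in os.listdir(zarr_path) if f.endswith(".zarr")
    )
    datasets = [xr.open_zarr(f) for f in zarr_files]
    merged = xr.concat(datasets, dim="init_time")  # assumes a common init_time dim
    merged.to_zarr(merged_zarr_path, mode="w")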

nwp/excarta/parse_excarta_monthly.py

Lines changed: 22 additions & 3 deletions
@@ -1,10 +1,15 @@
 # Low memory script
+<<<<<<< HEAD
+=======
+import argparse
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
 import os
+import pathlib
 from datetime import datetime
+
 import pandas as pd
 import xarray as xr
-import argparse
-import pathlib
+
 
 
 def _parse_args():
@@ -21,6 +26,7 @@ def data_loader(folder_path, month_to_process):
     Only process files for the month 'YYYYMM' given by month_to_process
     """
     month_to_process = datetime.strptime(month_to_process, "%Y%m")
+<<<<<<< HEAD
     column_names = [
         "DateTimeUTC",
         "LocationId",
@@ -30,6 +36,9 @@ def data_loader(folder_path, month_to_process):
         "dhi",
         "ghi",
     ]
+=======
+    column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     files = os.listdir(folder_path)
     datasets = []
 
@@ -42,10 +51,14 @@ def data_loader(folder_path, month_to_process):
     ):
         file_path = os.path.join(folder_path, filename)
         df = pd.read_csv(
+<<<<<<< HEAD
             file_path,
             header=None,
             names=column_names,
             parse_dates=["DateTimeUTC"],
+=======
+            file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
         )
 
         df["step"] = (
@@ -80,8 +93,12 @@ def pdtocdf(datasets):
     """
 
     datasets = [
+<<<<<<< HEAD
         ds.set_index(index=["init_time", "step", "Latitude", "Longitude"])
         for ds in datasets
+=======
+        ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     ]
 
     ds = xr.concat(datasets, dim="index")
@@ -109,7 +126,9 @@ def main():
         raise RuntimeError(f'Output file "{args.output}" already exist')
 
     PATH = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data"
-    month_to_process = f"{args.year}{args.month:02d}"  # combine year and month arguments into the required format
+    month_to_process = (
+        f"{args.year}{args.month:02d}"  # combine year and month arguments into the required format
+    )
     datasets = load_data_from_all_years(PATH, month_to_process)
     ds = pdtocdf(datasets)
 
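
The `main` hunk builds the month filter from the CLI arguments. A quick illustration of the `YYYYMM` round trip used above (values hypothetical):

from datetime import datetime

year, month = 2022, 3
month_to_process = f"{year}{month:02d}"  # "202203"
assert datetime.strptime(month_to_process, "%Y%m") == datetime(2022, 3, 1)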

nwp/excarta/parse_excarta_to_output.py

Lines changed: 12 additions & 4 deletions
@@ -1,11 +1,11 @@
-import xarray as xr
-import pandas as pd
-import numpy as np
+import argparse
 import datetime
 import os
 import pathlib
 from datetime import datetime
-import argparse
+
+import pandas as pd
+import xarray as xr
 
 
 def _parse_args():
@@ -18,6 +18,7 @@ def data_loader(folder_path):
     """
     Loads and transforms data from CSV files in the given folder_path.
     """
+<<<<<<< HEAD
     column_names = [
         "DateTimeUTC",
         "LocationId",
@@ -27,6 +28,9 @@ def data_loader(folder_path):
         "dhi",
         "ghi",
     ]
+=======
+    column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     files = os.listdir(folder_path)
     dfs = []
 
@@ -71,9 +75,13 @@ def pdtocdf(dfs):
     merged_df = pd.concat(dfs, ignore_index=True)
 
     ds = xr.Dataset.from_dataframe(merged_df)
+<<<<<<< HEAD
     ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack(
         "index"
     )
+=======
+    ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack("index")
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     ds = ds.drop_vars(["LocationId", "DateTimeUTC"])
 
     var_names = ds.data_vars
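
Both sides of the `pdtocdf` conflict are the same operation, formatted differently: build a MultiIndex over the flat "index" dimension, then unstack it into separate dimensions. A self-contained toy version of the pattern (column values hypothetical):

import pandas as pd
import xarray as xr

df = pd.DataFrame(
    {
        "init_time": pd.to_datetime(["2022-01-01", "2022-01-01"]),
        "step": [0, 0],
        "Latitude": [50.0, 51.0],
        "Longitude": [-1.0, -1.0],
        "ghi": [100.0, 120.0],
    }
)
ds = xr.Dataset.from_dataframe(df)  # 1-D Dataset along a row-number "index" dim
ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack("index")
# "ghi" is now a 4-D variable on (init_time, step, Latitude, Longitude)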

nwp/excarta/parse_excarta_to_output_low_mem.py

Lines changed: 14 additions & 2 deletions
@@ -1,10 +1,14 @@
 # Low memory script
+<<<<<<< HEAD
+=======
+import argparse
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
 import os
+import pathlib
 from datetime import datetime
+
 import pandas as pd
 import xarray as xr
-import argparse
-import pathlib
 
 
 def _parse_args():
@@ -17,6 +21,7 @@ def data_loader(folder_path):
     """
     Loads and transforms data from CSV files in the given folder_path and directly convert each DataFrame into an xarray Dataset.
     """
+<<<<<<< HEAD
     column_names = [
         "DateTimeUTC",
         "LocationId",
@@ -26,6 +31,9 @@ def data_loader(folder_path):
         "dhi",
         "ghi",
     ]
+=======
+    column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     files = os.listdir(folder_path)
     datasets = []
 
@@ -70,8 +78,12 @@ def pdtocdf(datasets):
     # ds = xr.merge(datasets)
 
     datasets = [
+<<<<<<< HEAD
         ds.set_index(index=["init_time", "step", "Latitude", "Longitude"])
         for ds in datasets
+=======
+        ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     ]
 
     ds = xr.concat(datasets, dim="index")
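
The low-memory variant converts each file's DataFrame to an `xarray.Dataset` immediately and concatenates the small per-file Datasets along the flat "index" dimension, deferring the expensive unstack. A runnable toy sketch of that flow (two single-row "files", values hypothetical):

import pandas as pd
import xarray as xr

rows = [
    {"init_time": "2022-01-01", "step": 0, "Latitude": 50.0, "Longitude": -1.0, "ghi": 100.0},
    {"init_time": "2022-01-01", "step": 1, "Latitude": 50.0, "Longitude": -1.0, "ghi": 80.0},
]
datasets = [
    xr.Dataset.from_dataframe(pd.DataFrame([row])).set_index(
        index=["init_time", "step", "Latitude", "Longitude"]
    )
    for row in rows
]
ds = xr.concat(datasets, dim="index")  # one long "index" dim, built file by file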

scripts/convert_icon_archive.py

Lines changed: 10 additions & 10 deletions
@@ -6,8 +6,11 @@
 
 """
 
+import multiprocessing as mp
 import os
+import subprocess
 from glob import glob
+from pathlib import Path
 
 import xarray as xr
 import zarr
@@ -18,10 +21,7 @@
     EU_VAR2D_LIST,
     EU_VAR3D_LIST,
 )
-import subprocess
 
-from pathlib import Path
-import multiprocessing as mp
 
 def decompress(full_bzip_filename: Path, temp_pth: Path) -> str:
     """
@@ -38,7 +38,7 @@ def decompress(full_bzip_filename: Path, temp_pth: Path) -> str:
     base_nat_filename = os.path.splitext(base_bzip_filename)[0]
     full_nat_filename = os.path.join(temp_pth, base_nat_filename)
     if os.path.exists(full_nat_filename):
-        return full_nat_filename # Don't decompress a second time
+        return full_nat_filename  # Don't decompress a second time
     with open(full_nat_filename, "wb") as nat_file_handler:
         process = subprocess.run(
             ["pbzip2", "--decompress", "--keep", "--stdout", full_bzip_filename],
@@ -179,8 +179,8 @@ def upload_to_hf(dataset_xr, folder, model="eu", run="00", token=None):
     encoding = {var: {"compressor": Blosc2("zstd", clevel=9)} for var in dataset_xr.data_vars}
     encoding["time"] = {"units": "nanoseconds since 1970-01-01"}
     with zarr.ZipStore(
-            zarr_path,
-            mode="w",
+        zarr_path,
+        mode="w",
     ) as store:
         dataset_xr.chunk(chunking).to_zarr(store, encoding=encoding, compute=True)
     done = False
@@ -189,10 +189,10 @@ def upload_to_hf(dataset_xr, folder, model="eu", run="00", token=None):
     api.upload_file(
         path_or_fileobj=zarr_path,
         path_in_repo=f"data/{dataset_xr.time.dt.year.values}/"
-            f"{dataset_xr.time.dt.month.values}/"
-            f"{dataset_xr.time.dt.day.values}/"
-            f"{dataset_xr.time.dt.year.values}{str(dataset_xr.time.dt.month.values).zfill(2)}{str(dataset_xr.time.dt.day.values).zfill(2)}"
-            f"_{str(dataset_xr.time.dt.hour.values).zfill(2)}.zarr.zip",
+        f"{dataset_xr.time.dt.month.values}/"
+        f"{dataset_xr.time.dt.day.values}/"
+        f"{dataset_xr.time.dt.year.values}{str(dataset_xr.time.dt.month.values).zfill(2)}{str(dataset_xr.time.dt.day.values).zfill(2)}"
+        f"_{str(dataset_xr.time.dt.hour.values).zfill(2)}.zarr.zip",
         repo_id="openclimatefix/dwd-icon-global"
         if model == "global"
         else "openclimatefix/dwd-icon-eu",
