Skip to content
This repository was archived by the owner on Jun 11, 2024. It is now read-only.

Commit c977e1f

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent e70e332 commit c977e1f

File tree

6 files changed

+91
-84
lines changed

6 files changed

+91
-84
lines changed

nwp/excarta/merge_excarta.py

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,19 @@
11
# import libs
2-
import xarray as xr
3-
import pandas as pd
4-
import numpy as np
5-
import datetime
62
import os
7-
import pathlib as Path
8-
from datetime import datetime
9-
import zarr
10-
import ocf_blosc2
3+
4+
import xarray as xr
5+
116

127
def merge_zarr_files(zarr_path, merged_zarr_path):
138
# Collect paths of Zarr files in the specified directory
14-
zarr_files = [os.path.join(zarr_path, file) for file in os.listdir(zarr_path) if file.endswith('.zarr')]
9+
zarr_files = [
10+
os.path.join(zarr_path, file) for file in os.listdir(zarr_path) if file.endswith(".zarr")
11+
]
1512

1613
print("1")
1714
# Open the first Zarr file to create the initial dataset
1815
merged_ds = xr.open_zarr(zarr_files[0])
19-
16+
2017
print("2")
2118

2219
# Define the specific range of x and y coordinates
@@ -25,30 +22,25 @@ def merge_zarr_files(zarr_path, merged_zarr_path):
2522

2623
# Iterate over the remaining Zarr files and merge them into the initial dataset
2724
for file in zarr_files[1:]:
28-
ds = xr.open_zarr(file)
25+
xr.open_zarr(file)
2926
print(file)
3027

3128
# ds_filt = ds.sel(x=slice(*x_range), y=slice(*y_range))
3229
merged_ds = merged_ds.combine_first(ds_filt)
33-
30+
3431
print("3")
3532

3633
# Rechunk the merged dataset
3734
merged_ds = merged_ds.chunk(chunks={"init_time": 10, "x": 100, "y": 100})
38-
39-
print("4")
40-
4135

36+
print("4")
4237

43-
4438
print(merged_ds)
4539

4640
# Save the merged dataset as a new Zarr file
4741
merged_ds.to_zarr(merged_zarr_path)
48-
42+
4943
print("5")
50-
51-
5244

5345

5446
# Specify the path where the independent Zarr files are located
@@ -59,4 +51,3 @@ def merge_zarr_files(zarr_path, merged_zarr_path):
5951

6052
# Merge the Zarr files
6153
merge_zarr_files(zarr_path, merged_zarr_path)
62-

nwp/excarta/parse_excarta_monthly.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
#Low memory script
1+
# Low memory script
2+
import argparse
23
import os
4+
import pathlib
35
from datetime import datetime
6+
47
import pandas as pd
58
import xarray as xr
6-
import argparse
7-
import pathlib
9+
810

911
def _parse_args():
1012
parser = argparse.ArgumentParser()
@@ -14,28 +16,32 @@ def _parse_args():
1416
return parser.parse_args()
1517

1618

17-
1819
def data_loader(folder_path, month_to_process):
1920
"""
2021
Loads and transforms data from CSV files in the given folder_path and directly convert each DataFrame into an xarray Dataset.
2122
Only process files for the month 'YYYYMM' given by month_to_process
2223
"""
2324
month_to_process = datetime.strptime(month_to_process, "%Y%m")
24-
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
25+
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
2526
files = os.listdir(folder_path)
2627
datasets = []
2728

2829
for filename in files:
2930
if filename.endswith(".csv") and not filename.startswith("._"):
3031
file_datetime = datetime.strptime(filename[:-4], "%Y%m%d%H")
3132

32-
if (file_datetime.year == month_to_process.year) and (file_datetime.month == month_to_process.month):
33-
33+
if (file_datetime.year == month_to_process.year) and (
34+
file_datetime.month == month_to_process.month
35+
):
3436
file_path = os.path.join(folder_path, filename)
35-
df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])
36-
37-
df['step'] = (df['DateTimeUTC'] - file_datetime).dt.total_seconds() / 3600 # convert timedelta to hours
38-
df['init_time'] = file_datetime
37+
df = pd.read_csv(
38+
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
39+
)
40+
41+
df["step"] = (
42+
df["DateTimeUTC"] - file_datetime
43+
).dt.total_seconds() / 3600 # convert timedelta to hours
44+
df["init_time"] = file_datetime
3945

4046
# Convert the dataframe to an xarray Dataset and append to the list
4147
ds = xr.Dataset.from_dataframe(df)
@@ -62,26 +68,25 @@ def pdtocdf(datasets):
6268
"""
6369
Processes the xarray Datasets and merges them.
6470
"""
65-
66-
datasets = [ds.set_index(index=['init_time', 'step', 'Latitude', 'Longitude']) for ds in datasets]
6771

68-
ds = xr.concat(datasets, dim='index')
72+
datasets = [
73+
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
74+
]
75+
76+
ds = xr.concat(datasets, dim="index")
6977

7078
# # Define the specific range of x and y coordinates to filter the data on
7179
# x_range = (-10, 2) # Example x coordinate range
7280
# y_range = (49, 59) # Example y coordinate range
7381

7482
ds = ds.rename({"Latitude": "y", "Longitude": "x"})
75-
76-
7783

7884
var_names = ds.data_vars
7985
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
8086
d2 = d2.assign_coords(variable=("variable", var_names))
8187
ds = xr.Dataset(dict(value=d2))
82-
ds = ds.sortby('step')
83-
ds = ds.sortby('init_time')
84-
88+
ds = ds.sortby("step")
89+
ds = ds.sortby("init_time")
8590

8691
return ds
8792

@@ -93,7 +98,9 @@ def main():
9398
raise RuntimeError(f'Output file "{args.output}" already exist')
9499

95100
PATH = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data"
96-
month_to_process = f"{args.year}{args.month:02d}" # combine year and month arguments into the required format
101+
month_to_process = (
102+
f"{args.year}{args.month:02d}" # combine year and month arguments into the required format
103+
)
97104
datasets = load_data_from_all_years(PATH, month_to_process)
98105
ds = pdtocdf(datasets)
99106

@@ -103,7 +110,7 @@ def main():
103110
# ds = ds.sel(x=slice(float(-10), float(2)), y=slice(float(49), float(59)))
104111

105112
print(ds)
106-
ds = ds.unstack('index')
113+
ds = ds.unstack("index")
107114

108115
# data is as UK and Malta all together so splitting
109116

nwp/excarta/parse_excarta_to_output.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
import xarray as xr
2-
import pandas as pd
3-
import numpy as np
1+
import argparse
42
import datetime
53
import os
64
import pathlib
75
from datetime import datetime
8-
import argparse
6+
7+
import pandas as pd
8+
import xarray as xr
99

1010

1111
def _parse_args():
@@ -18,20 +18,24 @@ def data_loader(folder_path):
1818
"""
1919
Loads and transforms data from CSV files in the given folder_path.
2020
"""
21-
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
21+
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
2222
files = os.listdir(folder_path)
2323
dfs = []
2424

2525
for filename in files:
2626
if filename.endswith(".csv") and not filename.startswith("._"):
2727
file_path = os.path.join(folder_path, filename)
28-
df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])
28+
df = pd.read_csv(
29+
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
30+
)
2931

30-
datetime_str = filename[:-4]
32+
datetime_str = filename[:-4]
3133
datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H")
3234

33-
df['step'] = (df['DateTimeUTC'] - datetime_obj).dt.total_seconds() / 3600 # convert timedelta to hours
34-
df['init_time'] = datetime_obj
35+
df["step"] = (
36+
df["DateTimeUTC"] - datetime_obj
37+
).dt.total_seconds() / 3600 # convert timedelta to hours
38+
df["init_time"] = datetime_obj
3539
dfs.append(df)
3640

3741
return dfs
@@ -43,7 +47,6 @@ def load_data_from_all_years(parent_folder_path):
4347
"""
4448
all_dataframes = []
4549

46-
4750
# Actual date range is 2018 to 2022 (for in range use (2018,2023))
4851
for year in range(2018, 2019):
4952
folder_path = os.path.join(parent_folder_path, str(year))
@@ -60,15 +63,15 @@ def pdtocdf(dfs):
6063
merged_df = pd.concat(dfs, ignore_index=True)
6164

6265
ds = xr.Dataset.from_dataframe(merged_df)
63-
ds = ds.set_index(index=['init_time', 'step','Latitude','Longitude']).unstack('index')
66+
ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack("index")
6467
ds = ds.drop_vars(["LocationId", "DateTimeUTC"])
6568

6669
var_names = ds.data_vars
6770
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
6871
d2 = d2.assign_coords(variable=("variable", var_names))
6972
ds = xr.Dataset(dict(value=d2))
70-
ds = ds.sortby('step')
71-
ds = ds.sortby('init_time')
73+
ds = ds.sortby("step")
74+
ds = ds.sortby("init_time")
7275
ds = ds.rename({"Latitude": "y", "Longitude": "x"})
7376

7477
return ds

nwp/excarta/parse_excarta_to_output_low_mem.py

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
#Low memory script
1+
# Low memory script
2+
import argparse
23
import os
4+
import pathlib
35
from datetime import datetime
6+
47
import pandas as pd
58
import xarray as xr
6-
import argparse
7-
import pathlib
89

910

1011
def _parse_args():
@@ -17,19 +18,23 @@ def data_loader(folder_path):
1718
"""
1819
Loads and transforms data from CSV files in the given folder_path and directly convert each DataFrame into an xarray Dataset.
1920
"""
20-
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
21+
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
2122
files = os.listdir(folder_path)
2223
datasets = []
2324

2425
for filename in files:
2526
if filename.endswith(".csv") and not filename.startswith("._"):
2627
file_path = os.path.join(folder_path, filename)
2728

28-
df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])
29+
df = pd.read_csv(
30+
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
31+
)
2932
datetime_str = filename[:-4]
3033
datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H")
31-
df['step'] = (df['DateTimeUTC'] - datetime_obj).dt.total_seconds() / 3600 # convert timedelta to hours
32-
df['init_time'] = datetime_obj
34+
df["step"] = (
35+
df["DateTimeUTC"] - datetime_obj
36+
).dt.total_seconds() / 3600 # convert timedelta to hours
37+
df["init_time"] = datetime_obj
3338

3439
# Convert the dataframe to an xarray Dataset and append to the list
3540
ds = xr.Dataset.from_dataframe(df)
@@ -55,26 +60,29 @@ def pdtocdf(datasets):
5560
Processes the xarray Datasets and merges them.
5661
"""
5762
print(datasets)
58-
# ds = xr.merge(datasets)
63+
# ds = xr.merge(datasets)
5964

60-
datasets = [ds.set_index(index=['init_time', 'step', 'Latitude', 'Longitude']) for ds in datasets]
65+
datasets = [
66+
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
67+
]
6168

62-
ds = xr.concat(datasets, dim='index')
69+
ds = xr.concat(datasets, dim="index")
6370

6471
# Going to unstack and then combine in a different script
6572
# Get rid of the index dimension and just keep the desired ones
6673
# ds = ds.unstack('index')
67-
74+
6875
var_names = ds.data_vars
6976
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
7077
d2 = d2.assign_coords(variable=("variable", var_names))
7178
ds = xr.Dataset(dict(value=d2))
72-
ds = ds.sortby('step')
73-
ds = ds.sortby('init_time')
79+
ds = ds.sortby("step")
80+
ds = ds.sortby("init_time")
7481
ds = ds.rename({"Latitude": "y", "Longitude": "x"})
7582

7683
return ds
7784

85+
7886
def main():
7987
args = _parse_args()
8088

@@ -87,13 +95,11 @@ def main():
8795

8896
print(ds)
8997

90-
ds = ds.unstack('index')
98+
ds = ds.unstack("index")
9199

92100
ds.to_zarr(args.output)
93-
94-
95101

96102

97103
# Check if script is being run directly
98104
if __name__ == "__main__":
99-
main()
105+
main()

nwp/icon/app.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@ def download_model_files(runs=None, parent_folder=None, model="global"):
3333
var_2d_list = GLOBAL_VAR2D_LIST
3434
invariant = GLOBAL_INVARIENT_LIST
3535
pressure_levels = GLOBAL_PRESSURE_LEVELS
36-
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
36+
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
3737
else:
3838
var_3d_list = EU_VAR3D_LIST
3939
var_2d_list = EU_VAR2D_LIST
4040
invariant = None
4141
pressure_levels = EU_PRESSURE_LEVELS
42-
f_steps = list(range(0, 79)) + list(range(81, 123, 3)) # 5 days
42+
f_steps = list(range(0, 79)) + list(range(81, 123, 3)) # 5 days
4343
for run in runs:
4444
run_folder = os.path.join(parent_folder, run)
4545
if not os.path.exists(run_folder):
@@ -87,7 +87,7 @@ def process_model_files(
8787
)
8888
lons = lon_ds.tlon.values
8989
lats = lat_ds.tlat.values
90-
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
90+
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
9191
else:
9292
var_base = "icon-eu_europe_regular-lat-lon"
9393
var_3d_list = EU_VAR3D_LIST

0 commit comments

Comments (0)