Skip to content
This repository was archived by the owner on Jun 11, 2024. It is now read-only.

Commit 7480c15

Browse files
committed
formatted code
1 parent 22f4def commit 7480c15

File tree

5 files changed

+121
-62
lines changed

5 files changed

+121
-62
lines changed

nwp/excarta/merge_excarta.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,19 @@
99
import zarr
1010
import ocf_blosc2
1111

12+
1213
def merge_zarr_files(zarr_path, merged_zarr_path):
1314
# Collect paths of Zarr files in the specified directory
14-
zarr_files = [os.path.join(zarr_path, file) for file in os.listdir(zarr_path) if file.endswith('.zarr')]
15+
zarr_files = [
16+
os.path.join(zarr_path, file)
17+
for file in os.listdir(zarr_path)
18+
if file.endswith(".zarr")
19+
]
1520

1621
print("1")
1722
# Open the first Zarr file to create the initial dataset
1823
merged_ds = xr.open_zarr(zarr_files[0])
19-
24+
2025
print("2")
2126

2227
# Define the specific range of x and y coordinates
@@ -30,25 +35,20 @@ def merge_zarr_files(zarr_path, merged_zarr_path):
3035

3136
# ds_filt = ds.sel(x=slice(*x_range), y=slice(*y_range))
3237
merged_ds = merged_ds.combine_first(ds_filt)
33-
38+
3439
print("3")
3540

3641
# Rechunk the merged dataset
3742
merged_ds = merged_ds.chunk(chunks={"init_time": 10, "x": 100, "y": 100})
38-
39-
print("4")
40-
4143

44+
print("4")
4245

43-
4446
print(merged_ds)
4547

4648
# Save the merged dataset as a new Zarr file
4749
merged_ds.to_zarr(merged_zarr_path)
48-
50+
4951
print("5")
50-
51-
5252

5353

5454
# Specify the path where the independent Zarr files are located
@@ -59,4 +59,3 @@ def merge_zarr_files(zarr_path, merged_zarr_path):
5959

6060
# Merge the Zarr files
6161
merge_zarr_files(zarr_path, merged_zarr_path)
62-

nwp/excarta/parse_excarta_monthly.py

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
#Low memory script
1+
# Low memory script
22
import os
33
from datetime import datetime
44
import pandas as pd
55
import xarray as xr
66
import argparse
77
import pathlib
88

9+
910
def _parse_args():
1011
parser = argparse.ArgumentParser()
1112
parser.add_argument("output", type=pathlib.Path, help="Output zarr file")
@@ -14,28 +15,43 @@ def _parse_args():
1415
return parser.parse_args()
1516

1617

17-
1818
def data_loader(folder_path, month_to_process):
1919
"""
2020
Loads and transforms data from CSV files in the given folder_path and directly convert each DataFrame into an xarray Dataset.
2121
Only process files for the month 'YYYYMM' given by month_to_process
2222
"""
2323
month_to_process = datetime.strptime(month_to_process, "%Y%m")
24-
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
24+
column_names = [
25+
"DateTimeUTC",
26+
"LocationId",
27+
"Latitude",
28+
"Longitude",
29+
"dni",
30+
"dhi",
31+
"ghi",
32+
]
2533
files = os.listdir(folder_path)
2634
datasets = []
2735

2836
for filename in files:
2937
if filename.endswith(".csv") and not filename.startswith("._"):
3038
file_datetime = datetime.strptime(filename[:-4], "%Y%m%d%H")
3139

32-
if (file_datetime.year == month_to_process.year) and (file_datetime.month == month_to_process.month):
33-
40+
if (file_datetime.year == month_to_process.year) and (
41+
file_datetime.month == month_to_process.month
42+
):
3443
file_path = os.path.join(folder_path, filename)
35-
df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])
36-
37-
df['step'] = (df['DateTimeUTC'] - file_datetime).dt.total_seconds() / 3600 # convert timedelta to hours
38-
df['init_time'] = file_datetime
44+
df = pd.read_csv(
45+
file_path,
46+
header=None,
47+
names=column_names,
48+
parse_dates=["DateTimeUTC"],
49+
)
50+
51+
df["step"] = (
52+
df["DateTimeUTC"] - file_datetime
53+
).dt.total_seconds() / 3600 # convert timedelta to hours
54+
df["init_time"] = file_datetime
3955

4056
# Convert the dataframe to an xarray Dataset and append to the list
4157
ds = xr.Dataset.from_dataframe(df)
@@ -62,26 +78,26 @@ def pdtocdf(datasets):
6278
"""
6379
Processes the xarray Datasets and merges them.
6480
"""
65-
66-
datasets = [ds.set_index(index=['init_time', 'step', 'Latitude', 'Longitude']) for ds in datasets]
6781

68-
ds = xr.concat(datasets, dim='index')
82+
datasets = [
83+
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"])
84+
for ds in datasets
85+
]
86+
87+
ds = xr.concat(datasets, dim="index")
6988

7089
# # Define the specific range of x and y coordinates to filter the data on
7190
# x_range = (-10, 2) # Example x coordinate range
7291
# y_range = (49, 59) # Example y coordinate range
7392

7493
ds = ds.rename({"Latitude": "y", "Longitude": "x"})
75-
76-
7794

7895
var_names = ds.data_vars
7996
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
8097
d2 = d2.assign_coords(variable=("variable", var_names))
8198
ds = xr.Dataset(dict(value=d2))
82-
ds = ds.sortby('step')
83-
ds = ds.sortby('init_time')
84-
99+
ds = ds.sortby("step")
100+
ds = ds.sortby("init_time")
85101

86102
return ds
87103

@@ -103,7 +119,7 @@ def main():
103119
# ds = ds.sel(x=slice(float(-10), float(2)), y=slice(float(49), float(59)))
104120

105121
print(ds)
106-
ds = ds.unstack('index')
122+
ds = ds.unstack("index")
107123

108124
ds_filt = ds.sel(x=slice(float(13), float(15)), y=slice(float(35), float(37)))
109125

@@ -118,4 +134,4 @@ def main():
118134

119135
# Check if script is being run directly
120136
if __name__ == "__main__":
121-
main()
137+
main()

nwp/excarta/parse_excarta_to_output.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,32 @@ def data_loader(folder_path):
1818
"""
1919
Loads and transforms data from CSV files in the given folder_path.
2020
"""
21-
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
21+
column_names = [
22+
"DateTimeUTC",
23+
"LocationId",
24+
"Latitude",
25+
"Longitude",
26+
"dni",
27+
"dhi",
28+
"ghi",
29+
]
2230
files = os.listdir(folder_path)
2331
dfs = []
2432

2533
for filename in files:
2634
if filename.endswith(".csv") and not filename.startswith("._"):
2735
file_path = os.path.join(folder_path, filename)
28-
df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])
36+
df = pd.read_csv(
37+
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
38+
)
2939

30-
datetime_str = filename[:-4]
40+
datetime_str = filename[:-4]
3141
datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H")
3242

33-
df['step'] = (df['DateTimeUTC'] - datetime_obj).dt.total_seconds() / 3600 # convert timedelta to hours
34-
df['init_time'] = datetime_obj
43+
df["step"] = (
44+
df["DateTimeUTC"] - datetime_obj
45+
).dt.total_seconds() / 3600 # convert timedelta to hours
46+
df["init_time"] = datetime_obj
3547
dfs.append(df)
3648

3749
return dfs
@@ -43,7 +55,6 @@ def load_data_from_all_years(parent_folder_path):
4355
"""
4456
all_dataframes = []
4557

46-
4758
# Actual date range is 2018 to 2022 (for in range use (2018,2023))
4859
for year in range(2018, 2019):
4960
folder_path = os.path.join(parent_folder_path, str(year))
@@ -60,15 +71,17 @@ def pdtocdf(dfs):
6071
merged_df = pd.concat(dfs, ignore_index=True)
6172

6273
ds = xr.Dataset.from_dataframe(merged_df)
63-
ds = ds.set_index(index=['init_time', 'step','Latitude','Longitude']).unstack('index')
74+
ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack(
75+
"index"
76+
)
6477
ds = ds.drop_vars(["LocationId", "DateTimeUTC"])
6578

6679
var_names = ds.data_vars
6780
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
6881
d2 = d2.assign_coords(variable=("variable", var_names))
6982
ds = xr.Dataset(dict(value=d2))
70-
ds = ds.sortby('step')
71-
ds = ds.sortby('init_time')
83+
ds = ds.sortby("step")
84+
ds = ds.sortby("init_time")
7285
ds = ds.rename({"Latitude": "y", "Longitude": "x"})
7386

7487
return ds

nwp/excarta/parse_excarta_to_output_low_mem.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#Low memory script
1+
# Low memory script
22
import os
33
from datetime import datetime
44
import pandas as pd
@@ -17,19 +17,31 @@ def data_loader(folder_path):
1717
"""
1818
Loads and transforms data from CSV files in the given folder_path and directly convert each DataFrame into an xarray Dataset.
1919
"""
20-
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
20+
column_names = [
21+
"DateTimeUTC",
22+
"LocationId",
23+
"Latitude",
24+
"Longitude",
25+
"dni",
26+
"dhi",
27+
"ghi",
28+
]
2129
files = os.listdir(folder_path)
2230
datasets = []
2331

2432
for filename in files:
2533
if filename.endswith(".csv") and not filename.startswith("._"):
2634
file_path = os.path.join(folder_path, filename)
2735

28-
df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])
36+
df = pd.read_csv(
37+
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
38+
)
2939
datetime_str = filename[:-4]
3040
datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H")
31-
df['step'] = (df['DateTimeUTC'] - datetime_obj).dt.total_seconds() / 3600 # convert timedelta to hours
32-
df['init_time'] = datetime_obj
41+
df["step"] = (
42+
df["DateTimeUTC"] - datetime_obj
43+
).dt.total_seconds() / 3600 # convert timedelta to hours
44+
df["init_time"] = datetime_obj
3345

3446
# Convert the dataframe to an xarray Dataset and append to the list
3547
ds = xr.Dataset.from_dataframe(df)
@@ -55,26 +67,30 @@ def pdtocdf(datasets):
5567
Processes the xarray Datasets and merges them.
5668
"""
5769
print(datasets)
58-
# ds = xr.merge(datasets)
70+
# ds = xr.merge(datasets)
5971

60-
datasets = [ds.set_index(index=['init_time', 'step', 'Latitude', 'Longitude']) for ds in datasets]
72+
datasets = [
73+
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"])
74+
for ds in datasets
75+
]
6176

62-
ds = xr.concat(datasets, dim='index')
77+
ds = xr.concat(datasets, dim="index")
6378

6479
# Going to unstack and then combine in a different script
6580
# Get rid of the index dimension and just keep the desired ones
6681
# ds = ds.unstack('index')
67-
82+
6883
var_names = ds.data_vars
6984
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
7085
d2 = d2.assign_coords(variable=("variable", var_names))
7186
ds = xr.Dataset(dict(value=d2))
72-
ds = ds.sortby('step')
73-
ds = ds.sortby('init_time')
87+
ds = ds.sortby("step")
88+
ds = ds.sortby("init_time")
7489
ds = ds.rename({"Latitude": "y", "Longitude": "x"})
7590

7691
return ds
7792

93+
7894
def main():
7995
args = _parse_args()
8096

@@ -87,13 +103,11 @@ def main():
87103

88104
print(ds)
89105

90-
ds = ds.unstack('index')
106+
ds = ds.unstack("index")
91107

92108
ds.to_zarr(args.output)
93-
94-
95109

96110

97111
# Check if script is being run directly
98112
if __name__ == "__main__":
99-
main()
113+
main()

0 commit comments

Comments
 (0)