Skip to content
This repository was archived by the owner on Jun 11, 2024. It is now read-only.

Commit c977e1f

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent e70e332 commit c977e1f

File tree

6 files changed

+91
-84
lines changed

6 files changed

+91
-84
lines changed

nwp/excarta/merge_excarta.py

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,19 @@
11
# import libs
2-
import xarray as xr
3-
import pandas as pd
4-
import numpy as np
5-
import datetime
62
import os
7-
import pathlib as Path
8-
from datetime import datetime
9-
import zarr
10-
import ocf_blosc2
3+
4+
import xarray as xr
5+
116

127
def merge_zarr_files(zarr_path, merged_zarr_path):
138
# Collect paths of Zarr files in the specified directory
14-
zarr_files = [os.path.join(zarr_path, file) for file in os.listdir(zarr_path) if file.endswith('.zarr')]
9+
zarr_files = [
10+
os.path.join(zarr_path, file) for file in os.listdir(zarr_path) if file.endswith(".zarr")
11+
]
1512

1613
print("1")
1714
# Open the first Zarr file to create the initial dataset
1815
merged_ds = xr.open_zarr(zarr_files[0])
19-
16+
2017
print("2")
2118

2219
# Define the specific range of x and y coordinates
@@ -25,30 +22,25 @@ def merge_zarr_files(zarr_path, merged_zarr_path):
2522

2623
# Iterate over the remaining Zarr files and merge them into the initial dataset
2724
for file in zarr_files[1:]:
28-
ds = xr.open_zarr(file)
25+
xr.open_zarr(file)
2926
print(file)
3027

3128
# ds_filt = ds.sel(x=slice(*x_range), y=slice(*y_range))
3229
merged_ds = merged_ds.combine_first(ds_filt)
33-
30+
3431
print("3")
3532

3633
# Rechunk the merged dataset
3734
merged_ds = merged_ds.chunk(chunks={"init_time": 10, "x": 100, "y": 100})
38-
39-
print("4")
40-
4135

36+
print("4")
4237

43-
4438
print(merged_ds)
4539

4640
# Save the merged dataset as a new Zarr file
4741
merged_ds.to_zarr(merged_zarr_path)
48-
42+
4943
print("5")
50-
51-
5244

5345

5446
# Specify the path where the independent Zarr files are located
@@ -59,4 +51,3 @@ def merge_zarr_files(zarr_path, merged_zarr_path):
5951

6052
# Merge the Zarr files
6153
merge_zarr_files(zarr_path, merged_zarr_path)
62-

nwp/excarta/parse_excarta_monthly.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
#Low memory script
1+
# Low memory script
2+
import argparse
23
import os
4+
import pathlib
35
from datetime import datetime
6+
47
import pandas as pd
58
import xarray as xr
6-
import argparse
7-
import pathlib
9+
810

911
def _parse_args():
1012
parser = argparse.ArgumentParser()
@@ -14,28 +16,32 @@ def _parse_args():
1416
return parser.parse_args()
1517

1618

17-
1819
def data_loader(folder_path, month_to_process):
1920
"""
2021
Loads and transforms data from CSV files in the given folder_path and directly convert each DataFrame into an xarray Dataset.
2122
Only process files for the month 'YYYYMM' given by month_to_process
2223
"""
2324
month_to_process = datetime.strptime(month_to_process, "%Y%m")
24-
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
25+
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
2526
files = os.listdir(folder_path)
2627
datasets = []
2728

2829
for filename in files:
2930
if filename.endswith(".csv") and not filename.startswith("._"):
3031
file_datetime = datetime.strptime(filename[:-4], "%Y%m%d%H")
3132

32-
if (file_datetime.year == month_to_process.year) and (file_datetime.month == month_to_process.month):
33-
33+
if (file_datetime.year == month_to_process.year) and (
34+
file_datetime.month == month_to_process.month
35+
):
3436
file_path = os.path.join(folder_path, filename)
35-
df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])
36-
37-
df['step'] = (df['DateTimeUTC'] - file_datetime).dt.total_seconds() / 3600 # convert timedelta to hours
38-
df['init_time'] = file_datetime
37+
df = pd.read_csv(
38+
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
39+
)
40+
41+
df["step"] = (
42+
df["DateTimeUTC"] - file_datetime
43+
).dt.total_seconds() / 3600 # convert timedelta to hours
44+
df["init_time"] = file_datetime
3945

4046
# Convert the dataframe to an xarray Dataset and append to the list
4147
ds = xr.Dataset.from_dataframe(df)
@@ -62,26 +68,25 @@ def pdtocdf(datasets):
6268
"""
6369
Processes the xarray Datasets and merges them.
6470
"""
65-
66-
datasets = [ds.set_index(index=['init_time', 'step', 'Latitude', 'Longitude']) for ds in datasets]
6771

68-
ds = xr.concat(datasets, dim='index')
72+
datasets = [
73+
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
74+
]
75+
76+
ds = xr.concat(datasets, dim="index")
6977

7078
# # Define the specific range of x and y coordinates to filter the data on
7179
# x_range = (-10, 2) # Example x coordinate range
7280
# y_range = (49, 59) # Example y coordinate range
7381

7482
ds = ds.rename({"Latitude": "y", "Longitude": "x"})
75-
76-
7783

7884
var_names = ds.data_vars
7985
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
8086
d2 = d2.assign_coords(variable=("variable", var_names))
8187
ds = xr.Dataset(dict(value=d2))
82-
ds = ds.sortby('step')
83-
ds = ds.sortby('init_time')
84-
88+
ds = ds.sortby("step")
89+
ds = ds.sortby("init_time")
8590

8691
return ds
8792

@@ -93,7 +98,9 @@ def main():
9398
raise RuntimeError(f'Output file "{args.output}" already exist')
9499

95100
PATH = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data"
96-
month_to_process = f"{args.year}{args.month:02d}" # combine year and month arguments into the required format
101+
month_to_process = (
102+
f"{args.year}{args.month:02d}" # combine year and month arguments into the required format
103+
)
97104
datasets = load_data_from_all_years(PATH, month_to_process)
98105
ds = pdtocdf(datasets)
99106

@@ -103,7 +110,7 @@ def main():
103110
# ds = ds.sel(x=slice(float(-10), float(2)), y=slice(float(49), float(59)))
104111

105112
print(ds)
106-
ds = ds.unstack('index')
113+
ds = ds.unstack("index")
107114

108115
# data is as UK and Malta all together so splitting
109116

nwp/excarta/parse_excarta_to_output.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
import xarray as xr
2-
import pandas as pd
3-
import numpy as np
1+
import argparse
42
import datetime
53
import os
64
import pathlib
75
from datetime import datetime
8-
import argparse
6+
7+
import pandas as pd
8+
import xarray as xr
99

1010

1111
def _parse_args():
@@ -18,20 +18,24 @@ def data_loader(folder_path):
1818
"""
1919
Loads and transforms data from CSV files in the given folder_path.
2020
"""
21-
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
21+
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
2222
files = os.listdir(folder_path)
2323
dfs = []
2424

2525
for filename in files:
2626
if filename.endswith(".csv") and not filename.startswith("._"):
2727
file_path = os.path.join(folder_path, filename)
28-
df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])
28+
df = pd.read_csv(
29+
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
30+
)
2931

30-
datetime_str = filename[:-4]
32+
datetime_str = filename[:-4]
3133
datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H")
3234

33-
df['step'] = (df['DateTimeUTC'] - datetime_obj).dt.total_seconds() / 3600 # convert timedelta to hours
34-
df['init_time'] = datetime_obj
35+
df["step"] = (
36+
df["DateTimeUTC"] - datetime_obj
37+
).dt.total_seconds() / 3600 # convert timedelta to hours
38+
df["init_time"] = datetime_obj
3539
dfs.append(df)
3640

3741
return dfs
@@ -43,7 +47,6 @@ def load_data_from_all_years(parent_folder_path):
4347
"""
4448
all_dataframes = []
4549

46-
4750
# Actual date range is 2018 to 2022 (for in range use (2018,2023))
4851
for year in range(2018, 2019):
4952
folder_path = os.path.join(parent_folder_path, str(year))
@@ -60,15 +63,15 @@ def pdtocdf(dfs):
6063
merged_df = pd.concat(dfs, ignore_index=True)
6164

6265
ds = xr.Dataset.from_dataframe(merged_df)
63-
ds = ds.set_index(index=['init_time', 'step','Latitude','Longitude']).unstack('index')
66+
ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack("index")
6467
ds = ds.drop_vars(["LocationId", "DateTimeUTC"])
6568

6669
var_names = ds.data_vars
6770
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
6871
d2 = d2.assign_coords(variable=("variable", var_names))
6972
ds = xr.Dataset(dict(value=d2))
70-
ds = ds.sortby('step')
71-
ds = ds.sortby('init_time')
73+
ds = ds.sortby("step")
74+
ds = ds.sortby("init_time")
7275
ds = ds.rename({"Latitude": "y", "Longitude": "x"})
7376

7477
return ds

nwp/excarta/parse_excarta_to_output_low_mem.py

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
#Low memory script
1+
# Low memory script
2+
import argparse
23
import os
4+
import pathlib
35
from datetime import datetime
6+
47
import pandas as pd
58
import xarray as xr
6-
import argparse
7-
import pathlib
89

910

1011
def _parse_args():
@@ -17,19 +18,23 @@ def data_loader(folder_path):
1718
"""
1819
Loads and transforms data from CSV files in the given folder_path and directly convert each DataFrame into an xarray Dataset.
1920
"""
20-
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
21+
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
2122
files = os.listdir(folder_path)
2223
datasets = []
2324

2425
for filename in files:
2526
if filename.endswith(".csv") and not filename.startswith("._"):
2627
file_path = os.path.join(folder_path, filename)
2728

28-
df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])
29+
df = pd.read_csv(
30+
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
31+
)
2932
datetime_str = filename[:-4]
3033
datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H")
31-
df['step'] = (df['DateTimeUTC'] - datetime_obj).dt.total_seconds() / 3600 # convert timedelta to hours
32-
df['init_time'] = datetime_obj
34+
df["step"] = (
35+
df["DateTimeUTC"] - datetime_obj
36+
).dt.total_seconds() / 3600 # convert timedelta to hours
37+
df["init_time"] = datetime_obj
3338

3439
# Convert the dataframe to an xarray Dataset and append to the list
3540
ds = xr.Dataset.from_dataframe(df)
@@ -55,26 +60,29 @@ def pdtocdf(datasets):
5560
Processes the xarray Datasets and merges them.
5661
"""
5762
print(datasets)
58-
# ds = xr.merge(datasets)
63+
# ds = xr.merge(datasets)
5964

60-
datasets = [ds.set_index(index=['init_time', 'step', 'Latitude', 'Longitude']) for ds in datasets]
65+
datasets = [
66+
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
67+
]
6168

62-
ds = xr.concat(datasets, dim='index')
69+
ds = xr.concat(datasets, dim="index")
6370

6471
# Going to unstack and then combine in a different script
6572
# Get rid of the index dimension and just keep the desired ones
6673
# ds = ds.unstack('index')
67-
74+
6875
var_names = ds.data_vars
6976
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
7077
d2 = d2.assign_coords(variable=("variable", var_names))
7178
ds = xr.Dataset(dict(value=d2))
72-
ds = ds.sortby('step')
73-
ds = ds.sortby('init_time')
79+
ds = ds.sortby("step")
80+
ds = ds.sortby("init_time")
7481
ds = ds.rename({"Latitude": "y", "Longitude": "x"})
7582

7683
return ds
7784

85+
7886
def main():
7987
args = _parse_args()
8088

@@ -87,13 +95,11 @@ def main():
8795

8896
print(ds)
8997

90-
ds = ds.unstack('index')
98+
ds = ds.unstack("index")
9199

92100
ds.to_zarr(args.output)
93-
94-
95101

96102

97103
# Check if script is being run directly
98104
if __name__ == "__main__":
99-
main()
105+
main()

nwp/icon/app.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@ def download_model_files(runs=None, parent_folder=None, model="global"):
3333
var_2d_list = GLOBAL_VAR2D_LIST
3434
invariant = GLOBAL_INVARIENT_LIST
3535
pressure_levels = GLOBAL_PRESSURE_LEVELS
36-
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
36+
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
3737
else:
3838
var_3d_list = EU_VAR3D_LIST
3939
var_2d_list = EU_VAR2D_LIST
4040
invariant = None
4141
pressure_levels = EU_PRESSURE_LEVELS
42-
f_steps = list(range(0, 79)) + list(range(81, 123, 3)) # 5 days
42+
f_steps = list(range(0, 79)) + list(range(81, 123, 3)) # 5 days
4343
for run in runs:
4444
run_folder = os.path.join(parent_folder, run)
4545
if not os.path.exists(run_folder):
@@ -87,7 +87,7 @@ def process_model_files(
8787
)
8888
lons = lon_ds.tlon.values
8989
lats = lat_ds.tlat.values
90-
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
90+
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
9191
else:
9292
var_base = "icon-eu_europe_regular-lat-lon"
9393
var_3d_list = EU_VAR3D_LIST

0 commit comments

Comments (0)