This repository was archived by the owner on Jun 11, 2024. It is now read-only.

Commit 3f358da

move notebooks to notebooks folder
Merge of 2 parents: 7480c15 + ed1125f

File tree: 6 files changed (+66, -28 lines)

nwp/excarta/merge_excarta.py

Lines changed: 8 additions & 9 deletions
@@ -1,21 +1,20 @@
 # import libs
-import xarray as xr
-import pandas as pd
-import numpy as np
-import datetime
 import os
-import pathlib as Path
-from datetime import datetime
-import zarr
-import ocf_blosc2
+
+import xarray as xr
+
 
 
 def merge_zarr_files(zarr_path, merged_zarr_path):
     # Collect paths of Zarr files in the specified directory
     zarr_files = [
+<<<<<<< HEAD
         os.path.join(zarr_path, file)
         for file in os.listdir(zarr_path)
         if file.endswith(".zarr")
+=======
+        os.path.join(zarr_path, file) for file in os.listdir(zarr_path) if file.endswith(".zarr")
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     ]
 
     print("1")
@@ -30,7 +29,7 @@ def merge_zarr_files(zarr_path, merged_zarr_path):
 
     # Iterate over the remaining Zarr files and merge them into the initial dataset
     for file in zarr_files[1:]:
-        ds = xr.open_zarr(file)
+        xr.open_zarr(file)
         print(file)
 
         # ds_filt = ds.sel(x=slice(*x_range), y=slice(*y_range))
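
Note that this hunk commits unresolved conflict markers into the file, and the HEAD side of the loop discards the opened dataset (`xr.open_zarr(file)` is no longer bound to `ds`), so the loop no longer merges anything. For reference, a minimal sketch of a working merge, assuming the per-file datasets share an `init_time` dimension; this is not the repo's code:

# Hypothetical sketch: concatenate per-file Zarr stores into one output store.
import os

import xarray as xr


def merge_zarr_files(zarr_path, merged_zarr_path):
    zarr_files = sorted(
        os.path.join(zarr_path, f) for f in os.listdir(zarr_path) if f.endswith(".zarr")
    )
    datasets = [xr.open_zarr(f) for f in zarr_files]
    merged = xr.concat(datasets, dim="init_time")  # assumes a common init_time dim
    merged.to_zarr(merged_zarr_path, mode="w")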

nwp/excarta/parse_excarta_monthly.py

Lines changed: 22 additions & 3 deletions
@@ -1,10 +1,15 @@
 # Low memory script
+<<<<<<< HEAD
+=======
+import argparse
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
 import os
+import pathlib
 from datetime import datetime
+
 import pandas as pd
 import xarray as xr
-import argparse
-import pathlib
+
 
 
 def _parse_args():
@@ -21,6 +26,7 @@ def data_loader(folder_path, month_to_process):
     Only process files for the month 'YYYYMM' given by month_to_process
     """
     month_to_process = datetime.strptime(month_to_process, "%Y%m")
+<<<<<<< HEAD
     column_names = [
         "DateTimeUTC",
         "LocationId",
@@ -30,6 +36,9 @@ def data_loader(folder_path, month_to_process):
         "dhi",
         "ghi",
     ]
+=======
+    column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     files = os.listdir(folder_path)
     datasets = []
 
@@ -42,10 +51,14 @@ def data_loader(folder_path, month_to_process):
     ):
         file_path = os.path.join(folder_path, filename)
         df = pd.read_csv(
+<<<<<<< HEAD
             file_path,
             header=None,
             names=column_names,
             parse_dates=["DateTimeUTC"],
+=======
+            file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
         )
 
         df["step"] = (
@@ -80,8 +93,12 @@ def pdtocdf(datasets):
     """
 
     datasets = [
+<<<<<<< HEAD
         ds.set_index(index=["init_time", "step", "Latitude", "Longitude"])
         for ds in datasets
+=======
+        ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     ]
 
     ds = xr.concat(datasets, dim="index")
@@ -109,7 +126,9 @@ def main():
         raise RuntimeError(f'Output file "{args.output}" already exist')
 
     PATH = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data"
-    month_to_process = f"{args.year}{args.month:02d}"  # combine year and month arguments into the required format
+    month_to_process = (
+        f"{args.year}{args.month:02d}"  # combine year and month arguments into the required format
+    )
     datasets = load_data_from_all_years(PATH, month_to_process)
     ds = pdtocdf(datasets)
 
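
The `main` hunk builds the month filter from the CLI arguments. A quick illustration of the `YYYYMM` round trip used above (values hypothetical):

from datetime import datetime

year, month = 2022, 3
month_to_process = f"{year}{month:02d}"  # "202203"
assert datetime.strptime(month_to_process, "%Y%m") == datetime(2022, 3, 1)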

nwp/excarta/parse_excarta_to_output.py

Lines changed: 12 additions & 4 deletions
@@ -1,11 +1,11 @@
-import xarray as xr
-import pandas as pd
-import numpy as np
+import argparse
 import datetime
 import os
 import pathlib
 from datetime import datetime
-import argparse
+
+import pandas as pd
+import xarray as xr
 
 
 def _parse_args():
@@ -18,6 +18,7 @@ def data_loader(folder_path):
     """
     Loads and transforms data from CSV files in the given folder_path.
     """
+<<<<<<< HEAD
     column_names = [
         "DateTimeUTC",
         "LocationId",
@@ -27,6 +28,9 @@ def data_loader(folder_path):
         "dhi",
         "ghi",
     ]
+=======
+    column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     files = os.listdir(folder_path)
     dfs = []
 
@@ -71,9 +75,13 @@ def pdtocdf(dfs):
     merged_df = pd.concat(dfs, ignore_index=True)
 
     ds = xr.Dataset.from_dataframe(merged_df)
+<<<<<<< HEAD
     ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack(
         "index"
     )
+=======
+    ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack("index")
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     ds = ds.drop_vars(["LocationId", "DateTimeUTC"])
 
     var_names = ds.data_vars
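
Both sides of the `pdtocdf` conflict are the same operation, formatted differently: build a MultiIndex over the flat "index" dimension, then unstack it into separate dimensions. A self-contained toy version of the pattern (column values hypothetical):

import pandas as pd
import xarray as xr

df = pd.DataFrame(
    {
        "init_time": pd.to_datetime(["2022-01-01", "2022-01-01"]),
        "step": [0, 0],
        "Latitude": [50.0, 51.0],
        "Longitude": [-1.0, -1.0],
        "ghi": [100.0, 120.0],
    }
)
ds = xr.Dataset.from_dataframe(df)  # 1-D Dataset along a row-number "index" dim
ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack("index")
# "ghi" is now a 4-D variable on (init_time, step, Latitude, Longitude)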

nwp/excarta/parse_excarta_to_output_low_mem.py

Lines changed: 14 additions & 2 deletions
@@ -1,10 +1,14 @@
 # Low memory script
+<<<<<<< HEAD
+=======
+import argparse
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
 import os
+import pathlib
 from datetime import datetime
+
 import pandas as pd
 import xarray as xr
-import argparse
-import pathlib
 
 
 def _parse_args():
@@ -17,6 +21,7 @@ def data_loader(folder_path):
     """
     Loads and transforms data from CSV files in the given folder_path and directly convert each DataFrame into an xarray Dataset.
     """
+<<<<<<< HEAD
     column_names = [
         "DateTimeUTC",
         "LocationId",
@@ -26,6 +31,9 @@ def data_loader(folder_path):
         "dhi",
         "ghi",
     ]
+=======
+    column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     files = os.listdir(folder_path)
     datasets = []
 
@@ -70,8 +78,12 @@ def pdtocdf(datasets):
     # ds = xr.merge(datasets)
 
     datasets = [
+<<<<<<< HEAD
         ds.set_index(index=["init_time", "step", "Latitude", "Longitude"])
         for ds in datasets
+=======
+        ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
+>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
     ]
 
     ds = xr.concat(datasets, dim="index")
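
The low-memory variant converts each file's DataFrame to an `xarray.Dataset` immediately and concatenates the small per-file Datasets along the flat "index" dimension, deferring the expensive unstack. A runnable toy sketch of that flow (two single-row "files", values hypothetical):

import pandas as pd
import xarray as xr

rows = [
    {"init_time": "2022-01-01", "step": 0, "Latitude": 50.0, "Longitude": -1.0, "ghi": 100.0},
    {"init_time": "2022-01-01", "step": 1, "Latitude": 50.0, "Longitude": -1.0, "ghi": 80.0},
]
datasets = [
    xr.Dataset.from_dataframe(pd.DataFrame([row])).set_index(
        index=["init_time", "step", "Latitude", "Longitude"]
    )
    for row in rows
]
ds = xr.concat(datasets, dim="index")  # one long "index" dim, built file by file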

scripts/convert_icon_archive.py

Lines changed: 10 additions & 10 deletions
@@ -6,8 +6,11 @@
 
 """
 
+import multiprocessing as mp
 import os
+import subprocess
 from glob import glob
+from pathlib import Path
 
 import xarray as xr
 import zarr
@@ -18,10 +21,7 @@
     EU_VAR2D_LIST,
     EU_VAR3D_LIST,
 )
-import subprocess
 
-from pathlib import Path
-import multiprocessing as mp
 
 def decompress(full_bzip_filename: Path, temp_pth: Path) -> str:
     """
@@ -38,7 +38,7 @@ def decompress(full_bzip_filename: Path, temp_pth: Path) -> str:
     base_nat_filename = os.path.splitext(base_bzip_filename)[0]
     full_nat_filename = os.path.join(temp_pth, base_nat_filename)
     if os.path.exists(full_nat_filename):
-        return full_nat_filename # Don't decompress a second time
+        return full_nat_filename  # Don't decompress a second time
     with open(full_nat_filename, "wb") as nat_file_handler:
         process = subprocess.run(
             ["pbzip2", "--decompress", "--keep", "--stdout", full_bzip_filename],
@@ -179,8 +179,8 @@ def upload_to_hf(dataset_xr, folder, model="eu", run="00", token=None):
     encoding = {var: {"compressor": Blosc2("zstd", clevel=9)} for var in dataset_xr.data_vars}
     encoding["time"] = {"units": "nanoseconds since 1970-01-01"}
     with zarr.ZipStore(
-            zarr_path,
-            mode="w",
+        zarr_path,
+        mode="w",
     ) as store:
         dataset_xr.chunk(chunking).to_zarr(store, encoding=encoding, compute=True)
     done = False
@@ -189,10 +189,10 @@ def upload_to_hf(dataset_xr, folder, model="eu", run="00", token=None):
     api.upload_file(
         path_or_fileobj=zarr_path,
         path_in_repo=f"data/{dataset_xr.time.dt.year.values}/"
-            f"{dataset_xr.time.dt.month.values}/"
-            f"{dataset_xr.time.dt.day.values}/"
-            f"{dataset_xr.time.dt.year.values}{str(dataset_xr.time.dt.month.values).zfill(2)}{str(dataset_xr.time.dt.day.values).zfill(2)}"
-            f"_{str(dataset_xr.time.dt.hour.values).zfill(2)}.zarr.zip",
+        f"{dataset_xr.time.dt.month.values}/"
+        f"{dataset_xr.time.dt.day.values}/"
+        f"{dataset_xr.time.dt.year.values}{str(dataset_xr.time.dt.month.values).zfill(2)}{str(dataset_xr.time.dt.day.values).zfill(2)}"
+        f"_{str(dataset_xr.time.dt.hour.values).zfill(2)}.zarr.zip",
         repo_id="openclimatefix/dwd-icon-global"
         if model == "global"
         else "openclimatefix/dwd-icon-eu",
