diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 073db2e..6771ad5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,7 +3,7 @@ default_language_version:
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.6.0
     hooks:
       # list of supported hooks: https://pre-commit.com/hooks.html
       - id: trailing-whitespace
@@ -12,20 +12,20 @@ repos:
       - id: detect-private-key
   # python code formatting/linting
-  - repo: https://github.com/charliermarsh/ruff-pre-commit
+  - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: "v0.0.270"
+    rev: "v0.4.8"
     hooks:
       - id: ruff
         args: [--fix]
   - repo: https://github.com/psf/black
-    rev: 23.3.0
+    rev: 24.4.2
     hooks:
       - id: black
         args: [--line-length, "100"]
 
   # yaml formatting
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v3.0.0-alpha.9-for-vscode
+    rev: v4.0.0-alpha.8
     hooks:
       - id: prettier
         types: [yaml]
diff --git a/nwp/icon/app.py b/nwp/icon/app.py
index 3fbe3f7..bf961e0 100644
--- a/nwp/icon/app.py
+++ b/nwp/icon/app.py
@@ -33,13 +33,13 @@ def download_model_files(runs=None, parent_folder=None, model="global", delay=0)
         var_2d_list = GLOBAL_VAR2D_LIST
         invariant = GLOBAL_INVARIENT_LIST
         pressure_levels = GLOBAL_PRESSURE_LEVELS
-        f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
+        f_steps = list(range(0, 79)) + list(range(81, 99, 3))  # 4 days
     else:
         var_3d_list = EU_VAR3D_LIST
         var_2d_list = EU_VAR2D_LIST
         invariant = None
         pressure_levels = EU_PRESSURE_LEVELS
-        f_steps = list(range(0, 79)) + list(range(81, 123, 3)) # 5 days
+        f_steps = list(range(0, 79)) + list(range(81, 123, 3))  # 5 days
     for run in runs:
         run_folder = os.path.join(parent_folder, run)
         if not os.path.exists(run_folder):
@@ -61,7 +61,7 @@ def download_model_files(runs=None, parent_folder=None, model="global", delay=0)
                 run=run,
                 f_times=f_steps,
                 model=model,
-                delay=delay
+                delay=delay,
             )
             not_done = False
         except Exception as e:
@@ -70,7 +70,13 @@ def download_model_files(runs=None, parent_folder=None, model="global", delay=0)
 
 
 def process_model_files(
-    folder, var_3d_list=None, var_2d_list=None, invariant_list=None, model="global", run="00", delay=0
+    folder,
+    var_3d_list=None,
+    var_2d_list=None,
+    invariant_list=None,
+    model="global",
+    run="00",
+    delay=0,
 ):
     date_string, _ = get_run(run, delay=delay)
     if model == "global":
@@ -78,18 +84,26 @@ def process_model_files(
         var_3d_list = GLOBAL_VAR3D_LIST
         var_2d_list = GLOBAL_VAR2D_LIST
         lon_ds = xr.open_dataset(
-            list(glob(os.path.join(folder, run, f"{var_base}_time-invariant_{date_string}_CLON.grib2")))[0],
+            list(
+                glob(
+                    os.path.join(folder, run, f"{var_base}_time-invariant_{date_string}_CLON.grib2")
+                )
+            )[0],
             engine="cfgrib",
             backend_kwargs={"errors": "ignore"},
         )
         lat_ds = xr.open_dataset(
-            list(glob(os.path.join(folder, run, f"{var_base}_time-invariant_{date_string}_CLAT.grib2")))[0],
+            list(
+                glob(
+                    os.path.join(folder, run, f"{var_base}_time-invariant_{date_string}_CLAT.grib2")
+                )
+            )[0],
             engine="cfgrib",
             backend_kwargs={"errors": "ignore"},
         )
         lons = lon_ds.tlon.values
         lats = lat_ds.tlat.values
-        f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
+        f_steps = list(range(0, 79)) + list(range(81, 99, 3))  # 4 days
     else:
         var_base = "icon-eu_europe_regular-lat-lon"
         var_3d_list = EU_VAR3D_LIST
@@ -145,7 +159,9 @@ def process_model_files(
             print(var_2d)
         try:
             ds = xr.open_mfdataset(
-                os.path.join(folder, run, f"{var_base}_single-level_{date_string}_*_{var_2d.upper()}.grib2"),
+                os.path.join(
+                    folder, run, f"{var_base}_single-level_{date_string}_*_{var_2d.upper()}.grib2"
+                ),
                 engine="cfgrib",
                 combine="nested",
                 concat_dim="step",
@@ -207,9 +223,11 @@ def upload_to_hf(dataset_xr, folder, model="global", run="00", token=None):
             f"{dataset_xr.time.dt.day.values}/"
             f"{dataset_xr.time.dt.year.values}{str(dataset_xr.time.dt.month.values).zfill(2)}{str(dataset_xr.time.dt.day.values).zfill(2)}"
             f"_{str(dataset_xr.time.dt.hour.values).zfill(2)}.zarr.zip",
-            repo_id="openclimatefix/dwd-icon-global"
-            if model == "global"
-            else "openclimatefix/dwd-icon-eu",
+            repo_id=(
+                "openclimatefix/dwd-icon-global"
+                if model == "global"
+                else "openclimatefix/dwd-icon-eu"
+            ),
             repo_type="dataset",
         )
         done = True
diff --git a/nwp/icon/utils.py b/nwp/icon/utils.py
index 895ca5d..0809d3f 100644
--- a/nwp/icon/utils.py
+++ b/nwp/icon/utils.py
@@ -1,4 +1,5 @@
 """Utilities for downloading the DWD ICON models"""
+
 import bz2
 import os
 from datetime import datetime, timedelta
@@ -125,7 +126,7 @@ def download_extract_url(url_and_folder):
     else:
         r = requests.get(url, stream=True)
         if r.status_code == requests.codes.ok:
-            #print(f"Downloading {url_and_folder[0]}")
+            # print(f"Downloading {url_and_folder[0]}")
             with r.raw as source, open(filename, "wb") as dest:
                 dest.write(bz2.decompress(source.read()))
             extracted_files = filename
@@ -154,9 +155,9 @@ def get_dset(
         invarient=invarient,
         f_times=f_times,
         model_url="icon/grib" if model == "global" else "icon-eu/grib",
-        var_url_base="icon_global_icosahedral"
-        if model == "global"
-        else "icon-eu_europe_regular-lat-lon",
+        var_url_base=(
+            "icon_global_icosahedral" if model == "global" else "icon-eu_europe_regular-lat-lon"
+        ),
         run=run,
         delay=delay,
     )
diff --git a/scripts/convert_icon_archive.py b/scripts/convert_icon_archive.py
index 257ecec..0ad60bd 100644
--- a/scripts/convert_icon_archive.py
+++ b/scripts/convert_icon_archive.py
@@ -23,6 +23,7 @@ from pathlib import Path
 
 import multiprocessing as mp
 
+
 def decompress(full_bzip_filename: Path, temp_pth: Path) -> str:
     """
     Decompresses .bz2 file and returns the non-compressed filename
@@ -38,7 +39,7 @@ def decompress(full_bzip_filename: Path, temp_pth: Path) -> str:
     base_nat_filename = os.path.splitext(base_bzip_filename)[0]
     full_nat_filename = os.path.join(temp_pth, base_nat_filename)
     if os.path.exists(full_nat_filename):
-        return full_nat_filename # Don't decompress a second time
+        return full_nat_filename  # Don't decompress a second time
     with open(full_nat_filename, "wb") as nat_file_handler:
         process = subprocess.run(
             ["pbzip2", "--decompress", "--keep", "--stdout", full_bzip_filename],
@@ -179,8 +180,8 @@ def upload_to_hf(dataset_xr, folder, model="eu", run="00", token=None):
     encoding = {var: {"compressor": Blosc2("zstd", clevel=9)} for var in dataset_xr.data_vars}
     encoding["time"] = {"units": "nanoseconds since 1970-01-01"}
     with zarr.ZipStore(
-        zarr_path, 
-        mode="w", 
+        zarr_path,
+        mode="w",
     ) as store:
         dataset_xr.chunk(chunking).to_zarr(store, encoding=encoding, compute=True)
     done = False
@@ -189,13 +190,15 @@ def upload_to_hf(dataset_xr, folder, model="eu", run="00", token=None):
         api.upload_file(
             path_or_fileobj=zarr_path,
             path_in_repo=f"data/{dataset_xr.time.dt.year.values}/"
-                         f"{dataset_xr.time.dt.month.values}/"
-                         f"{dataset_xr.time.dt.day.values}/"
-                         f"{dataset_xr.time.dt.year.values}{str(dataset_xr.time.dt.month.values).zfill(2)}{str(dataset_xr.time.dt.day.values).zfill(2)}"
-                         f"_{str(dataset_xr.time.dt.hour.values).zfill(2)}.zarr.zip",
-            repo_id="openclimatefix/dwd-icon-global"
-            if model == "global"
-            else "openclimatefix/dwd-icon-eu",
+            f"{dataset_xr.time.dt.month.values}/"
+            f"{dataset_xr.time.dt.day.values}/"
+            f"{dataset_xr.time.dt.year.values}{str(dataset_xr.time.dt.month.values).zfill(2)}{str(dataset_xr.time.dt.day.values).zfill(2)}"
+            f"_{str(dataset_xr.time.dt.hour.values).zfill(2)}.zarr.zip",
+            repo_id=(
+                "openclimatefix/dwd-icon-global"
+                if model == "global"
+                else "openclimatefix/dwd-icon-eu"
+            ),
             repo_type="dataset",
         )
         done = True
diff --git a/setup.py b/setup.py
index cfd9f81..493ceb5 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,5 @@
 """ Usual setup file for package """
+
 # read the contents of your README file
 from pathlib import Path