From 75e47f10e52a2eb8d921c9bd0b0648378aa3e083 Mon Sep 17 00:00:00 2001 From: Federico Battini Date: Mon, 22 Dec 2025 14:22:20 +0100 Subject: [PATCH] Fix Windows path compatibility for data loading --- ## Problem The `metoffice load` command failed on Windows with two issues: 1. Incorrect path structure: The archive path was missing year/month/day subdirectories, causing 404 errors when downloading from HuggingFace. - Expected: `data/2023/12/01/2023-12-01-00.zarr.zip` - Actual: `data/2023-12-01-00.zarr.zip` 2. Windows path separators: On Windows, `Path` objects use backslashes (`\`) which broke HuggingFace URLs when converted to strings. - Expected: `data/2023/12/01/...` - Actual: `data\2023\12\01\...` ## Changes - `main.py`: Added year/month/day subdirectories to archive_path (line 115) - `data_downloader.py`: Convert backslashes to forward slashes in download_from_hf() ## Testing - Tested on Windows 11 - Successfully downloads and loads Met Office UK data - All 53 existing tests pass --- --- src/open_data_pvnet/main.py | 10 ++++++++-- src/open_data_pvnet/utils/data_downloader.py | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/open_data_pvnet/main.py b/src/open_data_pvnet/main.py index f8d733e..c24fe85 100644 --- a/src/open_data_pvnet/main.py +++ b/src/open_data_pvnet/main.py @@ -107,12 +107,18 @@ def handle_load(provider: str, year: int, month: int, day: int, **kwargs): hour = kwargs.get("hour") # Base path for the data - base_path = Path("data") / str(year) / f"{month:02d}" / f"{day:02d}" + base_path = Path("data") try: if hour is not None: # Load specific hour - archive_path = base_path / f"{year}-{month:02d}-{day:02d}-{hour:02d}.zarr.zip" + archive_path = ( + base_path + / str(year) + / f"{month:02d}" + / f"{day:02d}" + / f"{year}-{month:02d}-{day:02d}-{hour:02d}.zarr.zip" + ) dataset = load_zarr_data( archive_path, chunks=chunks, diff --git a/src/open_data_pvnet/utils/data_downloader.py b/src/open_data_pvnet/utils/data_downloader.py index 9fe77f8..b023dbf 100644 --- a/src/open_data_pvnet/utils/data_downloader.py +++ b/src/open_data_pvnet/utils/data_downloader.py @@ -14,6 +14,7 @@ def download_from_hf(repo_path: str, local_path: Path) -> None: """Download a file from HuggingFace.""" + repo_path = repo_path.replace("\\", "/") logger.info(f"Downloading {repo_path} from Hugging Face...") local_path.parent.mkdir(parents=True, exist_ok=True) hf_hub_download(