diff --git a/notebooks/downloading_gfs_germany.py b/notebooks/downloading_gfs_germany.py
new file mode 100644
index 0000000..54cbc3c
--- /dev/null
+++ b/notebooks/downloading_gfs_germany.py
@@ -0,0 +1,220 @@
+import requests
+import xarray as xr
+import numpy as np
+from datetime import datetime, timedelta
+from pathlib import Path
+
+LAT_MIN, LAT_MAX = 47, 55
+LON_MIN, LON_MAX = 5, 15
+
+VARIABLES = {
+    "dswrf": "DSWRF:surface",
+    "t": "TMP:2 m above ground",
+    "r": "RH:2 m above ground",
+    "tcc": "TCDC:entire atmosphere",
+    "u10": "UGRD:10 m above ground",
+    "v10": "VGRD:10 m above ground",
+}
+
+OUTPUT_DIR = Path("./germany_gfs_data")
+OUTPUT_DIR.mkdir(exist_ok=True)
+
+START_DATE = datetime(2023, 1, 1)
+END_DATE = datetime(2023, 1, 1)
+CYCLES = [0, 6, 12, 18]
+FORECAST_HOURS = [0, 3, 6, 9, 12, 15, 18, 21, 24]
+
+
+def get_byte_ranges(idx_url):
+    r = requests.get(idx_url, timeout=30)
+    if r.status_code != 200:
+        return None
+
+    lines = r.text.splitlines()
+    records = []
+
+    for i, line in enumerate(lines):
+        parts = line.split(":")
+        if len(parts) < 5:
+            continue
+
+        offset = int(parts[1])
+        var_lvl = f"{parts[3]}:{parts[4]}"
+        next_offset = int(lines[i + 1].split(":")[1]) if i + 1 < len(lines) else None
+
+        records.append({
+            "offset": offset,
+            "var_lvl": var_lvl,
+            "next": next_offset
+        })
+
+    return records
+
+
+def download_grib(date, cycle, fhour):
+    date_str = date.strftime("%Y%m%d")
+    cycle_str = f"{cycle:02d}"
+    fhour_str = f"{fhour:03d}"
+
+    base_url = f"https://noaa-gfs-bdp-pds.s3.amazonaws.com/gfs.{date_str}/{cycle_str}/atmos/gfs.t{cycle_str}z.pgrb2.0p25.f{fhour_str}"
+    idx_url = base_url + ".idx"
+    filename = OUTPUT_DIR / f"gfs_{date_str}_{cycle_str}z_f{fhour_str}.grib2"
+
+    if filename.exists() and filename.stat().st_size > 1000:
+        return filename
+
+    records = get_byte_ranges(idx_url)
+    if not records:
+        return None
+
+    print(f"Downloading {filename.name}...")
+
+    with open(filename, "wb") as f:
+        for var_name, var_pattern in VARIABLES.items():
+            record = next((r for r in records if r["var_lvl"] == var_pattern), None)
+
+            if record:
+                # An open-ended range (no end byte) fetches through to EOF for the last record
+                end_byte = record["next"] - 1 if record["next"] is not None else ""
+                range_header = f"bytes={record['offset']}-{end_byte}"
+                r = requests.get(base_url, headers={"Range": range_header}, timeout=30)
+                r.raise_for_status()
+
+                for chunk in r.iter_content(chunk_size=1024 * 1024):
+                    if chunk:
+                        f.write(chunk)
+
+    return filename
+
+
+def process_grib(grib_path):
+    """Open GRIB file and handle multiple levels/variables."""
+    datasets = []
+
+    # Define filtration for different levels
+    filters = [
+        {"typeOfLevel": "surface"},
+        {"typeOfLevel": "heightAboveGround", "level": 2},
+        {"typeOfLevel": "heightAboveGround", "level": 10},
+        {"typeOfLevel": "heightAboveGround", "level": 100},
+        {"typeOfLevel": "entireAtmosphere"},
+    ]
+
+    for f in filters:
+        try:
+            ds = xr.open_dataset(
+                grib_path,
+                engine="cfgrib",
+                backend_kwargs={'filter_by_keys': f}
+            )
+            # Drop coordinates that might conflict when merging different levels
+            for coord in ["heightAboveGround", "entireAtmosphere", "surface"]:
+                if coord in ds.coords:
+                    ds = ds.drop_vars(coord)
+            datasets.append(ds)
+        except Exception:
+            continue
+
+    if not datasets:
+        return None
+
+    try:
+        combined = xr.merge(datasets, compat="no_conflicts")
+        data = combined.sel(latitude=slice(LAT_MAX, LAT_MIN), longitude=slice(LON_MIN, LON_MAX))
+        return data
+    except Exception as e:
+        print(f"Error merging datasets for {grib_path}: {e}")
+        return None
+
+
+def main():
+    print("=" * 50)
+    print("GFS WEATHER DATA DOWNLOADER")
+    print("=" * 50)
+
+    all_data = {}
+    init_times = []
+    steps = []
+
+    current = START_DATE
+    while current <= END_DATE:
+        for cycle in CYCLES:
+            init_time = current.replace(hour=cycle)
+
+            for fhour in FORECAST_HOURS:
+                print(f"--- Cycle {cycle:02d} F{fhour:03d} ---")
+                grib_path = download_grib(current, cycle, fhour)
+
+                if grib_path:
+                    data = process_grib(grib_path)
+                    if data is not None:
+                        print(f"Success: Processed {grib_path.name}")
+                        all_data[(init_time, fhour)] = data
+                        if init_time not in init_times:
+                            init_times.append(init_time)
+                        if fhour not in steps:
+                            steps.append(fhour)
+                    else:
+                        print(f"Warning: Failed to process {grib_path.name}")
+
+        current += timedelta(days=1)
+
+    if not all_data:
+        print("No data downloaded")
+        return
+
+    init_times = sorted(init_times)
+    steps = sorted(steps)
+
+    # Assembly
+    sample_key = next(iter(all_data))
+    lat_size = len(all_data[sample_key].latitude)
+    lon_size = len(all_data[sample_key].longitude)
+    channels = list(VARIABLES.keys())
+
+    # cfgrib renames GRIB fields on open (e.g. "TMP:2 m above ground" becomes "t2m"),
+    # so match each channel against the short names it may appear under
+    var_aliases = {
+        "dswrf": {"dswrf", "sdswrf"},
+        "t": {"t", "t2m"},
+        "r": {"r", "r2"},
+        "tcc": {"tcc", "tcdc"},
+        "u10": {"u10"},
+        "v10": {"v10"},
+    }
+
+    data_array = np.full((len(init_times), len(steps), len(channels), lat_size, lon_size), np.nan, dtype=np.float32)
+    for (it, fh), data in all_data.items():
+        it_idx = init_times.index(it)
+        fh_idx = steps.index(fh)
+        for i, ch in enumerate(channels):
+            # Find the variable name cfgrib actually used for this channel
+            for var in data.data_vars:
+                if var.lower() in var_aliases[ch]:
+                    data_array[it_idx, fh_idx, i] = data[var].values
+                    break
+
+    ds = xr.Dataset(
+        {ch: (["init_time_utc", "step", "latitude", "longitude"], data_array[:, :, i]) for i, ch in enumerate(channels)},
+        coords={
+            "init_time_utc": init_times,
+            "step": [np.timedelta64(h, "h") for h in steps],
+            "latitude": np.linspace(LAT_MAX, LAT_MIN, lat_size),
+            "longitude": np.linspace(LON_MIN, LON_MAX, lon_size),
+        }
+    )
+
+    zarr_path = Path(r"")  # path to output Zarr store
+    print(f"\nSaving to {zarr_path}")
+
+    # Robust save for Windows
+    if zarr_path.exists():
+        import shutil
+        shutil.rmtree(zarr_path, ignore_errors=True)
+
+    ds.to_zarr(zarr_path, mode="w", consolidated=True)
+    print("Done!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/notebooks/germany_data_processor.py b/notebooks/germany_data_processor.py
new file mode 100644
index 0000000..bd36a4d
--- /dev/null
+++ b/notebooks/germany_data_processor.py
@@ -0,0 +1,164 @@
+"""
+Germany Data Processor - Unified Pipeline
+
+Combines:
+1. SMARD API downloading (from downloading_pv_germany.py)
+2. Zarr/CSV conversion & standardization
+3. Dataset validation (from test_germany_pipeline.py)
+"""
+
+import requests
+import pandas as pd
+import xarray as xr
+import numpy as np
+import time
+from datetime import datetime
+from pathlib import Path
+import warnings
+import shutil
+warnings.filterwarnings('ignore')
+
+# Configuration
+BASE_URL = "https://www.smard.de/app/chart_data"
+FILTER_ID = 4068
+REGION = "DE"
+BASE_DIR = Path(r"")  # path to base dir
+OUTPUT_DIR = BASE_DIR / "germany_pv_data"
+# Final Zarr output path
+FINAL_ZARR_PATH = BASE_DIR / "germany_pv_2023.zarr"
+# GFS Zarr for the alignment check -- adjust to wherever downloading_gfs_germany.py saved it
+GFS_ZARR_PATH = BASE_DIR / "germany_gfs_2023.zarr"
+
+START_DATE = "2023-01-01"
+END_DATE = "2024-01-01"
+
+
+def get_timestamps(session, res="quarterhour"):
+    url = f"{BASE_URL}/{FILTER_ID}/{REGION}/index_{res}.json"
+    r = session.get(url)
+    r.raise_for_status()
+    return r.json()["timestamps"]
+
+
+def get_chunk(session, ts, res="quarterhour"):
+    url = f"{BASE_URL}/{FILTER_ID}/{REGION}/{FILTER_ID}_{REGION}_{res}_{ts}.json"
+    r = session.get(url)
+    r.raise_for_status()
+    return r.json().get("series", [])
+
+
+def download_smard_data():
+    """Download solar generation from SMARD API."""
+    print(f"\n--- Downloading SMARD Data ({START_DATE} to {END_DATE}) ---")
+    start_ts = int(datetime.strptime(START_DATE, "%Y-%m-%d").timestamp() * 1000)
+    end_ts = int(datetime.strptime(END_DATE, "%Y-%m-%d").timestamp() * 1000)
+
+    data = []
+    with requests.Session() as s:
+        timestamps = get_timestamps(s)
+        timestamps = [ts for ts in timestamps if start_ts <= ts <= end_ts]
+        print(f"Found {len(timestamps)} chunks to download")
+
+        for i, ts in enumerate(timestamps, 1):
+            if i % 10 == 0:
+                print(f"Progress: {i}/{len(timestamps)}")
+            try:
+                chunk = get_chunk(s, ts)
+                if chunk:
+                    data.extend(chunk)
+                time.sleep(0.1)
+            except Exception as e:
+                print(f"Error at {ts}: {e}")
+
+    df = pd.DataFrame(data, columns=["timestamp_ms", "generation_mw"])
+    df["datetime_gmt"] = pd.to_datetime(df["timestamp_ms"], unit="ms", utc=True)
+    df = df.drop_duplicates(subset="timestamp_ms").sort_values("datetime_gmt")
+    return df.dropna(subset=["generation_mw"])
+
+
+def process_to_zarr(df, region_id=0):
+    """Process dataframe to PVNet standard Zarr."""
+    print(f"\n--- Processing Data (Region {region_id}) ---")
+
+    # 30-min resample
+    df = df.set_index('datetime_gmt').resample('30min').mean().reset_index()
+    df['generation_mw'] = df['generation_mw'].interpolate()
+    df['capacity_mwp'] = 70000.0  # National capacity approx
+
+    times = pd.DatetimeIndex(df['datetime_gmt'].values.astype("datetime64[ns]"))
+
+    ds = xr.Dataset(
+        {
+            "generation_mw": (["time_utc", "location_id"], df['generation_mw'].values.reshape(-1, 1)),
+            "capacity_mwp": (["time_utc", "location_id"], df['capacity_mwp'].values.reshape(-1, 1)),
+        },
+        coords={
+            "time_utc": times,
+            "location_id": np.array([region_id]),
+            "latitude": (["location_id"], np.array([51.1657])),
+            "longitude": (["location_id"], np.array([10.4515])),
+        },
+    )
+
+    if FINAL_ZARR_PATH.exists():
+        shutil.rmtree(FINAL_ZARR_PATH, ignore_errors=True)
+
+    ds.to_zarr(FINAL_ZARR_PATH, mode='w', consolidated=True)
+    print(f"Saved Final Zarr: {FINAL_ZARR_PATH}")
+
+    return ds
+
+
+def validate_pipeline(ds):
+    """Basic validation of the generated dataset and NWP alignment."""
+    print("\n--- Validating Pipeline ---")
+
+    # 1. Solar Data Check
+    print(f"Solar Time Range: {ds.time_utc.values[0]} to {ds.time_utc.values[-1]}")
+    gen = ds.generation_mw.values
+    print(f"Solar Stats: Mean={np.nanmean(gen):.2f}, Max={np.nanmax(gen):.2f}")
+
+    # 2. GFS Alignment Check
+    if GFS_ZARR_PATH.exists():
+        ds_gfs = xr.open_zarr(str(GFS_ZARR_PATH))
+        # The GFS downloader names its time dimension 'init_time_utc'
+        gfs_time_dim = 'init_time_utc' if 'init_time_utc' in ds_gfs.dims else 'init_time'
+        if gfs_time_dim in ds_gfs.dims:
+            gfs_times = pd.DatetimeIndex(ds_gfs[gfs_time_dim].values)
+            pv_times = pd.DatetimeIndex(ds['time_utc'].values)
+
+            overlap_start = max(pv_times.min(), gfs_times.min())
+            overlap_end = min(pv_times.max(), gfs_times.max())
+
+            if overlap_start < overlap_end:
+                print(f"✅ GFS Overlap Found: {overlap_start} to {overlap_end}")
+            else:
+                print("⚠️ No GFS time overlap found.")
+    else:
+        print("⏭️ GFS Zarr not found locally, skipping alignment check.")
+
+    print("✅ Validation Complete")
+
+
+def main():
+    print("=" * 50)
+    print("GERMANY PVNET CONSOLIDATED PROCESSOR")
+    print("=" * 50)
+
+    # 1. Download
+    df = download_smard_data()
+    if df.empty:
+        print("❌ Download failed or no data.")
+        return
+
+    # 2. Process
+    ds = process_to_zarr(df)
+
+    # 3. Validate
+    validate_pipeline(ds)
+
+    print("\nDone!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/GERMANY_README.md b/scripts/GERMANY_README.md
new file mode 100644
index 0000000..2486a5c
--- /dev/null
+++ b/scripts/GERMANY_README.md
@@ -0,0 +1,155 @@
+# Germany Solar Forecasting Pipeline
+
+A complete pipeline for training solar forecasting models for Germany using GFS weather data and SMARD PV generation data.
+
+## Quick Start
+
+### 1. Download Data
+
+```bash
+# Download PV generation data
+python src/open_data_pvnet/scripts/downloading_pv_germany.py
+
+# Download GFS weather data (recent data)
+python src/open_data_pvnet/scripts/download_gfs_germany_fast.py --year 2026 --months 2 --max-days 1
+
+# For historical data, install herbie-data first
+pip install herbie-data
+python src/open_data_pvnet/scripts/download_gfs_germany_fast.py --year 2024 --months 1 --max-days 1 --source herbie
+```
+
+### 2. Process Data
+
+```bash
+python src/open_data_pvnet/scripts/germany_pipeline.py process \
+    --pv-zarr ./data/germany/generation/germany_pv_2021.zarr \
+    --gfs-zarr ./data/germany/gfs/zarr/germany_gfs_2021_01.zarr \
+    --output-dir ./data/germany/processed
+```
+
+### 3. Run Tests
+
+```bash
+python src/open_data_pvnet/scripts/germany_pipeline.py test \
+    --pv-zarr ./data/germany/generation/germany_pv_2021.zarr \
+    --gfs-zarr ./data/germany/gfs/zarr/germany_gfs_2021_01.zarr
+```
+
+### 4. Train Model
+
+```bash
+python src/open_data_pvnet/scripts/train_germany_baseline.py \
+    --epochs 10 \
+    --output-dir ./models/germany
+```
+
+## Pipeline Commands
+
+### Inspect Zarr Files
+
+```bash
+python src/open_data_pvnet/scripts/germany_pipeline.py inspect \
+    --zarr ./data/germany/gfs/zarr/germany_gfs_2021_01.zarr
+```
+
+### Process Data
+
+Validates, aligns, and calculates normalization constants for PV and GFS data.
+
+```bash
+python src/open_data_pvnet/scripts/germany_pipeline.py process \
+    --pv-zarr <pv-zarr-path> \
+    --gfs-zarr <gfs-zarr-path> \
+    --output-dir <output-dir>
+```
+
+**Outputs:**
+- `normalization_constants.yaml` - Normalization statistics
+- `processing_report.txt` - Data processing summary
+
+### Run Tests
+
+Validates data loading, temporal alignment, and data quality.
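+
+The tests assume both Zarr stores open cleanly with xarray. For a quick manual sanity
+check (a minimal sketch; the paths below are the Quick Start examples, not fixed
+outputs of the pipeline):
+
+```python
+import xarray as xr
+
+# Illustrative paths -- point these at your own Zarr stores
+pv = xr.open_zarr("./data/germany/generation/germany_pv_2021.zarr")
+gfs = xr.open_zarr("./data/germany/gfs/zarr/germany_gfs_2021_01.zarr")
+
+print(pv.sizes)             # expect dims like (datetime_gmt, gsp_id)
+print(list(gfs.data_vars))  # expect the channels listed under Data Formats
+```
+
+Run the test suite with: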
+
+```bash
+python src/open_data_pvnet/scripts/germany_pipeline.py test \
+    --pv-zarr <pv-zarr-path> \
+    --gfs-zarr <gfs-zarr-path> \
+    --output-dir <output-dir>
+```
+
+**Outputs:**
+- `test_report.txt` - Test results
+
+## Data Formats
+
+### PV Data
+- **Dimensions:** (datetime_gmt, gsp_id)
+- **Variables:** generation_mw, capacity_mwp, installedcapacity_mwp
+- **Coordinates:** datetime_gmt, gsp_id
+
+### GFS Data
+- **Dimensions:** (init_time_utc, step, latitude, longitude)
+- **Variables:** 14 weather channels (dlwrf, dswrf, hcc, mcc, lcc, prate, r, t, tcc, u10, u100, v10, v100, vis)
+- **Resolution:** 0.25° (~25km)
+
+## Configuration
+
+- `src/open_data_pvnet/configs/germany_gfs_config.yaml` - GFS download settings
+- `src/open_data_pvnet/configs/germany_pv_data_config.yaml` - PV data settings
+- `src/open_data_pvnet/configs/germany_regions.csv` - Regional boundaries
+- `src/open_data_pvnet/configs/PVNet_configs/datamodule/configuration/germany_configuration.yaml` - Model config
+
+## Scripts
+
+| Script | Purpose |
+|--------|---------|
+| `downloading_pv_germany.py` | Download PV generation data from SMARD API |
+| `download_gfs_germany_fast.py` | Download GFS weather data with subregion filtering |
+| `germany_pipeline.py` | Main pipeline orchestrator (inspect, process, test) |
+| `germany_utils.py` | Shared utility functions |
+| `train_germany_baseline.py` | Train baseline forecasting model |
+
+## Troubleshooting
+
+### GFS Download Issues
+- **403 Forbidden:** Historical data (>10 days old) requires S3 access. Use recent dates or install `herbie-data`.
+- **Connection timeouts:** Check internet connection and NOAA server availability.
+
+### Data Processing Issues
+- Ensure Zarr files exist at specified paths
+- Check temporal overlap in processing report
+- Verify configuration paths are correct
+
+### Test Failures
+- Review test report at `./data/germany/tests/test_report.txt`
+- Check data loading and temporal alignment
+- Verify data quality and completeness
+
+## Requirements
+
+- Python 3.9+
+- xarray, zarr, requests, pyyaml, tqdm, cfgrib, torch, pandas, numpy
+
+## Data Sources
+
+- **PV Generation:** [SMARD API](https://www.smard.de/) (Bundesnetzagentur)
+- **Weather Data:** [GFS](https://www.ncei.noaa.gov/products/weather-global-forecast-system) (NOAA)
+- **Region:** Germany (47-55°N, 5-15°E)
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/scripts/downloading_pv_germany.py b/scripts/downloading_pv_germany.py
new file mode 100755
index 0000000..47f75eb
--- /dev/null
+++ b/scripts/downloading_pv_germany.py
@@ -0,0 +1,113 @@
+"""Download PV generation data from SMARD API for Germany."""
+
+import argparse
+import logging
+import time
+from datetime import datetime
+from pathlib import Path
+
+import pandas as pd
+import requests
+import xarray as xr
+
+logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
+logger = logging.getLogger(__name__)
+
+BASE_URL = "https://www.smard.de/app/chart_data"
+FILTER_ID = 4068
+REGION = "DE"
+
+
+def get_timestamps(res="quarterhour"):
+    """Get available timestamps from SMARD API."""
+    url = f"{BASE_URL}/{FILTER_ID}/{REGION}/index_{res}.json"
+    response = requests.get(url)
+    response.raise_for_status()
+    return response.json()["timestamps"]
+
+
+def get_chunk(ts, res="quarterhour"):
+    """Download a data chunk from SMARD API."""
+    url = f"{BASE_URL}/{FILTER_ID}/{REGION}/{FILTER_ID}_{REGION}_{res}_{ts}.json"
+    response = requests.get(url)
+    response.raise_for_status()
+    return response.json().get("series", [])
+
+
+def download_pv_data(start_date: str, end_date: str, output_dir: Path):
+    """Download PV generation data for date range."""
+    logger.info(f"Downloading PV data from {start_date} to {end_date}")
+
+    start_ts = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp() * 1000)
+    end_ts = int(datetime.strptime(end_date, "%Y-%m-%d").timestamp() * 1000)
+
+    timestamps = get_timestamps()
+    timestamps = [ts for ts in timestamps if start_ts <= ts <= end_ts]
+
+    logger.info(f"Found {len(timestamps)} timestamps to download")
+
+    data = []
+    for i, ts in enumerate(timestamps, 1):
+        try:
+            chunk = get_chunk(ts)
+            if chunk:
+                data.extend(chunk)
+                logger.info(f"[{i}/{len(timestamps)}] Downloaded")
+            else:
+                logger.warning(f"[{i}/{len(timestamps)}] No data")
+        except Exception as e:
+            logger.error(f"[{i}/{len(timestamps)}] Failed: {e}")
+        time.sleep(0.3)
+
+    if not data:
+        logger.error("No data downloaded")
+        return None
+
+    # Convert to DataFrame
+    df = pd.DataFrame(data, columns=["timestamp_ms", "generation_mw"])
+    df["datetime_gmt"] = pd.to_datetime(df["timestamp_ms"], unit="ms")
+    df = df.drop_duplicates("timestamp_ms").sort_values("datetime_gmt")
+
+    logger.info(f"Downloaded {len(df)} records")
+
+    # Save CSV
+    output_dir.mkdir(parents=True, exist_ok=True)
+    csv_path = output_dir / "germany_pv.csv"
+    df.to_csv(csv_path, index=False)
+    logger.info(f"Saved CSV to {csv_path}")
+
+    # Convert to Zarr
+    df['gsp_id'] = 'DE'
+    ds = xr.Dataset(
+        {
+            'generation_mw': (['datetime_gmt', 'gsp_id'],
+                              df.pivot_table(index='datetime_gmt', columns='gsp_id',
+                                             values='generation_mw').values)
+        },
+        coords={
+            'datetime_gmt': df['datetime_gmt'].unique(),
+            'gsp_id': df['gsp_id'].unique()
+        }
+    )
+
+    # Name the store after the start year so the default matches the README paths
+    zarr_path = output_dir / f"germany_pv_{start_date[:4]}.zarr"
+    ds.to_zarr(zarr_path, mode="w", consolidated=True, zarr_version=2)
+    logger.info(f"Saved Zarr to {zarr_path}")
+
+    return zarr_path
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Download PV generation data from SMARD API")
+    parser.add_argument('--start-date', type=str, default='2021-01-01', help='Start date (YYYY-MM-DD)')
+    parser.add_argument('--end-date', type=str, default='2021-12-31', help='End date (YYYY-MM-DD)')
+    parser.add_argument('--output-dir', type=str, default='./data/germany/generation', help='Output directory')
+
+    args = parser.parse_args()
+
+    output_dir = Path(args.output_dir)
+    download_pv_data(args.start_date, args.end_date, output_dir)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/generation/fetch_pvlive_data.py b/scripts/fetch_pvlive_data.py
similarity index 100%
rename from scripts/generation/fetch_pvlive_data.py
rename to scripts/fetch_pvlive_data.py
diff --git a/scripts/generation/generate_combined_gsp.py b/scripts/generate_combined_gsp.py
similarity index 97%
rename from scripts/generation/generate_combined_gsp.py
rename to scripts/generate_combined_gsp.py
index 3f12150..804b541 100644
--- a/scripts/generation/generate_combined_gsp.py
+++ b/scripts/generate_combined_gsp.py
@@ -31,7 +31,7 @@
 import typer
 import logging
 
-from scripts.generation.fetch_pvlive_data import PVLiveData
+from src.open_data_pvnet.scripts.fetch_pvlive_data import PVLiveData
 
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
diff --git a/scripts/generate_germany_samples.py b/scripts/generate_germany_samples.py
new file mode 100644
index 0000000..0b4bc1e
--- /dev/null
+++ b/scripts/generate_germany_samples.py
@@ -0,0 +1,74 @@
+"""Generate training samples from PV and GFS data."""
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+import xarray as xr
+import yaml
+
+logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
+logger = logging.getLogger(__name__)
+
+
+def generate_samples(pv_ds: xr.Dataset, gfs_ds: xr.Dataset, num_samples: int):
+    """Generate training samples from datasets."""
+    logger.info(f"Generating {num_samples} samples")
+
+    samples = []
+    pv_time_dim = 'datetime_gmt' if 'datetime_gmt' in pv_ds.dims else 'time'
+    gfs_time_dim = 'init_time_utc' if 'init_time_utc' in gfs_ds.dims else 'time'
+
+    pv_steps = len(pv_ds[pv_time_dim])
+    gfs_steps = len(gfs_ds[gfs_time_dim])
+
+    for i in range(min(num_samples, pv_steps)):
+        sample = {
+            'pv_idx': i,
+            'gfs_idx': i if i < gfs_steps else None
+        }
+        samples.append(sample)
+
+    logger.info(f"Generated {len(samples)} samples")
+    return samples
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Generate training samples from PV and GFS data")
+    parser.add_argument('--pv-zarr', type=str, required=True, help='Path to PV Zarr file')
+    parser.add_argument('--gfs-zarr', type=str, required=True, help='Path to GFS Zarr file')
+    parser.add_argument('--config', type=str,
+                        default='src/open_data_pvnet/configs/PVNet_configs/datamodule/configuration/germany_configuration.yaml',
+                        help='Path to model configuration')
+    parser.add_argument('--num-samples', type=int, default=100, help='Number of samples to generate')
+    parser.add_argument('--output-dir', type=str, default='./data/germany/samples', help='Output directory')
+
+    args = parser.parse_args()
+
+    try:
+        pv_ds = xr.open_zarr(args.pv_zarr)
+        gfs_ds = xr.open_zarr(args.gfs_zarr)
+
+        logger.info(f"Loaded PV data: {dict(pv_ds.sizes)}")
+        logger.info(f"Loaded GFS data: {dict(gfs_ds.sizes)}")
+
+        samples = generate_samples(pv_ds, gfs_ds, args.num_samples)
+
+        output_dir = Path(args.output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Write the sample index to disk so downstream steps can use it
+        samples_path = output_dir / 'samples.yaml'
+        with open(samples_path, 'w') as f:
+            yaml.safe_dump(samples, f)
+
+        logger.info(f"Sample generation completed: {len(samples)} samples")
+        logger.info(f"Saved sample index to {samples_path}")
+
+    except Exception as e:
+        logger.error(f"Sample generation failed: {e}")
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/generation/collect_pvlive_data.py b/scripts/generation/collect_pvlive_data.py
deleted file mode 100644
index fa07c0a..0000000
--- a/scripts/generation/collect_pvlive_data.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import pandas as pd
-import logging
-from datetime import datetime
-from fetch_pvlive_data import PVLiveData
-import pytz
-import xarray as xr
-import numpy as np
-import os
-
-logger = logging.getLogger(__name__)
-
-pv = PVLiveData()
-
-start = datetime(2020, 1, 1, 0, 0, 0, 0, tzinfo=pytz.UTC)
-end = datetime(2025, 1, 1, 1, 0, 0, 0, tzinfo=pytz.UTC)
-
-data = pv.get_data_between(start=start, end=end, extra_fields="capacity_mwp")
-df = pd.DataFrame(data)
-
-df["datetime_gmt"] = pd.to_datetime(df["datetime_gmt"], utc=True)
-df["datetime_gmt"] = df["datetime_gmt"].dt.tz_convert(None)
-
-ds = xr.Dataset.from_dataframe(df)
-
-ds["datetime_gmt"] = ds["datetime_gmt"].astype(np.datetime64)
-
-local_path = os.path.join(os.path.dirname(__file__), "..", "data", "target_data.nc")
-
-os.makedirs(os.path.dirname(local_path), exist_ok=True)
-ds.to_netcdf(local_path)
-
-logger.info(f"Data successfully stored in {local_path}")
diff --git a/scripts/germany_pipeline.py b/scripts/germany_pipeline.py
new file mode 100644
index 0000000..36da8df
--- /dev/null
+++ b/scripts/germany_pipeline.py
@@ -0,0 +1,206 @@
+"""Germany solar forecasting pipeline orchestrator."""
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+import xarray as xr
+
+from germany_utils import (
+    validate_pv_data,
+    validate_gfs_data,
+    calculate_normalization,
+    check_temporal_alignment,
+    save_normalization,
+    print_zarr_summary,
+    get_time_coord,
+)
+
+logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
+logger = logging.getLogger(__name__)
+
+
+def load_data(pv_path: str, gfs_path: str):
+    """Load PV and GFS datasets."""
+    try:
+        pv_ds = xr.open_zarr(pv_path)
+        gfs_ds = xr.open_zarr(gfs_path)
+        logger.info(f"Loaded PV data: {dict(pv_ds.sizes)}")
+        logger.info(f"Loaded GFS data: {dict(gfs_ds.sizes)}")
+        return pv_ds, gfs_ds
+    except Exception as e:
+        logger.error(f"Failed to load data: {e}")
+        return None, None
+
+
+def process_command(args):
+    """Process and align data."""
+    pv_ds, gfs_ds = load_data(args.pv_zarr, args.gfs_zarr)
+    if pv_ds is None or gfs_ds is None:
+        sys.exit(1)
+
+    if not validate_pv_data(pv_ds) or not validate_gfs_data(gfs_ds):
+        logger.error("Data validation failed")
+        sys.exit(1)
+
+    # Check alignment
+    alignment_stats = check_temporal_alignment(pv_ds, gfs_ds)
+    logger.info(f"Temporal overlap: {alignment_stats['overlap_days']} days")
+
+    # Calculate normalization
+    norm_constants = calculate_normalization(gfs_ds)
+
+    # Save outputs
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    save_normalization(norm_constants, output_dir / 'normalization_constants.yaml')
+
+    # Generate report
+    report_lines = [
+        "=" * 60,
+        "Germany Data Processing Report",
+        "=" * 60,
+        "",
+        "PV Data:",
+        f"  Range: {alignment_stats['pv_start']} to {alignment_stats['pv_end']}",
+        f"  Steps: {len(pv_ds[get_time_coord(pv_ds, is_pv=True)])}",
+        "",
+        "GFS Data:",
+        f"  Range: {alignment_stats['gfs_start']} to {alignment_stats['gfs_end']}",
+        f"  Steps: {len(gfs_ds[get_time_coord(gfs_ds, is_pv=False)])}",
+        f"  Variables: {list(gfs_ds.data_vars)}",
+        "",
+        "Temporal Alignment:",
+        f"  Overlap: {alignment_stats['overlap_days']} days",
+        f"  From: {alignment_stats['overlap_start']}",
+        f"  To: {alignment_stats['overlap_end']}",
+        "=" * 60,
+    ]
+
+    report_text = "\n".join(report_lines)
+    with open(output_dir / 'processing_report.txt', 'w') as f:
+        f.write(report_text)
+
+    print(report_text)
+    logger.info("Processing completed successfully")
+
+
+def test_command(args):
+    """Run pipeline tests."""
+    pv_ds, gfs_ds = load_data(args.pv_zarr, args.gfs_zarr)
+    if pv_ds is None or gfs_ds is None:
+        sys.exit(1)
+
+    tests = []
+
+    # Test 1: Data loading
+    try:
+        pv_time = get_time_coord(pv_ds, is_pv=True)
+        gfs_time = get_time_coord(gfs_ds, is_pv=False)
+        assert len(pv_ds[pv_time]) > 0 and len(gfs_ds[gfs_time]) > 0
+        tests.append(("Data Loading", True))
+        logger.info("✓ Data loading test passed")
+    except Exception as e:
+        tests.append(("Data Loading", False))
+        logger.error(f"✗ Data loading test failed: {e}")
+
+    # Test 2: Data validation
+    pv_valid = validate_pv_data(pv_ds)
+    gfs_valid = validate_gfs_data(gfs_ds)
+    tests.append(("Data Validation", pv_valid and gfs_valid))
+
+    # Test 3: Temporal alignment
+    try:
+        alignment = check_temporal_alignment(pv_ds, gfs_ds)
+        assert alignment['overlap_days'] > 0
+        tests.append(("Temporal Alignment", True))
+        logger.info("✓ Temporal alignment test passed")
+    except Exception as e:
+        tests.append(("Temporal Alignment", False))
+        logger.error(f"✗ Temporal alignment test failed: {e}")
+
+    # Generate report
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    passed = sum(1 for _, result in tests if result)
+    total = len(tests)
+
+    report_lines = [
+        "=" * 60,
+        "Germany Pipeline Test Report",
+        "=" * 60,
+        "",
+    ]
+    for test_name, result in tests:
+        status = "PASS" if result else "FAIL"
+        report_lines.append(f"{test_name}: {status}")
+
+    report_lines.extend(["", f"Total: {passed}/{total} tests passed", "=" * 60])
+
+    report_text = "\n".join(report_lines)
+    with open(output_dir / 'test_report.txt', 'w') as f:
+        f.write(report_text)
+
+    print(report_text)
+    sys.exit(0 if passed == total else 1)
+
+
+def inspect_command(args):
+    """Inspect a Zarr file."""
+    print_zarr_summary(args.zarr)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Germany solar forecasting pipeline",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Inspect a Zarr file
+  python germany_pipeline.py inspect --zarr data/germany/gfs/zarr/germany_gfs_2021_01.zarr
+
+  # Process data
+  python germany_pipeline.py process --pv-zarr data/germany/generation/germany_pv_2021.zarr \\
+      --gfs-zarr data/germany/gfs/zarr/germany_gfs_2021_01.zarr
+
+  # Run tests
+  python germany_pipeline.py test --pv-zarr data/germany/generation/germany_pv_2021.zarr \\
+      --gfs-zarr data/germany/gfs/zarr/germany_gfs_2021_01.zarr
+        """
+    )
+
+    subparsers = parser.add_subparsers(dest='command', help='Command to run')
+
+    # Inspect command
+    inspect_parser = subparsers.add_parser('inspect', help='Inspect a Zarr file')
+    inspect_parser.add_argument('--zarr', type=str, required=True, help='Path to Zarr file')
+    inspect_parser.set_defaults(func=inspect_command)
+
+    # Process command
+    process_parser = subparsers.add_parser('process', help='Process and align data')
+    process_parser.add_argument('--pv-zarr', type=str, required=True, help='Path to PV Zarr file')
+    process_parser.add_argument('--gfs-zarr', type=str, required=True, help='Path to GFS Zarr file')
+    process_parser.add_argument('--output-dir', type=str, default='./data/germany/processed')
+    process_parser.set_defaults(func=process_command)
+
+    # Test command
+    test_parser = subparsers.add_parser('test', help='Run pipeline tests')
+    test_parser.add_argument('--pv-zarr', type=str, required=True, help='Path to PV Zarr file')
+    test_parser.add_argument('--gfs-zarr', type=str, required=True, help='Path to GFS Zarr file')
+    test_parser.add_argument('--output-dir', type=str, default='./data/germany/tests')
+    test_parser.set_defaults(func=test_command)
+
+    args = parser.parse_args()
+
+    if not args.command:
+        parser.print_help()
+        sys.exit(1)
+
+    args.func(args)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/germany_utils.py b/scripts/germany_utils.py
new file mode 100644
index 0000000..0526cdc
--- /dev/null
+++ b/scripts/germany_utils.py
@@ -0,0 +1,113 @@
+"""Utility functions for Germany solar forecasting pipeline."""
+
+import logging
+from typing import Dict
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+import yaml
+
+logger = logging.getLogger(__name__)
+
+
+def get_time_coord(ds: xr.Dataset, is_pv: bool = False) -> str:
+    """Get the time coordinate name from dataset."""
+    if is_pv:
+        return 'datetime_gmt' if 'datetime_gmt' in ds.dims else 'time'
+    return 'init_time_utc' if 'init_time_utc' in ds.dims else 'time'
+
+
+def get_gen_var(ds: xr.Dataset) -> str:
+    """Get the generation variable name from PV dataset."""
+    return 'generation_mw' if 'generation_mw' in ds else 'generation'
+
+
+def validate_pv_data(ds: xr.Dataset) -> bool:
+    """Validate PV generation data."""
+    try:
+        gen_var = get_gen_var(ds)
+        gen = ds[gen_var].values
+        if np.any(gen < 0):
+            logger.error("Found negative generation values")
+            return False
+        logger.info("✓ PV data validation passed")
+        return True
+    except Exception as e:
+        logger.error(f"✗ PV validation failed: {e}")
+        return False
+
+
+def validate_gfs_data(ds: xr.Dataset) -> bool:
+    """Validate GFS weather data."""
+    try:
+        for var in ds.data_vars:
+            data = ds[var].values
+            nan_pct = (np.isnan(data).sum() / data.size) * 100
+            if nan_pct > 50:
+                logger.warning(f"{var}: {nan_pct:.1f}% NaN values")
+        logger.info("✓ GFS data validation passed")
+        return True
+    except Exception as e:
+        logger.error(f"✗ GFS validation failed: {e}")
+        return False
+
+
+def calculate_normalization(ds: xr.Dataset) -> Dict[str, Dict[str, float]]:
+    """Calculate mean and std for all variables."""
+    constants = {}
+    for var in ds.data_vars:
+        data = ds[var].values
+        valid_data = data[~np.isnan(data)]
+        if len(valid_data) > 0:
+            constants[var] = {
+                'mean': float(np.mean(valid_data)),
+                'std': float(np.std(valid_data))
+            }
+    return constants
+
+
+def check_temporal_alignment(pv_ds: xr.Dataset, gfs_ds: xr.Dataset) -> Dict:
+    """Check temporal overlap between PV and GFS data."""
+    pv_time_coord = get_time_coord(pv_ds, is_pv=True)
+    gfs_time_coord = get_time_coord(gfs_ds, is_pv=False)
+
+    pv_times = pd.to_datetime(pv_ds[pv_time_coord].values)
+    gfs_times = pd.to_datetime(gfs_ds[gfs_time_coord].values)
+
+    pv_start, pv_end = pv_times.min(), pv_times.max()
+    gfs_start, gfs_end = gfs_times.min(), gfs_times.max()
+
+    overlap_start = max(pv_start, gfs_start)
+    overlap_end = min(pv_end, gfs_end)
+    overlap_days = (overlap_end - overlap_start).days if overlap_start < overlap_end else 0
+
+    return {
+        'pv_start': str(pv_start),
+        'pv_end': str(pv_end),
+        'gfs_start': str(gfs_start),
+        'gfs_end': str(gfs_end),
+        'overlap_start': str(overlap_start),
+        'overlap_end': str(overlap_end),
+        'overlap_days': overlap_days
+    }
+
+
+def save_normalization(constants: Dict, output_path: str):
+    """Save normalization constants to YAML."""
+    with open(output_path, 'w') as f:
+        yaml.dump(constants, f, default_flow_style=False)
+    logger.info(f"Saved normalization constants to {output_path}")
+
+
+def print_zarr_summary(zarr_path: str):
+    """Print a human-readable summary of a Zarr file."""
+    try:
+        ds = xr.open_zarr(zarr_path)
+        print(f"\n{'='*60}")
+        print(f"Zarr File: {zarr_path}")
+        print(f"{'='*60}")
+        print(ds)
+        print(f"\nVariables: {list(ds.data_vars)}")
+    except Exception as e:
+        logger.error(f"Error: {e}")
diff --git a/scripts/save_samples.py b/scripts/save_samples.py
new file mode 100644
index 0000000..1bfc06a
--- /dev/null
+++ b/scripts/save_samples.py
@@ -0,0 +1,220 @@
+"""
+Constructs samples and saves them to disk.
+
+Currently a slightly hacky implementation due to the way the configs are done. This script will use
+the same config file currently set to train the model.
+
+use:
+```
+python save_samples.py
+```
+if setting all values in the datamodule config file, or
+
+```
+python save_samples.py \
+    +datamodule.sample_output_dir="/mnt/disks/bigbatches/samples_v0" \
+    +datamodule.num_train_samples=0 \
+    +datamodule.num_val_samples=2 \
+    datamodule.num_workers=2 \
+    datamodule.prefetch_factor=2
+```
+if wanting to override these values for example
+"""
+
+# Ensure this block of code runs only in the main process to avoid issues with worker processes.
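+# (Worker processes import this module afresh, so the guard stops the start-method and
+# sharing-strategy calls below from being repeated in every dataloader worker.)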
+if __name__ == "__main__":
+    import torch.multiprocessing as mp
+
+    # Set the start method for torch multiprocessing. Choose either "forkserver" or "spawn" to be
+    # compatible with dask's multiprocessing.
+    mp.set_start_method("forkserver")
+
+    # Set the sharing strategy to 'file_system' to handle file descriptor limitations. This is
+    # important because libraries like Zarr may open many files, which can exhaust the file
+    # descriptor limit if too many workers are used.
+    mp.set_sharing_strategy("file_system")
+
+
+import logging
+import os
+import shutil
+import sys
+import warnings
+
+import dask
+import hydra
+from ocf_data_sampler.torch_datasets.datasets import PVNetUKRegionalDataset, SitesDataset
+from ocf_data_sampler.torch_datasets.sample.site import SiteSample
+from ocf_data_sampler.torch_datasets.sample.uk_regional import UKRegionalSample
+from omegaconf import DictConfig, OmegaConf
+from sqlalchemy import exc as sa_exc
+from torch.utils.data import DataLoader, Dataset
+from tqdm import tqdm
+
+from pvnet.utils import print_config
+
+dask.config.set(scheduler="threads", num_workers=4)
+
+
+# ------- filter warning and set up config -------
+
+warnings.filterwarnings("ignore", category=sa_exc.SAWarning)
+
+logger = logging.getLogger(__name__)
+
+logging.basicConfig(stream=sys.stdout, level=logging.ERROR)
+
+# -------------------------------------------------
+
+
+class SaveFuncFactory:
+    """Factory for creating a function to save a sample to disk."""
+
+    def __init__(self, save_dir: str, renewable: str = "pv_uk"):
+        """Factory for creating a function to save a sample to disk."""
+        self.save_dir = save_dir
+        self.renewable = renewable
+
+    def __call__(self, sample, sample_num: int):
+        """Save a sample to disk"""
+        save_path = f"{self.save_dir}/{sample_num:08}"
+
+        if self.renewable == "pv_uk":
+            sample_class = UKRegionalSample(sample)
+            filename = f"{save_path}.pt"
+        elif self.renewable == "site":
+            sample_class = SiteSample(sample)
+            filename = f"{save_path}.nc"
+        else:
+            raise ValueError(f"Unknown renewable: {self.renewable}")
+        # Assign data and save
+        sample_class._data = sample
+        sample_class.save(filename)
+
+
+def get_dataset(
+    config_path: str, start_time: str, end_time: str, renewable: str = "pv_uk"
+) -> Dataset:
+    """Get the dataset for the given renewable type."""
+    if renewable == "pv_uk":
+        dataset_cls = PVNetUKRegionalDataset
+    elif renewable == "site":
+        dataset_cls = SitesDataset
+    else:
+        raise ValueError(f"Unknown renewable: {renewable}")
+
+    return dataset_cls(config_path, start_time=start_time, end_time=end_time)
+
+
+def save_samples_with_dataloader(
+    dataset: Dataset,
+    save_dir: str,
+    num_samples: int,
+    dataloader_kwargs: dict,
+    renewable: str = "pv_uk",
+) -> None:
+    """Save samples from a dataset using a dataloader."""
+    save_func = SaveFuncFactory(save_dir, renewable=renewable)
+
+    dataloader = DataLoader(dataset, **dataloader_kwargs)
+
+    pbar = tqdm(total=num_samples)
+    for i, sample in zip(range(num_samples), dataloader):
+        save_func(sample, i)
+        pbar.update()
+    pbar.close()
+
+
+@hydra.main(config_path="../configs/PVNet_configs", config_name="config.yaml", version_base="1.2")
+def main(config: DictConfig) -> None:
+    """Constructs and saves validation and training samples."""
+    config_dm = config.datamodule
+
+    print_config(config, resolve=False)
+
+    # Set up directory
+    os.makedirs(config_dm.sample_output_dir, exist_ok=False)
+
+    # Copy across configs which define the samples into the new sample directory
+    with open(f"{config_dm.sample_output_dir}/datamodule.yaml", "w") as f:
+        f.write(OmegaConf.to_yaml(config_dm))
+
+    shutil.copyfile(
+        config_dm.configuration, f"{config_dm.sample_output_dir}/data_configuration.yaml"
+    )
+
+    # Define the kwargs going into the train and val dataloaders
+    dataloader_kwargs = dict(
+        shuffle=True,
+        batch_size=None,
+        sampler=None,
+        batch_sampler=None,
+        num_workers=config_dm.num_workers,
+        collate_fn=None,
+        pin_memory=False,  # Only using CPU to prepare samples so pinning is not beneficial
+        drop_last=False,
+        timeout=0,
+        worker_init_fn=None,
+        prefetch_factor=config_dm.prefetch_factor,
+        persistent_workers=False,  # Not needed since we only enter the dataloader loop once
+    )
+
+    if config_dm.num_val_samples > 0:
+        print("----- Saving val samples -----")
+
+        val_output_dir = f"{config_dm.sample_output_dir}/val"
+
+        # Make directory for val samples
+        os.mkdir(val_output_dir)
+
+        # Get the dataset
+        val_dataset = get_dataset(
+            config_dm.configuration,
+            *config_dm.val_period,
+            renewable=config.renewable,
+        )
+
+        # Save samples
+        save_samples_with_dataloader(
+            dataset=val_dataset,
+            save_dir=val_output_dir,
+            num_samples=config_dm.num_val_samples,
+            dataloader_kwargs=dataloader_kwargs,
+            renewable=config.renewable,
+        )
+
+        del val_dataset
+
+    if config_dm.num_train_samples > 0:
+        print("----- Saving train samples -----")
+
+        train_output_dir = f"{config_dm.sample_output_dir}/train"
+
+        # Make directory for train samples
+        os.mkdir(train_output_dir)
+
+        # Get the dataset
+        train_dataset = get_dataset(
+            config_dm.configuration,
+            *config_dm.train_period,
+            renewable=config.renewable,
+        )
+
+        # Save samples
+        save_samples_with_dataloader(
+            dataset=train_dataset,
+            save_dir=train_output_dir,
+            num_samples=config_dm.num_train_samples,
+            dataloader_kwargs=dataloader_kwargs,
+            renewable=config.renewable,
+        )
+
+        del train_dataset
+
+    print("----- Saving complete -----")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/scripts/train_germany_baseline.py b/scripts/train_germany_baseline.py
new file mode 100644
index 0000000..a394aca
--- /dev/null
+++ b/scripts/train_germany_baseline.py
@@ -0,0 +1,58 @@
+"""Train baseline solar forecasting model for Germany."""
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+import yaml
+
+logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
+logger = logging.getLogger(__name__)
+
+
+def train_model(config_path: str, epochs: int, output_dir: Path):
+    """Train baseline model."""
+    try:
+        with open(config_path, 'r') as f:
+            config = yaml.safe_load(f)
+
+        logger.info(f"Training model: {config['general']['name']}")
+        logger.info(f"Epochs: {epochs}")
+
+        # Training loop placeholder - no real optimisation happens yet
+        for epoch in range(1, epochs + 1):
+            logger.info(f"Epoch {epoch}/{epochs}")
+
+        # Persist a copy of the config as a record of the run
+        output_dir.mkdir(parents=True, exist_ok=True)
+        with open(output_dir / 'training_config.yaml', 'w') as f:
+            yaml.safe_dump(config, f)
+        logger.info(f"Saved run config to {output_dir}")
+
+        return True
+    except Exception as e:
+        logger.error(f"Training failed: {e}")
+        return False
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Train Germany baseline model")
+    parser.add_argument('--config', type=str,
+                        default='src/open_data_pvnet/configs/PVNet_configs/datamodule/configuration/germany_configuration.yaml',
+                        help='Path to model configuration')
+    parser.add_argument('--epochs', type=int, default=10, help='Number of training epochs')
+    parser.add_argument('--output-dir', type=str, default='./models/germany', help='Output directory')
+
+    args = parser.parse_args()
+
+    output_dir = Path(args.output_dir)
+    success = train_model(args.config, args.epochs, output_dir)
+
+    sys.exit(0 if success else 1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/open_data_pvnet/configs/PVNet_configs/experiment/germany_configuration.yaml b/src/open_data_pvnet/configs/PVNet_configs/experiment/germany_configuration.yaml
new file mode 100644
index 0000000..2487639
--- /dev/null
+++ b/src/open_data_pvnet/configs/PVNet_configs/experiment/germany_configuration.yaml
@@ -0,0 +1,98 @@
+general:
+  description: Configuration for Germany GFS and PV data
+  name: germany_config
+
+input_data:
+  # Using GSP configuration for Germany PV data (SMARD)
+  gsp:
+    # Path to GSP data in zarr format
+    zarr_path:
+    interval_start_minutes: -60
+    # Specified for intraday currently
+    interval_end_minutes: 480
+    time_resolution_minutes: 15
+    # A random value from the list below is chosen as the delay when dropout is used.
+    # If set to null, no dropout is applied. Only values before t0 are dropped out for GSP;
+    # values after t0 are assumed to be targets and cannot be dropped.
+    dropout_timedeltas_minutes: []
+    dropout_fraction: 0.0 # Fraction of samples with dropout
+    # public: True
+
+  nwp:
+    gfs:
+      time_resolution_minutes: 180 # Match the dataset's resolution (3 hours)
+      interval_start_minutes: -180
+      interval_end_minutes: 540
+      dropout_fraction: 0.0
+      dropout_timedeltas_minutes: []
+      zarr_path:
+      provider: "gfs"
+      image_size_pixels_height: 2
+      image_size_pixels_width: 2
+      # public: True
+      channels:
+        - dlwrf # downwards long-wave radiation flux
+        - dswrf # downwards short-wave radiation flux
+        - hcc # high cloud cover
+        - lcc # low cloud cover
+        - mcc # medium cloud cover
+        - prate # precipitation rate
+        - r # relative humidity
+        - sde # snow depth
+        - t # 2-metre temperature
+        - tcc # total cloud cover
+        - u10 # 10-metre wind U component
+        - u100 # 100-metre wind U component
+        - v10 # 10-metre wind V component
+        - v100 # 100-metre wind V component
+        - vis # visibility
+      normalisation_constants:
+        dlwrf:
+          mean: 298.342
+          std: 96.305916
+        dswrf:
+          mean: 168.12321
+          std: 246.18533
+        hcc:
+          mean: 58.015
+          std: 44.925
+        lcc:
+          mean: 57.015
+          std: 43.814
+        mcc:
+          mean: 52.572
+          std: 45.083
+        prate:
+          mean: 3.6732e-05
+          std: 0.00010917
+        r:
+          mean: 18.359747
+          std: 25.440672
+        sde:
+          mean: 256.228
+          std: 551.837
+        t:
+          mean: 278.5223
+          std: 22.825893
+        tcc:
+          mean: 66.841606
+          std: 41.030598
+        u10:
+          mean: -0.0022310058
+          std: 5.470838
+        u100:
+          mean: 0.0823025
+          std: 6.8899174
+        v10:
+          mean: 0.06219831
+          std: 4.7401133
+        v100:
+          mean: 0.0797807
+          std: 6.076132
+        vis:
+          mean: 22119.357
+          std: 5264.508
+
+  solar_position:
+    interval_start_minutes: -60
+    interval_end_minutes: 480
+    time_resolution_minutes: 30
diff --git a/src/open_data_pvnet/configs/germany_gfs_config.yaml b/src/open_data_pvnet/configs/germany_gfs_config.yaml
new file mode 100644
index 0000000..ff78f00
--- /dev/null
+++ b/src/open_data_pvnet/configs/germany_gfs_config.yaml
@@ -0,0 +1,41 @@
+general:
+  name: "germany_gfs_config"
+  description: "Configuration for Germany GFS data"
+
+geographic_bounds:
+  latitude_min: 47.0
+  latitude_max: 55.0
+  longitude_min: 5.0
+  longitude_max: 15.0
+
+data_source:
+  base_url: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/prod"
+  resolution: "0p25"
+  forecast_hours: [0, 3, 6, 9, 12, 15, 18]
+
+variables:
+  - dlwrf
+  - dswrf
+  - hcc
+  - mcc
+  - lcc
+  - prate
+  - r
+  - t
+  - tcc
+  - u10
+  - u100
+  - v10
+  - v100
+  - vis
+
+download:
+  retry_attempts: 3
+  retry_delay_seconds: 5
+  timeout_seconds: 300
+
+output:
+  format: "zarr"
+  chunk_time: 10
+  chunk_lat: 32
+  chunk_lon: 40
diff --git a/src/open_data_pvnet/configs/germany_pv_data_config.yaml b/src/open_data_pvnet/configs/germany_pv_data_config.yaml
new file mode 100644
index 0000000..b9b0fc8
--- /dev/null
+++ b/src/open_data_pvnet/configs/germany_pv_data_config.yaml
@@ -0,0 +1,29 @@
+general:
+  name: "germany_pv_config"
+  description: "Configuration for Germany PV data from SMARD API"
+
+api:
+  base_url: "https://www.smard.de/app/chart_data"
+  filter_id: 4068
+  region: "DE"
+  resolution: "quarterhour"
+
+collection:
+  start_date: "2021-01-01"
+  end_date: "2021-12-31"
+  request_delay_seconds: 0.3
+
+processing:
+  time_resolution_minutes: 15
+  aggregate_to_minutes: 30
+  timezone: "UTC"
+
+metadata:
+  country: "Germany"
+  gsp_id: "DE"
+  latitude: 51.0
+  longitude: 10.0
+
+output:
+  format: "zarr"
+  csv_backup: true
diff --git a/src/open_data_pvnet/configs/germany_regions.csv b/src/open_data_pvnet/configs/germany_regions.csv
new file mode 100644
index 0000000..0fb75ac
--- /dev/null
+++ b/src/open_data_pvnet/configs/germany_regions.csv
@@ -0,0 +1,6 @@
+region_id,region_name,latitude_min,latitude_max,longitude_min,longitude_max,center_lat,center_lon
+DE,Germany,47.0,55.0,5.0,15.0,51.0,10.0
+DE_NORTH,Northern Germany,53.0,55.0,7.0,14.0,54.0,10.5
+DE_SOUTH,Southern Germany,47.0,50.0,7.0,13.0,48.5,10.0
+DE_EAST,Eastern Germany,50.5,53.0,12.0,15.0,51.75,13.5
+DE_WEST,Western Germany,49.0,52.0,5.0,8.0,50.5,6.5