From 08a0bcf8b5c548e8a5246c4d19727cb753886333 Mon Sep 17 00:00:00 2001 From: Jacob Bieker Date: Wed, 8 Mar 2023 13:36:44 +0000 Subject: [PATCH 01/13] Add start of experiment running code --- experiments/configs/config.yaml | 0 experiments/configs/datapipe/forecastor.yaml | 77 ++++++++++ experiments/configs/datapipe/labeller.yaml | 0 experiments/run.py | 145 +++++++++++++++++++ pseudo_labeller/model/idam.py | 3 +- 5 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 experiments/configs/config.yaml create mode 100644 experiments/configs/datapipe/forecastor.yaml create mode 100644 experiments/configs/datapipe/labeller.yaml diff --git a/experiments/configs/config.yaml b/experiments/configs/config.yaml new file mode 100644 index 0000000..e69de29 diff --git a/experiments/configs/datapipe/forecastor.yaml b/experiments/configs/datapipe/forecastor.yaml new file mode 100644 index 0000000..f1d83d7 --- /dev/null +++ b/experiments/configs/datapipe/forecastor.yaml @@ -0,0 +1,77 @@ +general: + description: Config for producing batches on OCF's on-premises hardware. + name: pseudo_irradiance_forecastor +input_data: + default_forecast_minutes: 0 + default_history_minutes: 30 + #---------------------- GSP ------------------- + gsp: + gsp_zarr_path: /mnt/storage_ssd_4tb/metnet_train/pv_gsp.zarr + history_minutes: 120 + forecast_minutes: 0 # 48 Hours + log_level: "ERROR" + + #---------------------- NWP ------------------- + nwp: + nwp_image_size_pixels_height: 128 + nwp_image_size_pixels_width: 128 + nwp_zarr_path: /mnt/leonardo/storage_c/UKV_intermediate_version_7.zarr + forecast_minutes: 1800 # 30 hours + history_minutes: 60 + + #---------------------- PV ---------minutes=60---------- + pv: + pv_files_groups: + - label: solar_sheffield_passiv + pv_filename: /mnt/storage_ssd_4tb/metnet_train/Passiv/ocf_formatted/v0/passiv.netcdf + pv_metadata_filename: /mnt/storage_ssd_4tb/metnet_train/Passiv/ocf_formatted/v0/system_metadata_OCF_ONLY.csv + #- label: pvoutput.org + # pv_filename: /mnt/storage_ssd_4tb/metnet_train/PVOutput.org/UK_PV_timeseries_batch.nc + # pv_metadata_filename: /mnt/storage_ssd_4tb/metnet_train/PVOutput.org/UK_PV_metadata.csv + get_center: false + history_minutes: 60 # 1 hour + forecast_minutes: 1800 # 30 hours + log_level: "INFO" + + #---------------------- Satellite ------------- + satellite: + satellite_channels: + - IR_016 + - IR_039 + - IR_087 + - IR_097 + - IR_108 + - IR_120 + - IR_134 + - VIS006 + - VIS008 + - WV_062 + - WV_073 + satellite_image_size_pixels_height: 48 + satellite_image_size_pixels_width: 96 + satellite_zarr_path: "/mnt/leonardo/storage_c/20*_nonhrv.zarr" + keep_dawn_dusk_hours: 2 + + #---------------------- HRVSatellite ------------- + hrvsatellite: + hrvsatellite_channels: + - HRV + hrvsatellite_image_size_pixels_height: 128 + hrvsatellite_image_size_pixels_width: 256 + hrvsatellite_zarr_path: "/mnt/leonardo/storage_c/20*_hrv.zarr" + + # ------------------------- Topographic ---------------- + topographic: + topographic_filename: /mnt/storage_ssd_4tb/metnet_train/Topo/europe_dem_1km_osgb.tif + topographic_image_size_pixels_width: 300 + topographic_image_size_pixels_height: 300 + +output_data: + filepath: /mnt/storage_ssd_4tb/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/prepared_ML_training_data/v15 +process: + batch_size: 8 + seed: 1234 + upload_every_n_batches: 0 # Write directly to output_data.filepath, not to a temp directory. 
+ n_train_batches: 8000 + n_validation_batches: 0 + n_test_batches: 400 diff --git a/experiments/configs/datapipe/labeller.yaml b/experiments/configs/datapipe/labeller.yaml new file mode 100644 index 0000000..e69de29 diff --git a/experiments/run.py b/experiments/run.py index dbe95d8..e257ef9 100644 --- a/experiments/run.py +++ b/experiments/run.py @@ -1 +1,146 @@ """Code for running experiments""" +import torch + +try: + torch.multiprocessing.set_start_method('spawn') + import torch.multiprocessing as mp + + mp.set_start_method('spawn') +except RuntimeError: + pass +import hydra +from omegaconf import DictConfig, OmegaConf +import pytorch_lightning as pl +from pytorch_lightning.callbacks import ModelCheckpoint +from ocf_datapipes.training.pseudo_irradience import pseudo_irradiance_datapipe +from pseudo_labeller.model import PsuedoIrradienceForecastor +import torch.nn.functional as F +import datetime + + +class LitModel(pl.LightningModule): + def __init__( + self, + config: DictConfig, + ): + super().__init__() + self.forecast_steps = config.forecast_steps + self.learning_rate = config.lr + self.model = PsuedoIrradienceForecastor( + input_channels=config.input_channels, + input_size=config.input_size, + input_steps=config.input_steps, + output_channels=config.latent_channels, + conv3d_channels=config.conv3d_channels, + hidden_dim=config.hidden_dim, + kernel_size=config.kernel_size, + num_layers=config.num_layers, + output_steps=config.output_steps, + pv_meta_input_channels=config.pv_meta_input_channels, + ) + self.config = self.model.config + self.save_hyperparameters() + + def forward(self, x): + return F.relu(self.model(x)) + + def training_step(self, batch, batch_idx): + tag = "train" + x, y = batch + y = y[0] + x = torch.nan_to_num(input=x, posinf=1.0, neginf=0.0) + y = torch.nan_to_num(input=y, posinf=1.0, neginf=0.0) + x = x.half() + y = y.half() + y_hat = self(x) + + out_mask = y > 0.0 + in_mask = x > 0.0 + mask = out_mask & in_mask + + # calculate mse, mae + mse_loss = F.mse_loss(y_hat[mask], y[mask]) + nmae_loss = (y_hat[mask] - y[mask]).abs().mean() + loss = nmae_loss + self.log("loss", loss) + self.log_dict( + { + f"MSE/{tag}": mse_loss, + f"NMAE/{tag}": nmae_loss, + }, + ) + return loss + + def configure_optimizers(self): + return torch.optim.AdamW(self.parameters(), lr=self.learning_rate) + +def create_train_dataloader(config: DictConfig): + return pseudo_irradiance_datapipe( + config.config, + start_time=datetime.datetime(2014, 1, 1), + end_time=datetime.datetime(2020, 12, 31), + use_sun=config.sun, + use_nwp=config.nwp, + use_sat=config.sat, + use_hrv=config.hrv, + use_pv=True, + use_topo=config.topo, + size=config.size, + use_future=config.use_future + ) + + +def create_val_dataloader(config: DictConfig): + return pseudo_irradiance_datapipe( + config.config, + start_time=datetime.datetime(2021, 1, 1), + end_time=datetime.datetime(2021, 12, 31), + use_sun=config.sun, + use_nwp=config.nwp, + use_sat=config.sat, + use_hrv=config.hrv, + use_pv=True, + use_topo=config.topo, + size=config.size, + use_future=config.use_future + ) + + +@hydra.main(version_base=None, config_path="configs", config_name="config") +def experiment(cfg: DictConfig) -> None: + print(OmegaConf.to_yaml(cfg)) + model = LitModel(cfg.model) + train_dataloader = create_train_dataloader(cfg.train_dataloader) + val_dataloader = create_val_dataloader(cfg.val_dataloader) + + model_checkpoint = ModelCheckpoint( + every_n_train_steps=100, + monitor="step", + mode="max", + save_last=True, + save_top_k=10, + ) + + from 
pytorch_lightning import loggers as pl_loggers + + tb_logger = pl_loggers.TensorBoardLogger(save_dir="logs/") + # early_stopping = EarlyStopping(monitor="loss") + trainer = pl.Trainer( + max_epochs=cfg.epochs, + precision=16 if cfg.fp16 else 32, + devices=[cfg.num_gpu] if not cfg.cpu else 1, + accelerator="auto" if not cfg.cpu else "cpu", + auto_select_gpus=False, + auto_lr_find=False, + log_every_n_steps=1, + # limit_val_batches=400 * args.accumulate, + # limit_train_batches=500 * args.accumulate, + accumulate_grad_batches=cfg.accumulate, + callbacks=[model_checkpoint], + logger=tb_logger + ) + trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader) + + +if __name__ == "__main__": + experiment() diff --git a/pseudo_labeller/model/idam.py b/pseudo_labeller/model/idam.py index 55ef7af..2d83833 100644 --- a/pseudo_labeller/model/idam.py +++ b/pseudo_labeller/model/idam.py @@ -130,8 +130,9 @@ def forward(self, x: torch.Tensor, pv_meta: torch.Tensor = None, output_latents: # Rearrange back to timeseries of latent variables x = einops.rearrange(x, "b (c t) h w -> b c t h w", c=self.output_channels) return x + if pv_meta is None: + raise ValueError(f"'pv_meta' cannot be none if {output_latents=}") pv_meta = self.pv_meta_input(pv_meta) - # Reshape to fit into 3DCNN x = torch.cat([x, pv_meta], dim=1) # Get pv_meta_output x = F.relu(self.pv_meta_output(x)) # Generation can only be positive or 0, so ReLU From 6f57d5d27d1ee77c64077ea3375eb6169fd121e0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Mar 2023 13:38:19 +0000 Subject: [PATCH 02/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 2 +- experiments/run.py | 25 ++++++++++++++----------- pseudo_labeller/model/idam.py | 2 +- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 5045510..265e8fe 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Pseudo-Labeller -Pseudo Irradience Labeller for generating training labels for other PV generation forecasting, and hindcasts. +Pseudo Irradience Labeller for generating training labels for other PV generation forecasting, and hindcasts. 
## Install diff --git a/experiments/run.py b/experiments/run.py index e257ef9..78b18dd 100644 --- a/experiments/run.py +++ b/experiments/run.py @@ -2,26 +2,28 @@ import torch try: - torch.multiprocessing.set_start_method('spawn') + torch.multiprocessing.set_start_method("spawn") import torch.multiprocessing as mp - mp.set_start_method('spawn') + mp.set_start_method("spawn") except RuntimeError: pass +import datetime + import hydra -from omegaconf import DictConfig, OmegaConf import pytorch_lightning as pl -from pytorch_lightning.callbacks import ModelCheckpoint +import torch.nn.functional as F from ocf_datapipes.training.pseudo_irradience import pseudo_irradiance_datapipe +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning.callbacks import ModelCheckpoint + from pseudo_labeller.model import PsuedoIrradienceForecastor -import torch.nn.functional as F -import datetime class LitModel(pl.LightningModule): def __init__( - self, - config: DictConfig, + self, + config: DictConfig, ): super().__init__() self.forecast_steps = config.forecast_steps @@ -74,6 +76,7 @@ def training_step(self, batch, batch_idx): def configure_optimizers(self): return torch.optim.AdamW(self.parameters(), lr=self.learning_rate) + def create_train_dataloader(config: DictConfig): return pseudo_irradiance_datapipe( config.config, @@ -86,7 +89,7 @@ def create_train_dataloader(config: DictConfig): use_pv=True, use_topo=config.topo, size=config.size, - use_future=config.use_future + use_future=config.use_future, ) @@ -102,7 +105,7 @@ def create_val_dataloader(config: DictConfig): use_pv=True, use_topo=config.topo, size=config.size, - use_future=config.use_future + use_future=config.use_future, ) @@ -137,7 +140,7 @@ def experiment(cfg: DictConfig) -> None: # limit_train_batches=500 * args.accumulate, accumulate_grad_batches=cfg.accumulate, callbacks=[model_checkpoint], - logger=tb_logger + logger=tb_logger, ) trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader) diff --git a/pseudo_labeller/model/idam.py b/pseudo_labeller/model/idam.py index 2d83833..8df3172 100644 --- a/pseudo_labeller/model/idam.py +++ b/pseudo_labeller/model/idam.py @@ -21,7 +21,7 @@ def __init__( num_layers: int = 1, output_steps: int = 1, pv_meta_input_channels: int = 2, - **kwargs + **kwargs, ): """ Pseudo-Irradience Forecastor/Labeller From 9c825bf9ddb9329d34043465a49fe9f6ce7dd4f9 Mon Sep 17 00:00:00 2001 From: Jacob Bieker Date: Wed, 8 Mar 2023 13:51:44 +0000 Subject: [PATCH 03/13] Fix mask calculation --- experiments/run.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/experiments/run.py b/experiments/run.py index 78b18dd..3880dad 100644 --- a/experiments/run.py +++ b/experiments/run.py @@ -57,8 +57,9 @@ def training_step(self, batch, batch_idx): y_hat = self(x) out_mask = y > 0.0 - in_mask = x > 0.0 - mask = out_mask & in_mask + # Sum across timesteps + out_mask = torch.sum(out_mask, dim=1) # Output from training is [B, T, H, W] + mask = out_mask > 0.0 # calculate mse, mae mse_loss = F.mse_loss(y_hat[mask], y[mask]) From 61eb1cb5a0adee2818a7885c9ac3e68f8d9bed7e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Mar 2023 13:51:55 +0000 Subject: [PATCH 04/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- experiments/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/run.py b/experiments/run.py index 3880dad..83f5ceb 
100644 --- a/experiments/run.py +++ b/experiments/run.py @@ -58,7 +58,7 @@ def training_step(self, batch, batch_idx): out_mask = y > 0.0 # Sum across timesteps - out_mask = torch.sum(out_mask, dim=1) # Output from training is [B, T, H, W] + out_mask = torch.sum(out_mask, dim=1) # Output from training is [B, T, H, W] mask = out_mask > 0.0 # calculate mse, mae From 37e498164146f36ad8f8b57aa905e377d39ede08 Mon Sep 17 00:00:00 2001 From: Jacob Bieker Date: Wed, 8 Mar 2023 14:02:13 +0000 Subject: [PATCH 05/13] Add config --- experiments/configs/config.yaml | 27 +++++++++++++++++++++++++++ experiments/run.py | 8 ++++---- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/experiments/configs/config.yaml b/experiments/configs/config.yaml index e69de29..1eedabf 100644 --- a/experiments/configs/config.yaml +++ b/experiments/configs/config.yaml @@ -0,0 +1,27 @@ +model: + forecast_steps: 12 + lr: 1e-4 + input_channels: 12 + input_size: 64 + input_steps: 12 + latent_channels: 8 + conv3d_channels: 256 + hidden_dim: 256 + kernel_size: 3 + num_layers: 4 + output_steps: 12 + pv_meta_input_channels: 2 +dataloader: + config: "datapipe/forecastor.yaml" + sun: true + nwp: true + sat: true + hrv: true + topo: true + size: 64 + use_future: false +epochs: 100 +fp16: true +num_gpu: 0 +cpu: false +accumulate: 1 \ No newline at end of file diff --git a/experiments/run.py b/experiments/run.py index 83f5ceb..83b7ada 100644 --- a/experiments/run.py +++ b/experiments/run.py @@ -44,7 +44,7 @@ def __init__( self.save_hyperparameters() def forward(self, x): - return F.relu(self.model(x)) + return self.model(x) def training_step(self, batch, batch_idx): tag = "train" @@ -81,7 +81,7 @@ def configure_optimizers(self): def create_train_dataloader(config: DictConfig): return pseudo_irradiance_datapipe( config.config, - start_time=datetime.datetime(2014, 1, 1), + start_time=datetime.datetime(2008, 1, 1), end_time=datetime.datetime(2020, 12, 31), use_sun=config.sun, use_nwp=config.nwp, @@ -114,8 +114,8 @@ def create_val_dataloader(config: DictConfig): def experiment(cfg: DictConfig) -> None: print(OmegaConf.to_yaml(cfg)) model = LitModel(cfg.model) - train_dataloader = create_train_dataloader(cfg.train_dataloader) - val_dataloader = create_val_dataloader(cfg.val_dataloader) + train_dataloader = create_train_dataloader(cfg.dataloader) + val_dataloader = create_val_dataloader(cfg.dataloader) model_checkpoint = ModelCheckpoint( every_n_train_steps=100, From e9ef89b3ae74caadcd4d208989058eeeb58a269a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Mar 2023 14:02:26 +0000 Subject: [PATCH 06/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- experiments/configs/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/configs/config.yaml b/experiments/configs/config.yaml index 1eedabf..15042fa 100644 --- a/experiments/configs/config.yaml +++ b/experiments/configs/config.yaml @@ -24,4 +24,4 @@ epochs: 100 fp16: true num_gpu: 0 cpu: false -accumulate: 1 \ No newline at end of file +accumulate: 1 From a7472d9972d3e20d9657855ec26a21d24e3353e9 Mon Sep 17 00:00:00 2001 From: Jacob Bieker Date: Wed, 8 Mar 2023 17:27:46 +0000 Subject: [PATCH 07/13] Update training pipeline for new datapipe --- experiments/run.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/experiments/run.py b/experiments/run.py index 83b7ada..c21e155 100644 --- 
a/experiments/run.py +++ b/experiments/run.py @@ -4,7 +4,6 @@ try: torch.multiprocessing.set_start_method("spawn") import torch.multiprocessing as mp - mp.set_start_method("spawn") except RuntimeError: pass @@ -43,23 +42,20 @@ def __init__( self.config = self.model.config self.save_hyperparameters() - def forward(self, x): - return self.model(x) + def forward(self, x, meta): + return self.model(x, meta, output_latents=False) def training_step(self, batch, batch_idx): tag = "train" - x, y = batch - y = y[0] + x, meta, y = batch x = torch.nan_to_num(input=x, posinf=1.0, neginf=0.0) y = torch.nan_to_num(input=y, posinf=1.0, neginf=0.0) x = x.half() y = y.half() - y_hat = self(x) + meta = meta.half() + y_hat = self(x, meta) - out_mask = y > 0.0 - # Sum across timesteps - out_mask = torch.sum(out_mask, dim=1) # Output from training is [B, T, H, W] - mask = out_mask > 0.0 + mask = meta > 0.0 # calculate mse, mae mse_loss = F.mse_loss(y_hat[mask], y[mask]) From ea2f9c872f826433d3e15e32f8cea21e7ad03b3c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Mar 2023 17:28:25 +0000 Subject: [PATCH 08/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- experiments/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/experiments/run.py b/experiments/run.py index c21e155..b0d0d46 100644 --- a/experiments/run.py +++ b/experiments/run.py @@ -4,6 +4,7 @@ try: torch.multiprocessing.set_start_method("spawn") import torch.multiprocessing as mp + mp.set_start_method("spawn") except RuntimeError: pass From 667aee2cd88c55f2df8cdca73ab1c8e22efa8366 Mon Sep 17 00:00:00 2001 From: Jacob Bieker Date: Wed, 8 Mar 2023 18:59:47 +0000 Subject: [PATCH 09/13] Expand mask correctly --- experiments/configs/config.yaml | 4 ++-- experiments/run.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/experiments/configs/config.yaml b/experiments/configs/config.yaml index 15042fa..bccd526 100644 --- a/experiments/configs/config.yaml +++ b/experiments/configs/config.yaml @@ -6,13 +6,13 @@ model: input_steps: 12 latent_channels: 8 conv3d_channels: 256 - hidden_dim: 256 + hidden_dim: 64 kernel_size: 3 num_layers: 4 output_steps: 12 pv_meta_input_channels: 2 dataloader: - config: "datapipe/forecastor.yaml" + config: "configs/datapipe/forecastor.yaml" sun: true nwp: true sat: true diff --git a/experiments/run.py b/experiments/run.py index b0d0d46..ba5f44b 100644 --- a/experiments/run.py +++ b/experiments/run.py @@ -9,7 +9,7 @@ except RuntimeError: pass import datetime - +import einops import hydra import pytorch_lightning as pl import torch.nn.functional as F @@ -58,6 +58,9 @@ def training_step(self, batch, batch_idx): mask = meta > 0.0 + # Expand to match the ground truth shape + mask = einops.repeat(mask, "b c h w -> b c t h w", t=y.shape[2]) + # calculate mse, mae mse_loss = F.mse_loss(y_hat[mask], y[mask]) nmae_loss = (y_hat[mask] - y[mask]).abs().mean() From e59f817287424708783b27555dd4525fd97f2a4b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Mar 2023 19:00:12 +0000 Subject: [PATCH 10/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- experiments/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/experiments/run.py b/experiments/run.py index ba5f44b..ffb7642 100644 --- a/experiments/run.py +++ b/experiments/run.py @@ -9,6 +9,7 @@ except 
RuntimeError: pass import datetime + import einops import hydra import pytorch_lightning as pl From 0e1482ef996b99f1bf5dd94a9e89444660d9c546 Mon Sep 17 00:00:00 2001 From: Jacob Bieker Date: Mon, 13 Mar 2023 10:43:06 +0000 Subject: [PATCH 11/13] Add inferred metadata filename --- experiments/configs/datapipe/forecastor.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/experiments/configs/datapipe/forecastor.yaml b/experiments/configs/datapipe/forecastor.yaml index f1d83d7..186cb9d 100644 --- a/experiments/configs/datapipe/forecastor.yaml +++ b/experiments/configs/datapipe/forecastor.yaml @@ -25,6 +25,7 @@ input_data: - label: solar_sheffield_passiv pv_filename: /mnt/storage_ssd_4tb/metnet_train/Passiv/ocf_formatted/v0/passiv.netcdf pv_metadata_filename: /mnt/storage_ssd_4tb/metnet_train/Passiv/ocf_formatted/v0/system_metadata_OCF_ONLY.csv + inferred_metadata_filename: /mnt/storage_ssd_4tb/metnet_train/Passiv/ocf_formatted_v0/inferred_meta.csv #- label: pvoutput.org # pv_filename: /mnt/storage_ssd_4tb/metnet_train/PVOutput.org/UK_PV_timeseries_batch.nc # pv_metadata_filename: /mnt/storage_ssd_4tb/metnet_train/PVOutput.org/UK_PV_metadata.csv From b4c02056447524bc47dec863af5b8f6549b54b4c Mon Sep 17 00:00:00 2001 From: Jacob Bieker Date: Thu, 23 Mar 2023 16:14:04 +0000 Subject: [PATCH 12/13] Remove limit --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 52d3c8f..ddfe811 100644 --- a/environment.yml +++ b/environment.yml @@ -14,7 +14,7 @@ dependencies: - torchdata - cartopy - rioxarray - - pytorch-cuda==11.6 + - pytorch-cuda pip: - huggingface_hub - einops From d3825f6d552e4c38fedce1399a4df710ae09572a Mon Sep 17 00:00:00 2001 From: Jacob Bieker Date: Mon, 27 Mar 2023 10:38:58 +0100 Subject: [PATCH 13/13] Remove experiments They are being moved to a different repo, so that this stays focused on the model --- experiments/configs/config.yaml | 27 ---- experiments/configs/datapipe/forecastor.yaml | 78 ---------- experiments/configs/datapipe/labeller.yaml | 0 experiments/run.py | 151 ------------------- 4 files changed, 256 deletions(-) delete mode 100644 experiments/configs/config.yaml delete mode 100644 experiments/configs/datapipe/forecastor.yaml delete mode 100644 experiments/configs/datapipe/labeller.yaml delete mode 100644 experiments/run.py diff --git a/experiments/configs/config.yaml b/experiments/configs/config.yaml deleted file mode 100644 index bccd526..0000000 --- a/experiments/configs/config.yaml +++ /dev/null @@ -1,27 +0,0 @@ -model: - forecast_steps: 12 - lr: 1e-4 - input_channels: 12 - input_size: 64 - input_steps: 12 - latent_channels: 8 - conv3d_channels: 256 - hidden_dim: 64 - kernel_size: 3 - num_layers: 4 - output_steps: 12 - pv_meta_input_channels: 2 -dataloader: - config: "configs/datapipe/forecastor.yaml" - sun: true - nwp: true - sat: true - hrv: true - topo: true - size: 64 - use_future: false -epochs: 100 -fp16: true -num_gpu: 0 -cpu: false -accumulate: 1 diff --git a/experiments/configs/datapipe/forecastor.yaml b/experiments/configs/datapipe/forecastor.yaml deleted file mode 100644 index 186cb9d..0000000 --- a/experiments/configs/datapipe/forecastor.yaml +++ /dev/null @@ -1,78 +0,0 @@ -general: - description: Config for producing batches on OCF's on-premises hardware. 
- name: pseudo_irradiance_forecastor -input_data: - default_forecast_minutes: 0 - default_history_minutes: 30 - #---------------------- GSP ------------------- - gsp: - gsp_zarr_path: /mnt/storage_ssd_4tb/metnet_train/pv_gsp.zarr - history_minutes: 120 - forecast_minutes: 0 # 48 Hours - log_level: "ERROR" - - #---------------------- NWP ------------------- - nwp: - nwp_image_size_pixels_height: 128 - nwp_image_size_pixels_width: 128 - nwp_zarr_path: /mnt/leonardo/storage_c/UKV_intermediate_version_7.zarr - forecast_minutes: 1800 # 30 hours - history_minutes: 60 - - #---------------------- PV ---------minutes=60---------- - pv: - pv_files_groups: - - label: solar_sheffield_passiv - pv_filename: /mnt/storage_ssd_4tb/metnet_train/Passiv/ocf_formatted/v0/passiv.netcdf - pv_metadata_filename: /mnt/storage_ssd_4tb/metnet_train/Passiv/ocf_formatted/v0/system_metadata_OCF_ONLY.csv - inferred_metadata_filename: /mnt/storage_ssd_4tb/metnet_train/Passiv/ocf_formatted_v0/inferred_meta.csv - #- label: pvoutput.org - # pv_filename: /mnt/storage_ssd_4tb/metnet_train/PVOutput.org/UK_PV_timeseries_batch.nc - # pv_metadata_filename: /mnt/storage_ssd_4tb/metnet_train/PVOutput.org/UK_PV_metadata.csv - get_center: false - history_minutes: 60 # 1 hour - forecast_minutes: 1800 # 30 hours - log_level: "INFO" - - #---------------------- Satellite ------------- - satellite: - satellite_channels: - - IR_016 - - IR_039 - - IR_087 - - IR_097 - - IR_108 - - IR_120 - - IR_134 - - VIS006 - - VIS008 - - WV_062 - - WV_073 - satellite_image_size_pixels_height: 48 - satellite_image_size_pixels_width: 96 - satellite_zarr_path: "/mnt/leonardo/storage_c/20*_nonhrv.zarr" - keep_dawn_dusk_hours: 2 - - #---------------------- HRVSatellite ------------- - hrvsatellite: - hrvsatellite_channels: - - HRV - hrvsatellite_image_size_pixels_height: 128 - hrvsatellite_image_size_pixels_width: 256 - hrvsatellite_zarr_path: "/mnt/leonardo/storage_c/20*_hrv.zarr" - - # ------------------------- Topographic ---------------- - topographic: - topographic_filename: /mnt/storage_ssd_4tb/metnet_train/Topo/europe_dem_1km_osgb.tif - topographic_image_size_pixels_width: 300 - topographic_image_size_pixels_height: 300 - -output_data: - filepath: /mnt/storage_ssd_4tb/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/prepared_ML_training_data/v15 -process: - batch_size: 8 - seed: 1234 - upload_every_n_batches: 0 # Write directly to output_data.filepath, not to a temp directory. 
- n_train_batches: 8000 - n_validation_batches: 0 - n_test_batches: 400 diff --git a/experiments/configs/datapipe/labeller.yaml b/experiments/configs/datapipe/labeller.yaml deleted file mode 100644 index e69de29..0000000 diff --git a/experiments/run.py b/experiments/run.py deleted file mode 100644 index ffb7642..0000000 --- a/experiments/run.py +++ /dev/null @@ -1,151 +0,0 @@ -"""Code for running experiments""" -import torch - -try: - torch.multiprocessing.set_start_method("spawn") - import torch.multiprocessing as mp - - mp.set_start_method("spawn") -except RuntimeError: - pass -import datetime - -import einops -import hydra -import pytorch_lightning as pl -import torch.nn.functional as F -from ocf_datapipes.training.pseudo_irradience import pseudo_irradiance_datapipe -from omegaconf import DictConfig, OmegaConf -from pytorch_lightning.callbacks import ModelCheckpoint - -from pseudo_labeller.model import PsuedoIrradienceForecastor - - -class LitModel(pl.LightningModule): - def __init__( - self, - config: DictConfig, - ): - super().__init__() - self.forecast_steps = config.forecast_steps - self.learning_rate = config.lr - self.model = PsuedoIrradienceForecastor( - input_channels=config.input_channels, - input_size=config.input_size, - input_steps=config.input_steps, - output_channels=config.latent_channels, - conv3d_channels=config.conv3d_channels, - hidden_dim=config.hidden_dim, - kernel_size=config.kernel_size, - num_layers=config.num_layers, - output_steps=config.output_steps, - pv_meta_input_channels=config.pv_meta_input_channels, - ) - self.config = self.model.config - self.save_hyperparameters() - - def forward(self, x, meta): - return self.model(x, meta, output_latents=False) - - def training_step(self, batch, batch_idx): - tag = "train" - x, meta, y = batch - x = torch.nan_to_num(input=x, posinf=1.0, neginf=0.0) - y = torch.nan_to_num(input=y, posinf=1.0, neginf=0.0) - x = x.half() - y = y.half() - meta = meta.half() - y_hat = self(x, meta) - - mask = meta > 0.0 - - # Expand to match the ground truth shape - mask = einops.repeat(mask, "b c h w -> b c t h w", t=y.shape[2]) - - # calculate mse, mae - mse_loss = F.mse_loss(y_hat[mask], y[mask]) - nmae_loss = (y_hat[mask] - y[mask]).abs().mean() - loss = nmae_loss - self.log("loss", loss) - self.log_dict( - { - f"MSE/{tag}": mse_loss, - f"NMAE/{tag}": nmae_loss, - }, - ) - return loss - - def configure_optimizers(self): - return torch.optim.AdamW(self.parameters(), lr=self.learning_rate) - - -def create_train_dataloader(config: DictConfig): - return pseudo_irradiance_datapipe( - config.config, - start_time=datetime.datetime(2008, 1, 1), - end_time=datetime.datetime(2020, 12, 31), - use_sun=config.sun, - use_nwp=config.nwp, - use_sat=config.sat, - use_hrv=config.hrv, - use_pv=True, - use_topo=config.topo, - size=config.size, - use_future=config.use_future, - ) - - -def create_val_dataloader(config: DictConfig): - return pseudo_irradiance_datapipe( - config.config, - start_time=datetime.datetime(2021, 1, 1), - end_time=datetime.datetime(2021, 12, 31), - use_sun=config.sun, - use_nwp=config.nwp, - use_sat=config.sat, - use_hrv=config.hrv, - use_pv=True, - use_topo=config.topo, - size=config.size, - use_future=config.use_future, - ) - - -@hydra.main(version_base=None, config_path="configs", config_name="config") -def experiment(cfg: DictConfig) -> None: - print(OmegaConf.to_yaml(cfg)) - model = LitModel(cfg.model) - train_dataloader = create_train_dataloader(cfg.dataloader) - val_dataloader = create_val_dataloader(cfg.dataloader) - - 
model_checkpoint = ModelCheckpoint( - every_n_train_steps=100, - monitor="step", - mode="max", - save_last=True, - save_top_k=10, - ) - - from pytorch_lightning import loggers as pl_loggers - - tb_logger = pl_loggers.TensorBoardLogger(save_dir="logs/") - # early_stopping = EarlyStopping(monitor="loss") - trainer = pl.Trainer( - max_epochs=cfg.epochs, - precision=16 if cfg.fp16 else 32, - devices=[cfg.num_gpu] if not cfg.cpu else 1, - accelerator="auto" if not cfg.cpu else "cpu", - auto_select_gpus=False, - auto_lr_find=False, - log_every_n_steps=1, - # limit_val_batches=400 * args.accumulate, - # limit_train_batches=500 * args.accumulate, - accumulate_grad_batches=cfg.accumulate, - callbacks=[model_checkpoint], - logger=tb_logger, - ) - trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader) - - -if __name__ == "__main__": - experiment()
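As a minimal standalone sketch of the masked-loss logic that the later patches converge on (the mask is derived from the PV metadata and repeated across the time dimension, per patches 07-10), assuming dummy tensors and illustrative shapes of [B, C, H, W] for meta and [B, C, T, H, W] for the target and prediction; this is an illustration under those assumptions, not code from the patch series:

import einops
import torch
import torch.nn.functional as F

# Dummy shapes for illustration only
B, C, T, H, W = 2, 2, 12, 8, 8
meta = torch.rand(B, C, H, W)      # PV metadata; zero entries mark pixels with no PV system
y = torch.rand(B, C, T, H, W)      # ground-truth pseudo-irradiance
y_hat = torch.rand(B, C, T, H, W)  # model output

# Clean non-finite values, as training_step does before computing the loss
y = torch.nan_to_num(y, posinf=1.0, neginf=0.0)

# Score only pixels that have PV metadata, repeating the mask over the timesteps
mask = meta > 0.0
mask = einops.repeat(mask, "b c h w -> b c t h w", t=y.shape[2])

mse_loss = F.mse_loss(y_hat[mask], y[mask])
nmae_loss = (y_hat[mask] - y[mask]).abs().mean()
print(f"MSE: {mse_loss.item():.4f}, NMAE: {nmae_loss.item():.4f}")

Before the experiments were moved to a separate repository in patch 13, the Hydra entry point in experiments/run.py could in principle be launched with standard Hydra overrides of the keys defined in experiments/configs/config.yaml, e.g. "python experiments/run.py cpu=true epochs=1".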