diff --git a/nowcasting_dataset/config/gcp.yaml b/nowcasting_dataset/config/gcp.yaml index dd3a486b..ff47f511 100644 --- a/nowcasting_dataset/config/gcp.yaml +++ b/nowcasting_dataset/config/gcp.yaml @@ -7,7 +7,7 @@ input_data: gsp: forecast_minutes: 60 gsp_zarr_path: gs://solar-pv-nowcasting-data/PV/GSP/v2/pv_gsp.zarr - history_minutes: 30 + history_minutes: 60 nwp: forecast_minutes: 60 history_minutes: 30 diff --git a/nowcasting_dataset/config/on_premises.yaml b/nowcasting_dataset/config/on_premises.yaml index 55acec89..ee5d1e96 100644 --- a/nowcasting_dataset/config/on_premises.yaml +++ b/nowcasting_dataset/config/on_premises.yaml @@ -23,6 +23,7 @@ input_data: - hcc nwp_image_size_pixels: 64 nwp_zarr_path: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/NWP/UK_Met_Office/UKV/zarr/UKV__2018-01_to_2019-12__chunks__variable10__init_time1__step1__x548__y704__.zarr + history_minutes: 60 #---------------------- PV ------------------- pv: diff --git a/nowcasting_dataset/data_sources/data_source.py b/nowcasting_dataset/data_sources/data_source.py index 1dc825af..e80a2999 100644 --- a/nowcasting_dataset/data_sources/data_source.py +++ b/nowcasting_dataset/data_sources/data_source.py @@ -57,7 +57,7 @@ def __post_init__(self): assert self.forecast_length >= 0 assert self.history_minutes % self.sample_period_minutes == 0, ( f"sample period ({self.sample_period_minutes}) minutes " - f"does not fit into historic minutes ({self.forecast_minutes})" + f"does not fit into historic minutes ({self.history_minutes})" ) assert self.forecast_minutes % self.sample_period_minutes == 0, ( f"sample period ({self.sample_period_minutes}) minutes " diff --git a/nowcasting_dataset/data_sources/nwp/nwp_data_source.py b/nowcasting_dataset/data_sources/nwp/nwp_data_source.py index d5d0a7fa..e758bcef 100644 --- a/nowcasting_dataset/data_sources/nwp/nwp_data_source.py +++ b/nowcasting_dataset/data_sources/nwp/nwp_data_source.py @@ -114,8 +114,7 @@ def _post_process_example( 
"""Resamples to 5 minutely.""" start_dt = self._get_start_dt(t0_dt) end_dt = self._get_end_dt(t0_dt) - selected_data = selected_data.resample({"target_time": "5T"}) - selected_data = selected_data.interpolate() + selected_data = selected_data.sel(target_time=slice(start_dt, end_dt)) selected_data = selected_data.rename({"target_time": "time", "variable": "channels"}) selected_data.data = selected_data.data.astype(np.float16) @@ -133,8 +132,12 @@ def datetime_index(self) -> pd.DatetimeIndex: target_times = np.unique(target_times) target_times = np.sort(target_times) target_times = pd.DatetimeIndex(target_times) - resampler = pd.Series(0, index=target_times).resample("5T") - return resampler.ffill(limit=11).dropna().index + return target_times + + @property + def sample_period_minutes(self) -> int: + """Override the default sample minutes""" + return 60 def open_nwp(zarr_path: str, consolidated: bool) -> xr.Dataset: diff --git a/tests/config/test.yaml b/tests/config/test.yaml index 6afee7ac..7cfc3153 100644 --- a/tests/config/test.yaml +++ b/tests/config/test.yaml @@ -10,6 +10,7 @@ input_data: - t nwp_image_size_pixels: 2 nwp_zarr_path: tests/data/nwp_data/test.zarr + history_minutes: 60 pv: pv_filename: tests/data/pv_data/test.nc pv_metadata_filename: tests/data/pv_metadata/UK_PV_metadata.csv diff --git a/tests/data_sources/test_nwp_data_source.py b/tests/data_sources/test_nwp_data_source.py index fe7d785d..9200607d 100644 --- a/tests/data_sources/test_nwp_data_source.py +++ b/tests/data_sources/test_nwp_data_source.py @@ -15,7 +15,7 @@ def test_nwp_data_source_init(): # noqa: D103 _ = NWPDataSource( zarr_path=NWP_ZARR_PATH, - history_minutes=30, + history_minutes=60, forecast_minutes=60, ) @@ -23,7 +23,7 @@ def test_nwp_data_source_init(): # noqa: D103 def test_nwp_data_source_open(): # noqa: D103 nwp = NWPDataSource( zarr_path=NWP_ZARR_PATH, - history_minutes=30, + history_minutes=60, forecast_minutes=60, channels=["t"], ) @@ -34,7 +34,7 @@ def 
test_nwp_data_source_open(): # noqa: D103 def test_nwp_data_source_batch(): # noqa: D103 nwp = NWPDataSource( zarr_path=NWP_ZARR_PATH, - history_minutes=30, + history_minutes=60, forecast_minutes=60, channels=["t"], ) @@ -47,13 +47,17 @@ def test_nwp_data_source_batch(): # noqa: D103 batch = nwp.get_batch(t0_datetimes=t0_datetimes, x_locations=x, y_locations=y) - assert batch.data.shape == (4, 1, 19, 2, 2) + # batch size 4 + # channel 1 + # time series, 1 in the past, 1 now, 1 in the future + # x,y of size 2 + assert batch.data.shape == (4, 1, 3, 2, 2) def test_nwp_get_contiguous_time_periods(): # noqa: D103 nwp = NWPDataSource( zarr_path=NWP_ZARR_PATH, - history_minutes=30, + history_minutes=60, forecast_minutes=60, channels=["t"], ) @@ -68,13 +72,13 @@ def test_nwp_get_contiguous_time_periods(): # noqa: D103 def test_nwp_get_contiguous_t0_time_periods(): # noqa: D103 nwp = NWPDataSource( zarr_path=NWP_ZARR_PATH, - history_minutes=30, + history_minutes=60, forecast_minutes=60, channels=["t"], ) contiguous_time_periods = nwp.get_contiguous_t0_time_periods() correct_time_periods = pd.DataFrame( - [{"start_dt": pd.Timestamp("2019-01-01 00:30"), "end_dt": pd.Timestamp("2019-01-02 01:00")}] + [{"start_dt": pd.Timestamp("2019-01-01 01:00"), "end_dt": pd.Timestamp("2019-01-02 01:00")}] ) pd.testing.assert_frame_equal(contiguous_time_periods, correct_time_periods)