Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Don't interpolate NWP data #338

Merged
merged 4 commits into from
Nov 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion nowcasting_dataset/config/gcp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ input_data:
gsp:
forecast_minutes: 60
gsp_zarr_path: gs://solar-pv-nowcasting-data/PV/GSP/v2/pv_gsp.zarr
history_minutes: 30
history_minutes: 60
nwp:
forecast_minutes: 60
history_minutes: 30
Expand Down
1 change: 1 addition & 0 deletions nowcasting_dataset/config/on_premises.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ input_data:
- hcc
nwp_image_size_pixels: 64
nwp_zarr_path: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/NWP/UK_Met_Office/UKV/zarr/UKV__2018-01_to_2019-12__chunks__variable10__init_time1__step1__x548__y704__.zarr
history_minutes: 60

#---------------------- PV -------------------
pv:
Expand Down
2 changes: 1 addition & 1 deletion nowcasting_dataset/data_sources/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def __post_init__(self):
assert self.forecast_length >= 0
assert self.history_minutes % self.sample_period_minutes == 0, (
f"sample period ({self.sample_period_minutes}) minutes "
f"does not fit into historic minutes ({self.forecast_minutes})"
f"does not fit into historic minutes ({self.history_minutes})"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch!

)
assert self.forecast_minutes % self.sample_period_minutes == 0, (
f"sample period ({self.sample_period_minutes}) minutes "
Expand Down
11 changes: 7 additions & 4 deletions nowcasting_dataset/data_sources/nwp/nwp_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,7 @@ def _post_process_example(
"""Selects the time window around t0 and casts to float16 (no 5-minutely resampling)."""
start_dt = self._get_start_dt(t0_dt)
end_dt = self._get_end_dt(t0_dt)
selected_data = selected_data.resample({"target_time": "5T"})
selected_data = selected_data.interpolate()

selected_data = selected_data.sel(target_time=slice(start_dt, end_dt))
selected_data = selected_data.rename({"target_time": "time", "variable": "channels"})
selected_data.data = selected_data.data.astype(np.float16)
Expand All @@ -133,8 +132,12 @@ def datetime_index(self) -> pd.DatetimeIndex:
target_times = np.unique(target_times)
target_times = np.sort(target_times)
target_times = pd.DatetimeIndex(target_times)
resampler = pd.Series(0, index=target_times).resample("5T")
return resampler.ffill(limit=11).dropna().index
return target_times

@property
def sample_period_minutes(self) -> int:
"""Override the default sample minutes"""
return 60


def open_nwp(zarr_path: str, consolidated: bool) -> xr.Dataset:
Expand Down
1 change: 1 addition & 0 deletions tests/config/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ input_data:
- t
nwp_image_size_pixels: 2
nwp_zarr_path: tests/data/nwp_data/test.zarr
history_minutes: 60
pv:
pv_filename: tests/data/pv_data/test.nc
pv_metadata_filename: tests/data/pv_metadata/UK_PV_metadata.csv
Expand Down
18 changes: 11 additions & 7 deletions tests/data_sources/test_nwp_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@
def test_nwp_data_source_init(): # noqa: D103
_ = NWPDataSource(
zarr_path=NWP_ZARR_PATH,
history_minutes=30,
history_minutes=60,
forecast_minutes=60,
)


def test_nwp_data_source_open(): # noqa: D103
nwp = NWPDataSource(
zarr_path=NWP_ZARR_PATH,
history_minutes=30,
history_minutes=60,
forecast_minutes=60,
channels=["t"],
)
Expand All @@ -34,7 +34,7 @@ def test_nwp_data_source_open(): # noqa: D103
def test_nwp_data_source_batch(): # noqa: D103
nwp = NWPDataSource(
zarr_path=NWP_ZARR_PATH,
history_minutes=30,
history_minutes=60,
forecast_minutes=60,
channels=["t"],
)
Expand All @@ -47,13 +47,17 @@ def test_nwp_data_source_batch(): # noqa: D103

batch = nwp.get_batch(t0_datetimes=t0_datetimes, x_locations=x, y_locations=y)

assert batch.data.shape == (4, 1, 19, 2, 2)
# batch size 4
# channel 1
# time series: 1 in the past, 1 now, 1 in the future
# x,y of size 2
assert batch.data.shape == (4, 1, 3, 2, 2)


def test_nwp_get_contiguous_time_periods(): # noqa: D103
nwp = NWPDataSource(
zarr_path=NWP_ZARR_PATH,
history_minutes=30,
history_minutes=60,
forecast_minutes=60,
channels=["t"],
)
Expand All @@ -68,13 +72,13 @@ def test_nwp_get_contiguous_time_periods(): # noqa: D103
def test_nwp_get_contiguous_t0_time_periods(): # noqa: D103
nwp = NWPDataSource(
zarr_path=NWP_ZARR_PATH,
history_minutes=30,
history_minutes=60,
forecast_minutes=60,
channels=["t"],
)

contiguous_time_periods = nwp.get_contiguous_t0_time_periods()
correct_time_periods = pd.DataFrame(
[{"start_dt": pd.Timestamp("2019-01-01 00:30"), "end_dt": pd.Timestamp("2019-01-02 01:00")}]
[{"start_dt": pd.Timestamp("2019-01-01 01:00"), "end_dt": pd.Timestamp("2019-01-02 01:00")}]
)
pd.testing.assert_frame_equal(contiguous_time_periods, correct_time_periods)