Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit b5a6232

Browse files
committed
I think #223 is finished, but test_datamodule is still failing. That might be a separate issue; see #276.
1 parent 1a3cefb commit b5a6232

File tree

3 files changed

+17
-14
lines changed

3 files changed

+17
-14
lines changed

nowcasting_dataset/dataset/datamodule.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ class NowcastingDataModule(pl.LightningDataModule):
7878
pv_load_azimuth_and_elevation: bool = False
7979
split_method: SplitMethod = SplitMethod.DAY # which split method should be used
8080
seed: Optional[int] = None # seed used to make quasi random split data
81+
t0_datetime_freq: str = "5T" # Frequency of the t0 datetimes.
8182

8283
skip_n_train_batches: int = 0 # number of train batches to skip
8384
skip_n_validation_batches: int = 0 # number of validation batches to skip
@@ -379,10 +380,14 @@ def _get_t0_datetimes(self) -> pd.DatetimeIndex:
379380

380381
# TODO: Allow user to configure the frequency using the config yaml file.
381382
# https://github.com/openclimatefix/nowcasting_dataset/issues/277
382-
return nd_time.time_periods_to_datetimes(
383-
time_periods=intersection_of_t0_time_periods, freq="30T"
383+
t0_datetimes = nd_time.time_periods_to_datetimes(
384+
time_periods=intersection_of_t0_time_periods, freq=self.t0_datetime_freq
384385
)
385386

387+
# Align to the nearest t0_datetime_freq. For example, if t0_datetime_freq is '5T'
388+
# then ensure the minutes past the hour are exactly divisible by 5.
389+
return t0_datetimes.round(self.t0_datetime_freq)
390+
386391
def _check_has_prepared_data(self):
387392
if not self.has_prepared_data:
388393
raise RuntimeError("Must run prepare_data() first!")

nowcasting_dataset/time.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def select_daylight_datetimes(
5555
return datetimes[mask]
5656

5757

58-
def period_to_datetime_index(period: pd.DataFrame, freq: str) -> pd.DatetimeIndex:
58+
def period_to_datetime_index(period: pd.Series, freq: str) -> pd.DatetimeIndex:
5959
"""Return a DatetimeIndex from period['start_dt'] to period['end_dt'] at frequency freq."""
6060
return pd.date_range(period["start_dt"], period["end_dt"], freq=freq)
6161

tests/test_dataset.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,16 @@
77
from nowcasting_dataset.dataset.batch import Batch
88

99

10+
def _get_t0_datetimes(data_source, freq) -> pd.DatetimeIndex:
11+
t0_periods = data_source.get_contiguous_t0_time_periods()
12+
t0_datetimes = nd_time.time_periods_to_datetimes(t0_periods, freq=freq)
13+
return t0_datetimes
14+
15+
1016
@pytest.fixture
1117
def dataset(sat_data_source, general_data_source):
12-
all_datetimes = sat_data_source.datetime_index()
13-
t0_datetimes = nd_time.get_t0_datetimes(
14-
datetimes=all_datetimes, total_seq_length=2, history_duration=pd.Timedelta(0)
15-
)
18+
t0_datetimes = _get_t0_datetimes(sat_data_source, freq="5T")
19+
1620
return NowcastingDataset(
1721
batch_size=8,
1822
n_batches_per_epoch_per_worker=64,
@@ -24,13 +28,7 @@ def dataset(sat_data_source, general_data_source):
2428

2529
@pytest.fixture
2630
def dataset_gsp(gsp_data_source, general_data_source):
27-
all_datetimes = gsp_data_source.datetime_index()
28-
t0_datetimes = nd_time.get_t0_datetimes(
29-
datetimes=all_datetimes,
30-
total_seq_length=2,
31-
history_duration=pd.Timedelta(0),
32-
max_gap=nd_time.THIRTY_MINUTES,
33-
)
31+
t0_datetimes = _get_t0_datetimes(gsp_data_source, freq="30T")
3432

3533
return NowcastingDataset(
3634
batch_size=8,

0 commit comments

Comments
 (0)