From 689ddff24bc7bdb421259e8e9dfa9e66846a915a Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Wed, 10 Nov 2021 12:17:43 +0000 Subject: [PATCH 1/6] add make batches test for manager --- nowcasting_dataset/dataset/split/split.py | 5 +- nowcasting_dataset/filesystem/utils.py | 2 +- nowcasting_dataset/manager.py | 4 ++ tests/config/test.yaml | 3 ++ tests/test_manager.py | 62 +++++++++++++++++++++++ 5 files changed, 74 insertions(+), 2 deletions(-) diff --git a/nowcasting_dataset/dataset/split/split.py b/nowcasting_dataset/dataset/split/split.py index 4f1e134b..1a6f1eb4 100644 --- a/nowcasting_dataset/dataset/split/split.py +++ b/nowcasting_dataset/dataset/split/split.py @@ -200,6 +200,9 @@ def split_data( logger.debug("Split data done!") for split_name, dt in split_datetimes._asdict().items(): - logger.debug(f"{split_name} has {len(dt):,d} datetimes, from {dt[0]} to {dt[-1]}") + if len(dt) == 0: + logger.warning(f"{split_name} has {len(dt):,d} datetimes") + else: + logger.debug(f"{split_name} has {len(dt):,d} datetimes, from {dt[0]} to {dt[-1]}") return split_datetimes diff --git a/nowcasting_dataset/filesystem/utils.py b/nowcasting_dataset/filesystem/utils.py index 4f9feed9..12bb27f7 100644 --- a/nowcasting_dataset/filesystem/utils.py +++ b/nowcasting_dataset/filesystem/utils.py @@ -16,7 +16,7 @@ def upload_and_delete_local_files(dst_path: str, local_path: Path): """ _LOG.info("Uploading!") filesystem = get_filesystem(dst_path) - filesystem.put(str(local_path), dst_path, recursive=True) + filesystem.put(str(local_path), str(dst_path), recursive=True) delete_all_files_in_temp_path(local_path) diff --git a/nowcasting_dataset/manager.py b/nowcasting_dataset/manager.py index 9383a73e..5a180fed 100644 --- a/nowcasting_dataset/manager.py +++ b/nowcasting_dataset/manager.py @@ -369,6 +369,10 @@ def create_batches(self, overwrite_batches: bool) -> None: for worker_id, (data_source_name, data_source) in enumerate( self.data_sources.items() ): + + if len(locations_for_split) 
== 0: + break + # Get indexes of first batch and example. And subset locations_for_split. idx_of_first_batch = first_batches_to_create[split_name][data_source_name] idx_of_first_example = idx_of_first_batch * self.config.process.batch_size diff --git a/tests/config/test.yaml b/tests/config/test.yaml index 7cfc3153..37f846cc 100644 --- a/tests/config/test.yaml +++ b/tests/config/test.yaml @@ -30,3 +30,6 @@ process: local_temp_path: ~/temp/ seed: 1234 upload_every_n_batches: 16 + n_train_batches: 2 + n_validation_batches: 0 + n_test_batches: 0 diff --git a/tests/test_manager.py b/tests/test_manager.py index 81daf75e..1dc00174 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -1,4 +1,6 @@ """Test Manager.""" +import os +import tempfile from datetime import datetime from pathlib import Path @@ -77,3 +79,63 @@ def test_get_daylight_datetime_index(): # TODO: Issue #322: Test the other Manager methods! + + +def test_batches(): + """Test that batches can be made""" + filename = Path(nowcasting_dataset.__file__).parent.parent / "tests" / "data" / "sat_data.zarr" + + sat = SatelliteDataSource( + zarr_path=filename, + history_minutes=30, + forecast_minutes=60, + image_size_pixels=64, + meters_per_pixel=2000, + channels=("HRV",), + ) + + filename = ( + Path(nowcasting_dataset.__file__).parent.parent / "tests" / "data" / "gsp" / "test.zarr" + ) + + gsp = GSPDataSource( + zarr_path=filename, + start_dt=datetime(2019, 1, 1), + end_dt=datetime(2019, 1, 2), + history_minutes=30, + forecast_minutes=60, + image_size_pixels=64, + meters_per_pixel=2000, + ) + + manager = Manager() + + # load config + local_path = Path(nowcasting_dataset.__file__).parent.parent + filename = local_path / "tests" / "config" / "test.yaml" + manager.load_yaml_configuration(filename=filename) + + with tempfile.TemporaryDirectory(dir="./") as local_temp_path, tempfile.TemporaryDirectory( + dir="./" + ) as dst_path: + + # set local temp path, and dst path + manager.config.output_data.filepath = 
Path(dst_path) + manager.local_temp_path = Path(local_temp_path) + + # set GSP and satellite as the data sources + manager.data_sources = {"gsp": gsp, "sat": sat} + manager.data_source_which_defines_geospatial_locations = gsp + + # make file for locations + manager.create_files_specifying_spatial_and_temporal_locations_of_each_example_if_necessary() # noqa 101 + + # make batches + manager.create_batches(overwrite_batches=True) + + assert os.path.exists(f"{dst_path}/train") + assert os.path.exists(f"{dst_path}/train/gsp") + assert os.path.exists(f"{dst_path}/train/gsp/000000.nc") + assert os.path.exists(f"{dst_path}/train/sat/000000.nc") + assert os.path.exists(f"{dst_path}/train/gsp/000001.nc") + assert os.path.exists(f"{dst_path}/train/sat/000001.nc") From 4489256188377325a984cb49df02cfa1e0590fab Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Wed, 10 Nov 2021 12:19:53 +0000 Subject: [PATCH 2/6] tidy --- tests/test_manager.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_manager.py b/tests/test_manager.py index 1dc00174..00d07eea 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -115,9 +115,7 @@ def test_batches(): filename = local_path / "tests" / "config" / "test.yaml" manager.load_yaml_configuration(filename=filename) - with tempfile.TemporaryDirectory(dir="./") as local_temp_path, tempfile.TemporaryDirectory( - dir="./" - ) as dst_path: + with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path: # noqa 101 # set local temp path, and dst path manager.config.output_data.filepath = Path(dst_path) From 7ce94fb1af811202ef90f70f54ed3cc3a411b83c Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Wed, 10 Nov 2021 12:23:34 +0000 Subject: [PATCH 3/6] add save config test --- nowcasting_dataset/filesystem/utils.py | 2 +- tests/test_manager.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/nowcasting_dataset/filesystem/utils.py 
b/nowcasting_dataset/filesystem/utils.py index 12bb27f7..d16e5d5a 100644 --- a/nowcasting_dataset/filesystem/utils.py +++ b/nowcasting_dataset/filesystem/utils.py @@ -10,7 +10,7 @@ _LOG = logging.getLogger("nowcasting_dataset") -def upload_and_delete_local_files(dst_path: str, local_path: Path): +def upload_and_delete_local_files(dst_path: Union[str, Path], local_path: Union[str, Path]): """ Upload an entire folder and delete local files to either AWS or GCP """ diff --git a/tests/test_manager.py b/tests/test_manager.py index 00d07eea..3b6a8d1b 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -137,3 +137,25 @@ def test_batches(): assert os.path.exists(f"{dst_path}/train/sat/000000.nc") assert os.path.exists(f"{dst_path}/train/gsp/000001.nc") assert os.path.exists(f"{dst_path}/train/sat/000001.nc") + + +def test_save_config(): + """Test that configuration file is saved""" + + manager = Manager() + + # load config + local_path = Path(nowcasting_dataset.__file__).parent.parent + filename = local_path / "tests" / "config" / "test.yaml" + manager.load_yaml_configuration(filename=filename) + + with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path: # noqa 101 + + # set local temp path, and dst path + manager.config.output_data.filepath = Path(dst_path) + manager.local_temp_path = Path(local_temp_path) + + # save config + manager.save_yaml_configuration() + + assert os.path.exists(f"{dst_path}/configuration.yaml") From 71d2228758652c4b264109f1481d8b8c675234d8 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Wed, 10 Nov 2021 12:27:11 +0000 Subject: [PATCH 4/6] self PR review --- nowcasting_dataset/dataset/split/split.py | 1 + tests/test_manager.py | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/nowcasting_dataset/dataset/split/split.py b/nowcasting_dataset/dataset/split/split.py index 1a6f1eb4..f873728e 100644 --- a/nowcasting_dataset/dataset/split/split.py +++ 
b/nowcasting_dataset/dataset/split/split.py @@ -201,6 +201,7 @@ def split_data( logger.debug("Split data done!") for split_name, dt in split_datetimes._asdict().items(): if len(dt) == 0: + # Only log a warning here, because an empty split may happen during unit tests logger.warning(f"{split_name} has {len(dt):,d} datetimes") else: logger.debug(f"{split_name} has {len(dt):,d} datetimes, from {dt[0]} to {dt[-1]}") diff --git a/tests/test_manager.py b/tests/test_manager.py index 3b6a8d1b..0056ee2e 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -78,9 +78,6 @@ def test_get_daylight_datetime_index(): np.testing.assert_array_equal(t0_datetimes, correct_t0_datetimes) -# TODO: Issue #322: Test the other Manager methods! - - def test_batches(): """Test that batches can be made""" filename = Path(nowcasting_dataset.__file__).parent.parent / "tests" / "data" / "sat_data.zarr" From 80070f083841d72693c23192d35d9ab9fb437dd3 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Wed, 10 Nov 2021 14:10:57 +0000 Subject: [PATCH 5/6] fsspec==2021.7.0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 714d4269..c76e5b65 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,5 +21,5 @@ plotly tqdm black pre-commit -fsspec +fsspec==2021.7.0 pathy From acb298b32c5210366e2b99d4e9b1e18f48ed7489 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Wed, 10 Nov 2021 15:01:46 +0000 Subject: [PATCH 6/6] use 'copy' not 'put' --- nowcasting_dataset/filesystem/utils.py | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nowcasting_dataset/filesystem/utils.py b/nowcasting_dataset/filesystem/utils.py index d16e5d5a..25599a8c 100644 --- a/nowcasting_dataset/filesystem/utils.py +++ b/nowcasting_dataset/filesystem/utils.py @@ -16,7 +16,7 @@ def upload_and_delete_local_files(dst_path: Union[str, Path], local_path: Union[ """ _LOG.info("Uploading!") filesystem = get_filesystem(dst_path) 
- filesystem.put(str(local_path), str(dst_path), recursive=True) + filesystem.copy(str(local_path), str(dst_path), recursive=True) delete_all_files_in_temp_path(local_path) diff --git a/requirements.txt b/requirements.txt index c76e5b65..714d4269 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,5 +21,5 @@ plotly tqdm black pre-commit -fsspec==2021.7.0 +fsspec pathy