-
Notifications
You must be signed in to change notification settings - Fork 7
Issue/324 manager test #362
Changes from all commits
689ddff
4489256
7ce94fb
71d2228
6886b37
ffd94c1
b0f20c0
d7b3414
c22a078
94c52cd
984bd71
daf3ef6
82d850c
95c1a62
a3624fa
1d4aef9
03a2f16
0f2bef8
0545c52
7fc646c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -182,6 +182,8 @@ def create_batches( | |
| # Split locations per example into batches: | ||
| n_batches = len(spatial_and_temporal_locations_of_each_example) // batch_size | ||
| locations_for_batches = [] | ||
| logger.warning("xxxxx") | ||
| logger.warning(n_batches) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please make this warning a bit more verbose? |
||
| for batch_idx in range(n_batches): | ||
| start_example_idx = batch_idx * batch_size | ||
| end_example_idx = (batch_idx + 1) * batch_size | ||
|
|
@@ -193,7 +195,7 @@ def create_batches( | |
| # Loop round each batch: | ||
| for n_batches_processed, locations_for_batch in enumerate(locations_for_batches): | ||
| batch_idx = idx_of_first_batch + n_batches_processed | ||
| logger.debug(f"{self.__class__.__name__} creating batch {batch_idx}!") | ||
| logger.warning(f"{self.__class__.__name__} creating batch {batch_idx}!") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry if I've misunderstood but why does this message need to be a warning? |
||
|
|
||
| # Generate batch. | ||
| batch = self.get_batch( | ||
|
|
@@ -205,6 +207,7 @@ def create_batches( | |
| # Save batch to disk. | ||
| netcdf_filename = path_to_write_to / nd_utils.get_netcdf_filename(batch_idx) | ||
| batch.to_netcdf(netcdf_filename) | ||
| logger.warning(f"Save file to {netcdf_filename}") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above: Does this message really need to be a warning? |
||
|
|
||
| # Upload if necessary. | ||
| if ( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,15 +10,24 @@ | |
| _LOG = logging.getLogger("nowcasting_dataset") | ||
|
|
||
|
|
||
| def upload_and_delete_local_files(dst_path: str, local_path: Path): | ||
| def upload_and_delete_local_files(dst_path: Union[str, Path], local_path: Union[str, Path]): | ||
| """ | ||
| Upload an entire folder and delete local files to either AWS or GCP | ||
| """ | ||
| _LOG.info("Uploading!") | ||
| filesystem = get_filesystem(dst_path) | ||
| filesystem.put(str(local_path), dst_path, recursive=True) | ||
|
|
||
| _LOG.warning(f"moving files from {local_path} to {dst_path}") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this need to be a warning? |
||
|
|
||
| _LOG.warning(get_all_filenames_in_path(local_path)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please make these two messages a bit more verbose (or maybe remove these two messages?) Also, do these need to be warnings? |
||
| _LOG.warning(get_all_filenames_in_path(dst_path)) | ||
|
|
||
| filesystem.put(str(local_path) + "/", str(dst_path) + "/", recursive=True) | ||
| delete_all_files_in_temp_path(local_path) | ||
|
|
||
| _LOG.warning(get_all_filenames_in_path(local_path)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above :) |
||
| _LOG.warning(get_all_filenames_in_path(dst_path)) | ||
|
|
||
|
|
||
| def get_filesystem(path: Union[str, Path]) -> fsspec.AbstractFileSystem: | ||
| r"""Get the fsspect FileSystem from a path. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -369,6 +369,14 @@ def create_batches(self, overwrite_batches: bool) -> None: | |
| for worker_id, (data_source_name, data_source) in enumerate( | ||
| self.data_sources.items() | ||
| ): | ||
|
|
||
| if len(locations_for_split) == 0: | ||
| # not raising error as this is ok for unittests | ||
| logger.warning( | ||
| f"Not create batches for {split_name} as there are no locations" | ||
| ) | ||
| break | ||
|
|
||
| # Get indexes of first batch and example. And subset locations_for_split. | ||
| idx_of_first_batch = first_batches_to_create[split_name][data_source_name] | ||
| idx_of_first_example = idx_of_first_batch * self.config.process.batch_size | ||
|
|
@@ -389,18 +397,25 @@ def create_batches(self, overwrite_batches: bool) -> None: | |
| nd_fs_utils.makedirs(dst_path, exist_ok=True) | ||
| if self.save_batches_locally_and_upload: | ||
| nd_fs_utils.makedirs(local_temp_path, exist_ok=True) | ||
| else: | ||
| logger.warning( | ||
| f"Not saving uploading batches so have not made {local_temp_path}" | ||
| ) | ||
|
|
||
| # Submit data_source.create_batches task to the worker process. | ||
| future = executor.submit( | ||
| data_source.create_batches, | ||
| print(executor) | ||
| # future = executor.submit( | ||
| logger.warning("Making batches") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do these need to be warnings? |
||
| logger.warning(locations) | ||
| data_source.create_batches( | ||
| spatial_and_temporal_locations_of_each_example=locations, | ||
| idx_of_first_batch=idx_of_first_batch, | ||
| batch_size=self.config.process.batch_size, | ||
| dst_path=dst_path, | ||
| local_temp_path=local_temp_path, | ||
| upload_every_n_batches=self.config.process.upload_every_n_batches, | ||
| ) | ||
| future_create_batches_jobs.append(future) | ||
| # future_create_batches_jobs.append(future) | ||
|
|
||
| # Wait for all futures to finish: | ||
| for future, data_source_name in zip( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,5 +21,5 @@ plotly | |
| tqdm | ||
| black | ||
| pre-commit | ||
| fsspec | ||
| fsspec==2021.7.0 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ooh, interesting, are we sure we can't use more recent versions of `fsspec`? |
||
| pathy | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this warning still required? 🙂