diff --git a/.ci_helpers/py3.7.yaml b/.ci_helpers/py3.7.yaml index 98e3d9d82..a09696fde 100644 --- a/.ci_helpers/py3.7.yaml +++ b/.ci_helpers/py3.7.yaml @@ -10,10 +10,10 @@ dependencies: - pynmea2 - pytz - scipy - - xarray<0.18 + - xarray - zarr - - fsspec==0.8.7 + - fsspec - requests - aiohttp - - s3fs==0.5.2 + - s3fs - mamba diff --git a/.ci_helpers/py3.8.yaml b/.ci_helpers/py3.8.yaml index 98f582abd..047a69220 100644 --- a/.ci_helpers/py3.8.yaml +++ b/.ci_helpers/py3.8.yaml @@ -10,10 +10,10 @@ dependencies: - pynmea2 - pytz - scipy - - xarray<0.18 + - xarray - zarr - - fsspec==0.8.7 + - fsspec - requests - aiohttp - - s3fs==0.5.2 + - s3fs - mamba diff --git a/.ci_helpers/py3.9.yaml b/.ci_helpers/py3.9.yaml index 8ffd88858..4b065d3d2 100644 --- a/.ci_helpers/py3.9.yaml +++ b/.ci_helpers/py3.9.yaml @@ -10,10 +10,10 @@ dependencies: - pynmea2 - pytz - scipy - - xarray<0.18 + - xarray - zarr - - fsspec==0.8.7 + - fsspec - requests - aiohttp - - s3fs==0.5.2 + - s3fs - mamba diff --git a/.ci_helpers/run-test.py b/.ci_helpers/run-test.py index 965529dd2..37903a24e 100644 --- a/.ci_helpers/run-test.py +++ b/.ci_helpers/run-test.py @@ -6,6 +6,7 @@ import argparse import glob import os +import re import shutil import sys from pathlib import Path @@ -22,6 +23,16 @@ ExitCode.NO_TESTS_COLLECTED: 5, } +MODULES_TO_TEST = { + "root": {}, # This is to test the root folder. + "convert": {}, + "calibrate": {}, + "echodata": {}, + "preprocess": {}, + "utils": {}, + "old": {"extra_globs": ["echopype/convert/convert.py", "echopype/process/*"]}, +} + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run tests listed.") parser.add_argument( @@ -40,6 +51,11 @@ action="store_true", help="Optional flag for running tests locally, not in continuous integration.", ) + parser.add_argument( + "--include-cov", + action="store_true", + help="Optional flag for including coverage. 
Exports to coverage.xml by default.", + ) args = parser.parse_args() if args.local: temp_path = Path("temp_echopype_output") @@ -50,69 +66,66 @@ if dump_path.exists(): shutil.rmtree(dump_path) echopype_folder = Path("echopype") - file_list = glob.glob(str(echopype_folder / "**" / "*.py")) + file_list = glob.glob(str(echopype_folder / "**" / "*.py"), recursive=True) else: file_list = args.touchedfiles.split(",") pytest_args = [] if args.pytest_args: pytest_args = args.pytest_args.split(",") + if args.include_cov: + # Checks for cov in pytest_args + for arg in pytest_args: + if re.match("--cov", arg) is not None: + raise ValueError( + "pytest args may not have any cov arguments if --include-cov is set." + ) + pytest_args = pytest_args + [ + "--cov-report=xml", + "--cov-append", + ] test_to_run = {} - for f in file_list: - file_path = Path(f) - file_name, file_ext = os.path.splitext(os.path.basename(f)) - if file_ext == ".py": - if any( - [ - (file_path.match("echopype/convert/*")), - (file_path.match("echopype/tests/test_convert*")), - ] - ): - if "convert" not in test_to_run: - test_to_run["convert"] = [] - test_to_run["convert"].append(file_path) - elif any( - [ - (file_path.match("echopype/calibrate/*")), - (file_path.match("echopype/tests/test_calibrate*")), - ] - ): - if "calibrate" not in test_to_run: - test_to_run["calibrate"] = [] - test_to_run["calibrate"].append(file_path) - elif any( - [ - (file_path.match("echopype/echodata/*")), - (file_path.match("echopype/tests/test_echodata*")), - ] - ): - if "echodata" not in test_to_run: - test_to_run["echodata"] = [] - test_to_run["echodata"].append(file_path) - elif any( - [ - (file_path.match("echopype/preprocess/*")), - (file_path.match("echopype/tests/test_preprocess*")), - ] - ): - if "preprocess" not in test_to_run: - test_to_run["preprocess"] = [] - test_to_run["preprocess"].append(file_path) - elif any( - [ - (file_path.match("echopype/convert/convert.py")), - (file_path.match("echopype/process/*")), - 
(file_path.match("echopype/tests/test_old.py")), - ] - ): - if "old" not in test_to_run: - test_to_run["old"] = [] - test_to_run["old"].append(file_path) + for module, mod_extras in MODULES_TO_TEST.items(): + if module == "root": + file_globs = [ + "echopype/*", + "echopype/tests/*", + ] + else: + file_globs = [ + f"echopype/{module}/*", + f"echopype/tests/{module}/*", + ] + if "extra_globs" in mod_extras: + file_globs = file_globs + mod_extras["extra_globs"] + for f in file_list: + file_path = Path(f) + file_name, file_ext = os.path.splitext(os.path.basename(f)) + if file_ext == ".py": + if any(((file_path.match(fg)) for fg in file_globs)): + if module not in test_to_run: + test_to_run[module] = [] + test_to_run[module].append(file_path) + original_pytest_args = pytest_args.copy() total_exit_codes = [] for k, v in test_to_run.items(): print(f"=== RUNNING {k.upper()} TESTS===") print(f"Touched files: {','.join([os.path.basename(p) for p in v])}") - test_files = glob.glob(f"echopype/tests/test_{k}*.py") + if k == "root": + file_glob_str = "echopype/tests/test_*.py" + cov_mod_arg = ["--cov=echopype"] + else: + file_glob_str = f"echopype/tests/{k}/*.py" + cov_mod_arg = [f"--cov=echopype/{k}"] + if args.include_cov: + if k == "old": + pytest_args = original_pytest_args + [ + "--cov=echopype/convert", + "--cov=echopype/process", + ] + else: + pytest_args = original_pytest_args + cov_mod_arg + test_files = glob.glob(file_glob_str) final_args = pytest_args + test_files print(f"Pytest args: {final_args}") exit_code = pytest.main(final_args) @@ -121,7 +134,7 @@ if len(total_exit_codes) == 0: print("No test(s) were run.") sys.exit(0) - if all([True if e == 0 else False for e in total_exit_codes]): + if all(True if e == 0 else False for e in total_exit_codes): print("All test run successful") sys.exit(0) else: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 42ac4d46e..d2ddea7e9 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ 
-56,11 +56,11 @@ jobs: - name: Cache conda uses: actions/cache@v2 env: - # Increase this value to reset cache if .ci_helpers/py${{ matrix.python-version }}.yaml has not changed + # Increase this value to reset cache if '.ci_helpers/py{0}.yaml' has not changed CACHE_NUMBER: 0 with: path: ~/conda_pkgs_dir - key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('.ci_helpers/py${{ matrix.python-version }}.yaml') }} + key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles(format('.ci_helpers/py{0}.yaml', matrix.python-version)) }} - name: Setup miniconda uses: conda-incubator/setup-miniconda@v2 with: @@ -87,7 +87,7 @@ jobs: - name: Running All Tests shell: bash -l {0} run: | - python .ci_helpers/run-test.py --local --pytest-args="--cov=echopype,--cov-report=xml,--log-cli-level=WARNING,-vv,--disable-warnings" |& tee ci_test_log.log + pytest -vv -rx --cov=echopype --cov-report=xml --log-cli-level=WARNING --disable-warnings |& tee ci_${{ matrix.python-version }}_test_log.log - name: Upload ci test log if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml deleted file mode 100644 index f526ace73..000000000 --- a/.github/workflows/lint.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: Lint code - -on: - push: - paths-ignore: - - '.ci_helpers/docker/**' - pull_request: - paths-ignore: - - '.ci_helpers/docker/**' - -jobs: - lint-test: - runs-on: ubuntu-20.04 - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: pre-commit/action@v2.0.0 diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index a626794bf..dc246250f 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -52,11 +52,11 @@ jobs: - name: Cache conda uses: actions/cache@v2 env: - # Increase this value to reset cache if .ci_helpers/py${{ matrix.python-version }}.yaml has not changed + # Increase this value to reset cache if '.ci_helpers/py{0}.yaml' has not 
changed CACHE_NUMBER: 0 with: path: ~/conda_pkgs_dir - key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('.ci_helpers/py${{ matrix.python-version }}.yaml') }} + key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles(format('.ci_helpers/py{0}.yaml', matrix.python-version)) }} - name: Setup miniconda uses: conda-incubator/setup-miniconda@v2 with: @@ -87,17 +87,17 @@ jobs: format: 'csv' - name: Print Changed files shell: bash -l {0} - run: echo "${{ steps.files.outputs.added_modified }}" + run: echo "${{ steps.files.outputs.added_modified_renamed }}" - name: Running all Tests if: contains(github.event.pull_request.labels.*.name, 'Needs Complete Testing') shell: bash -l {0} run: | - python .ci_helpers/run-test.py --local --pytest-args="--cov=echopype,--cov-report=xml,--log-cli-level=WARNING,-vv,--disable-warnings" |& tee ci_test_log.log + pytest -vv -rx --cov=echopype --cov-report=xml --log-cli-level=WARNING --disable-warnings |& tee ci_${{ matrix.python-version }}_test_log.log - name: Running Tests if: "!contains(github.event.pull_request.labels.*.name, 'Needs Complete Testing')" shell: bash -l {0} run: | - python .ci_helpers/run-test.py --pytest-args="--cov=echopype,--cov-report=xml,--log-cli-level=WARNING,-vv,--disable-warnings" ${{ steps.files.outputs.added_modified }} |& tee ci_test_log.log + python .ci_helpers/run-test.py --pytest-args="--log-cli-level=WARNING,-vv,-rx,--disable-warnings" --include-cov ${{ steps.files.outputs.added_modified_renamed }} |& tee ci_test_log.log - name: Upload ci test log if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 diff --git a/README.md b/README.md index 890683b41..596d1f073 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,46 @@
{escape(repr(self))}" else: xr_collections = [] - for group in self.__group_map.keys(): + for group in self.group_map.keys(): if isinstance(getattr(self, group), xr.Dataset): xr_data = getattr(self, group)._repr_html_() xr_collections.append( HtmlTemplate.element_template.format( # noqa group_id=group + str(uuid.uuid4()), group=group, - group_name=self.__group_map[group]["name"], - group_description=self.__group_map[group][ - "description" - ], + group_name=self.group_map[group]["name"], + group_description=self.group_map[group]["description"], xr_data=xr_data, ) ) @@ -98,23 +111,211 @@ def _repr_html_(self) -> str: return html_repr def __setup_groups(self): - self.__group_map = OrderedDict(_get_convention()["groups"]) - for group in self.__group_map.keys(): + for group in self.group_map.keys(): setattr(self, group, None) - def __read_converted(self, converted_raw_path): + def __read_converted(self, converted_raw_path: Optional["PathHint"]): if converted_raw_path is not None: self._check_path(converted_raw_path) converted_raw_path = self._sanitize_path(converted_raw_path) self._load_file(converted_raw_path) - self.sonar_model = self.top.keywords + + if isinstance(converted_raw_path, fsspec.FSMap): + # Convert fsmap to Path so it can be used + # for retrieving the path strings + converted_raw_path = Path(converted_raw_path.root) self.converted_raw_path = converted_raw_path + def compute_range( + self, + env_params=None, + azfp_cal_type=None, + ek_waveform_mode=None, + ek_encode_mode="complex", + ): + """ + Computes the range of the data contained in this `EchoData` object, in meters. + + This method only applies to `sonar_model`s of `"AZFP"`, `"EK60"`, and `"EK80"`. + If the `sonar_model` is not `"AZFP"`, `"EK60"`, or `"EK80"`, an error is raised. 
+ + Parameters + ---------- + env_params: dict + This dictionary should contain either: + - `"sound_speed"`: `float` + - `"temperature"`, `"salinity"`, and `"pressure"`: `float`s, + in which case the sound speed will be calculated. + If the `sonar_model` is `"EK60"` or `"EK80"`, and + `EchoData.environment.sound_speed_indicative` exists, then this parameter + does not need to be specified. + azfp_cal_type : {"Sv", "Sp"}, optional + - `"Sv"` for calculating volume backscattering strength + - `"Sp"` for calculating point backscattering strength. + This parameter is only used if `sonar_model` is `"AZFP"`, + and in that case it must be specified. + ek_waveform_mode : {"CW", "BB"}, optional + - `"CW"` for CW-mode samples, either recorded as complex or power samples + - `"BB"` for BB-mode samples, recorded as complex samples + This parameter is only used if `sonar_model` is `"EK60"` or `"EK80"`, + and in those cases it must be specified. + ek_encode_mode : {"complex", "power"}, optional + For EK80 data, range can be computed from complex or power samples. + The type of sample used can be specified with this parameter. + - `"complex"` to use complex samples + - `"power"` to use power samples + This parameter is only used if `sonar_model` is `"EK80"`. + + Returns + ------- + xr.DataArray + The range of the data in meters. + + Raises + ------ + ValueError + - When `sonar_model` is `"AZFP"` but `azfp_cal_type` is not specified or is `None`. + - When `sonar_model` is `"EK60"` or `"EK80"` but `ek_waveform_mode` + is not specified or is `None`. + - When `sonar_model` is `"EK60"` but `waveform_mode` is `"BB"` (EK60 cannot have + broadband samples). + - When `sonar_model` is `"AZFP"` and `env_params` does not contain + either `"sound_speed"` or all of `"temperature"`, `"salinity"`, and `"pressure"`. 
+ - When `sonar_model` is `"EK60"` or `"EK80"`, + EchoData.environment.sound_speed_indicative does not exist, + and `env_params` does not contain either `"sound_speed"` or all of `"temperature"`, + `"salinity"`, and `"pressure"`. + - When `sonar_model` is not `"AZFP"`, `"EK60"`, or `"EK80"`. + """ + + def squeeze_non_scalar(n): + if not np.isscalar(n): + n = n.squeeze() + return n + + if "sound_speed" in env_params: + sound_speed = squeeze_non_scalar(env_params["sound_speed"]) + elif all( + [param in env_params for param in ("temperature", "salinity", "pressure")] + ): + sound_speed = calc_sound_speed( + squeeze_non_scalar(env_params["temperature"]), + squeeze_non_scalar(env_params["salinity"]), + squeeze_non_scalar(env_params["pressure"]), + formula_source="AZFP" if self.sonar_model == "AZFP" else "Mackenzie", + ) + elif ( + self.sonar_model in ("EK60", "EK80") + and "sound_speed_indicative" in self.environment + ): + sound_speed = squeeze_non_scalar(self.environment["sound_speed_indicative"]) + else: + raise ValueError( + "sound speed must be specified in env_params, " + "with temperature/salinity/pressure in env_params to be calculated, " + "or in EchoData.environment.sound_speed_indicative for EK60 and EK80 sonar models" + ) + + if self.sonar_model == "AZFP": + cal_type = azfp_cal_type + if cal_type is None: + raise ValueError( + "azfp_cal_type must be specified when sonar_model is AZFP" + ) + + # Notation below follows p.86 of user manual + N = self.vendor["number_of_samples_per_average_bin"] # samples per bin + f = self.vendor["digitization_rate"] # digitization rate + L = self.vendor["lockout_index"] # number of lockout samples + + # keep this in ref of AZFP matlab code, + # set to 1 since we want to calculate from raw data + bins_to_avg = 1 + + # Calculate range using parameters for each freq + # This is "the range to the centre of the sampling volume + # for bin m" from p.86 of user manual + if cal_type == "Sv": + range_offset = 0 + else: + range_offset = 
( + sound_speed * self.beam["transmit_duration_nominal"] / 4 + ) # from matlab code + range_meter = ( + sound_speed * L / (2 * f) + + (sound_speed / 4) + * ( + ((2 * (self.beam.range_bin + 1) - 1) * N * bins_to_avg - 1) / f + + self.beam["transmit_duration_nominal"] + ) + - range_offset + ) + range_meter.name = "range" # add name to facilitate xr.merge + + return range_meter + elif self.sonar_model in ("EK60", "EK80"): + waveform_mode = ek_waveform_mode + encode_mode = ek_encode_mode + + if self.sonar_model == "EK60" and waveform_mode == "BB": + raise ValueError("EK60 cannot have BB samples") + + if waveform_mode is None: + raise ValueError( + "ek_waveform_mode must be specified when sonar_model is EK60 or EK80" + ) + tvg_correction_factor = TVG_CORRECTION_FACTOR[self.sonar_model] + + if waveform_mode == "CW": + if ( + self.sonar_model == "EK80" + and encode_mode == "power" + and self.beam_power is not None + ): + beam = self.beam_power + else: + beam = self.beam + + sample_thickness = beam["sample_interval"] * sound_speed / 2 + # TODO: Check with the AFSC about the half sample difference + range_meter = ( + beam.range_bin - tvg_correction_factor + ) * sample_thickness # [frequency x range_bin] + elif waveform_mode == "BB": + # TODO: bug: right now only first ping_time has non-nan range + shift = self.beam[ + "transmit_duration_nominal" + ] # based on Lar Anderson's Matlab code + # TODO: once we allow putting in arbitrary sound_speed, + # change below to use linearly-interpolated values + range_meter = ( + (self.beam.range_bin * self.beam["sample_interval"] - shift) + * sound_speed + / 2 + ) + # TODO: Lar Anderson's code include a slicing by minRange with a default of 0.02 m, + # need to ask why and see if necessary here + else: + raise ValueError("Input waveform_mode not recognized!") + + # make order of dims conform with the order of backscatter data + range_meter = range_meter.transpose("frequency", "ping_time", "range_bin") + range_meter = range_meter.where( + 
range_meter > 0, 0 + ) # set negative ranges to 0 + range_meter.name = "range" # add name to facilitate xr.merge + + return range_meter + else: + raise ValueError( + "this method only supports sonar_model values of AZFP, EK60, and EK80" + ) + @classmethod def _load_convert(cls, convert_obj): new_cls = cls() - for group in new_cls.__group_map.keys(): + for group in new_cls.group_map.keys(): if hasattr(convert_obj, group): setattr(new_cls, group, getattr(convert_obj, group)) @@ -122,9 +323,9 @@ def _load_convert(cls, convert_obj): setattr(new_cls, "source_file", getattr(convert_obj, "source_file")) return new_cls - def _load_file(self, raw_path): + def _load_file(self, raw_path: "PathHint"): """Lazy load Top-level, Beam, Environment, and Vendor groups from raw file.""" - for group, value in self.__group_map.items(): + for group, value in self.group_map.items(): # EK80 data may have a Beam_power group if both complex and power data exist. ds = None try: @@ -135,23 +336,23 @@ def _load_file(self, raw_path): except (OSError, GroupNotFoundError): # Skips group not found errors for EK80 and ADCP ... 
- if group == "top": - self.sonar_model = ds.keywords.upper() + if group == "top" and hasattr(ds, "keywords"): + self.sonar_model = ds.keywords.upper() # type: ignore if isinstance(ds, xr.Dataset): setattr(self, group, ds) - def _check_path(self, filepath): + def _check_path(self, filepath: "PathHint"): """Check if converted_raw_path exists""" - file_exists = check_file_existance(filepath, self.storage_options) + file_exists = check_file_existence(filepath, self.storage_options) if not file_exists: raise FileNotFoundError(f"There is no file named {filepath}") - def _sanitize_path(self, filepath): + def _sanitize_path(self, filepath: "PathHint") -> "PathHint": filepath = sanitize_file_path(filepath, self.storage_options) return filepath - def _check_suffix(self, filepath): + def _check_suffix(self, filepath: "PathHint") -> "FileFormatHint": """Check if file type is supported.""" # TODO: handle multiple files through the same set of checks for combining files if isinstance(filepath, fsspec.FSMap): @@ -162,14 +363,14 @@ def _check_suffix(self, filepath): if suffix not in XARRAY_ENGINE_MAP: raise ValueError("Input file type not supported!") - return suffix + return suffix # type: ignore - def _load_group(self, filepath, group=None): + def _load_group(self, filepath: "PathHint", group: Optional[str] = None): """Loads each echodata group""" suffix = self._check_suffix(filepath) return xr.open_dataset(filepath, group=group, engine=XARRAY_ENGINE_MAP[suffix]) - def to_netcdf(self, save_path=None, **kwargs): + def to_netcdf(self, save_path: Optional["PathHint"] = None, **kwargs): """Save content of EchoData to netCDF. Parameters @@ -191,7 +392,7 @@ def to_netcdf(self, save_path=None, **kwargs): return to_file(self, "netcdf4", save_path=save_path, **kwargs) - def to_zarr(self, save_path=None, **kwargs): + def to_zarr(self, save_path: Optional["PathHint"] = None, **kwargs): """Save content of EchoData to zarr. 
Parameters @@ -215,7 +416,7 @@ def to_zarr(self, save_path=None, **kwargs): # TODO: Remove below in future versions. They are for supporting old API calls. @property - def nc_path(self): + def nc_path(self) -> Optional["PathHint"]: warnings.warn( "`nc_path` is deprecated, Use `converted_raw_path` instead.", DeprecationWarning, @@ -228,7 +429,7 @@ def nc_path(self): return str(path.parent / (path.stem + ".nc")) @property - def zarr_path(self): + def zarr_path(self) -> Optional["PathHint"]: warnings.warn( "`zarr_path` is deprecated, Use `converted_raw_path` instead.", DeprecationWarning, @@ -240,7 +441,13 @@ def zarr_path(self): path = Path(self.converted_raw_path) return str(path.parent / (path.stem + ".zarr")) - def raw2nc(self, save_path=None, combine_opt=False, overwrite=False, compress=True): + def raw2nc( + self, + save_path: "PathHint" = None, + combine_opt: bool = False, + overwrite: bool = False, + compress: bool = True, + ): warnings.warn( "`raw2nc` is deprecated, use `to_netcdf` instead.", DeprecationWarning, @@ -254,7 +461,11 @@ def raw2nc(self, save_path=None, combine_opt=False, overwrite=False, compress=Tr ) def raw2zarr( - self, save_path=None, combine_opt=False, overwrite=False, compress=True + self, + save_path: "PathHint" = None, + combine_opt: bool = False, + overwrite: bool = False, + compress: bool = True, ): warnings.warn( "`raw2zarr` is deprecated, use `to_zarr` instead.", diff --git a/echopype/preprocess/api.py b/echopype/preprocess/api.py index f890bd24b..48a7415da 100644 --- a/echopype/preprocess/api.py +++ b/echopype/preprocess/api.py @@ -42,7 +42,7 @@ def _freq_MVBS(ds, rint, pbin): sv = 10 ** (ds["Sv"] / 10) # average should be done in linear domain sv.coords["range_meter"] = ( ["range_bin"], - ds_Sv["range"].isel(frequency=0, ping_time=0), + ds_Sv["range"].isel(frequency=0, ping_time=0).data, ) sv = sv.swap_dims({"range_bin": "range_meter"}) sv_groupby_bins = ( diff --git a/echopype/process/process_deprecated.py 
b/echopype/process/process_deprecated.py index ff18fcd9f..afaea6083 100644 --- a/echopype/process/process_deprecated.py +++ b/echopype/process/process_deprecated.py @@ -49,6 +49,7 @@ def __init__(self, file_path="", salinity=27.9, pressure=59, temperature=None): env_params=self._env_params, cal_params=None, waveform_mode=self.waveform_mode, + encode_mode=self.encode_mode ) # Deprecated data attributes self.Sv = None diff --git a/echopype/qc/api.py b/echopype/qc/api.py index 6dfe7ff23..1fa4680f6 100644 --- a/echopype/qc/api.py +++ b/echopype/qc/api.py @@ -53,4 +53,17 @@ def coerce_increasing_time(ds, time_name="ping_time", local_win_len=100): def exist_reversed_time(ds, time_name): + """Test for occurrence of time reversal in specified datetime coordinate variable. + + Parameters + ---------- + ds : xr.Dataset + a dataset for which the time coordinate will be tested + time_name : str + name of the time coordinate to be tested + + Returns + ------- + `True` if at least one time reversal is found, `False` otherwise. 
+ """ return (np.diff(ds[time_name]) < np.timedelta64(0, "ns")).any() diff --git a/echopype/tests/test_calibrate.py b/echopype/tests/calibrate/test_calibrate.py similarity index 85% rename from echopype/tests/test_calibrate.py rename to echopype/tests/calibrate/test_calibrate.py index 36578aa7d..fe3aa7145 100644 --- a/echopype/tests/test_calibrate.py +++ b/echopype/tests/calibrate/test_calibrate.py @@ -4,6 +4,7 @@ from scipy.io import loadmat import echopype as ep from echopype.calibrate.calibrate_ek import CalibrateEK80 +import xarray as xr azfp_path = Path('./echopype/test_data/azfp') ek60_path = Path('./echopype/test_data/ek60') @@ -18,7 +19,7 @@ def test_compute_Sv_ek60_echoview(): echodata = ep.open_raw(ek60_raw_path, sonar_model='EK60') # Calibrate to get Sv - ds_Sv = ep.calibrate.compute_Sv(echodata) + ds_Sv = ep.calibrate.compute_Sv(echodata, waveform_mode="CW", encode_mode="power") # Compare with EchoView outputs channels = [] @@ -29,7 +30,7 @@ def test_compute_Sv_ek60_echoview(): # Echoview data is shifted by 1 sample along range (missing the first sample) assert np.allclose(test_Sv[:, :, 7:], - ds_Sv.Sv.isel(ping_time=slice(None, 10), range_bin=slice(8, None)), atol=1e-8) + ds_Sv.Sv.isel(ping_time=slice(None, 10), range_bin=slice(8, None)), atol=1e-8) def test_compute_Sv_ek60_matlab(): @@ -40,8 +41,8 @@ def test_compute_Sv_ek60_matlab(): echodata = ep.open_raw(ek60_raw_path, sonar_model='EK60') # Calibrate to get Sv - ds_Sv = ep.calibrate.compute_Sv(echodata) - ds_Sp = ep.calibrate.compute_Sp(echodata) + ds_Sv = ep.calibrate.compute_Sv(echodata, waveform_mode="CW", encode_mode="power") + ds_Sp = ep.calibrate.compute_Sp(echodata, waveform_mode="CW", encode_mode="power") # Load matlab outputs and test @@ -133,8 +134,8 @@ def test_compute_Sv_ek80_pc_echoview(): # Create a CalibrateEK80 object to perform pulse compression waveform_mode = 'BB' - cal_obj = CalibrateEK80(echodata, env_params=None, cal_params=None, waveform_mode=waveform_mode) - 
cal_obj.compute_range_meter(waveform_mode=waveform_mode, tvg_correction_factor=0) # compute range [m] + cal_obj = CalibrateEK80(echodata, env_params=None, cal_params=None, waveform_mode=waveform_mode, encode_mode="complex") + cal_obj.compute_range_meter(waveform_mode=waveform_mode) # compute range [m] chirp, _, tau_effective = cal_obj.get_transmit_chirp(waveform_mode=waveform_mode) pc = cal_obj.compress_pulse(chirp) pc_mean = pc.pulse_compressed_output.isel(frequency=0).mean(dim='quadrant').dropna('range_bin') @@ -162,7 +163,8 @@ def test_compute_Sv_ek80_CW_complex(): """ ek80_raw_path = str(ek80_path.joinpath('ar2.0-D20201210-T000409.raw')) # CW complex echodata = ep.open_raw(ek80_raw_path, sonar_model='EK80') - assert ep.calibrate.compute_Sv(echodata, waveform_mode='CW', encode_mode='complex') + sv = ep.calibrate.compute_Sv(echodata, waveform_mode='CW', encode_mode='complex') + assert isinstance(sv, xr.Dataset) is True def test_compute_Sv_ek80_BB_complex(): @@ -170,4 +172,16 @@ def test_compute_Sv_ek80_BB_complex(): """ ek80_raw_path = str(ek80_path.joinpath('ar2.0-D20201209-T235955.raw')) # CW complex echodata = ep.open_raw(ek80_raw_path, sonar_model='EK80') - assert ep.calibrate.compute_Sv(echodata, waveform_mode='BB', encode_mode='complex') + sv = ep.calibrate.compute_Sv(echodata, waveform_mode='BB', encode_mode='complex') + assert isinstance(sv, xr.Dataset) is True + +def test_compute_Sv_ek80_CW_power(): + """ + Tests calibration in CW mode data encoded as power samples, + while the file also contains BB complex samples + """ + + ek80_raw_path = ek80_path / "Summer2018--D20180905-T033113.raw" + ed = ep.open_raw(ek80_raw_path, sonar_model="EK80") + sv = ep.calibrate.compute_Sv(ed, waveform_mode="CW", encode_mode="power") + assert isinstance(sv, xr.Dataset) diff --git a/echopype/tests/conftest.py b/echopype/tests/conftest.py new file mode 100644 index 000000000..4ae1916ab --- /dev/null +++ b/echopype/tests/conftest.py @@ -0,0 +1,35 @@ +"""``pytest`` 
configuration.""" + +import pytest +from pathlib import Path + +import fsspec + +from echopype.testing import TEST_DATA_FOLDER + + +@pytest.fixture(scope="session") +def minio_bucket(): + common_storage_options = dict( + client_kwargs=dict(endpoint_url="http://localhost:9000/"), + key="minioadmin", + secret="minioadmin", + ) + bucket_name = "ooi-raw-data" + fs = fsspec.filesystem( + "s3", + **common_storage_options, + ) + test_data = "data" + if not fs.exists(test_data): + fs.mkdir(test_data) + + if not fs.exists(bucket_name): + fs.mkdir(bucket_name) + + # Load test data into bucket + for d in TEST_DATA_FOLDER.iterdir(): + source_path = f'echopype/test_data/{d.name}' + fs.put(source_path, f'{test_data}/{d.name}', recursive=True) + + return common_storage_options diff --git a/echopype/tests/convert/test_convert.py b/echopype/tests/convert/test_convert.py new file mode 100644 index 000000000..f873aaed7 --- /dev/null +++ b/echopype/tests/convert/test_convert.py @@ -0,0 +1,356 @@ +"""test_convert.py + +This module contain all the various tests for echopype conversion +from a raw data to standard compliant zarr or netcdf file(s). 
+ +**Note that in order to run this test, minio server is required for s3 +output tests.** +""" + + +import os +import fsspec +import xarray as xr +import pytest +from echopype import open_raw +from echopype.testing import TEST_DATA_FOLDER +from echopype.convert.set_groups_base import DEFAULT_ENCODINGS + + +def _check_file_group(data_file, engine, groups): + for g in groups: + ds = xr.open_dataset(data_file, engine=engine, group=g) + + assert isinstance(ds, xr.Dataset) is True + + +def _check_output_files(engine, output_files, storage_options): + groups = [ + "Provenance", + "Environment", + "Beam", + "Sonar", + "Vendor", + "Platform", + ] + if isinstance(output_files, list): + fs = fsspec.get_mapper(output_files[0], **storage_options).fs + for f in output_files: + if engine == "zarr": + _check_file_group(fs.get_mapper(f), engine, groups) + fs.delete(f, recursive=True) + else: + _check_file_group(f, engine, groups) + fs.delete(f) + else: + fs = fsspec.get_mapper(output_files, **storage_options).fs + if engine == "zarr": + _check_file_group(fs.get_mapper(output_files), engine, groups) + fs.delete(output_files, recursive=True) + else: + _check_file_group(output_files, engine, groups) + fs.delete(output_files) + + +@pytest.mark.parametrize( + "sonar_model, raw_file, xml_path", + [ + ( + "azfp", + TEST_DATA_FOLDER / "azfp/ooi/17032923.01A", + TEST_DATA_FOLDER / "azfp/ooi/17032922.XML", + ), + ( + "ek60", + TEST_DATA_FOLDER / "ek60/DY1801_EK60-D20180211-T164025.raw", + None, + ), + ( + "ek80", + TEST_DATA_FOLDER / "ek80/ncei-wcsd/D20170826-T205615.raw", + None, + ), + ( + "ad2cp", + TEST_DATA_FOLDER / "ad2cp/raw/076/rawtest.076.00000.ad2cp", + None, + ), + ], +) +def test_convert_time_encodings(sonar_model, raw_file, xml_path): + ed = open_raw( + sonar_model=sonar_model, raw_file=raw_file, xml_path=xml_path + ) + ed.to_netcdf(overwrite=True) + for group, details in ed.group_map.items(): + if hasattr(ed, group): + group_ds = getattr(ed, group) + if isinstance(group_ds, 
xr.Dataset): + for var, encoding in DEFAULT_ENCODINGS.items(): + if var in group_ds: + da = group_ds[var] + assert da.encoding == encoding + + # Combine encoding and attributes since this + # is what is shown when using decode_cf=False + # without dtype attribute + total_attrs = dict(**da.attrs, **da.encoding) + total_attrs.pop('dtype') + + # Read converted file back in + file_da = xr.open_dataset( + ed.converted_raw_path, + group=details['ep_group'], + decode_cf=False, + )[var] + assert file_da.dtype == encoding['dtype'] + + # Read converted file back in + decoded_da = xr.open_dataset( + ed.converted_raw_path, + group=details['ep_group'], + )[var] + assert da.equals(decoded_da) is True + os.unlink(ed.converted_raw_path) + + +@pytest.mark.parametrize("model", ["EK60"]) +@pytest.mark.parametrize( + "input_path", + [ + "./echopype/test_data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", + "s3://data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", + [ + "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", + "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190843.raw", + ], + ], +) +@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) +@pytest.mark.parametrize( + "output_save_path", + [ + None, + "./echopype/test_data/dump/", + "./echopype/test_data/dump/tmp.zarr", + "./echopype/test_data/dump/tmp.nc", + "s3://ooi-raw-data/dump/", + "s3://ooi-raw-data/dump/tmp.zarr", + "s3://ooi-raw-data/dump/tmp.nc", + ], +) +def test_convert_ek60( + model, + input_path, + export_engine, + output_save_path, + minio_bucket, +): + common_storage_options = minio_bucket + output_storage_options = {} + ipath = input_path + if isinstance(input_path, list): + ipath = input_path[0] + + input_storage_options = ( + common_storage_options if ipath.startswith("s3://") else {} + ) + if output_save_path and output_save_path.startswith("s3://"): + output_storage_options = common_storage_options + + # Only using one file + ec = open_raw( + 
raw_file=ipath, + sonar_model=model, + storage_options=input_storage_options, + ) + + if ( + export_engine == "netcdf4" + and output_save_path is not None + and output_save_path.startswith("s3://") + ): + return + + if export_engine == "netcdf4": + to_file = getattr(ec, "to_netcdf") + elif export_engine == "zarr": + to_file = getattr(ec, "to_zarr") + else: + return + try: + to_file( + save_path=output_save_path, + overwrite=True, + output_storage_options=output_storage_options, + ) + + _check_output_files( + export_engine, ec.converted_raw_path, output_storage_options + ) + except Exception as e: + if export_engine == 'netcdf4' and output_save_path.startswith("s3://"): + assert isinstance(e, ValueError) is True + assert str(e) == 'Only local netcdf4 is supported.' + + +@pytest.mark.parametrize("model", ["azfp"]) +@pytest.mark.parametrize( + "input_path", + [ + "./echopype/test_data/azfp/ooi/17032923.01A", + "http://localhost:8080/data/azfp/ooi/17032923.01A", + ], +) +@pytest.mark.parametrize( + "xml_path", + [ + "./echopype/test_data/azfp/ooi/17032922.XML", + "http://localhost:8080/data/azfp/ooi/17032922.XML", + ], +) +@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) +@pytest.mark.parametrize( + "output_save_path", + [ + None, + "./echopype/test_data/dump/", + "./echopype/test_data/dump/tmp.zarr", + "./echopype/test_data/dump/tmp.nc", + "s3://ooi-raw-data/dump/", + "s3://ooi-raw-data/dump/tmp.zarr", + "s3://ooi-raw-data/dump/tmp.nc", + ], +) +@pytest.mark.parametrize("combine_files", [False]) +def test_convert_azfp( + model, + input_path, + xml_path, + export_engine, + output_save_path, + combine_files, + minio_bucket, +): + common_storage_options = minio_bucket + output_storage_options = {} + + input_storage_options = ( + common_storage_options if input_path.startswith("s3://") else {} + ) + if output_save_path and output_save_path.startswith("s3://"): + output_storage_options = common_storage_options + + ec = open_raw( + raw_file=input_path, + 
xml_path=xml_path, + sonar_model=model, + storage_options=input_storage_options, + ) + + assert ec.xml_path == xml_path + + if ( + export_engine == "netcdf4" + and output_save_path is not None + and output_save_path.startswith("s3://") + ): + return + + if export_engine == "netcdf4": + to_file = getattr(ec, "to_netcdf") + elif export_engine == "zarr": + to_file = getattr(ec, "to_zarr") + else: + return + try: + to_file( + save_path=output_save_path, + overwrite=True, + output_storage_options=output_storage_options, + ) + + _check_output_files( + export_engine, ec.converted_raw_path, output_storage_options + ) + except Exception as e: + if export_engine == 'netcdf4' and output_save_path.startswith("s3://"): + assert isinstance(e, ValueError) is True + assert str(e) == 'Only local netcdf4 is supported.' + + +@pytest.mark.parametrize("model", ["EK80"]) +@pytest.mark.parametrize( + "input_path", + [ + "./echopype/test_data/ek80/ncei-wcsd/D20170826-T205615.raw", + "http://localhost:8080/data/ek80/ncei-wcsd/D20170826-T205615.raw", + "s3://data/ek80/ncei-wcsd/D20170826-T205615.raw", + ], +) +@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) +@pytest.mark.parametrize( + "output_save_path", + [ + None, + "./echopype/test_data/dump/", + "./echopype/test_data/dump/tmp.zarr", + "./echopype/test_data/dump/tmp.nc", + "s3://ooi-raw-data/dump/", + "s3://ooi-raw-data/dump/tmp.zarr", + "s3://ooi-raw-data/dump/tmp.nc", + ], +) +@pytest.mark.parametrize("combine_files", [False]) +def test_convert_ek80( + model, + input_path, + export_engine, + output_save_path, + combine_files, + minio_bucket, +): + common_storage_options = minio_bucket + output_storage_options = {} + + input_storage_options = ( + common_storage_options if input_path.startswith("s3://") else {} + ) + if output_save_path and output_save_path.startswith("s3://"): + output_storage_options = common_storage_options + + ec = open_raw( + raw_file=input_path, + sonar_model=model, + 
storage_options=input_storage_options, + ) + + if ( + export_engine == "netcdf4" + and output_save_path is not None + and output_save_path.startswith("s3://") + ): + return + + if export_engine == "netcdf4": + to_file = getattr(ec, "to_netcdf") + elif export_engine == "zarr": + to_file = getattr(ec, "to_zarr") + else: + return + + try: + to_file( + save_path=output_save_path, + overwrite=True, + combine=combine_files, + output_storage_options=output_storage_options, + ) + + _check_output_files( + export_engine, ec.converted_raw_path, output_storage_options + ) + except Exception as e: + if export_engine == 'netcdf4' and output_save_path.startswith("s3://"): + assert isinstance(e, ValueError) is True + assert str(e) == 'Only local netcdf4 is supported.' diff --git a/echopype/tests/test_convert_ad2cp.py b/echopype/tests/convert/test_convert_ad2cp.py similarity index 100% rename from echopype/tests/test_convert_ad2cp.py rename to echopype/tests/convert/test_convert_ad2cp.py diff --git a/echopype/tests/test_convert_azfp.py b/echopype/tests/convert/test_convert_azfp.py similarity index 100% rename from echopype/tests/test_convert_azfp.py rename to echopype/tests/convert/test_convert_azfp.py diff --git a/echopype/tests/test_convert_ek60.py b/echopype/tests/convert/test_convert_ek60.py similarity index 100% rename from echopype/tests/test_convert_ek60.py rename to echopype/tests/convert/test_convert_ek60.py diff --git a/echopype/tests/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py similarity index 100% rename from echopype/tests/test_convert_ek80.py rename to echopype/tests/convert/test_convert_ek80.py diff --git a/echopype/tests/test_echodata.py b/echopype/tests/echodata/test_echodata.py similarity index 65% rename from echopype/tests/test_echodata.py rename to echopype/tests/echodata/test_echodata.py index d341f3142..68314047e 100644 --- a/echopype/tests/test_echodata.py +++ b/echopype/tests/echodata/test_echodata.py @@ -1,8 +1,8 @@ from textwrap 
import dedent -from pathlib import Path import fsspec +import echopype from echopype.testing import TEST_DATA_FOLDER from echopype.echodata import EchoData from echopype import open_converted @@ -11,35 +11,9 @@ import xarray as xr ek60_path = TEST_DATA_FOLDER / "ek60" - - -# TODO: Probably put the function below into a common module? -@pytest.fixture(scope="session") -def minio_bucket(): - common_storage_options = dict( - client_kwargs=dict(endpoint_url="http://localhost:9000/"), - key="minioadmin", - secret="minioadmin", - ) - bucket_name = "ooi-raw-data" - fs = fsspec.filesystem( - "s3", - **common_storage_options, - ) - test_data = "data" - if not fs.exists(test_data): - fs.mkdir(test_data) - - if not fs.exists(bucket_name): - fs.mkdir(bucket_name) - - # Load test data into bucket - test_data_path = Path(__file__).parent.parent.joinpath(Path("test_data")) - for d in test_data_path.iterdir(): - source_path = f'echopype/test_data/{d.name}' - fs.put(source_path, f'{test_data}/{d.name}', recursive=True) - - return common_storage_options +ek80_path = TEST_DATA_FOLDER / "ek80" +azfp_path = TEST_DATA_FOLDER / "azfp" +ad2cp_path = TEST_DATA_FOLDER / "ad2cp" class TestEchoData: @@ -83,6 +57,18 @@ def test_repr(self): actual = "\n".join(x.rstrip() for x in repr(ed).split("\n")) assert expected_repr == actual + def test_repr_html(self): + zarr_path_string = str(self.converted_zarr.absolute()) + ed = EchoData(converted_raw_path=self.converted_zarr) + assert hasattr(ed, "_repr_html_") + html_repr = ed._repr_html_().strip() + assert f"""
EchoData") and html_fallback.endswith("") + @pytest.mark.parametrize( "converted_zarr", @@ -133,3 +119,30 @@ def _check_path(zarr_path): and converted_zarr.endswith(".nc") ): assert isinstance(e, ValueError) is True + + +@pytest.mark.parametrize( + ("filepath", "sonar_model", "azfp_xml_path", "azfp_cal_type", "ek_waveform_mode", "ek_encode_mode"), + [ + (ek60_path / "ncei-wcsd" / "Summer2017-D20170615-T190214.raw", "EK60", None, None, "CW", "complex"), + (ek80_path / "D20190822-T161221.raw", "EK80", None, None, "CW", "power"), + (ek80_path / "D20170912-T234910.raw", "EK80", None, None, "BB", "complex"), + (azfp_path / "ooi" / "17032923.01A", "AZFP", azfp_path / "ooi" / "17032922.XML", "Sv", None, None), + (azfp_path / "ooi" / "17032923.01A", "AZFP", azfp_path / "ooi" / "17032922.XML", "Sp", None, None), + (ad2cp_path / "raw" / "090" / "rawtest.090.00001.ad2cp", "AD2CP", None, None, None, None) + ] +) +def test_compute_range(filepath, sonar_model, azfp_xml_path, azfp_cal_type, ek_waveform_mode, ek_encode_mode): + ed = echopype.open_raw(filepath, sonar_model, azfp_xml_path) + env_params = {"sound_speed": 343} + + if sonar_model == "AD2CP": + try: + ed.compute_range(env_params, ek_waveform_mode="CW", azfp_cal_type="Sv") + except ValueError: + return + else: + raise AssertionError + else: + range = ed.compute_range(env_params, azfp_cal_type, ek_waveform_mode, ) + assert isinstance(range, xr.DataArray) diff --git a/echopype/tests/echodata/test_echodata_combine.py b/echopype/tests/echodata/test_echodata_combine.py new file mode 100644 index 000000000..ec3014a8b --- /dev/null +++ b/echopype/tests/echodata/test_echodata_combine.py @@ -0,0 +1,204 @@ +from typing import Any, List, Optional, TYPE_CHECKING, Dict, Union +from pathlib import Path + +import numpy as np +import pytest +import xarray as xr +from xarray.core.merge import MergeError + +import echopype +from echopype.testing import TEST_DATA_FOLDER +from echopype.qc import exist_reversed_time +from echopype.core 
import SONAR_MODELS + +if TYPE_CHECKING: + from echopype.core import SonarModelsHint + +azfp_ooi_folder = TEST_DATA_FOLDER / "azfp" / "ooi" +azfp_test_data = [ + azfp_ooi_folder / "18100407.01A", + azfp_ooi_folder / "18100409.01A", + azfp_ooi_folder / "18100408.01A", +] +azfp_xml_file = azfp_ooi_folder / "18092920.XML" +ek60_ncei_wcsd_folder = TEST_DATA_FOLDER / "ek60" / "ncei-wcsd" +ek60_test_data = [ + ek60_ncei_wcsd_folder / "Summer2017-D20170620-T011027.raw", + ek60_ncei_wcsd_folder / "Summer2017-D20170620-T014302.raw", + ek60_ncei_wcsd_folder / "Summer2017-D20170620-T021537.raw", +] +ek60_reversed_ping_time_test_data = [ + ek60_ncei_wcsd_folder / "Summer2017-D20170719-T203615.raw", + ek60_ncei_wcsd_folder / "Summer2017-D20170719-T205415.raw", + ek60_ncei_wcsd_folder / "Summer2017-D20170719-T211347.raw", +] + + +@pytest.mark.parametrize( + "files, sonar_model, xml_file, concat_dims, concat_data_vars", + [ + ( + azfp_test_data, + "AZFP", + azfp_xml_file, + SONAR_MODELS["AZFP"]["concat_dims"], + SONAR_MODELS["AZFP"]["concat_data_vars"], + ), + ( + ek60_test_data, + "EK60", + None, + SONAR_MODELS["EK60"]["concat_dims"], + SONAR_MODELS["EK60"]["concat_data_vars"], + ), + ( + ek60_reversed_ping_time_test_data, + "EK60", + None, + SONAR_MODELS["EK60"]["concat_dims"], + SONAR_MODELS["EK60"]["concat_data_vars"], + ), + ], +) +def test_combine_echodata( + files: List[Path], + sonar_model: "SonarModelsHint", + xml_file: Optional[Path], + concat_dims: Dict[str, Optional[Union[str, List[str]]]], + concat_data_vars: Dict[str, str], +): + eds = [echopype.open_raw(file, sonar_model, xml_file) for file in files] + combined = echopype.combine_echodata(eds, "overwrite_conflicts") # type: ignore + + for group_name in combined.group_map: + if group_name in ("top", "sonar", "provenance"): + continue + combined_group: xr.Dataset = getattr(combined, group_name) + eds_groups = [ + getattr(ed, group_name) for ed in eds if getattr(ed, group_name) is not None + ] + + def 
union_attrs(datasets: List[xr.Dataset]) -> Dict[str, Any]:
+            """
+            Merges attrs from a list of datasets.
+            Prioritizes keys from later datasets.
+            """
+
+            total_attrs = dict()
+            for ds in datasets:
+                total_attrs.update(ds.attrs)
+            return total_attrs
+
+        test_ds = xr.combine_nested(
+            eds_groups,
+            [concat_dims.get(group_name, concat_dims["default"])],
+            data_vars=concat_data_vars.get(group_name, concat_data_vars["default"]),
+            coords="minimal",
+            combine_attrs="drop",
+        )
+        test_ds.attrs.update(union_attrs(eds_groups))
+        test_ds = test_ds.drop_dims(
+            [
+                "concat_dim",
+                "old_ping_time",
+                "ping_time",
+                "old_location_time",
+                "location_time",
+            ],
+            errors="ignore",
+        ).drop_dims([f"{group}_attrs" for group in combined.group_map], errors="ignore")
+        assert combined_group is None or test_ds.identical(
+            combined_group.drop_dims(
+                ["old_ping_time", "ping_time", "old_location_time", "location_time"],
+                errors="ignore",
+            )
+        )
+
+
+def test_ping_time_reversal():
+    eds = [
+        echopype.open_raw(file, "EK60") for file in ek60_reversed_ping_time_test_data
+    ]
+    combined = echopype.combine_echodata(eds, "overwrite_conflicts")  # type: ignore
+
+    for group_name in combined.group_map:
+        combined_group: xr.Dataset = getattr(combined, group_name)
+
+        if combined_group is not None:
+            if "ping_time" in combined_group and group_name != "provenance":
+                assert not exist_reversed_time(combined_group, "ping_time")
+            if "old_ping_time" in combined_group:
+                assert exist_reversed_time(combined_group, "old_ping_time")
+            if "location_time" in combined_group and group_name not in ("provenance", "nmea"):
+                assert not exist_reversed_time(combined_group, "location_time")
+            if "old_location_time" in combined_group:
+                assert exist_reversed_time(combined_group, "old_location_time")
+            if "mru_time" in combined_group and group_name != "provenance":
+                assert not exist_reversed_time(combined_group, "mru_time")
+            if "old_mru_time" in combined_group:
+                assert exist_reversed_time(combined_group, 
"old_mru_time") + + +def test_attr_storage(): + # check storage of attributes before combination in provenance group + eds = [ + echopype.open_raw(file, "EK60") for file in ek60_test_data + ] + combined = echopype.combine_echodata(eds, "overwrite_conflicts") # type: ignore + for group in combined.group_map: + if f"{group}_attrs" in combined.provenance: + group_attrs = combined.provenance[f"{group}_attrs"] + for i, ed in enumerate(eds): + for attr, value in getattr(ed, group).attrs.items(): + assert group_attrs.isel(echodata_filename=i).sel({f"{group}_attr_key": attr}).data[()] == value + + # check selection by echodata_filename + for file in ek60_test_data: + assert Path(file).name in combined.provenance["echodata_filename"] + for group in combined.group_map: + if f"{group}_attrs" in combined.provenance: + group_attrs = combined.provenance[f"{group}_attrs"] + assert np.array_equal(group_attrs.sel(echodata_filename=Path(ek60_test_data[0]).name), group_attrs.isel(echodata_filename=0)) + + +def test_combine_attrs(): + # check parameter passed to combine_echodata that controls behavior of attribute combination + eds = [ + echopype.open_raw(file, "EK60") for file in ek60_test_data + ] + eds[0].beam.attrs.update({"foo": 1}) + eds[1].beam.attrs.update({"foo": 2}) + eds[2].beam.attrs.update({"foo": 3}) + + combined = echopype.combine_echodata(eds, "override") # type: ignore + assert combined.beam.attrs["foo"] == 1 + + combined = echopype.combine_echodata(eds, "drop") # type: ignore + assert "foo" not in combined.beam.attrs + + try: + combined = echopype.combine_echodata(eds, "identical") # type: ignore + except MergeError: + pass + else: + raise AssertionError + + try: + combined = echopype.combine_echodata(eds, "no_conflicts") # type: ignore + except MergeError: + pass + else: + raise AssertionError + + combined = echopype.combine_echodata(eds, "overwrite_conflicts") # type: ignore + assert combined.beam.attrs["foo"] == 3 + + eds[0].beam.attrs.update({"foo": 1}) + 
eds[1].beam.attrs.update({"foo": 1}) + eds[2].beam.attrs.update({"foo": 1}) + + combined = echopype.combine_echodata(eds, "identical") # type: ignore + assert combined.beam.attrs["foo"] == 1 + + combined = echopype.combine_echodata(eds, "no_conflicts") # type: ignore + assert combined.beam.attrs["foo"] == 1 diff --git a/echopype/tests/test_preprocess.py b/echopype/tests/old/test_preprocess.py similarity index 100% rename from echopype/tests/test_preprocess.py rename to echopype/tests/old/test_preprocess.py diff --git a/echopype/tests/test_convert.py b/echopype/tests/test_convert.py deleted file mode 100644 index ac15b9a30..000000000 --- a/echopype/tests/test_convert.py +++ /dev/null @@ -1,526 +0,0 @@ -"""test_convert.py - -This module contain all the various tests for echopype conversion -from a raw data to standard compliant zarr or netcdf file(s). - -**Note that in order to run this test, minio server is required for s3 -output tests.** -""" - - -import os -import shutil -import glob -import fsspec -import xarray as xr -import pytest -from pathlib import Path -from echopype import open_raw -from echopype.convert.api import _validate_path -from echopype.testing import TEST_DATA_FOLDER -from echopype.convert.set_groups_base import DEFAULT_ENCODINGS - - -def _check_file_group(data_file, engine, groups): - for g in groups: - ds = xr.open_dataset(data_file, engine=engine, group=g) - - assert isinstance(ds, xr.Dataset) is True - - -def _check_output_files(engine, output_files, storage_options): - groups = [ - "Provenance", - "Environment", - "Beam", - "Sonar", - "Vendor", - "Platform", - ] - if isinstance(output_files, list): - fs = fsspec.get_mapper(output_files[0], **storage_options).fs - for f in output_files: - if engine == "zarr": - _check_file_group(fs.get_mapper(f), engine, groups) - fs.delete(f, recursive=True) - else: - _check_file_group(f, engine, groups) - fs.delete(f) - else: - fs = fsspec.get_mapper(output_files, **storage_options).fs - if engine == 
"zarr": - _check_file_group(fs.get_mapper(output_files), engine, groups) - fs.delete(output_files, recursive=True) - else: - _check_file_group(output_files, engine, groups) - fs.delete(output_files) - - -@pytest.fixture(scope="session") -def minio_bucket(): - common_storage_options = dict( - client_kwargs=dict(endpoint_url="http://localhost:9000/"), - key="minioadmin", - secret="minioadmin", - ) - bucket_name = "ooi-raw-data" - fs = fsspec.filesystem( - "s3", - **common_storage_options, - ) - test_data = "data" - if not fs.exists(test_data): - fs.mkdir(test_data) - - if not fs.exists(bucket_name): - fs.mkdir(bucket_name) - - # Load test data into bucket - test_data_path = Path(__file__).parent.parent.joinpath(Path("test_data")) - for d in test_data_path.iterdir(): - source_path = f'echopype/test_data/{d.name}' - fs.put(source_path, f'{test_data}/{d.name}', recursive=True) - - return common_storage_options - - -@pytest.mark.parametrize("model", ["EK60"]) -@pytest.mark.parametrize("file_format", [".zarr"]) -@pytest.mark.parametrize( - "input_path", - [ - "./echopype/test_data/ek60/DY1801_EK60-D20180211-T164025.raw", - "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", - ], -) -@pytest.mark.parametrize( - "output_save_path", - [ - None, - "./echopype/test_data/dump/", - "./echopype/test_data/dump/tmp.zarr", - "./echopype/test_data/dump/tmp.nc", - "s3://ooi-raw-data/dump/", - "s3://ooi-raw-data/dump/tmp.zarr", - "s3://ooi-raw-data/dump/tmp.nc", - ], -) -def test_validate_path_single_source( - model, file_format, input_path, output_save_path, minio_bucket -): - - output_storage_options = {} - if output_save_path and output_save_path.startswith("s3://"): - output_storage_options = dict( - client_kwargs=dict(endpoint_url="http://localhost:9000/"), - key="minioadmin", - secret="minioadmin", - ) - fsmap = fsspec.get_mapper(input_path) - single_fname = os.path.splitext(os.path.basename(fsmap.root))[0] - - converted_raw_path = _validate_path( - 
source_file=single_fname, - file_format=file_format, - output_storage_options=output_storage_options, - save_path=output_save_path - ) - # Used for cross-platform path comparisons - output_path = Path(converted_raw_path) - - if output_save_path is not None: - fsmap_tmp = fsspec.get_mapper(output_save_path, **output_storage_options) - fs = fsmap_tmp.fs - if not output_save_path.startswith("s3"): - if output_save_path.endswith("/"): - # if an output folder is given, below works with and without the slash at the end - assert output_path == Path(os.path.join(fsmap_tmp.root, single_fname + ".zarr")) - elif output_save_path.endswith(".zarr"): - # if an output filename is given - assert output_path == Path(fsmap_tmp.root) - else: - # force output file extension to the called type (here .zarr) - assert output_path == Path(os.path.splitext(fsmap_tmp.root)[0] + ".zarr") - shutil.rmtree(os.path.dirname(converted_raw_path)) - else: - if output_save_path.endswith("/"): - # if an output folder is given, below works with and without the slash at the end - assert output_path == Path(os.path.join(output_save_path, single_fname + ".zarr")) - elif output_save_path.endswith(".zarr"): - # if an output filename is given - assert output_path == Path(output_save_path) - else: - # force output file extension to the called type (here .zarr) - assert output_path == Path(os.path.splitext(output_save_path)[0] + ".zarr") - fs.delete(converted_raw_path) - else: - current_dir = Path.cwd() - temp_dir = current_dir.joinpath(Path("temp_echopype_output")) - assert output_path == Path(str(temp_dir.joinpath(Path(single_fname + ".zarr")))) - shutil.rmtree(os.path.dirname(converted_raw_path)) - - -@pytest.mark.parametrize("model", ["EK60"]) -@pytest.mark.parametrize("file_format", [".zarr"]) -@pytest.mark.parametrize( - "input_path", - [ - "./echopype/test_data/ek60/*.raw", - [ - "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", - ], - ], -) -@pytest.mark.parametrize( - 
"output_save_path", - [ - None, - "./echopype/test_data/dump/", - "./echopype/test_data/dump/tmp.zarr", - "./echopype/test_data/dump/tmp.nc", - "s3://ooi-raw-data/dump/", - "s3://ooi-raw-data/dump/tmp.zarr", - "s3://ooi-raw-data/dump/tmp.nc", - ], -) -@pytest.mark.skip(reason='_validate_path only takes single files') -def test_validate_path_multiple_source( - model, file_format, input_path, output_save_path, minio_bucket -): - output_storage_options = {} - if output_save_path and output_save_path.startswith("s3://"): - output_storage_options = dict( - client_kwargs=dict(endpoint_url="http://localhost:9000/"), - key="minioadmin", - secret="minioadmin", - ) - - if isinstance(input_path, str): - mult_path = glob.glob(input_path) - else: - mult_path = input_path - echodata_mult = open_raw(mult_path, sonar_model="EK60") - echodata_mult._output_storage_options = output_storage_options - - echodata_mult._validate_path(file_format=file_format, save_path=output_save_path) - - if output_save_path is not None: - fsmap_tmp = fsspec.get_mapper(output_save_path, **output_storage_options) - fs = fsmap_tmp.fs - if not output_save_path.startswith("s3"): - if output_save_path.endswith("/"): - # if an output folder is given, below works with and without the slash at the end - assert echodata_mult.converted_raw_path == [ - os.path.join( - fsmap_tmp.root, - os.path.splitext(os.path.basename(f))[0] + ".zarr", - ) - for f in mult_path - ] - elif output_save_path.endswith(".zarr"): - # if an output filename is given: only use the directory - assert echodata_mult.converted_raw_path == [os.path.abspath(output_save_path)] - elif output_save_path.endswith(".nc"): - # force output file extension to the called type (here .zarr) - assert echodata_mult.converted_raw_path == [ - os.path.abspath(output_save_path.replace(".nc", ".zarr")) - ] - shutil.rmtree(os.path.dirname(echodata_mult.converted_raw_path[0])) - else: - if output_save_path.endswith("/"): - # if an output folder is given, below works 
with and without the slash at the end - assert echodata_mult.converted_raw_path == [ - os.path.join( - output_save_path, - os.path.splitext(os.path.basename(f))[0] + ".zarr", - ) - for f in mult_path - ] - elif output_save_path.endswith(".zarr"): - # if an output filename is given: only use the directory - assert echodata_mult.converted_raw_path == [output_save_path] - elif output_save_path.endswith(".nc"): - # force output file extension to the called type (here .zarr) - assert echodata_mult.converted_raw_path == [output_save_path.replace(".nc", ".zarr")] - fs.delete(echodata_mult.converted_raw_path[0]) - else: - current_dir = Path.cwd() - temp_dir = current_dir.joinpath(Path("temp_echopype_output")) - assert echodata_mult.converted_raw_path == [ - str(temp_dir.joinpath(Path(os.path.splitext(os.path.basename(f))[0] + ".zarr"))) - for f in mult_path - ] - shutil.rmtree(os.path.dirname(echodata_mult.converted_raw_path[0])) - - -@pytest.mark.parametrize( - "sonar_model, raw_file, xml_path", - [ - ( - "azfp", - TEST_DATA_FOLDER / "azfp/ooi/17032923.01A", - TEST_DATA_FOLDER / "azfp/ooi/17032922.XML" - ), - ( - "ek60", - TEST_DATA_FOLDER / "ek60/DY1801_EK60-D20180211-T164025.raw", - None - ), - ( - "ek80", - TEST_DATA_FOLDER / "ek80/ncei-wcsd/D20170826-T205615.raw", - None - ), - ( - "ad2cp", - TEST_DATA_FOLDER / "ad2cp/raw/076/rawtest.076.00000.ad2cp", - None - ) - ] -) -def test_convert_time_encodings(sonar_model, raw_file, xml_path): - ed = open_raw( - sonar_model=sonar_model, - raw_file=raw_file, - xml_path=xml_path - ) - ed.to_netcdf(overwrite=True) - for group, details in ed._EchoData__group_map.items(): - if hasattr(ed, group): - group_ds = getattr(ed, group) - if isinstance(group_ds, xr.Dataset): - for var, encoding in DEFAULT_ENCODINGS.items(): - if var in group_ds: - da = group_ds[var] - assert da.encoding == encoding - - # Combine encoding and attributes since this - # is what is shown when using decode_cf=False - # without dtype attribute - total_attrs = 
dict(**da.attrs, **da.encoding) - total_attrs.pop('dtype') - - # Read converted file back in - file_da = xr.open_dataset( - ed.converted_raw_path, - group=details['ep_group'], - decode_cf=False - )[var] - assert file_da.dtype == encoding['dtype'] - - # Read converted file back in - decoded_da = xr.open_dataset( - ed.converted_raw_path, - group=details['ep_group'], - )[var] - assert da.equals(decoded_da) is True - os.unlink(ed.converted_raw_path) - - -@pytest.mark.parametrize("model", ["EK60"]) -@pytest.mark.parametrize( - "input_path", - [ - "./echopype/test_data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", - "s3://data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", - [ - "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", - "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190843.raw", - ], - ], -) -@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) -@pytest.mark.parametrize( - "output_save_path", - [ - None, - "./echopype/test_data/dump/", - "./echopype/test_data/dump/tmp.zarr", - "./echopype/test_data/dump/tmp.nc", - "s3://ooi-raw-data/dump/", - "s3://ooi-raw-data/dump/tmp.zarr", - "s3://ooi-raw-data/dump/tmp.nc", - ], -) -def test_convert_ek60( - model, - input_path, - export_engine, - output_save_path, - minio_bucket, -): - common_storage_options = minio_bucket - output_storage_options = {} - ipath = input_path - if isinstance(input_path, list): - ipath = input_path[0] - - input_storage_options = common_storage_options if ipath.startswith("s3://") else {} - if output_save_path and output_save_path.startswith("s3://"): - output_storage_options = common_storage_options - - # Only using one file - ec = open_raw(raw_file=ipath, sonar_model=model, storage_options=input_storage_options) - - if ( - export_engine == "netcdf4" - and output_save_path is not None - and output_save_path.startswith("s3://") - ): - return - - if export_engine == "netcdf4": - to_file = getattr(ec, "to_netcdf") - elif export_engine 
== "zarr": - to_file = getattr(ec, "to_zarr") - else: - return - - to_file( - save_path=output_save_path, - overwrite=True, - output_storage_options=output_storage_options, - ) - - _check_output_files(export_engine, ec.converted_raw_path, output_storage_options) - - -@pytest.mark.parametrize("model", ["azfp"]) -@pytest.mark.parametrize( - "input_path", - [ - "./echopype/test_data/azfp/ooi/17032923.01A", - "http://localhost:8080/data/azfp/ooi/17032923.01A", - ], -) -@pytest.mark.parametrize( - "xml_path", - [ - "./echopype/test_data/azfp/ooi/17032922.XML", - "http://localhost:8080/data/azfp/ooi/17032922.XML", - ], -) -@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) -@pytest.mark.parametrize( - "output_save_path", - [ - None, - "./echopype/test_data/dump/", - "./echopype/test_data/dump/tmp.zarr", - "./echopype/test_data/dump/tmp.nc", - "s3://ooi-raw-data/dump/", - "s3://ooi-raw-data/dump/tmp.zarr", - "s3://ooi-raw-data/dump/tmp.nc", - ], -) -@pytest.mark.parametrize("combine_files", [False]) -def test_convert_azfp( - model, - input_path, - xml_path, - export_engine, - output_save_path, - combine_files, - minio_bucket, -): - common_storage_options = minio_bucket - output_storage_options = {} - - input_storage_options = common_storage_options if input_path.startswith("s3://") else {} - if output_save_path and output_save_path.startswith("s3://"): - output_storage_options = common_storage_options - - ec = open_raw( - raw_file=input_path, - xml_path=xml_path, - sonar_model=model, - storage_options=input_storage_options, - ) - - assert ec.xml_path == xml_path - - if ( - export_engine == "netcdf4" - and output_save_path is not None - and output_save_path.startswith("s3://") - ): - return - - if export_engine == "netcdf4": - to_file = getattr(ec, "to_netcdf") - elif export_engine == "zarr": - to_file = getattr(ec, "to_zarr") - else: - return - - to_file( - save_path=output_save_path, - overwrite=True, - output_storage_options=output_storage_options, - ) - - 
_check_output_files(export_engine, ec.converted_raw_path, output_storage_options) - - -@pytest.mark.parametrize("model", ["EK80"]) -@pytest.mark.parametrize( - "input_path", - [ - "./echopype/test_data/ek80/ncei-wcsd/D20170826-T205615.raw", - "http://localhost:8080/data/ek80/ncei-wcsd/D20170826-T205615.raw", - "s3://data/ek80/ncei-wcsd/D20170826-T205615.raw", - ], -) -@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) -@pytest.mark.parametrize( - "output_save_path", - [ - None, - "./echopype/test_data/dump/", - "./echopype/test_data/dump/tmp.zarr", - "./echopype/test_data/dump/tmp.nc", - "s3://ooi-raw-data/dump/", - "s3://ooi-raw-data/dump/tmp.zarr", - "s3://ooi-raw-data/dump/tmp.nc", - ], -) -@pytest.mark.parametrize("combine_files", [False]) -def test_convert_ek80( - model, - input_path, - export_engine, - output_save_path, - combine_files, - minio_bucket, -): - common_storage_options = minio_bucket - output_storage_options = {} - - input_storage_options = common_storage_options if input_path.startswith("s3://") else {} - if output_save_path and output_save_path.startswith("s3://"): - output_storage_options = common_storage_options - - ec = open_raw(raw_file=input_path, sonar_model=model, storage_options=input_storage_options) - - if ( - export_engine == "netcdf4" - and output_save_path is not None - and output_save_path.startswith("s3://") - ): - return - - if export_engine == "netcdf4": - to_file = getattr(ec, "to_netcdf") - elif export_engine == "zarr": - to_file = getattr(ec, "to_zarr") - else: - return - - to_file( - save_path=output_save_path, - overwrite=True, - combine=combine_files, - output_storage_options=output_storage_options, - ) - - _check_output_files(export_engine, ec.converted_raw_path, output_storage_options) diff --git a/echopype/tests/test_core.py b/echopype/tests/test_core.py new file mode 100644 index 000000000..4f8681b26 --- /dev/null +++ b/echopype/tests/test_core.py @@ -0,0 +1,62 @@ +from typing import TYPE_CHECKING + +import 
pytest + +if TYPE_CHECKING: + from ..core import SonarModelsHint +from ..core import SONAR_MODELS + +@pytest.mark.parametrize(["sonar_model", "ext"], [ + ("AZFP", ".01A"), + ("AZFP", ".01a"), + ("AZFP", ".05C"), + ("AZFP", ".12q"), + + ("EK60", ".raw"), + ("EK60", ".RAW"), + + ("EK80", ".raw"), + ("EK80", ".RAW"), + + ("EA640", ".raw"), + ("EA640", ".RAW"), + + ("AD2CP", ".ad2cp"), + ("AD2CP", ".AD2CP"), +]) +def test_file_extension_validation(sonar_model: "SonarModelsHint", ext: str): + SONAR_MODELS[sonar_model]["validate_ext"](ext) + + +@pytest.mark.parametrize(["sonar_model", "ext"], [ + ("AZFP", ".001A"), + ("AZFP", ".01AA"), + ("AZFP", ".01aa"), + ("AZFP", ".05AA"), + ("AZFP", ".07!"), + ("AZFP", ".01!"), + ("AZFP", ".0!A"), + ("AZFP", ".012"), + ("AZFP", ".0AA"), + ("AZFP", ".AAA"), + ("AZFP", "01A"), + + ("EK60", "raw"), + ("EK60", ".foo"), + + ("EK80", "raw"), + ("EK80", ".foo"), + + ("EA640", "raw"), + ("EA640", ".foo"), + + ("AD2CP", "ad2cp"), + ("AD2CP", ".foo"), +]) +def test_file_extension_validation_should_fail(sonar_model: "SonarModelsHint", ext: str): + try: + SONAR_MODELS[sonar_model]["validate_ext"](ext) + except ValueError: + pass + else: + raise ValueError(f"\"{ext}\" should have been rejected for sonar model {sonar_model}") diff --git a/echopype/tests/test_old.py b/echopype/tests/utils/test_old.py similarity index 100% rename from echopype/tests/test_old.py rename to echopype/tests/utils/test_old.py diff --git a/echopype/tests/utils/test_utils_io.py b/echopype/tests/utils/test_utils_io.py new file mode 100644 index 000000000..ab1c10077 --- /dev/null +++ b/echopype/tests/utils/test_utils_io.py @@ -0,0 +1,117 @@ +import os +import fsspec +from pathlib import Path +import pytest + +from echopype.utils.io import sanitize_file_path, validate_output_path + + +@pytest.mark.parametrize( + "file_path, should_fail, file_type", + [ + ('https://example.com/test.nc', True, 'nc'), + ('https://example.com/test.zarr', False, 'zarr'), + ('folder/test.nc', 
False, 'nc'), + ('folder/test.zarr', False, 'zarr'), + (Path('https:/example.com/test.nc'), True, 'nc'), + (Path('https:/example.com/test.zarr'), True, 'zarr'), + (Path('folder/test.nc'), False, 'nc'), + (Path('folder/test.zarr'), False, 'zarr'), + (fsspec.get_mapper('https://example.com/test.nc'), True, 'nc'), + (fsspec.get_mapper('https:/example.com/test.zarr'), False, 'zarr'), + (fsspec.get_mapper('folder/test.nc'), False, 'nc'), + (fsspec.get_mapper('folder/test.zarr'), False, 'zarr'), + ('https://example.com/test.jpeg', True, 'jpeg'), + (Path('https://example.com/test.jpeg'), True, 'jpeg'), + (fsspec.get_mapper('https://example.com/test.jpeg'), True, 'jpeg'), + ], +) +def test_sanitize_file_path(file_path, should_fail, file_type): + try: + sanitized = sanitize_file_path(file_path) + assert should_fail is False + if file_type == 'nc': + assert isinstance(sanitized, Path) is True + elif file_type == 'zarr': + assert isinstance(sanitized, fsspec.FSMap) is True + except ValueError: + assert should_fail is True + + +@pytest.mark.parametrize( + "save_path, engine", + [ + # Netcdf tests + ('folder/new_test.nc', 'netcdf4'), + ('folder/new_test.nc', 'zarr'), + ('folder/path/new_test.nc', 'netcdf4'), + ('folder/', 'netcdf4'), + ('s3://ooi-raw-data/', 'netcdf4'), + (Path('folder/'), 'netcdf4'), + (Path('folder/new_test.nc'), 'netcdf4'), + # Zarr tests + ('folder/new_test.zarr', 'zarr'), + ('folder/new_test.zarr', 'netcdf4'), + ('folder/path/new_test.zarr', 'zarr'), + ('folder/', 'zarr'), + # Empty tests + (None, 'netcdf4'), + (None, 'zarr'), + # Remotes + ('https://example.com/test.zarr', 'zarr'), + ('https://example.com/', 'zarr'), + ('https://example.com/test.nc', 'netcdf4'), + ('s3://ooi-raw-data/new_test.zarr', 'zarr'), + ('s3://ooi-raw-data/new_test.nc', 'netcdf4'), + ], +) +def test_validate_output_path(save_path, engine, minio_bucket): + output_root_path = './echopype/test_data/dump' + source_file = 'test.raw' + if engine == 'netcdf4': + ext = '.nc' + 
else: + ext = '.zarr' + + if save_path is not None: + if '://' not in str(save_path): + save_path = os.path.join(output_root_path, save_path) + is_dir = True if Path(save_path).suffix == '' else False + else: + is_dir = True + save_path = output_root_path + + output_storage_options = {} + if save_path and save_path.startswith("s3://"): + output_storage_options = dict( + client_kwargs=dict(endpoint_url="http://localhost:9000/"), + key="minioadmin", + secret="minioadmin", + ) + + try: + output_path = validate_output_path( + source_file, engine, output_storage_options, save_path + ) + + assert isinstance(output_path, str) is True + assert Path(output_path).suffix == ext + + if is_dir: + assert Path(output_path).name == source_file.replace('.raw', '') + ext + else: + output_file = Path(save_path) + assert Path(output_path).name == output_file.name.replace(output_file.suffix, '') + ext + except Exception as e: + if 'https://' in save_path: + if save_path == 'https://example.com/': + assert isinstance(e, ValueError) is True + assert str(e) == 'Input file type not supported!' + elif save_path == 'https://example.com/test.nc': + assert isinstance(e, ValueError) is True + assert str(e) == 'Only local netcdf4 is supported.' + else: + assert isinstance(e, PermissionError) is True + elif save_path == 's3://ooi-raw-data/new_test.nc': + assert isinstance(e, ValueError) is True + assert str(e) == 'Only local netcdf4 is supported.' 
diff --git a/echopype/utils/io.py b/echopype/utils/io.py index ac590e8c5..6945ffa51 100644 --- a/echopype/utils/io.py +++ b/echopype/utils/io.py @@ -3,13 +3,25 @@ """ import os import sys +import warnings from pathlib import Path -from typing import Union +from typing import TYPE_CHECKING, Dict, Union import fsspec from fsspec import FSMap from fsspec.implementations.local import LocalFileSystem +if TYPE_CHECKING: + from ..core import PathHint +SUPPORTED_ENGINES = { + "netcdf4": { + "ext": ".nc", + }, + "zarr": { + "ext": ".zarr", + }, +} + def get_files_from_dir(folder): """Retrieves all Netcdf and Zarr files from a given folder""" @@ -49,29 +61,73 @@ def get_file_format(file): raise ValueError(f"Unsupported file format: {os.path.splitext(file)[1]}") +def _get_suffix(filepath: Union[str, Path, FSMap]) -> str: + """Return the suffix of filepath, raising ValueError if it is not ".nc" or ".zarr".""" + # TODO: handle multiple files through the same set of checks for combining files + if isinstance(filepath, FSMap): + suffix = Path(filepath.root).suffix + else: + suffix = Path(str(filepath)).suffix + + if suffix not in [".nc", ".zarr"]: + raise ValueError("Input file type not supported!") + + return suffix + + def sanitize_file_path( - file_path: Union[str, Path, FSMap], - storage_options: dict = {}, + file_path: "PathHint", + storage_options: Dict[str, str] = {}, + is_dir: bool = False, ) -> Union[Path, FSMap]: - """Determines file path, either Path or FSMap""" + """ + Cleans and checks the user output file path type to + a standardized Path or FSMap type. + + Parameters + ---------- + file_path : str | Path | FSMap + The source file path + (a file, or a directory when is_dir is True); + remote paths are resolved with storage_options + storage_options : dict + Storage options for file path + is_dir : bool + Flag for the function to know + if file_path is a directory or not. + If not, suffix will be determined.
+ """ + + if not is_dir: + suffix = _get_suffix(file_path) + else: + suffix = "" + if isinstance(file_path, Path): # Check for extension if ":/" in str(file_path): raise ValueError(f"{file_path} is not a valid posix path.") + if suffix == ".zarr": + return fsspec.get_mapper(str(file_path)) return file_path elif isinstance(file_path, str): if "://" in file_path: + if suffix == ".nc": + raise ValueError("Only local netcdf4 is supported.") return fsspec.get_mapper(file_path, **storage_options) - return Path(file_path) + elif suffix == ".zarr": + return fsspec.get_mapper(file_path) + else: + return Path(file_path) elif isinstance(file_path, fsspec.FSMap): root = file_path.root - if Path(root).suffix == ".nc": + if suffix == ".nc": if not isinstance(file_path.fs, LocalFileSystem): # For special case of netcdf. # netcdf4 engine can only read Path or string raise ValueError("Only local netcdf4 is supported.") - return root + return Path(root) return file_path else: raise ValueError( @@ -79,8 +135,89 @@ def sanitize_file_path( ) -def check_file_existance( - file_path: Union[str, Path, FSMap], storage_options: dict = {} +def validate_output_path( + source_file: str, + engine: str, + output_storage_options: Dict = {}, + save_path: Union[None, Path, str] = None, +) -> str: + """ + Assemble output file names and path. + + Parameters + ---------- + source_file : str + The source file path + engine : str {'netcdf4', 'zarr'} + The engine to be used for file output + output_storage_options : dict + Storage options for remote output path + save_path : str | Path | None + Either a directory or a file. If none then the save path is 'temp_echopype_output/' + in the current working directory. 
+ """ + if engine not in SUPPORTED_ENGINES: + raise ValueError(f"Engine {engine} is not supported for file export.") + + file_ext = SUPPORTED_ENGINES[engine]["ext"] + + if save_path is None: + warnings.warn("save_path is not provided") + + current_dir = Path.cwd() + # Check permission, raise exception if no permission + check_file_permissions(current_dir) + out_dir = current_dir.joinpath(Path("temp_echopype_output")) + if not out_dir.exists(): + out_dir.mkdir(parents=True) + + warnings.warn( + f"Resulting converted file(s) will be available at {str(out_dir)}" + ) + out_path = str(out_dir / (Path(source_file).stem + file_ext)) + elif not isinstance(save_path, Path) and not isinstance(save_path, str): + raise TypeError("save_path must be a string or Path") + else: + if isinstance(save_path, str): + # Clean folder path by stripping '/' at the end + save_path = save_path.strip("/") + + # Determine whether this is a directory or not + is_dir = True if Path(save_path).suffix == "" else False + else: + is_dir = True if save_path.suffix == "" else False + + # Cleans path + sanitized_path = sanitize_file_path( + save_path, storage_options=output_storage_options, is_dir=is_dir + ) + + # Check file permissions + if is_dir: + check_file_permissions(sanitized_path) + out_path = os.path.join(save_path, Path(source_file).stem + file_ext) + else: + if isinstance(sanitized_path, Path): + check_file_permissions(sanitized_path.parent) + final_path = sanitized_path + else: + path_dir = fsspec.get_mapper( + os.path.dirname(save_path), **output_storage_options + ) + check_file_permissions(path_dir) + final_path = Path(save_path) + if final_path.suffix != file_ext: + warnings.warn( + "Mismatch between specified engine and save_path found; forcing output format to engine."
# noqa + ) + out_path = str( + final_path.parent.joinpath(final_path.stem + file_ext).absolute() + ) + return out_path + + +def check_file_existence( + file_path: "PathHint", storage_options: Dict[str, str] = {} ) -> bool: """ Checks if file exists in the specified path @@ -127,7 +264,15 @@ def check_file_permissions(FILE_DIR): with FILE_DIR.fs.open(TEST_FILE, "w") as f: f.write("testing\n") FILE_DIR.fs.delete(TEST_FILE) - elif isinstance(FILE_DIR, Path): + elif isinstance(FILE_DIR, (Path, str)): + if isinstance(FILE_DIR, str): + FILE_DIR = Path(FILE_DIR) + + if not FILE_DIR.exists(): + warnings.warn( + f"{str(FILE_DIR)} does not exist. Attempting to create it." + ) + FILE_DIR.mkdir(exist_ok=True, parents=True) TEST_FILE = FILE_DIR.joinpath(Path(".permission_test")) TEST_FILE.write_text("testing\n") @@ -136,10 +281,5 @@ def check_file_permissions(FILE_DIR): TEST_FILE.unlink(missing_ok=True) else: TEST_FILE.unlink() - else: - TEST_FILE = os.path.join(FILE_DIR, ".permission_test") - with open(TEST_FILE, "w") as f: - f.write("testing\n") - os.remove(TEST_FILE) except Exception: raise PermissionError("Writing to specified path is not permitted.") diff --git a/requirements-dev.txt b/requirements-dev.txt index b679ac7ad..0d8797452 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -19,5 +19,6 @@ setuptools_scm sphinx sphinx-automodapi sphinx_rtd_theme +sphinxcontrib-mermaid twine wheel diff --git a/requirements.txt b/requirements.txt index 557c52ac4..95c160a3a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,9 +5,10 @@ numpy pynmea2 pytz scipy -xarray<0.18 +xarray zarr -fsspec==0.8.7 -s3fs==0.5.2 +fsspec +s3fs requests aiohttp +typing-extensions~=3.10 diff --git a/setup.py b/setup.py index 3ee0a78be..ac9119b70 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,14 @@ # Dependencies. 
with open("requirements.txt") as f: requirements = f.readlines() + +with open("requirements-dev.txt") as f: + dev_reqs = f.readlines() + +EXTRA_REQUIRES = { + "dev": dev_reqs, +} + INSTALL_REQUIRES = [t.strip() for t in requirements] opts = dict( @@ -16,6 +24,8 @@ packages=find_packages(), include_package_data=True, install_requires=INSTALL_REQUIRES, + extras_require=EXTRA_REQUIRES, + python_requires=">=3.6", py_modules=["_echopype_version"], use_scm_version={ "fallback_version": "unknown",