diff --git a/.ci_helpers/py3.7.yaml b/.ci_helpers/py3.7.yaml index 98e3d9d82..a09696fde 100644 --- a/.ci_helpers/py3.7.yaml +++ b/.ci_helpers/py3.7.yaml @@ -10,10 +10,10 @@ dependencies: - pynmea2 - pytz - scipy - - xarray<0.18 + - xarray - zarr - - fsspec==0.8.7 + - fsspec - requests - aiohttp - - s3fs==0.5.2 + - s3fs - mamba diff --git a/.ci_helpers/py3.8.yaml b/.ci_helpers/py3.8.yaml index 98f582abd..047a69220 100644 --- a/.ci_helpers/py3.8.yaml +++ b/.ci_helpers/py3.8.yaml @@ -10,10 +10,10 @@ dependencies: - pynmea2 - pytz - scipy - - xarray<0.18 + - xarray - zarr - - fsspec==0.8.7 + - fsspec - requests - aiohttp - - s3fs==0.5.2 + - s3fs - mamba diff --git a/.ci_helpers/py3.9.yaml b/.ci_helpers/py3.9.yaml index 8ffd88858..4b065d3d2 100644 --- a/.ci_helpers/py3.9.yaml +++ b/.ci_helpers/py3.9.yaml @@ -10,10 +10,10 @@ dependencies: - pynmea2 - pytz - scipy - - xarray<0.18 + - xarray - zarr - - fsspec==0.8.7 + - fsspec - requests - aiohttp - - s3fs==0.5.2 + - s3fs - mamba diff --git a/.ci_helpers/run-test.py b/.ci_helpers/run-test.py index 965529dd2..37903a24e 100644 --- a/.ci_helpers/run-test.py +++ b/.ci_helpers/run-test.py @@ -6,6 +6,7 @@ import argparse import glob import os +import re import shutil import sys from pathlib import Path @@ -22,6 +23,16 @@ ExitCode.NO_TESTS_COLLECTED: 5, } +MODULES_TO_TEST = { + "root": {}, # This is to test the root folder. + "convert": {}, + "calibrate": {}, + "echodata": {}, + "preprocess": {}, + "utils": {}, + "old": {"extra_globs": ["echopype/convert/convert.py", "echopype/process/*"]}, +} + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run tests listed.") parser.add_argument( @@ -40,6 +51,11 @@ action="store_true", help="Optional flag for running tests locally, not in continuous integration.", ) + parser.add_argument( + "--include-cov", + action="store_true", + help="Optional flag for including coverage. Exports to coverage.xml by default.", + ) args = parser.parse_args() if args.local: temp_path = Path("temp_echopype_output") @@ -50,69 +66,66 @@ if dump_path.exists(): shutil.rmtree(dump_path) echopype_folder = Path("echopype") - file_list = glob.glob(str(echopype_folder / "**" / "*.py")) + file_list = glob.glob(str(echopype_folder / "**" / "*.py"), recursive=True) else: file_list = args.touchedfiles.split(",") pytest_args = [] if args.pytest_args: pytest_args = args.pytest_args.split(",") + if args.include_cov: + # Checks for cov in pytest_args + for arg in pytest_args: + if re.match("--cov", arg) is not None: + raise ValueError( + "pytest args may not have any cov arguments if --include-cov is set." 
+ ) + pytest_args = pytest_args + [ + "--cov-report=xml", + "--cov-append", + ] test_to_run = {} - for f in file_list: - file_path = Path(f) - file_name, file_ext = os.path.splitext(os.path.basename(f)) - if file_ext == ".py": - if any( - [ - (file_path.match("echopype/convert/*")), - (file_path.match("echopype/tests/test_convert*")), - ] - ): - if "convert" not in test_to_run: - test_to_run["convert"] = [] - test_to_run["convert"].append(file_path) - elif any( - [ - (file_path.match("echopype/calibrate/*")), - (file_path.match("echopype/tests/test_calibrate*")), - ] - ): - if "calibrate" not in test_to_run: - test_to_run["calibrate"] = [] - test_to_run["calibrate"].append(file_path) - elif any( - [ - (file_path.match("echopype/echodata/*")), - (file_path.match("echopype/tests/test_echodata*")), - ] - ): - if "echodata" not in test_to_run: - test_to_run["echodata"] = [] - test_to_run["echodata"].append(file_path) - elif any( - [ - (file_path.match("echopype/preprocess/*")), - (file_path.match("echopype/tests/test_preprocess*")), - ] - ): - if "preprocess" not in test_to_run: - test_to_run["preprocess"] = [] - test_to_run["preprocess"].append(file_path) - elif any( - [ - (file_path.match("echopype/convert/convert.py")), - (file_path.match("echopype/process/*")), - (file_path.match("echopype/tests/test_old.py")), - ] - ): - if "old" not in test_to_run: - test_to_run["old"] = [] - test_to_run["old"].append(file_path) + for module, mod_extras in MODULES_TO_TEST.items(): + if module == "root": + file_globs = [ + "echopype/*", + "echopype/tests/*", + ] + else: + file_globs = [ + f"echopype/{module}/*", + f"echopype/tests/{module}/*", + ] + if "extra_globs" in mod_extras: + file_globs = file_globs + mod_extras["extra_globs"] + for f in file_list: + file_path = Path(f) + file_name, file_ext = os.path.splitext(os.path.basename(f)) + if file_ext == ".py": + if any(((file_path.match(fg)) for fg in file_globs)): + if module not in test_to_run: + test_to_run[module] = [] + test_to_run[module].append(file_path) + original_pytest_args = pytest_args.copy() total_exit_codes = [] for k, v in test_to_run.items(): print(f"=== RUNNING {k.upper()} TESTS===") print(f"Touched files: {','.join([os.path.basename(p) for p in v])}") - test_files = glob.glob(f"echopype/tests/test_{k}*.py") + if k == "root": + file_glob_str = "echopype/tests/test_*.py" + cov_mod_arg = ["--cov=echopype"] + else: + file_glob_str = f"echopype/tests/{k}/*.py" + cov_mod_arg = [f"--cov=echopype/{k}"] + if args.include_cov: + if k == "old": + pytest_args = original_pytest_args + [ + "--cov=echopype/convert", + "--cov=echopype/process", + ] + else: + pytest_args = original_pytest_args + cov_mod_arg + test_files = glob.glob(file_glob_str) final_args = pytest_args + test_files print(f"Pytest args: {final_args}") exit_code = pytest.main(final_args) @@ -121,7 +134,7 @@ if len(total_exit_codes) == 0: print("No test(s) were run.") sys.exit(0) - if all([True if e == 0 else False for e in total_exit_codes]): + if all(True if e == 0 else False for e in total_exit_codes): print("All test run successful") sys.exit(0) else: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 42ac4d46e..d2ddea7e9 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -56,11 +56,11 @@ jobs: - name: Cache conda uses: actions/cache@v2 env: - # Increase this value to reset cache if .ci_helpers/py${{ matrix.python-version }}.yaml has not changed + # Increase this value to reset cache if '.ci_helpers/py{0}.yaml' has not changed 
CACHE_NUMBER: 0 with: path: ~/conda_pkgs_dir - key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('.ci_helpers/py${{ matrix.python-version }}.yaml') }} + key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles(format('.ci_helpers/py{0}.yaml', matrix.python-version)) }} - name: Setup miniconda uses: conda-incubator/setup-miniconda@v2 with: @@ -87,7 +87,7 @@ jobs: - name: Running All Tests shell: bash -l {0} run: | - python .ci_helpers/run-test.py --local --pytest-args="--cov=echopype,--cov-report=xml,--log-cli-level=WARNING,-vv,--disable-warnings" |& tee ci_test_log.log + pytest -vv -rx --cov=echopype --cov-report=xml --log-cli-level=WARNING --disable-warnings |& tee ci_${{ matrix.python-version }}_test_log.log - name: Upload ci test log if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml deleted file mode 100644 index f526ace73..000000000 --- a/.github/workflows/lint.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: Lint code - -on: - push: - paths-ignore: - - '.ci_helpers/docker/**' - pull_request: - paths-ignore: - - '.ci_helpers/docker/**' - -jobs: - lint-test: - runs-on: ubuntu-20.04 - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: pre-commit/action@v2.0.0 diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index a626794bf..dc246250f 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -52,11 +52,11 @@ jobs: - name: Cache conda uses: actions/cache@v2 env: - # Increase this value to reset cache if .ci_helpers/py${{ matrix.python-version }}.yaml has not changed + # Increase this value to reset cache if '.ci_helpers/py{0}.yaml' has not changed CACHE_NUMBER: 0 with: path: ~/conda_pkgs_dir - key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('.ci_helpers/py${{ matrix.python-version }}.yaml') }} + key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles(format('.ci_helpers/py{0}.yaml', matrix.python-version)) }} - name: Setup miniconda uses: conda-incubator/setup-miniconda@v2 with: @@ -87,17 +87,17 @@ jobs: format: 'csv' - name: Print Changed files shell: bash -l {0} - run: echo "${{ steps.files.outputs.added_modified }}" + run: echo "${{ steps.files.outputs.added_modified_renamed }}" - name: Running all Tests if: contains(github.event.pull_request.labels.*.name, 'Needs Complete Testing') shell: bash -l {0} run: | - python .ci_helpers/run-test.py --local --pytest-args="--cov=echopype,--cov-report=xml,--log-cli-level=WARNING,-vv,--disable-warnings" |& tee ci_test_log.log + pytest -vv -rx --cov=echopype --cov-report=xml --log-cli-level=WARNING --disable-warnings |& tee ci_${{ matrix.python-version }}_test_log.log - name: Running Tests if: "!contains(github.event.pull_request.labels.*.name, 'Needs Complete Testing')" shell: bash -l {0} run: | - python .ci_helpers/run-test.py --pytest-args="--cov=echopype,--cov-report=xml,--log-cli-level=WARNING,-vv,--disable-warnings" ${{ steps.files.outputs.added_modified }} |& tee ci_test_log.log + python .ci_helpers/run-test.py --pytest-args="--log-cli-level=WARNING,-vv,-rx,--disable-warnings" --include-cov ${{ steps.files.outputs.added_modified_renamed }} |& tee ci_test_log.log - name: Upload ci test log if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 diff --git a/README.md b/README.md index 890683b41..596d1f073 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,46 @@
# Echopype

[README badge block: DOI, GitHub License, and CI/status badges rearranged into a new layout; the badge <a>/<img> markup did not survive extraction]
Echopype is a package built to enable interoperability and scalability in ocean sonar data processing. These data are widely used for obtaining information about the distribution and abundance of marine animals, such as fish and krill. Our ability to collect large volumes of sonar data from a variety of ocean platforms has grown significantly in the last decade. However, most of the new data remain under-utilized. echopype aims to address the root cause of this problem - the lack of interoperable data format and scalable analysis workflows that adapt well with increasing data volume - by providing open-source tools as entry points for scientists to make discovery using these new data. diff --git a/docs/requirements.txt b/docs/requirements.txt index 804179bb9..9e77b34d2 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,4 @@ sphinx_rtd_theme sphinx-automodapi +sphinxcontrib-mermaid numpydoc \ No newline at end of file diff --git a/docs/source/api.rst b/docs/source/api.rst index b1701ff39..36eee029b 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -12,22 +12,13 @@ API components that most users will interact with. **Content** -* `Open raw and converted files`_ * `EchoData class`_ +* `Open raw and converted files`_ +* `Combine EchoData objects`_ * `Data processing subpackages`_ * `Utilities`_ -Open raw and converted files ----------------------------- - -.. automodule:: echopype - :members: open_raw - -.. automodule:: echopype - :members: open_converted - - EchoData class -------------- @@ -39,6 +30,20 @@ EchoData class :no-inheritance-diagram: :no-heading: +Open raw and converted files +---------------------------- + +.. automodule:: echopype + :members: open_raw + +.. automodule:: echopype + :members: open_converted + +Combine EchoData objects +------------------------ + +.. automodule:: echopype + :members: combine_echodata Data processing subpackages --------------------------- @@ -57,10 +62,17 @@ preprocess :no-inheritance-diagram: :no-heading: +qc +^^^ + +.. automodapi:: echopype.qc + :no-inheritance-diagram: + :no-heading: Utilities --------- + .. automodapi:: echopype.utils.uwa :no-inheritance-diagram: - :members: calc_absorption, calc_sound_speed :no-heading: + :members: calc_absorption, calc_sound_speed diff --git a/docs/source/conf.py b/docs/source/conf.py index 6c9d22b25..aac7e6cf4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -47,7 +47,8 @@ 'sphinx.ext.intersphinx', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', - 'sphinx.ext.githubpages' + 'sphinx.ext.githubpages', + 'sphinxcontrib.mermaid' ] numpydoc_show_class_members = False diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index d143282fe..77886c039 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -3,20 +3,39 @@ Contributing to echopype We welcome your contributions, large or small! -Contributing on GitHub ----------------------- -For echopype development we use the **git forking workflow** (also known as fork & pull request workflow), where you first create your own fork of the source GitHub repository -`https://github.com/OSOceanAcoustics/echopype/ `_ -(``upstream``). Excellent tutorials for this workflow can be found -`here `_ and -`here `_, -including guidance on opening pull requests. +Contributing with Git and GitHub +-------------------------------- Please submit questions or report problems via GitHub issues. If you're new to GitHub, see these tips for submitting issues: `"Creating issues on GitHub" `_. 
+For echopype development we use the **gitflow workflow** with forking. All development +changes are merged into the ``dev`` development branch. First create your own fork of the +source GitHub repository +`https://github.com/OSOceanAcoustics/echopype/ `_ +(``upstream``), then clone your fork; your fork will be the ``origin`` remote. See +`this excellent tutorial `_ for +guidance on forking and opening pull requests, but replace references to the ``master`` +branch with the ``dev`` development branch. See +`this description of the gitflow workflow `_. +The complete workflow we use is depicted in the diagram below, which includes +components involving documentation updates (see `Documentation development`_ below) +and preparation of releases. + +.. mermaid:: + + graph LR + classDef patch fill:#f2ece4 + master --> stable + master --> dev + p1([doc patch]):::patch -.-> stable + p2([code patch]):::patch -.-> dev + stable --> |docs merge| rel[release/0.x.y] + dev --> |dev merge| rel + rel --> master + Installation for echopype development ------------------------------------- @@ -52,29 +71,6 @@ Create a `conda `_ environment for echopype development See the :doc:`installation` page to simply install the latest echopype release from conda or PyPI. -Documentation development --------------------------- - -Echopype documentation (``_) is based on -`Sphinx `_ and is hosted at -`Read the Docs `_. The sphinx files are found -in the ``docs`` directory, and the source documentation files, written in -`reStructuredText `_ -(``.rst``) format, are in the ``docs/source`` directory. The echopype development -conda environment created above will install all required sphinx dependencies. -To run sphinx locally: - -.. code-block:: bash - - cd docs - sphinx-build -b html -d _build/doctrees source _build/html - -To view the generated HTML files generated by Sphinx, open the -``docs/_build/html/index.html`` in your browser. - -We also maintain a test version of the documentation at https://doc-test-echopype.readthedocs.io/ -for viewing and debugging larger changes. - Test data files --------------- @@ -97,9 +93,10 @@ Running the tests ----------------- To run the echopype unit tests in ``echopype/tests``, -`docker `_ and -`docker-compose `_ -will need to be installed if not already installed. To run the tests: +`Docker `_ +will need to be installed if not already present +(`docker-compose `_ is also used, but it's installed +in the conda environment for echopype development). To run the tests: .. code-block:: bash @@ -117,6 +114,21 @@ and `S3 object-storage `_ sources, the latter via `minio `_. +pre-commit hooks +---------------- + +The echopype development conda environment includes `pre-commit `_, +and useful pre-commit "hooks" have been configured in the +`.pre-commit-config.yaml file `_. +Current hooks include file formatting (linting) checks (trailing spaces, trailing lines, +JSON and YAML format checks, etc) and Python style autoformatters (PEP8 / flake8, ``black`` and ``isort``). + +To run pre-commit hooks locally, run `pre-commit install` before running the +docker setup-service deploy statement described above. The hooks will run automatically +during ``git commit`` and will give you options as needed before committing your changes. +You can also run ``pre-commit`` before actually doing ``git commit``, as you edit the code, by running ``pre-commit run --all-files``. See the `pre-commit usage documentation `_ for details. 
+ + Continuous integration GitHub Actions ------------------------------------- @@ -130,3 +142,52 @@ very limited scope (such as contributions to the documentation) or you know exactly what you're doing (you're a seasoned echopype contributor), the CI can be skipped. This is done by including the string "[skip ci]" in your last commit's message. + + +Documentation development +------------------------- + +Echopype documentation (``_) is based on +`Sphinx `_ and is hosted at +`Read The Docs `_. The sphinx files are found +in the ``docs`` directory, and the source documentation files, written in +`reStructuredText `_ +(``.rst``) format, are in the ``docs/source`` directory. The echopype development +conda environment will install all required Sphinx dependencies. +To run Sphinx locally: + +.. code-block:: bash + + cd docs + sphinx-build -b html -d _build/doctrees source _build/html + +To view the generated HTML files generated by Sphinx, open the +``docs/_build/html/index.html`` in your browser. + +Updates to the documentation that are based on the current echopype release (that is, +not involving echopype API changes) should be merged into the GitHub ``stable`` branch. +These updates will then become available immediately on the default ReadTheDocs version. +Examples of such updates include fixing spelling mistakes, expanding an explanation, and adding a new section that documents a previously undocumented feature. + +Function and object doc strings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For inline strings documenting functions and objects ("doc strings"), we use the +`numpydoc style (Numpy docstring format) `_. + + +Documentation versions +~~~~~~~~~~~~~~~~~~~~~~ + +``_ redirects to the documentation ``stable`` version, +``_, which is built from the `stable` branch +on the ``echopype`` GitHub repository. In addition, the ``latest`` version +(``_) is built from the `master` branch, +while the hidden `dev` version (``_) is built +from the ``dev`` branch. Finally, each new echopype release is built as a new release version +on ReadTheDocs. Merging pull requests into any of these three branches or issuing a +new tagged release will automatically result in a new ReadTheDocs build for the +corresponding version. + +We also maintain a test version of the documentation at ``_ +for viewing and debugging larger, more experimental changes, typically from a separate fork. diff --git a/echopype/__init__.py b/echopype/__init__.py index d290a78af..9386a9e36 100644 --- a/echopype/__init__.py +++ b/echopype/__init__.py @@ -5,6 +5,7 @@ from . 
import calibrate from .convert.api import open_raw from .echodata.api import open_converted +from .echodata.combine import combine_echodata from .process import Process # noqa -__all__ = ["open_raw", "open_converted", "calibrate"] +__all__ = ["open_raw", "open_converted", "combine_echodata", "calibrate"] diff --git a/echopype/calibrate/api.py b/echopype/calibrate/api.py index 1c8788e20..29487c62f 100644 --- a/echopype/calibrate/api.py +++ b/echopype/calibrate/api.py @@ -25,6 +25,7 @@ def _compute_cal( env_params=env_params, cal_params=cal_params, waveform_mode=waveform_mode, + encode_mode=encode_mode, ) # Perform calibration if cal_type == "Sv": diff --git a/echopype/calibrate/calibrate_azfp.py b/echopype/calibrate/calibrate_azfp.py index b6c76df0a..7ca5057bd 100644 --- a/echopype/calibrate/calibrate_azfp.py +++ b/echopype/calibrate/calibrate_azfp.py @@ -87,37 +87,9 @@ def compute_range_meter(self, cal_type): 'Sv' for calculating volume backscattering strength, or 'Sp' for calculating point backscattering strength """ - # Notation below follows p.86 of user manual - N = self.echodata.vendor["number_of_samples_per_average_bin"] # samples per bin - f = self.echodata.vendor["digitization_rate"] # digitization rate - L = self.echodata.vendor["lockout_index"] # number of lockout samples - sound_speed = self.env_params["sound_speed"] - - # keep this in ref of AZFP matlab code, - # set to 1 since we want to calculate from raw data - bins_to_avg = 1 - - # Calculate range using parameters for each freq - # This is "the range to the centre of the sampling volume - # for bin m" from p.86 of user manual - if cal_type == "Sv": - range_offset = 0 - else: - range_offset = ( - sound_speed * self.echodata.beam["transmit_duration_nominal"] / 4 - ) # from matlab code - range_meter = ( - sound_speed * L / (2 * f) - + (sound_speed / 4) - * ( - ((2 * (self.echodata.beam.range_bin + 1) - 1) * N * bins_to_avg - 1) / f - + self.echodata.beam["transmit_duration_nominal"] - ) - - range_offset + self.range_meter = self.echodata.compute_range( + self.env_params, azfp_cal_type=cal_type ) - range_meter.name = "range" # add name to facilitate xr.merge - - self.range_meter = range_meter def _cal_power(self, cal_type, **kwargs): """Calibrate to get volume backscattering strength (Sv) from AZFP power data. diff --git a/echopype/calibrate/calibrate_base.py b/echopype/calibrate/calibrate_base.py index a895fff72..03d73f2b4 100644 --- a/echopype/calibrate/calibrate_base.py +++ b/echopype/calibrate/calibrate_base.py @@ -1,3 +1,5 @@ +import abc + ENV_PARAMS = ("temperature", "salinity", "pressure", "sound_speed", "sound_absorption") CAL_PARAMS = { @@ -6,7 +8,7 @@ } -class CalibrateBase: +class CalibrateBase(abc.ABC): """Class to handle calibration for all sonar models.""" def __init__(self, echodata): @@ -17,12 +19,15 @@ def __init__(self, echodata): # range_meter is computed in compute_Sv/Sp in child class self.range_meter = None + @abc.abstractmethod def get_env_params(self, **kwargs): pass + @abc.abstractmethod def get_cal_params(self, **kwargs): pass + @abc.abstractmethod def compute_range_meter(self, **kwargs): """Calculate range in units meter. @@ -33,6 +38,7 @@ def compute_range_meter(self, **kwargs): """ pass + @abc.abstractmethod def _cal_power(self, cal_type, **kwargs): """Calibrate power data for EK60, EK80, and AZFP. 
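# A minimal, self-contained sketch (not part of the patch) of the abc.ABC
# pattern the calibrate_base.py hunks above introduce: once CalibrateBase
# derives from abc.ABC and its methods carry @abc.abstractmethod, a concrete
# subclass must implement every abstract method before it can be instantiated.
# The class names below are illustrative, not echopype classes.
import abc

class CalibrateSketchBase(abc.ABC):
    @abc.abstractmethod
    def compute_range_meter(self, **kwargs):
        ...

class CalibrateSketchImpl(CalibrateSketchBase):
    def compute_range_meter(self, **kwargs):
        return 0.0

CalibrateSketchImpl().compute_range_meter()  # fine: abstract method implemented
# CalibrateSketchBase() would raise TypeError: can't instantiate abstract class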
@@ -44,9 +50,11 @@ def _cal_power(self, cal_type, **kwargs): """ pass + @abc.abstractmethod def compute_Sv(self, **kwargs): pass + @abc.abstractmethod def compute_Sp(self, **kwargs): pass diff --git a/echopype/calibrate/calibrate_ek.py b/echopype/calibrate/calibrate_ek.py index 8a758d25d..7f300c3bf 100644 --- a/echopype/calibrate/calibrate_ek.py +++ b/echopype/calibrate/calibrate_ek.py @@ -14,7 +14,7 @@ def __init__(self, echodata): self.env_params = dict.fromkeys(ENV_PARAMS) self.cal_params = dict.fromkeys(CAL_PARAMS["EK"]) - def compute_range_meter(self, waveform_mode, tvg_correction_factor): + def compute_range_meter(self, waveform_mode, encode_mode="complex"): """ Parameters ---------- @@ -30,42 +30,11 @@ def compute_range_meter(self, waveform_mode, tvg_correction_factor): range_meter : xr.DataArray range in units meter """ - if waveform_mode == "CW": - sample_thickness = ( - self.echodata.beam["sample_interval"] - * self.env_params["sound_speed"] - / 2 - ) - # TODO: Check with the AFSC about the half sample difference - range_meter = ( - self.echodata.beam.range_bin - tvg_correction_factor - ) * sample_thickness # [frequency x range_bin] - elif waveform_mode == "BB": - # TODO: bug: right now only first ping_time has non-nan range - shift = self.echodata.beam[ - "transmit_duration_nominal" - ] # based on Lar Anderson's Matlab code - # TODO: once we allow putting in arbitrary sound_speed, - # change below to use linearly-interpolated values - range_meter = ( - ( - self.echodata.beam.range_bin * self.echodata.beam["sample_interval"] - - shift - ) - * self.env_params["sound_speed"].squeeze() - / 2 - ) - # TODO: Lar Anderson's code include a slicing by minRange with a default of 0.02 m, - # need to ask why and see if necessary here - else: - raise ValueError("Input waveform_mode not recognized!") - - # make order of dims conform with the order of backscatter data - range_meter = range_meter.transpose("frequency", "ping_time", "range_bin") - range_meter = range_meter.where(range_meter > 0, 0) # set negative ranges to 0 - range_meter.name = "range" # add name to facilitate xr.merge - - self.range_meter = range_meter + self.range_meter = self.echodata.compute_range( + self.env_params, + ek_waveform_mode=waveform_mode, + ek_encode_mode=encode_mode, + ) def _get_vend_cal_params_power(self, param): """Get cal parameters stored in the Vendor group. 
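# A runnable sketch of the CW range computation that compute_range_meter
# previously performed inline (per the removed lines above) and now delegates
# to EchoData.compute_range. All values here are illustrative stand-ins for
# what the code reads from echodata.beam and self.env_params.
import numpy as np

sound_speed = 1500.0         # m/s, stand-in for env_params["sound_speed"]
sample_interval = 256e-6     # s, stand-in for beam["sample_interval"]
tvg_correction_factor = 2    # EK60 default; 0 for EK80 (see TVG_CORRECTION_FACTOR later in this diff)
range_bin = np.arange(1000)  # sample indices along range

sample_thickness = sample_interval * sound_speed / 2
range_meter = (range_bin - tvg_correction_factor) * sample_thickness
range_meter = np.where(range_meter > 0, range_meter, 0)  # set negative ranges to 0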
@@ -79,7 +48,7 @@ def _get_vend_cal_params_power(self, param): # currently this has only been tested with EK60 data ds_vend = self.echodata.vendor - if param not in ds_vend: + if ds_vend is None or param not in ds_vend: return None if param not in ["sa_correction", "gain_correction"]: @@ -101,36 +70,65 @@ def _get_vend_cal_params_power(self, param): dim="ping_time", how="any", subset=["transmit_duration_nominal"] ) + if self.echodata.beam_power is not None: + beam = self.echodata.beam_power + else: + beam = self.echodata.beam + + # indexes of frequencies that are for power, not complex + relevant_indexes = np.where(np.isin(ds_vend["frequency"], beam["frequency"]))[0] + + unique_pulse_length = np.unique(beam["transmit_duration_nominal"], axis=1) + + pulse_length = ds_vend["pulse_length"][relevant_indexes] + # Find index with correct pulse length - unique_pulse_length = np.unique( - self.echodata.beam["transmit_duration_nominal"], axis=1 - ) - idx_wanted = np.abs(ds_vend["pulse_length"] - unique_pulse_length).argmin( + idx_wanted = np.abs(pulse_length - unique_pulse_length).argmin( dim="pulse_length_bin" ) - return ds_vend[param].sel(pulse_length_bin=idx_wanted).drop("pulse_length_bin") + return ( + ds_vend[param] + .isel(pulse_length_bin=idx_wanted, frequency=relevant_indexes) + .drop("pulse_length_bin") + ) - def get_cal_params(self, cal_params): + def get_cal_params(self, cal_params, waveform_mode=None, encode_mode="complex"): """Get cal params using user inputs or values from data file. Parameters ---------- cal_params : dict """ + + if ( + encode_mode == "power" + and waveform_mode == "CW" + and self.echodata.beam_power is not None + ): + beam = self.echodata.beam_power + else: + beam = self.echodata.beam + # Params from the Vendor group - params_from_vend = ["sa_correction", "gain_correction"] - for p in params_from_vend: - # substitute if None in user input - self.cal_params[p] = ( - cal_params[p] if p in cal_params else self._get_vend_cal_params_power(p) - ) + # only execute this if cw and power + if waveform_mode == "CW" and ( + self.echodata.beam_power is not None or "quadrant" not in self.echodata.beam + ): + params_from_vend = ["sa_correction", "gain_correction"] + for p in params_from_vend: + # substitute if None in user input + self.cal_params[p] = ( + cal_params[p] + if p in cal_params + else self._get_vend_cal_params_power(p) + ) # Other params self.cal_params["equivalent_beam_angle"] = ( cal_params["equivalent_beam_angle"] if "equivalent_beam_angle" in cal_params - else self.echodata.beam["equivalent_beam_angle"] + else beam["equivalent_beam_angle"] ) def _cal_power(self, cal_type, use_beam_power=False): @@ -221,10 +219,10 @@ def __init__(self, echodata, env_params, cal_params, **kwargs): self.get_env_params(env_params) if cal_params is None: cal_params = {} - self.get_cal_params(cal_params) + self.get_cal_params(cal_params, waveform_mode="CW", encode_mode="power") # default to CW mode recorded as power samples - self.compute_range_meter(waveform_mode="CW", tvg_correction_factor=2) + self.compute_range_meter(waveform_mode="CW", encode_mode="power") def get_env_params(self, env_params, **kwargs): """Get env params using user inputs or values from data file. 
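# A small sketch of the nearest-pulse-length lookup performed above: for each
# frequency, pick the pulse_length_bin whose tabulated vendor pulse_length is
# closest to the transmit duration actually used. The arrays are toy values,
# not a real Vendor group.
import numpy as np
import xarray as xr

pulse_length = xr.DataArray(
    [[0.256e-3, 0.512e-3, 1.024e-3],
     [0.256e-3, 0.512e-3, 1.024e-3]],
    dims=["frequency", "pulse_length_bin"],
)
# one transmit duration per frequency (toy values)
unique_pulse_length = xr.DataArray([0.512e-3, 1.024e-3], dims=["frequency"])

idx_wanted = np.abs(pulse_length - unique_pulse_length).argmin(dim="pulse_length_bin")
print(idx_wanted.values)  # -> [1 2]: closest bin per frequency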
@@ -281,7 +279,7 @@ class CalibrateEK80(CalibrateEK): z_et = 75 z_er = 1000 - def __init__(self, echodata, env_params, cal_params, waveform_mode): + def __init__(self, echodata, env_params, cal_params, waveform_mode, encode_mode): super().__init__(echodata) # initialize env and cal params @@ -294,15 +292,19 @@ def __init__(self, echodata, env_params, cal_params, waveform_mode): # load env and cal parameters if env_params is None: env_params = {} - self.get_env_params(env_params, waveform_mode=waveform_mode) + self.get_env_params( + env_params, waveform_mode=waveform_mode, encode_mode=encode_mode + ) if cal_params is None: cal_params = {} - self.get_cal_params(cal_params) + self.get_cal_params( + cal_params, waveform_mode=waveform_mode, encode_mode=encode_mode + ) # self.range_meter computed under self._compute_cal() # because the implementation is different depending on waveform_mode and encode_mode - def get_env_params(self, env_params, waveform_mode=None): + def get_env_params(self, env_params, waveform_mode=None, encode_mode="complex"): """Get env params using user inputs or values from data file. EK80 file by default contains sound speed, temperature, depth, salinity, and acidity, @@ -316,15 +318,26 @@ def get_env_params(self, env_params, waveform_mode=None): waveform_mode : str ``CW`` for CW-mode samples, either recorded as complex or power samples ``BB`` for BB-mode samples, recorded as complex samples + encode_mode : str + EK80 data can be encoded as complex samples or power samples. + Use ``complex`` to compute Sv from only complex samples, + and ``power`` to compute Sv from only power samples. """ + + if ( + encode_mode == "power" + and waveform_mode == "CW" + and self.echodata.beam_power is not None + ): + beam = self.echodata.beam_power + else: + beam = self.echodata.beam + # Use center frequency if in BB mode, else use nominal channel frequency if waveform_mode == "BB": - freq = ( - self.echodata.beam["frequency_start"] - + self.echodata.beam["frequency_end"] - ) / 2 + freq = (beam["frequency_start"] + beam["frequency_end"]) / 2 else: - freq = self.echodata.beam["frequency"] + freq = beam["frequency"] # Re-calculate environment parameters if user supply all env variables if ( @@ -762,6 +775,9 @@ def _compute_cal(self, cal_type, waveform_mode, encode_mode): # Warn user about additional data in the raw file if another type exists if self.echodata.beam_power is not None: # both power and complex samples exist + if encode_mode == "complex" and waveform_mode == "CW": + raise ValueError("file does not contain CW complex samples") + if encode_mode == "power": use_beam_power = True # switch source of backscatter data print( @@ -788,11 +804,11 @@ def _compute_cal(self, cal_type, waveform_mode, encode_mode): # Compute Sv if flag_complex: self.compute_range_meter( - waveform_mode=waveform_mode, tvg_correction_factor=0 + waveform_mode=waveform_mode, encode_mode=encode_mode ) ds_cal = self._cal_complex(cal_type=cal_type, waveform_mode=waveform_mode) else: - self.compute_range_meter(waveform_mode="CW", tvg_correction_factor=0) + self.compute_range_meter(waveform_mode="CW", encode_mode=encode_mode) ds_cal = self._cal_power(cal_type=cal_type, use_beam_power=use_beam_power) return ds_cal diff --git a/echopype/convert/__init__.py b/echopype/convert/__init__.py index 396f2cd01..ce788de98 100644 --- a/echopype/convert/__init__.py +++ b/echopype/convert/__init__.py @@ -1,5 +1,5 @@ """ -Include code to unpack manufacturer-specific data files into an interoperable netCDF format. 
+Unpack manufacturer-specific data files into an interoperable netCDF or Zarr format. The current version supports: diff --git a/echopype/convert/api.py b/echopype/convert/api.py index 817301be2..1f96b3ada 100644 --- a/echopype/convert/api.py +++ b/echopype/convert/api.py @@ -1,55 +1,20 @@ -import os import warnings from datetime import datetime as dt from pathlib import Path +from typing import TYPE_CHECKING, Dict, Optional, Tuple import fsspec import zarr -from fsspec.implementations.local import LocalFileSystem +# fmt: off +# black and isort have conflicting ideas about how this should be formatted +from ..core import SONAR_MODELS + +if TYPE_CHECKING: + from ..core import EngineHint, PathHint, SonarModelsHint +# fmt: on from ..echodata.echodata import XARRAY_ENGINE_MAP, EchoData from ..utils import io -from .parse_ad2cp import ParseAd2cp -from .parse_azfp import ParseAZFP -from .parse_ek60 import ParseEK60 -from .parse_ek80 import ParseEK80 -from .set_groups_ad2cp import SetGroupsAd2cp -from .set_groups_azfp import SetGroupsAZFP -from .set_groups_ek60 import SetGroupsEK60 -from .set_groups_ek80 import SetGroupsEK80 - -MODELS = { - "AZFP": { - "ext": ".01A", - "xml": True, - "parser": ParseAZFP, - "set_groups": SetGroupsAZFP, - }, - "EK60": { - "ext": ".raw", - "xml": False, - "parser": ParseEK60, - "set_groups": SetGroupsEK60, - }, - "EK80": { - "ext": ".raw", - "xml": False, - "parser": ParseEK80, - "set_groups": SetGroupsEK80, - }, - "EA640": { - "ext": ".raw", - "xml": False, - "parser": ParseEK80, - "set_groups": SetGroupsEK80, - }, - "AD2CP": { - "ext": ".ad2cp", - "xml": False, - "parser": ParseAd2cp, - "set_groups": SetGroupsAd2cp, - }, -} COMPRESSION_SETTINGS = { "netcdf4": {"zlib": True, "complevel": 4}, @@ -61,84 +26,14 @@ NMEA_SENTENCE_DEFAULT = ["GGA", "GLL", "RMC"] -def _normalize_path(out_f, convert_type, output_storage_options): - if convert_type == "zarr": - return fsspec.get_mapper(out_f, **output_storage_options) - elif convert_type == "netcdf4": - return out_f - - -def _validate_path(source_file, file_format, output_storage_options={}, save_path=None): - """Assemble output file names and path. - - Parameters - ---------- - file_format : str {'.nc', '.zarr'} - save_path : str - Either a directory or a file. If none then the save path is 'temp_echopype_output/' - in the current working directory. - """ - if save_path is None: - warnings.warn("save_path is not provided") - - current_dir = Path.cwd() - # Check permission, raise exception if no permission - io.check_file_permissions(current_dir) - out_dir = current_dir.joinpath(Path("temp_echopype_output")) - if not out_dir.exists(): - out_dir.mkdir(parents=True) - - warnings.warn( - f"Resulting converted file(s) will be available at {str(out_dir)}" - ) - out_path = str(out_dir / (Path(source_file).stem + file_format)) - - else: - if not isinstance(save_path, Path) and not isinstance(save_path, str): - raise TypeError("save_path must be a string or Path") - - fsmap = fsspec.get_mapper(str(save_path), **output_storage_options) - output_fs = fsmap.fs - - # Use the full path such as s3://... 
if it's not local, otherwise use root - if isinstance(output_fs, LocalFileSystem): - root = fsmap.root - else: - root = save_path - if Path(root).suffix == "": # directory - out_dir = root - out_path = os.path.join(root, Path(source_file).stem + file_format) - else: # file - out_dir = os.path.dirname(root) - out_path = os.path.join(out_dir, Path(root).stem + file_format) - - # Create folder if save_path does not exist already - fsmap = fsspec.get_mapper(str(out_dir), **output_storage_options) - fs = fsmap.fs - if file_format == ".nc" and not isinstance(fs, LocalFileSystem): - raise ValueError("Only local filesystem allowed for NetCDF output.") - else: - try: - # Check permission, raise exception if no permission - io.check_file_permissions(fsmap) - if isinstance(fs, LocalFileSystem): - # Only make directory if local file system - # otherwise it will just create the object path - fs.mkdir(fsmap.root) - except FileNotFoundError: - raise ValueError("Specified save_path is not valid.") - - return out_path # output_path is always a string - - def to_file( echodata: EchoData, - engine, - save_path=None, - compress=True, - overwrite=False, - parallel=False, - output_storage_options={}, + engine: "EngineHint", + save_path: Optional["PathHint"] = None, + compress: bool = True, + overwrite: bool = False, + parallel: bool = False, + output_storage_options: Dict[str, str] = {}, **kwargs, ): """Save content of EchoData to netCDF or zarr. @@ -166,10 +61,9 @@ def to_file( raise ValueError("Unknown type to convert file to!") # Assemble output file names and path - format_mapping = dict(map(reversed, XARRAY_ENGINE_MAP.items())) - output_file = _validate_path( + output_file = io.validate_output_path( source_file=echodata.source_file, - file_format=format_mapping[engine], + engine=engine, save_path=save_path, output_storage_options=output_storage_options, ) @@ -191,7 +85,9 @@ def to_file( print(f"{dt.now().strftime('%H:%M:%S')} saving {output_file}") _save_groups_to_file( echodata, - output_path=_normalize_path(output_file, engine, output_storage_options), + output_path=io.sanitize_file_path( + file_path=output_file, storage_options=output_storage_options + ), engine=engine, compress=compress, ) @@ -323,7 +219,7 @@ def _save_groups_to_file(echodata, output_path, engine, compress=True): ) -def _set_convert_params(param_dict): +def _set_convert_params(param_dict: Dict[str, str]) -> Dict[str, str]: """Set parameters (metadata) that may not exist in the raw files. The default set of parameters include: @@ -366,7 +262,12 @@ def _set_convert_params(param_dict): return out_params -def _check_file(raw_file, sonar_model, xml_path=None, storage_options={}): +def _check_file( + raw_file, + sonar_model: "SonarModelsHint", + xml_path: Optional["PathHint"] = None, + storage_options: Dict[str, str] = {}, +) -> Tuple[str, str]: """Checks whether the file and/or xml file exists and whether they have the correct extensions. 
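# A minimal sketch of the typing pattern the convert/api.py changes adopt:
# hints such as "PathHint" and "SonarModelsHint" (defined in the new
# echopype/core.py further down in this diff) are imported only under
# TYPE_CHECKING and referenced as string annotations, so they cost nothing at
# runtime and avoid circular imports. DemoSonarHint and open_raw_sketch below
# are illustrative names, not echopype symbols.
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # seen by type checkers only; never executed at runtime
    from typing_extensions import Literal
    DemoSonarHint = Literal["AZFP", "EK60", "EK80"]

def open_raw_sketch(sonar_model: Optional["DemoSonarHint"] = None) -> None:
    # at runtime the annotation stays a plain string, so no import is needed
    print(sonar_model)

open_raw_sketch("EK60")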
@@ -389,7 +290,7 @@ def _check_file(raw_file, sonar_model, xml_path=None, storage_options={}): path to existing xml file empty string if no xml file is required for the specified model """ - if MODELS[sonar_model]["xml"]: # if this sonar model expects an XML file + if SONAR_MODELS[sonar_model]["xml"]: # if this sonar model expects an XML file if not xml_path: raise ValueError(f"XML file is required for {sonar_model} raw data") else: @@ -407,23 +308,22 @@ def _check_file(raw_file, sonar_model, xml_path=None, storage_options={}): # TODO: https://github.com/OSOceanAcoustics/echopype/issues/229 # to add compatibility for pathlib.Path objects for local paths fsmap = fsspec.get_mapper(raw_file, **storage_options) - ext = MODELS[sonar_model]["ext"] + validate_ext = SONAR_MODELS[sonar_model]["validate_ext"] if not fsmap.fs.exists(fsmap.root): raise FileNotFoundError(f"There is no file named {Path(raw_file).name}") - if Path(raw_file).suffix.upper() != ext.upper(): - raise ValueError(f"Expecting a {ext} file but got {raw_file}") + validate_ext(Path(raw_file).suffix.upper()) return str(raw_file), str(xml) def open_raw( - raw_file=None, - sonar_model=None, - xml_path=None, - convert_params=None, - storage_options=None, -): + raw_file: Optional["PathHint"] = None, + sonar_model: Optional["SonarModelsHint"] = None, + xml_path: Optional["PathHint"] = None, + convert_params: Optional[Dict[str, str]] = None, + storage_options: Optional[Dict[str, str]] = None, +) -> Optional[EchoData]: """Create an EchoData object containing parsed data from a single raw data file. The EchoData object can be used for adding metadata and ancillary data @@ -442,6 +342,10 @@ def open_raw( and need to be added to the converted file storage_options : dict options for cloud storage + + Returns + ------- + EchoData object """ if (sonar_model is None) and (raw_file is None): print("Please specify the path to the raw data file and the sonar model.") @@ -474,12 +378,12 @@ def open_raw( ) else: # Uppercased model in case people use lowercase - sonar_model = sonar_model.upper() + sonar_model = sonar_model.upper() # type: ignore # Check models - if sonar_model not in MODELS: + if sonar_model not in SONAR_MODELS: raise ValueError( - f"Unsupported echosounder model: {sonar_model}\nMust be one of: {list(MODELS)}" + f"Unsupported echosounder model: {sonar_model}\nMust be one of: {list(SONAR_MODELS)}" # noqa ) # Check paths and file types @@ -492,22 +396,24 @@ def open_raw( if not isinstance(raw_file, str): raise TypeError("file must be a string or Path") + assert sonar_model is not None + # Check file extension and existence file_chk, xml_chk = _check_file(raw_file, sonar_model, xml_path, storage_options) # TODO: the if-else below only works for the AZFP vs EK contrast, # but is brittle since it is abusing params by using it implicitly - if MODELS[sonar_model]["xml"]: + if SONAR_MODELS[sonar_model]["xml"]: params = xml_chk else: params = "ALL" # reserved to control if only wants to parse a certain type of datagram # Parse raw file and organize data into groups - parser = MODELS[sonar_model]["parser"]( + parser = SONAR_MODELS[sonar_model]["parser"]( file_chk, params=params, storage_options=storage_options ) parser.parse_raw() - setgrouper = MODELS[sonar_model]["set_groups"]( + setgrouper = SONAR_MODELS[sonar_model]["set_groups"]( parser, input_file=file_chk, output_path=None, diff --git a/echopype/convert/parse_ad2cp.py b/echopype/convert/parse_ad2cp.py index 68f62a58f..afdbbc2a7 100644 --- a/echopype/convert/parse_ad2cp.py +++ 
b/echopype/convert/parse_ad2cp.py @@ -437,16 +437,10 @@ def _read_data_record(self, f: BinaryIO): raise ValueError("invalid burst/average data record version") elif self.data_exclude["id"] in (0x17, 0x1B): # bottom track self.data_record_type = DataRecordType.BOTTOM_TRACK - elif self.data_exclude["id"] in (0x23, 0x24): # echosounder raw - if ( - self.parser.get_pulse_compressed() > 0 - and len(self.parser.echosounder_raw_transmit_packets) == 0 - ): - # first echosounder raw packet is the transmit packet - # if pulse compression is enabled - self.data_record_type = DataRecordType.ECHOSOUNDER_RAW_TRANSMIT - else: - self.data_record_type = DataRecordType.ECHOSOUNDER_RAW + elif self.data_exclude["id"] == 0x23: # echosounder raw + self.data_record_type = DataRecordType.ECHOSOUNDER_RAW + elif self.data_exclude["id"] == 0x24: # echosounder raw transmit + self.data_record_type = DataRecordType.ECHOSOUNDER_RAW_TRANSMIT elif self.data_exclude["id"] == 0x1A: # burst altimeter # altimeter is only supported by burst/average version 3 self.data_record_type = DataRecordType.BURST_VERSION3 diff --git a/echopype/convert/utils/ek_raw_io.py b/echopype/convert/utils/ek_raw_io.py index 8b6431a46..d835544a3 100644 --- a/echopype/convert/utils/ek_raw_io.py +++ b/echopype/convert/utils/ek_raw_io.py @@ -100,7 +100,7 @@ def __init__( # create a raw file object for the buffered reader fmap = fsspec.get_mapper(name, **storage_options) - if isinstance(fmap, LocalFileSystem): + if isinstance(fmap.fs, LocalFileSystem): fio = FileIO(name, mode=mode, closefd=closefd) else: fio = fmap.fs.open(fmap.root) diff --git a/echopype/core.py b/echopype/core.py new file mode 100644 index 000000000..b4b55042b --- /dev/null +++ b/echopype/core.py @@ -0,0 +1,118 @@ +import os +import re +from typing import TYPE_CHECKING, Any, Callable, Dict, Union + +from fsspec.mapping import FSMap +from typing_extensions import Literal + +from .convert.parse_ad2cp import ParseAd2cp +from .convert.parse_azfp import ParseAZFP +from .convert.parse_ek60 import ParseEK60 +from .convert.parse_ek80 import ParseEK80 +from .convert.set_groups_ad2cp import SetGroupsAd2cp +from .convert.set_groups_azfp import SetGroupsAZFP +from .convert.set_groups_ek60 import SetGroupsEK60 +from .convert.set_groups_ek80 import SetGroupsEK80 + +if TYPE_CHECKING: + # Please keep SonarModelsHint updated with the keys of the SONAR_MODELS dict + SonarModelsHint = Literal["AZFP", "EK60", "EK80", "EA640", "AD2CP"] + PathHint = Union[str, os.PathLike, FSMap] + FileFormatHint = Literal[".nc", ".zarr"] + EngineHint = Literal["netcdf4", "zarr"] + + +def validate_azfp_ext(test_ext: str): + if not re.fullmatch(r"\.\d{2}[a-zA-Z]", test_ext): + raise ValueError( + 'Expecting a file in the form ".XXY" ' + f"where XX is a number and Y is a letter but got {test_ext}" + ) + + +def validate_ext(ext: str) -> Callable[[str], None]: + def inner(test_ext: str): + if ext.casefold() != test_ext.casefold(): + raise ValueError(f"Expecting a {ext} file but got {test_ext}") + + return inner + + +SONAR_MODELS: Dict["SonarModelsHint", Dict[str, Any]] = { + "AZFP": { + "validate_ext": validate_azfp_ext, + "xml": True, + "parser": ParseAZFP, + "set_groups": SetGroupsAZFP, + "concat_dims": { + "platform": None, + "nmea": "location_time", + "vendor": ["ping_time", "frequency"], + "default": "ping_time", + }, + "concat_data_vars": { + "platform": "all", + "default": "minimal", + }, + }, + "EK60": { + "validate_ext": validate_ext(".raw"), + "xml": False, + "parser": ParseEK60, + "set_groups": SetGroupsEK60, + 
"concat_dims": { + "platform": ["location_time", "ping_time"], + "nmea": "location_time", + "vendor": None, + "default": "ping_time", + }, + "concat_data_vars": { + "default": "minimal", + }, + }, + "EK80": { + "validate_ext": validate_ext(".raw"), + "xml": False, + "parser": ParseEK80, + "set_groups": SetGroupsEK80, + "concat_dims": { + "platform": ["location_time", "mru_time"], + "nmea": "location_time", + "vendor": None, + "default": "ping_time", + }, + "concat_data_vars": { + "default": "minimal", + }, + }, + "EA640": { + "validate_ext": validate_ext(".raw"), + "xml": False, + "parser": ParseEK80, + "set_groups": SetGroupsEK80, + "concat_dims": { + "platform": ["location_time", "mru_time"], + "nmea": "location_time", + "vendor": None, + "default": "ping_time", + }, + "concat_data_vars": { + "default": "minimal", + }, + }, + "AD2CP": { + "validate_ext": validate_ext(".ad2cp"), + "xml": False, + "parser": ParseAd2cp, + "set_groups": SetGroupsAd2cp, + "concat_dims": { + "platform": "ping_time", + "nmea": "location_time", + "vendor": None, + "default": "ping_time", + }, + "concat_data_vars": { + "default": "minimal", + }, + }, +} diff --git a/echopype/echodata/api.py b/echopype/echodata/api.py index f8bc3a5ab..8f0ac1f05 100644 --- a/echopype/echodata/api.py +++ b/echopype/echodata/api.py @@ -1,8 +1,27 @@ +from typing import TYPE_CHECKING, Dict + +if TYPE_CHECKING: + from ..core import PathHint + from .echodata import EchoData -def open_converted(converted_raw_path, storage_options=None): - """Create an EchoData object from a single converted zarr/nc files.""" +def open_converted( + converted_raw_path: "PathHint", storage_options: Dict[str, str] = None +): + """Create an EchoData object from a single converted netcdf or zarr file. + + Parameters + ---------- + converted_raw_path : str + path to converted data file + storage_options : dict + options for cloud storage + + Returns + ------- + EchoData object + """ # TODO: combine multiple files when opening return EchoData( converted_raw_path=converted_raw_path, storage_options=storage_options diff --git a/echopype/echodata/combine.py b/echopype/echodata/combine.py new file mode 100644 index 000000000..09d49121d --- /dev/null +++ b/echopype/echodata/combine.py @@ -0,0 +1,294 @@ +import warnings +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List + +import xarray as xr +from _echopype_version import version as ECHOPYPE_VERSION + +from ..core import SONAR_MODELS +from ..qc import coerce_increasing_time, exist_reversed_time +from .echodata import EchoData + + +def union_attrs(datasets: List[xr.Dataset]) -> Dict[str, Any]: + """ + Merges attrs from a list of datasets. + Prioritizes keys from later datsets. + """ + + total_attrs = dict() + for ds in datasets: + total_attrs.update(ds.attrs) + return total_attrs + + +def assemble_combined_provenance(input_paths): + return xr.Dataset( + data_vars={ + "src_filenames": ("file", input_paths), + }, + attrs={ + "conversion_software_name": "echopype", + "conversion_software_version": ECHOPYPE_VERSION, + "conversion_time": datetime.utcnow().isoformat(timespec="seconds") + + "Z", # use UTC time + }, + ) + + +def combine_echodata(echodatas: List[EchoData], combine_attrs="override") -> EchoData: + """ + Combines multiple `EchoData` objects into a single `EchoData` object. + + Parameters + ---------- + echodatas: List[EchoData] + The list of `EchoData` objects to be combined. 
+ combine_attrs: { "override", "drop", "identical", "no_conflicts", "overwrite_conflicts" } + String indicating how to combine attrs of the `EchoData` objects being merged. + This parameter matches the identically named xarray parameter + (see https://xarray.pydata.org/en/latest/generated/xarray.combine_nested.html) + with the exception of the "overwrite_conflicts" value. + + * "override": Default. skip comparing and copy attrs from the first `EchoData` + object to the result. + * "drop": empty attrs on returned `EchoData` object. + * "identical": all attrs must be the same on every object. + * "no_conflicts": attrs from all objects are combined, + any that have the same name must also have the same value. + * "overwrite_conflicts": attrs from all `EchoData` objects are combined, + attrs with conflicting keys will be overwritten by later `EchoData` objects. + + Returns + ------- + EchoData + An `EchoData` object with all of the data from the input `EchoData` objects combined. + + Raises + ------ + ValueError + If `echodatas` contains `EchoData` objects with different or `None` `sonar_model` values + (i.e., all `EchoData` objects must have the same non-None `sonar_model` value). + ValueError + If EchoData objects have conflicting source file names. + + Warns + ----- + UserWarning + If the `sonar_model` of the input `EchoData` objects is `"EK60"` and any `EchoData` objects + have non-monotonically increasing `ping_time`, `location_time` or `mru_time` values, + the corresponding values in the output `EchoData` object will be increased starting at the + timestamp where the reversal occurs such that all values in the output are monotonically + increasing. Additionally, the original `ping_time`, `location_time` or `mru_time` values + will be stored in the `Provenance` group, although this behavior may change in future + versions. + + Warnings + -------- + Changes in parameters between `EchoData` objects are not currently checked; + however, they may raise an error in future versions. + + Notes + ----- + * `EchoData` objects are combined by combining their groups individually. + * Attributes from all groups before the combination will be stored in the provenance group, + although this behavior may change in future versions. + * The `source_file` and `converted_raw_path` attributes will be copied from the first + `EchoData` object in the given list, but this may change in future versions. 
+ + Examples + -------- + >>> ed1 = echopype.open_converted("file1.nc") + >>> ed2 = echopype.open_converted("file2.zarr") + >>> combined = echopype.combine_echodata([ed1, ed2]) + """ + + result = EchoData() + if len(echodatas) == 0: + return result + result.source_file = echodatas[0].source_file + result.converted_raw_path = echodatas[0].converted_raw_path + + sonar_model = None + for echodata in echodatas: + if echodata.sonar_model is None: + raise ValueError( + "all EchoData objects must have non-None sonar_model values" + ) + elif sonar_model is None: + sonar_model = echodata.sonar_model + elif echodata.sonar_model != sonar_model: + raise ValueError( + "all EchoData objects must have the same sonar_model value" + ) + + # ping time before reversal correction + old_ping_time = None + # ping time after reversal correction + new_ping_time = None + # location time before reversal correction + old_location_time = None + # location time after reversal correction + new_location_time = None + # mru time before reversal correction + old_mru_time = None + # mru time after reversal correction + new_mru_time = None + + # all attributes before combination + # { group1: [echodata1 attrs, echodata2 attrs, ...], ... } + old_attrs: Dict[str, List[Dict[str, Any]]] = dict() + + for group in EchoData.group_map: + group_datasets = [ + getattr(echodata, group) + for echodata in echodatas + if getattr(echodata, group) is not None + ] + if group in ("top", "sonar"): + combined_group = getattr(echodatas[0], group) + elif group == "provenance": + combined_group = assemble_combined_provenance( + [ + echodata.source_file + if echodata.source_file is not None + else echodata.converted_raw_path + for echodata in echodatas + ] + ) + else: + if len(group_datasets) == 0: + setattr(result, group, None) + continue + + concat_dim = SONAR_MODELS[sonar_model]["concat_dims"].get( + group, SONAR_MODELS[sonar_model]["concat_dims"]["default"] + ) + concat_data_vars = SONAR_MODELS[sonar_model]["concat_data_vars"].get( + group, SONAR_MODELS[sonar_model]["concat_data_vars"]["default"] + ) + combined_group = xr.combine_nested( + group_datasets, + [concat_dim], + data_vars=concat_data_vars, + coords="minimal", + combine_attrs="drop" + if combine_attrs == "overwrite_conflicts" + else combine_attrs, + ) + if combine_attrs == "overwrite_conflicts": + combined_group.attrs.update(union_attrs(group_datasets)) + + if group == "beam": + if sonar_model == "EK80": + combined_group["transceiver_software_version"] = combined_group[ + "transceiver_software_version" + ].astype(" 1: + old_attrs[group] = [group_dataset.attrs for group_dataset in group_datasets] + if combined_group is not None: + # xarray inserts this dimension when concating along multiple dimensions + combined_group = combined_group.drop_dims("concat_dim", errors="ignore") + setattr(result, group, combined_group) + + # save ping time before reversal correction + if old_ping_time is not None: + result.provenance["old_ping_time"] = old_ping_time + # save location time before reversal correction + if old_location_time is not None: + result.provenance["old_location_time"] = old_location_time + # save mru time before reversal correction + if old_mru_time is not None: + result.provenance["old_mru_time"] = old_mru_time + # TODO: possible parameter to disable original attributes and original ping_time storage + # in provenance group? 
+ # save attrs from before combination + for group in old_attrs: + all_group_attrs = set() + for group_attrs in old_attrs[group]: + for attr in group_attrs: + all_group_attrs.add(attr) + echodata_filenames = [] + for ed in echodatas: + if ed.source_file is not None: + filepath = ed.source_file + elif ed.converted_raw_path is not None: + filepath = ed.converted_raw_path + else: + # unreachable + raise ValueError("EchoData object does not have a file path") + filename = Path(filepath).name + if filename in echodata_filenames: + raise ValueError("EchoData objects have conflicting filenames") + echodata_filenames.append(filename) + attrs = xr.DataArray( + [ + [group_attrs.get(attr) for attr in all_group_attrs] + for group_attrs in old_attrs[group] + ], + coords={ + "echodata_filename": echodata_filenames, + f"{group}_attr_key": list(all_group_attrs), + }, + dims=["echodata_filename", f"{group}_attr_key"], + ) + result.provenance = result.provenance.assign({f"{group}_attrs": attrs}) + + return result diff --git a/echopype/echodata/echodata.py b/echopype/echodata/echodata.py index 5f4195f92..1ba866ed2 100644 --- a/echopype/echodata/echodata.py +++ b/echopype/echodata/echodata.py @@ -3,43 +3,58 @@ from collections import OrderedDict from html import escape from pathlib import Path +from typing import TYPE_CHECKING, Dict, Optional import fsspec +import numpy as np import xarray as xr from zarr.errors import GroupNotFoundError -from ..utils.io import check_file_existance, sanitize_file_path +if TYPE_CHECKING: + from ..core import EngineHint, FileFormatHint, PathHint, SonarModelsHint + +from ..utils.io import check_file_existence, sanitize_file_path from ..utils.repr import HtmlTemplate +from ..utils.uwa import calc_sound_speed from .convention import _get_convention -XARRAY_ENGINE_MAP = { +XARRAY_ENGINE_MAP: Dict["FileFormatHint", "EngineHint"] = { ".nc": "netcdf4", ".zarr": "zarr", } +TVG_CORRECTION_FACTOR = { + "EK60": 2, + "EK80": 0, +} + class EchoData: """Echo data model class for handling raw converted data, including multiple files associated with the same data set. """ + group_map = OrderedDict(_get_convention()["groups"]) + def __init__( self, - converted_raw_path=None, - storage_options=None, - source_file=None, - xml_path=None, - sonar_model=None, + converted_raw_path: Optional["PathHint"] = None, + storage_options: Dict[str, str] = None, + source_file: Optional["PathHint"] = None, + xml_path: "PathHint" = None, + sonar_model: "SonarModelsHint" = None, ): # TODO: consider if should open datasets in init # or within each function call when echodata is used. Need to benchmark. 
-        self.storage_options = storage_options if storage_options is not None else {}
-        self.source_file = source_file
-        self.xml_path = xml_path
-        self.sonar_model = sonar_model
-        self.converted_raw_path = None
+        self.storage_options: Dict[str, str] = (
+            storage_options if storage_options is not None else {}
+        )
+        self.source_file: Optional["PathHint"] = source_file
+        self.xml_path: Optional["PathHint"] = xml_path
+        self.sonar_model: Optional["SonarModelsHint"] = sonar_model
+        self.converted_raw_path: Optional["PathHint"] = None

         self.__setup_groups()
         self.__read_converted(converted_raw_path)
@@ -47,8 +62,8 @@ def __init__(
     def __repr__(self) -> str:
         """Make string representation of InferenceData object."""
         existing_groups = [
-            f"{group}: ({self.__group_map[group]['name']}) {self.__group_map[group]['description']}"  # noqa
-            for group in self.__group_map.keys()
+            f"{group}: ({self.group_map[group]['name']}) {self.group_map[group]['description']}"  # noqa
+            for group in self.group_map.keys()
             if isinstance(getattr(self, group), xr.Dataset)
         ]
         fpath = "Internal Memory"
@@ -70,17 +85,15 @@ def _repr_html_(self) -> str:
             html_repr = f"<pre>{escape(repr(self))}</pre>"
         else:
             xr_collections = []
-            for group in self.__group_map.keys():
+            for group in self.group_map.keys():
                 if isinstance(getattr(self, group), xr.Dataset):
                     xr_data = getattr(self, group)._repr_html_()
                     xr_collections.append(
                         HtmlTemplate.element_template.format(  # noqa
                             group_id=group + str(uuid.uuid4()),
                             group=group,
-                            group_name=self.__group_map[group]["name"],
-                            group_description=self.__group_map[group][
-                                "description"
-                            ],
+                            group_name=self.group_map[group]["name"],
+                            group_description=self.group_map[group]["description"],
                             xr_data=xr_data,
                         )
                     )
@@ -98,23 +111,211 @@ def _repr_html_(self) -> str:
         return html_repr

     def __setup_groups(self):
-        self.__group_map = OrderedDict(_get_convention()["groups"])
-        for group in self.__group_map.keys():
+        for group in self.group_map.keys():
             setattr(self, group, None)

-    def __read_converted(self, converted_raw_path):
+    def __read_converted(self, converted_raw_path: Optional["PathHint"]):
         if converted_raw_path is not None:
             self._check_path(converted_raw_path)
             converted_raw_path = self._sanitize_path(converted_raw_path)
             self._load_file(converted_raw_path)
-            self.sonar_model = self.top.keywords
+
+            if isinstance(converted_raw_path, fsspec.FSMap):
+                # Convert fsmap to Path so it can be used
+                # for retrieving the path strings
+                converted_raw_path = Path(converted_raw_path.root)
+
         self.converted_raw_path = converted_raw_path

+    def compute_range(
+        self,
+        env_params=None,
+        azfp_cal_type=None,
+        ek_waveform_mode=None,
+        ek_encode_mode="complex",
+    ):
+        """
+        Computes the range of the data contained in this `EchoData` object, in meters.
+
+        This method only applies to `sonar_model`s of `"AZFP"`, `"EK60"`, and `"EK80"`.
+        If the `sonar_model` is not `"AZFP"`, `"EK60"`, or `"EK80"`, an error is raised.
+
+        Parameters
+        ----------
+        env_params : dict
+            This dictionary should contain either:
+            - `"sound_speed"`: `float`
+            - `"temperature"`, `"salinity"`, and `"pressure"`: `float`s,
+              in which case the sound speed will be calculated.
+            If the `sonar_model` is `"EK60"` or `"EK80"`, and
+            `EchoData.environment.sound_speed_indicative` exists, then this parameter
+            does not need to be specified.
+        azfp_cal_type : {"Sv", "Sp"}, optional
+            - `"Sv"` for calculating volume backscattering strength
+            - `"Sp"` for calculating point backscattering strength.
+            This parameter is only used if `sonar_model` is `"AZFP"`,
+            and in that case it must be specified.
+        ek_waveform_mode : {"CW", "BB"}, optional
+            - `"CW"` for CW-mode samples, either recorded as complex or power samples
+            - `"BB"` for BB-mode samples, recorded as complex samples
+            This parameter is only used if `sonar_model` is `"EK60"` or `"EK80"`,
+            and in those cases it must be specified.
+        ek_encode_mode : {"complex", "power"}, optional
+            For EK80 data, range can be computed from complex or power samples.
+            The type of sample used can be specified with this parameter.
+            - `"complex"` to use complex samples
+            - `"power"` to use power samples
+            This parameter is only used if `sonar_model` is `"EK80"`.
+
+        Returns
+        -------
+        xr.DataArray
+            The range of the data in meters.
+
+        Raises
+        ------
+        ValueError
+            - When `sonar_model` is `"AZFP"` but `azfp_cal_type` is not specified or is `None`.
+            - When `sonar_model` is `"EK60"` or `"EK80"` but `ek_waveform_mode`
+              is not specified or is `None`.
+            - When `sonar_model` is `"EK60"` but `ek_waveform_mode` is `"BB"` (EK60 cannot have
+              broadband samples).
+ - When `sonar_model` is `"AZFP"` and `env_params` does not contain + either `"sound_speed"` or all of `"temperature"`, `"salinity"`, and `"pressure"`. + - When `sonar_model` is `"EK60"` or `"EK80"`, + EchoData.environment.sound_speed_indicative does not exist, + and `env_params` does not contain either `"sound_speed"` or all of `"temperature"`, + `"salinity"`, and `"pressure"`. + - When `sonar_model` is not `"AZFP"`, `"EK60"`, or `"EK80"`. + """ + + def squeeze_non_scalar(n): + if not np.isscalar(n): + n = n.squeeze() + return n + + if "sound_speed" in env_params: + sound_speed = squeeze_non_scalar(env_params["sound_speed"]) + elif all( + [param in env_params for param in ("temperature", "salinity", "pressure")] + ): + sound_speed = calc_sound_speed( + squeeze_non_scalar(env_params["temperature"]), + squeeze_non_scalar(env_params["salinity"]), + squeeze_non_scalar(env_params["pressure"]), + formula_source="AZFP" if self.sonar_model == "AZFP" else "Mackenzie", + ) + elif ( + self.sonar_model in ("EK60", "EK80") + and "sound_speed_indicative" in self.environment + ): + sound_speed = squeeze_non_scalar(self.environment["sound_speed_indicative"]) + else: + raise ValueError( + "sound speed must be specified in env_params, " + "with temperature/salinity/pressure in env_params to be calculated, " + "or in EchoData.environment.sound_speed_indicative for EK60 and EK80 sonar models" + ) + + if self.sonar_model == "AZFP": + cal_type = azfp_cal_type + if cal_type is None: + raise ValueError( + "azfp_cal_type must be specified when sonar_model is AZFP" + ) + + # Notation below follows p.86 of user manual + N = self.vendor["number_of_samples_per_average_bin"] # samples per bin + f = self.vendor["digitization_rate"] # digitization rate + L = self.vendor["lockout_index"] # number of lockout samples + + # keep this in ref of AZFP matlab code, + # set to 1 since we want to calculate from raw data + bins_to_avg = 1 + + # Calculate range using parameters for each freq + # This is "the range to the centre of the sampling volume + # for bin m" from p.86 of user manual + if cal_type == "Sv": + range_offset = 0 + else: + range_offset = ( + sound_speed * self.beam["transmit_duration_nominal"] / 4 + ) # from matlab code + range_meter = ( + sound_speed * L / (2 * f) + + (sound_speed / 4) + * ( + ((2 * (self.beam.range_bin + 1) - 1) * N * bins_to_avg - 1) / f + + self.beam["transmit_duration_nominal"] + ) + - range_offset + ) + range_meter.name = "range" # add name to facilitate xr.merge + + return range_meter + elif self.sonar_model in ("EK60", "EK80"): + waveform_mode = ek_waveform_mode + encode_mode = ek_encode_mode + + if self.sonar_model == "EK60" and waveform_mode == "BB": + raise ValueError("EK60 cannot have BB samples") + + if waveform_mode is None: + raise ValueError( + "ek_waveform_mode must be specified when sonar_model is EK60 or EK80" + ) + tvg_correction_factor = TVG_CORRECTION_FACTOR[self.sonar_model] + + if waveform_mode == "CW": + if ( + self.sonar_model == "EK80" + and encode_mode == "power" + and self.beam_power is not None + ): + beam = self.beam_power + else: + beam = self.beam + + sample_thickness = beam["sample_interval"] * sound_speed / 2 + # TODO: Check with the AFSC about the half sample difference + range_meter = ( + beam.range_bin - tvg_correction_factor + ) * sample_thickness # [frequency x range_bin] + elif waveform_mode == "BB": + # TODO: bug: right now only first ping_time has non-nan range + shift = self.beam[ + "transmit_duration_nominal" + ] # based on Lar Anderson's Matlab code + 
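+            # Worked sketch of the formula below with hypothetical values:
+            # sample_interval = 64e-6 s, sound_speed = 1500 m/s, and
+            # transmit_duration_nominal = 1.024e-3 s give, at range_bin 100,
+            # (100 * 64e-6 - 1.024e-3) * 1500 / 2 ≈ 4.03 m.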
# TODO: once we allow putting in arbitrary sound_speed, + # change below to use linearly-interpolated values + range_meter = ( + (self.beam.range_bin * self.beam["sample_interval"] - shift) + * sound_speed + / 2 + ) + # TODO: Lar Anderson's code include a slicing by minRange with a default of 0.02 m, + # need to ask why and see if necessary here + else: + raise ValueError("Input waveform_mode not recognized!") + + # make order of dims conform with the order of backscatter data + range_meter = range_meter.transpose("frequency", "ping_time", "range_bin") + range_meter = range_meter.where( + range_meter > 0, 0 + ) # set negative ranges to 0 + range_meter.name = "range" # add name to facilitate xr.merge + + return range_meter + else: + raise ValueError( + "this method only supports sonar_model values of AZFP, EK60, and EK80" + ) + @classmethod def _load_convert(cls, convert_obj): new_cls = cls() - for group in new_cls.__group_map.keys(): + for group in new_cls.group_map.keys(): if hasattr(convert_obj, group): setattr(new_cls, group, getattr(convert_obj, group)) @@ -122,9 +323,9 @@ def _load_convert(cls, convert_obj): setattr(new_cls, "source_file", getattr(convert_obj, "source_file")) return new_cls - def _load_file(self, raw_path): + def _load_file(self, raw_path: "PathHint"): """Lazy load Top-level, Beam, Environment, and Vendor groups from raw file.""" - for group, value in self.__group_map.items(): + for group, value in self.group_map.items(): # EK80 data may have a Beam_power group if both complex and power data exist. ds = None try: @@ -135,23 +336,23 @@ def _load_file(self, raw_path): except (OSError, GroupNotFoundError): # Skips group not found errors for EK80 and ADCP ... - if group == "top": - self.sonar_model = ds.keywords.upper() + if group == "top" and hasattr(ds, "keywords"): + self.sonar_model = ds.keywords.upper() # type: ignore if isinstance(ds, xr.Dataset): setattr(self, group, ds) - def _check_path(self, filepath): + def _check_path(self, filepath: "PathHint"): """Check if converted_raw_path exists""" - file_exists = check_file_existance(filepath, self.storage_options) + file_exists = check_file_existence(filepath, self.storage_options) if not file_exists: raise FileNotFoundError(f"There is no file named {filepath}") - def _sanitize_path(self, filepath): + def _sanitize_path(self, filepath: "PathHint") -> "PathHint": filepath = sanitize_file_path(filepath, self.storage_options) return filepath - def _check_suffix(self, filepath): + def _check_suffix(self, filepath: "PathHint") -> "FileFormatHint": """Check if file type is supported.""" # TODO: handle multiple files through the same set of checks for combining files if isinstance(filepath, fsspec.FSMap): @@ -162,14 +363,14 @@ def _check_suffix(self, filepath): if suffix not in XARRAY_ENGINE_MAP: raise ValueError("Input file type not supported!") - return suffix + return suffix # type: ignore - def _load_group(self, filepath, group=None): + def _load_group(self, filepath: "PathHint", group: Optional[str] = None): """Loads each echodata group""" suffix = self._check_suffix(filepath) return xr.open_dataset(filepath, group=group, engine=XARRAY_ENGINE_MAP[suffix]) - def to_netcdf(self, save_path=None, **kwargs): + def to_netcdf(self, save_path: Optional["PathHint"] = None, **kwargs): """Save content of EchoData to netCDF. 
Parameters @@ -191,7 +392,7 @@ def to_netcdf(self, save_path=None, **kwargs): return to_file(self, "netcdf4", save_path=save_path, **kwargs) - def to_zarr(self, save_path=None, **kwargs): + def to_zarr(self, save_path: Optional["PathHint"] = None, **kwargs): """Save content of EchoData to zarr. Parameters @@ -215,7 +416,7 @@ def to_zarr(self, save_path=None, **kwargs): # TODO: Remove below in future versions. They are for supporting old API calls. @property - def nc_path(self): + def nc_path(self) -> Optional["PathHint"]: warnings.warn( "`nc_path` is deprecated, Use `converted_raw_path` instead.", DeprecationWarning, @@ -228,7 +429,7 @@ def nc_path(self): return str(path.parent / (path.stem + ".nc")) @property - def zarr_path(self): + def zarr_path(self) -> Optional["PathHint"]: warnings.warn( "`zarr_path` is deprecated, Use `converted_raw_path` instead.", DeprecationWarning, @@ -240,7 +441,13 @@ def zarr_path(self): path = Path(self.converted_raw_path) return str(path.parent / (path.stem + ".zarr")) - def raw2nc(self, save_path=None, combine_opt=False, overwrite=False, compress=True): + def raw2nc( + self, + save_path: "PathHint" = None, + combine_opt: bool = False, + overwrite: bool = False, + compress: bool = True, + ): warnings.warn( "`raw2nc` is deprecated, use `to_netcdf` instead.", DeprecationWarning, @@ -254,7 +461,11 @@ def raw2nc(self, save_path=None, combine_opt=False, overwrite=False, compress=Tr ) def raw2zarr( - self, save_path=None, combine_opt=False, overwrite=False, compress=True + self, + save_path: "PathHint" = None, + combine_opt: bool = False, + overwrite: bool = False, + compress: bool = True, ): warnings.warn( "`raw2zarr` is deprecated, use `to_zarr` instead.", diff --git a/echopype/preprocess/api.py b/echopype/preprocess/api.py index f890bd24b..48a7415da 100644 --- a/echopype/preprocess/api.py +++ b/echopype/preprocess/api.py @@ -42,7 +42,7 @@ def _freq_MVBS(ds, rint, pbin): sv = 10 ** (ds["Sv"] / 10) # average should be done in linear domain sv.coords["range_meter"] = ( ["range_bin"], - ds_Sv["range"].isel(frequency=0, ping_time=0), + ds_Sv["range"].isel(frequency=0, ping_time=0).data, ) sv = sv.swap_dims({"range_bin": "range_meter"}) sv_groupby_bins = ( diff --git a/echopype/process/process_deprecated.py b/echopype/process/process_deprecated.py index ff18fcd9f..afaea6083 100644 --- a/echopype/process/process_deprecated.py +++ b/echopype/process/process_deprecated.py @@ -49,6 +49,7 @@ def __init__(self, file_path="", salinity=27.9, pressure=59, temperature=None): env_params=self._env_params, cal_params=None, waveform_mode=self.waveform_mode, + encode_mode=self.encode_mode ) # Deprecated data attributes self.Sv = None diff --git a/echopype/qc/api.py b/echopype/qc/api.py index 6dfe7ff23..1fa4680f6 100644 --- a/echopype/qc/api.py +++ b/echopype/qc/api.py @@ -53,4 +53,17 @@ def coerce_increasing_time(ds, time_name="ping_time", local_win_len=100): def exist_reversed_time(ds, time_name): + """Test for occurrence of time reversal in specified datetime coordinate variable. + + Parameters + ---------- + ds : xr.Dataset + a dataset for which the time coordinate will be tested + time_name : str + name of the time coordinate to be tested + + Returns + ------- + `True` if at least one time reversal is found, `False` otherwise. 
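+
+    Examples
+    --------
+    A minimal sketch with a deliberately reversed (hypothetical) time coordinate:
+
+    >>> import numpy as np
+    >>> import xarray as xr
+    >>> times = np.array(
+    ...     ["2017-06-20T01:10:28", "2017-06-20T01:10:27"], dtype="datetime64[ns]"
+    ... )
+    >>> exist_reversed_time(xr.Dataset(coords={"ping_time": times}), "ping_time")
+    True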
+ """ return (np.diff(ds[time_name]) < np.timedelta64(0, "ns")).any() diff --git a/echopype/tests/test_calibrate.py b/echopype/tests/calibrate/test_calibrate.py similarity index 85% rename from echopype/tests/test_calibrate.py rename to echopype/tests/calibrate/test_calibrate.py index 36578aa7d..fe3aa7145 100644 --- a/echopype/tests/test_calibrate.py +++ b/echopype/tests/calibrate/test_calibrate.py @@ -4,6 +4,7 @@ from scipy.io import loadmat import echopype as ep from echopype.calibrate.calibrate_ek import CalibrateEK80 +import xarray as xr azfp_path = Path('./echopype/test_data/azfp') ek60_path = Path('./echopype/test_data/ek60') @@ -18,7 +19,7 @@ def test_compute_Sv_ek60_echoview(): echodata = ep.open_raw(ek60_raw_path, sonar_model='EK60') # Calibrate to get Sv - ds_Sv = ep.calibrate.compute_Sv(echodata) + ds_Sv = ep.calibrate.compute_Sv(echodata, waveform_mode="CW", encode_mode="power") # Compare with EchoView outputs channels = [] @@ -29,7 +30,7 @@ def test_compute_Sv_ek60_echoview(): # Echoview data is shifted by 1 sample along range (missing the first sample) assert np.allclose(test_Sv[:, :, 7:], - ds_Sv.Sv.isel(ping_time=slice(None, 10), range_bin=slice(8, None)), atol=1e-8) + ds_Sv.Sv.isel(ping_time=slice(None, 10), range_bin=slice(8, None)), atol=1e-8) def test_compute_Sv_ek60_matlab(): @@ -40,8 +41,8 @@ def test_compute_Sv_ek60_matlab(): echodata = ep.open_raw(ek60_raw_path, sonar_model='EK60') # Calibrate to get Sv - ds_Sv = ep.calibrate.compute_Sv(echodata) - ds_Sp = ep.calibrate.compute_Sp(echodata) + ds_Sv = ep.calibrate.compute_Sv(echodata, waveform_mode="CW", encode_mode="power") + ds_Sp = ep.calibrate.compute_Sp(echodata, waveform_mode="CW", encode_mode="power") # Load matlab outputs and test @@ -133,8 +134,8 @@ def test_compute_Sv_ek80_pc_echoview(): # Create a CalibrateEK80 object to perform pulse compression waveform_mode = 'BB' - cal_obj = CalibrateEK80(echodata, env_params=None, cal_params=None, waveform_mode=waveform_mode) - cal_obj.compute_range_meter(waveform_mode=waveform_mode, tvg_correction_factor=0) # compute range [m] + cal_obj = CalibrateEK80(echodata, env_params=None, cal_params=None, waveform_mode=waveform_mode, encode_mode="complex") + cal_obj.compute_range_meter(waveform_mode=waveform_mode) # compute range [m] chirp, _, tau_effective = cal_obj.get_transmit_chirp(waveform_mode=waveform_mode) pc = cal_obj.compress_pulse(chirp) pc_mean = pc.pulse_compressed_output.isel(frequency=0).mean(dim='quadrant').dropna('range_bin') @@ -162,7 +163,8 @@ def test_compute_Sv_ek80_CW_complex(): """ ek80_raw_path = str(ek80_path.joinpath('ar2.0-D20201210-T000409.raw')) # CW complex echodata = ep.open_raw(ek80_raw_path, sonar_model='EK80') - assert ep.calibrate.compute_Sv(echodata, waveform_mode='CW', encode_mode='complex') + sv = ep.calibrate.compute_Sv(echodata, waveform_mode='CW', encode_mode='complex') + assert isinstance(sv, xr.Dataset) is True def test_compute_Sv_ek80_BB_complex(): @@ -170,4 +172,16 @@ def test_compute_Sv_ek80_BB_complex(): """ ek80_raw_path = str(ek80_path.joinpath('ar2.0-D20201209-T235955.raw')) # CW complex echodata = ep.open_raw(ek80_raw_path, sonar_model='EK80') - assert ep.calibrate.compute_Sv(echodata, waveform_mode='BB', encode_mode='complex') + sv = ep.calibrate.compute_Sv(echodata, waveform_mode='BB', encode_mode='complex') + assert isinstance(sv, xr.Dataset) is True + +def test_compute_Sv_ek80_CW_power(): + """ + Tests calibration in CW mode data encoded as power samples, + while the file also contains BB complex samples + """ + + 
    ek80_raw_path = ek80_path / "Summer2018--D20180905-T033113.raw"
+    ed = ep.open_raw(ek80_raw_path, sonar_model="EK80")
+    sv = ep.calibrate.compute_Sv(ed, waveform_mode="CW", encode_mode="power")
+    assert isinstance(sv, xr.Dataset)
diff --git a/echopype/tests/conftest.py b/echopype/tests/conftest.py
new file mode 100644
index 000000000..4ae1916ab
--- /dev/null
+++ b/echopype/tests/conftest.py
@@ -0,0 +1,35 @@
+"""``pytest`` configuration."""
+
+import pytest
+from pathlib import Path
+
+import fsspec
+
+from echopype.testing import TEST_DATA_FOLDER
+
+
+@pytest.fixture(scope="session")
+def minio_bucket():
+    common_storage_options = dict(
+        client_kwargs=dict(endpoint_url="http://localhost:9000/"),
+        key="minioadmin",
+        secret="minioadmin",
+    )
+    bucket_name = "ooi-raw-data"
+    fs = fsspec.filesystem(
+        "s3",
+        **common_storage_options,
+    )
+    test_data = "data"
+    if not fs.exists(test_data):
+        fs.mkdir(test_data)
+
+    if not fs.exists(bucket_name):
+        fs.mkdir(bucket_name)
+
+    # Load test data into bucket
+    for d in TEST_DATA_FOLDER.iterdir():
+        source_path = f'echopype/test_data/{d.name}'
+        fs.put(source_path, f'{test_data}/{d.name}', recursive=True)
+
+    return common_storage_options
diff --git a/echopype/tests/convert/test_convert.py b/echopype/tests/convert/test_convert.py
new file mode 100644
index 000000000..f873aaed7
--- /dev/null
+++ b/echopype/tests/convert/test_convert.py
@@ -0,0 +1,356 @@
+"""test_convert.py
+
+This module contains all the various tests for echopype conversion
+from raw data to standards-compliant zarr or netcdf file(s).
+
+**Note that in order to run this test, minio server is required for s3
+output tests.**
+"""
+
+
+import os
+import fsspec
+import xarray as xr
+import pytest
+from echopype import open_raw
+from echopype.testing import TEST_DATA_FOLDER
+from echopype.convert.set_groups_base import DEFAULT_ENCODINGS
+
+
+def _check_file_group(data_file, engine, groups):
+    for g in groups:
+        ds = xr.open_dataset(data_file, engine=engine, group=g)
+
+        assert isinstance(ds, xr.Dataset) is True
+
+
+def _check_output_files(engine, output_files, storage_options):
+    groups = [
+        "Provenance",
+        "Environment",
+        "Beam",
+        "Sonar",
+        "Vendor",
+        "Platform",
+    ]
+    if isinstance(output_files, list):
+        fs = fsspec.get_mapper(output_files[0], **storage_options).fs
+        for f in output_files:
+            if engine == "zarr":
+                _check_file_group(fs.get_mapper(f), engine, groups)
+                fs.delete(f, recursive=True)
+            else:
+                _check_file_group(f, engine, groups)
+                fs.delete(f)
+    else:
+        fs = fsspec.get_mapper(output_files, **storage_options).fs
+        if engine == "zarr":
+            _check_file_group(fs.get_mapper(output_files), engine, groups)
+            fs.delete(output_files, recursive=True)
+        else:
+            _check_file_group(output_files, engine, groups)
+            fs.delete(output_files)
+
+
+@pytest.mark.parametrize(
+    "sonar_model, raw_file, xml_path",
+    [
+        (
+            "azfp",
+            TEST_DATA_FOLDER / "azfp/ooi/17032923.01A",
+            TEST_DATA_FOLDER / "azfp/ooi/17032922.XML",
+        ),
+        (
+            "ek60",
+            TEST_DATA_FOLDER / "ek60/DY1801_EK60-D20180211-T164025.raw",
+            None,
+        ),
+        (
+            "ek80",
+            TEST_DATA_FOLDER / "ek80/ncei-wcsd/D20170826-T205615.raw",
+            None,
+        ),
+        (
+            "ad2cp",
+            TEST_DATA_FOLDER / "ad2cp/raw/076/rawtest.076.00000.ad2cp",
+            None,
+        ),
+    ],
+)
+def test_convert_time_encodings(sonar_model, raw_file, xml_path):
+    ed = open_raw(
+        sonar_model=sonar_model, raw_file=raw_file, xml_path=xml_path
+    )
+    ed.to_netcdf(overwrite=True)
+    for group, details in ed.group_map.items():
+        if hasattr(ed, group):
+            group_ds = getattr(ed, group)
+            if
isinstance(group_ds, xr.Dataset): + for var, encoding in DEFAULT_ENCODINGS.items(): + if var in group_ds: + da = group_ds[var] + assert da.encoding == encoding + + # Combine encoding and attributes since this + # is what is shown when using decode_cf=False + # without dtype attribute + total_attrs = dict(**da.attrs, **da.encoding) + total_attrs.pop('dtype') + + # Read converted file back in + file_da = xr.open_dataset( + ed.converted_raw_path, + group=details['ep_group'], + decode_cf=False, + )[var] + assert file_da.dtype == encoding['dtype'] + + # Read converted file back in + decoded_da = xr.open_dataset( + ed.converted_raw_path, + group=details['ep_group'], + )[var] + assert da.equals(decoded_da) is True + os.unlink(ed.converted_raw_path) + + +@pytest.mark.parametrize("model", ["EK60"]) +@pytest.mark.parametrize( + "input_path", + [ + "./echopype/test_data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", + "s3://data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", + [ + "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", + "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190843.raw", + ], + ], +) +@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) +@pytest.mark.parametrize( + "output_save_path", + [ + None, + "./echopype/test_data/dump/", + "./echopype/test_data/dump/tmp.zarr", + "./echopype/test_data/dump/tmp.nc", + "s3://ooi-raw-data/dump/", + "s3://ooi-raw-data/dump/tmp.zarr", + "s3://ooi-raw-data/dump/tmp.nc", + ], +) +def test_convert_ek60( + model, + input_path, + export_engine, + output_save_path, + minio_bucket, +): + common_storage_options = minio_bucket + output_storage_options = {} + ipath = input_path + if isinstance(input_path, list): + ipath = input_path[0] + + input_storage_options = ( + common_storage_options if ipath.startswith("s3://") else {} + ) + if output_save_path and output_save_path.startswith("s3://"): + output_storage_options = common_storage_options + + # Only using one file + ec = open_raw( + raw_file=ipath, + sonar_model=model, + storage_options=input_storage_options, + ) + + if ( + export_engine == "netcdf4" + and output_save_path is not None + and output_save_path.startswith("s3://") + ): + return + + if export_engine == "netcdf4": + to_file = getattr(ec, "to_netcdf") + elif export_engine == "zarr": + to_file = getattr(ec, "to_zarr") + else: + return + try: + to_file( + save_path=output_save_path, + overwrite=True, + output_storage_options=output_storage_options, + ) + + _check_output_files( + export_engine, ec.converted_raw_path, output_storage_options + ) + except Exception as e: + if export_engine == 'netcdf4' and output_save_path.startswith("s3://"): + assert isinstance(e, ValueError) is True + assert str(e) == 'Only local netcdf4 is supported.' 
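+
+# Note: netCDF4 output can only be written to a local filesystem, so the
+# except branch above checks for the expected "Only local netcdf4 is
+# supported." ValueError rather than letting the test error out.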
+ + +@pytest.mark.parametrize("model", ["azfp"]) +@pytest.mark.parametrize( + "input_path", + [ + "./echopype/test_data/azfp/ooi/17032923.01A", + "http://localhost:8080/data/azfp/ooi/17032923.01A", + ], +) +@pytest.mark.parametrize( + "xml_path", + [ + "./echopype/test_data/azfp/ooi/17032922.XML", + "http://localhost:8080/data/azfp/ooi/17032922.XML", + ], +) +@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) +@pytest.mark.parametrize( + "output_save_path", + [ + None, + "./echopype/test_data/dump/", + "./echopype/test_data/dump/tmp.zarr", + "./echopype/test_data/dump/tmp.nc", + "s3://ooi-raw-data/dump/", + "s3://ooi-raw-data/dump/tmp.zarr", + "s3://ooi-raw-data/dump/tmp.nc", + ], +) +@pytest.mark.parametrize("combine_files", [False]) +def test_convert_azfp( + model, + input_path, + xml_path, + export_engine, + output_save_path, + combine_files, + minio_bucket, +): + common_storage_options = minio_bucket + output_storage_options = {} + + input_storage_options = ( + common_storage_options if input_path.startswith("s3://") else {} + ) + if output_save_path and output_save_path.startswith("s3://"): + output_storage_options = common_storage_options + + ec = open_raw( + raw_file=input_path, + xml_path=xml_path, + sonar_model=model, + storage_options=input_storage_options, + ) + + assert ec.xml_path == xml_path + + if ( + export_engine == "netcdf4" + and output_save_path is not None + and output_save_path.startswith("s3://") + ): + return + + if export_engine == "netcdf4": + to_file = getattr(ec, "to_netcdf") + elif export_engine == "zarr": + to_file = getattr(ec, "to_zarr") + else: + return + try: + to_file( + save_path=output_save_path, + overwrite=True, + output_storage_options=output_storage_options, + ) + + _check_output_files( + export_engine, ec.converted_raw_path, output_storage_options + ) + except Exception as e: + if export_engine == 'netcdf4' and output_save_path.startswith("s3://"): + assert isinstance(e, ValueError) is True + assert str(e) == 'Only local netcdf4 is supported.' 
+ + +@pytest.mark.parametrize("model", ["EK80"]) +@pytest.mark.parametrize( + "input_path", + [ + "./echopype/test_data/ek80/ncei-wcsd/D20170826-T205615.raw", + "http://localhost:8080/data/ek80/ncei-wcsd/D20170826-T205615.raw", + "s3://data/ek80/ncei-wcsd/D20170826-T205615.raw", + ], +) +@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) +@pytest.mark.parametrize( + "output_save_path", + [ + None, + "./echopype/test_data/dump/", + "./echopype/test_data/dump/tmp.zarr", + "./echopype/test_data/dump/tmp.nc", + "s3://ooi-raw-data/dump/", + "s3://ooi-raw-data/dump/tmp.zarr", + "s3://ooi-raw-data/dump/tmp.nc", + ], +) +@pytest.mark.parametrize("combine_files", [False]) +def test_convert_ek80( + model, + input_path, + export_engine, + output_save_path, + combine_files, + minio_bucket, +): + common_storage_options = minio_bucket + output_storage_options = {} + + input_storage_options = ( + common_storage_options if input_path.startswith("s3://") else {} + ) + if output_save_path and output_save_path.startswith("s3://"): + output_storage_options = common_storage_options + + ec = open_raw( + raw_file=input_path, + sonar_model=model, + storage_options=input_storage_options, + ) + + if ( + export_engine == "netcdf4" + and output_save_path is not None + and output_save_path.startswith("s3://") + ): + return + + if export_engine == "netcdf4": + to_file = getattr(ec, "to_netcdf") + elif export_engine == "zarr": + to_file = getattr(ec, "to_zarr") + else: + return + + try: + to_file( + save_path=output_save_path, + overwrite=True, + combine=combine_files, + output_storage_options=output_storage_options, + ) + + _check_output_files( + export_engine, ec.converted_raw_path, output_storage_options + ) + except Exception as e: + if export_engine == 'netcdf4' and output_save_path.startswith("s3://"): + assert isinstance(e, ValueError) is True + assert str(e) == 'Only local netcdf4 is supported.' diff --git a/echopype/tests/test_convert_ad2cp.py b/echopype/tests/convert/test_convert_ad2cp.py similarity index 100% rename from echopype/tests/test_convert_ad2cp.py rename to echopype/tests/convert/test_convert_ad2cp.py diff --git a/echopype/tests/test_convert_azfp.py b/echopype/tests/convert/test_convert_azfp.py similarity index 100% rename from echopype/tests/test_convert_azfp.py rename to echopype/tests/convert/test_convert_azfp.py diff --git a/echopype/tests/test_convert_ek60.py b/echopype/tests/convert/test_convert_ek60.py similarity index 100% rename from echopype/tests/test_convert_ek60.py rename to echopype/tests/convert/test_convert_ek60.py diff --git a/echopype/tests/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py similarity index 100% rename from echopype/tests/test_convert_ek80.py rename to echopype/tests/convert/test_convert_ek80.py diff --git a/echopype/tests/test_echodata.py b/echopype/tests/echodata/test_echodata.py similarity index 65% rename from echopype/tests/test_echodata.py rename to echopype/tests/echodata/test_echodata.py index d341f3142..68314047e 100644 --- a/echopype/tests/test_echodata.py +++ b/echopype/tests/echodata/test_echodata.py @@ -1,8 +1,8 @@ from textwrap import dedent -from pathlib import Path import fsspec +import echopype from echopype.testing import TEST_DATA_FOLDER from echopype.echodata import EchoData from echopype import open_converted @@ -11,35 +11,9 @@ import xarray as xr ek60_path = TEST_DATA_FOLDER / "ek60" - - -# TODO: Probably put the function below into a common module? 
-@pytest.fixture(scope="session")
-def minio_bucket():
-    common_storage_options = dict(
-        client_kwargs=dict(endpoint_url="http://localhost:9000/"),
-        key="minioadmin",
-        secret="minioadmin",
-    )
-    bucket_name = "ooi-raw-data"
-    fs = fsspec.filesystem(
-        "s3",
-        **common_storage_options,
-    )
-    test_data = "data"
-    if not fs.exists(test_data):
-        fs.mkdir(test_data)
-
-    if not fs.exists(bucket_name):
-        fs.mkdir(bucket_name)
-
-    # Load test data into bucket
-    test_data_path = Path(__file__).parent.parent.joinpath(Path("test_data"))
-    for d in test_data_path.iterdir():
-        source_path = f'echopype/test_data/{d.name}'
-        fs.put(source_path, f'{test_data}/{d.name}', recursive=True)
-
-    return common_storage_options
+ek80_path = TEST_DATA_FOLDER / "ek80"
+azfp_path = TEST_DATA_FOLDER / "azfp"
+ad2cp_path = TEST_DATA_FOLDER / "ad2cp"


 class TestEchoData:
@@ -83,6 +57,18 @@ def test_repr(self):
         actual = "\n".join(x.rstrip() for x in repr(ed).split("\n"))
         assert expected_repr == actual

+    def test_repr_html(self):
+        zarr_path_string = str(self.converted_zarr.absolute())
+        ed = EchoData(converted_raw_path=self.converted_zarr)
+        assert hasattr(ed, "_repr_html_")
+        html_repr = ed._repr_html_().strip()
+        assert f"""EchoData: standardized raw data from {zarr_path_string}""" in html_repr
+
+        with xr.set_options(display_style="text"):
+            html_fallback = ed._repr_html_().strip()
+
+        assert html_fallback.startswith("<pre>EchoData") and html_fallback.endswith("</pre>")
+

 @pytest.mark.parametrize(
     "converted_zarr",
@@ -133,3 +119,30 @@ def _check_path(zarr_path):
             and converted_zarr.endswith(".nc")
         ):
             assert isinstance(e, ValueError) is True
+
+
+@pytest.mark.parametrize(
+    ("filepath", "sonar_model", "azfp_xml_path", "azfp_cal_type", "ek_waveform_mode", "ek_encode_mode"),
+    [
+        (ek60_path / "ncei-wcsd" / "Summer2017-D20170615-T190214.raw", "EK60", None, None, "CW", "complex"),
+        (ek80_path / "D20190822-T161221.raw", "EK80", None, None, "CW", "power"),
+        (ek80_path / "D20170912-T234910.raw", "EK80", None, None, "BB", "complex"),
+        (azfp_path / "ooi" / "17032923.01A", "AZFP", azfp_path / "ooi" / "17032922.XML", "Sv", None, None),
+        (azfp_path / "ooi" / "17032923.01A", "AZFP", azfp_path / "ooi" / "17032922.XML", "Sp", None, None),
+        (ad2cp_path / "raw" / "090" / "rawtest.090.00001.ad2cp", "AD2CP", None, None, None, None)
+    ]
+)
+def test_compute_range(filepath, sonar_model, azfp_xml_path, azfp_cal_type, ek_waveform_mode, ek_encode_mode):
+    ed = echopype.open_raw(filepath, sonar_model, azfp_xml_path)
+    env_params = {"sound_speed": 343}
+
+    if sonar_model == "AD2CP":
+        try:
+            ed.compute_range(env_params, ek_waveform_mode="CW", azfp_cal_type="Sv")
+        except ValueError:
+            return
+        else:
+            raise AssertionError
+    else:
+        range = ed.compute_range(env_params, azfp_cal_type, ek_waveform_mode)
+        assert isinstance(range, xr.DataArray)
diff --git a/echopype/tests/echodata/test_echodata_combine.py b/echopype/tests/echodata/test_echodata_combine.py
new file mode 100644
index 000000000..ec3014a8b
--- /dev/null
+++ b/echopype/tests/echodata/test_echodata_combine.py
@@ -0,0 +1,204 @@
+from typing import Any, List, Optional, TYPE_CHECKING, Dict, Union
+from pathlib import Path
+
+import numpy as np
+import pytest
+import xarray as xr
+from xarray.core.merge import MergeError
+
+import echopype
+from echopype.testing import TEST_DATA_FOLDER
+from echopype.qc import exist_reversed_time
+from echopype.core import SONAR_MODELS
+
+if TYPE_CHECKING:
+    from echopype.core import SonarModelsHint
+
+azfp_ooi_folder = TEST_DATA_FOLDER / "azfp" / "ooi"
+azfp_test_data = [
+    azfp_ooi_folder / "18100407.01A",
+    azfp_ooi_folder / "18100409.01A",
+    azfp_ooi_folder / "18100408.01A",
+]
+azfp_xml_file = azfp_ooi_folder / "18092920.XML"
+ek60_ncei_wcsd_folder = TEST_DATA_FOLDER / "ek60" / "ncei-wcsd"
+ek60_test_data = [
+    ek60_ncei_wcsd_folder / "Summer2017-D20170620-T011027.raw",
+    ek60_ncei_wcsd_folder / "Summer2017-D20170620-T014302.raw",
+    ek60_ncei_wcsd_folder / "Summer2017-D20170620-T021537.raw",
+]
+ek60_reversed_ping_time_test_data = [
+    ek60_ncei_wcsd_folder / "Summer2017-D20170719-T203615.raw",
+    ek60_ncei_wcsd_folder / "Summer2017-D20170719-T205415.raw",
+    ek60_ncei_wcsd_folder / "Summer2017-D20170719-T211347.raw",
+]
+
+
+@pytest.mark.parametrize(
+    "files, sonar_model, xml_file, concat_dims, concat_data_vars",
+    [
+        (
+            azfp_test_data,
+            "AZFP",
+            azfp_xml_file,
+            SONAR_MODELS["AZFP"]["concat_dims"],
+            SONAR_MODELS["AZFP"]["concat_data_vars"],
+        ),
+        (
+            ek60_test_data,
+            "EK60",
+            None,
+            SONAR_MODELS["EK60"]["concat_dims"],
+            SONAR_MODELS["EK60"]["concat_data_vars"],
+        ),
+        (
+            ek60_reversed_ping_time_test_data,
+            "EK60",
+            None,
+            SONAR_MODELS["EK60"]["concat_dims"],
+            SONAR_MODELS["EK60"]["concat_data_vars"],
+        ),
+    ],
+)
+def test_combine_echodata(
+    files: List[Path],
+    sonar_model: "SonarModelsHint",
+    xml_file: Optional[Path],
+    concat_dims: Dict[str, Optional[Union[str, List[str]]]],
+    concat_data_vars: Dict[str, str],
+):
+    eds = [echopype.open_raw(file, sonar_model, xml_file) for file in files]
+    combined = echopype.combine_echodata(eds, "overwrite_conflicts")  # type: ignore
+
+    for group_name in combined.group_map:
+        if group_name in ("top", "sonar", "provenance"):
+            continue
+        combined_group: xr.Dataset = getattr(combined, group_name)
+        eds_groups = [
+            getattr(ed, group_name) for ed in eds if getattr(ed, group_name) is not None
+        ]
+
+        def union_attrs(datasets: List[xr.Dataset]) -> Dict[str, Any]:
+            """
+            Merges attrs from a list of datasets.
+            Prioritizes keys from later datasets.
+            """
+
+            total_attrs = dict()
+            for ds in datasets:
+                total_attrs.update(ds.attrs)
+            return total_attrs
+
+        test_ds = xr.combine_nested(
+            eds_groups,
+            [concat_dims.get(group_name, concat_dims["default"])],
+            data_vars=concat_data_vars.get(group_name, concat_data_vars["default"]),
+            coords="minimal",
+            combine_attrs="drop",
+        )
+        test_ds.attrs.update(union_attrs(eds_groups))
+        test_ds = test_ds.drop_dims(
+            [
+                "concat_dim",
+                "old_ping_time",
+                "ping_time",
+                "old_location_time",
+                "location_time",
+            ],
+            errors="ignore",
+        ).drop_dims([f"{group}_attrs" for group in combined.group_map], errors="ignore")
+        assert combined_group is None or test_ds.identical(
+            combined_group.drop_dims(
+                ["old_ping_time", "ping_time", "old_location_time", "location_time"],
+                errors="ignore",
+            )
+        )
+
+
+def test_ping_time_reversal():
+    eds = [
+        echopype.open_raw(file, "EK60") for file in ek60_reversed_ping_time_test_data
+    ]
+    combined = echopype.combine_echodata(eds, "overwrite_conflicts")  # type: ignore
+
+    for group_name in combined.group_map:
+        combined_group: xr.Dataset = getattr(combined, group_name)
+
+        if combined_group is not None:
+            if "ping_time" in combined_group and group_name != "provenance":
+                assert not exist_reversed_time(combined_group, "ping_time")
+            if "old_ping_time" in combined_group:
+                assert exist_reversed_time(combined_group, "old_ping_time")
+            if "location_time" in combined_group and group_name not in ("provenance", "nmea"):
+                assert not exist_reversed_time(combined_group, "location_time")
+            if "old_location_time" in combined_group:
+                assert exist_reversed_time(combined_group, "old_location_time")
+            if "mru_time" in combined_group and group_name != "provenance":
+                assert not exist_reversed_time(combined_group, "mru_time")
+            if "old_mru_time" in combined_group:
+                assert exist_reversed_time(combined_group, "old_mru_time")
+
+
+def test_attr_storage():
+    # check storage of attributes before combination in provenance group
+    eds = [
+        echopype.open_raw(file, "EK60") for file in ek60_test_data
+    ]
+    combined = echopype.combine_echodata(eds, "overwrite_conflicts")  # type: ignore
+    for group in combined.group_map:
+        if f"{group}_attrs" in combined.provenance:
+            group_attrs = combined.provenance[f"{group}_attrs"]
+            for i, ed in enumerate(eds):
+                for attr, value in getattr(ed, group).attrs.items():
+                    assert group_attrs.isel(echodata_filename=i).sel({f"{group}_attr_key": attr}).data[()] == value
+
+    # check selection by echodata_filename
+    for file in ek60_test_data:
+        assert Path(file).name in combined.provenance["echodata_filename"]
+    for group in combined.group_map:
+        if f"{group}_attrs" in combined.provenance:
+            group_attrs = combined.provenance[f"{group}_attrs"]
+            assert np.array_equal(group_attrs.sel(echodata_filename=Path(ek60_test_data[0]).name), group_attrs.isel(echodata_filename=0))
+
+
+def test_combine_attrs():
+    # check parameter passed to combine_echodata that controls behavior of attribute combination
+    eds = [
+        echopype.open_raw(file, "EK60") for file
in ek60_test_data + ] + eds[0].beam.attrs.update({"foo": 1}) + eds[1].beam.attrs.update({"foo": 2}) + eds[2].beam.attrs.update({"foo": 3}) + + combined = echopype.combine_echodata(eds, "override") # type: ignore + assert combined.beam.attrs["foo"] == 1 + + combined = echopype.combine_echodata(eds, "drop") # type: ignore + assert "foo" not in combined.beam.attrs + + try: + combined = echopype.combine_echodata(eds, "identical") # type: ignore + except MergeError: + pass + else: + raise AssertionError + + try: + combined = echopype.combine_echodata(eds, "no_conflicts") # type: ignore + except MergeError: + pass + else: + raise AssertionError + + combined = echopype.combine_echodata(eds, "overwrite_conflicts") # type: ignore + assert combined.beam.attrs["foo"] == 3 + + eds[0].beam.attrs.update({"foo": 1}) + eds[1].beam.attrs.update({"foo": 1}) + eds[2].beam.attrs.update({"foo": 1}) + + combined = echopype.combine_echodata(eds, "identical") # type: ignore + assert combined.beam.attrs["foo"] == 1 + + combined = echopype.combine_echodata(eds, "no_conflicts") # type: ignore + assert combined.beam.attrs["foo"] == 1 diff --git a/echopype/tests/test_preprocess.py b/echopype/tests/old/test_preprocess.py similarity index 100% rename from echopype/tests/test_preprocess.py rename to echopype/tests/old/test_preprocess.py diff --git a/echopype/tests/test_convert.py b/echopype/tests/test_convert.py deleted file mode 100644 index ac15b9a30..000000000 --- a/echopype/tests/test_convert.py +++ /dev/null @@ -1,526 +0,0 @@ -"""test_convert.py - -This module contain all the various tests for echopype conversion -from a raw data to standard compliant zarr or netcdf file(s). - -**Note that in order to run this test, minio server is required for s3 -output tests.** -""" - - -import os -import shutil -import glob -import fsspec -import xarray as xr -import pytest -from pathlib import Path -from echopype import open_raw -from echopype.convert.api import _validate_path -from echopype.testing import TEST_DATA_FOLDER -from echopype.convert.set_groups_base import DEFAULT_ENCODINGS - - -def _check_file_group(data_file, engine, groups): - for g in groups: - ds = xr.open_dataset(data_file, engine=engine, group=g) - - assert isinstance(ds, xr.Dataset) is True - - -def _check_output_files(engine, output_files, storage_options): - groups = [ - "Provenance", - "Environment", - "Beam", - "Sonar", - "Vendor", - "Platform", - ] - if isinstance(output_files, list): - fs = fsspec.get_mapper(output_files[0], **storage_options).fs - for f in output_files: - if engine == "zarr": - _check_file_group(fs.get_mapper(f), engine, groups) - fs.delete(f, recursive=True) - else: - _check_file_group(f, engine, groups) - fs.delete(f) - else: - fs = fsspec.get_mapper(output_files, **storage_options).fs - if engine == "zarr": - _check_file_group(fs.get_mapper(output_files), engine, groups) - fs.delete(output_files, recursive=True) - else: - _check_file_group(output_files, engine, groups) - fs.delete(output_files) - - -@pytest.fixture(scope="session") -def minio_bucket(): - common_storage_options = dict( - client_kwargs=dict(endpoint_url="http://localhost:9000/"), - key="minioadmin", - secret="minioadmin", - ) - bucket_name = "ooi-raw-data" - fs = fsspec.filesystem( - "s3", - **common_storage_options, - ) - test_data = "data" - if not fs.exists(test_data): - fs.mkdir(test_data) - - if not fs.exists(bucket_name): - fs.mkdir(bucket_name) - - # Load test data into bucket - test_data_path = Path(__file__).parent.parent.joinpath(Path("test_data")) - for d 
in test_data_path.iterdir(): - source_path = f'echopype/test_data/{d.name}' - fs.put(source_path, f'{test_data}/{d.name}', recursive=True) - - return common_storage_options - - -@pytest.mark.parametrize("model", ["EK60"]) -@pytest.mark.parametrize("file_format", [".zarr"]) -@pytest.mark.parametrize( - "input_path", - [ - "./echopype/test_data/ek60/DY1801_EK60-D20180211-T164025.raw", - "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", - ], -) -@pytest.mark.parametrize( - "output_save_path", - [ - None, - "./echopype/test_data/dump/", - "./echopype/test_data/dump/tmp.zarr", - "./echopype/test_data/dump/tmp.nc", - "s3://ooi-raw-data/dump/", - "s3://ooi-raw-data/dump/tmp.zarr", - "s3://ooi-raw-data/dump/tmp.nc", - ], -) -def test_validate_path_single_source( - model, file_format, input_path, output_save_path, minio_bucket -): - - output_storage_options = {} - if output_save_path and output_save_path.startswith("s3://"): - output_storage_options = dict( - client_kwargs=dict(endpoint_url="http://localhost:9000/"), - key="minioadmin", - secret="minioadmin", - ) - fsmap = fsspec.get_mapper(input_path) - single_fname = os.path.splitext(os.path.basename(fsmap.root))[0] - - converted_raw_path = _validate_path( - source_file=single_fname, - file_format=file_format, - output_storage_options=output_storage_options, - save_path=output_save_path - ) - # Used for cross-platform path comparisons - output_path = Path(converted_raw_path) - - if output_save_path is not None: - fsmap_tmp = fsspec.get_mapper(output_save_path, **output_storage_options) - fs = fsmap_tmp.fs - if not output_save_path.startswith("s3"): - if output_save_path.endswith("/"): - # if an output folder is given, below works with and without the slash at the end - assert output_path == Path(os.path.join(fsmap_tmp.root, single_fname + ".zarr")) - elif output_save_path.endswith(".zarr"): - # if an output filename is given - assert output_path == Path(fsmap_tmp.root) - else: - # force output file extension to the called type (here .zarr) - assert output_path == Path(os.path.splitext(fsmap_tmp.root)[0] + ".zarr") - shutil.rmtree(os.path.dirname(converted_raw_path)) - else: - if output_save_path.endswith("/"): - # if an output folder is given, below works with and without the slash at the end - assert output_path == Path(os.path.join(output_save_path, single_fname + ".zarr")) - elif output_save_path.endswith(".zarr"): - # if an output filename is given - assert output_path == Path(output_save_path) - else: - # force output file extension to the called type (here .zarr) - assert output_path == Path(os.path.splitext(output_save_path)[0] + ".zarr") - fs.delete(converted_raw_path) - else: - current_dir = Path.cwd() - temp_dir = current_dir.joinpath(Path("temp_echopype_output")) - assert output_path == Path(str(temp_dir.joinpath(Path(single_fname + ".zarr")))) - shutil.rmtree(os.path.dirname(converted_raw_path)) - - -@pytest.mark.parametrize("model", ["EK60"]) -@pytest.mark.parametrize("file_format", [".zarr"]) -@pytest.mark.parametrize( - "input_path", - [ - "./echopype/test_data/ek60/*.raw", - [ - "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", - ], - ], -) -@pytest.mark.parametrize( - "output_save_path", - [ - None, - "./echopype/test_data/dump/", - "./echopype/test_data/dump/tmp.zarr", - "./echopype/test_data/dump/tmp.nc", - "s3://ooi-raw-data/dump/", - "s3://ooi-raw-data/dump/tmp.zarr", - "s3://ooi-raw-data/dump/tmp.nc", - ], -) -@pytest.mark.skip(reason='_validate_path only takes single 
files') -def test_validate_path_multiple_source( - model, file_format, input_path, output_save_path, minio_bucket -): - output_storage_options = {} - if output_save_path and output_save_path.startswith("s3://"): - output_storage_options = dict( - client_kwargs=dict(endpoint_url="http://localhost:9000/"), - key="minioadmin", - secret="minioadmin", - ) - - if isinstance(input_path, str): - mult_path = glob.glob(input_path) - else: - mult_path = input_path - echodata_mult = open_raw(mult_path, sonar_model="EK60") - echodata_mult._output_storage_options = output_storage_options - - echodata_mult._validate_path(file_format=file_format, save_path=output_save_path) - - if output_save_path is not None: - fsmap_tmp = fsspec.get_mapper(output_save_path, **output_storage_options) - fs = fsmap_tmp.fs - if not output_save_path.startswith("s3"): - if output_save_path.endswith("/"): - # if an output folder is given, below works with and without the slash at the end - assert echodata_mult.converted_raw_path == [ - os.path.join( - fsmap_tmp.root, - os.path.splitext(os.path.basename(f))[0] + ".zarr", - ) - for f in mult_path - ] - elif output_save_path.endswith(".zarr"): - # if an output filename is given: only use the directory - assert echodata_mult.converted_raw_path == [os.path.abspath(output_save_path)] - elif output_save_path.endswith(".nc"): - # force output file extension to the called type (here .zarr) - assert echodata_mult.converted_raw_path == [ - os.path.abspath(output_save_path.replace(".nc", ".zarr")) - ] - shutil.rmtree(os.path.dirname(echodata_mult.converted_raw_path[0])) - else: - if output_save_path.endswith("/"): - # if an output folder is given, below works with and without the slash at the end - assert echodata_mult.converted_raw_path == [ - os.path.join( - output_save_path, - os.path.splitext(os.path.basename(f))[0] + ".zarr", - ) - for f in mult_path - ] - elif output_save_path.endswith(".zarr"): - # if an output filename is given: only use the directory - assert echodata_mult.converted_raw_path == [output_save_path] - elif output_save_path.endswith(".nc"): - # force output file extension to the called type (here .zarr) - assert echodata_mult.converted_raw_path == [output_save_path.replace(".nc", ".zarr")] - fs.delete(echodata_mult.converted_raw_path[0]) - else: - current_dir = Path.cwd() - temp_dir = current_dir.joinpath(Path("temp_echopype_output")) - assert echodata_mult.converted_raw_path == [ - str(temp_dir.joinpath(Path(os.path.splitext(os.path.basename(f))[0] + ".zarr"))) - for f in mult_path - ] - shutil.rmtree(os.path.dirname(echodata_mult.converted_raw_path[0])) - - -@pytest.mark.parametrize( - "sonar_model, raw_file, xml_path", - [ - ( - "azfp", - TEST_DATA_FOLDER / "azfp/ooi/17032923.01A", - TEST_DATA_FOLDER / "azfp/ooi/17032922.XML" - ), - ( - "ek60", - TEST_DATA_FOLDER / "ek60/DY1801_EK60-D20180211-T164025.raw", - None - ), - ( - "ek80", - TEST_DATA_FOLDER / "ek80/ncei-wcsd/D20170826-T205615.raw", - None - ), - ( - "ad2cp", - TEST_DATA_FOLDER / "ad2cp/raw/076/rawtest.076.00000.ad2cp", - None - ) - ] -) -def test_convert_time_encodings(sonar_model, raw_file, xml_path): - ed = open_raw( - sonar_model=sonar_model, - raw_file=raw_file, - xml_path=xml_path - ) - ed.to_netcdf(overwrite=True) - for group, details in ed._EchoData__group_map.items(): - if hasattr(ed, group): - group_ds = getattr(ed, group) - if isinstance(group_ds, xr.Dataset): - for var, encoding in DEFAULT_ENCODINGS.items(): - if var in group_ds: - da = group_ds[var] - assert da.encoding == encoding - - # 
Combine encoding and attributes since this - # is what is shown when using decode_cf=False - # without dtype attribute - total_attrs = dict(**da.attrs, **da.encoding) - total_attrs.pop('dtype') - - # Read converted file back in - file_da = xr.open_dataset( - ed.converted_raw_path, - group=details['ep_group'], - decode_cf=False - )[var] - assert file_da.dtype == encoding['dtype'] - - # Read converted file back in - decoded_da = xr.open_dataset( - ed.converted_raw_path, - group=details['ep_group'], - )[var] - assert da.equals(decoded_da) is True - os.unlink(ed.converted_raw_path) - - -@pytest.mark.parametrize("model", ["EK60"]) -@pytest.mark.parametrize( - "input_path", - [ - "./echopype/test_data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", - "s3://data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", - [ - "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.raw", - "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190843.raw", - ], - ], -) -@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) -@pytest.mark.parametrize( - "output_save_path", - [ - None, - "./echopype/test_data/dump/", - "./echopype/test_data/dump/tmp.zarr", - "./echopype/test_data/dump/tmp.nc", - "s3://ooi-raw-data/dump/", - "s3://ooi-raw-data/dump/tmp.zarr", - "s3://ooi-raw-data/dump/tmp.nc", - ], -) -def test_convert_ek60( - model, - input_path, - export_engine, - output_save_path, - minio_bucket, -): - common_storage_options = minio_bucket - output_storage_options = {} - ipath = input_path - if isinstance(input_path, list): - ipath = input_path[0] - - input_storage_options = common_storage_options if ipath.startswith("s3://") else {} - if output_save_path and output_save_path.startswith("s3://"): - output_storage_options = common_storage_options - - # Only using one file - ec = open_raw(raw_file=ipath, sonar_model=model, storage_options=input_storage_options) - - if ( - export_engine == "netcdf4" - and output_save_path is not None - and output_save_path.startswith("s3://") - ): - return - - if export_engine == "netcdf4": - to_file = getattr(ec, "to_netcdf") - elif export_engine == "zarr": - to_file = getattr(ec, "to_zarr") - else: - return - - to_file( - save_path=output_save_path, - overwrite=True, - output_storage_options=output_storage_options, - ) - - _check_output_files(export_engine, ec.converted_raw_path, output_storage_options) - - -@pytest.mark.parametrize("model", ["azfp"]) -@pytest.mark.parametrize( - "input_path", - [ - "./echopype/test_data/azfp/ooi/17032923.01A", - "http://localhost:8080/data/azfp/ooi/17032923.01A", - ], -) -@pytest.mark.parametrize( - "xml_path", - [ - "./echopype/test_data/azfp/ooi/17032922.XML", - "http://localhost:8080/data/azfp/ooi/17032922.XML", - ], -) -@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) -@pytest.mark.parametrize( - "output_save_path", - [ - None, - "./echopype/test_data/dump/", - "./echopype/test_data/dump/tmp.zarr", - "./echopype/test_data/dump/tmp.nc", - "s3://ooi-raw-data/dump/", - "s3://ooi-raw-data/dump/tmp.zarr", - "s3://ooi-raw-data/dump/tmp.nc", - ], -) -@pytest.mark.parametrize("combine_files", [False]) -def test_convert_azfp( - model, - input_path, - xml_path, - export_engine, - output_save_path, - combine_files, - minio_bucket, -): - common_storage_options = minio_bucket - output_storage_options = {} - - input_storage_options = common_storage_options if input_path.startswith("s3://") else {} - if output_save_path and output_save_path.startswith("s3://"): - output_storage_options = 
common_storage_options - - ec = open_raw( - raw_file=input_path, - xml_path=xml_path, - sonar_model=model, - storage_options=input_storage_options, - ) - - assert ec.xml_path == xml_path - - if ( - export_engine == "netcdf4" - and output_save_path is not None - and output_save_path.startswith("s3://") - ): - return - - if export_engine == "netcdf4": - to_file = getattr(ec, "to_netcdf") - elif export_engine == "zarr": - to_file = getattr(ec, "to_zarr") - else: - return - - to_file( - save_path=output_save_path, - overwrite=True, - output_storage_options=output_storage_options, - ) - - _check_output_files(export_engine, ec.converted_raw_path, output_storage_options) - - -@pytest.mark.parametrize("model", ["EK80"]) -@pytest.mark.parametrize( - "input_path", - [ - "./echopype/test_data/ek80/ncei-wcsd/D20170826-T205615.raw", - "http://localhost:8080/data/ek80/ncei-wcsd/D20170826-T205615.raw", - "s3://data/ek80/ncei-wcsd/D20170826-T205615.raw", - ], -) -@pytest.mark.parametrize("export_engine", ["zarr", "netcdf4"]) -@pytest.mark.parametrize( - "output_save_path", - [ - None, - "./echopype/test_data/dump/", - "./echopype/test_data/dump/tmp.zarr", - "./echopype/test_data/dump/tmp.nc", - "s3://ooi-raw-data/dump/", - "s3://ooi-raw-data/dump/tmp.zarr", - "s3://ooi-raw-data/dump/tmp.nc", - ], -) -@pytest.mark.parametrize("combine_files", [False]) -def test_convert_ek80( - model, - input_path, - export_engine, - output_save_path, - combine_files, - minio_bucket, -): - common_storage_options = minio_bucket - output_storage_options = {} - - input_storage_options = common_storage_options if input_path.startswith("s3://") else {} - if output_save_path and output_save_path.startswith("s3://"): - output_storage_options = common_storage_options - - ec = open_raw(raw_file=input_path, sonar_model=model, storage_options=input_storage_options) - - if ( - export_engine == "netcdf4" - and output_save_path is not None - and output_save_path.startswith("s3://") - ): - return - - if export_engine == "netcdf4": - to_file = getattr(ec, "to_netcdf") - elif export_engine == "zarr": - to_file = getattr(ec, "to_zarr") - else: - return - - to_file( - save_path=output_save_path, - overwrite=True, - combine=combine_files, - output_storage_options=output_storage_options, - ) - - _check_output_files(export_engine, ec.converted_raw_path, output_storage_options) diff --git a/echopype/tests/test_core.py b/echopype/tests/test_core.py new file mode 100644 index 000000000..4f8681b26 --- /dev/null +++ b/echopype/tests/test_core.py @@ -0,0 +1,62 @@ +from typing import TYPE_CHECKING + +import pytest + +if TYPE_CHECKING: + from ..core import SonarModelsHint +from ..core import SONAR_MODELS + +@pytest.mark.parametrize(["sonar_model", "ext"], [ + ("AZFP", ".01A"), + ("AZFP", ".01a"), + ("AZFP", ".05C"), + ("AZFP", ".12q"), + + ("EK60", ".raw"), + ("EK60", ".RAW"), + + ("EK80", ".raw"), + ("EK80", ".RAW"), + + ("EA640", ".raw"), + ("EA640", ".RAW"), + + ("AD2CP", ".ad2cp"), + ("AD2CP", ".AD2CP"), +]) +def test_file_extension_validation(sonar_model: "SonarModelsHint", ext: str): + SONAR_MODELS[sonar_model]["validate_ext"](ext) + + +@pytest.mark.parametrize(["sonar_model", "ext"], [ + ("AZFP", ".001A"), + ("AZFP", ".01AA"), + ("AZFP", ".01aa"), + ("AZFP", ".05AA"), + ("AZFP", ".07!"), + ("AZFP", ".01!"), + ("AZFP", ".0!A"), + ("AZFP", ".012"), + ("AZFP", ".0AA"), + ("AZFP", ".AAA"), + ("AZFP", "01A"), + + ("EK60", "raw"), + ("EK60", ".foo"), + + ("EK80", "raw"), + ("EK80", ".foo"), + + ("EA640", "raw"), + ("EA640", ".foo"), + + ("AD2CP", 
"ad2cp"), + ("AD2CP", ".foo"), +]) +def test_file_extension_validation_should_fail(sonar_model: "SonarModelsHint", ext: str): + try: + SONAR_MODELS[sonar_model]["validate_ext"](ext) + except ValueError: + pass + else: + raise ValueError(f"\"{ext}\" should have been rejected for sonar model {sonar_model}") diff --git a/echopype/tests/test_old.py b/echopype/tests/utils/test_old.py similarity index 100% rename from echopype/tests/test_old.py rename to echopype/tests/utils/test_old.py diff --git a/echopype/tests/utils/test_utils_io.py b/echopype/tests/utils/test_utils_io.py new file mode 100644 index 000000000..ab1c10077 --- /dev/null +++ b/echopype/tests/utils/test_utils_io.py @@ -0,0 +1,117 @@ +import os +import fsspec +from pathlib import Path +import pytest + +from echopype.utils.io import sanitize_file_path, validate_output_path + + +@pytest.mark.parametrize( + "file_path, should_fail, file_type", + [ + ('https://example.com/test.nc', True, 'nc'), + ('https://example.com/test.zarr', False, 'zarr'), + ('folder/test.nc', False, 'nc'), + ('folder/test.zarr', False, 'zarr'), + (Path('https:/example.com/test.nc'), True, 'nc'), + (Path('https:/example.com/test.zarr'), True, 'zarr'), + (Path('folder/test.nc'), False, 'nc'), + (Path('folder/test.zarr'), False, 'zarr'), + (fsspec.get_mapper('https://example.com/test.nc'), True, 'nc'), + (fsspec.get_mapper('https:/example.com/test.zarr'), False, 'zarr'), + (fsspec.get_mapper('folder/test.nc'), False, 'nc'), + (fsspec.get_mapper('folder/test.zarr'), False, 'zarr'), + ('https://example.com/test.jpeg', True, 'jpeg'), + (Path('https://example.com/test.jpeg'), True, 'jpeg'), + (fsspec.get_mapper('https://example.com/test.jpeg'), True, 'jpeg'), + ], +) +def test_sanitize_file_path(file_path, should_fail, file_type): + try: + sanitized = sanitize_file_path(file_path) + if not should_fail: + if file_type == 'nc': + assert isinstance(sanitized, Path) is True + elif file_type == 'zarr': + assert isinstance(sanitized, fsspec.FSMap) is True + except Exception as e: + assert isinstance(e, ValueError) is True + + +@pytest.mark.parametrize( + "save_path, engine", + [ + # Netcdf tests + ('folder/new_test.nc', 'netcdf4'), + ('folder/new_test.nc', 'zarr'), + ('folder/path/new_test.nc', 'netcdf4'), + ('folder/', 'netcdf4'), + ('s3://ooi-raw-data/', 'netcdf4'), + (Path('folder/'), 'netcdf4'), + (Path('folder/new_test.nc'), 'netcdf4'), + # Zarr tests + ('folder/new_test.zarr', 'zarr'), + ('folder/new_test.zarr', 'netcdf4'), + ('folder/path/new_test.zarr', 'zarr'), + ('folder/', 'zarr'), + # Empty tests + (None, 'netcdf4'), + (None, 'zarr'), + # Remotes + ('https://example.com/test.zarr', 'zarr'), + ('https://example.com/', 'zarr'), + ('https://example.com/test.nc', 'netcdf4'), + ('s3://ooi-raw-data/new_test.zarr', 'zarr'), + ('s3://ooi-raw-data/new_test.nc', 'netcdf4'), + ], +) +def test_validate_output_path(save_path, engine, minio_bucket): + output_root_path = './echopype/test_data/dump' + source_file = 'test.raw' + if engine == 'netcdf4': + ext = '.nc' + else: + ext = '.zarr' + + if save_path is not None: + if '://' not in str(save_path): + save_path = os.path.join(output_root_path, save_path) + is_dir = True if Path(save_path).suffix == '' else False + else: + is_dir = True + save_path = output_root_path + + output_storage_options = {} + if save_path and save_path.startswith("s3://"): + output_storage_options = dict( + client_kwargs=dict(endpoint_url="http://localhost:9000/"), + key="minioadmin", + secret="minioadmin", + ) + + try: + output_path = 
diff --git a/echopype/tests/utils/test_utils_io.py b/echopype/tests/utils/test_utils_io.py
new file mode 100644
index 000000000..ab1c10077
--- /dev/null
+++ b/echopype/tests/utils/test_utils_io.py
@@ -0,0 +1,117 @@
+import os
+import fsspec
+from pathlib import Path
+import pytest
+
+from echopype.utils.io import sanitize_file_path, validate_output_path
+
+
+@pytest.mark.parametrize(
+    "file_path, should_fail, file_type",
+    [
+        ('https://example.com/test.nc', True, 'nc'),
+        ('https://example.com/test.zarr', False, 'zarr'),
+        ('folder/test.nc', False, 'nc'),
+        ('folder/test.zarr', False, 'zarr'),
+        (Path('https:/example.com/test.nc'), True, 'nc'),
+        (Path('https:/example.com/test.zarr'), True, 'zarr'),
+        (Path('folder/test.nc'), False, 'nc'),
+        (Path('folder/test.zarr'), False, 'zarr'),
+        (fsspec.get_mapper('https://example.com/test.nc'), True, 'nc'),
+        (fsspec.get_mapper('https:/example.com/test.zarr'), False, 'zarr'),
+        (fsspec.get_mapper('folder/test.nc'), False, 'nc'),
+        (fsspec.get_mapper('folder/test.zarr'), False, 'zarr'),
+        ('https://example.com/test.jpeg', True, 'jpeg'),
+        (Path('https://example.com/test.jpeg'), True, 'jpeg'),
+        (fsspec.get_mapper('https://example.com/test.jpeg'), True, 'jpeg'),
+    ],
+)
+def test_sanitize_file_path(file_path, should_fail, file_type):
+    try:
+        sanitized = sanitize_file_path(file_path)
+        if not should_fail:
+            if file_type == 'nc':
+                assert isinstance(sanitized, Path) is True
+            elif file_type == 'zarr':
+                assert isinstance(sanitized, fsspec.FSMap) is True
+    except Exception as e:
+        assert isinstance(e, ValueError) is True
+
+
+@pytest.mark.parametrize(
+    "save_path, engine",
+    [
+        # Netcdf tests
+        ('folder/new_test.nc', 'netcdf4'),
+        ('folder/new_test.nc', 'zarr'),
+        ('folder/path/new_test.nc', 'netcdf4'),
+        ('folder/', 'netcdf4'),
+        ('s3://ooi-raw-data/', 'netcdf4'),
+        (Path('folder/'), 'netcdf4'),
+        (Path('folder/new_test.nc'), 'netcdf4'),
+        # Zarr tests
+        ('folder/new_test.zarr', 'zarr'),
+        ('folder/new_test.zarr', 'netcdf4'),
+        ('folder/path/new_test.zarr', 'zarr'),
+        ('folder/', 'zarr'),
+        # Empty tests
+        (None, 'netcdf4'),
+        (None, 'zarr'),
+        # Remotes
+        ('https://example.com/test.zarr', 'zarr'),
+        ('https://example.com/', 'zarr'),
+        ('https://example.com/test.nc', 'netcdf4'),
+        ('s3://ooi-raw-data/new_test.zarr', 'zarr'),
+        ('s3://ooi-raw-data/new_test.nc', 'netcdf4'),
+    ],
+)
+def test_validate_output_path(save_path, engine, minio_bucket):
+    output_root_path = './echopype/test_data/dump'
+    source_file = 'test.raw'
+    if engine == 'netcdf4':
+        ext = '.nc'
+    else:
+        ext = '.zarr'
+
+    if save_path is not None:
+        if '://' not in str(save_path):
+            save_path = os.path.join(output_root_path, save_path)
+        is_dir = True if Path(save_path).suffix == '' else False
+    else:
+        is_dir = True
+        save_path = output_root_path
+
+    output_storage_options = {}
+    if save_path and save_path.startswith("s3://"):
+        output_storage_options = dict(
+            client_kwargs=dict(endpoint_url="http://localhost:9000/"),
+            key="minioadmin",
+            secret="minioadmin",
+        )
+
+    try:
+        output_path = validate_output_path(
+            source_file, engine, output_storage_options, save_path
+        )
+
+        assert isinstance(output_path, str) is True
+        assert Path(output_path).suffix == ext
+
+        if is_dir:
+            assert Path(output_path).name == source_file.replace('.raw', '') + ext
+        else:
+            output_file = Path(save_path)
+            assert Path(output_path).name == output_file.name.replace(output_file.suffix, '') + ext
+    except Exception as e:
+        if 'https://' in save_path:
+            if save_path == 'https://example.com/':
+                assert isinstance(e, ValueError) is True
+                assert str(e) == 'Input file type not supported!'
+            elif save_path == 'https://example.com/test.nc':
+                assert isinstance(e, ValueError) is True
+                assert str(e) == 'Only local netcdf4 is supported.'
+            else:
+                assert isinstance(e, PermissionError) is True
+        elif save_path == 's3://ooi-raw-data/new_test.nc':
+            assert isinstance(e, ValueError) is True
+            assert str(e) == 'Only local netcdf4 is supported.'
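To make the expected outputs above concrete, here is a sketch of what validate_output_path returns in the two main local cases, following the implementation added to echopype/utils/io.py below (paths are illustrative):

    from echopype.utils.io import validate_output_path

    # Directory save_path: the output name is derived from the source file.
    validate_output_path("test.raw", "zarr", {}, "dump/")
    # -> 'dump/test.zarr'

    # Engine/extension mismatch: a warning is emitted and the engine's
    # extension wins.
    validate_output_path("test.raw", "zarr", {}, "dump/out.nc")
    # -> absolute path ending in 'dump/out.zarr'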
+ """ + + if not is_dir: + suffix = _get_suffix(file_path) + else: + suffix = "" + if isinstance(file_path, Path): # Check for extension if ":/" in str(file_path): raise ValueError(f"{file_path} is not a valid posix path.") + if suffix == ".zarr": + return fsspec.get_mapper(str(file_path)) return file_path elif isinstance(file_path, str): if "://" in file_path: + if suffix == ".nc": + raise ValueError("Only local netcdf4 is supported.") return fsspec.get_mapper(file_path, **storage_options) - return Path(file_path) + elif suffix == ".zarr": + return fsspec.get_mapper(file_path) + else: + return Path(file_path) elif isinstance(file_path, fsspec.FSMap): root = file_path.root - if Path(root).suffix == ".nc": + if suffix == ".nc": if not isinstance(file_path.fs, LocalFileSystem): # For special case of netcdf. # netcdf4 engine can only read Path or string raise ValueError("Only local netcdf4 is supported.") - return root + return Path(root) return file_path else: raise ValueError( @@ -79,8 +135,89 @@ def sanitize_file_path( ) -def check_file_existance( - file_path: Union[str, Path, FSMap], storage_options: dict = {} +def validate_output_path( + source_file: str, + engine: str, + output_storage_options: Dict = {}, + save_path: Union[None, Path, str] = None, +) -> str: + """ + Assemble output file names and path. + + Parameters + ---------- + source_file : str + The source file path + engine : str {'netcdf4', 'zarr'} + The engine to be used for file output + output_storage_options : dict + Storage options for remote output path + save_path : str | Path | None + Either a directory or a file. If none then the save path is 'temp_echopype_output/' + in the current working directory. + """ + if engine not in SUPPORTED_ENGINES: + ValueError(f"Engine {engine} is not supported for file export.") + + file_ext = SUPPORTED_ENGINES[engine]["ext"] + + if save_path is None: + warnings.warn("save_path is not provided") + + current_dir = Path.cwd() + # Check permission, raise exception if no permission + check_file_permissions(current_dir) + out_dir = current_dir.joinpath(Path("temp_echopype_output")) + if not out_dir.exists(): + out_dir.mkdir(parents=True) + + warnings.warn( + f"Resulting converted file(s) will be available at {str(out_dir)}" + ) + out_path = str(out_dir / (Path(source_file).stem + file_ext)) + elif not isinstance(save_path, Path) and not isinstance(save_path, str): + raise TypeError("save_path must be a string or Path") + else: + if isinstance(save_path, str): + # Clean folder path by stripping '/' at the end + save_path = save_path.strip("/") + + # Determine whether this is a directory or not + is_dir = True if Path(save_path).suffix == "" else False + else: + is_dir = True if save_path.suffix == "" else False + + # Cleans path + sanitized_path = sanitize_file_path( + save_path, storage_options=output_storage_options, is_dir=is_dir + ) + + # Check file permissions + if is_dir: + check_file_permissions(sanitized_path) + out_path = os.path.join(save_path, Path(source_file).stem + file_ext) + else: + if isinstance(sanitized_path, Path): + check_file_permissions(sanitized_path.parent) + final_path = sanitized_path + else: + path_dir = fsspec.get_mapper( + os.path.dirname(save_path), **output_storage_options + ) + check_file_permissions(path_dir) + final_path = Path(save_path) + if final_path.suffix != file_ext: + warnings.warn( + "Mismatch between specified engine and save_path found; forcing output format to engine." 
+def check_file_existence(
+    file_path: "PathHint", storage_options: Dict[str, str] = {}
 ) -> bool:
     """
     Checks if file exists in the specified path
@@ -127,7 +264,15 @@ def check_file_permissions(FILE_DIR):
             with FILE_DIR.fs.open(TEST_FILE, "w") as f:
                 f.write("testing\n")
             FILE_DIR.fs.delete(TEST_FILE)
-        elif isinstance(FILE_DIR, Path):
+        elif isinstance(FILE_DIR, (Path, str)):
+            if isinstance(FILE_DIR, str):
+                FILE_DIR = Path(FILE_DIR)
+
+            if not FILE_DIR.exists():
+                warnings.warn(
+                    f"{str(FILE_DIR)} does not exist. Attempting to create it."
+                )
+                FILE_DIR.mkdir(exist_ok=True, parents=True)

             TEST_FILE = FILE_DIR.joinpath(Path(".permission_test"))
             TEST_FILE.write_text("testing\n")
@@ -136,10 +281,5 @@ def check_file_permissions(FILE_DIR):
                 TEST_FILE.unlink(missing_ok=True)
             else:
                 TEST_FILE.unlink()
-        else:
-            TEST_FILE = os.path.join(FILE_DIR, ".permission_test")
-            with open(TEST_FILE, "w") as f:
-                f.write("testing\n")
-            os.remove(TEST_FILE)
     except Exception:
         raise PermissionError("Writing to specified path is not permitted.")
diff --git a/requirements-dev.txt b/requirements-dev.txt
index b679ac7ad..0d8797452 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -19,5 +19,6 @@ setuptools_scm
 sphinx
 sphinx-automodapi
 sphinx_rtd_theme
+sphinxcontrib-mermaid
 twine
 wheel
diff --git a/requirements.txt b/requirements.txt
index 557c52ac4..95c160a3a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,9 +5,10 @@ numpy
 pynmea2
 pytz
 scipy
-xarray<0.18
+xarray
 zarr
-fsspec==0.8.7
-s3fs==0.5.2
+fsspec
+s3fs
 requests
 aiohttp
+typing-extensions~=3.10
diff --git a/setup.py b/setup.py
index 3ee0a78be..ac9119b70 100644
--- a/setup.py
+++ b/setup.py
@@ -9,6 +9,14 @@
 # Dependencies.
 with open("requirements.txt") as f:
     requirements = f.readlines()
+
+with open("requirements-dev.txt") as f:
+    dev_reqs = f.readlines()
+
+EXTRA_REQUIRES = {
+    "dev": dev_reqs,
+}
+
 INSTALL_REQUIRES = [t.strip() for t in requirements]

 opts = dict(
@@ -16,6 +24,8 @@
     packages=find_packages(),
     include_package_data=True,
     install_requires=INSTALL_REQUIRES,
+    extras_require=EXTRA_REQUIRES,
+    python_requires=">=3.6",
     py_modules=["_echopype_version"],
     use_scm_version={
         "fallback_version": "unknown",
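With the new extras_require hook in setup.py, the development dependencies from requirements-dev.txt can be installed together with the package in one step, for example via an editable install from a source checkout:

    pip install -e ".[dev]"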