diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fefd0f..9efa9db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ - Added tests for `ici.extract_params` ([#5](https://github.com/NatLabRockies/ampworks/pull/5)) ### Bug Fixes +- Catch `NaN` values when parsing for headers in Excel files ([#23](https://github.com/NatLabRockies/ampworks/pull/23)) - Force the `cell` dataset of the `DqdvFitter` to require an Ah column ([#19](https://github.com/NatLabRockies/ampworks/pull/19)) - Update patching policy for releases, use `spellcheck` in nox pre-commit ([#13](https://github.com/NatLabRockies/ampworks/pull/13)) - Readers missing name-only columns, e.g., `testtime` ([#8](https://github.com/NatLabRockies/ampworks/pull/8)) diff --git a/images/coverage.svg b/images/coverage.svg index 33c29fd..e69bfd4 100644 --- a/images/coverage.svg +++ b/images/coverage.svg @@ -1,5 +1,5 @@ - - coverage: 30.09% + + coverage: 30.07% @@ -15,7 +15,7 @@ coverage - - 30.09% + + 30.07% diff --git a/pyproject.toml b/pyproject.toml index 06af9c1..b556360 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,11 +68,11 @@ select = [ "D", # All pydocstyle checks ] extend-select = [ - "E302", # Expected 2 blank lines, found 1 (requires preview=true, above) + "E302", "E305", # Expected blank lines (requires preview=true, above) ] ignore = [ - "B007", "B009", "B028", "B905", - "D100", "D103", "D104", "D203", "D205", "D212", "D401", + "B007", "B009", "B010", "B028", "B905", + "D1", "D203", "D205", "D212", "D400", "D401", "D415", "E201", "E202", "E226", "E241", "E731", ] diff --git a/src/ampworks/__init__.py b/src/ampworks/__init__.py index 05d1f8e..4d15c74 100644 --- a/src/ampworks/__init__.py +++ b/src/ampworks/__init__.py @@ -18,7 +18,7 @@ the documentation by visiting the website, hosted on Read the Docs. The website includes search functionality and more detailed examples. 
-""" # noqa: D400, D415 (ignore missing punctuation on first line) +""" from typing import TYPE_CHECKING diff --git a/src/ampworks/_core/_read.py b/src/ampworks/_core/_read.py index 78e3943..f1f961f 100644 --- a/src/ampworks/_core/_read.py +++ b/src/ampworks/_core/_read.py @@ -1,7 +1,7 @@ from __future__ import annotations from warnings import warn -from typing import TYPE_CHECKING +from typing import Sequence, TYPE_CHECKING import pandas as pd @@ -11,7 +11,7 @@ # Define expected headers and their aliases -def format_alias(names: str, units: str) -> str: +def format_alias(names: Sequence[str], units: Sequence[str]) -> list[str]: aliases = units.copy() # units only @@ -187,8 +187,11 @@ def read_table(filepath: PathLike) -> Dataset: return Dataset() -def read_excel(filepath: PathLike, sheet_name: str | int | list = 'first', - stack_sheets: bool = False) -> Dataset: +def read_excel( + filepath: PathLike, + sheet_name: str | int | list[int | str] | None = None, + stack_sheets: bool = False, +) -> Dataset: """Read excel file.""" from ampworks import Dataset @@ -196,19 +199,22 @@ def read_excel(filepath: PathLike, sheet_name: str | int | list = 'first', all_sheets = workbook.sheet_names num_sheets = len(all_sheets) - # Warn if any sheet conflicts with 'first' or 'all' - if sheet_name == 'first' and 'first' in all_sheets: - warn() - elif sheet_name == 'all' and 'all' in all_sheets: + # warn if 'all' matches a sheet name + if sheet_name == 'all' and 'all' in all_sheets: warn() # Set which sheets to iterate through - if sheet_name in ['first', 'all']: + if sheet_name is None or sheet_name == 'all': iter_sheets = all_sheets elif isinstance(sheet_name, (str, int)): iter_sheets = [sheet_name] + elif isinstance(sheet_name, Sequence): + iter_sheets = list(sheet_name) else: - iter_sheets = sheet_name + raise TypeError( + "'sheet_name' expected a str, int, list[str | int], or None, but" + f" got {type(sheet_name)}." 
+ ) # Raise errors if invalid indices/names indices = [v for v in iter_sheets if isinstance(v, int)] @@ -233,20 +239,21 @@ def read_excel(filepath: PathLike, sheet_name: str | int | list = 'first', # Find header row header_row = None for idx, row in preview.iterrows(): - if header_matches(row.values.astype(str), REQUIRED_HEADERS): + tmp_headers = row.fillna('').astype(str).values + if header_matches(tmp_headers, REQUIRED_HEADERS): header_row = idx break if header_row is not None: df = workbook.parse(sheet, header=header_row) datasets[sheet] = standardize_headers(df) - if sheet_name == 'first': + if sheet_name is None: break else: failed.append(sheet) # Prepare outputs - if sheet_name != 'first' and failed: + if sheet_name is not None and failed: warn(f"Could not find valid headers in requested sheets: {failed}") if not datasets: