diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 674fa41..50158f6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,12 +20,10 @@ jobs: submodules: false coverage: codecov envs: | - - linux: py38-oldestdeps - - linux: py39-online + - linux: py39 #- linux: py310 #- linux: py311 - - linux: py312-devdeps - #- windows: py38 + - linux: py312 #- windows: py39 - windows: py310 #- windows: py311 @@ -33,5 +31,6 @@ jobs: #- macos: py38 #- macos: py39 #- macos: py310 - - macos: py311 - #- macos: py312 + - macos: py311-online + - macos: py312 + - windows: py313-devdeps diff --git a/README.md b/README.md index e369cc8..c806524 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ `cdflib` is a python module to read/write CDF (Common Data Format `.cdf`) files without needing to install the [CDF NASA library](https://cdf.gsfc.nasa.gov/). -Python >= 3.8 is required. +Python >= 3.9 is required. The core of this package uses only numpy, with no complicated compiler requirements. 
## Install diff --git a/cdflib/cdfwrite.py b/cdflib/cdfwrite.py index 213c5f9..989d4b4 100644 --- a/cdflib/cdfwrite.py +++ b/cdflib/cdfwrite.py @@ -994,7 +994,7 @@ def _write_var_data_nonsparse( if isinstance(indata, complex): epoch16.append(indata.real) epoch16.append(indata.imag) - indata = epoch16 + indata = np.array(epoch16) # Convert to byte stream recs, data = self._convert_data(dataType, numElems, numValues, indata) diff --git a/cdflib/epochs.py b/cdflib/epochs.py index 0c54af0..4f77081 100644 --- a/cdflib/epochs.py +++ b/cdflib/epochs.py @@ -239,14 +239,15 @@ def timestamp_to_cdfepoch(unixtime_data: npt.ArrayLike) -> np.ndarray: for ud in times: if not np.isnan(ud): dt = np.datetime64(int(ud * 1000), "ms") + dt_item: datetime.datetime = dt.item() dt_to_convert = [ - dt.item().year, - dt.item().month, - dt.item().day, - dt.item().hour, - dt.item().minute, - dt.item().second, - int(dt.item().microsecond / 1000), + dt_item.year, + dt_item.month, + dt_item.day, + dt_item.hour, + dt_item.minute, + dt_item.second, + int(dt_item.microsecond / 1000), ] converted_data = CDFepoch.compute(dt_to_convert) else: @@ -267,15 +268,16 @@ def timestamp_to_cdfepoch16(unixtime_data: npt.ArrayLike) -> np.ndarray: for ud in times: if not np.isnan(ud): dt = np.datetime64(int(ud * 1000000), "us") + dt_item: datetime.datetime = dt.item() dt_to_convert = [ - dt.item().year, - dt.item().month, - dt.item().day, - dt.item().hour, - dt.item().minute, - dt.item().second, - int(dt.item().microsecond / 1000), - int(dt.item().microsecond % 1000), + dt_item.year, + dt_item.month, + dt_item.day, + dt_item.hour, + dt_item.minute, + dt_item.second, + int(dt_item.microsecond / 1000), + int(dt_item.microsecond % 1000), 0, 0, ] @@ -298,15 +300,16 @@ def timestamp_to_tt2000(unixtime_data: npt.ArrayLike) -> np.ndarray: for ud in times: if not np.isnan(ud): dt = np.datetime64(int(ud * 1000000), "us") + dt_item: datetime.datetime = dt.item() dt_to_convert = [ - dt.item().year, - dt.item().month, - 
dt.item().day, - dt.item().hour, - dt.item().minute, - dt.item().second, - int(dt.item().microsecond / 1000), - int(dt.item().microsecond % 1000), + dt_item.year, + dt_item.month, + dt_item.day, + dt_item.hour, + dt_item.minute, + dt_item.second, + int(dt_item.microsecond / 1000), + int(dt_item.microsecond % 1000), 0, ] converted_data = CDFepoch.compute(dt_to_convert) @@ -487,7 +490,7 @@ def breakdown_tt2000(tt2000: cdf_tt2000_type) -> np.ndarray: 999 ns is returned. """ - new_tt2000 = np.atleast_1d(tt2000).astype(np.longlong) + new_tt2000 = np.atleast_1d(tt2000).astype(np.int64) count = len(new_tt2000) toutcs = np.zeros((9, count), dtype=int) datxs = CDFepoch._LeapSecondsfromJ2000(new_tt2000) @@ -498,8 +501,8 @@ def breakdown_tt2000(tt2000: cdf_tt2000_type) -> np.ndarray: nanoSecsSinceJ2000[~post2000] += CDFepoch.T12hinNanoSecs nanoSecsSinceJ2000[~post2000] -= CDFepoch.dTinNanoSecs - secsSinceJ2000 = (nanoSecsSinceJ2000 / CDFepoch.SECinNanoSecsD).astype(np.longlong) - nansecs = (nanoSecsSinceJ2000 - secsSinceJ2000 * CDFepoch.SECinNanoSecs).astype(np.longlong) + secsSinceJ2000 = (nanoSecsSinceJ2000 / CDFepoch.SECinNanoSecsD).astype(np.int64) + nansecs = (nanoSecsSinceJ2000 - (secsSinceJ2000 * CDFepoch.SECinNanoSecs)).astype(np.int64) # type: ignore posNanoSecs = new_tt2000 > 0 secsSinceJ2000[posNanoSecs] -= 32 @@ -512,7 +515,7 @@ def breakdown_tt2000(tt2000: cdf_tt2000_type) -> np.ndarray: t2s = secsSinceJ2000 * CDFepoch.SECinNanoSecs + nansecs - post72 = datxs[:, 0] > 0 + post72: np.ndarray = datxs[:, 0] > 0 secsSinceJ2000[post72] -= datxs[post72, 0].astype(int) epochs = CDFepoch.J2000Since0AD12hSec + secsSinceJ2000 diff --git a/cdflib/xarray/cdf_to_xarray.py b/cdflib/xarray/cdf_to_xarray.py index 5c8d84a..5236376 100644 --- a/cdflib/xarray/cdf_to_xarray.py +++ b/cdflib/xarray/cdf_to_xarray.py @@ -307,7 +307,7 @@ def _convert_fillvals_to_nan(var_data: npt.NDArray, var_atts: Dict[str, Any], va new_data[new_data == var_atts["FILLVAL"]] = np.datetime64("nat") else: 
new_data[new_data == var_atts["FILLVAL"]] = np.nan - else: + elif new_data.size == 1: if new_data == var_atts["FILLVAL"]: if new_data.dtype.type == np.datetime64: new_data[new_data == var_atts["FILLVAL"]] = np.array(np.datetime64("nat")) diff --git a/cdflib/xarray/xarray_to_cdf.py b/cdflib/xarray/xarray_to_cdf.py index 562caec..28a3949 100644 --- a/cdflib/xarray/xarray_to_cdf.py +++ b/cdflib/xarray/xarray_to_cdf.py @@ -194,10 +194,12 @@ def _dtype_to_cdf_type(var: xr.DataArray, terminate_on_warning: bool = False) -> return STRINGS_TO_DATATYPES[cdf_data_type], element_size -def _dtype_to_fillval(var: xr.DataArray, terminate_on_warning: bool = False) -> Union[np.number, np.str_, np.datetime64]: +def _dtype_to_fillval( + var: xr.DataArray, terminate_on_warning: bool = False +) -> Union[np.number, np.str_, np.datetime64, np.complex128]: datatype, _ = _dtype_to_cdf_type(var, terminate_on_warning=terminate_on_warning) if datatype in DATATYPE_FILLVALS: - return DATATYPE_FILLVALS[datatype] # type: ignore + return DATATYPE_FILLVALS[datatype] # type: ignore[return-value] else: return np.str_(" ") @@ -247,50 +249,98 @@ def _verify_depend_dimensions( coordinate_variable_name: str, terminate_on_warning: bool = False, ) -> bool: - primary_data = np.array(dataset[primary_variable_name]) - coordinate_data = np.array(dataset[coordinate_variable_name]) + try: + primary_data = np.array(dataset[primary_variable_name]) + coordinate_data = np.array(dataset[coordinate_variable_name]) - if len(primary_data.shape) != 0 and len(coordinate_data.shape) == 0: - _warn_or_except( - f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable {primary_variable_name}, but the dimensions do not match.", - terminate_on_warning, - ) - return False + if len(primary_data.shape) != 0 and len(coordinate_data.shape) == 0: + _warn_or_except( + f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable 
{primary_variable_name}, but the dimensions do not match.", + terminate_on_warning, + ) + return False - if len(coordinate_data.shape) != 0 and len(primary_data.shape) == 0: - _warn_or_except( - f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable {primary_variable_name}, but the dimensions do not match.", - terminate_on_warning, - ) - return False + if len(coordinate_data.shape) != 0 and len(primary_data.shape) == 0: + _warn_or_except( + f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable {primary_variable_name}, but the dimensions do not match.", + terminate_on_warning, + ) + return False - if len(coordinate_data.shape) > 2: - _warn_or_except( - f"ISTP Compliance Warning: {coordinate_variable_name} has too many dimensions to be the DEPEND_{dimension_number} for variable {primary_variable_name}", - terminate_on_warning, - ) - return False - if len(coordinate_data.shape) == 2: - if primary_data.shape[0] != coordinate_data.shape[0]: + if len(coordinate_data.shape) > 2: _warn_or_except( - f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable {primary_variable_name}, but the Epoch dimensions do not match.", + f"ISTP Compliance Warning: {coordinate_variable_name} has too many dimensions to be the DEPEND_{dimension_number} for variable {primary_variable_name}", terminate_on_warning, ) return False - if len(primary_data.shape) <= dimension_number: - _warn_or_except( - f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable {primary_variable_name}, but {primary_variable_name} does not have that many dimensions", - terminate_on_warning, - ) - return False + if len(coordinate_data.shape) == 2: + if primary_data.shape[0] != coordinate_data.shape[0]: + _warn_or_except( + f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the 
DEPEND_{dimension_number} for variable {primary_variable_name}, but the Epoch dimensions do not match.", + terminate_on_warning, + ) + return False - if primary_data.shape[dimension_number] != coordinate_data.shape[-1]: - _warn_or_except( - f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable {primary_variable_name}, but the dimensions do not match.", - terminate_on_warning, - ) - return False + # All variables should have at the very least a size of dimension_number + # (i.e. a variable with a DEPEND_2 should have 2 dimensions) + if len(primary_data.shape) < dimension_number: + _warn_or_except( + f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable {primary_variable_name}, but {primary_variable_name} does not have that many dimensions", + terminate_on_warning, + ) + return False + + # All variables with a DEPEND_0 should always have a shape size of at least dimension_number + 1 + # (i.e. 
a variable with a DEPEND_2 should have 3 dimensions: 2 for DEPEND_1 and DEPEND_2, and 1 for DEPEND_0) + if len(primary_data.shape) < dimension_number + 1: + if "VAR_TYPE" in dataset[primary_variable_name].attrs: + if dataset[primary_variable_name].attrs["VAR_TYPE"] == "data": + # Data variables should always have as many dimensions as there are DEPENDS + _warn_or_except( + f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable {primary_variable_name}, but {primary_variable_name} does not have that many dimensions", + terminate_on_warning, + ) + return False + else: + for key in dataset[primary_variable_name].attrs: + if key.lower() == "depend_0": + # support_data variables with a DEPEND_0 should always match the dimension number + _warn_or_except( + f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable {primary_variable_name}, but {primary_variable_name} does not have that many dimensions", + terminate_on_warning, + ) + return False + + # Check that the size of the dimension that DEPEND_{i} is referring to is + # also the same size of the DEPEND_{i}'s last dimension + for key in dataset[primary_variable_name].attrs: + if key.lower() == "depend_0": + if primary_data.shape[dimension_number] != coordinate_data.shape[-1]: + _warn_or_except( + f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable {primary_variable_name}, but the dimensions do not match.", + terminate_on_warning, + ) + return False + else: + if primary_data.shape[dimension_number - 1] != coordinate_data.shape[-1]: + _warn_or_except( + f"ISTP Compliance Warning: {coordinate_variable_name} is listed as the DEPEND_{dimension_number} for variable {primary_variable_name}, but the dimensions do not match.", + terminate_on_warning, + ) + return False + except ISTPError as istp_e: + raise istp_e + except Exception as e: + if terminate_on_warning: + 
raise Exception( + f"Unknown error occured verifying {primary_variable_name}'s DEPEND_{dimension_number}, which is pointed to {coordinate_variable_name}. Error message: {e}" + ) + else: + print( + f"Unknown error occured verifying {primary_variable_name}'s DEPEND_{dimension_number}, which is pointed to {coordinate_variable_name}" + ) + return False return True @@ -333,7 +383,9 @@ def _dimension_checker(dataset: xr.Dataset, terminate_on_warning: bool = False) if depend_regex.match(att.lower()) and att != "DEPEND_0": if (dataset[var].attrs[att] in dataset) or (dataset[var].attrs[att] in dataset.coords): depend_i = dataset[var].attrs[att] - if _verify_depend_dimensions(dataset, int(att[-1]), var, depend_i): + if _verify_depend_dimensions( + dataset, int(att[-1]), var, depend_i, terminate_on_warning=terminate_on_warning + ): istp_depend_dimension_list.append(dataset[var].attrs[att]) else: _warn_or_except( @@ -424,7 +476,7 @@ def _epoch_checker(dataset: xr.Dataset, dim_vars: List[str], terminate_on_warnin # Ensure that the dimension is listed somewhere else in the dataset if potential_depend_0 in dataset or potential_depend_0 in dataset.coords: - if _verify_depend_dimensions(dataset, 0, var, potential_depend_0): + if _verify_depend_dimensions(dataset, 0, var, potential_depend_0, terminate_on_warning=terminate_on_warning): depend_0_list.append(potential_depend_0) time_varying_dimensions.append(var) else: diff --git a/mypy.ini b/mypy.ini index 439fda9..bb513b0 100644 --- a/mypy.ini +++ b/mypy.ini @@ -7,7 +7,7 @@ show_column_numbers = True warn_unused_configs = True warn_redundant_casts = True -warn_unused_ignores = True +warn_unused_ignores = False strict_equality = True strict_concatenate = True check_untyped_defs = True diff --git a/setup.cfg b/setup.cfg index 99aaa07..581e858 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,7 +15,6 @@ classifiers = Environment :: Console Intended Audience :: Science/Research Operating System :: OS Independent - Programming Language :: 
Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 @@ -27,7 +26,7 @@ long_description = file: README.md long_description_content_type = text/markdown [options] -python_requires = >= 3.8 +python_requires = >= 3.9 include_package_data = True packages = cdflib install_requires = diff --git a/tests/test_xarray_istp_checkers.py b/tests/test_xarray_istp_checkers.py new file mode 100644 index 0000000..9e159f1 --- /dev/null +++ b/tests/test_xarray_istp_checkers.py @@ -0,0 +1,136 @@ +import os + +import numpy as np +import pytest +import xarray as xr + +from cdflib.xarray import xarray_to_cdf +from cdflib.xarray.xarray_to_cdf import ISTPError + +sample_global_attributes = { + "Project": "Hail Mary", + "Source_name": "Thin Air", + "Discipline": "None", + "Data_type": "counts", + "Descriptor": "Midichlorians in unicorn blood", + "Data_version": "3.14", + "Logical_file_id": "SEVENTEEN", + "PI_name": "Darth Vader", + "PI_affiliation": "Dark Side", + "TEXT": "AHHHHH", + "Instrument_type": "Banjo", + "Mission_group": "Impossible", + "Logical_source": ":)", + "Logical_source_description": ":(", +} +sample_variable_attributes = { + "CATDESC": "data", + "DISPLAY_TYPE": "spectrogram", + "FIELDNAM": "test", + "FORMAT": "test", + "UNITS": "test", + "VALIDMIN": 0, + "VALIDMAX": 10, + "FILLVAL": np.int64(-9223372036854775808), +} +sample_data_variable_attributes = sample_variable_attributes | {"VAR_TYPE": "data", "LABLAXIS": "test"} +sample_support_variable_attributes = sample_variable_attributes | {"VAR_TYPE": "support_data"} + + +def test_istp_dimension_attribute_checker(): + # Create a bare-minimum ISTP compliant file + pytest.importorskip("xarray") + + var_data = [[1, 2, 3], [1, 2, 3], [1, 2, 3]] + var_dims = ["epoch", "direction"] + data = xr.Variable(var_dims, var_data, sample_data_variable_attributes | {"DEPEND_0": "epoch", "DEPEND_1": "direction"}) + epoch_data = [1, 2, 3] + epoch_dims = ["epoch"] + 
epoch = xr.Variable(epoch_dims, epoch_data, sample_support_variable_attributes) + dir_data = [1, 2, 3] + dir_dims = ["direction"] + direction = xr.Variable(dir_dims, dir_data, sample_support_variable_attributes) + + ds = xr.Dataset(data_vars={"data": data, "epoch": epoch, "direction": direction}, attrs=sample_global_attributes) + + xarray_to_cdf(ds, "hello.cdf", auto_fix_depends=False, terminate_on_warning=True) + os.remove("hello.cdf") + + +def test_istp_dimension_attribute_checker_with_typo(): + # Put an extra "n" on the end of "DEPEND_1":"directionn" from the last test, + # and make sure it fails + pytest.importorskip("xarray") + + var_data = [[1, 2, 3], [1, 2, 3], [1, 2, 3]] + var_dims = ["epoch", "direction"] + data = xr.Variable(var_dims, var_data, sample_data_variable_attributes | {"DEPEND_0": "epoch", "DEPEND_1": "directionn"}) + epoch_data = [1, 2, 3] + epoch_dims = ["epoch"] + epoch = xr.Variable(epoch_dims, epoch_data, sample_support_variable_attributes) + dir_data = [1, 2, 3] + dir_dims = ["direction"] + direction = xr.Variable(dir_dims, dir_data, sample_support_variable_attributes) + + ds = xr.Dataset(data_vars={"data": data, "epoch": epoch, "direction": direction}, attrs=sample_global_attributes) + with pytest.raises(ISTPError): + xarray_to_cdf(ds, "hello.cdf", auto_fix_depends=False, terminate_on_warning=True) + + if os.path.exists("hello.cdf"): + os.remove("hello.cdf") + + +def test_istp_support_data_has_depends(): + # We're going to test that a support_data variable is allowed to have its own DEPEND_{i} + pytest.importorskip("xarray") + + var_data = [[1, 2, 3], [1, 2, 3], [1, 2, 3]] + var_dims = ["epoch", "direction"] + data = xr.Variable(var_dims, var_data, sample_data_variable_attributes | {"DEPEND_0": "epoch", "DEPEND_1": "direction"}) + support_data = [1, 2, 3] + support_dims = ["direction"] + support = xr.Variable(support_dims, support_data, sample_support_variable_attributes | {"DEPEND_1": "direction"}) + epoch_data = [1, 2, 3] + epoch_dims = 
["epoch"] + epoch = xr.Variable(epoch_dims, epoch_data, sample_support_variable_attributes) + dir_data = [1, 2, 3] + dir_dims = ["direction"] + direction = xr.Variable(dir_dims, dir_data, sample_support_variable_attributes) + + ds = xr.Dataset( + data_vars={"data": data, "epoch": epoch, "direction": direction, "support": support}, attrs=sample_global_attributes + ) + + xarray_to_cdf(ds, "hello.cdf", auto_fix_depends=False, terminate_on_warning=True) + os.remove("hello.cdf") + + +def test_istp_support_data_has_depends_expected_failure(): + # We're going to mimic the previous test, but give support a DEPEND_0 of "epoch". + # This should make it fail, because it does not have enough dimensions + pytest.importorskip("xarray") + + var_data = [[1, 2, 3], [1, 2, 3], [1, 2, 3]] + var_dims = ["epoch", "direction"] + data = xr.Variable(var_dims, var_data, sample_data_variable_attributes | {"DEPEND_0": "epoch", "DEPEND_1": "direction"}) + support_data = [1, 2, 3] + support_dims = ["direction"] + support = xr.Variable( + support_dims, support_data, sample_support_variable_attributes | {"DEPEND_0": "epoch", "DEPEND_1": "direction"} + ) + epoch_data = [1, 2, 3] + epoch_dims = ["epoch"] + epoch = xr.Variable(epoch_dims, epoch_data, sample_support_variable_attributes) + dir_data = [1, 2, 3] + dir_dims = ["direction"] + direction = xr.Variable(dir_dims, dir_data, sample_support_variable_attributes) + + ds = xr.Dataset( + data_vars={"data": data, "epoch": epoch, "direction": direction, "support": support}, attrs=sample_global_attributes + ) + + with pytest.raises(ISTPError): + xarray_to_cdf(ds, "hello.cdf", auto_fix_depends=False, terminate_on_warning=True) + + if os.path.exists("hello.cdf"): + os.remove("hello.cdf") diff --git a/tox.ini b/tox.ini index 05820eb..2fe49ca 100644 --- a/tox.ini +++ b/tox.ini @@ -1,9 +1,9 @@ [tox] min_version = 4.0 envlist = - py{38,39,310,311,312}{,-online} - py38-oldestdeps - py312-devdeps + py{39,310,311,312,313}{,-online} + #py39-oldestdeps + 
py313-devdeps build_docs [testenv]