diff --git a/cdflib/cdfwrite.py b/cdflib/cdfwrite.py index 354c7ce..54a0e23 100644 --- a/cdflib/cdfwrite.py +++ b/cdflib/cdfwrite.py @@ -870,30 +870,33 @@ def _write_var_attrs(self, f: io.BufferedWriter, varNum: int, var_attrs: Dict[st if items == 2: dataType = self._datatype_token(entry[1]) + # Handle user setting datatype if dataType > 0: # CDF data type defined in entry data = entry[0] if self._checklistofNums(data): - # All are numbers + # Data needs no pre-processing and is good to go if hasattr(data, "__len__") and not isinstance(data, str): numElems = len(data) else: numElems = 1 else: - # Then string(s) -- either in CDF_type or epoch in string(s) + # Data needs some sort of pre-processing to proceed if dataType == self.CDF_CHAR or dataType == self.CDF_UCHAR: if hasattr(data, "__len__") and not isinstance(data, str): + # Reformat strings items = len(data) odata = data data = "" for x in range(0, items): if x > 0: data += "\\N " - data += odata[x] + data += str(odata[x]) else: - data = odata[x] + data = str(odata[x]) numElems = len(data) elif dataType == self.CDF_EPOCH or dataType == self.CDF_EPOCH16 or dataType == self.CDF_TIME_TT2000: + # Convert data to CDF time cvalue = [] if hasattr(data, "__len__") and not isinstance(data, str): numElems = len(data) @@ -903,7 +906,22 @@ def _write_var_attrs(self, f: io.BufferedWriter, varNum: int, var_attrs: Dict[st else: data = cdfepoch.CDFepoch.parse(data) numElems = 1 - else: + elif isinstance(data, str): + # One possibility is that the user wants to convert a string to a number + numElems = 1 + data = np.array(float(data)) + else: + # The final possibility I can think of is that the user wants to convert a list of strings to a list of numbers + try: + data = np.array([float(item) for item in data]) + numElems = len(data) + except Exception: + logger.warning( + f"Cannot determine how to convert {str(data)} to specified type of {dataType}. Ignoring the specified datatype, and continuing." 
+ ) + dataType = 0 + + if dataType == 0: # No data type defined... data = entry if hasattr(data, "__len__") and not isinstance(data, str): @@ -913,9 +931,9 @@ def _write_var_attrs(self, f: io.BufferedWriter, varNum: int, var_attrs: Dict[st for x in range(0, len(entry)): if x > 0: data += "\\N " - data += entry[x] + data += str(entry[x]) else: - data = entry[x] + data = str(entry[x]) numElems = len(data) else: numElems, dataType = self._datatype_define(entry) @@ -1750,7 +1768,7 @@ def _write_aedr( value_size = 1 cdata = "\x00".encode() else: - value_size = len(cdata) + value_size = recs * self._datatype_size(dataType, numElems) block_size = value_size + 56 aedr = bytearray(block_size) aedr[0:8] = struct.pack(">q", block_size) @@ -2304,6 +2322,17 @@ def _convert_data(self, data_type: int, num_elems: int, num_values: int, indata: odata += adata.ljust(num_elems, "\x00") recs = int((size * size2) / num_values) return recs, odata.encode() + elif all(isinstance(item, str) for item in indata): + # Attempt to convert to a numpy array of numbers + try: + return self._numpy_to_bytes(data_type, num_values, num_elems, np.array([float(item) for item in indata])) + except Exception: + # Do the best we can, create bytes from the string. 
+ # It will probably come out to be gibberish + outdata = ("".join(indata)).ljust(num_elems, "\x00").encode() + recs = int(len(outdata) / recSize) + return recs, outdata + else: try: return self._numpy_to_bytes(data_type, num_values, num_elems, np.array(indata)) @@ -2366,8 +2395,19 @@ def _convert_data(self, data_type: int, num_elems: int, num_values: int, indata: return recs, odata.encode() else: return self._numpy_to_bytes(data_type, num_values, num_elems, indata) - elif isinstance(indata, str): + elif isinstance(indata, str) and (data_type == self.CDF_CHAR or data_type == self.CDF_UCHAR): + # Just convert the string directly to bytes return 1, indata.ljust(num_elems, "\x00").encode() + elif isinstance(indata, str) and data_type != self.CDF_CHAR and data_type != self.CDF_UCHAR: + # Try to convert the single string to a numerical type. + try: + return self._numpy_to_bytes(data_type, num_values, num_elems, np.array([float(indata)])) + except Exception: + # Do the best we can, create bytes from the string. 
+ # It will probably come out to be gibberish + outdata = indata.ljust(num_elems, "\x00").encode() + recs = int(len(outdata) / recSize) + return recs, outdata else: try: # Try converting the data to numpy @@ -2398,7 +2438,7 @@ def _convert_data(self, data_type: int, num_elems: int, num_values: int, indata: else: return recs, struct.pack(form, indata) except struct.error: - raise ValueError("Unable to convert data to CDF format, data " "object cannot be of type string.") + raise ValueError("Unable to convert data to CDF format, data object cannot be of type string.") def _num_values(self, zVar: bool, varNum: int) -> int: """ diff --git a/tests/test_cdfwrite.py b/tests/test_cdfwrite.py index 6e44b5e..9c69ad0 100755 --- a/tests/test_cdfwrite.py +++ b/tests/test_cdfwrite.py @@ -5,6 +5,7 @@ import pytest from cdflib import cdfread, cdfwrite +from cdflib.xarray import cdf_to_xarray R = Path(__file__).parent fnbasic = "testing.cdf" @@ -639,3 +640,45 @@ def test_convert_data_error(tmp_path): with pytest.raises(ValueError): # Data from list of strings with dimension "epoch" cdf._convert_data(51, 1, 1, indata) + + +def test_string_input_but_number_type(tmp_path): + # This small example used to create a corrupted CDF file. + # Because the FILLVAL was input as a string, but it is told to be a double + cdf = cdfwrite.CDF(tmp_path / "test.cdf") + var_data = np.random.rand(5, 3) * 30 + var_spec = { + "Variable": "temperature", + "Data_Type": 45, + "Num_Elements": 1, + "Rec_Vary": False, + "Dim_Sizes": [5, 3], + "Compress": 0, + } + var_att_dict = {"FILLVAL": [np.str_("12"), "CDF_DOUBLE"]} + cdf.write_var(var_spec, var_attrs=var_att_dict, var_data=var_data) + cdf.close() + + # Reading it back in used to cause an error + cdf_to_xarray(tmp_path / "test.cdf") + + +def test_array_string_input_but_number_type(tmp_path): + # This small example used to create a corrupted CDF file. 
+ # Because the FILLVAL was input as an array of strings, but it is told to be a double + cdf = cdfwrite.CDF(tmp_path / "test.cdf") + var_data = np.random.rand(5, 3) * 30 + var_spec = { + "Variable": "temperature", + "Data_Type": 45, + "Num_Elements": 1, + "Rec_Vary": False, + "Dim_Sizes": [5, 3], + "Compress": 0, + } + var_att_dict = {"FILLVAL": [np.array([np.str_("12"), np.str_("13")]), "CDF_DOUBLE"]} + cdf.write_var(var_spec, var_attrs=var_att_dict, var_data=var_data) + cdf.close() + + # Reading it back in used to cause an error + cdf_to_xarray(tmp_path / "test.cdf")