Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 50 additions & 10 deletions cdflib/cdfwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,30 +870,33 @@ def _write_var_attrs(self, f: io.BufferedWriter, varNum: int, var_attrs: Dict[st
if items == 2:
dataType = self._datatype_token(entry[1])

# Handle user setting datatype
if dataType > 0:
# CDF data type defined in entry
data = entry[0]
if self._checklistofNums(data):
# All are numbers
# Data needs no pre-processing and is good to go
if hasattr(data, "__len__") and not isinstance(data, str):
numElems = len(data)
else:
numElems = 1
else:
# Then string(s) -- either in CDF_type or epoch in string(s)
# Data needs some sort of pre-processing to proceed
if dataType == self.CDF_CHAR or dataType == self.CDF_UCHAR:
if hasattr(data, "__len__") and not isinstance(data, str):
# Reformat strings
items = len(data)
odata = data
data = ""
for x in range(0, items):
if x > 0:
data += "\\N "
data += odata[x]
data += str(odata[x])
else:
data = odata[x]
data = str(odata[x])
numElems = len(data)
elif dataType == self.CDF_EPOCH or dataType == self.CDF_EPOCH16 or dataType == self.CDF_TIME_TT2000:
# Convert data to CDF time
cvalue = []
if hasattr(data, "__len__") and not isinstance(data, str):
numElems = len(data)
Expand All @@ -903,7 +906,22 @@ def _write_var_attrs(self, f: io.BufferedWriter, varNum: int, var_attrs: Dict[st
else:
data = cdfepoch.CDFepoch.parse(data)
numElems = 1
else:
elif isinstance(data, str):
# One possibility is that the user wants to convert a string to a number
numElems = 1
data = np.array(float(data))
else:
# The final possibility I can think of is that the user wants to convert a list of strings to a list of numbers
try:
numElems = 1
data = np.array([float(item) for item in data])
except:
logger.warning(
f"Cannot determine how to convert {str(data)} to specified type of {dataType}. Ignoring the specified datatype, and continuing."
)
dataType = 0

if dataType == 0:
# No data type defined...
data = entry
if hasattr(data, "__len__") and not isinstance(data, str):
Expand All @@ -913,9 +931,9 @@ def _write_var_attrs(self, f: io.BufferedWriter, varNum: int, var_attrs: Dict[st
for x in range(0, len(entry)):
if x > 0:
data += "\\N "
data += entry[x]
data += str(entry[x])
else:
data = entry[x]
data = str(entry[x])
numElems = len(data)
else:
numElems, dataType = self._datatype_define(entry)
Expand Down Expand Up @@ -1750,7 +1768,7 @@ def _write_aedr(
value_size = 1
cdata = "\x00".encode()
else:
value_size = len(cdata)
value_size = recs * self._datatype_size(dataType, numElems)
block_size = value_size + 56
aedr = bytearray(block_size)
aedr[0:8] = struct.pack(">q", block_size)
Expand Down Expand Up @@ -2304,6 +2322,17 @@ def _convert_data(self, data_type: int, num_elems: int, num_values: int, indata:
odata += adata.ljust(num_elems, "\x00")
recs = int((size * size2) / num_values)
return recs, odata.encode()
elif all(isinstance(item, str) for item in indata):
# Attempt to convert to a numpy array of numbers
try:
return self._numpy_to_bytes(data_type, num_values, num_elems, np.array([float(item) for item in indata]))
except:
# Do the best we can, create bytes from the string.
# It will probably come out to be gibberish
outdata = ("".join(indata)).ljust(num_elems, "\x00").encode()
recs = int(len(outdata) / recSize)
return recs, outdata

else:
try:
return self._numpy_to_bytes(data_type, num_values, num_elems, np.array(indata))
Expand Down Expand Up @@ -2366,8 +2395,19 @@ def _convert_data(self, data_type: int, num_elems: int, num_values: int, indata:
return recs, odata.encode()
else:
return self._numpy_to_bytes(data_type, num_values, num_elems, indata)
elif isinstance(indata, str):
elif isinstance(indata, str) and (data_type == self.CDF_CHAR or data_type == self.CDF_UCHAR):
# Just convert the string directly to bytes
return 1, indata.ljust(num_elems, "\x00").encode()
elif isinstance(indata, str) and data_type != self.CDF_CHAR and data_type == self.CDF_UCHAR:
# Try to convert the single string to a numerical type.
try:
return self._numpy_to_bytes(data_type, num_values, num_elems, np.array([float(indata)]))
except:
# Do the best we can, create bytes from the string.
# It will probably come out to be gibberish
outdata = indata.ljust(num_elems, "\x00").encode()
recs = int(len(outdata) / recSize)
return recs, outdata
else:
try:
# Try converting the data to numpy
Expand Down Expand Up @@ -2398,7 +2438,7 @@ def _convert_data(self, data_type: int, num_elems: int, num_values: int, indata:
else:
return recs, struct.pack(form, indata)
except struct.error:
raise ValueError("Unable to convert data to CDF format, data " "object cannot be of type string.")
raise ValueError("Unable to convert data to CDF format, data object cannot be of type string.")

def _num_values(self, zVar: bool, varNum: int) -> int:
"""
Expand Down
43 changes: 43 additions & 0 deletions tests/test_cdfwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest

from cdflib import cdfread, cdfwrite
from cdflib.xarray import cdf_to_xarray

R = Path(__file__).parent
fnbasic = "testing.cdf"
Expand Down Expand Up @@ -639,3 +640,45 @@ def test_convert_data_error(tmp_path):
with pytest.raises(ValueError):
# Data from list of strings with dimension "epoch"
cdf._convert_data(51, 1, 1, indata)


def test_string_input_but_number_type(tmp_path):
    """Regression check: a string FILLVAL declared as CDF_DOUBLE must yield a readable file.

    This input used to produce a corrupted CDF file, because the attribute
    value was supplied as a string while its declared type was a double.
    """
    cdf_path = tmp_path / "test.cdf"
    writer = cdfwrite.CDF(cdf_path)

    temperature = np.random.rand(5, 3) * 30
    spec = {
        "Variable": "temperature",
        "Data_Type": 45,
        "Num_Elements": 1,
        "Rec_Vary": False,
        "Dim_Sizes": [5, 3],
        "Compress": 0,
    }
    # The attribute entry pairs a string value with an explicit numeric CDF type.
    attrs = {"FILLVAL": [np.str_("12"), "CDF_DOUBLE"]}

    writer.write_var(spec, var_attrs=attrs, var_data=temperature)
    writer.close()

    # Reading the file back is the actual check: it used to raise an error here.
    cdf_to_xarray(cdf_path)


def test_array_string_input_but_number_type(tmp_path):
    """Regression check: an array-of-strings FILLVAL declared as CDF_DOUBLE must yield a readable file.

    This input used to produce a corrupted CDF file, because the attribute
    values were supplied as strings while their declared type was a double.
    """
    cdf_path = tmp_path / "test.cdf"
    writer = cdfwrite.CDF(cdf_path)

    temperature = np.random.rand(5, 3) * 30
    spec = {
        "Variable": "temperature",
        "Data_Type": 45,
        "Num_Elements": 1,
        "Rec_Vary": False,
        "Dim_Sizes": [5, 3],
        "Compress": 0,
    }
    # Same mismatch as the scalar case, but with several string values in a numpy array.
    fill_values = np.array([np.str_("12"), np.str_("13")])
    attrs = {"FILLVAL": [fill_values, "CDF_DOUBLE"]}

    writer.write_var(spec, var_attrs=attrs, var_data=temperature)
    writer.close()

    # Reading the file back is the actual check: it used to raise an error here.
    cdf_to_xarray(cdf_path)
Loading