79 changes: 60 additions & 19 deletions cdlparser.py
@@ -67,15 +67,24 @@

Creator: Phil Bentley
"""
from __future__ import print_function

__version_info__ = (0, 0, 8, 'beta', 0)
__version__ = "%d.%d.%d-%s" % __version_info__[0:4]

import codecs
import sys, os, logging, types
import six
import re
import ply.lex as lex
from ply.lex import TOKEN
import ply.yacc as yacc
import netCDF4 as nc4
import numpy as np
from functools import reduce

if not six.PY2:
long = int
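
Reviewer note (illustration only, not part of the diff): the alias above lets code written against Python 2's `long` run unchanged on Python 3. A minimal sketch:

```python
import six

if not six.PY2:
    long = int            # same alias as in the module

big = long(2 ** 40)       # Python 2: a long instance; Python 3: a plain int
print(isinstance(big, long))   # True on both interpreters
```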

# default fill values for netCDF-3 data types (as defined in netcdf.h include file)
NC_FILL_BYTE = np.int8(-127)
@@ -171,9 +180,8 @@ def parse_file(self, cdlfile, ncfile=None) :
:returns: A handle to a netCDF4.Dataset object.
"""
self.cdlfile = cdlfile
f = open(cdlfile)
data = f.read() # FIXME: can we parse input w/o reading entire CDL file into memory?
f.close()
with codecs.open(cdlfile, encoding="utf-8") as f:
data = f.read() # FIXME: can we parse input w/o reading entire CDL file into memory?
return self.parse_text(data, ncfile=ncfile)
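
Reviewer note: reading the file through codecs.open with an explicit encoding yields decoded (unicode) text on both Python 2 and 3, which is what parse_text() now expects. A minimal sketch of the new read path, with a hypothetical file name:

```python
import codecs

# "example.cdl" is a made-up path; any UTF-8 encoded CDL file works the same way.
with codecs.open("example.cdl", encoding="utf-8") as f:
    data = f.read()       # unicode on Python 2, str on Python 3

# `data` is then handed to parse_text(), so escape/character handling sees
# real unicode rather than raw bytes.
```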

def parse_text(self, cdltext, ncfile=None) :
@@ -188,7 +196,8 @@ def parse_text(self, cdltext, ncfile=None) :
Alternatively, this can be done immediately upon completion of parsing by setting the
close_on_completion keyword argument to True when instantiating the CDLParser instance.

:param cdltext: String containing the CDL text to parse.
:param cdltext: String containing the CDL text to parse. Must be a unicode string if it
    contains non-ASCII characters.
:param ncfile: Optional pathname of the netCDF file to receive output.
:returns: A handle to a netCDF4.Dataset object.
"""
@@ -376,6 +385,7 @@ def t_DOUBLE_CONST(self, t) :
def t_SHORT_CONST(self, t) :
r'[+-]?([0-9]+|0[xX][0-9a-fA-F]+)[sS]'
#r'[+-]?[0-9]+[sS]|0[xX][0-9a-fA-F]+[sS]' # original regex in ncgen3.l file
t.value = fix_octal(t.value)
try :
int_val = int(eval(t.value[:-1]))
except :
@@ -391,6 +401,7 @@ def t_SHORT_CONST(self, t) :
def t_BYTE_CONST(self, t) :
#r'[+-]?[0-9]+[Bb]' # modified regex
#r'[+-]?[0-9]*[0-9][Bb]' # original regex in ncgen3.l file
t.value = fix_octal(t.value)
try :
if t.value[0] == "'" :
int_val = ord(eval(t.value))
@@ -411,14 +422,15 @@ def t_INT_CONST(self, t) :
r'[+-]?([1-9][0-9]*|0[xX]?[0-9a-fA-F]+|0)' # [Ll] suffix has been deprecated
#r'[+-]?([1-9][0-9]*|0)[lL]?' # original regex for decimal integers in ncgen3.l file
#r'0[xX]?[0-9a-fA-F]+[lL]?' # original regex for octal or hex integers in ncgen3.l file
t.value = fix_octal(t.value)
try :
long_val = long(eval(t.value))
except :
errmsg = "Bad integer constant: %s" % t.value
raise CDLContentError(errmsg)
if long_val < XDR_INT_MIN or long_val > XDR_INT_MAX :
errmsg = "Integer constant outside valid range (%d -> %d): %s" \
% (XDR_INT_MIN, XDR_INT_MAX, int_val)
% (XDR_INT_MIN, XDR_INT_MAX, long_val)
raise CDLContentError(errmsg)
else :
t.value = np.int32(long_val)
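
Reviewer note: the rule now normalises octal literals via fix_octal(), evaluates the text as an integer and rejects anything outside the 32-bit XDR range; the corrected message also reports long_val instead of the undefined int_val. A standalone sketch of that check, using int(text, 0) as a simpler stand-in for the eval() call and the usual 32-bit bounds:

```python
import numpy as np

XDR_INT_MIN, XDR_INT_MAX = -2147483648, 2147483647

def check_int_const(text):
    value = int(text, 0)                 # handles decimal, hex and "0o" octal
    if not (XDR_INT_MIN <= value <= XDR_INT_MAX):
        raise ValueError("Integer constant outside valid range: %s" % text)
    return np.int32(value)

print(check_int_const("0o777"))          # 511
print(check_int_const("-2147483648"))    # lower bound is still accepted
```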
@@ -472,7 +484,7 @@ def p_dimdecl(self, p) :
| dimd EQUALS DOUBLE_CONST
| dimd EQUALS NC_UNLIMITED_K"""
dimname = ""
if isinstance(p[3], basestring) :
if isinstance(p[3], six.string_types) :
if p[3] == "unlimited" :
if self.rec_dimname :
raise CDLContentError("Only one UNLIMITED dimension is allowed.")
@@ -639,7 +651,7 @@ def p_datadecl(self, p) :
try :
self.write_var_data(var, arr)
self.logger.info("Wrote %d data value(s) for variable %s" % (len(arr), p[1]))
except Exception, exc :
except Exception as exc :
self.logger.error(str(exc))
raise

@@ -772,7 +784,7 @@ def write_var_data(self, var, arr) :
arrlen = len(arr)
varlen = var.size
if is_charvar and var.ndim > 0 :
varlen /= var.shape[-1]
varlen = varlen // var.shape[-1]
reclen = 0
self.logger.debug("Length of passed-in data array = %d" % arrlen)
if varlen : self.logger.debug("Expected length of variable = %d" % varlen)
@@ -782,7 +794,7 @@ def write_var_data(self, var, arr) :
if is_recvar :
rec_dimlen = len(self.ncdataset.dimensions[self.rec_dimname])
if rec_dimlen > 0 : # record dimension has been set to non-zero
reclen = varlen / rec_dimlen
reclen = varlen // rec_dimlen
else : # record dimension is still equal to zero
varlen = arrlen
reclen = 1
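
Reviewer note: the switch from / to // matters because under Python 3 true division would turn reclen into a float and break the later reshape. A toy illustration:

```python
varlen, rec_dimlen = 12, 4
reclen = varlen // rec_dimlen    # 3 (int); 12 / 4 would give 3.0 on Python 3
assert isinstance(reclen, int)
```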
@@ -806,28 +818,28 @@ def write_var_data(self, var, arr) :
put_char_data(var, arr, reclen)
else :
put_numeric_data(var, arr, reclen)
except Exception, exc :
except Exception as exc :
errmsg = "Error attempting to write data array for variable %s\n" % var._name
errmsg += "Exception details are as follows:\n%s" % str(exc)
raise CDLContentError(errmsg)

def _lextest(self, data) :
"""private method - for test purposes only"""
self.lexer.input(data)
print "-----"
print("-----")
while 1 :
t = self.lexer.token()
if not t : break
print "type: %-15s\tvalue: %s" % (t.type, t.value)
print "-----"
print("type: %-15s\tvalue: %s" % (t.type, t.value))
print("-----")

#---------------------------------------------------------------------------------------------------
def put_numeric_data(var, arr, reclen=0) :
#---------------------------------------------------------------------------------------------------
"""Write numeric data array to netcdf variable."""
nparr = np.array(arr, dtype=var.dtype)
shape = list(var.shape)
if reclen : shape[0] = len(arr) / reclen
if reclen : shape[0] = len(arr) // reclen
nparr.shape = shape
var[:] = nparr
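
Reviewer note: the same floor-division fix applies here when sizing the unlimited dimension from the data. The reshape logic in isolation, using plain numpy and made-up sizes:

```python
import numpy as np

arr = list(range(12))                 # 12 values parsed from the data section
reclen = 4                            # values per record
shape = [len(arr) // reclen, 4]       # unlimited dimension sized from the data
nparr = np.array(arr, dtype=np.int32)
nparr.shape = shape                   # 3 records of 4 values each
```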

@@ -838,7 +850,7 @@ def put_char_data(var, arr, reclen=0) :
maxlen = var.shape[-1] if var.ndim > 0 else 1
nparr = str_list_to_char_arr(arr, maxlen)
shape = list(var.shape)
if reclen : shape[0] = len(arr) / reclen
if reclen : shape[0] = len(arr) // reclen
nparr.shape = shape
var[:] = nparr

@@ -892,14 +904,43 @@ def deescapify(name) :
i += 1
return newname


# Regex for finding escape sequences
ESCAPE_SEQUENCE_RE = re.compile(r'''
( \\U........ # 8-digit hex escapes
| \\u.... # 4-digit hex escapes
| \\x.. # 2-digit hex escapes
| \\[0-7]{1,3} # Octal escapes
| \\N\{[^}]+\} # Unicode characters by name
| \\[\\'"abfnrtv] # Single-character escapes
)''', re.UNICODE | re.VERBOSE)

#---------------------------------------------------------------------------------------------------
def expand_escapes(tstring) :
#---------------------------------------------------------------------------------------------------
"""
A Python version of ncgen's expand_escapes() function (see escapes.c). This function simply
uses the built-in string.decode() method.
Function to convert escapes to actual (unicode) characters. Fulfills the same purpose as
expand_escapes() in ncgen3/escapes.c or unescape() in ncgen/escapes.c.
Input string containing unicode must be a unicode string.
https://stackoverflow.com/a/24519338/2196270
"""
def decode_match(match):
return codecs.decode(match.group(0), 'unicode-escape')

return ESCAPE_SEQUENCE_RE.sub(decode_match, tstring)
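
Reviewer note (illustrative inputs only): with the regex and helper above, escape sequences embedded in CDL string constants are expanded to the corresponding characters, for example:

```python
print(expand_escapes(r"line one\nline two"))    # '\n' becomes a real newline
print(expand_escapes(r"temperature (\xb0C)"))   # '\xb0' becomes the degree sign
```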

#---------------------------------------------------------------------------------------------------
def fix_octal(octal_str) :
#---------------------------------------------------------------------------------------------------
"""
Rewrite an octal literal to use the Python 3 "0o" prefix, preserving any +/- sign and any trailing type-suffix letter.
"""
return tstring.decode('string_escape')
m = re.match(r"([+-]?)0(\d+.*)", octal_str)
if m:
# Make octal python 3 compatible
return m.group(1) + "0o" + m.group(2)
else:
return octal_str
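
Reviewer note: expected behaviour of fix_octal on a few illustrative literals:

```python
print(fix_octal("0777"))    # '0o777'  -> valid Python 3 octal literal
print(fix_octal("-017s"))   # '-0o17s' -> sign and type suffix are preserved
print(fix_octal("0x1F"))    # '0x1F'   -> hex literals pass through unchanged
print(fix_octal("123"))     # '123'    -> plain decimals are untouched
```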

#---------------------------------------------------------------------------------------------------
def get_default_fill_value(datatype) :
@@ -926,7 +967,7 @@ def main() :
"""Rudimentary main function - primarily for testing purposes at this point in time."""
debug = 0
if len(sys.argv) < 2 :
print "usage: python cdlparser.py cdlfile [keyword=value, ...]"
print("usage: python cdlparser.py cdlfile [keyword=value, ...]")
sys.exit(1)
cdlfile = sys.argv[1]
kwargs = {}
2 changes: 1 addition & 1 deletion test/test_charvars.py
@@ -50,7 +50,7 @@ def tearDown(self) :

def test_scalar_variables(self) :
var = self.dataset.variables['letter']
self.assertTrue(var[:] == "X")
self.assertTrue(var[:] == b"X")
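
Reviewer note: on Python 3 netCDF4 hands character data back as bytes, so the assertion has to compare against a bytes literal. A trivial sketch of the distinction:

```python
value = b"X"          # typical scalar NC_CHAR value as read back under Python 3
assert value == b"X"  # the updated assertion passes
assert value != "X"   # comparing against a unicode literal would not match
```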

def test_non_scalar_variables(self) :
var = self.dataset.variables['regcodes']
7 changes: 5 additions & 2 deletions test/test_constants.py
@@ -17,6 +17,9 @@ def setUp(self) :
variables:
float var1(dim1) ;
var1:att1 = "dummy attribute" ;
// FillValue necessary to enable masking in NETCDF3_CLASSIC right now.
// See https://github.com/Unidata/netcdf4-python/issues/725.
var1:_FillValue = 9.9692099683868690e+36;
// global attributes
:c1 = "foo" ; // with spaces
:c2="bar" ; // w/o spaces
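
Reviewer note (rough sketch; the .nc file name is made up): without an explicit _FillValue, unwritten values in a NETCDF3_CLASSIC file were not coming back masked (see the linked netcdf4-python issue); declaring the fill value makes the read-back behave as the tests expect:

```python
import netCDF4 as nc4

ds = nc4.Dataset("tmp_fill.nc", "w", format="NETCDF3_CLASSIC")
ds.createDimension("dim1", 3)
var = ds.createVariable("var1", "f4", ("dim1",), fill_value=9.969209968386869e+36)
var[0] = 1.0                      # indices 1 and 2 are never written
print(ds.variables["var1"][:])    # unwritten entries read back as masked
ds.close()
```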
@@ -111,13 +114,13 @@ def test_double_array(self) :

def test_dimensions(self) :
self.assertTrue(len(self.dataset.dimensions) == 1)
self.assertTrue(self.dataset.dimensions.keys()[0] == "dim1")
self.assertTrue('dim1' in self.dataset.dimensions.keys())
Owner comment: Yep, use of 'in' is the better idiom. Plus, the keys() method is redundant in both the original and new statements.

dim = self.dataset.dimensions['dim1']
self.assertTrue(len(dim) == 3)
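
As a follow-up to the comment above, the membership test works directly on the mapping, so `.keys()` can be dropped as well (standalone sketch):

```python
dimensions = {"dim1": 3}                 # stand-in for dataset.dimensions
assert "dim1" in dimensions              # preferred idiom
assert "dim1" in dimensions.keys()       # also works, but .keys() adds nothing
```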

def test_variables(self) :
self.assertTrue(len(self.dataset.variables) == 1)
self.assertTrue(self.dataset.variables.keys()[0] == "var1")
self.assertTrue("var1" in self.dataset.variables.keys())
Owner comment: Yep, use of 'in' is the better idiom. Plus, the keys() method is redundant in both the original and new statements.

var = self.dataset.variables['var1']
self.assertTrue(var.att1 == "dummy attribute")
data = var[:]