diff --git a/nibabel/gifti/gifti.py b/nibabel/gifti/gifti.py index c3199064b5..60b6aae455 100644 --- a/nibabel/gifti/gifti.py +++ b/nibabel/gifti/gifti.py @@ -29,7 +29,7 @@ class GiftiMetaData(xml.XmlSerializable): the list self.data """ def __init__(self, nvpair=None): self.data = [] - if not nvpair is None: + if nvpair is not None: self.data.append(nvpair) @classmethod @@ -296,7 +296,7 @@ def from_array(klass, cda.intent = intent_codes.code[intent] cda.encoding = gifti_encoding_codes.code[encoding] cda.endian = gifti_endian_codes.code[endian] - if not coordsys is None: + if coordsys is not None: cda.coordsys = coordsys cda.ind_ord = array_index_order_codes.code[ordering] cda.meta = GiftiMetaData.from_dict(meta) @@ -371,7 +371,7 @@ def print_summary(self): print('Endian: ', gifti_endian_codes.specs[self.endian]) print('ExternalFileName: ', self.ext_fname) print('ExternalFileOffset: ', self.ext_offset) - if not self.coordsys is None: + if self.coordsys is not None: print('----') print('Coordinate System:') print(self.coordsys.print_summary()) @@ -386,14 +386,44 @@ def metadata(self): return self.meta.metadata -class GiftiImage(FileBasedImage, xml.XmlSerializable): +class GiftiImage(xml.XmlSerializable, FileBasedImage): + """ + The Gifti spec suggests using the following suffixes to your + filename when saving each specific type of data: + + .gii + Generic GIFTI File + .coord.gii + Coordinates + .func.gii + Functional + .label.gii + Labels + .rgba.gii + RGB or RGBA + .shape.gii + Shape + .surf.gii + Surface + .tensor.gii + Tensors + .time.gii + Time Series + .topo.gii + Topology + + The Gifti file is stored in endian convention of the current machine. + """ valid_exts = ('.gii',) files_types = (('image', '.gii'),) def __init__(self, header=None, extra=None, file_map=None, meta=None, labeltable=None, darrays=None, version="1.0"): - FileBasedImage.__init__(self, header=header, extra=extra, - file_map=file_map) + super(GiftiImage, self).__init__(header=header, extra=extra, + file_map=file_map) + # placed here temporarily for git diff purposes + from .parse_gifti_fast import GiftiImageParser + GiftiImage.parser = GiftiImageParser if darrays is None: darrays = [] @@ -501,7 +531,7 @@ def getArraysFromIntent(self, intent): def print_summary(self): print('----start----') - print('Source filename: ', self.filename) + print('Source filename: ', self.get_filename()) print('Number of data arrays: ', self.numDA) print('Version: ', self.version) if self.meta is not None: @@ -536,22 +566,6 @@ def to_xml(self, enc='utf-8'): """ + xml.XmlSerializable.to_xml(self, enc) - @classmethod - def from_file_map(klass, file_map): - """ Load a Gifti image from a file_map - - Parameters - file_map : string - - Returns - ------- - img : GiftiImage - Returns a GiftiImage - """ - from .parse_gifti_fast import parse_gifti_file - return parse_gifti_file( - fptr=file_map['image'].get_prepare_fileobj('rb')) - def to_file_map(self, file_map=None): """ Save the current image to the specified file_map @@ -562,40 +576,31 @@ def to_file_map(self, file_map=None): Returns ------- None - - Notes - ----- - We write all files with utf-8 encoding, and specify this at the top of - the XML file with the ``encoding`` attribute. - - The Gifti spec suggests using the following suffixes to your - filename when saving each specific type of data: - - .gii - Generic GIFTI File - .coord.gii - Coordinates - .func.gii - Functional - .label.gii - Labels - .rgba.gii - RGB or RGBA - .shape.gii - Shape - .surf.gii - Surface - .tensor.gii - Tensors - .time.gii - Time Series - .topo.gii - Topology - - The Gifti file is stored in endian convention of the current machine. """ - # Our giftis are always utf-8 encoded - see GiftiImage.to_xml if file_map is None: file_map = self.file_map f = file_map['image'].get_prepare_fileobj('wb') f.write(self.to_xml()) + + @classmethod + def from_file_map(klass, file_map, buffer_size=35000000): + """ Load a Gifti image from a file_map + + Parameters + file_map : string + + Returns + ------- + img : GiftiImage + Returns a GiftiImage + """ + parser = klass.parser(buffer_size=buffer_size) + parser.parse(fptr=file_map['image'].get_prepare_fileobj('rb')) + img = parser.img + return img + + @classmethod + def from_filename(klass, filename, buffer_size=35000000): + file_map = klass.filespec_to_file_map(filename) + img = klass.from_file_map(file_map, buffer_size=buffer_size) + return img diff --git a/nibabel/gifti/parse_gifti_fast.py b/nibabel/gifti/parse_gifti_fast.py index aa3995b6b7..5a5b92856a 100644 --- a/nibabel/gifti/parse_gifti_fast.py +++ b/nibabel/gifti/parse_gifti_fast.py @@ -13,19 +13,16 @@ import warnings import zlib from ..externals.six import StringIO -from xml.parsers.expat import ParserCreate, ExpatError import numpy as np -from ..nifti1 import data_type_codes, xform_codes, intent_codes from .gifti import (GiftiMetaData, GiftiImage, GiftiLabel, GiftiLabelTable, GiftiNVPairs, GiftiDataArray, GiftiCoordSystem) from .util import (array_index_order_codes, gifti_encoding_codes, gifti_endian_codes) - - -DEBUG_PRINT = False +from ..nifti1 import data_type_codes, xform_codes, intent_codes +from ..xmlutils import XmlParser def read_data_block(encoding, endian, ordering, datatype, shape, data): @@ -37,8 +34,8 @@ def read_data_block(encoding, endian, ordering, datatype, shape, data): c = StringIO(data) da = np.loadtxt(c) da = da.astype(data_type_codes.type[datatype]) - # independent of the endianness - return da + return da # independent of the endianness + elif enclabel == 'B64BIN': # GIFTI_ENCODING_B64BIN dec = base64.b64decode(data.encode('ascii')) @@ -47,6 +44,7 @@ def read_data_block(encoding, endian, ordering, datatype, shape, data): newarr = np.fromstring(dec, dtype=dt) if len(newarr.shape) != len(sh): newarr = newarr.reshape(sh, order=ord) + elif enclabel == 'B64GZ': # GIFTI_ENCODING_B64GZ # convert to bytes array for python 3.2 @@ -58,27 +56,31 @@ def read_data_block(encoding, endian, ordering, datatype, shape, data): newarr = np.fromstring(zdec, dtype=dt) if len(newarr.shape) != len(sh): newarr = newarr.reshape(sh, order=ord) + elif enclabel == 'External': # GIFTI_ENCODING_EXTBIN raise NotImplementedError("In what format are the external files?") + else: return 0 + # check if we need to byteswap required_byteorder = gifti_endian_codes.byteorder[endian] if (required_byteorder in ('big', 'little') and - required_byteorder != sys.byteorder): + required_byteorder != sys.byteorder): newarr = newarr.byteswap() return newarr -class Outputter(object): +class GiftiImageParser(XmlParser): - def __init__(self): - self.initialize() + def __init__(self, encoding=None, buffer_size=35000000, verbose=0): + super(GiftiImageParser, self).__init__(encoding=encoding, + buffer_size=buffer_size, + verbose=verbose) + # output + self.img = None - def initialize(self): - """ Initialize outputter - """ # finite state machine stack self.fsm_state = [] @@ -95,15 +97,15 @@ def initialize(self): # where to write CDATA: self.write_to = None - self.img = None # Collecting char buffer fragments self._char_blocks = None def StartElementHandler(self, name, attrs): self.flush_chardata() - if DEBUG_PRINT: + if self.verbose > 0: print('Start element:\n\t', repr(name), attrs) + if name == 'GIFTI': # create gifti image self.img = GiftiImage() @@ -111,33 +113,35 @@ def StartElementHandler(self, name, attrs): self.img.version = attrs['Version'] if 'NumberOfDataArrays' in attrs: self.expected_numDA = int(attrs['NumberOfDataArrays']) - self.fsm_state.append('GIFTI') + elif name == 'MetaData': self.fsm_state.append('MetaData') - # if this metadata tag is first, create self.img.meta if len(self.fsm_state) == 2: self.meta_global = GiftiMetaData() else: # otherwise, create darray.meta self.meta_da = GiftiMetaData() + elif name == 'MD': self.nvpair = GiftiNVPairs() self.fsm_state.append('MD') + elif name == 'Name': if self.nvpair is None: raise ExpatError - else: - self.write_to = 'Name' + self.write_to = 'Name' + elif name == 'Value': if self.nvpair is None: raise ExpatError - else: - self.write_to = 'Value' + self.write_to = 'Value' + elif name == 'LabelTable': self.lata = GiftiLabelTable() self.fsm_state.append('LabelTable') + elif name == 'Label': self.label = GiftiLabel() if "Index" in attrs: @@ -153,6 +157,7 @@ def StartElementHandler(self, name, attrs): if "Alpha" in attrs: self.label.alpha = float(attrs["Alpha"]) self.write_to = 'Label' + elif name == 'DataArray': self.da = GiftiDataArray() if "Intent" in attrs: @@ -180,32 +185,35 @@ def StartElementHandler(self, name, attrs): self.da.ext_offset = attrs["ExternalFileOffset"] self.img.darrays.append(self.da) self.fsm_state.append('DataArray') + elif name == 'CoordinateSystemTransformMatrix': self.coordsys = GiftiCoordSystem() self.img.darrays[-1].coordsys = self.coordsys self.fsm_state.append('CoordinateSystemTransformMatrix') + elif name == 'DataSpace': if self.coordsys is None: raise ExpatError - else: - self.write_to = 'DataSpace' + self.write_to = 'DataSpace' + elif name == 'TransformedSpace': if self.coordsys is None: raise ExpatError - else: - self.write_to = 'TransformedSpace' + self.write_to = 'TransformedSpace' + elif name == 'MatrixData': if self.coordsys is None: raise ExpatError - else: - self.write_to = 'MatrixData' + self.write_to = 'MatrixData' + elif name == 'Data': self.write_to = 'Data' def EndElementHandler(self, name): self.flush_chardata() - if DEBUG_PRINT: + if self.verbose > 0: print('End element:\n\t', repr(name)) + if name == 'GIFTI': if hasattr(self, 'expected_numDA') and self.expected_numDA != self.img.numDA: warnings.warn("Actual # of data arrays does not match " @@ -214,6 +222,7 @@ def EndElementHandler(self, name): # remove last element of the list self.fsm_state.pop() # assert len(self.fsm_state) == 0 + elif name == 'MetaData': self.fsm_state.pop() if len(self.fsm_state) == 1: @@ -224,6 +233,7 @@ def EndElementHandler(self, name): else: self.img.darrays[-1].meta = self.meta_da self.meta_da = None + elif name == 'MD': self.fsm_state.pop() if self.meta_global is not None and self.meta_da is None: @@ -232,28 +242,24 @@ def EndElementHandler(self, name): self.meta_da.data.append(self.nvpair) # remove reference self.nvpair = None + elif name == 'LabelTable': self.fsm_state.pop() # add labeltable self.img.labeltable = self.lata self.lata = None + elif name == 'DataArray': self.fsm_state.pop() + elif name == 'CoordinateSystemTransformMatrix': self.fsm_state.pop() self.coordsys = None - elif name == 'DataSpace': - self.write_to = None - elif name == 'TransformedSpace': - self.write_to = None - elif name == 'MatrixData': - self.write_to = None - elif name == 'Name': - self.write_to = None - elif name == 'Value': - self.write_to = None - elif name == 'Data': + + elif name in ['DataSpace', 'TransformedSpace', 'MatrixData', + 'Name', 'Value', 'Data']: self.write_to = None + elif name == 'Label': self.lata.labels.append(self.label) self.label = None @@ -283,24 +289,30 @@ def flush_chardata(self): data = ''.join(self._char_blocks) # Reset the char collector self._char_blocks = None + # Process data if self.write_to == 'Name': data = data.strip() self.nvpair.name = data + elif self.write_to == 'Value': data = data.strip() self.nvpair.value = data + elif self.write_to == 'DataSpace': data = data.strip() self.coordsys.dataspace = xform_codes.code[data] + elif self.write_to == 'TransformedSpace': data = data.strip() self.coordsys.xformspace = xform_codes.code[data] + elif self.write_to == 'MatrixData': # conversion to numpy array c = StringIO(data) self.coordsys.xform = np.loadtxt(c) c.close() + elif self.write_to == 'Data': da_tmp = self.img.darrays[-1] da_tmp.data = read_data_block(da_tmp.encoding, da_tmp.endian, @@ -309,64 +321,27 @@ def flush_chardata(self): # update the endianness according to the # current machine setting self.endian = gifti_endian_codes.code[sys.byteorder] + elif self.write_to == 'Label': self.label.label = data.strip() @property def pending_data(self): " True if there is character data pending for processing " - return not self._char_blocks is None + return self._char_blocks is not None -def parse_gifti_file(fname=None, fptr=None, buffer_size=None): - """ Parse gifti file named `fname`, return image - - Parameters - ---------- - fname : str - filename of gifti file - buffer_size: None or int, optional - size of read buffer. None gives default of 35000000 unless on python < - 2.6, in which case it is read only in the parser. In that case values - other than None cause a ValueError on execution - - Returns - ------- - img : gifti image - """ - assert (fname is not None) + (fptr is not None) == 1, "Specify only fname or fptr, not both" - - if fptr is None: - with open(fname, 'rb') as datasource: - return parse_gifti_file(fptr=datasource, buffer_size=buffer_size) - else: - datasource = fptr +class Outputter(GiftiImageParser): + @np.deprecate_with_doc("Use GiftiImageParser instead.") + def __init__(self): + super(Outputter, self).__init__() - if buffer_size is None: - buffer_sz_val = 35000000 - else: - buffer_sz_val = buffer_size - - parser = ParserCreate() - parser.buffer_text = True - try: - parser.buffer_size = buffer_sz_val - except AttributeError: - if not buffer_size is None: - raise ValueError('Cannot set buffer size for parser') - HANDLER_NAMES = ['StartElementHandler', - 'EndElementHandler', - 'CharacterDataHandler'] - out = Outputter() - for name in HANDLER_NAMES: - setattr(parser, name, getattr(out, name)) - try: - parser.ParseFile(datasource) - except ExpatError: - print('An expat error occured while parsing the Gifti file.') - - # Reality check for pending data - assert out.pending_data is False - # update filename - out.img.filename = fname - return out.img + def initialize(self): + """ Initialize outputter + """ + self.__init__() + + +@np.deprecate_with_doc("Use GiftiImageParser.parse() instead.") +def parse_gifti_file(fname=None, fptr=None, buffer_size=None): + GiftiImageParser(buffer_size=buffer_size).parse(fname=fname, fptr=fptr) diff --git a/nibabel/gifti/tests/test_parse_gifti_fast.py b/nibabel/gifti/tests/test_parse_gifti_fast.py index f2470c707b..e9511b649b 100644 --- a/nibabel/gifti/tests/test_parse_gifti_fast.py +++ b/nibabel/gifti/tests/test_parse_gifti_fast.py @@ -16,6 +16,7 @@ import nibabel.gifti as gi from nibabel.gifti.util import gifti_endian_codes +from nibabel.gifti.parse_gifti_fast import Outputter, parse_gifti_file from nibabel.loadsave import load, save from nibabel.nifti1 import xform_codes from nibabel.tmpdirs import InTemporaryDirectory @@ -286,12 +287,14 @@ def test_labeltable_deprecations(): # Test deprecation with clear_and_catch_warnings() as w: - warnings.filterwarnings('once', category=DeprecationWarning) + warnings.filterwarnings('always', category=DeprecationWarning) assert_equal(lt, img.get_labeltable()) + assert_equal(len(w), 1) with clear_and_catch_warnings() as w: - warnings.filterwarnings('once', category=DeprecationWarning) + warnings.filterwarnings('always', category=DeprecationWarning) img.set_labeltable(lt) + assert_equal(len(w), 1) assert_equal(lt, img.labeltable) @@ -313,3 +316,24 @@ def test_parse_dataarrays(): load(fn) assert_equal(len(w), 1) assert_equal(img.numDA, 0) + + +def test_parse_deprecated(): + + # Test deprecation + with clear_and_catch_warnings() as w: + warnings.filterwarnings('always', category=DeprecationWarning) + op = Outputter() + assert_equal(len(w), 1) + op.initialize() # smoke test--no error. + + with clear_and_catch_warnings() as w: + warnings.filterwarnings('always', category=DeprecationWarning) + assert_raises(ValueError, parse_gifti_file) + assert_equal(len(w), 1) + + +def test_parse_with_buffersize(): + for buff_sz in [None, 1, 2**12]: + img2 = load(DATA_FILE2, buffer_size=buff_sz) + assert_equal(img2.darrays[0].data.shape, (143479, 1)) diff --git a/nibabel/xmlutils.py b/nibabel/xmlutils.py index fa23466006..11c41c230e 100644 --- a/nibabel/xmlutils.py +++ b/nibabel/xmlutils.py @@ -9,7 +9,12 @@ """ Thin layer around xml.etree.ElementTree, to abstract nibabel xml support. """ + +from io import BytesIO from xml.etree.ElementTree import Element, SubElement, tostring +from xml.parsers.expat import ParserCreate + +from .filebasedimages import FileBasedHeader, FileBasedImage class XmlSerializable(object): @@ -23,3 +28,85 @@ def to_xml(self, enc='utf-8'): """ Output should be an xml string with the given encoding. (default: utf-8)""" return tostring(self._to_xml_element(), enc) + + +class XmlBasedHeader(FileBasedHeader, XmlSerializable): + """ Basic wrapper around FileBasedHeader and XmlSerializable.""" + pass + + +class XmlParser(object): + """ Base class for defining how to parse xml-based image snippets. + + Image-specific parsers should define: + StartElementHandler + EndElementHandler + CharacterDataHandler + """ + + HANDLER_NAMES = ['StartElementHandler', + 'EndElementHandler', + 'CharacterDataHandler'] + + def __init__(self, encoding=None, buffer_size=35000000, verbose=0): + """ + Parameters + ---------- + encoding : str + string containing xml document + + buffer_size: None or int, optional + size of read buffer. None uses default buffer_size + from xml.parsers.expat. + + verbose : int, optional + amount of output during parsing (0=silent, by default). + """ + self.encoding = encoding + self.buffer_size = buffer_size + self.verbose = verbose + + def _create_parser(self): + """Internal function that allows subclasses to mess + with the underlying parser, if desired.""" + + parser = ParserCreate(encoding=self.encoding) # from xml package + parser.buffer_text = True + if self.buffer_size is not None: + parser.buffer_size = self.buffer_size + return parser + + def parse(self, string=None, fname=None, fptr=None): + """ + Parameters + ---------- + string : str + string containing xml document + + fname : str + file name of an xml document. + + fptr : file pointer + open file pointer to an xml documents + """ + if int(string is not None) + int(fptr is not None) + int(fname is not None) != 1: + raise ValueError('Exactly one of fptr, fname, string must be specified.') + + if string is not None: + fptr = BytesIO(string) + elif fname is not None: + fptr = open(fname, 'r') + + parser = self._create_parser() + for name in self.HANDLER_NAMES: + setattr(parser, name, getattr(self, name)) + parser.ParseFile(fptr) + + def StartElementHandler(self, name, attrs): + raise NotImplementedError + + def EndElementHandler(self, name): + raise NotImplementedError + + def CharacterDataHandler(self, data): + raise NotImplementedError