diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 0ec6da0ca5..023cebad35 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -86,7 +86,8 @@ class ArrayProxy(object): # Assume Fortran array memory layout order = 'F' - def __init__(self, file_like, spec, *, mmap=True, keep_file_open=None): + def __init__(self, file_like, spec, *, + mmap=True, keep_file_open=None, compression=None): """Initialize array proxy instance Parameters @@ -125,6 +126,7 @@ def __init__(self, file_like, spec, *, mmap=True, keep_file_open=None): If ``file_like`` is an open file handle, this setting has no effect. The default value (``None``) will result in the value of ``KEEP_FILE_OPEN_DEFAULT`` being used. + compression : { None, "gz", "bz2", "zst" }, optional, keyword only """ if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") @@ -147,10 +149,11 @@ def __init__(self, file_like, spec, *, mmap=True, keep_file_open=None): # Permit any specifier that can be interpreted as a numpy dtype self._dtype = np.dtype(self._dtype) self._mmap = mmap + self._compression = compression # Flags to keep track of whether a single ImageOpener is created, and # whether a single underlying file handle is created. self._keep_file_open, self._persist_opener = \ - self._should_keep_file_open(file_like, keep_file_open) + self._should_keep_file_open(file_like, keep_file_open, compression) self._lock = RLock() def __del__(self): @@ -172,7 +175,7 @@ def __setstate__(self, state): self.__dict__.update(state) self._lock = RLock() - def _should_keep_file_open(self, file_like, keep_file_open): + def _should_keep_file_open(self, file_like, keep_file_open, compression): """Called by ``__init__``. 
This method determines how to manage ``ImageOpener`` instances, @@ -248,7 +251,8 @@ def _should_keep_file_open(self, file_like, keep_file_open): if hasattr(file_like, 'read') and hasattr(file_like, 'seek'): return False, False # if the file is a gzip file, and we have_indexed_gzip, - have_igzip = openers.HAVE_INDEXED_GZIP and file_like.endswith('.gz') + have_igzip = openers.HAVE_INDEXED_GZIP and (compression in ("gz", ".gz") or + file_like.endswith('.gz')) persist_opener = keep_file_open or have_igzip return keep_file_open, persist_opener @@ -297,11 +301,15 @@ def _get_fileobj(self): if self._persist_opener: if not hasattr(self, '_opener'): self._opener = openers.ImageOpener( - self.file_like, keep_open=self._keep_file_open) + self.file_like, + keep_open=self._keep_file_open, + compression=self._compression) yield self._opener else: with openers.ImageOpener( - self.file_like, keep_open=False) as opener: + self.file_like, + keep_open=False, + compression=self._compression) as opener: yield opener def _get_unscaled(self, slicer): diff --git a/nibabel/openers.py b/nibabel/openers.py index 592419375d..73d14d2695 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -96,6 +96,7 @@ class Opener(object): specified, is `rb`. ``compresslevel``, if relevant, and not specified, is set from class variable ``default_compresslevel``. ``keep_open``, if relevant, and not specified, is ``False``. + compression : { None, "gz", "bz2", "zst" }, optional, keyword only \*\*kwargs : keyword arguments passed to opening method when `fileish` is str. 
Change of defaults as for \*args @@ -119,13 +120,13 @@ class Opener(object): #: whether to ignore case looking for compression extensions compress_ext_icase = True - def __init__(self, fileish, *args, **kwargs): + def __init__(self, fileish, *args, compression=None, **kwargs): if self._is_fileobj(fileish): self.fobj = fileish self.me_opened = False self._name = None return - opener, arg_names = self._get_opener_argnames(fileish) + opener, arg_names = self._get_opener_argnames(fileish, compression) # Get full arguments to check for mode and compresslevel full_kwargs = kwargs.copy() n_args = len(args) @@ -151,7 +152,11 @@ def __init__(self, fileish, *args, **kwargs): self._name = fileish self.me_opened = True - def _get_opener_argnames(self, fileish): + def _get_opener_argnames(self, fileish, compression): + if compression is not None: + if compression[0] != '.': + compression = f'.{compression}' + return self.compress_ext_map[compression] _, ext = splitext(fileish) if self.compress_ext_icase: ext = ext.lower() diff --git a/nibabel/surfice.py b/nibabel/surfice.py new file mode 100644 index 0000000000..c77055fd3a --- /dev/null +++ b/nibabel/surfice.py @@ -0,0 +1,177 @@ +import io +import struct +import gzip +import numpy as np +from .wrapstruct import LabeledWrapStruct +from .dataobj_images import DataobjImage +from .arrayproxy import ArrayProxy + + +header_dtd = [ + ('magic', 'S2'), # 0; 0x5a4d (little endian) == "MZ" + ('attr', 'u2'), # 2; Attributes bitfield reporting stored data + ('nface', 'u4'), # 4; Number of faces + ('nvert', 'u4'), # 8; Number of vertices + ('nskip', 'u4'), # 12; Number of bytes to skip (for future header extensions) +] +header_dtype = np.dtype(header_dtd) + + +class MZ3Header(LabeledWrapStruct): + template_dtype = header_dtype + compression = False + + @classmethod + def from_header(klass, header=None, check=True): + if type(header) == klass: + obj = header.copy() + if check: + obj.check_fix() + return obj + + def copy(self): + ret = 
super().copy() + ret.compression = self.compression + ret._nscalar = self._nscalar + return ret + + @classmethod + def from_fileobj(klass, fileobj, endianness=None, check=True): + raw_str = fileobj.read(klass.template_dtype.itemsize) + compression = raw_str[:2] == b'\x1f\x8b' + if compression: + fileobj.seek(0) + with gzip.open(fileobj, 'rb') as fobj: + raw_str = fobj.read(klass.template_dtype.itemsize) + + hdr = klass(raw_str, endianness, check) + hdr.compression = compression + hdr._nscalar = hdr._calculate_nscalar(fileobj) + return hdr + + def get_data_offset(self): + _, attr, nface, nvert, nskip = self._structarr.tolist() + + isface = attr & 1 != 0 + isvert = attr & 2 != 0 + isrgba = attr & 4 != 0 + return 16 + nskip + isface * nface * 12 + isvert * nvert * 12 + isrgba * nvert * 12 + + def _calculate_nscalar(self, fileobj): + _, attr, nface, nvert, nskip = self._structarr.tolist() + + isscalar = attr & 8 != 0 + isdouble = attr & 16 != 0 + base_size = self.get_data_offset() + + nscalar = 0 + if isscalar or isdouble: + factor = nvert * (4 if isscalar else 8) + ret = fileobj.tell() + if self.compression: + fileobj.seek(-4, 2) + full_size_mod_4gb = struct.unpack('I', fileobj.read(4))[0] + full_size = full_size_mod_4gb + nscalar, remainder = divmod(full_size - base_size, factor) + for _ in range(5): + full_size += (1 << 32) + nscalar, remainder = divmod(full_size - base_size, factor) + if remainder == 0: + break + else: + fileobj.seek(0) + with gzip.open(fileobj, 'rb') as fobj: + fobj.seek(0, 2) + full_size = fobj.tell() + nscalar, remainder = divmod(full_size - base_size, factor) + if remainder: + raise ValueError("Apparent file size failure") + else: + fileobj.seek(0, 2) + full_size = fileobj.tell() + nscalar, remainder = divmod(full_size - base_size, factor) + if remainder: + raise ValueError("Apparent file size failure") + fileobj.seek(ret) + return nscalar + + @classmethod + def guessed_endian(klass, mapping): + return '<' + + @classmethod + def 
default_structarr(klass, endianness=None): + if endianness is not None and endian_codes[endianness] != '<': + raise ValueError('MZ3Header must always be little endian') + structarr = super().default_structarr(endianness=endianness) + structarr['magic'] = b"MZ" + return structarr + + @classmethod + def may_contain_header(klass, binaryblock): + if len(binaryblock) < 16: + return False + + # May be gzipped without changing extension + if binaryblock[:2] == b'\x1f\x8b': + with gzip.open(io.BytesIO(binaryblock), 'rb') as fobj: + binaryblock = fobj.read(16) + + hdr_struct = np.ndarray(shape=(), dtype=klass.template_dtype, buffer=binaryblock[:16]) + return hdr_struct['magic'] == b'MZ' + + def get_data_dtype(self): + if self._structarr['attr'] & 8: + return np.dtype('": + raise ValueError("Cannot set type to big-endian") + dt = np.dtype(datatype).newbyteorder("<") + + if dt == np.dtype('