diff --git a/warc/arc.py b/warc/arc.py index 5889587..d9e25b7 100644 --- a/warc/arc.py +++ b/warc/arc.py @@ -11,6 +11,7 @@ import StringIO import warnings +from . import gzip2 from .utils import CaseInsensitiveDict ARC1_HEADER_RE = re.compile('(?P\S*)\s(?P\S*)\s(?P\S*)\s(?P\S*)\s(?P\S*)') @@ -206,7 +207,7 @@ def __str__(self): class ARCFile(object): - def __init__(self, filename=None, mode=None, fileobj=None, version = None, file_headers = {}): + def __init__(self, filename=None, mode=None, fileobj=None, compress=None, version = None, file_headers = {}): """ Initialises a file like object that can be used to read or write Arc files. Works for both version 1 or version 2. @@ -251,6 +252,13 @@ def __init__(self, filename=None, mode=None, fileobj=None, version = None, file_ """ if fileobj is None: fileobj = __builtin__.open(filename, mode or "rb") + mode = fileobj.mode + # initialize compress based on filename, if not already specified + if compress is None and filename and filename.endswith(".gz"): + compress = True + + if compress: + fileobj = gzip2.GzipFile(fileobj=fileobj, mode=mode) self.fileobj = fileobj if version != None and int(version) not in (1, 2):