Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion warc/arc.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import StringIO
import warnings

from . import gzip2
from .utils import CaseInsensitiveDict

# Pattern for an ARC1 header line: five fields (url, ip_address, date,
# content_type, length), each a run of non-whitespace characters, separated
# by exactly one whitespace character. A raw string keeps the regex escapes
# (\S, \s) from being read as (invalid) string-literal escapes.
ARC1_HEADER_RE = re.compile(r'(?P<url>\S*)\s(?P<ip_address>\S*)\s(?P<date>\S*)\s(?P<content_type>\S*)\s(?P<length>\S*)')
Expand Down Expand Up @@ -206,7 +207,7 @@ def __str__(self):


class ARCFile(object):
def __init__(self, filename=None, mode=None, fileobj=None, version = None, file_headers = {}):
def __init__(self, filename=None, mode=None, fileobj=None, compress=None, version = None, file_headers = {}):
"""
Initialises a file like object that can be used to read or
write Arc files. Works for both version 1 or version 2.
Expand Down Expand Up @@ -251,6 +252,13 @@ def __init__(self, filename=None, mode=None, fileobj=None, version = None, file_
"""
if fileobj is None:
fileobj = __builtin__.open(filename, mode or "rb")
mode = fileobj.mode
# initialize compress based on filename, if not already specified
if compress is None and filename and filename.endswith(".gz"):
compress = True

if compress:
fileobj = gzip2.GzipFile(fileobj=fileobj, mode=mode)
self.fileobj = fileobj

if version != None and int(version) not in (1, 2):
Expand Down