Skip to content
This repository was archived by the owner on Mar 23, 2020. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 18 additions & 14 deletions warc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

class CaseInsensitiveDict(DictMixin):
"""Almost like a dictionary, but keys are case-insensitive.

>>> d = CaseInsensitiveDict(foo=1, Bar=2)
>>> d['foo']
1
Expand All @@ -29,16 +29,16 @@ class CaseInsensitiveDict(DictMixin):
def __init__(self, *args, **kwargs):
self._d = {}
self.update(*args, **kwargs)

def __setitem__(self, name, value):
self._d[name.lower()] = value

def __getitem__(self, name):
return self._d[name.lower()]

def __delitem__(self, name):
del self._d[name.lower()]

def __eq__(self, other):
    """Compare equal only to another CaseInsensitiveDict with the same items.

    Any object that is not a CaseInsensitiveDict compares unequal.
    """
    if not isinstance(other, CaseInsensitiveDict):
        return False
    return other._d == self._d

Expand All @@ -54,44 +54,48 @@ def keys(self):

class FilePart:
"""File interface over a part of file.

Takes a file and a length to read from it, and returns a file-like object
over that part of the file.
"""
def __init__(self, fileobj, length):
self.fileobj = fileobj
self.length = length
self.offset = 0
self.buf = ""
self.buf = self.fileobj.read(0)

def read(self, size=-1):
    """Read from the part and return the data.

    size: maximum amount to read. ``None`` or any negative value means
        "everything", following the usual file-object convention; the
        request is then for ``self.length`` items.
    """
    # Previously only the exact value -1 meant "read all"; None or another
    # negative number was passed straight to _read().
    if size is None or size < 0:
        size = self.length
    return self._read(size)

def _read(self, size):
if len(self.buf) >= size:
content = self.buf[:size]
self.buf = self.buf[size:]
else:
size = min(size, self.length - self.offset - len(self.buf))
content = self.buf + self.fileobj.read(size)
self.buf = ""
self.buf = type(self.buf)()
self.offset += len(content)

if isinstance(content, bytes):
content = content.decode("utf-8")

return content

def _unread(self, content):
self.buf = content + self.buf
self.offset -= len(content)

def readline(self):
chunks = []
chunk = self._read(1024)
while chunk and "\n" not in chunk:
chunks.append(chunk)
chunk = self._read(1024)

if "\n" in chunk:
index = chunk.index("\n")
self._unread(chunk[index+1:])
Expand Down
Loading