From beb10641868f8f3b73302ae2676d7f1f4d21d8c6 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 24 Nov 2020 14:18:15 +0000 Subject: [PATCH 1/6] Use native bytearray truncation --- h11/_connection.py | 1 - h11/_receivebuffer.py | 29 ++++++++++------------------- h11/tests/test_receivebuffer.py | 2 -- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/h11/_connection.py b/h11/_connection.py index 410c4e9..c4baa80 100644 --- a/h11/_connection.py +++ b/h11/_connection.py @@ -425,7 +425,6 @@ def next_event(self): event = self._extract_next_receive_event() if event not in [NEED_DATA, PAUSED]: self._process_event(self.their_role, event) - self._receive_buffer.compress() if event is NEED_DATA: if len(self._receive_buffer) > self._max_incomplete_event_size: # 431 is "Request header fields too large" which is pretty diff --git a/h11/_receivebuffer.py b/h11/_receivebuffer.py index c56749a..8601f81 100644 --- a/h11/_receivebuffer.py +++ b/h11/_receivebuffer.py @@ -42,7 +42,6 @@ class ReceiveBuffer(object): def __init__(self): self._data = bytearray() # These are both absolute offsets into self._data: - self._start = 0 self._looked_at = 0 self._looked_for = b"" @@ -51,56 +50,48 @@ def __bool__(self): # for @property unprocessed_data def __bytes__(self): - return bytes(self._data[self._start :]) + return bytes(self._data) if sys.version_info[0] < 3: # version specific: Python 2 __str__ = __bytes__ __nonzero__ = __bool__ def __len__(self): - return len(self._data) - self._start - - def compress(self): - # Heuristic: only compress if it lets us reduce size by a factor - # of 2 - if self._start > len(self._data) // 2: - del self._data[: self._start] - self._looked_at -= self._start - self._start -= self._start + return len(self._data) def __iadd__(self, byteslike): self._data += byteslike return self def maybe_extract_at_most(self, count): - out = self._data[self._start : self._start + count] + out = self._data[:count] if not out: return None - self._start += len(out) + self._data[:count] = b"" return out def maybe_extract_until_next(self, needle): # Returns extracted bytes on success (advancing offset), or None on # failure if self._looked_for == needle: - search_start = max(self._start, self._looked_at - len(needle) + 1) + search_start = max(0, self._looked_at - len(needle) + 1) else: - search_start = self._start + search_start = 0 offset = self._data.find(needle, search_start) if offset == -1: self._looked_at = len(self._data) self._looked_for = needle return None new_start = offset + len(needle) - out = self._data[self._start : new_start] - self._start = new_start + out = self._data[:new_start] + self._data[:new_start] = b"" return out # HTTP/1.1 has a number of constructs where you keep reading lines until # you see a blank one. This does that, and then returns the lines. def maybe_extract_lines(self): - if self._data[self._start : self._start + 2] == b"\r\n": - self._start += 2 + if self._data[:2] == b"\r\n": + self._data[:2] = b"" return [] else: data = self.maybe_extract_until_next(b"\r\n\r\n") diff --git a/h11/tests/test_receivebuffer.py b/h11/tests/test_receivebuffer.py index 2be220b..f099333 100644 --- a/h11/tests/test_receivebuffer.py +++ b/h11/tests/test_receivebuffer.py @@ -12,7 +12,6 @@ def test_receivebuffer(): assert len(b) == 3 assert bytes(b) == b"123" - b.compress() assert bytes(b) == b"123" assert b.maybe_extract_at_most(2) == b"12" @@ -20,7 +19,6 @@ def test_receivebuffer(): assert len(b) == 1 assert bytes(b) == b"3" - b.compress() assert bytes(b) == b"3" assert b.maybe_extract_at_most(10) == b"3" From 6519c2184827a4cd34da90a620565b2525f9ce0c Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 24 Nov 2020 14:21:38 +0000 Subject: [PATCH 2/6] Drop comment that is no longer relevant --- h11/_receivebuffer.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/h11/_receivebuffer.py b/h11/_receivebuffer.py index 8601f81..580f82c 100644 --- a/h11/_receivebuffer.py +++ b/h11/_receivebuffer.py @@ -16,28 +16,6 @@ # of constantly copying # WARNING: # - I haven't benchmarked or profiled any of this yet. -# -# Note that starting in Python 3.4, deleting the initial n bytes from a -# bytearray is amortized O(n), thanks to some excellent work by Antoine -# Martin: -# -# https://bugs.python.org/issue19087 -# -# This means that if we only supported 3.4+, we could get rid of the code here -# involving self._start and self.compress, because it's doing exactly the same -# thing that bytearray now does internally. -# -# BUT unfortunately, we still support 2.7, and reading short segments out of a -# long buffer MUST be O(bytes read) to avoid DoS issues, so we can't actually -# delete this code. Yet: -# -# https://pythonclock.org/ -# -# (Two things to double-check first though: make sure PyPy also has the -# optimization, and benchmark to make sure it's a win, since we do have a -# slightly clever thing where we delay calling compress() until we've -# processed a whole event, which could in theory be slightly more efficient -# than the internal bytearray support.) class ReceiveBuffer(object): def __init__(self): self._data = bytearray() From 4a50931669eed2f1b0443408355b6d8b3007435d Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 24 Nov 2020 15:04:45 +0000 Subject: [PATCH 3/6] Address review comments --- h11/_receivebuffer.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/h11/_receivebuffer.py b/h11/_receivebuffer.py index 580f82c..018e760 100644 --- a/h11/_receivebuffer.py +++ b/h11/_receivebuffer.py @@ -45,7 +45,11 @@ def maybe_extract_at_most(self, count): out = self._data[:count] if not out: return None - self._data[:count] = b"" + # Note that front-truncation of bytesarray is amortized O(n), from + # Python 3.4 onwards, thanks to some excellent work by Antoine Martin: + # + # https://bugs.python.org/issue19087 + del self._data[:count] return out def maybe_extract_until_next(self, needle): @@ -62,14 +66,14 @@ def maybe_extract_until_next(self, needle): return None new_start = offset + len(needle) out = self._data[:new_start] - self._data[:new_start] = b"" + del self._data[:new_start] return out # HTTP/1.1 has a number of constructs where you keep reading lines until # you see a blank one. This does that, and then returns the lines. def maybe_extract_lines(self): if self._data[:2] == b"\r\n": - self._data[:2] = b"" + del self._data[:2] return [] else: data = self.maybe_extract_until_next(b"\r\n\r\n") From c0f7b32328393fe37370ca1bea8cce3dab9762de Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 24 Nov 2020 15:07:42 +0000 Subject: [PATCH 4/6] Fix up core contributor name in comment --- h11/_receivebuffer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h11/_receivebuffer.py b/h11/_receivebuffer.py index 018e760..27a750f 100644 --- a/h11/_receivebuffer.py +++ b/h11/_receivebuffer.py @@ -46,7 +46,7 @@ def maybe_extract_at_most(self, count): if not out: return None # Note that front-truncation of bytesarray is amortized O(n), from - # Python 3.4 onwards, thanks to some excellent work by Antoine Martin: + # Python 3.4 onwards, thanks to some excellent work by Antoine Pitrou: # # https://bugs.python.org/issue19087 del self._data[:count] From 1c041019142d4d7ebb9b0cb750f46cb406d5291a Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 24 Nov 2020 15:55:15 +0000 Subject: [PATCH 5/6] Clean up comment --- h11/_receivebuffer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/h11/_receivebuffer.py b/h11/_receivebuffer.py index 27a750f..7564c06 100644 --- a/h11/_receivebuffer.py +++ b/h11/_receivebuffer.py @@ -45,8 +45,8 @@ def maybe_extract_at_most(self, count): out = self._data[:count] if not out: return None - # Note that front-truncation of bytesarray is amortized O(n), from - # Python 3.4 onwards, thanks to some excellent work by Antoine Pitrou: + # Note that front-truncation of bytesarray is O(1), from Python 3.4 + # onwards, thanks to some excellent work by Antoine Pitrou: # # https://bugs.python.org/issue19087 del self._data[:count] From 50e195f13e2181dd0a3fd2b0476626f307237fab Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 24 Nov 2020 20:34:49 +0000 Subject: [PATCH 6/6] Update h11/_receivebuffer.py Co-authored-by: Ran Benita --- h11/_receivebuffer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/h11/_receivebuffer.py b/h11/_receivebuffer.py index 7564c06..8cdb1a1 100644 --- a/h11/_receivebuffer.py +++ b/h11/_receivebuffer.py @@ -45,8 +45,8 @@ def maybe_extract_at_most(self, count): out = self._data[:count] if not out: return None - # Note that front-truncation of bytesarray is O(1), from Python 3.4 - # onwards, thanks to some excellent work by Antoine Pitrou: + # Note that front-truncation of bytesarray is amortized O(1), from + # Python 3.4 onwards, thanks to some excellent work by Antoine Pitrou: # # https://bugs.python.org/issue19087 del self._data[:count]