From eeadae13aa14773b5c231cd50e9fb324fa47b8e3 Mon Sep 17 00:00:00 2001 From: Rob Brackett Date: Fri, 15 Dec 2023 17:35:39 -0800 Subject: [PATCH] Fix flake8 errors and URL problem --- wayback/_client.py | 26 ++++++++++++-------------- wayback/tests/test_client.py | 14 ++++++-------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/wayback/_client.py b/wayback/_client.py index 32c1680..be0006d 100644 --- a/wayback/_client.py +++ b/wayback/_client.py @@ -28,18 +28,16 @@ # Timeout) import time from typing import Generator, Optional -from urllib.parse import urljoin, urlparse +from urllib.parse import urlencode, urljoin, urlparse from urllib3 import PoolManager, HTTPResponse, Timeout as Urllib3Timeout from urllib3.connectionpool import HTTPConnectionPool -from urllib3.exceptions import (ClosedPoolError, - ConnectTimeoutError, +from urllib3.exceptions import (ConnectTimeoutError, DecodeError, MaxRetryError, ProtocolError, ReadTimeoutError, ProxyError, - TimeoutError, - ProtocolError) + TimeoutError) from warnings import warn from . import _utils, __version__ from ._models import CdxRecord, Memento @@ -350,7 +348,8 @@ def iter_byte_slices(data: bytes, size: int) -> Generator[bytes, None, None]: def parse_header_links(value): """Return a list of parsed link headers proxies. - i.e. Link: ; rel=front; type="image/jpeg",; rel=back;type="image/jpeg" + i.e. Link: ; rel=front; type="image/jpeg", + ; rel=back;type="image/jpeg" :rtype: list """ @@ -384,7 +383,6 @@ def parse_header_links(value): return links -from urllib.parse import urlencode # XXX: pretty much wholesale taken from requests. May need adjustment. # https://github.com/psf/requests/blob/147c8511ddbfa5e8f71bbf5c18ede0c4ceb3bba4/requests/models.py#L107-L134 def serialize_querystring(data): @@ -441,7 +439,7 @@ def _parse_content_type_header(header): index_of_equals = param.find("=") if index_of_equals != -1: key = param[:index_of_equals].strip(items_to_strip) - value = param[index_of_equals + 1 :].strip(items_to_strip) + value = param[index_of_equals + 1:].strip(items_to_strip) params_dict[key.lower()] = value return content_type, params_dict @@ -503,7 +501,7 @@ def __init__(self, raw: HTTPResponse, request_url: str) -> None: self.raw = raw self.status_code = raw.status self.headers = raw.headers - self.url = getattr(raw, 'url', request_url) + self.url = urljoin(request_url, getattr(raw, 'url', '')) self.encoding = get_encoding_from_headers(self.headers) # XXX: shortcut to essentially what requests does in `iter_content()`. @@ -539,9 +537,7 @@ def stream(self, chunk_size: int = 10 * 1024) -> Generator[bytes, None, None]: @property def content(self) -> bytes: if self._content is None: - logger.warning(f'Getting content!!!') self._content = b"".join(self.stream()) or b"" - logger.warning(f'Getting content DONE: "{self._content}"') return self._content @@ -612,7 +608,7 @@ def close(self, cache: bool = True) -> None: if self.raw: try: if cache: - # Inspired by requests: https://github.com/psf/requests/blob/eedd67462819f8dbf8c1c32e77f9070606605231/requests/sessions.py#L160-L163 + # Inspired by requests: https://github.com/psf/requests/blob/eedd67462819f8dbf8c1c32e77f9070606605231/requests/sessions.py#L160-L163 # noqa try: self.content except (DecodeError, ProtocolError, RuntimeError): @@ -838,7 +834,8 @@ def send(self, method, url, *, params=None, allow_redirects=True, timeout=-1) -> response.close(cache=False) # XXX: urllib3's MaxRetryError can wrap all the other errors, so # we should probably be checking `error.reason` on it. See how - # requests handles this: https://github.com/psf/requests/blob/a25fde6989f8df5c3d823bc9f2e2fc24aa71f375/src/requests/adapters.py#L502-L537 + # requests handles this: + # https://github.com/psf/requests/blob/a25fde6989f8df5c3d823bc9f2e2fc24aa71f375/src/requests/adapters.py#L502-L537 # # XXX: requests.RetryError used to be in our list of handleable # errors; it gets raised when urllib3 raises a MaxRetryError with a @@ -846,7 +843,8 @@ def send(self, method, url, *, params=None, allow_redirects=True, timeout=-1) -> # situation here... # # XXX: Consider how read-related exceptions need to be handled (or - # not). In requests: https://github.com/psf/requests/blob/a25fde6989f8df5c3d823bc9f2e2fc24aa71f375/src/requests/models.py#L794-L839 + # not). In requests: + # https://github.com/psf/requests/blob/a25fde6989f8df5c3d823bc9f2e2fc24aa71f375/src/requests/models.py#L794-L839 except WaybackSession.handleable_errors as error: response = getattr(error, 'response', None) if response is not None: diff --git a/wayback/tests/test_client.py b/wayback/tests/test_client.py index 644eef8..8f37219 100644 --- a/wayback/tests/test_client.py +++ b/wayback/tests/test_client.py @@ -1,9 +1,15 @@ from datetime import date, datetime, timezone, timedelta +from io import BytesIO from itertools import islice from pathlib import Path import time import pytest from unittest import mock +from urllib.parse import urlparse, ParseResult, parse_qs +from urllib3 import (HTTPConnectionPool, + HTTPResponse, + HTTPHeaderDict, + Timeout as Urllib3Timeout) from .support import create_vcr from .._client import (CdxRecord, Mode, @@ -217,10 +223,6 @@ def test_search_with_filter_tuple(): assert all(('feature' in v.url for v in versions)) -from io import BytesIO -from urllib.parse import urlparse, ParseResult, parse_qs -from urllib3 import HTTPConnectionPool, HTTPResponse, HTTPHeaderDict -import logging class Urllib3MockManager: def __init__(self) -> None: self.responses = [] @@ -769,7 +771,6 @@ def return_timeout(self, *args, **kwargs) -> HTTPResponse: >>> def test_timeout(self, mock_class): >>> assert urllib3.get('http://test.com', timeout=5).data == b'5' """ - logging.warning(f'Called with args={args}, kwargs={kwargs}') res = HTTPResponse( body=str(kwargs.get('timeout', None)).encode(), headers=HTTPHeaderDict(), @@ -778,9 +779,6 @@ def return_timeout(self, *args, **kwargs) -> HTTPResponse: return res -from urllib3 import Timeout as Urllib3Timeout - - class TestWaybackSession: def test_request_retries(self, urllib3_mock): urllib3_mock.get('http://test.com', [{'text': 'bad1', 'status_code': 503},