diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 4c2faedb..aa028df5 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -7,4 +7,3 @@ current_version = 0.12.6 files = setup.py cachecontrol/__init__.py docs/conf.py commit = True tag = True - diff --git a/.gitignore b/.gitignore index e2de968b..92826fa8 100644 --- a/.gitignore +++ b/.gitignore @@ -2,16 +2,17 @@ # # SPDX-License-Identifier: Apache-2.0 -.DS_Store +*.egg-info/* *.pyc *.pyo -*.egg-info/* -dist +*~ +.DS_Store +.Python +.tox bin +build/ +dist +docs/_build +include lib lib64 -include -.Python -docs/_build -build/ -.tox \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..c69b49d4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/timothycrosley/isort + rev: 4.3.21 + hooks: + - id: isort + additional_dependencies: + - toml +- repo: https://github.com/python/black + rev: 19.10b0 + hooks: + - id: black diff --git a/cachecontrol/__init__.py b/cachecontrol/__init__.py index 002e3a05..23ab25ed 100644 --- a/cachecontrol/__init__.py +++ b/cachecontrol/__init__.py @@ -10,6 +10,6 @@ __email__ = "eric@ionrock.org" __version__ = "0.12.6" -from .wrapper import CacheControl from .adapter import CacheControlAdapter from .controller import CacheController +from .wrapper import CacheControl diff --git a/cachecontrol/_cmd.py b/cachecontrol/_cmd.py index ccee0079..bf04b5db 100644 --- a/cachecontrol/_cmd.py +++ b/cachecontrol/_cmd.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import logging +from argparse import ArgumentParser import requests @@ -10,8 +11,6 @@ from cachecontrol.cache import DictCache from cachecontrol.controller import logger -from argparse import ArgumentParser - def setup_logging(): logger.setLevel(logging.DEBUG) diff --git a/cachecontrol/adapter.py b/cachecontrol/adapter.py index 22b49638..e3e4c512 100644 --- a/cachecontrol/adapter.py +++ b/cachecontrol/adapter.py @@ -2,14 +2,14 @@ # # SPDX-License-Identifier: Apache-2.0 -import types import functools +import types import zlib from requests.adapters import HTTPAdapter -from .controller import CacheController, PERMANENT_REDIRECT_STATUSES from .cache import DictCache +from .controller import PERMANENT_REDIRECT_STATUSES, CacheController from .filewrapper import CallbackFileWrapper diff --git a/cachecontrol/cache.py b/cachecontrol/cache.py index 8037e528..9500f5fb 100644 --- a/cachecontrol/cache.py +++ b/cachecontrol/cache.py @@ -6,26 +6,32 @@ The cache object API for implementing caches. The default is a thread safe in-memory dictionary. """ + +from abc import ABCMeta, abstractmethod from threading import Lock +from six import add_metaclass -class BaseCache(object): +@add_metaclass(ABCMeta) +class BaseCache(object): + @abstractmethod def get(self, key): - raise NotImplementedError() + pass + @abstractmethod def set(self, key, value): - raise NotImplementedError() + pass + @abstractmethod def delete(self, key): - raise NotImplementedError() + pass def close(self): pass class DictCache(BaseCache): - def __init__(self, init_dict=None): self.lock = Lock() self.data = init_dict or {} diff --git a/cachecontrol/caches/file_cache.py b/cachecontrol/caches/file_cache.py index 43393b8f..523b07a5 100644 --- a/cachecontrol/caches/file_cache.py +++ b/cachecontrol/caches/file_cache.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import hashlib +import logging import os from textwrap import dedent @@ -14,6 +15,9 @@ except NameError: # py2.X FileNotFoundError = (IOError, OSError) + FileExistsError = (IOError, OSError) + +logger = logging.getLogger(__name__) def _secure_open_write(filename, fmode): @@ -58,7 +62,6 @@ def _secure_open_write(filename, fmode): class FileCache(BaseCache): - def __init__( self, directory, @@ -111,6 +114,7 @@ def _fn(self, name): def get(self, key): name = self._fn(key) + logger.debug("Looking up '%s' in '%s'", key, name) try: with open(name, "rb") as fh: return fh.read() @@ -120,11 +124,13 @@ def get(self, key): def set(self, key, value): name = self._fn(key) + logger.debug("Caching '%s' in '%s'", key, name) # Make sure the directory exists + parentdir = os.path.dirname(name) try: - os.makedirs(os.path.dirname(name), self.dirmode) - except (IOError, OSError): + os.makedirs(parentdir, self.dirmode) + except FileExistsError: pass with self.lock_class(name) as lock: diff --git a/cachecontrol/caches/redis_cache.py b/cachecontrol/caches/redis_cache.py index 564c30e4..f0b146e0 100644 --- a/cachecontrol/caches/redis_cache.py +++ b/cachecontrol/caches/redis_cache.py @@ -5,11 +5,11 @@ from __future__ import division from datetime import datetime + from cachecontrol.cache import BaseCache class RedisCache(BaseCache): - def __init__(self, conn): self.conn = conn diff --git a/cachecontrol/compat.py b/cachecontrol/compat.py index 72c456cf..d602c4aa 100644 --- a/cachecontrol/compat.py +++ b/cachecontrol/compat.py @@ -2,17 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 -try: - from urllib.parse import urljoin -except ImportError: - from urlparse import urljoin - - -try: - import cPickle as pickle -except ImportError: - import pickle - # Handle the case where the requests module has been patched to not have # urllib3 bundled as part of its source. try: @@ -24,9 +13,3 @@ from requests.packages.urllib3.util import is_fp_closed except ImportError: from urllib3.util import is_fp_closed - -# Replicate some six behaviour -try: - text_type = unicode -except NameError: - text_type = str diff --git a/cachecontrol/controller.py b/cachecontrol/controller.py index 4d76877f..8a2fee50 100644 --- a/cachecontrol/controller.py +++ b/cachecontrol/controller.py @@ -5,9 +5,9 @@ """ The httplib2 algorithms ported for use with requests. """ +import calendar import logging import re -import calendar import time from email.utils import parsedate_tz @@ -16,7 +16,6 @@ from .cache import DictCache from .serialize import Serializer - logger = logging.getLogger(__name__) URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") @@ -164,7 +163,7 @@ def cached_request(self, request): # with cache busting headers as usual (ie no-cache). if int(resp.status) in PERMANENT_REDIRECT_STATUSES: msg = ( - 'Returning cached permanent redirect response ' + "Returning cached permanent redirect response " "(ignoring date and etag information)" ) logger.debug(msg) @@ -312,20 +311,21 @@ def cache_response(self, request, response, body=None, status_codes=None): # If we've been given an etag, then keep the response if self.cache_etags and "etag" in response_headers: logger.debug("Caching due to etag") - self.cache.set( - cache_url, self.serializer.dumps(request, response, body) - ) + self.cache.set(cache_url, self.serializer.dumps(request, response, body)) # Add to the cache any permanent redirects. We do this before looking # that the Date headers. elif int(response.status) in PERMANENT_REDIRECT_STATUSES: logger.debug("Caching permanent redirect") - self.cache.set(cache_url, self.serializer.dumps(request, response, b'')) + self.cache.set(cache_url, self.serializer.dumps(request, response, b"")) # Add to the cache if the response headers demand it. If there # is no date header then we can't do anything about expiring # the cache. - elif "date" in response_headers: + elif "date" not in response_headers: + logger.debug("No date header, expiration cannot be set.") + return + else: # cache when there is a max-age > 0 if "max-age" in cc and cc["max-age"] > 0: logger.debug("Caching b/c date exists and max-age > 0") @@ -341,6 +341,8 @@ def cache_response(self, request, response, body=None, status_codes=None): self.cache.set( cache_url, self.serializer.dumps(request, response, body) ) + else: + logger.debug("No combination of headers to cache.") def update_cached_response(self, request, response): """On a 304 we will get a new set of headers that we want to diff --git a/cachecontrol/heuristics.py b/cachecontrol/heuristics.py index ebe4a96f..27ef7dae 100644 --- a/cachecontrol/heuristics.py +++ b/cachecontrol/heuristics.py @@ -4,10 +4,8 @@ import calendar import time - -from email.utils import formatdate, parsedate, parsedate_tz - from datetime import datetime, timedelta +from email.utils import formatdate, parsedate, parsedate_tz TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT" @@ -22,7 +20,6 @@ def datetime_to_header(dt): class BaseHeuristic(object): - def warning(self, response): """ Return a valid 1xx warning header value describing the cache @@ -101,8 +98,19 @@ class LastModified(BaseHeuristic): http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397 Unlike mozilla we limit this to 24-hr. """ + cacheable_by_default_statuses = { - 200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501 + 200, + 203, + 204, + 206, + 300, + 301, + 404, + 405, + 410, + 414, + 501, } def update_headers(self, resp): diff --git a/cachecontrol/serialize.py b/cachecontrol/serialize.py index 4e49a90e..0d40ca5a 100644 --- a/cachecontrol/serialize.py +++ b/cachecontrol/serialize.py @@ -10,7 +10,10 @@ import msgpack from requests.structures import CaseInsensitiveDict -from .compat import HTTPResponse, pickle, text_type +from six import text_type +from six.moves import cPickle as pickle + +from .compat import HTTPResponse def _b64_decode_bytes(b): @@ -25,7 +28,6 @@ def _b64_decode_str(s): class Serializer(object): - def dumps(self, request, response, body): response_headers = CaseInsensitiveDict(response.headers) diff --git a/dev_requirements.txt b/dev_requirements.txt index ce7f9994..e1896819 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -4,15 +4,17 @@ -e . -tox -pytest-cov -pytest -mock +black +bumpversion cherrypy -sphinx -redis +isort lockfile -bumpversion +mock +pre-commit +pytest +pytest-cov +redis +sphinx +tox twine -black wheel diff --git a/docs/conf.py b/docs/conf.py index bd7cedb7..b4447a89 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,7 +15,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os +import os +import sys # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the diff --git a/examples/benchmark.py b/examples/benchmark.py index b036f788..2eac44b7 100644 --- a/examples/benchmark.py +++ b/examples/benchmark.py @@ -2,13 +2,14 @@ # # SPDX-License-Identifier: Apache-2.0 -import sys -import requests import argparse - -from multiprocessing import Process +import sys from datetime import datetime +from multiprocessing import Process from wsgiref.simple_server import make_server + +import requests + from cachecontrol import CacheControl HOST = "localhost" @@ -17,12 +18,12 @@ class Server(object): - def __call__(self, env, sr): body = "Hello World!" status = "200 OK" headers = [ - ("Cache-Control", "max-age=%i" % (60 * 10)), ("Content-Type", "text/plain") + ("Cache-Control", "max-age=%i" % (60 * 10)), + ("Content-Type", "text/plain"), ] sr(status, headers) return body diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..1d48a866 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + +[tool.isort] +line_length = 88 +known_first_party = ['cachecontrol'] +known_third_party = ['mock', 'lockfile', 'requests', 'pytest', 'msgpack', 'cherrypy'] +# Set multi-line output to "Vertical Hanging indent" to avoid fighting with black. +multi_line_output = 3 +include_trailing_comma = true diff --git a/setup.py b/setup.py index 7cdae8c7..b571cc3f 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ include_package_data=True, description="httplib2 caching for requests", long_description=long_description, - install_requires=["requests", "msgpack>=0.5.2"], + install_requires=["requests", "msgpack>=0.5.2", "six"], extras_require={"filecache": ["lockfile>=0.9"], "redis": ["redis>=2.10.5"]}, entry_points={"console_scripts": ["doesitcache = cachecontrol._cmd:main"]}, python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*", diff --git a/tests/conftest.py b/tests/conftest.py index e68b1548..2681ad43 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,18 +2,15 @@ # # SPDX-License-Identifier: Apache-2.0 -from pprint import pformat - import os import socket - -import pytest +from pprint import pformat import cherrypy +import pytest class SimpleApp(object): - def __init__(self): self.etag_count = 0 self.update_etag_string() diff --git a/tests/test_adapter.py b/tests/test_adapter.py index a6820571..ce85928f 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -4,8 +4,8 @@ import mock import pytest - from requests import Session + from cachecontrol.adapter import CacheControlAdapter from cachecontrol.cache import DictCache from cachecontrol.wrapper import CacheControl @@ -35,7 +35,6 @@ def sess(url, request): class TestSessionActions(object): - def test_get_caches(self, url, sess): r2 = sess.get(url) assert r2.from_cache is True diff --git a/tests/test_cache_control.py b/tests/test_cache_control.py index 18c75624..0b7c0f8f 100644 --- a/tests/test_cache_control.py +++ b/tests/test_cache_control.py @@ -5,19 +5,18 @@ """ Unit tests that verify our caching methods work correctly. """ +import time + import pytest from mock import ANY, Mock -import time from cachecontrol import CacheController from cachecontrol.cache import DictCache - TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT" class NullSerializer(object): - def dumps(self, request, response): return response @@ -156,7 +155,9 @@ def req(self, headers): return self.c.cached_request(mock_request) def test_cache_request_no_headers(self): - cached_resp = Mock(headers={"ETag": "jfd9094r808", "Content-Length": 100}, status=200) + cached_resp = Mock( + headers={"ETag": "jfd9094r808", "Content-Length": 100}, status=200 + ) self.c.cache = DictCache({self.url: cached_resp}) resp = self.req({}) assert not resp diff --git a/tests/test_chunked_response.py b/tests/test_chunked_response.py index 46840870..bc390fbb 100644 --- a/tests/test_chunked_response.py +++ b/tests/test_chunked_response.py @@ -1,3 +1,9 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + +# encoding: utf-8 + # SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 @@ -22,7 +28,6 @@ def sess(): class TestChunkedResponses(object): - def test_cache_chunked_response(self, url, sess): """ Verify that an otherwise cacheable response is cached when the diff --git a/tests/test_etag.py b/tests/test_etag.py index 700a0c54..2b627763 100644 --- a/tests/test_etag.py +++ b/tests/test_etag.py @@ -3,18 +3,15 @@ # SPDX-License-Identifier: Apache-2.0 import pytest - -from mock import Mock, patch - import requests +from mock import Mock, patch from cachecontrol import CacheControl from cachecontrol.cache import DictCache -from cachecontrol.compat import urljoin +from six.moves.urllib.parse import urljoin class NullSerializer(object): - def dumps(self, request, response, body=None): return response diff --git a/tests/test_expires_heuristics.py b/tests/test_expires_heuristics.py index 5d62f157..913704e7 100644 --- a/tests/test_expires_heuristics.py +++ b/tests/test_expires_heuristics.py @@ -4,26 +4,26 @@ import calendar import time - -from email.utils import formatdate, parsedate from datetime import datetime +from email.utils import formatdate, parsedate +from pprint import pprint from mock import Mock from requests import Session, get from requests.structures import CaseInsensitiveDict from cachecontrol import CacheControl -from cachecontrol.heuristics import LastModified, ExpiresAfter, OneDayCache -from cachecontrol.heuristics import TIME_FMT -from cachecontrol.heuristics import BaseHeuristic - -from pprint import pprint +from cachecontrol.heuristics import ( + TIME_FMT, + BaseHeuristic, + ExpiresAfter, + LastModified, + OneDayCache, +) class TestHeuristicWithoutWarning(object): - def setup(self): - class NoopHeuristic(BaseHeuristic): warning = Mock() @@ -41,11 +41,8 @@ def test_no_header_change_means_no_warning_header(self, url): class TestHeuristicWith3xxResponse(object): - def setup(self): - class DummyHeuristic(BaseHeuristic): - def update_headers(self, resp): return {"x-dummy-header": "foobar"} @@ -63,7 +60,6 @@ def test_heuristic_applies_to_304(self, url): class TestUseExpiresHeuristic(object): - def test_expires_heuristic_arg(self): sess = Session() cached_sess = CacheControl(sess, heuristic=Mock()) @@ -71,7 +67,6 @@ def test_expires_heuristic_arg(self): class TestOneDayCache(object): - def setup(self): self.sess = Session() self.cached_sess = CacheControl(self.sess, heuristic=OneDayCache()) @@ -91,7 +86,6 @@ def test_cache_for_one_day(self, url): class TestExpiresAfter(object): - def setup(self): self.sess = Session() self.cache_sess = CacheControl(self.sess, heuristic=ExpiresAfter(days=1)) @@ -112,7 +106,6 @@ def test_expires_after_one_day(self, url): class TestLastModified(object): - def setup(self): self.sess = Session() self.cached_sess = CacheControl(self.sess, heuristic=LastModified()) @@ -132,7 +125,6 @@ def test_last_modified(self, url): class DummyResponse: - def __init__(self, status, headers): self.status = status self.headers = CaseInsensitiveDict(headers) @@ -143,7 +135,6 @@ def datetime_to_header(dt): class TestModifiedUnitTests(object): - def last_modified(self, period): return time.strftime(TIME_FMT, time.gmtime(self.time_now - period)) diff --git a/tests/test_max_age.py b/tests/test_max_age.py index 739f27e1..a04776cd 100644 --- a/tests/test_max_age.py +++ b/tests/test_max_age.py @@ -3,15 +3,15 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import print_function -import pytest +import pytest from requests import Session + from cachecontrol.adapter import CacheControlAdapter from cachecontrol.cache import DictCache class NullSerializer(object): - def dumps(self, request, response, body=None): return response @@ -22,7 +22,6 @@ def loads(self, request, data): class TestMaxAge(object): - @pytest.fixture() def sess(self, url): self.url = url diff --git a/tests/test_redirects.py b/tests/test_redirects.py index 56571f66..40db5f6e 100644 --- a/tests/test_redirects.py +++ b/tests/test_redirects.py @@ -11,7 +11,6 @@ class TestPermanentRedirects(object): - def setup(self): self.sess = CacheControl(requests.Session()) @@ -33,7 +32,6 @@ def test_bust_cache_on_redirect(self, url): class TestMultipleChoicesRedirects(object): - def setup(self): self.sess = CacheControl(requests.Session()) diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 0806035a..eccd2797 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -3,17 +3,16 @@ # SPDX-License-Identifier: Apache-2.0 import sys -import pytest +import pytest +from requests import Session from cachecontrol import CacheControl from cachecontrol.caches import FileCache from cachecontrol.filewrapper import CallbackFileWrapper -from requests import Session class Test39(object): - @pytest.mark.skipif( sys.version.startswith("2"), reason="Only run this for python 3.x" ) diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 59771c5a..598ae289 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -4,15 +4,13 @@ import msgpack import requests - from mock import Mock -from cachecontrol.compat import pickle from cachecontrol.serialize import Serializer +from six.moves import cPickle as pickle class TestSerializer(object): - def setup(self): self.serializer = Serializer() self.response_data = { @@ -93,7 +91,9 @@ def test_read_latest_version_streamable(self, url): original_resp = requests.get(url, stream=True) req = original_resp.request - resp = self.serializer.loads(req, self.serializer.dumps(req, original_resp.raw, original_resp.content)) + resp = self.serializer.loads( + req, self.serializer.dumps(req, original_resp.raw, original_resp.content) + ) assert resp.read() diff --git a/tests/test_storage_filecache.py b/tests/test_storage_filecache.py index 4ac8a4f0..38f178b0 100644 --- a/tests/test_storage_filecache.py +++ b/tests/test_storage_filecache.py @@ -7,16 +7,16 @@ """ import os import string - from random import randint, sample import pytest import requests -from cachecontrol import CacheControl -from cachecontrol.caches import FileCache from lockfile import LockFile from lockfile.mkdirlockfile import MkdirLockFile +from cachecontrol import CacheControl +from cachecontrol.caches import FileCache + def randomdata(): """Plain random http data generator:""" @@ -26,7 +26,6 @@ def randomdata(): class TestStorageFileCache(object): - @pytest.fixture() def sess(self, url, tmpdir): self.url = url diff --git a/tests/test_storage_redis.py b/tests/test_storage_redis.py index 4646be50..3edfb8ac 100644 --- a/tests/test_storage_redis.py +++ b/tests/test_storage_redis.py @@ -5,11 +5,11 @@ from datetime import datetime from mock import Mock + from cachecontrol.caches import RedisCache class TestRedisCache(object): - def setup(self): self.conn = Mock() self.cache = RedisCache(self.conn) diff --git a/tests/test_vary.py b/tests/test_vary.py index 543294b3..a9d6fc96 100644 --- a/tests/test_vary.py +++ b/tests/test_vary.py @@ -2,18 +2,17 @@ # # SPDX-License-Identifier: Apache-2.0 +from pprint import pprint + import pytest import requests from cachecontrol import CacheControl from cachecontrol.cache import DictCache -from cachecontrol.compat import urljoin - -from pprint import pprint +from six.moves.urllib.parse import urljoin class TestVary(object): - @pytest.fixture() def sess(self, url): self.url = urljoin(url, "/vary_accept")