
Commit 64b1c10

WaVEV and timgraham committed
INTPYTHON-451 Add support for database caching
Co-authored-by: Tim Graham <[email protected]>
1 parent 67e1e9f commit 64b1c10

19 files changed: +1412 −5 lines

.pre-commit-config.yaml  (+1 −1)

@@ -45,7 +45,7 @@ repos:
     hooks:
       - id: rstcheck
         additional_dependencies: [sphinx]
-        args: ["--ignore-directives=django-admin,fieldlookup,setting", "--ignore-roles=djadmin,lookup,setting"]
+        args: ["--ignore-directives=django-admin,django-admin-option,fieldlookup,setting", "--ignore-roles=djadmin,lookup,setting"]
 
 # We use the Python version instead of the original version which seems to require Docker
 # https://github.com/koalaman/shellcheck-precommit

django_mongodb_backend/cache.py  (new file, +216 lines)

import pickle
from datetime import datetime, timezone

from django.core.cache.backends.base import DEFAULT_TIMEOUT, BaseCache
from django.core.cache.backends.db import Options
from django.db import connections, router
from django.utils.functional import cached_property
from pymongo import ASCENDING, DESCENDING, IndexModel, ReturnDocument
from pymongo.errors import DuplicateKeyError, OperationFailure


class MongoSerializer:
    def __init__(self, protocol=None):
        self.protocol = pickle.HIGHEST_PROTOCOL if protocol is None else protocol

    def dumps(self, obj):
        # For better incr() and decr() atomicity, don't pickle integers.
        # Using type() rather than isinstance() matches only integers and not
        # subclasses like bool.
        if type(obj) is int:  # noqa: E721
            return obj
        return pickle.dumps(obj, self.protocol)

    def loads(self, data):
        try:
            return int(data)
        except (ValueError, TypeError):
            return pickle.loads(data)  # noqa: S301


class MongoDBCache(BaseCache):
    pickle_protocol = pickle.HIGHEST_PROTOCOL

    def __init__(self, collection_name, params):
        super().__init__(params)
        self._collection_name = collection_name

        class CacheEntry:
            _meta = Options(collection_name)

        self.cache_model_class = CacheEntry

    def create_indexes(self):
        expires_index = IndexModel("expires_at", expireAfterSeconds=0)
        key_index = IndexModel("key", unique=True)
        self.collection_for_write.create_indexes([expires_index, key_index])

    @cached_property
    def serializer(self):
        return MongoSerializer(self.pickle_protocol)

    @property
    def collection_for_read(self):
        db = router.db_for_read(self.cache_model_class)
        return connections[db].get_collection(self._collection_name)

    @property
    def collection_for_write(self):
        db = router.db_for_write(self.cache_model_class)
        return connections[db].get_collection(self._collection_name)

    def _filter_expired(self, expired=False):
        """
        Return MQL to exclude expired entries (needed because the MongoDB
        daemon does not remove expired entries precisely when they expire).
        If expired=True, return MQL to include only expired entries.
        """
        op = "$lt" if expired else "$gte"
        return {"expires_at": {op: datetime.utcnow()}}

    def get_backend_timeout(self, timeout=DEFAULT_TIMEOUT):
        if timeout is None:
            return datetime.max
        timestamp = super().get_backend_timeout(timeout)
        return datetime.fromtimestamp(timestamp, tz=timezone.utc)

    def get(self, key, default=None, version=None):
        return self.get_many([key], version).get(key, default)

    def get_many(self, keys, version=None):
        if not keys:
            return {}
        keys_map = {self.make_and_validate_key(key, version=version): key for key in keys}
        with self.collection_for_read.find(
            {"key": {"$in": tuple(keys_map)}, **self._filter_expired(expired=False)}
        ) as cursor:
            return {keys_map[row["key"]]: self.serializer.loads(row["value"]) for row in cursor}

    def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
        key = self.make_and_validate_key(key, version=version)
        num = self.collection_for_write.count_documents({}, hint="_id_")
        if num >= self._max_entries:
            self._cull(num)
        self.collection_for_write.update_one(
            {"key": key},
            {
                "$set": {
                    "key": key,
                    "value": self.serializer.dumps(value),
                    "expires_at": self.get_backend_timeout(timeout),
                }
            },
            upsert=True,
        )

    def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
        key = self.make_and_validate_key(key, version=version)
        num = self.collection_for_write.count_documents({}, hint="_id_")
        if num >= self._max_entries:
            self._cull(num)
        try:
            self.collection_for_write.update_one(
                {"key": key, **self._filter_expired(expired=True)},
                {
                    "$set": {
                        "key": key,
                        "value": self.serializer.dumps(value),
                        "expires_at": self.get_backend_timeout(timeout),
                    }
                },
                upsert=True,
            )
        except DuplicateKeyError:
            return False
        return True

    def _cull(self, num):
        if self._cull_frequency == 0:
            self.clear()
        else:
            # The fraction of entries that are culled when MAX_ENTRIES is
            # reached is 1 / CULL_FREQUENCY. For example, in the default case
            # of CULL_FREQUENCY=3, 2/3 of the entries are kept, thus `keep_num`
            # will be 2/3 of the current number of entries.
            keep_num = num - num // self._cull_frequency
            try:
                # Find the first cache entry beyond the retention limit,
                # culling entries that expire the soonest.
                deleted_from = next(
                    self.collection_for_write.aggregate(
                        [
                            {"$sort": {"expires_at": DESCENDING, "key": ASCENDING}},
                            {"$skip": keep_num},
                            {"$limit": 1},
                            {"$project": {"key": 1, "expires_at": 1}},
                        ]
                    )
                )
            except StopIteration:
                # If no entries are found, there is nothing to delete. It may
                # happen if the database removes expired entries between the
                # query to get `num` and the query to get `deleted_from`.
                pass
            else:
                # Cull the cache.
                self.collection_for_write.delete_many(
                    {
                        "$or": [
                            # Delete keys that expire before `deleted_from`...
                            {"expires_at": {"$lt": deleted_from["expires_at"]}},
                            # and the entries that share an expiration with
                            # `deleted_from` but are alphabetically after it
                            # (per the same sorting to fetch `deleted_from`).
                            {
                                "$and": [
                                    {"expires_at": deleted_from["expires_at"]},
                                    {"key": {"$gte": deleted_from["key"]}},
                                ]
                            },
                        ]
                    }
                )

    def touch(self, key, timeout=DEFAULT_TIMEOUT, version=None):
        key = self.make_and_validate_key(key, version=version)
        res = self.collection_for_write.update_one(
            {"key": key}, {"$set": {"expires_at": self.get_backend_timeout(timeout)}}
        )
        return res.matched_count > 0

    def incr(self, key, delta=1, version=None):
        serialized_key = self.make_and_validate_key(key, version=version)
        try:
            updated = self.collection_for_write.find_one_and_update(
                {"key": serialized_key, **self._filter_expired(expired=False)},
                {"$inc": {"value": delta}},
                return_document=ReturnDocument.AFTER,
            )
        except OperationFailure as exc:
            method_name = "incr" if delta >= 1 else "decr"
            raise TypeError(f"Cannot apply {method_name}() to a non-numeric value.") from exc
        if updated is None:
            raise ValueError(f"Key '{key}' not found.") from None
        return updated["value"]

    def delete(self, key, version=None):
        return self._delete_many([key], version)

    def delete_many(self, keys, version=None):
        self._delete_many(keys, version)

    def _delete_many(self, keys, version=None):
        if not keys:
            return False
        keys = tuple(self.make_and_validate_key(key, version=version) for key in keys)
        return bool(self.collection_for_write.delete_many({"key": {"$in": keys}}).deleted_count)

    def has_key(self, key, version=None):
        key = self.make_and_validate_key(key, version=version)
        num = self.collection_for_read.count_documents(
            {"key": key, **self._filter_expired(expired=False)}
        )
        return num > 0

    def clear(self):
        self.collection_for_write.delete_many({})
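
For context, a project would point its CACHES setting at this backend, with LOCATION naming the collection the backend reads and writes. The following is a minimal sketch, not part of this commit; the collection name "my_cache_collection" is illustrative, and the BACKEND dotted path simply follows this commit's module layout.

# settings.py -- minimal sketch, not part of this commit.
CACHES = {
    "default": {
        # Dotted path assumed from the module added above.
        "BACKEND": "django_mongodb_backend.cache.MongoDBCache",
        # LOCATION becomes the collection_name passed to MongoDBCache();
        # "my_cache_collection" is an illustrative name.
        "LOCATION": "my_cache_collection",
    },
}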

django_mongodb_backend/creation.py  (+13)

@@ -1,6 +1,10 @@
 from django.conf import settings
 from django.db.backends.base.creation import BaseDatabaseCreation
 
+from django_mongodb_backend.management.commands.createcachecollection import (
+    Command as CreateCacheCollection,
+)
+
 
 class DatabaseCreation(BaseDatabaseCreation):
     def _execute_create_test_db(self, cursor, parameters, keepdb=False):
@@ -16,3 +20,12 @@ def _destroy_test_db(self, test_database_name, verbosity):
         for collection in self.connection.introspection.table_names():
             if not collection.startswith("system."):
                 self.connection.database.drop_collection(collection)
+
+    def create_test_db(self, *args, **kwargs):
+        test_database_name = super().create_test_db(*args, **kwargs)
+        # Not using call_command() avoids the requirement to put
+        # "django_mongodb_backend" in INSTALLED_APPS.
+        CreateCacheCollection().handle(
+            database=self.connection.alias, verbosity=kwargs["verbosity"]
+        )
+        return test_database_name

django_mongodb_backend/management/__init__.py

Whitespace-only changes.

django_mongodb_backend/management/commands/__init__.py

Whitespace-only changes.
django_mongodb_backend/management/commands/createcachecollection.py  (new file, +50 lines)
from django.conf import settings
from django.core.cache import caches
from django.core.management.base import BaseCommand
from django.db import DEFAULT_DB_ALIAS, connections, router

from django_mongodb_backend.cache import MongoDBCache


class Command(BaseCommand):
    help = "Creates the collections needed to use the MongoDB cache backend."
    requires_system_checks = []

    def add_arguments(self, parser):
        parser.add_argument(
            "args",
            metavar="collection_name",
            nargs="*",
            help="Optional collections names. Otherwise, settings.CACHES is "
            "used to find cache collections.",
        )
        parser.add_argument(
            "--database",
            default=DEFAULT_DB_ALIAS,
            help="Nominates a database onto which the cache collections will be "
            'installed. Defaults to the "default" database.',
        )

    def handle(self, *collection_names, **options):
        db = options["database"]
        self.verbosity = options["verbosity"]
        if collection_names:
            # Legacy behavior, collection_name specified as argument
            for collection_name in collection_names:
                self.check_collection(db, collection_name)
        else:
            for cache_alias in settings.CACHES:
                cache = caches[cache_alias]
                if isinstance(cache, MongoDBCache):
                    self.check_collection(db, cache._collection_name)

    def check_collection(self, database, collection_name):
        cache = MongoDBCache(collection_name, {})
        if not router.allow_migrate_model(database, cache.cache_model_class):
            return
        connection = connections[database]
        if cache._collection_name in connection.introspection.table_names():
            if self.verbosity > 0:
                self.stdout.write("Cache collection '%s' already exists." % cache._collection_name)
            return
        cache.create_indexes()
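
A rough usage sketch, not part of the commit: with "django_mongodb_backend" in INSTALLED_APPS and a MongoDBCache backend configured in CACHES, the command can also be invoked programmatically through Django's call_command(). The "other" database alias below is illustrative, not something this commit defines.

# Minimal sketch assuming Django settings are configured and
# "django_mongodb_backend" is listed in INSTALLED_APPS.
from django.core.management import call_command

call_command("createcachecollection")                    # uses the "default" database
call_command("createcachecollection", database="other")  # "other" is a hypothetical alias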

docs/source/_ext/djangodocs.py  (+4)

@@ -1,3 +1,6 @@
+from sphinx.domains.std import Cmdoption
+
+
 def setup(app):
     app.add_object_type(
         directivename="django-admin",
@@ -14,3 +17,4 @@ def setup(app):
         rolename="setting",
         indextemplate="pair: %s; setting",
     )
+    app.add_directive("django-admin-option", Cmdoption)

docs/source/_static/custom.css  (new file, +4 lines)

p.admonition-title::after {
    /* Remove colon after admonition titles. */
    content: none;
}

docs/source/conf.py  (+1 −1)

@@ -52,4 +52,4 @@
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 
 html_theme = "alabaster"
-# html_static_path = ["_static"]
+html_static_path = ["_static"]

docs/source/index.rst  (+5)

@@ -48,6 +48,11 @@ Forms
 
 - :doc:`ref/forms`
 
+Core functionalities
+====================
+
+- :doc:`topics/cache`
+
 Miscellaneous
 =============
 
docs/source/ref/django-admin.rst  (new file, +28 lines)

===================
Management commands
===================

Django MongoDB Backend includes some :doc:`Django management commands
<django:ref/django-admin>`.

Required configuration
======================

To make these commands available, you must include ``"django_mongodb_backend"``
in the :setting:`INSTALLED_APPS` setting.

Available commands
==================

``createcachecollection``
-------------------------

.. django-admin:: createcachecollection

Creates the cache collection for use with the :doc:`database cache backend
</topics/cache>` using the information from your :setting:`CACHES` setting.

.. django-admin-option:: --database DATABASE

Specifies the database in which the cache collection(s) will be created.
Defaults to ``default``.

docs/source/ref/index.rst  (+1)

@@ -7,4 +7,5 @@ API reference
 
     models/index
     forms
+    django-admin
     utils

docs/source/releases/5.1.x.rst  (+7)

@@ -2,6 +2,13 @@
 Django MongoDB Backend 5.1.x
 ============================
 
+5.1.0 beta 2
+============
+
+*Unreleased*
+
+- Added support for :doc:`database caching </topics/cache>`.
+
 5.1.0 beta 1
 ============
 