Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

INTPYTHON-451 Add support for database caching #253

Merged
merged 1 commit into from
Mar 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ repos:
hooks:
- id: rstcheck
additional_dependencies: [sphinx]
args: ["--ignore-directives=django-admin,fieldlookup,setting", "--ignore-roles=djadmin,lookup,setting"]
args: ["--ignore-directives=django-admin,django-admin-option,fieldlookup,setting", "--ignore-roles=djadmin,lookup,setting"]

# We use the Python version instead of the original version which seems to require Docker
# https://github.com/koalaman/shellcheck-precommit
Expand Down
216 changes: 216 additions & 0 deletions django_mongodb_backend/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
import pickle
from datetime import datetime, timezone

from django.core.cache.backends.base import DEFAULT_TIMEOUT, BaseCache
from django.core.cache.backends.db import Options
from django.db import connections, router
from django.utils.functional import cached_property
from pymongo import ASCENDING, DESCENDING, IndexModel, ReturnDocument
from pymongo.errors import DuplicateKeyError, OperationFailure


class MongoSerializer:
    """Convert cache values to/from a MongoDB-storable representation."""

    def __init__(self, protocol=None):
        # Default to the most efficient pickle protocol available.
        self.protocol = pickle.HIGHEST_PROTOCOL if protocol is None else protocol

    def dumps(self, obj):
        """Return obj ready for storage: plain ints pass through, everything
        else is pickled."""
        # Storing real integers unpickled keeps incr()/decr() atomic on the
        # server ($inc). type() is used instead of isinstance() so that bool
        # (an int subclass) is still pickled.
        return obj if type(obj) is int else pickle.dumps(obj, self.protocol)  # noqa: E721

    def loads(self, data):
        """Inverse of dumps(): ints come back as ints, bytes are unpickled."""
        try:
            return int(data)
        except (TypeError, ValueError):
            return pickle.loads(data)  # noqa: S301


class MongoDBCache(BaseCache):
    """Django cache backend that stores entries in a MongoDB collection.

    Each cache entry is a document ``{key, value, expires_at}``. A TTL index
    on ``expires_at`` lets the server purge expired entries in the
    background; because that purge is not immediate, every read also filters
    on ``expires_at`` explicitly.
    """

    # Protocol handed to MongoSerializer for pickling non-integer values.
    pickle_protocol = pickle.HIGHEST_PROTOCOL

    def __init__(self, collection_name, params):
        super().__init__(params)
        self._collection_name = collection_name

        class CacheEntry:
            # Minimal model stand-in so database routers can be consulted
            # (db_for_read/db_for_write/allow_migrate_model) for the cache.
            _meta = Options(collection_name)

        self.cache_model_class = CacheEntry

    def create_indexes(self):
        """Create the TTL index and the unique key index on the collection."""
        expires_index = IndexModel("expires_at", expireAfterSeconds=0)
        key_index = IndexModel("key", unique=True)
        self.collection_for_write.create_indexes([expires_index, key_index])

    @cached_property
    def serializer(self):
        return MongoSerializer(self.pickle_protocol)

    @property
    def collection_for_read(self):
        # Consult the router on each access so multi-database routing works.
        db = router.db_for_read(self.cache_model_class)
        return connections[db].get_collection(self._collection_name)

    @property
    def collection_for_write(self):
        db = router.db_for_write(self.cache_model_class)
        return connections[db].get_collection(self._collection_name)

    def _filter_expired(self, expired=False):
        """
        Return MQL to exclude expired entries (needed because the MongoDB
        daemon does not remove expired entries precisely when they expire).
        If expired=True, return MQL to include only expired entries.
        """
        op = "$lt" if expired else "$gte"
        # datetime.utcnow() is deprecated as of Python 3.12; use an aware UTC
        # datetime instead. PyMongo encodes naive-UTC and aware-UTC datetimes
        # to the same BSON date, so stored data and comparisons are unchanged.
        return {"expires_at": {op: datetime.now(tz=timezone.utc)}}

    def get_backend_timeout(self, timeout=DEFAULT_TIMEOUT):
        """Return the expiration as a datetime; datetime.max means 'never'."""
        if timeout is None:
            return datetime.max
        timestamp = super().get_backend_timeout(timeout)
        return datetime.fromtimestamp(timestamp, tz=timezone.utc)

    def get(self, key, default=None, version=None):
        return self.get_many([key], version).get(key, default)

    def get_many(self, keys, version=None):
        """Return a dict mapping each found, non-expired key to its value."""
        if not keys:
            return {}
        # Map the made (versioned) key back to the caller's original key.
        keys_map = {self.make_and_validate_key(key, version=version): key for key in keys}
        with self.collection_for_read.find(
            {"key": {"$in": tuple(keys_map)}, **self._filter_expired(expired=False)}
        ) as cursor:
            return {keys_map[row["key"]]: self.serializer.loads(row["value"]) for row in cursor}

    def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
        """Unconditionally store value under key (upsert)."""
        key = self.make_and_validate_key(key, version=version)
        num = self.collection_for_write.count_documents({}, hint="_id_")
        if num >= self._max_entries:
            self._cull(num)
        self.collection_for_write.update_one(
            {"key": key},
            {
                "$set": {
                    "key": key,
                    "value": self.serializer.dumps(value),
                    "expires_at": self.get_backend_timeout(timeout),
                }
            },
            upsert=True,
        )

    def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
        """Store value only if key is absent or expired; return True if set."""
        key = self.make_and_validate_key(key, version=version)
        num = self.collection_for_write.count_documents({}, hint="_id_")
        if num >= self._max_entries:
            self._cull(num)
        try:
            # Matching only expired entries means a live entry triggers the
            # upsert path, which then violates the unique index on "key".
            self.collection_for_write.update_one(
                {"key": key, **self._filter_expired(expired=True)},
                {
                    "$set": {
                        "key": key,
                        "value": self.serializer.dumps(value),
                        "expires_at": self.get_backend_timeout(timeout),
                    }
                },
                upsert=True,
            )
        except DuplicateKeyError:
            return False
        return True

    def _cull(self, num):
        """Delete entries to get the collection back under MAX_ENTRIES."""
        if self._cull_frequency == 0:
            self.clear()
        else:
            # The fraction of entries that are culled when MAX_ENTRIES is
            # reached is 1 / CULL_FREQUENCY. For example, in the default case
            # of CULL_FREQUENCY=3, 2/3 of the entries are kept, thus `keep_num`
            # will be 2/3 of the current number of entries.
            keep_num = num - num // self._cull_frequency
            try:
                # Find the first cache entry beyond the retention limit,
                # culling entries that expire the soonest.
                deleted_from = next(
                    self.collection_for_write.aggregate(
                        [
                            {"$sort": {"expires_at": DESCENDING, "key": ASCENDING}},
                            {"$skip": keep_num},
                            {"$limit": 1},
                            {"$project": {"key": 1, "expires_at": 1}},
                        ]
                    )
                )
            except StopIteration:
                # If no entries are found, there is nothing to delete. It may
                # happen if the database removes expired entries between the
                # query to get `num` and the query to get `deleted_from`.
                pass
            else:
                # Cull the cache.
                self.collection_for_write.delete_many(
                    {
                        "$or": [
                            # Delete keys that expire before `deleted_from`...
                            {"expires_at": {"$lt": deleted_from["expires_at"]}},
                            # and the entries that share an expiration with
                            # `deleted_from` but are alphabetically after it
                            # (per the same sorting to fetch `deleted_from`).
                            {
                                "$and": [
                                    {"expires_at": deleted_from["expires_at"]},
                                    {"key": {"$gte": deleted_from["key"]}},
                                ]
                            },
                        ]
                    }
                )

    def touch(self, key, timeout=DEFAULT_TIMEOUT, version=None):
        """Refresh key's expiration; return True if the key exists."""
        key = self.make_and_validate_key(key, version=version)
        res = self.collection_for_write.update_one(
            {"key": key}, {"$set": {"expires_at": self.get_backend_timeout(timeout)}}
        )
        return res.matched_count > 0

    def incr(self, key, delta=1, version=None):
        """Atomically add delta to the (unpickled integer) value of key.

        Raise ValueError if the key is missing/expired, or TypeError if the
        stored value is not numeric (the server rejects $inc on it).
        """
        serialized_key = self.make_and_validate_key(key, version=version)
        try:
            updated = self.collection_for_write.find_one_and_update(
                {"key": serialized_key, **self._filter_expired(expired=False)},
                {"$inc": {"value": delta}},
                return_document=ReturnDocument.AFTER,
            )
        except OperationFailure as exc:
            method_name = "incr" if delta >= 1 else "decr"
            raise TypeError(f"Cannot apply {method_name}() to a non-numeric value.") from exc
        if updated is None:
            raise ValueError(f"Key '{key}' not found.") from None
        return updated["value"]

    def delete(self, key, version=None):
        """Delete key; return True if a document was removed."""
        return self._delete_many([key], version)

    def delete_many(self, keys, version=None):
        # Per the BaseCache contract, delete_many() returns None.
        self._delete_many(keys, version)

    def _delete_many(self, keys, version=None):
        if not keys:
            return False
        keys = tuple(self.make_and_validate_key(key, version=version) for key in keys)
        return bool(self.collection_for_write.delete_many({"key": {"$in": keys}}).deleted_count)

    def has_key(self, key, version=None):
        """Return True if key exists and is not expired."""
        key = self.make_and_validate_key(key, version=version)
        num = self.collection_for_read.count_documents(
            {"key": key, **self._filter_expired(expired=False)}
        )
        return num > 0

    def clear(self):
        """Remove every entry from the cache collection."""
        self.collection_for_write.delete_many({})
13 changes: 13 additions & 0 deletions django_mongodb_backend/creation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from django.conf import settings
from django.db.backends.base.creation import BaseDatabaseCreation

from django_mongodb_backend.management.commands.createcachecollection import (
Command as CreateCacheCollection,
)


class DatabaseCreation(BaseDatabaseCreation):
def _execute_create_test_db(self, cursor, parameters, keepdb=False):
Expand All @@ -16,3 +20,12 @@ def _destroy_test_db(self, test_database_name, verbosity):
for collection in self.connection.introspection.table_names():
if not collection.startswith("system."):
self.connection.database.drop_collection(collection)

def create_test_db(self, *args, **kwargs):
    """Create the test database, then its cache collections/indexes."""
    test_database_name = super().create_test_db(*args, **kwargs)
    # Not using call_command() avoids the requirement to put
    # "django_mongodb_backend" in INSTALLED_APPS.
    command = CreateCacheCollection()
    command.handle(database=self.connection.alias, verbosity=kwargs["verbosity"])
    return test_database_name
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from django.conf import settings
from django.core.cache import caches
from django.core.management.base import BaseCommand
from django.db import DEFAULT_DB_ALIAS, connections, router

from django_mongodb_backend.cache import MongoDBCache


class Command(BaseCommand):
    """Management command that creates MongoDB cache collections.

    Mirrors Django's createcachetable: with no arguments, every MongoDBCache
    backend in settings.CACHES gets its collection's indexes created.
    """

    help = "Creates the collections needed to use the MongoDB cache backend."
    requires_system_checks = []

    def add_arguments(self, parser):
        parser.add_argument(
            "args",
            metavar="collection_name",
            nargs="*",
            # Fixed grammar: "collections names" -> "collection names".
            help="Optional collection names. Otherwise, settings.CACHES is "
            "used to find cache collections.",
        )
        parser.add_argument(
            "--database",
            default=DEFAULT_DB_ALIAS,
            help="Nominates a database onto which the cache collections will be "
            'installed. Defaults to the "default" database.',
        )

    def handle(self, *collection_names, **options):
        """Create collections for the given names or from settings.CACHES."""
        db = options["database"]
        self.verbosity = options["verbosity"]
        if collection_names:
            # Legacy behavior, collection_name specified as argument
            for collection_name in collection_names:
                self.check_collection(db, collection_name)
        else:
            for cache_alias in settings.CACHES:
                cache = caches[cache_alias]
                if isinstance(cache, MongoDBCache):
                    self.check_collection(db, cache._collection_name)

    def check_collection(self, database, collection_name):
        """Create the collection's indexes on `database` unless the
        collection already exists or the router forbids migrating it."""
        cache = MongoDBCache(collection_name, {})
        if not router.allow_migrate_model(database, cache.cache_model_class):
            return
        connection = connections[database]
        if cache._collection_name in connection.introspection.table_names():
            if self.verbosity > 0:
                self.stdout.write("Cache collection '%s' already exists." % cache._collection_name)
            return
        cache.create_indexes()
4 changes: 4 additions & 0 deletions docs/source/_ext/djangodocs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from sphinx.domains.std import Cmdoption


def setup(app):
app.add_object_type(
directivename="django-admin",
Expand All @@ -14,3 +17,4 @@ def setup(app):
rolename="setting",
indextemplate="pair: %s; setting",
)
app.add_directive("django-admin-option", Cmdoption)
4 changes: 4 additions & 0 deletions docs/source/_static/custom.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/* Sphinx/alabaster styling tweak for admonition boxes. */
p.admonition-title::after {
    /* Remove colon after admonition titles. */
    content: none;
}
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,4 @@
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = "alabaster"
# html_static_path = ["_static"]
html_static_path = ["_static"]
5 changes: 5 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ Forms

- :doc:`ref/forms`

Core functionalities
====================

- :doc:`topics/cache`

Miscellaneous
=============

Expand Down
28 changes: 28 additions & 0 deletions docs/source/ref/django-admin.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
===================
Management commands
===================

Django MongoDB Backend includes some :doc:`Django management commands
<django:ref/django-admin>`.

Required configuration
======================

To make these commands available, you must include ``"django_mongodb_backend"``
in the :setting:`INSTALLED_APPS` setting.

Available commands
==================

``createcachecollection``
-------------------------

.. django-admin:: createcachecollection

Creates the cache collection for use with the :doc:`database cache backend
</topics/cache>` using the information from your :setting:`CACHES` setting.

.. django-admin-option:: --database DATABASE

Specifies the database in which the cache collection(s) will be created.
Defaults to ``default``.
1 change: 1 addition & 0 deletions docs/source/ref/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ API reference

models/index
forms
django-admin
utils
7 changes: 7 additions & 0 deletions docs/source/releases/5.1.x.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
Django MongoDB Backend 5.1.x
============================

5.1.0 beta 2
============

*Unreleased*

- Added support for :doc:`database caching </topics/cache>`.

5.1.0 beta 1
============

Expand Down
Loading
Loading