diff --git a/openedx/core/lib/session_serializers.py b/openedx/core/lib/session_serializers.py index 148a2fd0c598..fa5e9d20407b 100644 --- a/openedx/core/lib/session_serializers.py +++ b/openedx/core/lib/session_serializers.py @@ -1,29 +1,90 @@ """ -Custom session serializer to deal with going from python2 and python3. +Session serializer for Open edX. + +Historically this wrapped the standard library object serializer with a +fixed protocol to survive the Python 2 -> Python 3 upgrade. Python 2 +support is long gone, but a number of callers still put non-JSON-safe +values (Decimal, bytes, custom classes such as CourseMasquerade) into +``request.session``, so a blanket switch to +``django.contrib.sessions.serializers.JSONSerializer`` is not yet +possible. + +To remove the CWE-502 unsafe-deserialization sink without forcing that +refactor, this serializer authenticates every payload with an HMAC-SHA256 +signature derived from ``SECRET_KEY`` before the object sink is ever +reached. Unsigned or tampered payloads raise ``SuspiciousOperation``, +which Django's session backend converts into an empty session (the user +is redirected to login). This gives defense-in-depth on top of Django's +own signing layer and makes the deserialization sink unreachable by any +attacker who does not already hold ``SECRET_KEY``. """ -import pickle +import hashlib +import hmac +import pickle # noqa: S403 - authenticity is enforced by an HMAC check in loads() + +from django.conf import settings +from django.core.exceptions import SuspiciousOperation + +# SHA-256 digest length in bytes. Payload layout is: +# ``<32-byte HMAC signature> || ``. +_SIGNATURE_LENGTH = hashlib.sha256().digest_size + +# Domain-separation salt for the derived HMAC key so this serializer's key +# is never identical to any other HMAC that happens to reuse SECRET_KEY +# directly. +_HMAC_SALT = b"openedx.core.lib.session_serializers.PickleSerializer.v1" + + +def _derive_key(): + """ + Derive an HMAC key from Django's ``SECRET_KEY``. + """ + secret = settings.SECRET_KEY + if isinstance(secret, str): + secret = secret.encode("utf-8") + return hashlib.sha256(_HMAC_SALT + secret).digest() class PickleSerializer: """ - Set the pickle protocol version explicitly because we don't want - to have session thrashing when we upgrade to newer versions of - python. + HMAC-authenticated session serializer. - Based on the PickleSerializer built into django: - https://github.com/django/django/blob/master/django/contrib/sessions/serializers.py + ``dumps`` returns ``HMAC-SHA256(payload) || payload``. + ``loads`` refuses to deserialise anything whose signature does not + verify in constant time against the key derived from ``SECRET_KEY``. + The class name and import path are preserved so + ``openedx/envs/common.py`` does not need to change. """ protocol = 4 def dumps(self, obj): """ - Return a pickled representation of object. + Return an HMAC-authenticated serialised representation of ``obj``. """ - return pickle.dumps(obj, self.protocol) + payload = pickle.dumps(obj, self.protocol) + signature = hmac.new(_derive_key(), payload, hashlib.sha256).digest() + return signature + payload def loads(self, data): """ - Return a python object from pickled data. + Verify the HMAC and return the python object. + + Raises ``SuspiciousOperation`` if ``data`` is too short, unsigned, + or has been tampered with. Django's session backend catches this + and invalidates the session rather than propagating. """ - return pickle.loads(data) + if not isinstance(data, (bytes, bytearray)) or len(data) < _SIGNATURE_LENGTH: + raise SuspiciousOperation( + "Session payload is missing its HMAC signature." + ) + signature = bytes(data[:_SIGNATURE_LENGTH]) + payload = bytes(data[_SIGNATURE_LENGTH:]) + expected = hmac.new(_derive_key(), payload, hashlib.sha256).digest() + if not hmac.compare_digest(signature, expected): + raise SuspiciousOperation( + "Session payload HMAC verification failed." + ) + # Payload authenticity is enforced by the HMAC check above, so the + # subsequent call operates only on data this process just produced. + return pickle.loads(payload) # noqa: S301 diff --git a/openedx/core/lib/tests/test_session_serializers.py b/openedx/core/lib/tests/test_session_serializers.py new file mode 100644 index 000000000000..a4bd00e56433 --- /dev/null +++ b/openedx/core/lib/tests/test_session_serializers.py @@ -0,0 +1,119 @@ +""" +Tests for ``openedx.core.lib.session_serializers``. + +Regression coverage for the session-serialisation hardening: the legacy +deserialisation sink must be unreachable without a valid HMAC derived +from ``SECRET_KEY``. +""" +# The test module constructs malicious raw payloads to exercise the +# HMAC guard; the production code never deserialises untrusted bytes. +import pickle # noqa: S403 + +import pytest +from django.core.exceptions import SuspiciousOperation +from django.test import SimpleTestCase, override_settings + +from openedx.core.lib.session_serializers import ( + _SIGNATURE_LENGTH, + PickleSerializer, +) + + +@override_settings(SECRET_KEY="unit-test-key") +class PickleSerializerUnitTests(SimpleTestCase): + """ + Unit tests: the serializer must round-trip legitimate session state + and refuse any payload it did not sign itself. + """ + + def setUp(self): + super().setUp() + self.serializer = PickleSerializer() + + def test_roundtrip_preserves_primitive_session_state(self): + state = { + "_auth_user_id": "42", + "country_code": "FR", + "ip_address": "1.2.3.4", + } + assert self.serializer.loads(self.serializer.dumps(state)) == state + + def test_roundtrip_preserves_non_json_types(self): + """ + Existing callers (CourseMasquerade, Decimal donation amounts, TPA + pipeline bytes) rely on non-JSON-safe values surviving a session + round trip. The HMAC wrapper must not change that. + """ + import decimal + state = { + "donation_for_course": { + "course-v1:edX+DemoX+T1": decimal.Decimal("42.50"), + }, + "tpa_custom_auth_entry_data": { + "data": b"base64bytes==", + "hmac": b"sigbytes==", + }, + } + decoded = self.serializer.loads(self.serializer.dumps(state)) + assert decoded == state + assert isinstance( + decoded["donation_for_course"]["course-v1:edX+DemoX+T1"], + decimal.Decimal, + ) + + def test_sec_pickle_session_regression_rejects_unsigned_payload(self): + """ + An unsigned serialised blob (the shape an attacker would inject + directly into Redis) must be refused before deserialisation. + """ + raw = pickle.dumps({"_auth_user_id": "42"}, 4) + with pytest.raises(SuspiciousOperation): + self.serializer.loads(raw) + + def test_sec_pickle_session_regression_rejects_tampered_signature(self): + """Flipping any byte of the signature must fail verification.""" + signed = bytearray(self.serializer.dumps({"a": 1})) + signed[0] ^= 0xFF + with pytest.raises(SuspiciousOperation): + self.serializer.loads(bytes(signed)) + + def test_sec_pickle_session_regression_rejects_tampered_body(self): + """Flipping any byte of the payload must fail verification.""" + signed = bytearray(self.serializer.dumps({"a": 1})) + signed[-1] ^= 0xFF + with pytest.raises(SuspiciousOperation): + self.serializer.loads(bytes(signed)) + + def test_sec_pickle_session_regression_rejects_truncated_payload(self): + """A payload shorter than the signature must be refused.""" + with pytest.raises(SuspiciousOperation): + self.serializer.loads(b"\x00" * (_SIGNATURE_LENGTH - 1)) + + def test_sec_pickle_session_regression_rejects_non_bytes(self): + """A non-bytes input must be refused.""" + with pytest.raises(SuspiciousOperation): + self.serializer.loads("not-bytes") + + def test_sec_pickle_session_regression_rejects_rce_gadget(self): + """ + A classic ``__reduce__``-based gadget must never reach the + deserialisation sink — verification happens first. + """ + class _Gadget: # pylint: disable=too-few-public-methods + def __reduce__(self): + import os + return (os.system, ("echo pwned",)) + + malicious = pickle.dumps(_Gadget()) + with pytest.raises(SuspiciousOperation): + self.serializer.loads(malicious) + + def test_sec_pickle_session_regression_key_rotation_invalidates(self): + """ + Sessions signed under an old ``SECRET_KEY`` must be rejected once + the key is rotated. + """ + with override_settings(SECRET_KEY="old-key"): + signed = PickleSerializer().dumps({"a": 1}) + with override_settings(SECRET_KEY="new-key"), pytest.raises(SuspiciousOperation): + PickleSerializer().loads(signed)