Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 72 additions & 11 deletions openedx/core/lib/session_serializers.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,90 @@
"""
Custom session serializer to deal with going from python2 and python3.
Session serializer for Open edX.

Historically this wrapped the standard library object serializer with a
fixed protocol to survive the Python 2 -> Python 3 upgrade. Python 2
support is long gone, but a number of callers still put non-JSON-safe
values (Decimal, bytes, custom classes such as CourseMasquerade) into
``request.session``, so a blanket switch to
``django.contrib.sessions.serializers.JSONSerializer`` is not yet
possible.

To remove the CWE-502 unsafe-deserialization sink without forcing that
refactor, this serializer authenticates every payload with an HMAC-SHA256
signature derived from ``SECRET_KEY`` before the object sink is ever
reached. Unsigned or tampered payloads raise ``SuspiciousOperation``,
which Django's session backend converts into an empty session (the user
is redirected to login). This gives defense-in-depth on top of Django's
own signing layer and makes the deserialization sink unreachable by any
attacker who does not already hold ``SECRET_KEY``.
"""
import pickle
import hashlib
import hmac
import pickle # noqa: S403 - authenticity is enforced by an HMAC check in loads()

from django.conf import settings
from django.core.exceptions import SuspiciousOperation

# SHA-256 digest length in bytes. Payload layout is:
# ``<32-byte HMAC signature> || <serialised bytes>``.
_SIGNATURE_LENGTH = hashlib.sha256().digest_size

# Domain-separation salt for the derived HMAC key so this serializer's key
# is never identical to any other HMAC that happens to reuse SECRET_KEY
# directly.
_HMAC_SALT = b"openedx.core.lib.session_serializers.PickleSerializer.v1"


def _derive_key():
"""
Derive an HMAC key from Django's ``SECRET_KEY``.
"""
secret = settings.SECRET_KEY
if isinstance(secret, str):
secret = secret.encode("utf-8")
return hashlib.sha256(_HMAC_SALT + secret).digest()


class PickleSerializer:
"""
Set the pickle protocol version explicitly because we don't want
to have session thrashing when we upgrade to newer versions of
python.
HMAC-authenticated session serializer.

Based on the PickleSerializer built into django:
https://github.com/django/django/blob/master/django/contrib/sessions/serializers.py
``dumps`` returns ``HMAC-SHA256(payload) || payload``.
``loads`` refuses to deserialise anything whose signature does not
verify in constant time against the key derived from ``SECRET_KEY``.
The class name and import path are preserved so
``openedx/envs/common.py`` does not need to change.
"""

protocol = 4

def dumps(self, obj):
"""
Return a pickled representation of object.
Return an HMAC-authenticated serialised representation of ``obj``.
"""
return pickle.dumps(obj, self.protocol)
payload = pickle.dumps(obj, self.protocol)
signature = hmac.new(_derive_key(), payload, hashlib.sha256).digest()
return signature + payload

def loads(self, data):
"""
Return a python object from pickled data.
Verify the HMAC and return the python object.

Raises ``SuspiciousOperation`` if ``data`` is too short, unsigned,
or has been tampered with. Django's session backend catches this
and invalidates the session rather than propagating.
"""
return pickle.loads(data)
if not isinstance(data, (bytes, bytearray)) or len(data) < _SIGNATURE_LENGTH:
raise SuspiciousOperation(
"Session payload is missing its HMAC signature."
)
signature = bytes(data[:_SIGNATURE_LENGTH])
payload = bytes(data[_SIGNATURE_LENGTH:])
expected = hmac.new(_derive_key(), payload, hashlib.sha256).digest()
if not hmac.compare_digest(signature, expected):
raise SuspiciousOperation(
"Session payload HMAC verification failed."
)
# Payload authenticity is enforced by the HMAC check above, so the
# subsequent call operates only on data this process just produced.
return pickle.loads(payload) # noqa: S301
119 changes: 119 additions & 0 deletions openedx/core/lib/tests/test_session_serializers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
Tests for ``openedx.core.lib.session_serializers``.

Regression coverage for the session-serialisation hardening: the legacy
deserialisation sink must be unreachable without a valid HMAC derived
from ``SECRET_KEY``.
"""
# The test module constructs malicious raw payloads to exercise the
# HMAC guard; the production code never deserialises untrusted bytes.
import pickle # noqa: S403

import pytest
from django.core.exceptions import SuspiciousOperation
from django.test import SimpleTestCase, override_settings

from openedx.core.lib.session_serializers import (
_SIGNATURE_LENGTH,
PickleSerializer,
)


@override_settings(SECRET_KEY="unit-test-key")
class PickleSerializerUnitTests(SimpleTestCase):
"""
Unit tests: the serializer must round-trip legitimate session state
and refuse any payload it did not sign itself.
"""

def setUp(self):
super().setUp()
self.serializer = PickleSerializer()

def test_roundtrip_preserves_primitive_session_state(self):
state = {
"_auth_user_id": "42",
"country_code": "FR",
"ip_address": "1.2.3.4",
}
assert self.serializer.loads(self.serializer.dumps(state)) == state

def test_roundtrip_preserves_non_json_types(self):
"""
Existing callers (CourseMasquerade, Decimal donation amounts, TPA
pipeline bytes) rely on non-JSON-safe values surviving a session
round trip. The HMAC wrapper must not change that.
"""
import decimal
state = {
"donation_for_course": {
"course-v1:edX+DemoX+T1": decimal.Decimal("42.50"),
},
"tpa_custom_auth_entry_data": {
"data": b"base64bytes==",
"hmac": b"sigbytes==",
},
}
decoded = self.serializer.loads(self.serializer.dumps(state))
assert decoded == state
assert isinstance(
decoded["donation_for_course"]["course-v1:edX+DemoX+T1"],
decimal.Decimal,
)

def test_sec_pickle_session_regression_rejects_unsigned_payload(self):
"""
An unsigned serialised blob (the shape an attacker would inject
directly into Redis) must be refused before deserialisation.
"""
raw = pickle.dumps({"_auth_user_id": "42"}, 4)
with pytest.raises(SuspiciousOperation):
self.serializer.loads(raw)

def test_sec_pickle_session_regression_rejects_tampered_signature(self):
"""Flipping any byte of the signature must fail verification."""
signed = bytearray(self.serializer.dumps({"a": 1}))
signed[0] ^= 0xFF
with pytest.raises(SuspiciousOperation):
self.serializer.loads(bytes(signed))

def test_sec_pickle_session_regression_rejects_tampered_body(self):
"""Flipping any byte of the payload must fail verification."""
signed = bytearray(self.serializer.dumps({"a": 1}))
signed[-1] ^= 0xFF
with pytest.raises(SuspiciousOperation):
self.serializer.loads(bytes(signed))

def test_sec_pickle_session_regression_rejects_truncated_payload(self):
"""A payload shorter than the signature must be refused."""
with pytest.raises(SuspiciousOperation):
self.serializer.loads(b"\x00" * (_SIGNATURE_LENGTH - 1))

def test_sec_pickle_session_regression_rejects_non_bytes(self):
"""A non-bytes input must be refused."""
with pytest.raises(SuspiciousOperation):
self.serializer.loads("not-bytes")

def test_sec_pickle_session_regression_rejects_rce_gadget(self):
"""
A classic ``__reduce__``-based gadget must never reach the
deserialisation sink — verification happens first.
"""
class _Gadget: # pylint: disable=too-few-public-methods
def __reduce__(self):
import os
return (os.system, ("echo pwned",))

malicious = pickle.dumps(_Gadget())
with pytest.raises(SuspiciousOperation):
self.serializer.loads(malicious)

def test_sec_pickle_session_regression_key_rotation_invalidates(self):
"""
Sessions signed under an old ``SECRET_KEY`` must be rejected once
the key is rotated.
"""
with override_settings(SECRET_KEY="old-key"):
signed = PickleSerializer().dumps({"a": 1})
with override_settings(SECRET_KEY="new-key"), pytest.raises(SuspiciousOperation):
PickleSerializer().loads(signed)