Skip to content

Commit ad2e643

Browse files
Merge pull request #225 from SaplingLearn/security/125-search-materials-user-scope
fix(security): scope search_course_materials by user_id (#125)
2 parents 97b3aaf + 59b2bac commit ad2e643

2 files changed

Lines changed: 84 additions & 13 deletions

File tree

backend/agents/tools/chat_context.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,17 @@ async def search_course_materials(
145145
course_id: str | None,
146146
query: str,
147147
limit: int = 5,
148+
*,
149+
user_id: str,
148150
) -> list[CourseMaterial]:
149-
"""Return the top `limit` documents for `course_id`, ranked by
150-
keyword overlap with `query`.
151+
"""Return the top `limit` documents owned by `user_id` in `course_id`,
152+
ranked by keyword overlap with `query`.
153+
154+
#125: documents are user-scoped *within* a shared course, so the query
155+
MUST filter on user_id as well as course_id — otherwise another enrolled
156+
student's private summary/concept_notes get decrypted into this user's LLM
157+
context. user_id is keyword-only and required so no caller can silently
158+
omit the scope.
151159
152160
Drops rows that have neither a summary nor concept notes — there's
153161
nothing to ground on, and including them would waste a tool-result
@@ -167,7 +175,10 @@ def _fetch() -> list[dict[str, Any]]:
167175
return (
168176
table("documents").select(
169177
"id,file_name,summary,concept_notes",
170-
filters={"course_id": f"eq.{course_id}"},
178+
filters={
179+
"course_id": f"eq.{course_id}",
180+
"user_id": f"eq.{user_id}",
181+
},
171182
order="created_at.desc",
172183
)
173184
or []
@@ -238,11 +249,13 @@ async def search_course_materials_tool(
238249
) -> list[CourseMaterial]:
239250
"""Pydantic AI tool wrapper.
240251
241-
The LLM supplies `query` (and optionally `limit`); `course_id` is
242-
pulled from `ctx.deps` so the model can't aim a search at another
243-
course's materials.
252+
The LLM supplies `query` (and optionally `limit`); `course_id` and
253+
`user_id` are pulled from `ctx.deps` so the model can't aim a search at
254+
another course's — or another user's — materials.
244255
"""
245-
return await search_course_materials(ctx.deps.course_id, query, limit)
256+
return await search_course_materials(
257+
ctx.deps.course_id, query, limit, user_id=ctx.deps.user_id
258+
)
246259

247260

248261
# read_session_history

backend/tests/test_chat_context_tools.py

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,62 @@ def _run(coro):
4141
# ── search_course_materials ───────────────────────────────────────────────
4242

4343

44+
class TestSearchCourseMaterialsUserScope:
45+
"""#125: documents are user-scoped within a shared course. The query must
46+
filter on user_id, or another enrolled student's private summary/concept
47+
notes get decrypted into this user's LLM context."""
48+
49+
def test_does_not_return_other_users_documents(self):
50+
doc_mine = {
51+
"id": "doc_mine",
52+
"file_name": "my_notes.pdf",
53+
"summary": "my private summary about recursion",
54+
"concept_notes": [],
55+
}
56+
doc_other = {
57+
"id": "doc_other",
58+
"file_name": "their_notes.pdf",
59+
"summary": "another student's private summary about recursion",
60+
"concept_notes": [],
61+
}
62+
63+
def _row_scoped_select(*_args, **kwargs):
64+
# Faithful row-scoped store: both docs sit in the same course, owned
65+
# by different users. A query scoped to me returns only mine; an
66+
# UNSCOPED query (pre-fix — no user_id filter) leaks the whole class.
67+
f = kwargs.get("filters", {})
68+
uid = f.get("user_id")
69+
if uid == "eq.user_mine":
70+
return [doc_mine]
71+
if "user_id" not in f:
72+
return [doc_mine, doc_other]
73+
return []
74+
75+
with patch("agents.tools.chat_context.table") as t, patch(
76+
"agents.tools.chat_context.decrypt_if_present", side_effect=lambda v: v
77+
):
78+
t.return_value.select.side_effect = _row_scoped_select
79+
result = _run(
80+
search_course_materials("course_cs101", "recursion", user_id="user_mine")
81+
)
82+
83+
ids = {m.document_id for m in result}
84+
assert "doc_mine" in ids
85+
# Pre-fix this leaked the other student's doc into the result (and thus
86+
# the LLM context); the user_id scope keeps it out.
87+
assert "doc_other" not in ids
88+
# And the mechanism that keeps it out: the DB query is scoped to the
89+
# caller's user_id, not course_id alone.
90+
filters = t.return_value.select.call_args.kwargs["filters"]
91+
assert filters.get("user_id") == "eq.user_mine"
92+
93+
4494
class TestSearchCourseMaterials:
4595
def test_returns_empty_when_course_id_is_none(self):
4696
# No table call should happen at all — cross-course search is a
4797
# data-leak risk we explicitly avoid.
4898
with patch("agents.tools.chat_context.table") as t:
49-
result = _run(search_course_materials(None, "recursion"))
99+
result = _run(search_course_materials(None, "recursion", user_id="user_andres"))
50100

51101
assert result == []
52102
t.assert_not_called()
@@ -82,7 +132,9 @@ def test_scores_by_keyword_overlap_and_caps_at_limit(self):
82132
):
83133
t.return_value.select.return_value = rows
84134
result = _run(
85-
search_course_materials("course_cs101", "recursion base case", limit=2)
135+
search_course_materials(
136+
"course_cs101", "recursion base case", limit=2, user_id="user_andres"
137+
)
86138
)
87139

88140
assert len(result) == 2
@@ -113,7 +165,9 @@ def test_drops_empty_entries(self):
113165
"agents.tools.chat_context.decrypt_if_present", side_effect=lambda v: v
114166
):
115167
t.return_value.select.return_value = rows
116-
result = _run(search_course_materials("course_cs101", "pointer"))
168+
result = _run(
169+
search_course_materials("course_cs101", "pointer", user_id="user_andres")
170+
)
117171

118172
assert [m.document_id for m in result] == ["doc_good"]
119173

@@ -145,7 +199,9 @@ def fake_decrypt_json(value):
145199
"agents.tools.chat_context.decrypt_json", side_effect=fake_decrypt_json
146200
) as dec_json:
147201
t.return_value.select.return_value = rows
148-
result = _run(search_course_materials("course_cs101", "foo"))
202+
result = _run(
203+
search_course_materials("course_cs101", "foo", user_id="user_andres")
204+
)
149205

150206
# Both decrypt helpers were invoked at the boundary.
151207
assert dec_str.called, "decrypt_if_present must run on summary"
@@ -309,8 +365,10 @@ def test_search_tool_passes_course_id_from_deps(self):
309365
inner.return_value = []
310366
_run(search_course_materials_tool(self._ctx(), "recursion", limit=3))
311367

312-
# course_id pulled from deps, not from the LLM.
313-
inner.assert_awaited_once_with("course_cs101", "recursion", 3)
368+
# course_id AND user_id pulled from deps, not from the LLM (#125).
369+
inner.assert_awaited_once_with(
370+
"course_cs101", "recursion", 3, user_id="user_andres"
371+
)
314372

315373
def test_history_tool_passes_session_id_from_deps(self):
316374
with patch(

0 commit comments

Comments
 (0)