fix(quiz): close 2 remaining review issues from PR #71

Jose-Gael-Cruz-Lopez · claude · Jose-Gael-Cruz-Lopez · commit 4bee59ee89f5 · 2026-05-04T08:36:13.000-04:00
1. All-drift cascade test (TestQuizAgentFallback): new `test_falls_back_to_legacy_when_all_questions_drift` pins the path the 3 contract tests don't cover directly: agent returns a schema-valid Quiz where every question's correct_answer doesn't appear in its options → _quiz_via_agent's wire-format filter drops all of them → raises RuntimeError → bare-Exception catch in generate_quiz routes to _legacy_generate_quiz. Asserts the legacy gemini path actually runs and the legacy fallback question is what reaches the client. 2. Drift warning no longer leaks student content to local logs: _agent_question_to_wire's drift warning was dumping the raw correct_answer and options (via %r) along with the concept text. Logfire's egress scrubber (PR #67) handled remote ingestion, but Railway's local stdout still saw the unredacted strings. Now we log only: n_options=4, canonical_len=18, fp=<sha256[:12]>. The fingerprint is stable across recurrences of the same drift, so we still get correlation; the actual content stays out of stdout. The hashlib import is hoisted to module scope. Pre-existing transient: tests/test_ocr_pipeline.py::test_gemini_parse, which flickered red in the previous review run, cleared on re-run (skipped in isolation, passing in full suite). Confirmed transient live-Gemini hiccup, not caused by this branch. Tests: - tests/test_quiz_routes.py: 23/23 (the previous "24" was a miscount; net +1 from the new cascade test). - Full backend suite: 443 passed, 3 pre-existing live-Supabase failures unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/backend/routes/quiz.py b/backend/routes/quiz.py
@@ -1,7 +1,8 @@
-import logging
-import uuid
+import hashlib
 import json
+import logging
 import os
+import uuid
 from datetime import datetime
 
 from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
@@ -79,10 +80,21 @@ def _agent_question_to_wire(q: QuizQuestion, qid: int) -> dict | None:
         # Generation drift: agent's correct_answer doesn't match any
         # option verbatim. Surface in logs (Logfire span carries the
         # question_id correlation) and drop. Caller filters None.
+        #
+        # Don't log the raw text — student-content concept names and
+        # quiz answers don't belong in stdout/Railway logs. The
+        # fingerprint (first 12 hex chars of sha256 over
+        # "<canonical>|<opt1>|<opt2>|...") is stable, so a recurring
+        # generation drift can still be correlated; the full content is
+        # still in Logfire spans (where the PR #67 scrubber controls egress).
+        canonical_only = q.correct_answer.strip()
+        fp = hashlib.sha256(
+            f"{canonical_only}|{'|'.join(q.options)}".encode("utf-8")
+        ).hexdigest()[:12]
         logger.warning(
-            "quiz: dropping question %d — correct_answer %r not found "
-            "in options %r (concept=%s)",
-            qid, q.correct_answer, q.options, q.concept,
+            "quiz: dropping question id=%d — correct_answer not found in "
+            "options (n_options=%d, canonical_len=%d, fp=%s)",
+            qid, len(q.options), len(canonical_only), fp,
         )
         return None
     return {
diff --git a/backend/tests/test_quiz_routes.py b/backend/tests/test_quiz_routes.py
@@ -492,6 +492,56 @@ def test_falls_back_to_legacy_on_unexpected_exception(self):
         gemini_mock.assert_called_once()
         assert r.json()["questions"][0]["question"] == "Legacy fallback question?"
 
+    def test_falls_back_to_legacy_when_all_questions_drift(self):
+        """Cascade: agent succeeds but every question fails wire-format
+        validation → _quiz_via_agent raises RuntimeError →
+        bare-Exception catch in generate_quiz routes to legacy.
+
+        This pins the path the 3 contract tests don't directly exercise
+        (they test _agent_question_to_wire in isolation; this exercises
+        the full route under the all-drift condition).
+        """
+        # Build a Quiz where every question's correct_answer doesn't
+        # appear in its options — schema-valid, but the wire-format
+        # check drops every one.
+        drift_quiz = Quiz(questions=[
+            QuizQuestion(
+                question=f"Q{i}?",
+                type="multiple_choice",
+                difficulty="easy",
+                options=["a", "b", "c", "d"],
+                correct_answer="MISMATCH",  # not in options
+                explanation="x",
+                concept="X",
+            )
+            for i in range(3)
+        ])
+
+        get_graph_p, get_ctx_p, gemini_p = self._patch_legacy_dependencies()
+        with (
+            patch("routes.quiz.table", side_effect=_generate_table_factory()),
+            patch(
+                "routes.quiz.quiz_agent.run",
+                new=AsyncMock(return_value=SimpleNamespace(output=drift_quiz)),
+            ),
+            get_graph_p,
+            get_ctx_p,
+            gemini_p as gemini_mock,
+        ):
+            r = client.post("/api/quiz/generate", json={
+                "user_id": "user_andres",
+                "concept_node_id": "node1",
+                "num_questions": 3,
+                "difficulty": "easy",
+                "use_shared_context": False,
+            })
+
+        assert r.status_code == 200
+        # Legacy path fired (every question dropped → RuntimeError →
+        # caught → _legacy_generate_quiz called).
+        gemini_mock.assert_called_once()
+        assert r.json()["questions"][0]["question"] == "Legacy fallback question?"
+
 
 # ── Wire-format contract: pinned by tests so silent drift can't recur ───────