fix(quiz): symmetric "fast" semantics across agent + legacy paths

Jose-Gael-Cruz-Lopez · claude · Jose-Gael-Cruz-Lopez · commit a2fd5cdc9c77 · 2026-05-04T21:28:58.000-04:00
Closes the asymmetry I flagged in the most recent review of PR #71. Was: agent path "fast" → gemini-2.5-flash (upgrade), legacy path "fast" → gemini-2.5-flash-lite (no-op). Same body field, different models depending on which path ran. The kind of inconsistency that surfaces six months later as "why does Fast sometimes feel slower than Default?" Now: agent path AND legacy fallback both use the same mapping ("fast" → flash, "smart" → pro, else → lite). _PREF_MODEL_NAMES is the single source of truth on the agent side; the legacy elif-chain in _legacy_generate_quiz mirrors it exactly. Also: - backend/routes/quiz.py: re-imported MODEL_DEFAULT alongside MODEL_LITE/MODEL_SMART. Tightened the lazy-import comment in _resolve_model_pref to describe the actual benefit (deferring GoogleProvider construction to the first override request, not import-path isolation — agents.quiz is already imported at module top). - docs/decisions/0013-refactor-2-quiz-shipped.md: addendum reworded to "two independent layers, not multiplicative" — the env var sets the agent's startup baseline; the body field, when present, fully replaces it for the current call. Adds a paragraph documenting the agent/legacy symmetry and pointing at _PREF_MODEL_NAMES as the single source of truth. Tests (TestQuizModelPref grew to 7) - test_legacy_fallback_uses_default_when_pref_fast (NEW): pins the symmetry — legacy "fast" must call gemini with MODEL_DEFAULT, not MODEL_LITE. Asserts a clear failure message if anyone undoes the fix. - test_legacy_fallback_uses_lite_when_no_pref (NEW): pins the default — no model_pref still uses MODEL_LITE (the cheap baseline that's been there since the route shipped). - Existing 5 tests unchanged. Tests - tests/test_quiz_routes.py: 30/30 (was 28, +2). - Full backend suite: 533 passed, 3 pre-existing live-Supabase failures unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/backend/routes/quiz.py b/backend/routes/quiz.py
@@ -15,7 +15,7 @@
 from models import GenerateQuizBody, SubmitQuizBody
 from services.auth_guard import require_self
 from services.encryption import decrypt_if_present
-from services.gemini_service import MODEL_LITE, MODEL_SMART, call_gemini_json
+from services.gemini_service import MODEL_DEFAULT, MODEL_LITE, MODEL_SMART, call_gemini_json
 from services.graph_service import get_graph, update_streak
 from services.quiz_context_service import get_quiz_context, save_quiz_context
 from services.fingerprint import fingerprint
@@ -121,9 +121,13 @@ def _resolve_model_pref(model_pref: str | None):
     """Build a GoogleModel override for the per-request fast/smart
     preference, or return None to use the agent's default.
 
-    Lazy import keeps the agents module out of the route's import path
-    until the override is actually requested — keeps test fixtures that
-    patch routes.quiz.quiz_agent.run from re-instantiating providers.
+    `google_model` is imported lazily so that constructing a
+    GoogleProvider (which reads GEMINI_API_KEY at call time) only
+    happens when an override is actually requested — not at module
+    import. agents.quiz is already in this route's import graph, so
+    this isn't about import-path isolation; it's about deferring the
+    one runtime side-effect (the provider build) to the request that
+    needs it.
     """
     if not model_pref:
         return None
@@ -265,10 +269,18 @@ async def _legacy_generate_quiz(body: GenerateQuizBody, request: Request) -> lis
                     )
                 prompt += "\n\n" + "\n\n".join(addendum_parts)
 
-    # Honor the same fast/smart toggle the agent path uses. "smart"
-    # routes to gemini-2.5-pro; anything else (including "fast" and
-    # None) stays on the existing flash-lite default.
-    legacy_model = MODEL_SMART if body.model_pref == "smart" else MODEL_LITE
+    # Honor the same fast/smart toggle the agent path uses. The mapping
+    # mirrors _PREF_MODEL_NAMES exactly, so a user choosing "fast" gets
+    # the same upgraded model whether the agent succeeded or fell back
+    # here. Without this, "fast" was silently a no-op on the legacy
+    # path (still MODEL_LITE) — the kind of inconsistency that surfaces
+    # six months later as "why does Fast sometimes feel slower?"
+    if body.model_pref == "smart":
+        legacy_model = MODEL_SMART  # gemini-2.5-pro
+    elif body.model_pref == "fast":
+        legacy_model = MODEL_DEFAULT  # gemini-2.5-flash, matches _PREF_MODEL_NAMES
+    else:
+        legacy_model = MODEL_LITE  # gemini-2.5-flash-lite, agent default per ADR 0008
     try:
         result = call_gemini_json(prompt, model=legacy_model)
     except Exception as e:
diff --git a/backend/tests/test_quiz_routes.py b/backend/tests/test_quiz_routes.py
@@ -731,3 +731,55 @@ def test_legacy_fallback_uses_smart_when_pref_smart(self):
         # call_gemini_json was called with model=MODEL_SMART, not MODEL_LITE.
         gemini_mock.assert_called_once()
         assert gemini_mock.call_args.kwargs.get("model") == MODEL_SMART
+
+    def test_legacy_fallback_uses_default_when_pref_fast(self):
+        """Symmetry contract: legacy "fast" must upgrade to MODEL_DEFAULT
+        (gemini-2.5-flash) just like the agent path does. Without this,
+        a Fast request that trips the agent silently downgrades to
+        MODEL_LITE — the same kind of inconsistency users would feel
+        as "Fast sometimes gives worse quizzes than Default."
+        """
+        from services.gemini_service import MODEL_DEFAULT, MODEL_LITE
+        run_mock = AsyncMock(side_effect=RuntimeError("agent boom"))
+        gemini_mock = MagicMock(return_value={"questions": [{
+            "id": 1, "question": "Q?",
+            "options": [{"label": "A", "text": "x", "correct": True}],
+            "explanation": ".", "concept_tested": "X", "difficulty": "easy",
+        }]})
+        with (
+            patch("routes.quiz.table", side_effect=_generate_table_factory()),
+            patch("routes.quiz.quiz_agent.run", new=run_mock),
+            patch("routes.quiz.get_graph", return_value={"nodes": [], "edges": []}),
+            patch("routes.quiz.get_quiz_context", return_value={}),
+            patch("routes.quiz.call_gemini_json", new=gemini_mock),
+        ):
+            r = self._post({"model_pref": "fast"})
+        assert r.status_code == 200
+        gemini_mock.assert_called_once()
+        chosen = gemini_mock.call_args.kwargs.get("model")
+        assert chosen == MODEL_DEFAULT, (
+            f"Legacy fast→{chosen!r} expected MODEL_DEFAULT (gemini-2.5-flash); "
+            f"do NOT downgrade to MODEL_LITE (={MODEL_LITE!r})."
+        )
+
+    def test_legacy_fallback_uses_lite_when_no_pref(self):
+        """Default path (no model_pref) keeps using MODEL_LITE — the
+        cheap baseline that's been in place since the route shipped."""
+        from services.gemini_service import MODEL_LITE
+        run_mock = AsyncMock(side_effect=RuntimeError("agent boom"))
+        gemini_mock = MagicMock(return_value={"questions": [{
+            "id": 1, "question": "Q?",
+            "options": [{"label": "A", "text": "x", "correct": True}],
+            "explanation": ".", "concept_tested": "X", "difficulty": "easy",
+        }]})
+        with (
+            patch("routes.quiz.table", side_effect=_generate_table_factory()),
+            patch("routes.quiz.quiz_agent.run", new=run_mock),
+            patch("routes.quiz.get_graph", return_value={"nodes": [], "edges": []}),
+            patch("routes.quiz.get_quiz_context", return_value={}),
+            patch("routes.quiz.call_gemini_json", new=gemini_mock),
+        ):
+            r = self._post({})  # no model_pref
+        assert r.status_code == 200
+        gemini_mock.assert_called_once()
+        assert gemini_mock.call_args.kwargs.get("model") == MODEL_LITE
diff --git a/docs/decisions/0013-refactor-2-quiz-shipped.md b/docs/decisions/0013-refactor-2-quiz-shipped.md
@@ -182,10 +182,21 @@ fast → flash-flash override, no-pref falls through, unknown pref
 falls through, and legacy fallback honors smart.
 
 Decision rationale: keep the env var (`SAPLING_MODEL_QUIZ`) for ops
-defaults, AND accept `model_pref` for per-request overrides. They
-compose — the env var sets the agent's startup default; the body
-field overrides per call. Same shape as the chat tutor on main, so
-the two AI-driven routes are now symmetric.
+defaults AND accept `model_pref` for per-request overrides. They are
+two independent layers, not multiplicative — the env var sets the
+agent's startup baseline (`model_for("quiz")` reads it at process
+start); the body field, when present, fully replaces that default
+for the current call by passing `model=...` to `quiz_agent.run`.
+Same shape as the chat tutor on main, so the two AI-driven routes
+are now symmetric.
+
+`fast` and `smart` resolve identically across the agent path AND the
+legacy fallback (commit ddd109b initially missed this; it was fixed
+in the same iteration so a `fast` request that trips the agent
+guardrails still gets `gemini-2.5-flash`, not silently downgraded to
+`gemini-2.5-flash-lite`). The route's `_PREF_MODEL_NAMES` table is
+the single source of truth; the legacy `if`/`elif`/`else` chain mirrors
+it exactly.
 
 ## Pre-existing test failures (not caused by this refactor)