diff --git a/.github/workflows/repo-architect.yml b/.github/workflows/repo-architect.yml index 2d2bba8..e300c41 100644 --- a/.github/workflows/repo-architect.yml +++ b/.github/workflows/repo-architect.yml @@ -14,10 +14,15 @@ on: - mutate - campaign github_model: - description: 'GitHub Models model id (overrides preferred model)' + description: 'GitHub Models model id (overrides preferred model; leave blank to use catalog resolution)' required: false default: '' type: string + github_fallback_model: + description: 'GitHub Models fallback model id (used if primary model fails)' + required: false + default: 'openai/gpt-5' + type: string report_path: description: 'Primary report path' required: true @@ -167,6 +172,8 @@ jobs: GITHUB_REPO: ${{ github.repository }} GITHUB_BASE_BRANCH: ${{ github.event.repository.default_branch }} REPO_ARCHITECT_BRANCH_SUFFIX: ${{ github.run_id }}-${{ github.run_attempt }} + GITHUB_MODEL: ${{ github.event.inputs.github_model }} + GITHUB_FALLBACK_MODEL: ${{ github.event.inputs.github_fallback_model }} run: | MODE="${{ github.event.inputs.mode }}" MODEL="${{ github.event.inputs.github_model }}" diff --git a/repo_architect.py b/repo_architect.py index e64cc5f..44db7a5 100644 --- a/repo_architect.py +++ b/repo_architect.py @@ -69,6 +69,13 @@ }) # Canonical lane execution order for mutate / campaign modes MUTATION_LANE_ORDER: Tuple[str, ...] = ("parse_errors", "import_cycles", "entrypoint_consolidation", "hygiene", "report") +# Canonical architectural charter files (relative to git root) +CHARTER_PATHS: Tuple[str, ...] = ( + "docs/architecture/GODELOS_ARCHITECTURAL_CHARTER.md", + "docs/architecture/GODELOS_REPO_IMPLEMENTATION_CHARTER.md", +) +# Maximum characters from each charter file injected into model context +_MAX_CHARTER_CHARS_PER_FILE = 3000 # Maximum characters of source code sent to the model per file snippet _MAX_SOURCE_SNIPPET_CHARS = 4000 _MAX_CYCLE_SNIPPET_CHARS = 3000 @@ -111,6 +118,8 @@ class Config: # Model selection (preferred may fall back to fallback on unavailability) preferred_model: Optional[str] = None fallback_model: Optional[str] = None + # Explicit fallback model from GITHUB_FALLBACK_MODEL env var (overrides fallback_model if set) + github_fallback_model: Optional[str] = None # Explicit lane order override (None = use MUTATION_LANE_ORDER) campaign_lanes: Optional[Tuple[str, ...]] = None @@ -373,6 +382,30 @@ def _is_model_unavailable_error(msg: str) -> bool: return any(sig in lower for sig in _MODEL_UNAVAILABLE_SIGNALS) +# Regex matching HTTP status codes that warrant a fallback retry: +# 403 (permission/forbidden), 404 (not found), 429 (rate-limit/quota), 5xx (provider failure) +_FALLBACK_HTTP_CODE_RE = re.compile(r"(?:inference failed|network error).*?:\s*(403|404|429|5\d\d)\b") +# Timeout signals in lowercased error text +_TIMEOUT_SIGNALS = frozenset({"timed out", "timeout"}) + + +def _should_try_fallback(msg: str) -> bool: + """Return True if the error warrants retrying with the fallback model. + + Triggers on: model unavailability, HTTP 403/404/429, timeout, and 5xx provider errors. + Does NOT trigger on bare non-code error strings (e.g. generic "rate limit exceeded" + without an HTTP status code) to keep the trigger set narrow and deterministic. + """ + if _is_model_unavailable_error(msg): + return True + lower = msg.lower() + if any(sig in lower for sig in _TIMEOUT_SIGNALS): + return True + if _FALLBACK_HTTP_CODE_RE.search(msg): + return True + return False + + def extract_json_from_model_text(text: str) -> Any: """Extract the first JSON object or array from model-returned text (handles fences).""" try: @@ -421,7 +454,7 @@ def call_models_with_fallback_or_none( return resp, preferred, None, False except RepoArchitectError as exc: reason = str(exc) - if fallback and fallback != preferred and _is_model_unavailable_error(reason): + if fallback and fallback != preferred and _should_try_fallback(reason): try: resp = github_models_chat(token, fallback, messages) return resp, preferred, reason, True @@ -692,19 +725,57 @@ def build_analysis(root: pathlib.Path) -> Dict[str, Any]: } +# ----------------------------- +# Charter context +# ----------------------------- + +def load_charter_context(git_root: pathlib.Path) -> Dict[str, Any]: + """Load architectural charter files if present. + + Returns a dict with: + - loaded_files: list of relative paths that were successfully read + - content_hash: hex digest of combined content (None if no files loaded) + - applied: False initially; callers set True when charter was injected + - content: truncated combined charter text for model injection + """ + loaded_files: List[str] = [] + contents: List[str] = [] + for rel in CHARTER_PATHS: + path = git_root / rel + if path.exists(): + try: + text = path.read_text(encoding="utf-8", errors="replace") + loaded_files.append(rel) + contents.append(f"### {rel}\n\n{text[:_MAX_CHARTER_CHARS_PER_FILE]}") + except OSError: + pass + combined = "\n\n".join(contents) + content_hash = hashlib.sha256(combined.encode("utf-8")).hexdigest()[:16] if combined else None + return { + "loaded_files": loaded_files, + "content_hash": content_hash, + "applied": False, + "content": combined, + } + + # ----------------------------- # Models enrichment # ----------------------------- def enrich_with_github_models(config: Config, analysis: Dict[str, Any]) -> Dict[str, Any]: preferred = config.github_model or config.preferred_model - fallback = config.fallback_model + fallback = config.github_fallback_model or config.fallback_model meta: Dict[str, Any] = { "enabled": False, "used": False, "requested_model": preferred, "actual_model": None, "model": preferred, # kept for backward compatibility + "primary_model": preferred, + "fallback_model": fallback, + "model_used": None, + "fallback_used": False, "summary": None, "fallback_reason": None, "fallback_occurred": False, @@ -712,6 +783,12 @@ def enrich_with_github_models(config: Config, analysis: Dict[str, Any]) -> Dict[ if not config.github_token or not preferred: return meta meta["enabled"] = True + charter_context: Dict[str, Any] = analysis.get("charter_context") or {} + charter_text = charter_context.get("content", "") + charter_preamble = ( + f"\n\nArchitectural charter guidance (authoritative for this repository):\n{charter_text}\n" + if charter_text else "" + ) prompt = textwrap.dedent(f""" You are summarizing repository architecture risk. Architecture score: {analysis['architecture_score']} @@ -722,20 +799,23 @@ def enrich_with_github_models(config: Config, analysis: Dict[str, Any]) -> Dict[ Return 5 bullet points, compact and concrete, no preamble. """).strip() + system_content = "You produce concise engineering prioritization notes." + charter_preamble resp, requested, fallback_reason, fallback_occurred = call_models_with_fallback_or_none( config.github_token, preferred, fallback, [ - {"role": "system", "content": "You produce concise engineering prioritization notes."}, + {"role": "system", "content": system_content}, {"role": "user", "content": prompt}, ], ) meta["fallback_reason"] = fallback_reason meta["fallback_occurred"] = fallback_occurred + meta["fallback_used"] = fallback_occurred if resp is None: return meta try: meta["summary"] = parse_model_text(resp) meta["actual_model"] = resp.get("model", fallback if fallback_occurred else preferred) + meta["model_used"] = meta["actual_model"] meta["used"] = True except RepoArchitectError as exc: meta["fallback_reason"] = (meta.get("fallback_reason") or "") + f"; parse failed: {exc}" @@ -930,6 +1010,24 @@ def remove_marked_debug_prints(root: pathlib.Path, analysis: Dict[str, Any], bud ) +def _charter_system_prefix(analysis: Dict[str, Any]) -> str: + """Return a brief charter-guidance preamble to prepend to model system messages. + + Returns an empty string when no charter is loaded, so callers do not need + to guard against it. + """ + charter_context = analysis.get("charter_context") or {} + text = charter_context.get("content", "") + if not text: + return "" + return ( + "\n\nAuthoritative architectural charter for this repository " + "(obey its engineering direction when producing code mutations):\n" + + text + + "\n" + ) + + def build_report_plan(config: Config, analysis: Dict[str, Any], model_meta: Dict[str, Any], state: Dict[str, Any]) -> Optional[PatchPlan]: suite = build_report_suite(analysis, model_meta) bundle_hash = sha256_text("\n".join([k + "\n" + v for k, v in sorted(suite.items())])) @@ -1002,7 +1100,7 @@ def build_parse_errors_plan(config: Config, analysis: Dict[str, Any]) -> Optiona resp, _req, fallback_reason, _fell = call_models_with_fallback_or_none( config.github_token, preferred, fallback, [ - {"role": "system", "content": "You fix Python syntax errors. Return only valid JSON with corrected file contents."}, + {"role": "system", "content": "You fix Python syntax errors. Return only valid JSON with corrected file contents." + _charter_system_prefix(analysis)}, {"role": "user", "content": prompt}, ], ) @@ -1079,7 +1177,7 @@ def build_import_cycles_plan(config: Config, analysis: Dict[str, Any]) -> Option resp, _req, fallback_reason, _fell = call_models_with_fallback_or_none( config.github_token, preferred, fallback, [ - {"role": "system", "content": "You fix Python import cycles. Return only valid JSON with corrected file contents."}, + {"role": "system", "content": "You fix Python import cycles. Return only valid JSON with corrected file contents." + _charter_system_prefix(analysis)}, {"role": "user", "content": prompt}, ], ) @@ -1163,7 +1261,7 @@ def build_entrypoint_consolidation_plan(config: Config, analysis: Dict[str, Any] resp, _req, fallback_reason, _fell = call_models_with_fallback_or_none( config.github_token, preferred, fallback, [ - {"role": "system", "content": "You annotate redundant Python entrypoints with deprecation comments. Return only valid JSON."}, + {"role": "system", "content": "You annotate redundant Python entrypoints with deprecation comments. Return only valid JSON." + _charter_system_prefix(analysis)}, {"role": "user", "content": prompt}, ], ) @@ -1421,7 +1519,12 @@ def workflow_yaml(secret_env_names: Sequence[str], cron: str, github_model: Opti github_model: description: 'GitHub Models model id (overrides preferred model)' required: false - default: '' + default: 'anthropic/claude-sonnet-4.5' + type: string + github_fallback_model: + description: 'GitHub Models fallback model id (used if primary model fails)' + required: false + default: 'openai/gpt-5' type: string report_path: description: 'Primary report path' @@ -1572,6 +1675,8 @@ def deterministic_available(exclude=None): GITHUB_REPO: ${{{{ github.repository }}}} GITHUB_BASE_BRANCH: ${{{{ github.event.repository.default_branch }}}} REPO_ARCHITECT_BRANCH_SUFFIX: ${{{{ github.run_id }}}}-${{{{ github.run_attempt }}}} + GITHUB_MODEL: ${{{{ github.event.inputs.github_model }}}} + GITHUB_FALLBACK_MODEL: ${{{{ github.event.inputs.github_fallback_model }}}} {extra_env} run: | MODE="${{{{ github.event.inputs.mode }}}}" MODEL="${{{{ github.event.inputs.github_model }}}}" @@ -1624,7 +1729,12 @@ def run_cycle(config: Config) -> Dict[str, Any]: ensure_agent_dir(config.agent_dir) state = load_state(config) analysis = build_analysis(config.git_root) + charter_context = load_charter_context(config.git_root) + analysis["charter_context"] = charter_context model_meta = enrich_with_github_models(config, analysis) + # Mark charter as applied if the model was actually used + if model_meta.get("used") or model_meta.get("enabled"): + charter_context["applied"] = bool(charter_context.get("loaded_files")) analysis["model_meta"] = model_meta persist_analysis(config, analysis) @@ -1633,6 +1743,12 @@ def run_cycle(config: Config) -> Dict[str, Any]: str(config.graph_path.relative_to(config.git_root)), str(config.roadmap_path.relative_to(config.git_root)), ] + # Charter metadata exposed at top level for easy inspection + charter_meta = { + "loaded_files": charter_context.get("loaded_files", []), + "content_hash": charter_context.get("content_hash"), + "applied": charter_context.get("applied", False), + } result: Dict[str, Any] = { "status": "analyzed", "mode": config.mode, @@ -1641,6 +1757,10 @@ def run_cycle(config: Config) -> Dict[str, Any]: "architecture_score": analysis["architecture_score"], "requested_model": model_meta.get("requested_model"), "actual_model": model_meta.get("actual_model"), + "primary_model": model_meta.get("primary_model"), + "fallback_model": model_meta.get("fallback_model"), + "model_used": model_meta.get("model_used"), + "fallback_used": model_meta.get("fallback_used", False), "fallback_reason": model_meta.get("fallback_reason"), "fallback_occurred": model_meta.get("fallback_occurred", False), "no_safe_code_mutation_reason": None, @@ -1655,6 +1775,7 @@ def run_cycle(config: Config) -> Dict[str, Any]: "roadmap_path": str(config.roadmap_path), "roadmap": analysis["roadmap"], "github_models": model_meta, + "charter": charter_meta, "metadata": {"architecture_score": analysis["architecture_score"], "model_meta": model_meta, "report_path": str(config.report_path)}, } @@ -1715,7 +1836,11 @@ def run_campaign( ensure_agent_dir(config.agent_dir) state = load_state(config) analysis = build_analysis(config.git_root) + charter_context = load_charter_context(config.git_root) + analysis["charter_context"] = charter_context model_meta = enrich_with_github_models(config, analysis) + if model_meta.get("used") or model_meta.get("enabled"): + charter_context["applied"] = bool(charter_context.get("loaded_files")) analysis["model_meta"] = model_meta persist_analysis(config, analysis) @@ -1740,6 +1865,7 @@ def run_campaign( slices_applied += 1 # Re-analyse so the next lane sees an up-to-date repo state analysis = build_analysis(config.git_root) + analysis["charter_context"] = charter_context model_meta = enrich_with_github_models(config, analysis) analysis["model_meta"] = model_meta except RepoArchitectError as exc: @@ -1747,6 +1873,11 @@ def run_campaign( if stop_on_failure: break + charter_meta = { + "loaded_files": charter_context.get("loaded_files", []), + "content_hash": charter_context.get("content_hash"), + "applied": charter_context.get("applied", False), + } summary: Dict[str, Any] = { "mode": "campaign", "status": "campaign_complete", @@ -1757,7 +1888,12 @@ def run_campaign( "architecture_score": analysis["architecture_score"], "requested_model": model_meta.get("requested_model"), "actual_model": model_meta.get("actual_model"), + "primary_model": model_meta.get("primary_model"), + "fallback_model": model_meta.get("fallback_model"), + "model_used": model_meta.get("model_used"), + "fallback_used": model_meta.get("fallback_used", False), "fallback_reason": model_meta.get("fallback_reason"), + "charter": charter_meta, "results": slice_results, } @@ -1940,6 +2076,7 @@ def build_config(args: argparse.Namespace) -> Config: configure_branch_protection=args.configure_branch_protection, preferred_model=preferred, fallback_model=fallback, + github_fallback_model=os.environ.get("GITHUB_FALLBACK_MODEL"), campaign_lanes=campaign_lanes, ) diff --git a/tests/test_repo_architect.py b/tests/test_repo_architect.py index 5547b6c..3e9ec55 100644 --- a/tests/test_repo_architect.py +++ b/tests/test_repo_architect.py @@ -72,6 +72,7 @@ def _make_config(root: Optional[pathlib.Path] = None, mode: str = "analyze", **o configure_branch_protection=False, preferred_model=None, fallback_model=None, + github_fallback_model=None, ) if overrides: cfg = dataclasses.replace(cfg, **overrides) @@ -176,6 +177,67 @@ def test_case_insensitive(self) -> None: self.assertTrue(ra._is_model_unavailable_error("Model_Not_Found")) +# --------------------------------------------------------------------------- +# 2b. _should_try_fallback — expanded trigger set +# --------------------------------------------------------------------------- + +class TestShouldTryFallback(unittest.TestCase): + """Verify _should_try_fallback covers the explicit and minimal trigger set.""" + + def test_model_unavailable_triggers(self) -> None: + self.assertTrue(ra._should_try_fallback("unknown_model: gpt-5.4 not found")) + + def test_http_403_triggers(self) -> None: + self.assertTrue(ra._should_try_fallback( + "GitHub Models inference failed: 403 Permission denied" + )) + + def test_http_404_triggers(self) -> None: + self.assertTrue(ra._should_try_fallback( + "GitHub Models inference failed: 404 Not found" + )) + + def test_http_429_triggers(self) -> None: + self.assertTrue(ra._should_try_fallback( + "GitHub Models inference failed: 429 Too many requests" + )) + + def test_http_500_triggers(self) -> None: + self.assertTrue(ra._should_try_fallback( + "GitHub Models inference failed: 500 Internal server error" + )) + + def test_http_503_triggers(self) -> None: + self.assertTrue(ra._should_try_fallback( + "GitHub Models inference failed: 503 Service unavailable" + )) + + def test_timeout_triggers(self) -> None: + self.assertTrue(ra._should_try_fallback( + "GitHub Models inference network error: timed out" + )) + + def test_timeout_keyword_triggers(self) -> None: + self.assertTrue(ra._should_try_fallback( + "GitHub Models inference network error: timeout" + )) + + def test_bare_rate_limit_string_does_not_trigger(self) -> None: + """A generic 'rate limit exceeded' message (no HTTP code) must NOT trigger.""" + self.assertFalse(ra._should_try_fallback("rate limit exceeded")) + + def test_http_200_does_not_trigger(self) -> None: + self.assertFalse(ra._should_try_fallback( + "GitHub Models inference failed: 200 OK" + )) + + def test_http_400_does_not_trigger(self) -> None: + """400 Bad Request is a client error that should not trigger fallback.""" + self.assertFalse(ra._should_try_fallback( + "GitHub Models inference failed: 400 Bad request" + )) + + class TestCallModelsWithFallback(unittest.TestCase): _MESSAGES = [{"role": "user", "content": "hi"}] @@ -207,8 +269,90 @@ def side_effect(token: str, model: str, messages: list) -> dict: self.assertIsNotNone(reason) self.assertIn("unknown_model", reason) - def test_fallback_NOT_triggered_on_transient_error(self) -> None: - """Transient errors (rate limit, network) must NOT trigger model fallback.""" + def test_fallback_triggered_on_http_403(self) -> None: + """HTTP 403 (permission) on primary must trigger exactly one fallback attempt.""" + fallback_resp = {"choices": [{"message": {"content": "ok"}}], "model": "openai/gpt-4.1"} + calls: List[str] = [] + + def side_effect(token: str, model: str, messages: list) -> dict: + calls.append(model) + if model == "anthropic/claude-sonnet-4.5": + raise ra.RepoArchitectError( + "GitHub Models inference failed: 403 Permission denied" + ) + return fallback_resp + + with patch.object(ra, "github_models_chat", side_effect=side_effect): + resp, req_model, reason, fell = ra.call_models_with_fallback_or_none( + "tok", "anthropic/claude-sonnet-4.5", "openai/gpt-5", self._MESSAGES + ) + self.assertEqual(calls, ["anthropic/claude-sonnet-4.5", "openai/gpt-5"]) + self.assertEqual(resp, fallback_resp) + self.assertTrue(fell) + self.assertIn("403", reason) + + def test_fallback_triggered_on_http_429(self) -> None: + """HTTP 429 on primary must trigger fallback retry.""" + fallback_resp = {"choices": [{"message": {"content": "ok"}}], "model": "openai/gpt-5"} + calls: List[str] = [] + + def side_effect(token: str, model: str, messages: list) -> dict: + calls.append(model) + if model == "anthropic/claude-sonnet-4.5": + raise ra.RepoArchitectError( + "GitHub Models inference failed: 429 Too many requests" + ) + return fallback_resp + + with patch.object(ra, "github_models_chat", side_effect=side_effect): + resp, req_model, reason, fell = ra.call_models_with_fallback_or_none( + "tok", "anthropic/claude-sonnet-4.5", "openai/gpt-5", self._MESSAGES + ) + self.assertEqual(calls, ["anthropic/claude-sonnet-4.5", "openai/gpt-5"]) + self.assertEqual(resp, fallback_resp) + self.assertTrue(fell) + self.assertIn("429", reason) + + def test_fallback_triggered_on_5xx(self) -> None: + """HTTP 5xx provider failure on primary must trigger fallback retry.""" + fallback_resp = {"choices": [{"message": {"content": "ok"}}], "model": "openai/gpt-5"} + + def side_effect(token: str, model: str, messages: list) -> dict: + if model == "anthropic/claude-sonnet-4.5": + raise ra.RepoArchitectError( + "GitHub Models inference failed: 503 Service temporarily unavailable" + ) + return fallback_resp + + with patch.object(ra, "github_models_chat", side_effect=side_effect): + resp, req_model, reason, fell = ra.call_models_with_fallback_or_none( + "tok", "anthropic/claude-sonnet-4.5", "openai/gpt-5", self._MESSAGES + ) + self.assertEqual(resp, fallback_resp) + self.assertTrue(fell) + self.assertIn("503", reason) + + def test_fallback_triggered_on_timeout(self) -> None: + """Network timeout on primary must trigger fallback retry.""" + fallback_resp = {"choices": [{"message": {"content": "ok"}}], "model": "openai/gpt-5"} + + def side_effect(token: str, model: str, messages: list) -> dict: + if model == "anthropic/claude-sonnet-4.5": + raise ra.RepoArchitectError( + "GitHub Models inference network error: timed out" + ) + return fallback_resp + + with patch.object(ra, "github_models_chat", side_effect=side_effect): + resp, req_model, reason, fell = ra.call_models_with_fallback_or_none( + "tok", "anthropic/claude-sonnet-4.5", "openai/gpt-5", self._MESSAGES + ) + self.assertEqual(resp, fallback_resp) + self.assertTrue(fell) + self.assertIn("timed out", reason) + + def test_fallback_NOT_triggered_on_bare_error_string(self) -> None: + """A bare error string without HTTP code must NOT trigger fallback (narrow trigger set).""" calls: List[str] = [] def side_effect(token: str, model: str, messages: list) -> dict: @@ -224,6 +368,32 @@ def side_effect(token: str, model: str, messages: list) -> dict: self.assertIsNone(resp) self.assertFalse(fell) + def test_fallback_retried_exactly_once_then_both_fail(self) -> None: + """When both models fail, combined error is surfaced and fallback_occurred=True.""" + calls: List[str] = [] + + def side_effect(token: str, model: str, messages: list) -> dict: + calls.append(model) + if model == "anthropic/claude-sonnet-4.5": + raise ra.RepoArchitectError( + "GitHub Models inference failed: 503 provider error" + ) + raise ra.RepoArchitectError( + "GitHub Models inference failed: 502 fallback provider error" + ) + + with patch.object(ra, "github_models_chat", side_effect=side_effect): + resp, req_model, reason, fell = ra.call_models_with_fallback_or_none( + "tok", "anthropic/claude-sonnet-4.5", "openai/gpt-5", self._MESSAGES + ) + self.assertEqual(calls, ["anthropic/claude-sonnet-4.5", "openai/gpt-5"]) + self.assertIsNone(resp) + self.assertTrue(fell) + self.assertIsNotNone(reason) + self.assertIn("503", reason) + self.assertIn("fallback also failed", reason) + self.assertIn("502", reason) + def test_both_models_fail_returns_none(self) -> None: def side_effect(token: str, model: str, messages: list) -> dict: raise ra.RepoArchitectError("unknown_model: all models gone") @@ -312,6 +482,86 @@ def test_workflow_yaml_resolves_models_via_catalog_and_keeps_blank_override_logi self.assertIn("REPO_ARCHITECT_FALLBACK_MODEL={fallback}", workflow) self.assertIn('if [ -n "$MODEL" ]; then EXTRA_ARGS="$EXTRA_ARGS --github-model $MODEL"; fi', workflow) self.assertIn("models: read", workflow) + # New: primary/fallback model inputs with defaults + self.assertIn("github_fallback_model:", workflow) + self.assertIn("default: 'openai/gpt-5'", workflow) + self.assertIn("default: 'anthropic/claude-sonnet-4.5'", workflow) + # New: GITHUB_MODEL and GITHUB_FALLBACK_MODEL exported as env vars + self.assertIn("GITHUB_FALLBACK_MODEL:", workflow) + + def test_build_config_reads_github_fallback_model_env(self) -> None: + """GITHUB_FALLBACK_MODEL env var should populate github_fallback_model in Config.""" + env = { + "GITHUB_FALLBACK_MODEL": "openai/gpt-5", + } + with patch.object(ra, "discover_git_root", return_value=pathlib.Path("/tmp/repo")): + with patch.dict(os.environ, env, clear=False): + config = ra.build_config(ra.parse_args([])) + self.assertEqual(config.github_fallback_model, "openai/gpt-5") + + def test_enrich_uses_github_fallback_model_over_fallback_model(self) -> None: + """github_fallback_model takes precedence over fallback_model in enrich_with_github_models.""" + analysis = { + "architecture_score": 0.9, + "cycles": [], + "parse_error_files": [], + "entrypoint_paths": [], + "roadmap": [], + } + fallback_resp = {"choices": [{"message": {"content": "ok"}}], "model": "openai/gpt-5"} + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + config = _make_config( + root, + github_token="tok", + github_model="anthropic/claude-sonnet-4.5", + github_fallback_model="openai/gpt-5", + fallback_model="google/gemini-3-pro", + ) + with patch.object( + ra, + "call_models_with_fallback_or_none", + return_value=(fallback_resp, "anthropic/claude-sonnet-4.5", "503 error", True), + ) as mocked_call: + meta = ra.enrich_with_github_models(config, analysis) + # github_fallback_model="openai/gpt-5" must be passed as fallback, not fallback_model + self.assertEqual(mocked_call.call_args.args[2], "openai/gpt-5") + self.assertTrue(meta["fallback_used"]) + self.assertTrue(meta["fallback_occurred"]) + + def test_enrich_metadata_primary_success(self) -> None: + """On primary success, metadata must reflect no fallback was used.""" + analysis = { + "architecture_score": 0.9, + "cycles": [], + "parse_error_files": [], + "entrypoint_paths": [], + "roadmap": [], + } + primary_resp = { + "choices": [{"message": {"content": "bullet 1"}}], + "model": "anthropic/claude-sonnet-4.5", + } + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + config = _make_config( + root, + github_token="tok", + github_model="anthropic/claude-sonnet-4.5", + github_fallback_model="openai/gpt-5", + ) + with patch.object( + ra, + "call_models_with_fallback_or_none", + return_value=(primary_resp, "anthropic/claude-sonnet-4.5", None, False), + ): + meta = ra.enrich_with_github_models(config, analysis) + self.assertEqual(meta["primary_model"], "anthropic/claude-sonnet-4.5") + self.assertEqual(meta["fallback_model"], "openai/gpt-5") + self.assertEqual(meta["model_used"], "anthropic/claude-sonnet-4.5") + self.assertFalse(meta["fallback_used"]) + self.assertFalse(meta["fallback_occurred"]) + self.assertIsNone(meta["fallback_reason"]) # --------------------------------------------------------------------------- @@ -461,8 +711,10 @@ def test_campaign_max_slices_respected(self) -> None: REQUIRED_OUTPUT_FIELDS = frozenset({ "status", "mode", "lane", "architecture_score", "requested_model", "actual_model", "fallback_reason", "fallback_occurred", + "primary_model", "fallback_model", "model_used", "fallback_used", "no_safe_code_mutation_reason", "branch", "changed_files", "validation", "pull_request_url", "artifact_files", + "charter", }) @@ -496,7 +748,9 @@ def test_campaign_mode_output_contract(self) -> None: result = ra.run_campaign(config, list(ra.MUTATION_LANE_ORDER), max_slices=2, stop_on_failure=False) campaign_fields = {"status", "mode", "slices_attempted", "slices_applied", "lanes_requested", "lanes_executed", "architecture_score", - "requested_model", "actual_model", "fallback_reason", "results"} + "requested_model", "actual_model", "fallback_reason", + "primary_model", "fallback_model", "model_used", "fallback_used", + "charter", "results"} missing = campaign_fields - set(result.keys()) self.assertEqual(missing, set(), f"Missing campaign output fields: {missing}") @@ -790,5 +1044,125 @@ def test_output_contract_lanes_active_respects_config(self) -> None: self.assertEqual(result["lanes_active"], ["hygiene"]) +# --------------------------------------------------------------------------- +# 11. Charter context — loading and metadata +# --------------------------------------------------------------------------- + +class TestCharterContext(unittest.TestCase): + """Verify charter file loading, missing-file tolerance, and output metadata.""" + + def test_load_charter_context_no_files_present(self) -> None: + """No crash when charter files are absent; loaded_files is empty, applied is False.""" + with tempfile.TemporaryDirectory() as tmp: + root = pathlib.Path(tmp) + ctx = ra.load_charter_context(root) + self.assertEqual(ctx["loaded_files"], []) + self.assertIsNone(ctx["content_hash"]) + self.assertFalse(ctx["applied"]) + self.assertEqual(ctx["content"], "") + + def test_load_charter_context_partial(self) -> None: + """Only present charter files are loaded; the missing one is silently skipped.""" + with tempfile.TemporaryDirectory() as tmp: + root = pathlib.Path(tmp) + first_rel = ra.CHARTER_PATHS[0] + charter_path = root / first_rel + charter_path.parent.mkdir(parents=True, exist_ok=True) + charter_path.write_text("# Test Charter\n\nSome guidance here.", encoding="utf-8") + + ctx = ra.load_charter_context(root) + + self.assertEqual(ctx["loaded_files"], [first_rel]) + self.assertIsNotNone(ctx["content_hash"]) + self.assertFalse(ctx["applied"]) + self.assertIn("Test Charter", ctx["content"]) + # Missing second file must not be in loaded_files + self.assertNotIn(ra.CHARTER_PATHS[1], ctx["loaded_files"]) + + def test_load_charter_context_both_files(self) -> None: + """When both files exist, both appear in loaded_files and content is combined.""" + with tempfile.TemporaryDirectory() as tmp: + root = pathlib.Path(tmp) + for rel in ra.CHARTER_PATHS: + path = root / rel + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(f"# Charter {rel}\n\nGuidance.", encoding="utf-8") + + ctx = ra.load_charter_context(root) + + self.assertEqual(sorted(ctx["loaded_files"]), sorted(ra.CHARTER_PATHS)) + self.assertIsNotNone(ctx["content_hash"]) + self.assertFalse(ctx["applied"]) + for rel in ra.CHARTER_PATHS: + self.assertIn(rel, ctx["content"]) + + def test_run_cycle_charter_metadata_in_result(self) -> None: + """run_cycle must include charter metadata at top level.""" + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + config = _make_config(root, mode="analyze") + result = ra.run_cycle(config) + self.assertIn("charter", result) + charter = result["charter"] + self.assertIn("loaded_files", charter) + self.assertIn("content_hash", charter) + self.assertIn("applied", charter) + self.assertIsInstance(charter["loaded_files"], list) + + def test_run_cycle_charter_loaded_when_files_present(self) -> None: + """Charter metadata reflects loaded files when charters exist in the repo.""" + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + # Plant both charter files + for rel in ra.CHARTER_PATHS: + path = root / rel + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(f"# {rel}\nEngineering direction.", encoding="utf-8") + config = _make_config(root, mode="analyze") + result = ra.run_cycle(config) + charter = result["charter"] + self.assertEqual(sorted(charter["loaded_files"]), sorted(ra.CHARTER_PATHS)) + self.assertIsNotNone(charter["content_hash"]) + + def test_run_campaign_charter_metadata_in_result(self) -> None: + """run_campaign must include charter metadata in the summary dict.""" + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + config = _make_config(root, mode="campaign") + result = ra.run_campaign(config, ["hygiene"], max_slices=1, stop_on_failure=False) + self.assertIn("charter", result) + charter = result["charter"] + self.assertIn("loaded_files", charter) + self.assertIn("content_hash", charter) + self.assertIn("applied", charter) + + def test_charter_system_prefix_empty_when_no_charter(self) -> None: + """_charter_system_prefix returns empty string when no charter content is present.""" + analysis: Dict[str, Any] = {"charter_context": {"content": "", "loaded_files": []}} + self.assertEqual(ra._charter_system_prefix(analysis), "") + + def test_charter_system_prefix_present_when_charter_loaded(self) -> None: + """_charter_system_prefix returns non-empty string when charter content is set.""" + analysis: Dict[str, Any] = { + "charter_context": {"content": "# My Charter\n\nDo X.", "loaded_files": ["docs/architecture/X.md"]} + } + prefix = ra._charter_system_prefix(analysis) + self.assertIn("My Charter", prefix) + self.assertIn("authoritative", prefix.lower()) + + def test_content_hash_changes_with_content(self) -> None: + """Different charter content should produce different hashes.""" + with tempfile.TemporaryDirectory() as tmp: + root = pathlib.Path(tmp) + rel = ra.CHARTER_PATHS[0] + path = root / rel + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("version 1", encoding="utf-8") + ctx1 = ra.load_charter_context(root) + path.write_text("version 2", encoding="utf-8") + ctx2 = ra.load_charter_context(root) + self.assertNotEqual(ctx1["content_hash"], ctx2["content_hash"]) + + if __name__ == "__main__": unittest.main()