From cdd76c04b1ed4c63a563b478e31b9ace6cd2274a Mon Sep 17 00:00:00 2001 From: nesquena-hermes <[email protected]> Date: Wed, 10 Jun 2026 22:16:25 +0000 Subject: [PATCH 1/2] fix(model-resolver): preserve @provider:model picks across cold catalogs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _resolve_compatible_session_model_state() no longer reverts an explicit @provider:model selection to the default when the provider's group is missing from the cached catalog snapshot. Explicit picks are always honored; non-explicit (2nd+ turn / chat switch) preservation requires the provider to be KNOWN/CONFIGURED via the new _provider_is_known_or_configured() (static registry + custom-provider config, NOT the cold catalog) — so a cold live-discovery provider (ollama-cloud/deepseek/xai) is preserved while a genuinely-unknown provider (@removed:...) falls through to default-repair. A known-but-unconfigured builtin is deliberately preserved (surfaces a clear runtime auth error rather than a silent swap; a cheap env/config credential check would mis-classify OAuth/auth-store providers). Keeps the #3867 cached-catalog hot path intact. Co-authored-by: starship-s --- api/config.py | 53 ++++++ api/routes.py | 60 +++++++ tests/test_provider_mismatch.py | 290 ++++++++++++++++++++++++++++++++ 3 files changed, 403 insertions(+) diff --git a/api/config.py b/api/config.py index 6448b9e663..f4f2823e5c 100644 --- a/api/config.py +++ b/api/config.py @@ -1214,6 +1214,59 @@ def _named_custom_provider_slug_for_base_url( return "" +def _provider_is_known_or_configured( + provider_id: object, + config_obj: dict | None = None, +) -> bool: + """True when ``provider_id`` is a provider Hermes recognizes (static registry) + or the user has configured (named custom provider), decided from the STATIC + registry + config state only — never from a live/cold catalog snapshot. + + This distinguishes a provider Hermes knows how to route (e.g.
``ollama-cloud``, + whose model group simply isn't folded into the current cached catalog yet, or a + named ``custom_providers`` entry) from a *genuinely unknown* one + (``@removed:...`` that is in no registry and configured nowhere). The former's + explicitly-qualified selection is preserved across a cold catalog; the latter + falls back to the default so chat/start doesn't route to an unrecognized + provider. + + DELIBERATE SCOPE (see the @provider:model guard in + ``_resolve_compatible_session_model_state``): registry membership counts as + "known" even when the user has no key configured for that built-in. We do NOT + require authenticated-credential evidence here, on purpose. The only fully + reliable "is this provider authenticated" signal is the live auth store / + catalog rebuild — exactly the cost the caller's ``prefer_cached_catalog`` hot + path avoids — and a cheap env/config-only credential check would mis-classify + providers authenticated via OAuth/auth-store (``ollama-cloud`` among them), + re-introducing the original silent-revert bug for them. A known-but-unconfigured + pick is therefore kept and surfaces a clear run-time auth error rather than a + silent swap to the default. + + Deliberately does NOT consult ``get_available_models()`` / the catalog groups, + which are exactly what is cold here — re-deriving them live would defeat the + ``prefer_cached_catalog`` hot-path win this guards. + """ + raw = str(provider_id or "").strip().lower() + if not raw: + return False + # Configured custom provider: a named slug in custom_providers, or any + # ``custom`` / ``custom:`` form when custom_providers are defined. + if _named_custom_provider_slug_for_provider(raw, config_obj): + return True + if raw == "custom" or raw.startswith("custom:"): + return bool(_custom_provider_entries(config_obj)) + # Known first-party / built-in provider id (alias-resolved). 
Static registry + # knowledge that is always available, so a live-discovery provider whose + # catalog group is momentarily absent still counts as known. + canonical = _resolve_provider_alias(raw) + return ( + raw in _PROVIDER_DISPLAY + or canonical in _PROVIDER_DISPLAY + or raw in _PROVIDER_MODELS + or canonical in _PROVIDER_MODELS + ) + + # Well-known models per provider (used to populate dropdown for direct API providers) _PROVIDER_MODELS = { "anthropic": [ diff --git a/api/routes.py b/api/routes.py index e7ab46a2b6..e261611d6d 100644 --- a/api/routes.py +++ b/api/routes.py @@ -1079,6 +1079,7 @@ def _clear_live_models_cache() -> None: _resolve_cli_toolsets, _INDEX_HTML_PATH, get_available_models, + _provider_is_known_or_configured, IMAGE_EXTS, MD_EXTS, MIME_MAP, @@ -2632,6 +2633,16 @@ def _resolve_compatible_session_model_state( if not provider_raw or not bare_model: return model, requested_provider, False + # A fresh, explicit user pick is by definition not a stale artifact, so + # honor the @provider:model exactly as chosen — never reroute it via the + # active-provider family repair or the cold-catalog fallback below (a bare + # id like "gpt-oss-120b" under an OpenAI-active agent would otherwise get + # pulled to OpenAI by the family-match branch). If the named provider is + # unreachable the user sees a clear run-time error rather than a silent + # model swap. Must sit above the family-match repair (#3737 principle). 
+ if explicit_model_pick: + return model, provider_raw, False + raw_provider_ids, normalized_provider_ids = _catalog_provider_id_sets(catalog) hint_matches_active = ( provider_raw == raw_active_provider @@ -2663,6 +2674,55 @@ def _resolve_compatible_session_model_state( else None ) return bare_model, provider_context, True + # On NON-explicit resolves (2nd+ turn, chat switch — explicit picks already + # returned above), preserve the selection only when all three hold: + # + # * provider_normalized == "" — a non-first-party provider hint + # (ollama-cloud / deepseek / xai / a named custom proxy). First-party + # families fall through to the stale-cross-provider repair below. + # + # * the BARE model is not a first-party family id (does not start with + # gpt/claude/gemini), i.e. not a misrouted first-party model that a + # vanished provider used to host (e.g. "@copilot:claude-opus-4.6"). + # + # * the provider is KNOWN or CONFIGURED. This is the load-bearing + # distinction: catalog-absence has two causes — + # (a) a cold live-discovery provider (ollama-cloud is configured; its + # group just isn't in this cached snapshot yet) → preserve, and + # (b) a genuinely removed/unknown provider ("@removed:mistral-large" + # configured nowhere) → fall through to the default so chat/start + # doesn't route to an unreachable provider. + # _provider_is_known_or_configured() decides this from the static + # provider registry + config state, NOT from the cold catalog snapshot + # (re-deriving that live would defeat the prefer_cached_catalog win). + # + # DELIBERATE: the registry test treats a KNOWN built-in (deepseek, minimax, + # ollama-cloud, …) as preservable even when the user has no key configured + # for it. We accept this on purpose. 
The only fully-reliable "is this + # provider authenticated" signal is the live auth store / catalog rebuild — + # exactly the cost this hot path avoids — and a cheap config/env-only check + # would mis-classify providers configured via OAuth/auth-store (ollama-cloud + # among them), re-introducing the original silent-revert bug for them. So a + # known-but-unconfigured pick is kept; the user gets a clear run-time auth + # error instead of a silent swap to the default. Pinned by + # test_at_provider_known_unconfigured_builtin_is_intentionally_preserved. + # + # KNOWN LIMITATION: the first-party-family test is a bare-name prefix match + # (the same approximation _model_matches_active_provider_family uses). A + # genuine third-party model whose name merely *starts* with gpt/claude/ + # gemini (e.g. "@ollama:gpt4all-mini") is therefore still mis-classified as + # first-party and reverted on non-explicit paths. A name-based check cannot + # disambiguate that; the behavior is pinned by + # test_at_provider_first_party_named_third_party_model_known_limitation. + _bare_is_first_party_family = any( + bare_model.lower().startswith(_p) for _p in ("gpt", "claude", "gemini") + ) + if ( + not provider_normalized + and not _bare_is_first_party_family + and _provider_is_known_or_configured(provider_raw) + ): + return model, provider_raw, False if default_model: provider_context = ( raw_active_provider diff --git a/tests/test_provider_mismatch.py b/tests/test_provider_mismatch.py index 1eb0b28351..1318dea0e7 100644 --- a/tests/test_provider_mismatch.py +++ b/tests/test_provider_mismatch.py @@ -1005,6 +1005,296 @@ def test_stale_at_provider_model_falls_back_when_family_mismatches(monkeypatch): assert effective == "gpt-5.5" +def test_at_provider_third_party_model_survives_cold_catalog(monkeypatch): + """A non-first-party @provider:model selection must NOT revert to the default + just because the provider's group is missing from the current catalog snapshot. 
+ + Providers like ollama-cloud / deepseek / xai normalize to "" and discover their + models live, so a cold/minimal catalog can momentarily lack the group even + though the provider is configured. The @provider:model branch of + _resolve_compatible_session_model_state used to fall through to ``default_model`` + in that case, silently swapping the user's chosen model on any non-explicit + resolve (2nd+ turn, chat switch — explicit_model_pick is False there). Because + the bare id (minimax-m3) is not a first-party family id (gpt/claude/gemini), the + selection must instead be preserved. + """ + import api.routes as routes + + monkeypatch.setattr( + routes, + "get_available_models", + lambda: { + # Active provider is something else and the ollama-cloud group is + # absent from THIS snapshot (live discovery not yet folded in). + "active_provider": "anthropic", + "default_model": "claude-opus-4.8", + "groups": [ + { + "provider": "Anthropic", + "provider_id": "anthropic", + "models": [{"id": "claude-opus-4.8", "label": "Opus"}], + }, + ], + }, + ) + + # Both the explicit-pick (1st turn) and the non-explicit (2nd+ turn / chat + # switch) paths must keep the selection. + for explicit in (True, False): + model, provider, changed = routes._resolve_compatible_session_model_state( + "@ollama-cloud:minimax-m3", + "ollama-cloud", + explicit_model_pick=explicit, + ) + assert model == "@ollama-cloud:minimax-m3", ( + f"explicit_model_pick={explicit}: third-party @provider model must " + f"survive a cold catalog snapshot, got {model!r}" + ) + assert provider == "ollama-cloud" + assert changed is False + + +def test_at_provider_removed_provider_still_reverts_to_default(monkeypatch): + """The catalog-absence preservation must NOT extend to a genuinely + removed/unconfigured provider. + + Catalog-absence has two causes and only one is a cold-discovery artifact: + * ollama-cloud is configured, its group is just missing from this snapshot + -> preserve (covered by the sibling test). 
+ * @removed:mistral-large names a provider that is no longer configured + anywhere -> preserving it would route chat/start to an unreachable + provider, so it must still fall through to the default-repair. + + The guard tells the two apart via _provider_is_known_or_configured() (static + registry + config state), never via the cold catalog. "removed" is neither a + known built-in provider nor a configured custom provider, so a non-explicit + resolve reverts to the active default. An explicit pick is still honored, + leaving the user a deliberate escape hatch. + """ + import api.routes as routes + + monkeypatch.setattr( + routes, + "get_available_models", + lambda: { + "active_provider": "anthropic", + "default_model": "claude-opus-4.8", + "groups": [ + { + "provider": "Anthropic", + "provider_id": "anthropic", + "models": [{"id": "claude-opus-4.8", "label": "Opus"}], + }, + ], + }, + ) + + # Non-explicit (2nd+ turn / chat switch): unknown provider -> repair to default. + model, _provider, changed = routes._resolve_compatible_session_model_state( + "@removed:mistral-large", + "removed", + explicit_model_pick=False, + ) + assert model == "claude-opus-4.8", ( + f"a removed/unconfigured @provider model must revert to the default on a " + f"non-explicit resolve, got {model!r}" + ) + assert changed is True + + # Explicit pick is still honored even for an unknown provider. + model2, provider2, changed2 = routes._resolve_compatible_session_model_state( + "@removed:mistral-large", + "removed", + explicit_model_pick=True, + ) + assert model2 == "@removed:mistral-large" + assert provider2 == "removed" + assert changed2 is False + + +def test_at_provider_known_unconfigured_builtin_is_intentionally_preserved(monkeypatch): + """Pin the DELIBERATE choice: a KNOWN built-in provider is preserved on a cold + catalog even when the user has no key configured for it. 
+ + _provider_is_known_or_configured() counts static-registry membership as "known" + and does NOT require authenticated-credential evidence. This is on purpose: the + only fully-reliable "is this provider authenticated" signal is the live auth + store / catalog rebuild — exactly the cost the hot path avoids — and a cheap + env/config-only credential check would mis-classify OAuth/auth-store providers + (ollama-cloud among them) and re-introduce the original silent-revert bug. So a + known-but-unconfigured pick like "@deepseek:deepseek-v4-pro" under an + Anthropic-only setup is kept; the user gets a clear run-time auth error rather + than a silent swap to the default. + + If a future change adds reliable cheap credential evidence and flips this to + revert-when-unconfigured, update this expectation (and the helper docstring). + """ + import api.routes as routes + + monkeypatch.setattr( + routes, + "get_available_models", + lambda: { + "active_provider": "anthropic", + "default_model": "claude-opus-4.8", + "groups": [ + { + "provider": "Anthropic", + "provider_id": "anthropic", + "models": [{"id": "claude-opus-4.8", "label": "Opus"}], + }, + ], + }, + ) + + model, provider, changed = routes._resolve_compatible_session_model_state( + "@deepseek:deepseek-v4-pro", + "deepseek", + explicit_model_pick=False, + ) + assert model == "@deepseek:deepseek-v4-pro", ( + "a known built-in provider is intentionally preserved on a cold catalog " + f"even without configured credentials, got {model!r}" + ) + assert provider == "deepseek" + assert changed is False + + +def test_at_provider_explicit_pick_not_rerouted_by_family_match(monkeypatch): + """An explicit pick must NOT be rerouted by the active-provider family-match + repair, even when the bare id looks like the active family and the catalog is + cold. + + Regression for the branch-order bug: the explicit-pick guard sits at the top of + the @provider:model branch, above the _model_matches_active_provider_family + repair. 
Without that ordering, an explicit "@ollama-cloud:gpt-oss-120b" under an + OpenAI-active agent would be stripped to bare "gpt-oss-120b" and routed to + OpenAI (the family match fires on the "gpt" prefix) — silently swapping the + user's deliberately-chosen ollama-cloud provider. + """ + import api.routes as routes + + monkeypatch.setattr( + routes, + "get_available_models", + lambda: { + "active_provider": "openai", + "default_model": "gpt-5.5", + "groups": [ + { + "provider": "OpenAI", + "provider_id": "openai", + "models": [{"id": "gpt-5.5", "label": "GPT-5.5"}], + }, + ], + }, + ) + + model, provider, changed = routes._resolve_compatible_session_model_state( + "@ollama-cloud:gpt-oss-120b", + "ollama-cloud", + explicit_model_pick=True, + ) + assert model == "@ollama-cloud:gpt-oss-120b", ( + "explicit @provider:model pick must survive the family-match repair, " + f"got {model!r}" + ) + assert provider == "ollama-cloud" + assert changed is False + + +def test_at_provider_explicit_pick_is_honored_even_when_unroutable(monkeypatch): + """A fresh, explicit @provider:model pick is honored verbatim even when its + bare id is a first-party family name and the provider is absent from the + catalog. explicit_model_pick is only set on a deliberate user pick, so it must + win over the stale-cross-provider repair. 
Only the *non-explicit* path (2nd+ + turn / chat switch) repairs such a model to the default (see the test below).""" + import api.routes as routes + + monkeypatch.setattr( + routes, + "get_available_models", + lambda: { + "active_provider": "openai-codex", + "default_model": "gpt-5.5", + "groups": [ + { + "provider": "OpenAI Codex", + "provider_id": "openai-codex", + "models": [{"id": "gpt-5.5", "label": "GPT-5.5"}], + }, + ], + }, + ) + + model, provider, changed = routes._resolve_compatible_session_model_state( + "@copilot:claude-opus-4.6", + None, + explicit_model_pick=True, + ) + assert model == "@copilot:claude-opus-4.6" + # The explicit pick is returned with its own @-qualified provider hint intact, + # not rewritten to the active provider or the default's provider. + assert provider == "copilot" + assert changed is False + + +def test_at_provider_first_party_named_third_party_model_known_limitation(monkeypatch): + """Pin (not endorse) the known false-positive of the bare-name prefix heuristic. + + The first-party-family guard classifies a bare id purely by its name prefix + (gpt/claude/gemini), the same approximation _model_matches_active_provider_family + uses. A genuine third-party model whose name merely starts with one of those + prefixes — e.g. "@ollama:gpt4all-mini" (GPT4All is a third-party family) — is + therefore mis-classified as first-party and still reverts to the default on a + non-explicit resolve, the very behavior the sibling test prevents for + non-first-party-named ids. A name-only check cannot distinguish this case; + disambiguating it would require consulting the user's configured providers. + + This test documents the boundary so the limitation is tracked, not silent. If a + future change makes the classifier provider-aware, update this expectation to + assert preservation instead. 
+ """ + import api.routes as routes + + monkeypatch.setattr( + routes, + "get_available_models", + lambda: { + "active_provider": "anthropic", + "default_model": "claude-opus-4.8", + "groups": [ + { + "provider": "Anthropic", + "provider_id": "anthropic", + "models": [{"id": "claude-opus-4.8", "label": "Opus"}], + }, + ], + }, + ) + + # Non-explicit path: the gpt-prefixed third-party id is (imperfectly) treated + # as a stale first-party model and repaired to the default. + model, _provider, changed = routes._resolve_compatible_session_model_state( + "@ollama:gpt4all-mini", + "ollama", + explicit_model_pick=False, + ) + assert model == "claude-opus-4.8" + assert changed is True + + # An explicit pick still escapes the heuristic and is preserved, so the user + # always has a reliable way to select such a model. + model2, provider2, changed2 = routes._resolve_compatible_session_model_state( + "@ollama:gpt4all-mini", + "ollama", + explicit_model_pick=True, + ) + assert model2 == "@ollama:gpt4all-mini" + assert provider2 == "ollama" + assert changed2 is False + + def test_google_active_provider_keeps_valid_gemini_session_model(monkeypatch): """A Google-configured session must keep its Gemini model.""" import api.routes as routes From d7a97600ed714e28155f2ac3b3db24e01c6aba06 Mon Sep 17 00:00:00 2001 From: nesquena-hermes <[email protected]> Date: Wed, 10 Jun 2026 22:17:01 +0000 Subject: [PATCH 2/2] =?UTF-8?q?chore:=20stamp=20v0.51.354=20=E2=80=94=20Re?= =?UTF-8?q?lease=20LR=20(@provider:model=20cold-catalog=20preservation)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 851b2b9c3b..46d8fee397 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ - **New RFC: Stable Assistant Turn Anchors for Live-to-Final rendering.** Defines a frontend presentation/reconciliation model for anchoring one assistant turn across live 
streaming, settlement, replay/reload/recovery, Compact Worklog, Transparent Stream, terminal states, artifacts, and side effects. (#3926) +## [v0.51.354] — 2026-06-10 — Release LR (preserve explicit @provider:model picks across cold catalogs) + +### Fixed + +- **An explicit `@provider:model` pick no longer snaps back to the default model when the provider's group is briefly missing from the cached catalog.** Providers that discover their models live (ollama-cloud, deepseek, xai) can momentarily lack their group in the cached catalog snapshot used on hot `GET /api/session` and chat-switch paths; a selection like `@ollama-cloud:minimax-m3` was being silently reverted to the global default on the second and later turns. A fresh explicit pick is now always honored exactly as chosen, and on later turns and chat switches the resolver preserves the saved selection when the provider is known or configured (decided from the static registry + config, not the cold catalog), while a genuinely-unknown provider still falls back to the default instead of routing to an unrecognized one. Known limitation: on those later resolves, model ids that start with `gpt`, `claude`, or `gemini` are still treated as first-party and repaired to the default. The cached-catalog performance path is unchanged. (#3950) + ## [v0.51.353] — 2026-06-10 — Release LQ (cross-client live-turn recovery) ### Fixed