Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions tests/test_renderer_client.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
from functools import lru_cache
from unittest.mock import patch

Expand Down Expand Up @@ -81,6 +82,155 @@ def test_renderer_client_uses_renderer_model_name_override():
)


def test_renderer_client_threads_chat_template_kwargs_into_pool():
"""``sampling_args.extra_body.chat_template_kwargs`` land on the
concrete RendererConfig (resolving Auto/None against
``MODEL_RENDERER_MAP`` when needed) and are stripped from the params
forwarded to /generate. Covers explicit / Auto / None bases."""
from renderers import AutoRendererConfig, Qwen3RendererConfig

bases = [
Qwen3RendererConfig(enable_thinking=True),
AutoRendererConfig(preserve_all_thinking=True),
None,
]
for base in bases:
RendererClient._shared_pools.clear()

client = object.__new__(RendererClient)
client._renderer = None
client._pool_size = 1
client._config = vf.ClientConfig(client_type="renderer", renderer_config=base)
client._client = object() # type: ignore[attr-defined]

sentinel_pool = RendererPool.__new__(RendererPool)
captured: dict = {}

async def _fake_generate(**kwargs):
captured.update(kwargs)
return {"content": "ok"}

with (
patch(
"verifiers.clients.renderer_client.create_renderer_pool",
return_value=sentinel_pool,
) as create_pool_mock,
patch(
"verifiers.clients.renderer_client.generate",
side_effect=_fake_generate,
),
):
asyncio.run(
client.get_native_response(
prompt=[{"role": "user", "content": "hi"}],
model="Qwen/Qwen3-8B",
sampling_args={
"extra_body": {
"chat_template_kwargs": {"enable_thinking": False},
"top_k": 20,
}
},
tools=None,
)
)

expected_preserve_all = (
base.preserve_all_thinking
if isinstance(base, AutoRendererConfig)
else False
)
create_pool_mock.assert_called_once_with(
"Qwen/Qwen3-8B",
Qwen3RendererConfig(
enable_thinking=False,
preserve_all_thinking=expected_preserve_all,
),
size=1,
)
assert captured["sampling_params"] == {"top_k": 20}


def test_renderer_client_auto_resolves_against_renderer_model_name_override():
"""When ``ClientConfig.renderer_model_name`` overrides the API request
model, auto-resolution looks up the OVERRIDE in ``MODEL_RENDERER_MAP``
(it's what loads the tokenizer the renderer holds) — not the request
model, which may not be in the map at all."""
from renderers import Qwen3RendererConfig

RendererClient._shared_pools.clear()

client = object.__new__(RendererClient)
client._renderer = None
client._pool_size = 1
client._config = vf.ClientConfig(
client_type="renderer",
renderer_model_name="Qwen/Qwen3-8B", # override
)
client._client = object() # type: ignore[attr-defined]

sentinel_pool = RendererPool.__new__(RendererPool)

async def _fake_generate(**kwargs):
return {"content": "ok"}

with (
patch(
"verifiers.clients.renderer_client.create_renderer_pool",
return_value=sentinel_pool,
) as create_pool_mock,
patch("verifiers.clients.renderer_client.generate", side_effect=_fake_generate),
):
asyncio.run(
client.get_native_response(
prompt=[{"role": "user", "content": "hi"}],
model="r8-smoke", # not in MODEL_RENDERER_MAP
sampling_args={
"extra_body": {"chat_template_kwargs": {"enable_thinking": False}}
},
tools=None,
)
)

# Resolves against renderer_model_name (Qwen/Qwen3-8B → "qwen3") rather
# than the request "r8-smoke" (which would fall through to default).
create_pool_mock.assert_called_once_with(
"Qwen/Qwen3-8B",
Qwen3RendererConfig(enable_thinking=False),
size=1,
)


def test_renderer_client_rejects_invalid_chat_template_kwargs():
"""Unknown / mistyped chat_template_kwargs surface as a pydantic
``ValidationError`` (``extra="forbid"`` on the typed RendererConfig)."""
from pydantic import ValidationError
from renderers import Qwen3RendererConfig

RendererClient._shared_pools.clear()

client = object.__new__(RendererClient)
client._renderer = None
client._pool_size = 1
client._config = vf.ClientConfig(
client_type="renderer", renderer_config=Qwen3RendererConfig()
)
client._client = object() # type: ignore[attr-defined]

with pytest.raises(ValidationError, match="enable_thinkng"):
asyncio.run(
client.get_native_response(
prompt=[{"role": "user", "content": "hi"}],
model="Qwen/Qwen3-8B",
sampling_args={
"extra_body": {
"chat_template_kwargs": {"enable_thinkng": False}, # typo
}
},
tools=None,
)
)


# Provenance: Eli's review on PR #1068, comment 3150580768.
# "RendererClient parses the GPT-OSS assistant tool call into ToolCall(name=...),
# but ToolEnv returns ToolMessage with only content/tool_call_id, and
Expand Down
88 changes: 82 additions & 6 deletions verifiers/clients/renderer_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,22 @@
from renderers import Message as RendererMessage
from renderers import OverlongPromptError as RendererOverlongPromptError
from renderers import (
AutoRendererConfig,
MultimodalRenderer,
ParsedToolCall,
RenderedTokens,
Renderer,
RendererConfig,
RendererPool,
ToolCallParseStatus,
ToolSpec,
config_from_name,
create_renderer_pool,
is_multimodal,
)
from renderers import ToolCall as RendererToolCall
from renderers import ToolCallFunction
from renderers.base import MODEL_RENDERER_MAP
from renderers.client import _maybe_offload, generate

from verifiers.clients.client import Client
Expand Down Expand Up @@ -374,6 +378,53 @@ async def _get_incremental_prompt_ids(
return None


def _resolve_renderer_config(
base: RendererConfig | None,
chat_template_kwargs: Mapping[str, Any] | None,
*,
renderer_model: str,
) -> RendererConfig | None:
"""Merge ``chat_template_kwargs`` into a typed ``RendererConfig``.

When ``base`` is ``None`` or ``AutoRendererConfig`` (would auto-resolve
inside ``renderers.create_renderer``), we pull resolution forward via
``MODEL_RENDERER_MAP`` so kwargs land on the concrete config variant
and pydantic validates them against the actual renderer's schema —
``AutoRendererConfig`` intentionally carries only ``preserve_*`` and
would reject template kwargs like ``enable_thinking``. ``renderer_model``
must match what the pool will tokenize with (i.e.
``ClientConfig.renderer_model_name`` when set, else the request model),
so resolution agrees with the tokenizer the renderer will hold.

Kwargs override fields with the same name on the (resolved) base.
Typed configs (``extra="forbid"``) reject unknown keys with a
field-path error; ``DefaultRendererConfig`` (``extra="allow"``) keeps
the escape hatch for arbitrary jinja kwargs.
"""
if not chat_template_kwargs:
return base

# Resolve auto → concrete (mirrors ``renderers._resolve_auto``) so
# ``enable_thinking`` etc. validate against the right schema instead of
# ``AutoRendererConfig``'s minimal one. Carries ``preserve_*`` across.
if base is None or isinstance(base, AutoRendererConfig):
renderer_name = MODEL_RENDERER_MAP.get(renderer_model, "default")
# ``config_from_name`` returns ``None`` only for ``"auto"``, which
# ``MODEL_RENDERER_MAP.get(..., "default")`` excludes — assert for ty.
concrete = config_from_name(renderer_name)
assert concrete is not None
if isinstance(base, AutoRendererConfig):
concrete = concrete.model_copy(
update={
"preserve_all_thinking": base.preserve_all_thinking,
"preserve_thinking_between_tool_calls": base.preserve_thinking_between_tool_calls,
}
)
base = cast(RendererConfig, concrete)

return type(base).model_validate({**base.model_dump(), **chat_template_kwargs})


class RendererClient(
Client[AsyncOpenAI, list[RendererMessage], dict[str, Any], ToolSpec]
):
Expand Down Expand Up @@ -419,13 +470,19 @@ async def close(self) -> None:

# ── Renderer management ─────────────────────────────────────────

def _get_renderer_or_pool(self, model: str) -> Renderer | RendererPool:
def _get_renderer_or_pool(
self,
model: str,
*,
renderer_config: RendererConfig | None = None,
) -> Renderer | RendererPool:
if self._renderer is not None:
return self._renderer

renderer_config = (
self._config.renderer_config if self._config is not None else None
)
if renderer_config is None:
renderer_config = (
self._config.renderer_config if self._config is not None else None
)
renderer_model = (
self._config.renderer_model_name
if self._config is not None and self._config.renderer_model_name is not None
Expand Down Expand Up @@ -477,14 +534,33 @@ async def get_native_response(
tools: list[ToolSpec] | None = None,
**kwargs: Any,
) -> dict[str, Any]:
renderer = self._get_renderer_or_pool(model)

args = dict(sampling_args)
extra_headers = {
**dict(args.pop("extra_headers", None) or {}),
**dict(kwargs.pop("extra_headers", None) or {}),
}
sampling_params: dict[str, Any] = dict(args.pop("extra_body", None) or {})

# ``chat_template_kwargs`` belong to the renderer, not the engine —
# peel them off the per-request sampling and fold them into the
# typed RendererConfig. Pool cache key already includes the
# effective config so identical kwargs reuse the same renderer.
chat_template_kwargs = sampling_params.pop("chat_template_kwargs", None)
# Auto-resolution must agree with the model the pool will tokenize
# against — ``renderer_model_name`` overrides the request ``model``
# when set (same precedence ``_get_renderer_or_pool`` uses below).
renderer_model = (
self._config.renderer_model_name
if self._config is not None and self._config.renderer_model_name is not None
else model
)
effective_cfg = _resolve_renderer_config(
self._config.renderer_config if self._config is not None else None,
chat_template_kwargs,
renderer_model=renderer_model,
)
Comment thread
cursor[bot] marked this conversation as resolved.
renderer = self._get_renderer_or_pool(model, renderer_config=effective_cfg)

for key in (
"temperature",
"top_p",
Expand Down
Loading