Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions docs/evaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,23 @@ env.set_concurrency(256)

The `renderer` client type requires the optional renderer package. Install it with `uv add "verifiers[renderers]"` before running evals with `--api-client-type renderer`.

#### Model precedence

`--model` / `-m` sets the inference client's model. A custom environment that
needs to know which model is in use can accept a `model` parameter in
`load_environment()` or in its constructor, or read the injected `model`
environment kwarg, so users do not have to repeat the same value in `--env-args`.

To use a different model inside the environment than the one driving inference,
pass it explicitly via `--env-args`:

```bash
prime eval run my-env -m google/gemma-3-27b-it -a '{"model": "qwen/qwen3-14b"}'
```

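That override changes only the environment's view of `model`; the inference
client still uses `--model`. In the example above, inference runs on
`google/gemma-3-27b-it` while the environment receives `qwen/qwen3-14b`.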

For convenience, define model endpoints in `./configs/endpoints.toml` to avoid repeating URL and key flags.

```toml
Expand Down
33 changes: 33 additions & 0 deletions tests/scripts/test_eval_model_kwarg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from verifiers.scripts.eval import build_eval_config


def test_resolved_model_lands_in_extra_env_kwargs(monkeypatch):
    """Check that the client model is mirrored into the extra env kwargs.

    The raw config carries no ``env_args``, so the built config is expected
    to expose the same model string both as ``model`` and under the
    ``"model"`` key of ``extra_env_kwargs``.
    """
    client_model = "openai/gpt-4.1-mini"

    # Make the variable named by ``api_key_var`` present in the environment
    # before the config is built.
    monkeypatch.setenv("OPENAI_API_KEY", "sk-test")

    config = build_eval_config(
        {
            "env_id": "math-python",
            "model": client_model,
            "api_base_url": "https://example.test/v1",
            "api_key_var": "OPENAI_API_KEY",
        }
    )

    assert config.model == client_model
    assert config.extra_env_kwargs.get("model") == client_model


def test_env_args_model_overrides_for_env_but_not_client(monkeypatch):
    """Check that a ``model`` given in ``env_args`` stays separate from the client model.

    The raw config sets one model at the top level and a different one inside
    ``env_args``. The built config is expected to keep the top-level value as
    ``model``, keep the ``env_args`` value untouched, and carry no ``"model"``
    entry (or a ``None`` one) in ``extra_env_kwargs``.
    """
    client_model = "openai/gpt-4.1-mini"
    env_model = "qwen/qwen3-14b"

    # Make the variable named by ``api_key_var`` present in the environment
    # before the config is built.
    monkeypatch.setenv("OPENAI_API_KEY", "sk-test")

    config = build_eval_config(
        {
            "env_id": "math-python",
            "model": client_model,
            "env_args": {"model": env_model},
            "api_base_url": "https://example.test/v1",
            "api_key_var": "OPENAI_API_KEY",
        }
    )

    assert config.model == client_model
    assert config.env_args.get("model") == env_model
    assert config.extra_env_kwargs.get("model") is None
Loading