Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
0f7810e
add MultiAgentEnv for turn-based multi-agent environments
nph4rd Feb 7, 2026
a4fea39
rename Actor to Agent, add Protocol abstraction
nph4rd Feb 8, 2026
8dab76d
require Protocol in MultiAgentEnv, simplify docstrings
nph4rd Feb 8, 2026
2708169
update docstrings
nph4rd Feb 8, 2026
e8c04dc
add multi-agent reward functions for heterogeneous rewards
nph4rd Feb 25, 2026
65c2853
compute per-agent advantages for multi-agent rewards
nph4rd Feb 25, 2026
2ca7c72
include all rewards in per-agent rewards for multi-agent training
nph4rd Feb 25, 2026
0c8ce5e
add opponent-conditioned baselines for multi-agent advantage estimation
nph4rd Feb 26, 2026
b426660
add debug logging for opponent-conditioned baselines
nph4rd Feb 26, 2026
5bc1468
add trajectory structure debug
nph4rd Feb 26, 2026
2b37080
debug extras and state keys
nph4rd Feb 26, 2026
dce56cd
remove opponent-conditioned baselines for comparison test
nph4rd Feb 27, 2026
5425ebb
add per-agent baselines for multi-agent advantage computation
nph4rd Mar 4, 2026
0034333
fix score_rollout to support multi-agent reward functions
nph4rd Mar 6, 2026
902e3f7
normalize messages from build_agent_prompt before storing in trajectory
nph4rd Mar 6, 2026
c8d3715
add per-agent reward metrics for multi-agent environments
nph4rd Mar 7, 2026
e80aab6
add per-agent model routing for multi-policy lora training
nph4rd Mar 8, 2026
616bed6
point textarena to fork with kuhn poker fixes
nph4rd Mar 9, 2026
f430b7f
fix rubric rollout score import after rebase
nph4rd May 20, 2026
7d91582
add SpawningProtocol for hierarchical multi-agent envs
nph4rd May 21, 2026
b0d8526
resolve PEP 563 string annotations in _is_multiagent_func
nph4rd May 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ prime-pydantic-config = false
renderers = false
openenv-core = false

[tool.uv.sources]
textarena = { git = "https://github.com/nph4rd/TextArena.git", branch = "fix/kuhn-poker-phantom-ante" }

[tool.uv.extra-build-dependencies]
flash-attn = [{ requirement = "torch", match-runtime = true }]

Expand Down
20 changes: 20 additions & 0 deletions tests/test_rubric.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Tests for the Rubric class."""

from __future__ import annotations

from typing import cast

import pytest
Expand All @@ -8,6 +10,24 @@
from verifiers.types import RewardFunc, RolloutInput, RolloutTiming, State


# Regression test for `_is_multiagent_func` misclassifying functions defined
# under ``from __future__ import annotations`` (PEP 563). There the return
# annotation is the string ``"dict[str, float]"`` rather than the resolved
# generic alias, so ``get_origin(annotation)`` returns ``None`` and the function
# was routed through the individual-reward path, where ``float(dict)`` failed
# and the reward was coerced to 0. ``_is_multiagent_func`` now resolves the
# string with ``typing.get_type_hints``.
async def _multiagent_under_future_annotations(
    state: State, **_kwargs
) -> dict[str, float]:
    """Return a fixed per-agent reward mapping.

    The body is irrelevant to the regression test that uses this function;
    what matters is that the return annotation is stored as a string because
    this module enables ``from __future__ import annotations``.
    """
    per_agent_rewards = dict(agent_a=0.5, agent_b=1.0)
    return per_agent_rewards


def test_is_multiagent_func_handles_future_annotations():
    """A string ``dict[str, float]`` return annotation must count as multi-agent."""
    classified_as_multiagent = Rubric()._is_multiagent_func(
        _multiagent_under_future_annotations
    )
    assert classified_as_multiagent is True


class TestRubric:
"""Test cases for the Rubric class."""

Expand Down
225 changes: 225 additions & 0 deletions tests/test_spawning_protocol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
"""End-to-end test for SpawningProtocol via a toy proposer/solver env.

The proposer picks an integer N. The protocol then spawns k child solver
rollouts whose job is to double N. We assert that:

- the parent's trajectory contains both proposer and child solver steps
- child steps are tagged with agent_id="solver" and is_trainable
- state["extras"]["spawns"] carries SpawnResult(s) with one State per child
- each child's reward was computed by its own rubric (score was 1.0 only
when the solver's completion equaled 2*N)
"""

from __future__ import annotations

import re

import pytest
from datasets import Dataset

import verifiers as vf
from verifiers import (
Agent,
MultiAgentEnv,
Rubric,
SingleTurnEnv,
SpawningProtocol,
SpawnSpec,
)
from verifiers.types import Messages, State


PROPOSER_NUMBER = 5 # what the proposer always picks in this test
NUM_CHILDREN = 3


# --------------------------------------------------------------------------- #
# Child env: simple single-turn doubling env.
# --------------------------------------------------------------------------- #


def _doubling_correct(completion, answer, **_) -> float:
    """Score 1.0 when the first integer in the completion equals ``answer``.

    Args:
        completion: Either a plain string, or a chat-style list of message
            mappings, in which case the last message's ``"content"`` is scored.
        answer: Expected value; anything ``int()`` accepts.
        **_: Extra keyword arguments are accepted and ignored.

    Returns:
        1.0 on a match, otherwise 0.0 -- including when the completion is an
        empty list, its content is not a string, or it contains no integer.
    """
    if isinstance(completion, str):
        text = completion
    elif completion:
        text = completion[-1]["content"]
    else:
        # An empty message list means there is nothing to score.
        return 0.0
    if not isinstance(text, str):
        # Missing / non-text content (e.g. None) cannot contain an answer.
        return 0.0
    match = re.search(r"-?\d+", text)
    if match is None:
        return 0.0
    return 1.0 if int(match.group(0)) == int(answer) else 0.0


@pytest.fixture
def child_solver_env(mock_client):
    """Build the single-turn child env that asks a solver to double the number.

    The dataset holds one "Double N." prompt whose answer is 2*N, and the
    rubric scores completions with ``_doubling_correct``.
    """
    question = f"Double {PROPOSER_NUMBER}."
    doubled = str(2 * PROPOSER_NUMBER)

    solver_dataset = Dataset.from_dict(
        {
            "prompt": [[{"role": "user", "content": question}]],
            "answer": [doubled],
            "example_id": [0],
        }
    )
    solver_rubric = Rubric(funcs=[_doubling_correct])

    solver_env = SingleTurnEnv(
        client=mock_client,
        model="test-model",
        dataset=solver_dataset,
        parser=vf.Parser(),
        rubric=solver_rubric,
    )

    # Stage the canned solver reply: the "Double N." prompt is always answered
    # with the correct value 2*N. The proposer shares this mock client; its own
    # reply is staged separately by the proposer_env fixture.
    mock_client.add_response([{"role": "user", "content": question}], doubled)
    return solver_env


# --------------------------------------------------------------------------- #
# Parent env: proposer that emits a number, then spawns NUM_CHILDREN solvers.
# --------------------------------------------------------------------------- #


class _OneShotSpawnProtocol(SpawningProtocol):
    """Spawns child solvers exactly once, immediately after the proposer's turn."""

    def __init__(self, child_env, agent_id: str, num_children: int):
        """Store the spawn configuration.

        Args:
            child_env: Environment handed to the SpawnSpec as ``child_env``.
            agent_id: Agent id handed to the SpawnSpec as ``agent_id``.
            num_children: Number of child inputs generated per spawn.
        """
        self._child_env = child_env
        self._agent_id = agent_id
        self._num_children = num_children

    def get_initial_agent(self, state: State) -> str:
        """Return the id of the agent that opens the rollout."""
        return "proposer"

    def get_next_agent(self, state: State) -> str:
        """Return the proposer; this protocol has no other turn-taking agent."""
        # NOTE(review): the rollout is expected to end after the proposer's
        # single turn, so this is presumably never consulted for a real second
        # turn -- confirm against MultiAgentEnv.rollout().
        return "proposer"

    def should_spawn(self, state: State) -> bool:
        """Return True only right after the proposer acted and nothing was spawned yet."""
        # Read extras defensively, matching _ProposerEnv.proposer_done.
        extras = state.get("extras") or {}
        if extras.get("spawns"):
            return False
        trajectory = state["trajectory"]
        if not trajectory:
            return False
        last_agent = trajectory[-1].get("extras", {}).get("agent_id")
        return last_agent == "proposer"

    def get_spawn_specs(self, state: State) -> list[SpawnSpec]:
        """Build one SpawnSpec asking ``num_children`` solvers to double the pick.

        Returns:
            A single-element list; its ``inputs`` hold one "Double N." prompt
            per child with answer ``str(2 * N)`` and ``example_id`` 0..k-1.

        Raises:
            ValueError: If the proposer's last completion contains no integer.
        """
        # The proposer's pick is the first integer found in its last message.
        text = state["trajectory"][-1]["completion"][-1]["content"]
        match = re.search(r"-?\d+", text)
        if match is None:
            raise ValueError(f"proposer completion contains no integer: {text!r}")
        n = int(match.group(0))
        # Each child gets its own prompt list so that a child mutating its
        # messages cannot affect its siblings.
        inputs = [
            {
                "prompt": [{"role": "user", "content": f"Double {n}."}],
                "answer": str(2 * n),
                "example_id": i,
            }
            for i in range(self._num_children)
        ]
        return [
            SpawnSpec(
                agent_id=self._agent_id,
                child_env=self._child_env,
                inputs=inputs,
                is_trainable=True,
            )
        ]


class _ProposerEnv(MultiAgentEnv):
    """Parent env: asks for an integer and stops once spawns have been recorded."""

    async def build_agent_prompt(self, agent_id: str, state: State) -> Messages:
        """Return the same single user message regardless of agent or state."""
        instruction = {
            "role": "user",
            "content": "Pick an integer and the solver will try to double it.",
        }
        return [instruction]

    @vf.stop
    async def proposer_done(self, state: State, **kwargs) -> bool:
        """Stop condition: true once ``state["extras"]["spawns"]`` is non-empty."""
        # NOTE(review): relies on MultiAgentEnv.rollout() running its spawn
        # step before the next is_completed check, so the children have
        # finished by the time this returns True -- confirm.
        extras = state.get("extras", {})
        recorded_spawns = extras.get("spawns")
        return bool(recorded_spawns)


@pytest.fixture
def proposer_env(mock_client, child_solver_env):
    """Build the parent env with a one-shot spawn protocol and a staged reply.

    Both "proposer" and "solver" are registered as trainable agents, and the
    mock client is primed to answer the proposer prompt with PROPOSER_NUMBER.
    """
    spawn_protocol = _OneShotSpawnProtocol(
        child_env=child_solver_env, agent_id="solver", num_children=NUM_CHILDREN
    )
    parent_rubric = Rubric()
    parent_dataset = Dataset.from_dict(
        {"prompt": [[{"role": "user", "content": "go"}]], "example_id": [0]}
    )
    parent_env = _ProposerEnv(
        protocol=spawn_protocol,
        client=mock_client,
        model="test-model",
        dataset=parent_dataset,
        parser=vf.Parser(),
        rubric=parent_rubric,
        max_turns=8,
    )

    for agent_name in ("proposer", "solver"):
        parent_env.register_agent(
            Agent(id=agent_name, system_prompt="", is_trainable=True)
        )

    # Canned proposer reply, keyed on the prompt that build_agent_prompt emits.
    proposer_prompt = [
        {
            "role": "user",
            "content": "Pick an integer and the solver will try to double it.",
        }
    ]
    mock_client.add_response(proposer_prompt, str(PROPOSER_NUMBER))
    return parent_env


# --------------------------------------------------------------------------- #
# Tests
# --------------------------------------------------------------------------- #


@pytest.mark.asyncio
async def test_spawning_protocol_runs_children_and_records_spawns(
    proposer_env, mock_client
):
    """A single proposer turn yields NUM_CHILDREN solver steps and one recorded spawn."""
    rollout_input = {"prompt": [{"role": "user", "content": "go"}], "example_id": 0}
    final_state = await proposer_env.rollout(
        rollout_input,
        client=mock_client,
        model="test-model",
        sampling_args={"temperature": 1.0},
    )

    # Parent trajectory: exactly one proposer step, plus one step per child.
    step_owners = [
        step["extras"].get("agent_id") for step in final_state["trajectory"]
    ]
    assert sum(1 for owner in step_owners if owner == "proposer") == 1
    assert sum(1 for owner in step_owners if owner == "solver") == NUM_CHILDREN

    # Exactly one spawn was recorded, for the solver, with one state per child.
    recorded_spawns = final_state["extras"]["spawns"]
    assert len(recorded_spawns) == 1
    only_spawn = recorded_spawns[0]
    assert only_spawn.spec.agent_id == "solver"
    assert len(only_spawn.states) == NUM_CHILDREN

    # The mocked solver always answers 2*N, so every child's reward must be 1.0.
    assert all(child["reward"] == 1.0 for child in only_spawn.states)


@pytest.mark.asyncio
async def test_child_trajectory_steps_carry_is_trainable_tag(
    proposer_env, mock_client
):
    """Solver steps embedded in the parent trajectory keep ``is_trainable=True``."""
    rollout_input = {"prompt": [{"role": "user", "content": "go"}], "example_id": 0}
    final_state = await proposer_env.rollout(
        rollout_input,
        client=mock_client,
        model="test-model",
        sampling_args={"temperature": 1.0},
    )

    # Collect the is_trainable tag of every step attributed to the solver.
    trainable_flags = [
        step["extras"].get("is_trainable")
        for step in final_state["trajectory"]
        if step["extras"].get("agent_id") == "solver"
    ]
    assert trainable_flags, "expected child steps in parent trajectory"
    # The flag is set on the SpawnSpec and must reach each embedded step.
    assert all(flag is True for flag in trainable_flags)
14 changes: 4 additions & 10 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions verifiers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,15 @@
"Terminus2",
"Terminus2Config",
"SignalConfig",
"Agent",
"Protocol",
"RoundRobinProtocol",
"SpawningProtocol",
"SpawnSpec",
"SpawnResult",
"Environment",
"MultiTurnEnv",
"MultiAgentEnv",
"SingleTurnEnv",
"PythonEnv",
"SandboxEnv",
Expand Down Expand Up @@ -169,6 +176,13 @@
"SingleTurnEnv": "verifiers.envs.singleturn_env:SingleTurnEnv",
"StatefulToolEnv": "verifiers.envs.stateful_tool_env:StatefulToolEnv",
"ToolEnv": "verifiers.envs.tool_env:ToolEnv",
"Agent": "verifiers.envs.agent:Agent",
"Protocol": "verifiers.envs.protocol:Protocol",
"RoundRobinProtocol": "verifiers.envs.protocol:RoundRobinProtocol",
"SpawningProtocol": "verifiers.envs.protocol:SpawningProtocol",
"SpawnSpec": "verifiers.envs.protocol:SpawnSpec",
"SpawnResult": "verifiers.envs.protocol:SpawnResult",
"MultiAgentEnv": "verifiers.envs.multiagent_env:MultiAgentEnv",
"EnvGroup": "verifiers.envs.env_group:EnvGroup",
"JudgeRubric": "verifiers.rubrics.judge_rubric:JudgeRubric",
"load_environment": "verifiers.utils.env_utils:load_environment",
Expand Down Expand Up @@ -281,8 +295,17 @@ def __getattr__(name: str):
from .clients.openai_completions_client import OpenAICompletionsClient # noqa: F401
from .clients.openai_responses_client import OpenAIResponsesClient # noqa: F401
from .clients.renderer_client import RendererClient # noqa: F401
from .envs.agent import Agent # noqa: F401
from .envs.protocol import ( # noqa: F401
Protocol,
RoundRobinProtocol,
SpawningProtocol,
SpawnResult,
SpawnSpec,
)
from .envs.env_group import EnvGroup # noqa: F401
from .envs.environment import Environment # noqa: F401
from .envs.multiagent_env import MultiAgentEnv # noqa: F401
from .envs.experimental.cli_agent_env import CliAgentEnv # noqa: F401
from .envs.experimental.gym_env import GymEnv # noqa: F401
from .envs.experimental.harbor_env import HarborEnv # noqa: F401
Expand Down
Loading