diff --git a/docs/agents.md b/docs/agents.md
index 89df03a14e..e9730c1f42 100644
--- a/docs/agents.md
+++ b/docs/agents.md
@@ -284,6 +284,7 @@ async def main():
system_prompts=(),
system_prompt_functions=[],
system_prompt_dynamic_functions={},
+ response_prefix=None,
),
ModelRequestNode(
request=ModelRequest(
@@ -293,7 +294,8 @@ async def main():
timestamp=datetime.datetime(...),
)
]
- )
+ ),
+ response_prefix=None,
),
CallToolsNode(
model_response=ModelResponse(
@@ -346,6 +348,7 @@ async def main():
system_prompts=(),
system_prompt_functions=[],
system_prompt_dynamic_functions={},
+ response_prefix=None,
),
ModelRequestNode(
request=ModelRequest(
@@ -355,7 +358,8 @@ async def main():
timestamp=datetime.datetime(...),
)
]
- )
+ ),
+ response_prefix=None,
),
CallToolsNode(
model_response=ModelResponse(
diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
index b02c3762b3..45d9e8925c 100644
--- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py
+++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -163,6 +163,7 @@ class UserPromptNode(AgentNode[DepsT, NodeRunEndT]):
_: dataclasses.KW_ONLY
deferred_tool_results: DeferredToolResults | None = None
+ response_prefix: str | None = None
instructions: str | None = None
instructions_functions: list[_system_prompt.SystemPromptRunner[DepsT]] = dataclasses.field(default_factory=list)
@@ -247,7 +248,7 @@ async def run( # noqa: C901
next_message.instructions = await ctx.deps.get_instructions(run_context)
- return ModelRequestNode[DepsT, NodeRunEndT](request=next_message)
+ return ModelRequestNode[DepsT, NodeRunEndT](request=next_message, response_prefix=self.response_prefix)
async def _handle_deferred_tool_results( # noqa: C901
self,
@@ -348,6 +349,7 @@ async def _sys_parts(self, run_context: RunContext[DepsT]) -> list[_messages.Mod
async def _prepare_request_parameters(
ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]],
+ response_prefix: str | None = None,
) -> models.ModelRequestParameters:
"""Build tools and create an agent model."""
output_schema = ctx.deps.output_schema
@@ -373,6 +375,7 @@ async def _prepare_request_parameters(
output_tools=output_tools,
output_object=output_object,
allow_text_output=allow_text_output,
+ response_prefix=response_prefix,
)
@@ -381,6 +384,7 @@ class ModelRequestNode(AgentNode[DepsT, NodeRunEndT]):
"""The node that makes a request to the model using the last message in state.message_history."""
request: _messages.ModelRequest
+ response_prefix: str | None = None
_result: CallToolsNode[DepsT, NodeRunEndT] | None = field(repr=False, init=False, default=None)
_did_stream: bool = field(repr=False, init=False, default=False)
@@ -469,7 +473,9 @@ async def _prepare_request(
# See `tests/test_tools.py::test_parallel_tool_return_with_deferred` for an example where this is necessary
message_history = _clean_message_history(message_history)
- model_request_parameters = await _prepare_request_parameters(ctx)
+        # TODO: Also validate ctx.deps.model.profile support here; Agent.iter already raises UserError for unsupported models.
+
+ model_request_parameters = await _prepare_request_parameters(ctx, self.response_prefix)
model_request_parameters = ctx.deps.model.customize_request_parameters(model_request_parameters)
model_settings = ctx.deps.model_settings
diff --git a/pydantic_ai_slim/pydantic_ai/agent/__init__.py b/pydantic_ai_slim/pydantic_ai/agent/__init__.py
index f1d3d4e02e..e93ae4c424 100644
--- a/pydantic_ai_slim/pydantic_ai/agent/__init__.py
+++ b/pydantic_ai_slim/pydantic_ai/agent/__init__.py
@@ -438,6 +438,7 @@ def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[AgentRun[AgentDepsT, OutputDataT]]: ...
@overload
@@ -455,6 +456,7 @@ def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[AgentRun[AgentDepsT, RunOutputDataT]]: ...
@asynccontextmanager
@@ -472,6 +474,7 @@ async def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
) -> AsyncIterator[AgentRun[AgentDepsT, Any]]:
"""A contextmanager which can be used to iterate over the agent graph's nodes as they are executed.
@@ -505,6 +508,7 @@ async def main():
system_prompts=(),
system_prompt_functions=[],
system_prompt_dynamic_functions={},
+ response_prefix=None,
),
ModelRequestNode(
request=ModelRequest(
@@ -514,7 +518,8 @@ async def main():
timestamp=datetime.datetime(...),
)
]
- )
+ ),
+ response_prefix=None,
),
CallToolsNode(
model_response=ModelResponse(
@@ -544,6 +549,7 @@ async def main():
usage: Optional usage to start with, useful for resuming a conversation or agents used in tools.
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
+ response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -553,6 +559,13 @@ async def main():
model_used = self._get_model(model)
del model
+ # Validate response_prefix support
+ if response_prefix is not None and not model_used.profile.supports_response_prefix:
+ raise exceptions.UserError(
+ f'Model {model_used.model_name} does not support response prefix. '
+ 'Response prefix is only supported by certain models like Anthropic Claude and some OpenAI-compatible models.'
+ )
+
deps = self._get_deps(deps)
output_schema = self._prepare_output_schema(output_type, model_used.profile)
@@ -640,6 +653,7 @@ async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None:
system_prompts=self._system_prompts,
system_prompt_functions=self._system_prompt_functions,
system_prompt_dynamic_functions=self._system_prompt_dynamic_functions,
+ response_prefix=response_prefix,
)
agent_name = self.name or 'agent'
diff --git a/pydantic_ai_slim/pydantic_ai/agent/abstract.py b/pydantic_ai_slim/pydantic_ai/agent/abstract.py
index 8d6c9ff293..560f926a21 100644
--- a/pydantic_ai_slim/pydantic_ai/agent/abstract.py
+++ b/pydantic_ai_slim/pydantic_ai/agent/abstract.py
@@ -127,6 +127,7 @@ async def run(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[OutputDataT]: ...
@overload
@@ -145,6 +146,7 @@ async def run(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[RunOutputDataT]: ...
async def run(
@@ -162,6 +164,7 @@ async def run(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[Any]:
"""Run the agent with a user prompt in async mode.
@@ -194,6 +197,7 @@ async def main():
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
event_stream_handler: Optional handler for events from the model's streaming response and the agent's execution of tools to use for this run.
+ response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -214,6 +218,7 @@ async def main():
usage_limits=usage_limits,
usage=usage,
toolsets=toolsets,
+ response_prefix=response_prefix,
) as agent_run:
async for node in agent_run:
if event_stream_handler is not None and (
@@ -241,6 +246,7 @@ def run_sync(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[OutputDataT]: ...
@overload
@@ -259,6 +265,7 @@ def run_sync(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[RunOutputDataT]: ...
def run_sync(
@@ -276,6 +283,7 @@ def run_sync(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[Any]:
"""Synchronously run the agent with a user prompt.
@@ -307,6 +315,7 @@ def run_sync(
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
event_stream_handler: Optional handler for events from the model's streaming response and the agent's execution of tools to use for this run.
+ response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -328,6 +337,7 @@ def run_sync(
infer_name=False,
toolsets=toolsets,
event_stream_handler=event_stream_handler,
+ response_prefix=response_prefix,
)
)
@@ -347,6 +357,7 @@ def run_stream(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[result.StreamedRunResult[AgentDepsT, OutputDataT]]: ...
@overload
@@ -365,6 +376,7 @@ def run_stream(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[result.StreamedRunResult[AgentDepsT, RunOutputDataT]]: ...
@asynccontextmanager
@@ -383,6 +395,7 @@ async def run_stream( # noqa C901
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AsyncIterator[result.StreamedRunResult[AgentDepsT, Any]]:
"""Run the agent with a user prompt in async streaming mode.
@@ -424,6 +437,7 @@ async def main():
event_stream_handler: Optional handler for events from the model's streaming response and the agent's execution of tools to use for this run.
It will receive all the events up until the final result is found, which you can then read or stream from inside the context manager.
Note that it does _not_ receive any events after the final result is found.
+ response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -448,6 +462,7 @@ async def main():
usage=usage,
infer_name=False,
toolsets=toolsets,
+ response_prefix=response_prefix,
) as agent_run:
first_node = agent_run.next_node # start with the first node
assert isinstance(first_node, _agent_graph.UserPromptNode) # the first node should be a user prompt node
@@ -558,6 +573,7 @@ def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[AgentRun[AgentDepsT, OutputDataT]]: ...
@overload
@@ -575,6 +591,7 @@ def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[AgentRun[AgentDepsT, RunOutputDataT]]: ...
@asynccontextmanager
@@ -593,6 +610,7 @@ async def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
) -> AsyncIterator[AgentRun[AgentDepsT, Any]]:
"""A contextmanager which can be used to iterate over the agent graph's nodes as they are executed.
@@ -626,6 +644,7 @@ async def main():
system_prompts=(),
system_prompt_functions=[],
system_prompt_dynamic_functions={},
+ response_prefix=None,
),
ModelRequestNode(
request=ModelRequest(
@@ -635,7 +654,8 @@ async def main():
timestamp=datetime.datetime(...),
)
]
- )
+ ),
+ response_prefix=None,
),
CallToolsNode(
model_response=ModelResponse(
@@ -665,6 +685,7 @@ async def main():
usage: Optional usage to start with, useful for resuming a conversation or agents used in tools.
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
+ response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
diff --git a/pydantic_ai_slim/pydantic_ai/agent/wrapper.py b/pydantic_ai_slim/pydantic_ai/agent/wrapper.py
index 36f7969323..5cc1772141 100644
--- a/pydantic_ai_slim/pydantic_ai/agent/wrapper.py
+++ b/pydantic_ai_slim/pydantic_ai/agent/wrapper.py
@@ -81,6 +81,7 @@ def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[AgentRun[AgentDepsT, OutputDataT]]: ...
@overload
@@ -98,6 +99,7 @@ def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[AgentRun[AgentDepsT, RunOutputDataT]]: ...
@asynccontextmanager
@@ -115,6 +117,7 @@ async def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
) -> AsyncIterator[AgentRun[AgentDepsT, Any]]:
"""A contextmanager which can be used to iterate over the agent graph's nodes as they are executed.
@@ -148,6 +151,7 @@ async def main():
system_prompts=(),
system_prompt_functions=[],
system_prompt_dynamic_functions={},
+ response_prefix=None,
),
ModelRequestNode(
request=ModelRequest(
@@ -157,7 +161,8 @@ async def main():
timestamp=datetime.datetime(...),
)
]
- )
+ ),
+ response_prefix=None,
),
CallToolsNode(
model_response=ModelResponse(
@@ -187,6 +192,7 @@ async def main():
usage: Optional usage to start with, useful for resuming a conversation or agents used in tools.
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
+ response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -203,6 +209,7 @@ async def main():
usage=usage,
infer_name=infer_name,
toolsets=toolsets,
+ response_prefix=response_prefix,
) as run:
yield run
diff --git a/pydantic_ai_slim/pydantic_ai/durable_exec/dbos/_agent.py b/pydantic_ai_slim/pydantic_ai/durable_exec/dbos/_agent.py
index a12c9e70c0..45f646314f 100644
--- a/pydantic_ai_slim/pydantic_ai/durable_exec/dbos/_agent.py
+++ b/pydantic_ai_slim/pydantic_ai/durable_exec/dbos/_agent.py
@@ -120,6 +120,7 @@ async def wrapped_run_workflow(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AgentRunResult[Any]:
with self._dbos_overrides():
@@ -136,6 +137,7 @@ async def wrapped_run_workflow(
infer_name=infer_name,
toolsets=toolsets,
event_stream_handler=event_stream_handler,
+ response_prefix=response_prefix,
**_deprecated_kwargs,
)
@@ -157,6 +159,7 @@ def wrapped_run_sync_workflow(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AgentRunResult[Any]:
with self._dbos_overrides():
@@ -173,6 +176,7 @@ def wrapped_run_sync_workflow(
infer_name=infer_name,
toolsets=toolsets,
event_stream_handler=event_stream_handler,
+ response_prefix=response_prefix,
**_deprecated_kwargs,
)
@@ -245,6 +249,7 @@ async def run(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[OutputDataT]: ...
@overload
@@ -263,6 +268,7 @@ async def run(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[RunOutputDataT]: ...
async def run(
@@ -280,6 +286,7 @@ async def run(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AgentRunResult[Any]:
"""Run the agent with a user prompt in async mode.
@@ -313,6 +320,7 @@ async def main():
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
event_stream_handler: Optional event stream handler to use for this run.
+            response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -330,6 +338,7 @@ async def main():
infer_name=infer_name,
toolsets=toolsets,
event_stream_handler=event_stream_handler,
+ response_prefix=response_prefix,
**_deprecated_kwargs,
)
@@ -349,6 +358,7 @@ def run_sync(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[OutputDataT]: ...
@overload
@@ -367,6 +377,7 @@ def run_sync(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[RunOutputDataT]: ...
def run_sync(
@@ -384,6 +395,7 @@ def run_sync(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AgentRunResult[Any]:
"""Synchronously run the agent with a user prompt.
@@ -416,6 +428,7 @@ def run_sync(
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
event_stream_handler: Optional event stream handler to use for this run.
+            response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -433,6 +446,7 @@ def run_sync(
infer_name=infer_name,
toolsets=toolsets,
event_stream_handler=event_stream_handler,
+ response_prefix=response_prefix,
**_deprecated_kwargs,
)
@@ -452,6 +466,7 @@ def run_stream(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[StreamedRunResult[AgentDepsT, OutputDataT]]: ...
@overload
@@ -470,6 +485,7 @@ def run_stream(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[StreamedRunResult[AgentDepsT, RunOutputDataT]]: ...
@asynccontextmanager
@@ -488,6 +504,7 @@ async def run_stream(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AsyncIterator[StreamedRunResult[AgentDepsT, Any]]:
"""Run the agent with a user prompt in async mode, returning a streamed response.
@@ -518,6 +535,7 @@ async def main():
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
event_stream_handler: Optional event stream handler to use for this run. It will receive all the events up until the final result is found, which you can then read or stream from inside the context manager.
+            response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -542,6 +560,7 @@ async def main():
infer_name=infer_name,
toolsets=toolsets,
event_stream_handler=event_stream_handler,
+ response_prefix=response_prefix,
**_deprecated_kwargs,
) as result:
yield result
@@ -561,6 +580,7 @@ def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AbstractAsyncContextManager[AgentRun[AgentDepsT, OutputDataT]]: ...
@@ -579,6 +599,7 @@ def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AbstractAsyncContextManager[AgentRun[AgentDepsT, RunOutputDataT]]: ...
@@ -597,6 +618,7 @@ async def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AsyncIterator[AgentRun[AgentDepsT, Any]]:
"""A contextmanager which can be used to iterate over the agent graph's nodes as they are executed.
@@ -631,6 +653,7 @@ async def main():
system_prompts=(),
system_prompt_functions=[],
system_prompt_dynamic_functions={},
+ response_prefix=None,
),
ModelRequestNode(
request=ModelRequest(
@@ -640,7 +663,8 @@ async def main():
timestamp=datetime.datetime(...),
)
]
- )
+ ),
+ response_prefix=None,
),
CallToolsNode(
model_response=ModelResponse(
@@ -670,6 +694,7 @@ async def main():
usage: Optional usage to start with, useful for resuming a conversation or agents used in tools.
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
+            response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -692,6 +717,7 @@ async def main():
usage=usage,
infer_name=infer_name,
toolsets=toolsets,
+ response_prefix=response_prefix,
**_deprecated_kwargs,
) as run:
yield run
diff --git a/pydantic_ai_slim/pydantic_ai/durable_exec/temporal/_agent.py b/pydantic_ai_slim/pydantic_ai/durable_exec/temporal/_agent.py
index cb284b6097..6759bb507b 100644
--- a/pydantic_ai_slim/pydantic_ai/durable_exec/temporal/_agent.py
+++ b/pydantic_ai_slim/pydantic_ai/durable_exec/temporal/_agent.py
@@ -267,6 +267,7 @@ async def run(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[OutputDataT]: ...
@overload
@@ -285,6 +286,7 @@ async def run(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[RunOutputDataT]: ...
async def run(
@@ -302,6 +304,7 @@ async def run(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AgentRunResult[Any]:
"""Run the agent with a user prompt in async mode.
@@ -335,6 +338,7 @@ async def main():
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
event_stream_handler: Optional event stream handler to use for this run.
+ response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -358,6 +362,7 @@ async def main():
infer_name=infer_name,
toolsets=toolsets,
event_stream_handler=event_stream_handler or self.event_stream_handler,
+ response_prefix=response_prefix,
**_deprecated_kwargs,
)
@@ -377,6 +382,7 @@ def run_sync(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[OutputDataT]: ...
@overload
@@ -395,6 +401,7 @@ def run_sync(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AgentRunResult[RunOutputDataT]: ...
def run_sync(
@@ -412,6 +419,7 @@ def run_sync(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AgentRunResult[Any]:
"""Synchronously run the agent with a user prompt.
@@ -444,6 +452,7 @@ def run_sync(
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
event_stream_handler: Optional event stream handler to use for this run.
+ response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -466,6 +475,7 @@ def run_sync(
infer_name=infer_name,
toolsets=toolsets,
event_stream_handler=event_stream_handler,
+ response_prefix=response_prefix,
**_deprecated_kwargs,
)
@@ -485,6 +495,7 @@ def run_stream(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[StreamedRunResult[AgentDepsT, OutputDataT]]: ...
@overload
@@ -503,6 +514,7 @@ def run_stream(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
) -> AbstractAsyncContextManager[StreamedRunResult[AgentDepsT, RunOutputDataT]]: ...
@asynccontextmanager
@@ -521,6 +533,7 @@ async def run_stream(
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AsyncIterator[StreamedRunResult[AgentDepsT, Any]]:
"""Run the agent with a user prompt in async mode, returning a streamed response.
@@ -551,6 +564,7 @@ async def main():
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
event_stream_handler: Optional event stream handler to use for this run. It will receive all the events up until the final result is found, which you can then read or stream from inside the context manager.
+ response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -575,6 +589,7 @@ async def main():
infer_name=infer_name,
toolsets=toolsets,
event_stream_handler=event_stream_handler,
+ response_prefix=response_prefix,
**_deprecated_kwargs,
) as result:
yield result
@@ -594,6 +609,7 @@ def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AbstractAsyncContextManager[AgentRun[AgentDepsT, OutputDataT]]: ...
@@ -612,6 +628,7 @@ def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AbstractAsyncContextManager[AgentRun[AgentDepsT, RunOutputDataT]]: ...
@@ -630,6 +647,7 @@ async def iter(
usage: _usage.RunUsage | None = None,
infer_name: bool = True,
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
+ response_prefix: str | None = None,
**_deprecated_kwargs: Never,
) -> AsyncIterator[AgentRun[AgentDepsT, Any]]:
"""A contextmanager which can be used to iterate over the agent graph's nodes as they are executed.
@@ -664,6 +682,7 @@ async def main():
system_prompts=(),
system_prompt_functions=[],
system_prompt_dynamic_functions={},
+ response_prefix=None,
),
ModelRequestNode(
request=ModelRequest(
@@ -673,7 +692,8 @@ async def main():
timestamp=datetime.datetime(...),
)
]
- )
+ ),
+ response_prefix=None,
),
CallToolsNode(
model_response=ModelResponse(
@@ -703,6 +723,7 @@ async def main():
usage: Optional usage to start with, useful for resuming a conversation or agents used in tools.
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
toolsets: Optional additional toolsets for this run.
+ response_prefix: Optional prefix to prepend to the model's response. Only supported by certain models.
Returns:
The result of the run.
@@ -736,6 +757,7 @@ async def main():
usage=usage,
infer_name=infer_name,
toolsets=toolsets,
+ response_prefix=response_prefix,
**_deprecated_kwargs,
) as run:
yield run
diff --git a/pydantic_ai_slim/pydantic_ai/models/__init__.py b/pydantic_ai_slim/pydantic_ai/models/__init__.py
index 04e063f570..c123cea256 100644
--- a/pydantic_ai_slim/pydantic_ai/models/__init__.py
+++ b/pydantic_ai_slim/pydantic_ai/models/__init__.py
@@ -380,6 +380,8 @@ class ModelRequestParameters:
output_tools: list[ToolDefinition] = field(default_factory=list)
allow_text_output: bool = True
+ response_prefix: str | None = None
+
@cached_property
def tool_defs(self) -> dict[str, ToolDefinition]:
return {tool_def.name: tool_def for tool_def in [*self.function_tools, *self.output_tools]}
diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
index 49b69f9567..f31938d255 100644
--- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py
+++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -203,7 +203,7 @@ async def request(
response = await self._messages_create(
messages, False, cast(AnthropicModelSettings, model_settings or {}), model_request_parameters
)
- model_response = self._process_response(response)
+ model_response = self._process_response(response, model_request_parameters)
return model_response
@asynccontextmanager
@@ -266,7 +266,7 @@ async def _messages_create(
if (allow_parallel_tool_calls := model_settings.get('parallel_tool_calls')) is not None:
tool_choice['disable_parallel_tool_use'] = not allow_parallel_tool_calls
- system_prompt, anthropic_messages = await self._map_message(messages)
+ system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters)
try:
extra_headers = model_settings.get('extra_headers', {})
@@ -296,12 +296,17 @@ async def _messages_create(
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e
raise # pragma: lax no cover
- def _process_response(self, response: BetaMessage) -> ModelResponse:
+ def _process_response(
+ self, response: BetaMessage, model_request_parameters: ModelRequestParameters
+ ) -> ModelResponse:
"""Process a non-streamed response, and prepare a message to return."""
items: list[ModelResponsePart] = []
- for item in response.content:
+ for i, item in enumerate(response.content):
if isinstance(item, BetaTextBlock):
- items.append(TextPart(content=item.text))
+ content = item.text
+ if i == 0 and (response_prefix := model_request_parameters.response_prefix):
+ content = response_prefix + content
+ items.append(TextPart(content=content))
elif isinstance(item, BetaWebSearchToolResultBlock | BetaCodeExecutionToolResultBlock):
items.append(
BuiltinToolReturnPart(
@@ -400,7 +405,9 @@ def _get_builtin_tools(
)
return tools, extra_headers
- async def _map_message(self, messages: list[ModelMessage]) -> tuple[str, list[BetaMessageParam]]: # noqa: C901
+ async def _map_message( # noqa: C901
+ self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters
+ ) -> tuple[str, list[BetaMessageParam]]:
"""Just maps a `pydantic_ai.Message` to a `anthropic.types.MessageParam`."""
system_prompt_parts: list[str] = []
anthropic_messages: list[BetaMessageParam] = []
@@ -512,6 +519,12 @@ async def _map_message(self, messages: list[ModelMessage]) -> tuple[str, list[Be
anthropic_messages.append(BetaMessageParam(role='assistant', content=assistant_content_params))
else:
assert_never(m)
+
+ if response_prefix := model_request_parameters.response_prefix:
+ anthropic_messages.append(
+ BetaMessageParam(role='assistant', content=[BetaTextBlockParam(text=response_prefix, type='text')])
+ )
+
if instructions := self._get_instructions(messages):
system_prompt_parts.insert(0, instructions)
system_prompt = '\n\n'.join(system_prompt_parts)
@@ -617,6 +630,8 @@ class AnthropicStreamedResponse(StreamedResponse):
async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]: # noqa: C901
current_block: BetaContentBlock | None = None
+ response_prefix = self.model_request_parameters.response_prefix
+
async for event in self._response:
if isinstance(event, BetaRawMessageStartEvent):
self._usage = _map_usage(event)
@@ -624,10 +639,13 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
elif isinstance(event, BetaRawContentBlockStartEvent):
current_block = event.content_block
- if isinstance(current_block, BetaTextBlock) and current_block.text:
- maybe_event = self._parts_manager.handle_text_delta(
- vendor_part_id=event.index, content=current_block.text
- )
+ if isinstance(current_block, BetaTextBlock) and (current_block.text or response_prefix):
+ text = current_block.text
+ if response_prefix:
+ text = response_prefix + text
+ response_prefix = None
+
+ maybe_event = self._parts_manager.handle_text_delta(vendor_part_id=event.index, content=text)
if maybe_event is not None: # pragma: no branch
yield maybe_event
elif isinstance(current_block, BetaThinkingBlock):
diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py
index 0c749f3c60..07130fa26a 100644
--- a/pydantic_ai_slim/pydantic_ai/models/mistral.py
+++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py
@@ -558,6 +558,8 @@ def _map_messages(self, messages: list[ModelMessage]) -> list[MistralMessages]:
# Insert a dummy assistant message
processed_messages.append(MistralAssistantMessage(content=[MistralTextChunk(text='OK')]))
+        # TODO: Append response_prefix as a final MistralAssistantMessage (prefill), mirroring the Anthropic/OpenAI mappers
+
return processed_messages
def _map_user_prompt(self, part: UserPromptPart) -> MistralUserMessage:
diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py
index 2b83fcad30..d4bcff9413 100644
--- a/pydantic_ai_slim/pydantic_ai/models/openai.py
+++ b/pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -383,7 +383,7 @@ async def request(
response = await self._completions_create(
messages, False, cast(OpenAIChatModelSettings, model_settings or {}), model_request_parameters
)
- model_response = self._process_response(response)
+ model_response = self._process_response(response, model_request_parameters)
return model_response
@asynccontextmanager
@@ -439,7 +439,7 @@ async def _completions_create(
else:
tool_choice = 'auto'
- openai_messages = await self._map_messages(messages)
+ openai_messages = await self._map_messages(messages, model_request_parameters)
response_format: chat.completion_create_params.ResponseFormat | None = None
if model_request_parameters.output_mode == 'native':
@@ -491,7 +491,9 @@ async def _completions_create(
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e
raise # pragma: lax no cover
- def _process_response(self, response: chat.ChatCompletion | str) -> ModelResponse:
+ def _process_response(
+ self, response: chat.ChatCompletion | str, model_request_parameters: ModelRequestParameters
+ ) -> ModelResponse:
"""Process a non-streamed response, and prepare a message to return."""
# Although the OpenAI SDK claims to return a Pydantic model (`ChatCompletion`) from the chat completions function:
# * it hasn't actually performed validation (presumably they're creating the model with `model_construct` or something?!)
@@ -542,10 +544,13 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelRespons
for lp in choice.logprobs.content
]
- if choice.message.content is not None:
+ if (content := choice.message.content) is not None:
+ if response_prefix := model_request_parameters.response_prefix:
+ content = response_prefix + content
+
items.extend(
(replace(part, id='content', provider_name=self.system) if isinstance(part, ThinkingPart) else part)
- for part in split_content_into_text_and_thinking(choice.message.content, self.profile.thinking_tags)
+ for part in split_content_into_text_and_thinking(content, self.profile.thinking_tags)
)
if choice.message.tool_calls is not None:
for c in choice.message.tool_calls:
@@ -621,7 +626,9 @@ def _get_web_search_options(self, model_request_parameters: ModelRequestParamete
f'`{tool.__class__.__name__}` is not supported by `OpenAIChatModel`. If it should be, please file an issue.'
)
- async def _map_messages(self, messages: list[ModelMessage]) -> list[chat.ChatCompletionMessageParam]:
+ async def _map_messages(
+ self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters
+ ) -> list[chat.ChatCompletionMessageParam]:
"""Just maps a `pydantic_ai.Message` to a `openai.types.ChatCompletionMessageParam`."""
openai_messages: list[chat.ChatCompletionMessageParam] = []
for message in messages:
@@ -661,6 +668,11 @@ async def _map_messages(self, messages: list[ModelMessage]) -> list[chat.ChatCom
assert_never(message)
if instructions := self._get_instructions(messages):
openai_messages.insert(0, chat.ChatCompletionSystemMessageParam(content=instructions, role='system'))
+
+ if response_prefix := model_request_parameters.response_prefix:
+            # TODO: DeepSeek's beta chat-prefix-completion API requires prefix=True on this trailing assistant message
+ openai_messages.append(chat.ChatCompletionAssistantMessageParam(role='assistant', content=response_prefix))
+
return openai_messages
@staticmethod
@@ -1349,6 +1361,8 @@ class OpenAIStreamedResponse(StreamedResponse):
_provider_name: str
async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
+ response_prefix = self.model_request_parameters.response_prefix
+
async for chunk in self._response:
self._usage += _map_usage(chunk)
@@ -1370,7 +1384,11 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
# Handle the text part of the response
content = choice.delta.content
- if content is not None:
+ if content is not None or response_prefix:
+ if response_prefix:
+ content = response_prefix + (content or '')
+ response_prefix = None
+
maybe_event = self._parts_manager.handle_text_delta(
vendor_part_id='content',
content=content,
diff --git a/pydantic_ai_slim/pydantic_ai/profiles/__init__.py b/pydantic_ai_slim/pydantic_ai/profiles/__init__.py
index 9915ecf04f..87dba8a0c6 100644
--- a/pydantic_ai_slim/pydantic_ai/profiles/__init__.py
+++ b/pydantic_ai_slim/pydantic_ai/profiles/__init__.py
@@ -55,6 +55,9 @@ class ModelProfile:
This is currently only used by `OpenAIChatModel`, `HuggingFaceModel`, and `GroqModel`.
"""
+ supports_response_prefix: bool = False
+ """Whether the model supports response prefix (prefill) functionality."""
+
@classmethod
def from_profile(cls, profile: ModelProfile | None) -> Self:
"""Build a ModelProfile subclass instance from a ModelProfile instance."""
diff --git a/pydantic_ai_slim/pydantic_ai/profiles/anthropic.py b/pydantic_ai_slim/pydantic_ai/profiles/anthropic.py
index f6a2755819..76e117c718 100644
--- a/pydantic_ai_slim/pydantic_ai/profiles/anthropic.py
+++ b/pydantic_ai_slim/pydantic_ai/profiles/anthropic.py
@@ -5,4 +5,4 @@
def anthropic_model_profile(model_name: str) -> ModelProfile | None:
"""Get the model profile for an Anthropic model."""
- return ModelProfile(thinking_tags=('', ''))
+ return ModelProfile(thinking_tags=('', ''), supports_response_prefix=True)
diff --git a/pydantic_ai_slim/pydantic_ai/profiles/deepseek.py b/pydantic_ai_slim/pydantic_ai/profiles/deepseek.py
index 92e166964d..aba8b4139b 100644
--- a/pydantic_ai_slim/pydantic_ai/profiles/deepseek.py
+++ b/pydantic_ai_slim/pydantic_ai/profiles/deepseek.py
@@ -5,4 +5,7 @@
def deepseek_model_profile(model_name: str) -> ModelProfile | None:
"""Get the model profile for a DeepSeek model."""
- return ModelProfile(ignore_streamed_leading_whitespace='r1' in model_name)
+ return ModelProfile(
+ ignore_streamed_leading_whitespace='r1' in model_name,
+ supports_response_prefix=True,
+ )
diff --git a/pydantic_ai_slim/pydantic_ai/providers/openrouter.py b/pydantic_ai_slim/pydantic_ai/providers/openrouter.py
index 96b0602e36..f434c87948 100644
--- a/pydantic_ai_slim/pydantic_ai/providers/openrouter.py
+++ b/pydantic_ai_slim/pydantic_ai/providers/openrouter.py
@@ -70,7 +70,9 @@ def model_profile(self, model_name: str) -> ModelProfile | None:
# As OpenRouterProvider is always used with OpenAIChatModel, which used to unconditionally use OpenAIJsonSchemaTransformer,
# we need to maintain that behavior unless json_schema_transformer is set explicitly
- return OpenAIModelProfile(json_schema_transformer=OpenAIJsonSchemaTransformer).update(profile)
+ return OpenAIModelProfile(
+ json_schema_transformer=OpenAIJsonSchemaTransformer, supports_response_prefix=True
+ ).update(profile)
@overload
def __init__(self) -> None: ...
diff --git a/pydantic_ai_slim/pydantic_ai/run.py b/pydantic_ai_slim/pydantic_ai/run.py
index 0cc9481043..7e01c1a5f1 100644
--- a/pydantic_ai_slim/pydantic_ai/run.py
+++ b/pydantic_ai_slim/pydantic_ai/run.py
@@ -52,6 +52,7 @@ async def main():
system_prompts=(),
system_prompt_functions=[],
system_prompt_dynamic_functions={},
+ response_prefix=None,
),
ModelRequestNode(
request=ModelRequest(
@@ -61,7 +62,8 @@ async def main():
timestamp=datetime.datetime(...),
)
]
- )
+ ),
+ response_prefix=None,
),
CallToolsNode(
model_response=ModelResponse(
@@ -186,6 +188,7 @@ async def main():
system_prompts=(),
system_prompt_functions=[],
system_prompt_dynamic_functions={},
+ response_prefix=None,
),
ModelRequestNode(
request=ModelRequest(
@@ -195,7 +198,8 @@ async def main():
timestamp=datetime.datetime(...),
)
]
- )
+ ),
+ response_prefix=None,
),
CallToolsNode(
model_response=ModelResponse(
diff --git a/test_response_prefix_example.py b/test_response_prefix_example.py
new file mode 100644
index 0000000000..0a6f42e9de
--- /dev/null
+++ b/test_response_prefix_example.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+"""Example script demonstrating the response prefix feature in Pydantic AI."""
+
+from pydantic_ai import Agent
+from pydantic_ai.exceptions import UserError
+from pydantic_ai.models.test import TestModel
+
+
+def test_response_prefix():
+    """Test the response prefix feature with validation."""
+    # Catch UserError specifically so the AssertionError below is not swallowed.
+    agent = Agent(TestModel())
+    try:
+        agent.run_sync('Hello', response_prefix='Assistant: ')
+        raise AssertionError('Should have raised UserError')
+    except UserError as e:
+        print(f'✅ Validation works: {e}')
+
+ # Create a mock model that supports response prefix
+ class MockResponsePrefixModel(TestModel):
+ @property
+ def profile(self): # pyright: ignore[reportIncompatibleVariableOverride]
+ profile = super().profile
+ profile.supports_response_prefix = True
+ return profile
+
+ # Create an agent with the mock model
+ agent = Agent(MockResponsePrefixModel())
+
+ # Test that the parameter is accepted without error
+ result = agent.run_sync('Hello', response_prefix='Assistant: ')
+ print('✅ Response prefix parameter accepted by supported model')
+ print(f'Response: {result.output}')
+
+ print('✅ Response prefix feature working correctly!')
+
+
+if __name__ == '__main__':
+ test_response_prefix()
diff --git a/tests/models/cassettes/test_anthropic/test_anthropic_response_prefix.yaml b/tests/models/cassettes/test_anthropic/test_anthropic_response_prefix.yaml
new file mode 100644
index 0000000000..1bc4f0c6f8
--- /dev/null
+++ b/tests/models/cassettes/test_anthropic/test_anthropic_response_prefix.yaml
@@ -0,0 +1,160 @@
+interactions:
+- request:
+ headers:
+ accept:
+ - application/json
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ content-length:
+ - '256'
+ content-type:
+ - application/json
+ host:
+ - api.anthropic.com
+ method: POST
+ parsed_body:
+ max_tokens: 4096
+ messages:
+ - content:
+ - text: 'What is the name of color #FF0000'
+ type: text
+ role: user
+ - content:
+ - text: Su nombre es
+ type: text
+ role: assistant
+ model: claude-sonnet-4-0
+ stream: false
+ system: Be concise.
+ uri: https://api.anthropic.com/v1/messages?beta=true
+ response:
+ headers:
+ connection:
+ - keep-alive
+ content-length:
+ - '518'
+ content-type:
+ - application/json
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains; preload
+ transfer-encoding:
+ - chunked
+ parsed_body:
+ content:
+ - text: ' **rojo** (o "red" en inglés). Este es el valor hexadecimal para el color rojo puro en el espacio de color
+ RGB.'
+ type: text
+ id: msg_01AsJ8x22wZUZK43ebDwD12n
+ model: claude-sonnet-4-20250514
+ role: assistant
+ stop_reason: end_turn
+ stop_sequence: null
+ type: message
+ usage:
+ cache_creation:
+ ephemeral_1h_input_tokens: 0
+ ephemeral_5m_input_tokens: 0
+ cache_creation_input_tokens: 0
+ cache_read_input_tokens: 0
+ input_tokens: 24
+ output_tokens: 39
+ service_tier: standard
+ status:
+ code: 200
+ message: OK
+- request:
+ headers:
+ accept:
+ - application/json
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ content-length:
+ - '255'
+ content-type:
+ - application/json
+ host:
+ - api.anthropic.com
+ method: POST
+ parsed_body:
+ max_tokens: 4096
+ messages:
+ - content:
+ - text: 'What is the name of color #FF0000'
+ type: text
+ role: user
+ - content:
+ - text: Su nombre es
+ type: text
+ role: assistant
+ model: claude-sonnet-4-0
+ stream: true
+ system: Be concise.
+ uri: https://api.anthropic.com/v1/messages?beta=true
+ response:
+ body:
+ string: |+
+ event: message_start
+ data: {"type":"message_start","message":{"id":"msg_01CAZPvhQ5cuSvKdgBBvi7ev","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":24,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":2,"service_tier":"standard"}} }
+
+ event: content_block_start
+ data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" **r"} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"ojo** (o **red**"} }
+
+ event: ping
+ data: {"type": "ping"}
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" en inglés).\n\nEl color #FF0000 es"} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" rojo puro en el sistema hex"} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"adecimal RGB, donde:\n- FF = 255 ("} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"máximo valor de rojo)\n- 00 ="} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" 0 (sin"} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" verde)\n- 00 = "}}
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"0 (sin azul)"} }
+
+ event: content_block_stop
+ data: {"type":"content_block_stop","index":0 }
+
+ event: message_delta
+ data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":24,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":82} }
+
+ event: message_stop
+ data: {"type":"message_stop" }
+
+ headers:
+ cache-control:
+ - no-cache
+ connection:
+ - keep-alive
+ content-type:
+ - text/event-stream; charset=utf-8
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains; preload
+ transfer-encoding:
+ - chunked
+ status:
+ code: 200
+ message: OK
+version: 1
+...
diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py
index 8d65141d18..8ca02feb63 100644
--- a/tests/models/test_anthropic.py
+++ b/tests/models/test_anthropic.py
@@ -2723,3 +2723,153 @@ async def test_anthropic_web_search_tool_stream(allow_model_requests: None, anth
PartDeltaEvent(index=17, delta=TextPartDelta(content_delta=' disruptions affecting North America.')),
]
)
+
+
+async def test_anthropic_response_prefix(allow_model_requests: None, anthropic_api_key: str):
+ """Test that Anthropic models correctly handle response prefix."""
+ m = AnthropicModel('claude-sonnet-4-0', provider=AnthropicProvider(api_key=anthropic_api_key))
+ agent = Agent(m, instructions='Be concise.')
+
+ # Test non-streaming response
+ result = await agent.run('What is the name of color #FF0000', response_prefix='Su nombre es')
+ assert result.output == snapshot(
+ 'Su nombre es **rojo** (o "red" en inglés). Este es el valor hexadecimal para el color rojo puro en el espacio de color RGB.'
+ )
+ assert result.all_messages() == snapshot(
+ [
+ ModelRequest(
+ parts=[
+ UserPromptPart(
+ content='What is the name of color #FF0000',
+ timestamp=IsDatetime(),
+ )
+ ],
+ instructions='Be concise.',
+ ),
+ ModelResponse(
+ parts=[
+ TextPart(
+ content='Su nombre es **rojo** (o "red" en inglés). Este es el valor hexadecimal para el color rojo puro en el espacio de color RGB.'
+ )
+ ],
+ usage=RequestUsage(
+ input_tokens=24,
+ output_tokens=39,
+ details={
+ 'cache_creation_input_tokens': 0,
+ 'cache_read_input_tokens': 0,
+ 'input_tokens': 24,
+ 'output_tokens': 39,
+ },
+ ),
+ model_name='claude-sonnet-4-20250514',
+ timestamp=IsDatetime(),
+ provider_name='anthropic',
+ provider_details={'finish_reason': 'end_turn'},
+ provider_response_id='msg_01AsJ8x22wZUZK43ebDwD12n',
+ finish_reason='stop',
+ ),
+ ]
+ )
+
+ # Test streaming response
+ event_parts: list[Any] = []
+ async with agent.iter(user_prompt='What is the name of color #FF0000', response_prefix='Su nombre es') as agent_run:
+ async for node in agent_run:
+ if Agent.is_model_request_node(node):
+ async with node.stream(agent_run.ctx) as request_stream:
+ async for event in request_stream:
+ event_parts.append(event)
+
+ assert event_parts == snapshot(
+ [
+ PartStartEvent(index=0, part=TextPart(content='Su nombre es')),
+ FinalResultEvent(tool_name=None, tool_call_id=None),
+ PartDeltaEvent(index=0, delta=TextPartDelta(content_delta=' **r')),
+ PartDeltaEvent(index=0, delta=TextPartDelta(content_delta='ojo** (o **red**')),
+ PartDeltaEvent(
+ index=0,
+ delta=TextPartDelta(
+ content_delta="""\
+ en inglés).
+
+El color #FF0000 es\
+"""
+ ),
+ ),
+ PartDeltaEvent(index=0, delta=TextPartDelta(content_delta=' rojo puro en el sistema hex')),
+ PartDeltaEvent(
+ index=0,
+ delta=TextPartDelta(
+ content_delta="""\
+adecimal RGB, donde:
+- FF = 255 (\
+"""
+ ),
+ ),
+ PartDeltaEvent(
+ index=0,
+ delta=TextPartDelta(
+ content_delta="""\
+máximo valor de rojo)
+- 00 =\
+"""
+ ),
+ ),
+ PartDeltaEvent(index=0, delta=TextPartDelta(content_delta=' 0 (sin')),
+ PartDeltaEvent(
+ index=0,
+ delta=TextPartDelta(
+ content_delta="""\
+ verde)
+- 00 = \
+"""
+ ),
+ ),
+ PartDeltaEvent(index=0, delta=TextPartDelta(content_delta='0 (sin azul)')),
+ ]
+ )
+ assert agent_run.result is not None
+ assert agent_run.result.all_messages() == snapshot(
+ [
+ ModelRequest(
+ parts=[
+ UserPromptPart(
+ content='What is the name of color #FF0000',
+ timestamp=IsDatetime(),
+ )
+ ],
+ instructions='Be concise.',
+ ),
+ ModelResponse(
+ parts=[
+ TextPart(
+ content="""\
+Su nombre es **rojo** (o **red** en inglés).
+
+El color #FF0000 es rojo puro en el sistema hexadecimal RGB, donde:
+- FF = 255 (máximo valor de rojo)
+- 00 = 0 (sin verde)
+- 00 = 0 (sin azul)\
+"""
+ )
+ ],
+ usage=RequestUsage(
+ input_tokens=24,
+ output_tokens=82,
+ details={
+ 'cache_creation_input_tokens': 0,
+ 'cache_read_input_tokens': 0,
+ 'input_tokens': 24,
+ 'output_tokens': 82,
+ },
+ ),
+ model_name='claude-sonnet-4-20250514',
+ timestamp=IsDatetime(),
+ provider_name='anthropic',
+ provider_details={'finish_reason': 'end_turn'},
+ provider_response_id='msg_01CAZPvhQ5cuSvKdgBBvi7ev',
+ finish_reason='stop',
+ ),
+ ]
+ )
diff --git a/tests/models/test_fallback.py b/tests/models/test_fallback.py
index 484a73ac37..bd7f773574 100644
--- a/tests/models/test_fallback.py
+++ b/tests/models/test_fallback.py
@@ -139,6 +139,7 @@ def test_first_failed_instrumented(capfire: CaptureLogfire) -> None:
'output_object': None,
'output_tools': [],
'allow_text_output': True,
+ 'response_prefix': None,
},
'logfire.span_type': 'span',
'logfire.msg': 'chat fallback:function:failure_response:,function:success_response:',
@@ -238,6 +239,7 @@ async def test_first_failed_instrumented_stream(capfire: CaptureLogfire) -> None
'output_object': None,
'output_tools': [],
'allow_text_output': True,
+ 'response_prefix': None,
},
'logfire.span_type': 'span',
'logfire.msg': 'chat fallback:function::failure_response_stream,function::success_response_stream',
@@ -344,6 +346,7 @@ def test_all_failed_instrumented(capfire: CaptureLogfire) -> None:
'output_object': None,
'output_tools': [],
'allow_text_output': True,
+ 'response_prefix': None,
},
'logfire.json_schema': {
'type': 'object',
diff --git a/tests/models/test_instrumented.py b/tests/models/test_instrumented.py
index 9ea6fadc47..5a350ee866 100644
--- a/tests/models/test_instrumented.py
+++ b/tests/models/test_instrumented.py
@@ -176,6 +176,7 @@ async def test_instrumented_model(capfire: CaptureLogfire):
'output_object': None,
'output_tools': [],
'allow_text_output': True,
+ 'response_prefix': None,
},
'logfire.json_schema': {
'type': 'object',
@@ -407,6 +408,7 @@ async def test_instrumented_model_stream(capfire: CaptureLogfire):
'output_object': None,
'output_tools': [],
'allow_text_output': True,
+ 'response_prefix': None,
},
'logfire.json_schema': {
'type': 'object',
@@ -505,6 +507,7 @@ async def test_instrumented_model_stream_break(capfire: CaptureLogfire):
'output_object': None,
'output_tools': [],
'allow_text_output': True,
+ 'response_prefix': None,
},
'logfire.json_schema': {
'type': 'object',
@@ -623,6 +626,7 @@ async def test_instrumented_model_attributes_mode(capfire: CaptureLogfire, instr
'output_object': None,
'output_tools': [],
'allow_text_output': True,
+ 'response_prefix': None,
},
'gen_ai.request.temperature': 1,
'logfire.msg': 'chat gpt-4o',
@@ -749,6 +753,7 @@ async def test_instrumented_model_attributes_mode(capfire: CaptureLogfire, instr
'output_object': None,
'output_tools': [],
'allow_text_output': True,
+ 'response_prefix': None,
},
'gen_ai.request.temperature': 1,
'logfire.msg': 'chat gpt-4o',
@@ -1316,6 +1321,7 @@ async def test_response_cost_error(capfire: CaptureLogfire, monkeypatch: pytest.
'output_object': None,
'output_tools': [],
'allow_text_output': True,
+ 'response_prefix': None,
},
'logfire.span_type': 'span',
'logfire.msg': 'chat gpt-4o',
diff --git a/tests/models/test_model_request_parameters.py b/tests/models/test_model_request_parameters.py
index 2915796ab1..72951a7c08 100644
--- a/tests/models/test_model_request_parameters.py
+++ b/tests/models/test_model_request_parameters.py
@@ -14,4 +14,5 @@ def test_model_request_parameters_are_serializable():
'allow_text_output': True,
'output_tools': [],
'output_object': None,
+ 'response_prefix': None,
}
diff --git a/tests/test_agent.py b/tests/test_agent.py
index 8c90db56b5..40d15222d4 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -4975,3 +4975,32 @@ def llm(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse:
),
]
)
+
+
+def test_response_prefix_validation():
+ """Test that response_prefix raises an error for unsupported models."""
+ # Test with a model that doesn't support response prefix
+ agent = Agent(TestModel())
+
+ with pytest.raises(UserError, match='Model test does not support response prefix'):
+ agent.run_sync('Hello', response_prefix='Assistant: ')
+
+
+def test_response_prefix_parameter_passed():
+ """Test that response_prefix parameter is accepted by run methods."""
+ # Test with a model that supports response prefix
+ from pydantic_ai.models.test import TestModel
+
+ # Create a mock model that supports response prefix
+ class MockResponsePrefixModel(TestModel):
+ @property
+ def profile(self): # pyright: ignore[reportIncompatibleVariableOverride]
+ profile = super().profile
+ profile.supports_response_prefix = True
+ return profile
+
+ agent = Agent(MockResponsePrefixModel())
+
+ # This should not raise an error
+ result = agent.run_sync('Hello', response_prefix='Assistant: ')
+ assert result.output is not None
diff --git a/tests/test_logfire.py b/tests/test_logfire.py
index 583537f3c5..feb16544bd 100644
--- a/tests/test_logfire.py
+++ b/tests/test_logfire.py
@@ -395,6 +395,7 @@ async def my_ret(x: int) -> str:
'output_tools': [],
'output_object': None,
'allow_text_output': True,
+ 'response_prefix': None,
}
)
),
@@ -785,6 +786,7 @@ class MyOutput:
}
],
'allow_text_output': False,
+ 'response_prefix': None,
}
)
),
@@ -889,6 +891,7 @@ async def test_feedback(capfire: CaptureLogfire) -> None:
'output_object': None,
'output_tools': [],
'allow_text_output': True,
+ 'response_prefix': None,
},
'logfire.span_type': 'span',
'logfire.msg': 'chat test',