Changes from all commits (38 commits)
78e006c
Revert pyodide to a version that doesn't print to stdout when install…
samuelcolvin Jun 8, 2025
4f257bd
Add token usage metrics to `InstrumentedModel` (#1898)
alexmojaki Jun 9, 2025
75845d1
Update Documentation for Provider arguments in google.md (#1946)
Deriverx2 Jun 10, 2025
1ed1073
Add API reference to llmstxt (#1940)
Deriverx2 Jun 10, 2025
4734135
Handle include_pending=false for bank_support example (#1912)
amihalik Jun 10, 2025
fe03daf
Add `service_tier` to `OpenAIModelSettings` (#1923)
empezarcero Jun 11, 2025
a1259fe
Allow users to supply `extra_http_client_args` in `MCPServerHTTP` (#…
mpfaffenberger Jun 11, 2025
0690748
Don't send sampling settings like `temperature` and `top_p` to OpenAI…
DouweM Jun 11, 2025
4af2463
docs: format google python file (#1963)
Kludex Jun 12, 2025
2fce134
Support field `fileData` (direct file URL) for `GeminiModel` and `Goo…
vricciardulli Jun 12, 2025
b7a2870
Reuse last request from message history if no user prompt was provide…
DouweM Jun 12, 2025
8aa964b
Prevent Anthropic API errors from empty message content (#1934)
mike-luabase Jun 12, 2025
764e957
Add MCP Streamable HTTP implementation (#1965)
Kludex Jun 13, 2025
7c1e47e
feat(openai): expose Responses API id as vendor_id (#1949)
sarunas-zebra Jun 13, 2025
67c381e
Use `GoogleModel` instead of `GeminiModel` on inference (#1881)
Kludex Jun 13, 2025
352acff
Proper check if callable is async (#1972)
Kludex Jun 13, 2025
e3e435e
More flexible method infer_provider (#1945)
hovi Jun 13, 2025
477a590
Ignore dynamic instructions returning an empty string (#1961)
giacbrd Jun 13, 2025
ae0c3ce
Set Anthropic max_tokens to the highest allowed by the model by defau…
DouweM Jun 13, 2025
b65c8b6
uprev Pyodide to 0.27.6 (#1944)
samuelcolvin Jun 16, 2025
380fbf3
refactor: updated tools doc with function naming: roll_die → roll_dic…
yamanahlawat Jun 16, 2025
c33fe23
Set Anthropic `max_tokens` to 4096 by default (#1994)
Kludex Jun 16, 2025
6651510
feat: add `history_processors` parameter to `Agent` for message proce…
Kludex Jun 16, 2025
701ac1b
Yield events for unknown tool calls (#1960)
proever Jun 16, 2025
a953d34
Always set a parameters schema on a Gemini function declaration, even…
DouweM Jun 17, 2025
388ecc2
Handle `McpError` from MCP tool calls (#1999)
ppcantidio Jun 17, 2025
b9233de
Add process_tool_call hook to MCP servers to modify tool args, metada…
stevenh Jun 17, 2025
d006b41
Respect ModelSettings.timeout in GoogleModel (#2006)
DouweM Jun 17, 2025
7fdd745
feat: add RunContext support to history processors (#2002)
Wh1isper Jun 17, 2025
b487d60
Support Thinking part (#1142)
Kludex Jun 18, 2025
9c7480d
Update Google models (#2010)
tacoo Jun 18, 2025
79561a4
docs: Update client.md (#2013)
a-klos Jun 18, 2025
f2646de
fix: update `ThinkingPart` when delta contains `signature` (#2012)
Kludex Jun 18, 2025
be5cda6
Support MCP sampling (#1884)
samuelcolvin Jun 19, 2025
ddcd5af
codespell fix (#2029)
samuelcolvin Jun 19, 2025
473b2ce
uprev `pytest-pretty` (#2030)
samuelcolvin Jun 19, 2025
9512987
merge
Kludex Jun 20, 2025
800a71a
Pass tests
Kludex Jun 20, 2025
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -247,7 +247,7 @@ jobs:

- run: make lint-js

- run: uv run --package mcp-run-python pytest mcp-run-python -v
- run: uv run --package mcp-run-python pytest mcp-run-python -v --durations=100

- run: deno task dev warmup
working-directory: mcp-run-python
25 changes: 14 additions & 11 deletions .pre-commit-config.yaml
@@ -13,8 +13,20 @@ repos:
rev: 0.6.8
hooks:
- id: fix-smartquotes
exclude: "cassettes/"
- id: fix-spaces
exclude: "cassettes/"
- id: fix-ligatures
exclude: "cassettes/"

- repo: https://github.com/codespell-project/codespell
# Configuration for codespell is in pyproject.toml
rev: v2.3.0
hooks:
- id: codespell
args: ["--skip", "tests/models/cassettes/*"]
additional_dependencies:
- tomli

- repo: local
hooks:
@@ -38,12 +50,12 @@ repos:
args: [lint-js]
language: system
types_or: [javascript, ts, json]
files: '^mcp-run-python/'
files: "^mcp-run-python/"
pass_filenames: false
- id: clai-help
name: clai help output
entry: uv
args: [run, pytest, 'clai/update_readme.py']
args: [run, pytest, "clai/update_readme.py"]
language: system
types_or: [python, markdown]
pass_filenames: false
@@ -54,12 +66,3 @@ repos:
language: system
types: [python]
pass_filenames: false

- repo: https://github.com/codespell-project/codespell
# Configuration for codespell is in pyproject.toml
rev: v2.3.0
hooks:
- id: codespell
args: ['--skip', 'tests/models/cassettes/*,docs/a2a/fasta2a.md,tests/models/test_groq.py']
additional_dependencies:
- tomli
6 changes: 3 additions & 3 deletions Makefile
@@ -64,7 +64,7 @@ test: ## Run tests and collect coverage data
@uv run coverage report

.PHONY: test-fast
test-fast: ## Same as test except no coverage. ~1/4th the time depending on hardware.
test-fast: ## Same as test except no coverage and 4x faster depending on hardware
uv run pytest -n auto --dist=loadgroup

.PHONY: test-all-python
@@ -78,12 +78,12 @@ test-all-python: ## Run tests on Python 3.9 to 3.13
@uv run coverage report

.PHONY: testcov
testcov: test ## Run tests and generate a coverage report
testcov: test ## Run tests and generate an HTML coverage report
@echo "building coverage html"
@uv run coverage html

.PHONY: test-mrp
test-mrp: ## Build and tests of mcp-run-python
test-mrp: ## Build and tests of mcp-run-python
cd mcp-run-python && deno task build
uv run --package mcp-run-python pytest mcp-run-python -v

2 changes: 2 additions & 0 deletions docs/agents.md
@@ -701,6 +701,8 @@ _(This example is complete, it can be run "as is")_

You can also dynamically change the instructions for an agent by using the `@agent.instructions` decorator.

Note that returning an empty string will result in no instruction message being added.
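
For example, a minimal sketch of an instruction that is skipped when it returns an empty string (the `deps` check is illustrative):

```python {title="empty_instruction_sketch.py"}
from pydantic_ai import Agent, RunContext

agent = Agent('openai:gpt-4o', deps_type=str)


@agent.instructions
def user_context(ctx: RunContext[str]) -> str:
    # Returning '' means no instruction message is added for this run.
    return '' if not ctx.deps else f"The user's name is {ctx.deps}."
```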

```python {title="dynamic_instructions.py"}
from datetime import date

12 changes: 8 additions & 4 deletions docs/api/providers.md
@@ -18,10 +18,14 @@

::: pydantic_ai.providers.cohere

::: pydantic_ai.providers.mistral
::: pydantic_ai.providers.mistral.MistralProvider

::: pydantic_ai.providers.fireworks
::: pydantic_ai.providers.fireworks.FireworksProvider

::: pydantic_ai.providers.grok
::: pydantic_ai.providers.grok.GrokProvider

::: pydantic_ai.providers.together
::: pydantic_ai.providers.together.TogetherProvider

::: pydantic_ai.providers.heroku.HerokuProvider

::: pydantic_ai.providers.openrouter.OpenRouterProvider
16 changes: 13 additions & 3 deletions docs/changelog.md
@@ -12,11 +12,14 @@ PydanticAI is still pre-version 1, so breaking changes will occur, however:
!!! note
Here's a filtered list of the breaking changes for each version to help you upgrade PydanticAI.

### v0.1.0 (2025-04-15)
### v0.3.0 (2025-06-18)

See [#1248](https://github.com/pydantic/pydantic-ai/pull/1248) — the attribute/parameter name `result` was renamed to `output` in many places. Hopefully all changes keep a deprecated attribute or parameter with the old name, so you should get many deprecation warnings.
See [#1142](https://github.com/pydantic/pydantic-ai/pull/1142) — Adds support for thinking parts.

See [#1484](https://github.com/pydantic/pydantic-ai/pull/1484) — `format_as_xml` was moved and made available to import from the package root, e.g. `from pydantic_ai import format_as_xml`.
We now convert the thinking blocks (`<think>...</think>`) in provider-specific text parts to
PydanticAI `ThinkingPart`s. As part of this release, we also chose not to send `ThinkingPart`s
back to the provider, to save costs on behalf of the user. In the future, we intend to add a
setting to customize this behavior.
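
A minimal sketch of inspecting thinking parts on a response (the model name is illustrative; any model that emits thinking content will do):

```python
from pydantic_ai import Agent
from pydantic_ai.messages import ThinkingPart

agent = Agent('anthropic:claude-3-7-sonnet-latest')
result = agent.run_sync('What is 123 * 456?')

# The last message is the model response; thinking content, if present,
# arrives as ThinkingPart objects alongside the usual text parts.
for part in result.all_messages()[-1].parts:
    if isinstance(part, ThinkingPart):
        print(part.content)
```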

### v0.2.0 (2025-05-12)

@@ -25,6 +28,13 @@ See [#1647](https://github.com/pydantic/pydantic-ai/pull/1647) — usage makes s
* Adds `usage` to `ModelResponse` (field has a default factory of `Usage()` so it'll work to load data that doesn't have usage)
* changes the return type of `Model.request` to just `ModelResponse` instead of `tuple[ModelResponse, Usage]`


### v0.1.0 (2025-04-15)

See [#1248](https://github.com/pydantic/pydantic-ai/pull/1248) — the attribute/parameter name `result` was renamed to `output` in many places. Hopefully all changes keep a deprecated attribute or parameter with the old name, so you should get many deprecation warnings.

See [#1484](https://github.com/pydantic/pydantic-ai/pull/1484) — `format_as_xml` was moved and made available to import from the package root, e.g. `from pydantic_ai import format_as_xml`.

---

## Full Changelog
2 changes: 1 addition & 1 deletion docs/evals.md
@@ -453,7 +453,7 @@ class SpanTracingEvaluator(Evaluator[str, str]):
has_errors = span_tree.any(error_query)

# Calculate a performance score (lower is better)
performance_score = 1.0 if total_processing_time < 0.5 else 0.5
performance_score = 1.0 if total_processing_time < 1.0 else 0.5

return {
'has_spans': True,
2 changes: 1 addition & 1 deletion docs/graph.md
@@ -653,7 +653,7 @@ Instead of running the entire graph in a single process invocation, we run the g

from dataclasses import dataclass, field

from groq import BaseModel
from pydantic import BaseModel
from pydantic_graph import (
BaseNode,
End,
29 changes: 21 additions & 8 deletions docs/input.md
@@ -2,6 +2,7 @@

Some LLMs are now capable of understanding audio, video, image and document content.


## Image Input

!!! info
@@ -64,14 +65,6 @@ You can provide video input using either [`VideoUrl`][pydantic_ai.VideoUrl] or [
!!! info
Some models do not support document input. Please check the model's documentation to confirm whether it supports document input.

!!! warning
When using Gemini models, the document content will always be sent as binary data, regardless of whether you use `DocumentUrl` or `BinaryContent`. This is due to differences in how Vertex AI and Google AI handle document inputs.

For more details, see [this discussion](https://discuss.ai.google.dev/t/i-am-using-google-generative-ai-model-gemini-1-5-pro-for-image-analysis-but-getting-error/34866/4).

If you are unsatisfied with this behavior, please let us know by opening an issue on
[GitHub](https://github.com/pydantic/pydantic-ai/issues).

You can provide document input using either [`DocumentUrl`][pydantic_ai.DocumentUrl] or [`BinaryContent`][pydantic_ai.BinaryContent]. The process is similar to the examples above.

If you have a direct URL for the document, you can use [`DocumentUrl`][pydantic_ai.DocumentUrl]:
@@ -109,3 +102,23 @@ result = agent.run_sync(
print(result.output)
# > The document discusses...
```

## User-side download vs. direct file URL

As a general rule, when you provide a URL using any of `ImageUrl`, `AudioUrl`, `VideoUrl` or `DocumentUrl`, PydanticAI downloads the file content and then sends it as part of the API request.

The situation is different for certain models:

- [`AnthropicModel`][pydantic_ai.models.anthropic.AnthropicModel]: if you provide a PDF document via `DocumentUrl`, the URL is sent directly in the API request, so no download happens on the user side.

- [`GeminiModel`][pydantic_ai.models.gemini.GeminiModel] and [`GoogleModel`][pydantic_ai.models.google.GoogleModel] on Vertex AI: any URL provided using `ImageUrl`, `AudioUrl`, `VideoUrl`, or `DocumentUrl` is sent as-is in the API request and no data is downloaded beforehand.

- [`GeminiModel`][pydantic_ai.models.gemini.GeminiModel] and [`GoogleModel`][pydantic_ai.models.google.GoogleModel] on GLA: YouTube video URLs are sent directly in the request to the model.

See the [Gemini API docs for Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#filedata) to learn more about supported URLs, formats and limitations:

- Cloud Storage bucket URIs (with protocol `gs://`)
- Public HTTP(S) URLs
- Public YouTube video URL (maximum one URL per request)

However, because of crawling restrictions, Gemini may not be able to access certain URLs. In that case, you can instruct PydanticAI to download the file content and send that instead of the URL by setting the boolean flag `force_download` to `True`. This attribute is available on all objects that inherit from [`FileUrl`][pydantic_ai.messages.FileUrl].
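
For example, a minimal sketch (the model name and URL are illustrative):

```python {title="force_download_sketch.py" test="skip"}
from pydantic_ai import Agent, VideoUrl

agent = Agent('google-gla:gemini-2.0-flash')

# force_download=True tells PydanticAI to fetch the file itself and send
# the binary content in the request instead of the URL.
video = VideoUrl(url='https://example.com/video.mp4', force_download=True)

result = agent.run_sync(['What is in this video?', video])
print(result.output)
```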

117 changes: 104 additions & 13 deletions docs/mcp/client.md
@@ -18,33 +18,35 @@ pip/uv-add "pydantic-ai-slim[mcp]"

PydanticAI comes with two ways to connect to MCP servers:

- [`MCPServerHTTP`][pydantic_ai.mcp.MCPServerHTTP] which connects to an MCP server using the [Streamable HTTP](https://modelcontextprotocol.io/specification/2025-03-26/basic/transports#streamable-http) transport
- [`MCPServerSSE`][pydantic_ai.mcp.MCPServerSSE] which connects to an MCP server using the [HTTP SSE](https://spec.modelcontextprotocol.io/specification/2024-11-05/basic/transports/#http-with-sse) transport
- [`MCPServerStreamableHTTP`][pydantic_ai.mcp.MCPServerStreamableHTTP] which connects to an MCP server using the [Streamable HTTP](https://modelcontextprotocol.io/introduction#streamable-http) transport
- [`MCPServerStdio`][pydantic_ai.mcp.MCPServerStdio] which runs the server as a subprocess and connects to it using the [stdio](https://spec.modelcontextprotocol.io/specification/2024-11-05/basic/transports/#stdio) transport

Examples of each are shown below; [mcp-run-python](run-python.md) is used as the MCP server in the SSE and stdio examples.

### HTTP Client
### SSE Client

[`MCPServerHTTP`][pydantic_ai.mcp.MCPServerHTTP] connects over HTTP using the [Streamable HTTP transport](https://modelcontextprotocol.io/specification/2025-03-26/basic/transports#streamable-http) to a server.
[`MCPServerSSE`][pydantic_ai.mcp.MCPServerSSE] connects over HTTP using the [HTTP + Server Sent Events transport](https://spec.modelcontextprotocol.io/specification/2024-11-05/basic/transports/#http-with-sse) to a server.

!!! note
[`MCPServerHTTP`][pydantic_ai.mcp.MCPServerHTTP] requires an MCP server to be running and accepting HTTP connections before calling [`agent.run_mcp_servers()`][pydantic_ai.Agent.run_mcp_servers]. Running the server is not managed by PydanticAI.
[`MCPServerSSE`][pydantic_ai.mcp.MCPServerSSE] requires an MCP server to be running and accepting HTTP connections before calling [`agent.run_mcp_servers()`][pydantic_ai.Agent.run_mcp_servers]. Running the server is not managed by PydanticAI.

The StreamableHTTP Transport is able to connect to both stateless HTTP and older Server Sent Events (SSE) servers.
The name "HTTP" is used since this implementation will be adapted in future to use the new
[Streamable HTTP](https://github.com/modelcontextprotocol/specification/pull/206) currently in development.

Before creating the HTTP client, we need to run the server (docs [here](run-python.md)):
Before creating the SSE client, we need to run the server (docs [here](run-python.md)):

```bash {title="terminal (run http server)"}
```bash {title="terminal (run sse server)"}
deno run \
-N -R=node_modules -W=node_modules --node-modules-dir=auto \
jsr:@pydantic/mcp-run-python sse
```

```python {title="mcp_sse_client.py" py="3.10"}
from pydantic_ai import Agent
from pydantic_ai.mcp import MCPServerHTTP
from pydantic_ai.mcp import MCPServerSSE

server = MCPServerHTTP(url='http://localhost:3001/sse') # (1)!
server = MCPServerSSE(url='http://localhost:3001/sse') # (1)!
agent = Agent('openai:gpt-4o', mcp_servers=[server]) # (2)!


@@ -55,7 +57,7 @@ async def main():
#> There are 9,208 days between January 1, 2000, and March 18, 2025.
```

1. Define the MCP server with the URL used to connect. This will typically end in `/mcp` for HTTP servers and `/sse` for SSE.
1. Define the MCP server with the URL used to connect.
2. Create an agent with the MCP server attached.
3. Create a client session to connect to the server.

@@ -83,6 +85,53 @@ Will display as follows:

![Logfire run python code](../img/logfire-run-python-code.png)

### Streamable HTTP Client

[`MCPServerStreamableHTTP`][pydantic_ai.mcp.MCPServerStreamableHTTP] connects over HTTP using the
[Streamable HTTP](https://modelcontextprotocol.io/introduction#streamable-http) transport to a server.

!!! note
[`MCPServerStreamableHTTP`][pydantic_ai.mcp.MCPServerStreamableHTTP] requires an MCP server to be
running and accepting HTTP connections before calling
[`agent.run_mcp_servers()`][pydantic_ai.Agent.run_mcp_servers]. Running the server is not
managed by PydanticAI.

Before creating the Streamable HTTP client, we need to run a server that supports the Streamable HTTP transport.

```python {title="streamable_http_server.py" py="3.10" test="skip"}
from mcp.server.fastmcp import FastMCP

app = FastMCP()

@app.tool()
def add(a: int, b: int) -> int:
    return a + b

app.run(transport='streamable-http')
```

Then we can create the client:

```python {title="mcp_streamable_http_client.py" py="3.10"}
from pydantic_ai import Agent
from pydantic_ai.mcp import MCPServerStreamableHTTP

server = MCPServerStreamableHTTP('http://localhost:8000/mcp') # (1)!
agent = Agent('openai:gpt-4o', mcp_servers=[server]) # (2)!

async def main():
    async with agent.run_mcp_servers():  # (3)!
        result = await agent.run('How many days between 2000-01-01 and 2025-03-18?')
        print(result.output)
        #> There are 9,208 days between January 1, 2000, and March 18, 2025.
```

1. Define the MCP server with the URL used to connect.
2. Create an agent with the MCP server attached.
3. Create a client session to connect to the server.

_(This example is complete, it can be run "as is" with Python 3.10+ — you'll need to add `asyncio.run(main())` to run `main`)_

### MCP "stdio" Server

The other transport offered by MCP is the [stdio transport](https://spec.modelcontextprotocol.io/specification/2024-11-05/basic/transports/#stdio) where the server is run as a subprocess and communicates with the client over `stdin` and `stdout`. In this case, you'd use the [`MCPServerStdio`][pydantic_ai.mcp.MCPServerStdio] class.
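
For example, a minimal sketch using [mcp-run-python](run-python.md) as the server (the deno invocation mirrors the SSE example above):

```python {title="mcp_stdio_sketch.py" py="3.10" test="skip"}
from pydantic_ai import Agent
from pydantic_ai.mcp import MCPServerStdio

server = MCPServerStdio(  # Run the server as a subprocess over stdio.
    'deno',
    args=[
        'run',
        '-N', '-R=node_modules', '-W=node_modules', '--node-modules-dir=auto',
        'jsr:@pydantic/mcp-run-python', 'stdio',
    ],
)
agent = Agent('openai:gpt-4o', mcp_servers=[server])
```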
@@ -118,6 +167,48 @@ async def main():

1. See [MCP Run Python](run-python.md) for more information.

## Tool call customisation

The MCP server classes provide the ability to set a `process_tool_call` hook which allows
customisation of tool call requests and their responses.

A common use case for this is to inject metadata into the requests that the server-side
tool call needs.

```python {title="mcp_process_tool_call.py" py="3.10"}
from typing import Any

from pydantic_ai import Agent
from pydantic_ai.mcp import CallToolFunc, MCPServerStdio, ToolResult
from pydantic_ai.models.test import TestModel
from pydantic_ai.tools import RunContext


async def process_tool_call(
    ctx: RunContext[int],
    call_tool: CallToolFunc,
    tool_name: str,
    args: dict[str, Any],
) -> ToolResult:
    """A tool call processor that passes along the deps."""
    return await call_tool(tool_name, args, metadata={'deps': ctx.deps})


server = MCPServerStdio('python', ['-m', 'tests.mcp_server'], process_tool_call=process_tool_call)
agent = Agent(
    model=TestModel(call_tools=['echo_deps']),
    deps_type=int,
    mcp_servers=[server]
)


async def main():
    async with agent.run_mcp_servers():
        result = await agent.run('Echo with deps set to 42', deps=42)
        print(result.output)
        #> {"echo_deps":{"echo":"This is an echo message","deps":42}}
```

## Using Tool Prefixes to Avoid Naming Conflicts

When connecting to multiple MCP servers that might provide tools with the same name, you can use the `tool_prefix` parameter to avoid naming conflicts. This parameter adds a prefix to all tool names from a specific server.
@@ -134,15 +225,15 @@ This allows you to use multiple servers that might have overlapping tool names w

```python {title="mcp_tool_prefix_http_client.py" py="3.10"}
from pydantic_ai import Agent
from pydantic_ai.mcp import MCPServerHTTP
from pydantic_ai.mcp import MCPServerSSE

# Create two servers with different prefixes
weather_server = MCPServerHTTP(
weather_server = MCPServerSSE(
    url='http://localhost:3001/sse',
    tool_prefix='weather'  # Tools will be prefixed with 'weather_'
)

calculator_server = MCPServerHTTP(
calculator_server = MCPServerSSE(
    url='http://localhost:3002/sse',
    tool_prefix='calc'  # Tools will be prefixed with 'calc_'
)