13 changes: 12 additions & 1 deletion pydantic_ai_slim/pydantic_ai/messages.py
@@ -423,7 +423,18 @@ def format(self) -> str:
__repr__ = _utils.dataclasses_no_defaults_repr


UserContent: TypeAlias = 'str | ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent'
@dataclass(repr=False)
class UploadedFile:
"""File uploaded to the LLM provider."""

file: Any
"""A provider-specific file object, e.g. a file ID or a file URL."""

kind: Literal['uploaded-file'] = 'uploaded-file'
"""Type identifier, this is available on all parts as a discriminator."""


UserContent: TypeAlias = 'str | ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent | UploadedFile'


@dataclass(repr=False)
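A minimal usage sketch (not part of this diff) of how the new UploadedFile part could be supplied as user content. The file value and prompt are illustrative only; as the provider-specific mappers below show, the object stored in file must match what the selected model expects (e.g. an openai.types.FileObject for OpenAI or a genai.types.File for Google):

from pydantic_ai.messages import UploadedFile, UserContent

# Illustrative only: a provider file ID, as allowed by the `file` docstring.
# The model mappers added below (openai.py / google.py) expect the provider's own file object.
file_part = UploadedFile(file='file-abc123')
prompt: list[UserContent] = ['Give me a short description of this document', file_part]
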
3 changes: 3 additions & 0 deletions pydantic_ai_slim/pydantic_ai/models/bedrock.py
@@ -35,6 +35,7 @@
ThinkingPart,
ToolCallPart,
ToolReturnPart,
UploadedFile,
UserPromptPart,
VideoUrl,
)
@@ -577,6 +578,8 @@ async def _map_user_prompt(part: UserPromptPart, document_count: Iterator[int])
content.append({'video': video})
elif isinstance(item, AudioUrl): # pragma: no cover
raise NotImplementedError('Audio is not supported yet.')
elif isinstance(item, UploadedFile):
raise NotImplementedError('Uploaded files are not supported yet.')
else:
assert_never(item)
return [{'role': 'user', 'content': content}]
3 changes: 3 additions & 0 deletions pydantic_ai_slim/pydantic_ai/models/gemini.py
@@ -33,6 +33,7 @@
ThinkingPart,
ToolCallPart,
ToolReturnPart,
UploadedFile,
UserPromptPart,
VideoUrl,
)
@@ -368,6 +369,8 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[_GeminiPartUnion]
else: # pragma: lax no cover
file_data = _GeminiFileDataPart(file_data={'file_uri': item.url, 'mime_type': item.media_type})
content.append(file_data)
elif isinstance(item, UploadedFile):
raise NotImplementedError('Uploaded files are not supported for GeminiModel.')
else:
assert_never(item) # pragma: lax no cover
return content
12 changes: 10 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/google.py
@@ -31,6 +31,7 @@
ThinkingPart,
ToolCallPart,
ToolReturnPart,
UploadedFile,
UserPromptPart,
VideoUrl,
)
@@ -54,6 +55,7 @@
ContentUnionDict,
CountTokensConfigDict,
ExecutableCodeDict,
File,
FunctionCallDict,
FunctionCallingConfigDict,
FunctionCallingConfigMode,
@@ -425,7 +427,7 @@ async def _map_messages(self, messages: list[ModelMessage]) -> tuple[ContentDict
if isinstance(part, SystemPromptPart):
system_parts.append({'text': part.content})
elif isinstance(part, UserPromptPart):
message_parts.extend(await self._map_user_prompt(part))
message_parts.extend(await self._map_user_prompt(part, contents))
elif isinstance(part, ToolReturnPart):
message_parts.append(
{
@@ -465,7 +467,7 @@ async def _map_messages(self, messages: list[ModelMessage]) -> tuple[ContentDict
system_instruction = ContentDict(role='user', parts=system_parts) if system_parts else None
return system_instruction, contents

async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
async def _map_user_prompt(self, part: UserPromptPart, contents: list[ContentUnionDict]) -> list[PartDict]:
if isinstance(part.content, str):
return [{'text': part.content}]
else:
@@ -499,6 +501,12 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
content.append(
{'file_data': {'file_uri': item.url, 'mime_type': item.media_type}}
) # pragma: lax no cover
elif isinstance(item, UploadedFile):
if not isinstance(item.file, File):
raise UserError('UploadedFile.file must be a genai.types.File object')
# genai.types.File is its own ContentUnionDict and not a
# PartDict, so append to the contents directly.
contents.append(item.file)
Comment from the contributor (PR author):
As shown in Google's file upload example:

result = client.models.generate_content(
    model="gemini-2.0-flash",
    contents=[
        myfile,
        "\n\n",
        "Can you tell me about the instruments in this photo?",
    ],
)

The file acts as a whole content entry rather than a "message part", which is why I'm appending it to the contents array directly here. Reference: https://ai.google.dev/api/files

else:
assert_never(item)
return content
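For context, a hedged end-to-end sketch of the Google path (not part of this diff): the file is first uploaded through the google-genai Files API, so that item.file is the genai.types.File the isinstance check above requires. The client construction, upload call, model string, and asset path are assumptions for illustration:

from google import genai
from pydantic_ai import Agent
from pydantic_ai.messages import UploadedFile

client = genai.Client()
# Upload returns a genai.types.File; GoogleModel appends it to `contents` as a whole entry.
uploaded = client.files.upload(file='tests/assets/smiley.pdf')

agent = Agent('google-gla:gemini-2.5-flash')
result = agent.run_sync(['Give me a short description of this image', UploadedFile(file=uploaded)])
print(result.output)
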
3 changes: 3 additions & 0 deletions pydantic_ai_slim/pydantic_ai/models/huggingface.py
@@ -32,6 +32,7 @@
ThinkingPart,
ToolCallPart,
ToolReturnPart,
UploadedFile,
UserPromptPart,
VideoUrl,
)
@@ -424,6 +425,8 @@ async def _map_user_prompt(part: UserPromptPart) -> ChatCompletionInputMessage:
raise NotImplementedError('DocumentUrl is not supported for Hugging Face')
elif isinstance(item, VideoUrl):
raise NotImplementedError('VideoUrl is not supported for Hugging Face')
elif isinstance(item, UploadedFile):
raise NotImplementedError('Uploaded files are not supported for Hugging Face')
else:
assert_never(item)
return ChatCompletionInputMessage(role='user', content=content) # type: ignore
33 changes: 29 additions & 4 deletions pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -8,6 +8,8 @@
from datetime import datetime
from typing import Any, Literal, Union, cast, overload

from httpx import URL
from openai.types import FileObject
from pydantic import ValidationError
from typing_extensions import assert_never, deprecated

@@ -36,6 +38,7 @@
ThinkingPart,
ToolCallPart,
ToolReturnPart,
UploadedFile,
UserPromptPart,
VideoUrl,
)
@@ -623,7 +626,7 @@ async def _map_user_message(self, message: ModelRequest) -> AsyncIterable[chat.C
else:
yield chat.ChatCompletionSystemMessageParam(role='system', content=part.content)
elif isinstance(part, UserPromptPart):
yield await self._map_user_prompt(part)
yield await self._map_user_prompt(part, self._provider)
elif isinstance(part, ToolReturnPart):
yield chat.ChatCompletionToolMessageParam(
role='tool',
@@ -645,7 +648,7 @@ async def _map_user_message(self, message: ModelRequest) -> AsyncIterable[chat.C
assert_never(part)

@staticmethod
async def _map_user_prompt(part: UserPromptPart) -> chat.ChatCompletionUserMessageParam:
async def _map_user_prompt(part: UserPromptPart, provider: Provider[Any]) -> chat.ChatCompletionUserMessageParam:
content: str | list[ChatCompletionContentPartParam]
if isinstance(part.content, str):
content = part.content
@@ -697,6 +700,9 @@ async def _map_user_prompt(part: UserPromptPart) -> chat.ChatCompletionUserMessa
content.append(file)
elif isinstance(item, VideoUrl): # pragma: no cover
raise NotImplementedError('VideoUrl is not supported for OpenAI')
elif isinstance(item, UploadedFile):
file = _map_uploaded_file(item, provider)
content.append(File(file=FileFile(file_id=file.id), type='file'))
else:
assert_never(item)
return chat.ChatCompletionUserMessageParam(role='user', content=content)
@@ -984,7 +990,7 @@ async def _map_messages(
if isinstance(part, SystemPromptPart):
openai_messages.append(responses.EasyInputMessageParam(role='system', content=part.content))
elif isinstance(part, UserPromptPart):
openai_messages.append(await self._map_user_prompt(part))
openai_messages.append(await self._map_user_prompt(part, self._provider))
elif isinstance(part, ToolReturnPart):
openai_messages.append(
FunctionCallOutput(
@@ -1066,7 +1072,7 @@ def _map_json_schema(self, o: OutputObjectDefinition) -> responses.ResponseForma
return response_format_param

@staticmethod
async def _map_user_prompt(part: UserPromptPart) -> responses.EasyInputMessageParam:
async def _map_user_prompt(part: UserPromptPart, provider: Provider[Any]) -> responses.EasyInputMessageParam:
content: str | list[responses.ResponseInputContentParam]
if isinstance(part.content, str):
content = part.content
@@ -1124,6 +1130,9 @@ async def _map_user_prompt(part: UserPromptPart) -> responses.EasyInputMessagePa
)
elif isinstance(item, VideoUrl): # pragma: no cover
raise NotImplementedError('VideoUrl is not supported for OpenAI.')
elif isinstance(item, UploadedFile):
file = _map_uploaded_file(item, provider)
content.append(responses.ResponseInputFileParam(file_id=file.id, type='input_file'))
else:
assert_never(item)
return responses.EasyInputMessageParam(role='user', content=content)
@@ -1358,3 +1367,19 @@ def _map_usage(response: chat.ChatCompletion | ChatCompletionChunk | responses.R
u.input_audio_tokens = response_usage.prompt_tokens_details.audio_tokens or 0
u.cache_read_tokens = response_usage.prompt_tokens_details.cached_tokens or 0
return u


def _map_openai_uploaded_file(item: UploadedFile) -> FileObject:
if not isinstance(item.file, FileObject):
raise UserError('UploadedFile.file must be an openai.types.FileObject')
return item.file


def _map_uploaded_file(uploaded_file: UploadedFile, provider: Provider[Any]) -> FileObject:
"""Map an UploadedFile to a File object."""
url = URL(provider.base_url)

if url.host == 'api.openai.com':
return _map_openai_uploaded_file(uploaded_file)
else:
raise UserError(f'UploadedFile is not supported for `{provider.name}` with base_url {provider.base_url}.')
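A hedged usage sketch for the OpenAI path (not part of this diff): _map_uploaded_file only accepts providers whose base_url host is api.openai.com, and the wrapped object must be the openai.types.FileObject returned by the Files API. The client setup, purpose value, model string, and asset path are assumptions for illustration:

from openai import OpenAI
from pydantic_ai import Agent
from pydantic_ai.messages import UploadedFile

client = OpenAI()
with open('tests/assets/smiley.pdf', 'rb') as f:
    # client.files.create returns the openai.types.FileObject that _map_uploaded_file expects.
    file_object = client.files.create(file=f, purpose='user_data')

agent = Agent('openai:gpt-4o')
result = agent.run_sync(['Give me a short description of this image', UploadedFile(file=file_object)])
print(result.output)
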
Binary file added tests/assets/smiley.pdf
70 changes: 70 additions & 0 deletions tests/models/cassettes/test_google/test_uploaded_file_input.yaml
@@ -0,0 +1,70 @@
interactions:
- request:
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '280'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
parsed_body:
contents:
- parts:
- fileData:
fileUri: https://generativelanguage.googleapis.com/v1beta/files/6myu0b1v3mxl
mimeType: application/pdf
role: user
- parts:
- text: Give me a short description of this image
role: user
generationConfig: {}
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
response:
headers:
alt-svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
content-length:
- '881'
content-type:
- application/json; charset=UTF-8
server-timing:
- gfet4t7; dur=5652
transfer-encoding:
- chunked
vary:
- Origin
- X-Origin
- Referer
parsed_body:
candidates:
- content:
parts:
- text: The image displays a classic smiley face. It features a bright yellow circular face with two simple black
dot eyes and an upward-curved black line forming a smile. The yellow circle has a subtle darker yellow outline
and is set against a plain white background.
role: model
finishReason: STOP
index: 0
modelVersion: gemini-2.5-flash
responseId: T7OkaOv-JOemmtkP5IXU2QI
usageMetadata:
candidatesTokenCount: 51
promptTokenCount: 268
promptTokensDetails:
- modality: TEXT
tokenCount: 10
- modality: DOCUMENT
tokenCount: 258
thoughtsTokenCount: 678
totalTokenCount: 997
status:
code: 200
message: OK
version: 1
85 changes: 85 additions & 0 deletions tests/models/cassettes/test_openai/test_uploaded_file_input.yaml
@@ -0,0 +1,85 @@
interactions:
- request:
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '206'
content-type:
- application/json
host:
- api.openai.com
method: POST
parsed_body:
messages:
- content:
- text: Give me a short description of this image
type: text
- file:
file_id: file-7yEHnJNSSBeUYfkLq6G8KG
type: file
role: user
model: gpt-4o
stream: false
uri: https://api.openai.com/v1/chat/completions
response:
headers:
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
connection:
- keep-alive
content-length:
- '974'
content-type:
- application/json
openai-organization:
- coplane
openai-processing-ms:
- '4261'
openai-project:
- proj_KGkpeAYM2vPXvZOVtXfnuZ9r
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
parsed_body:
choices:
- finish_reason: stop
index: 0
logprobs: null
message:
annotations: []
content: The image is a simple design of a classic yellow smiley face. It features a bright yellow circle with two
black dots for eyes and a curved black line for a smiling mouth.
refusal: null
role: assistant
created: 1755630898
id: chatcmpl-C6M5KUA0T23RWuMTuAopnIc4ygeJb
model: gpt-4o-2024-08-06
object: chat.completion
service_tier: default
system_fingerprint: fp_80956533cb
usage:
completion_tokens: 36
completion_tokens_details:
accepted_prediction_tokens: 0
audio_tokens: 0
reasoning_tokens: 0
rejected_prediction_tokens: 0
prompt_tokens: 312
prompt_tokens_details:
audio_tokens: 0
cached_tokens: 0
total_tokens: 348
status:
code: 200
message: OK
version: 1