Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ response = await router.create(input=messages, model="gemini-3-flash-preview")
response = await router.create(input=messages, model="claude-sonnet-4-5-20250929")
```

Count input tokens before making a request, using each provider's native token-counting endpoint:

```python
token_count = await router.count_tokens(input=messages, model="gpt-5.2")
token_count = await router.count_tokens(input=messages, model="gemini-3-flash-preview")
token_count = await router.count_tokens(input=messages, model="claude-sonnet-4-5-20250929")
```

### InteropRouter Design Philosophy

The only goal of InteropRouter is to interoperate between the most common AI providers. To make this goal achievable, we make several trade-offs:
Expand Down
10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
[project]
name = "interop-router"
version = "0.1.5"
version = "0.1.6"
description = "Interoperate between AI providers using the OpenAI Responses API as a common interface."
readme = "README.md"
authors = [
{ name = "David Koleczek", email = "45405824+DavidKoleczek@users.noreply.github.com" }
]
requires-python = ">=3.11"
dependencies = [
"anthropic[aiohttp]>=0.78,<1.0",
"google-genai[aiohttp]>=1.62,<2.0",
"openai[aiohttp]>=2.17,<3.0",
"anthropic[aiohttp]>=0.79,<1.0",
"google-genai[aiohttp]>=1.63,<2.0",
"openai[aiohttp]>=2.20,<3.0",
"pydantic>=2.12,<3.0",
]

Expand All @@ -25,7 +25,7 @@ dev = [
"pytest>=9.0",
"pytest-asyncio>=1.3",
"ruff>=0.15",
"ty>=0.0.15",
"ty>=0.0.17",
]

[tool.ruff]
Expand Down
28 changes: 28 additions & 0 deletions src/interop_router/anthropic_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,34 @@ async def create(
interop_response.duration_seconds = duration_seconds
return interop_response

@staticmethod
async def count_tokens(
    *,
    client: AsyncAnthropic,
    input: list[ChatMessage],
    model: SupportedModelAnthropic,
    instructions: str | None = None,
    reasoning: Reasoning | None = None,
    tools: Iterable[ToolParam] | None = None,
) -> int:
    """Count input tokens using Anthropic's native ``messages.count_tokens`` endpoint.

    Args:
        client: Anthropic async client.
        input: List of chat messages.
        model: Anthropic model identifier.
        instructions: Optional system instructions, merged with any system
            content extracted from ``input``.
        reasoning: Optional reasoning configuration (drives the "thinking" config).
        tools: Optional tools to include in the count.

    Returns:
        The input token count reported by the API.
    """
    preprocessed_input, system_instruction = AnthropicProvider._preprocess_input(input)
    anthropic_messages = AnthropicProvider._convert_input_messages(preprocessed_input)
    # Merge the system content extracted from the messages with the explicit
    # `instructions` argument so it is not silently dropped. This mirrors how
    # the Gemini provider combines the two sources before building its config.
    combined_instructions = "\n".join(filter(None, [system_instruction, instructions]))
    config, extra_headers = AnthropicProvider._create_config(
        model, reasoning=reasoning, tools=tools, system_instruction=combined_instructions
    )
    count_kwargs: dict[str, Any] = {
        "model": model,
        "messages": anthropic_messages,
        "system": config["system"],
        "thinking": config["thinking"],
    }
    # Only forward optional fields when they carry content, to keep the
    # request payload minimal.
    if config["tools"]:
        count_kwargs["tools"] = config["tools"]
    if extra_headers:
        count_kwargs["extra_headers"] = extra_headers
    result = await client.messages.count_tokens(**count_kwargs)
    return result.input_tokens

@staticmethod
def _preprocess_input(input: list[ChatMessage]) -> tuple[list[ChatMessage], str]:
preprocessed_messages = []
Expand Down
33 changes: 31 additions & 2 deletions src/interop_router/gemini_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,13 @@ async def create(
) -> RouterResponse:
preprocessed_input, system_instruction = GeminiProvider._preprocess_input(input)
gemini_messages = GeminiProvider._convert_input_messages(preprocessed_input)
combined_instructions = "\n".join(filter(None, [system_instruction, instructions]))

# We can have kwargs specific to Gemini in the last message's provider_kwargs
gemini_kwargs = input[-1].provider_kwargs.get("gemini", {}) if input else {}
gemini_config, effective_model = GeminiProvider._create_config(
model=model,
system_instruction=system_instruction,
system_instruction=combined_instructions,
include=include,
max_output_tokens=max_output_tokens,
reasoning=reasoning,
Expand All @@ -85,6 +86,34 @@ async def create(
interop_response.duration_seconds = duration_seconds
return interop_response

@staticmethod
async def count_tokens(
    *,
    client: genai.Client,
    input: list[ChatMessage],
    model: SupportedModelGemini,
    instructions: str | None = None,
    reasoning: Reasoning | None = None,
    tools: Iterable[ToolParam] | None = None,
) -> int:
    """Count input tokens via Gemini's native ``count_tokens`` endpoint.

    The Gemini Developer API's count_tokens config does not accept
    ``system_instruction`` or ``tools`` (only Vertex AI does). To keep the
    count accurate, any system/instruction text is prepended to the contents
    list as a plain user Content entry instead. ``reasoning`` and ``tools``
    are accepted for interface parity with the other providers.

    Returns:
        The total token count reported by the API, or 0 if unavailable.
    """
    messages, extracted_system = GeminiProvider._preprocess_input(input)
    converted_messages = GeminiProvider._convert_input_messages(messages)

    instruction_pieces = [piece for piece in (extracted_system, instructions) if piece]
    merged_instructions = "\n".join(instruction_pieces)

    contents: list[Content] = []
    if merged_instructions:
        contents.append(Content(parts=[types.Part(text=merged_instructions)], role="user"))
    contents += converted_messages

    result = await client.aio.models.count_tokens(model=model, contents=contents)
    return result.total_tokens or 0

@staticmethod
def _preprocess_input(input: list[ChatMessage]) -> tuple[list[ChatMessage], str]:
"""Removes non-Gemini reasoning messages and
Expand Down Expand Up @@ -313,7 +342,7 @@ def _create_config(
(tool for tool in (tools or []) if tool.get("type") == "image_generation"),
None,
)
effective_model = image_gen_tool.get("model") if image_gen_tool else model
effective_model = image_gen_tool.get("model", model) if image_gen_tool else model

gemini_tools: types.ToolListUnion | None = None
tool_config: types.ToolConfig | None = None
Expand Down
20 changes: 20 additions & 0 deletions src/interop_router/openai_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,26 @@ async def create(
duration_seconds=duration_seconds,
)

@staticmethod
async def count_tokens(
    *,
    client: AsyncOpenAI,
    input: list[ChatMessage],
    model: SupportedModelOpenAI,
    instructions: str | None = None,
    reasoning: Reasoning | None = None,
    tools: Iterable[ToolParam] | None = None,
) -> int:
    """Count input tokens using OpenAI's Responses ``input_tokens.count`` endpoint.

    Optional arguments that are ``None`` are translated to the SDK's ``omit``
    sentinel so they are excluded from the request payload entirely.

    Returns:
        The input token count reported by the API.
    """
    prepared_messages = OpenAIProvider._prepare_input_messages(input)
    effective_instructions = omit if instructions is None else instructions
    effective_reasoning = omit if reasoning is None else reasoning
    effective_tools = omit if tools is None else tools
    response = await client.responses.input_tokens.count(
        model=model,
        input=prepared_messages,
        instructions=effective_instructions,
        reasoning=effective_reasoning,
        tools=effective_tools,
    )
    return response.input_tokens

@staticmethod
def _prepare_input_messages(messages: list[ChatMessage]) -> list[ResponseInputItemParam]:
"""Filters out reasoning from other providers, expands Gemini web search
Expand Down
68 changes: 68 additions & 0 deletions src/interop_router/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,71 @@ async def create(
)

raise ValueError(f"Unknown model: {model}")

async def count_tokens(
    self,
    *,
    input: list[ChatMessage],
    model: SupportedModel,
    instructions: str | None = None,
    reasoning: Reasoning | None = None,
    tools: Iterable[ToolParam] | None = None,
) -> int:
    """Count input tokens for the given messages and configuration.

    Uses the provider's native token counting endpoint.

    Args:
        input: List of chat messages.
        model: The model to use for token counting.
        instructions: Optional system instructions.
        reasoning: Optional reasoning configuration.
        tools: Optional list of tools.

    Returns:
        Token count estimate for the input.

    Raises:
        ValueError: If no client is registered for the required provider or if the
            provider does not support token counting.
    """
    # Each entry maps a provider's model union to its client-registry key and
    # its provider implementation; the first union containing `model` wins.
    dispatch = (
        ("openai", SupportedModelOpenAI, OpenAIProvider),
        ("gemini", SupportedModelGemini, GeminiProvider),
        ("anthropic", SupportedModelAnthropic, AnthropicProvider),
    )
    for provider_key, model_union, provider_cls in dispatch:
        if model not in get_args(model_union):
            continue
        client = self._clients.get(provider_key)
        if client is None:
            raise ValueError(f"No client registered for provider: {provider_key}")
        return await provider_cls.count_tokens(
            client=client,
            input=input,
            model=model,
            instructions=instructions,
            reasoning=reasoning,
            tools=tools,
        )

    raise ValueError(f"Unknown model: {model}")
11 changes: 11 additions & 0 deletions src/interop_router/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,14 @@ async def create(
truncation: Literal["auto", "disabled"] | None = None,
background: bool | None = None,
) -> RouterResponse: ...

async def count_tokens(
    self,
    *,
    client: Any,
    input: list[ChatMessage],
    model: SupportedModel,
    instructions: str | None = None,
    reasoning: Reasoning | None = None,
    tools: Iterable[ToolParam] | None = None,
) -> int:
    """Count input tokens for the given messages using a provider's native endpoint.

    Args:
        client: The provider-specific async client instance.
        input: List of chat messages to count tokens for.
        model: The model the count should be computed against.
        instructions: Optional system instructions.
        reasoning: Optional reasoning configuration.
        tools: Optional list of tools.

    Returns:
        The input token count reported by the provider.
    """
    ...
Loading