diff --git a/docs/my-website/docs/providers/tars.md b/docs/my-website/docs/providers/tars.md
new file mode 100644
index 000000000000..00041b7cf4e8
--- /dev/null
+++ b/docs/my-website/docs/providers/tars.md
@@ -0,0 +1,656 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# TARS (Tetrate Agent Router Service)
+
+https://router.tetrate.ai
+
+TARS is an AI Gateway-as-a-Service from Tetrate that provides intelligent routing for GenAI applications. It's OpenAI-compatible and routes to multiple LLM providers.
+
+## API Key
+
+```python
+# env variable
+os.environ['TARS_API_KEY']
+```
+
+## Quick Start
+
+```python showLineNumbers
+import litellm
+import os
+
+# Set your TARS API key.
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+# Chat Completions.
+response = litellm.completion(
+ model="tars/claude-haiku-4-5",
+ messages=[{"role": "user", "content": "Hello, how are you?"}]
+)
+print(response.choices[0].message.content)
+
+# Vision (Image Analysis).
+response = litellm.completion(
+ model="tars/gpt-4o",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "What do you see?"},
+ {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}
+ ]
+ }
+ ]
+)
+print(response.choices[0].message.content)
+
+# Embeddings.
+response = litellm.embedding(
+ model="tars/text-embedding-3-small",
+ input=["Hello world"]
+)
+print(response.data[0].embedding)
+```
+
+## Features
+
+TARS supports:
+
+- ✅ Chat Completions
+- ✅ Embeddings
+- ✅ Vision (Multi-modal image analysis)
+- ✅ Streaming
+- ✅ Async calls
+- ✅ Function/Tool calling
+
+## API Configuration
+
+### Required Environment Variables
+
+```bash
+export TARS_API_KEY="your-tars-api-key"
+```
+
+### Optional Configuration
+
+```bash
+# Override the default API base URL.
+export TARS_API_BASE="https://api.router.tetrate.ai/v1"
+```
+
+## Supported Models
+
+TARS provides access to models from multiple providers including:
+
+### Recommended Models
+
+**Claude Haiku 4.5** (`tars/claude-haiku-4-5`) - Fast, cost-effective model with vision support. Great for most use cases.
+
+**Claude Sonnet 4.5** (`tars/claude-sonnet-4-5`) - Balanced performance and cost for complex tasks.
+
+**GPT-4o** (`tars/gpt-4o`) - OpenAI's flagship multimodal model with strong vision capabilities.
+
+### Vision Models
+
+- OpenAI (GPT-4o, GPT-4o-mini, GPT-4.1, GPT-5, etc.)
+- Anthropic (Claude 4.5, Claude 4.5 Haiku, Claude 4, Claude 3.7 Sonnet, etc.)
+- xAI (Grok 4, Grok 3, etc.)
+- Google (Gemini 2.5 Pro, Gemini 2.0 Flash, etc.)
+- DeepSeek, Qwen, and many more
+
+### Chat Models
+
+- OpenAI (GPT-4o, GPT-4o-mini, GPT-4.1, GPT-5, O1, O3, etc.)
+- Anthropic (Claude 4.5, Claude 4.5 Haiku, Claude 4, Claude 3.7 Sonnet, Claude 3.5 Haiku, etc.)
+- xAI (Grok 4, Grok 3, etc.)
+- Google (Gemini 2.5 Pro, Gemini 2.0 Flash, etc.)
+- DeepSeek, Qwen, and many more
+
+### Embedding Models
+
+- OpenAI (text-embedding-3-small, text-embedding-3-large, text-embedding-ada-002)
+- Custom embedding models from various providers
+
+To see the full list of available models, visit: https://api.router.tetrate.ai/v1/models
+
+## Usage Examples
+
+### Chat Completions
+
+```python showLineNumbers title="LiteLLM python sdk usage - Non-streaming"
+import litellm
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+response = litellm.completion(
+ model="tars/claude-haiku-4-5",
+ messages=[
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "Explain quantum computing"}
+ ],
+ temperature=0.7,
+ max_tokens=500
+)
+print(response.choices[0].message.content)
+```
+
+### Streaming
+
+```python showLineNumbers title="LiteLLM python sdk usage - Streaming"
+import litellm
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+response = litellm.completion(
+ model="tars/gpt-4o",
+ messages=[{"role": "user", "content": "Write a short poem"}],
+ stream=True
+)
+
+for chunk in response:
+ if chunk.choices[0].delta.content:
+ print(chunk.choices[0].delta.content, end="")
+```
+
+### Async Chat Completions
+
+```python showLineNumbers title="LiteLLM python sdk usage - Async"
+import litellm
+import asyncio
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+async def test_async():
+ response = await litellm.acompletion(
+ model="tars/claude-haiku-4-5",
+ messages=[{"role": "user", "content": "Hello!"}]
+ )
+ print(response.choices[0].message.content)
+
+asyncio.run(test_async())
+```
+
+### Function/Tool Calling
+
+```python showLineNumbers title="LiteLLM python sdk usage - Function calling"
+import litellm
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "description": "Get current weather",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {"type": "string"}
+ },
+ "required": ["location"]
+ }
+ }
+ }
+]
+
+response = litellm.completion(
+ model="tars/gpt-4o",
+ messages=[{"role": "user", "content": "What's the weather in SF?"}],
+ tools=tools
+)
+print(response.choices[0].message.tool_calls)
+```
+
+### Vision (Multi-modal)
+
+```python showLineNumbers title="LiteLLM python sdk usage - Vision"
+import litellm
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+# Vision with image URL
+response = litellm.completion(
+ model="tars/gpt-4o",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "What do you see in this image?"
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ temperature=0.7,
+ max_tokens=150
+)
+
+print(response.choices[0].message.content)
+```
+
+### Vision with Base64 Image
+
+```python showLineNumbers title="LiteLLM python sdk usage - Vision with Base64"
+import litellm
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+# Vision with base64 encoded image
+response = litellm.completion(
+ model="tars/claude-haiku-4-5",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "data:image/jpeg;base64,YOUR_BASE64_ENCODED_IMAGE"
+ }
+ }
+ ]
+ }
+ ]
+)
+
+print(response.choices[0].message.content)
+```
+
+### Vision Function Calling
+
+```python showLineNumbers title="LiteLLM python sdk usage - Vision with Function Calling"
+import litellm
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "analyze_image",
+ "description": "Analyze image content",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "objects_detected": {"type": "array", "items": {"type": "string"}},
+ "scene_type": {"type": "string"}
+ }
+ }
+ }
+ }
+]
+
+response = litellm.completion(
+ model="tars/gpt-4o",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Analyze this image and call the analyze_image function."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://example.com/image.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ tools=tools
+)
+
+if response.choices[0].message.tool_calls:
+ tool_call = response.choices[0].message.tool_calls[0]
+ print(f"Tool called: {tool_call.function.name}")
+ print(f"Arguments: {tool_call.function.arguments}")
+```
+
+### Streaming Vision
+
+```python showLineNumbers title="LiteLLM python sdk usage - Streaming Vision"
+import litellm
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+response = litellm.completion(
+ model="tars/gpt-4o",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in detail."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://example.com/image.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ stream=True
+)
+
+for chunk in response:
+ if chunk.choices[0].delta.content:
+ print(chunk.choices[0].delta.content, end="")
+```
+
+### Async Vision
+
+```python showLineNumbers title="LiteLLM python sdk usage - Async Vision"
+import litellm
+import asyncio
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+async def test_vision():
+ response = await litellm.acompletion(
+ model="tars/claude-haiku-4-5",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "What do you see in this image?"
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://example.com/image.jpg"
+ }
+ }
+ ]
+ }
+ ]
+ )
+ print(response.choices[0].message.content)
+
+asyncio.run(test_vision())
+```
+
+### Embeddings
+
+```python showLineNumbers title="LiteLLM python sdk usage - Embeddings"
+import litellm
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+response = litellm.embedding(
+ model="tars/text-embedding-3-large",
+ input=["Hello world", "Goodbye world"]
+)
+
+for embedding in response.data:
+ print(f"Embedding {embedding.index}: {len(embedding.embedding)} dimensions")
+```
+
+## Platform Features
+
+TARS provides intelligent routing and platform features that enable:
+
+- Intelligent routing and load balancing
+- Automatic fallback to alternative models
+- Cost optimization features
+- Performance monitoring
+- Unified API access
+
+For detailed pricing and platform information, see: https://router.tetrate.ai/models
+
+## Getting Your API Key
+
+1. Sign up at https://router.tetrate.ai
+2. Get $5 free credit with a business email
+3. Generate your API key from the dashboard
+4. Set the `TARS_API_KEY` environment variable
+
+## Usage with LiteLLM Proxy Server
+
+Here's how to call TARS models with the LiteLLM Proxy Server:
+
+### 1. Save key in your environment
+
+```bash
+export TARS_API_KEY="your-tars-api-key"
+```
+
+### 2. Start the proxy
+
+<Tabs>
+<TabItem value="config" label="config.yaml">
+
+```yaml
+model_list:
+ - model_name: claude-haiku
+ litellm_params:
+ model: tars/claude-haiku-4-5
+ api_key: os.environ/TARS_API_KEY
+
+ - model_name: claude-sonnet
+ litellm_params:
+ model: tars/claude-sonnet-4-5
+ api_key: os.environ/TARS_API_KEY
+
+ - model_name: gpt-4o
+ litellm_params:
+ model: tars/gpt-4o
+ api_key: os.environ/TARS_API_KEY
+
+ - model_name: embeddings
+ litellm_params:
+ model: tars/text-embedding-3-large
+ api_key: os.environ/TARS_API_KEY
+```
+
+```bash
+litellm --config /path/to/config.yaml
+```
+
+</TabItem>
+<TabItem value="cli" label="CLI">
+
+```bash
+$ litellm --model tars/claude-haiku-4-5
+
+# Server running on http://0.0.0.0:4000
+```
+
+</TabItem>
+</Tabs>
+
+### 3. Test it
+
+<Tabs>
+<TabItem value="Curl" label="Curl Request">
+
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--data ' {
+ "model": "gpt-4o",
+ "messages": [
+ {
+ "role": "user",
+ "content": "what llm are you"
+ }
+ ]
+ }
+'
+```
+
+</TabItem>
+<TabItem value="openai" label="OpenAI v1.0.0+">
+
+```python
+import openai
+client = openai.OpenAI(
+ api_key="anything",
+ base_url="http://0.0.0.0:4000"
+)
+
+response = client.chat.completions.create(
+ model="gpt-4o",
+ messages=[
+ {
+ "role": "user",
+ "content": "this is a test request, write a short poem"
+ }
+ ]
+)
+
+print(response)
+```
+
+</TabItem>
+<TabItem value="langchain" label="Langchain">
+
+```python
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts.chat import (
+ ChatPromptTemplate,
+ HumanMessagePromptTemplate,
+ SystemMessagePromptTemplate,
+)
+from langchain.schema import HumanMessage, SystemMessage
+
+chat = ChatOpenAI(
+ openai_api_base="http://0.0.0.0:4000",
+ model="gpt-4o",
+ temperature=0.1
+)
+
+messages = [
+ SystemMessage(
+ content="You are a helpful assistant that im using to make a test request to."
+ ),
+ HumanMessage(
+ content="test from litellm. tell me why it's amazing in 1 sentence"
+ ),
+]
+response = chat(messages)
+
+print(response)
+```
+
+</TabItem>
+</Tabs>
+
+## Advanced Features
+
+### Cost Tracking
+
+LiteLLM automatically tracks costs for TARS models with a 5% margin added to the base model costs. This margin accounts for TARS routing and platform overhead.
+
+**Note:** Cost tracking is only available for models with pricing information in LiteLLM's model catalog. If a model doesn't have pricing information, no cost will be displayed.
+
+```python showLineNumbers
+import litellm
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+response = litellm.completion(
+ model="tars/gpt-4o",
+ messages=[{"role": "user", "content": "Hello!"}]
+)
+
+# Cost is automatically calculated with 5% margin.
+print(f"Response cost: ${response._hidden_params.get('response_cost', 0):.6f}")
+```
+
+### Cost Optimization
+
+TARS automatically routes requests to optimize for cost while maintaining performance:
+
+```python showLineNumbers
+import litellm
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+# TARS can automatically switch to cheaper models.
+response = litellm.completion(
+ model="tars/gpt-4o-mini", # Use cost-effective model.
+ messages=[{"role": "user", "content": "Simple question"}]
+)
+```
+
+### Automatic Fallback
+
+TARS provides automatic fallback to alternative models when primary models are unavailable:
+
+```python showLineNumbers
+import litellm
+import os
+
+os.environ["TARS_API_KEY"] = "your-tars-api-key"
+
+# If the specified model is down, TARS routes to alternatives.
+response = litellm.completion(
+ model="tars/claude-haiku-4-5",
+ messages=[{"role": "user", "content": "Hello"}],
+ # TARS handles fallback automatically.
+)
+```
+
+## Support
+
+- Dashboard: https://router.tetrate.ai
+- Documentation: https://docs.tetrate.io
+- Support: Contact through the TARS dashboard
+
+## Troubleshooting
+
+### Authentication Errors
+
+If you get authentication errors:
+
+1. Verify your API key is set correctly: `echo $TARS_API_KEY`
+2. Check your key hasn't expired in the dashboard
+3. Ensure you have sufficient credits
+
+### Rate Limits
+
+TARS respects the rate limits of underlying providers. If you hit rate limits:
+
+1. Check your usage in the dashboard
+2. Consider upgrading to an Enterprise plan for higher limits
+3. Implement exponential backoff in your code
+
+### Model Not Found
+
+If a model isn't available:
+
+1. Check the latest model list: https://router.tetrate.ai/models
+2. Verify the model ID is correct (e.g., `tars/claude-sonnet-4-20250514`)
+3. Some models may require specific account permissions
diff --git a/litellm/__init__.py b/litellm/__init__.py
index d1ecc3c76783..3470bbb5017a 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -528,6 +528,7 @@ def identify(event_details):
ovhcloud_models: Set = set()
ovhcloud_embedding_models: Set = set()
lemonade_models: Set = set()
+tars_models: Set = set()
def is_bedrock_pricing_only_model(key: str) -> bool:
@@ -752,6 +753,8 @@ def add_known_models():
ovhcloud_embedding_models.add(key)
elif value.get("litellm_provider") == "lemonade":
lemonade_models.add(key)
+ elif value.get("litellm_provider") == "tars":
+ tars_models.add(key)
add_known_models()
@@ -854,6 +857,7 @@ def add_known_models():
| ovhcloud_models
| lemonade_models
| set(clarifai_models)
+ | tars_models
)
model_list_set = set(model_list)
@@ -940,6 +944,7 @@ def add_known_models():
"ovhcloud": ovhcloud_models | ovhcloud_embedding_models,
"lemonade": lemonade_models,
"clarifai": clarifai_models,
+ "tars": tars_models,
}
# mapping for those models which have larger equivalents
@@ -1284,6 +1289,7 @@ def add_known_models():
from .llms.watsonx.embed.transformation import IBMWatsonXEmbeddingConfig
from .llms.github_copilot.chat.transformation import GithubCopilotConfig
from .llms.nebius.chat.transformation import NebiusConfig
+from .llms.tars.chat.transformation import TarsConfig
from .llms.wandb.chat.transformation import WandbConfig
from .llms.dashscope.chat.transformation import DashScopeChatConfig
from .llms.moonshot.chat.transformation import MoonshotChatConfig
diff --git a/litellm/constants.py b/litellm/constants.py
index 8553ca6ced66..9f7d935634c6 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -361,7 +361,8 @@
"vercel_ai_gateway",
"wandb",
"ovhcloud",
- "lemonade"
+ "lemonade",
+ "tars"
]
LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS = [
@@ -547,6 +548,7 @@
"wandb",
"cometapi",
"clarifai",
+ "tars",
]
openai_text_completion_compatible_providers: List = (
[ # providers that support `/v1/completions`
diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 37c76e5584bb..4604e75af008 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -375,6 +375,11 @@ def cost_per_token( # noqa: PLR0915
cost_per_token as dashscope_cost_per_token,
)
return dashscope_cost_per_token(model=model, usage=usage_block)
+ elif custom_llm_provider == "tars":
+ from litellm.llms.tars.cost_calculator import (
+ cost_per_token as tars_cost_per_token,
+ )
+ return tars_cost_per_token(model=model, usage=usage_block)
else:
model_info = _cached_get_model_info_helper(
model=model, custom_llm_provider=custom_llm_provider
diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py
index 275f2a63a1ba..5c06d14a5657 100644
--- a/litellm/litellm_core_utils/get_llm_provider_logic.py
+++ b/litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -803,6 +803,14 @@ def _get_openai_compatible_provider_info( # noqa: PLR0915
) = litellm.ClarifaiConfig()._get_openai_compatible_provider_info(
api_base, api_key
)
+ elif custom_llm_provider == "tars":
+ # TARS (Tetrate Agent Router Service) is OpenAI compatible
+ api_base = (
+ api_base
+ or get_secret_str("TARS_API_BASE")
+ or "https://api.router.tetrate.ai/v1"
+ ) # type: ignore
+ dynamic_api_key = api_key or get_secret_str("TARS_API_KEY")
if api_base is not None and not isinstance(api_base, str):
raise Exception("api base needs to be a string. api_base={}".format(api_base))
diff --git a/litellm/llms/tars/chat/transformation.py b/litellm/llms/tars/chat/transformation.py
new file mode 100644
index 000000000000..f54d835464e1
--- /dev/null
+++ b/litellm/llms/tars/chat/transformation.py
@@ -0,0 +1,79 @@
+"""
+Support for the TARS OpenAI-compatible `/v1/chat/completions` endpoint.
+
+TARS (Tetrate Agent Router Service) is OpenAI-compatible.
+
+Docs: https://router.tetrate.ai
+API: https://api.router.tetrate.ai/v1
+"""
+
+from typing import Optional, Union
+
+import httpx
+
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+
+from ..common_utils import TarsException, TarsModelInfo
+
+
+class TarsConfig(OpenAIGPTConfig, TarsModelInfo):
+ """
+ Configuration for TARS (Tetrate Agent Router Service).
+
+ TARS is OpenAI-compatible and routes to multiple LLM providers.
+ Supports dynamic model fetching from the TARS API.
+ """
+
+ def get_error_class(
+ self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+ ) -> BaseLLMException:
+ return TarsException(
+ message=error_message,
+ status_code=status_code,
+ headers=headers,
+ )
+
+ def get_complete_url(
+ self,
+ api_base: Optional[str],
+ api_key: Optional[str],
+ model: str,
+ optional_params: dict,
+ litellm_params: dict,
+ stream: Optional[bool] = None,
+ ) -> str:
+ if not api_base:
+ api_base = "https://api.router.tetrate.ai/v1"
+
+ endpoint = "chat/completions"
+ api_base = api_base.rstrip("/")
+
+ if endpoint in api_base:
+ result = api_base
+ else:
+ result = f"{api_base}/{endpoint}"
+
+ return result
+
+ def get_models(self, api_key: Optional[str] = None, api_base: Optional[str] = None):
+ """
+ Override OpenAIGPTConfig.get_models() to use TARS API instead of OpenAI API.
+ """
+ # Use TarsModelInfo.get_models() method instead of OpenAIGPTConfig.get_models()
+ return TarsModelInfo.get_models(self, api_key=api_key, api_base=api_base)
+
+ @staticmethod
+ def get_api_base(api_base: Optional[str] = None) -> str:
+ """
+ Override OpenAIGPTConfig.get_api_base() to use TARS API base instead of OpenAI API base.
+ """
+ return TarsModelInfo.get_api_base(api_base)
+
+ @staticmethod
+ def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
+ """
+ Override OpenAIGPTConfig.get_api_key() to use TARS API key instead of OpenAI API key.
+ """
+ return TarsModelInfo.get_api_key(api_key)
+
diff --git a/litellm/llms/tars/common_utils.py b/litellm/llms/tars/common_utils.py
new file mode 100644
index 000000000000..e4d323f7e24e
--- /dev/null
+++ b/litellm/llms/tars/common_utils.py
@@ -0,0 +1,111 @@
+"""
+TARS (Tetrate Agent Router Service) common utilities and model info.
+"""
+
+from typing import List, Optional
+
+import httpx
+
+from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.secret_managers.main import get_secret_str
+
+
+class TarsException(BaseLLMException):
+ """Exception class for TARS provider errors."""
+ pass
+
+
+class TarsModelInfo(BaseLLMModelInfo):
+ """
+ Model info for TARS (Tetrate Agent Router Service) provider.
+
+ Supports dynamic model fetching from the TARS API.
+ """
+
+ @staticmethod
+ def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
+ """Get TARS API key from parameter or environment variable."""
+ return api_key or get_secret_str("TARS_API_KEY")
+
+ @staticmethod
+ def get_api_base(api_base: Optional[str] = None) -> str:
+ """Get TARS API base URL from parameter or environment variable."""
+ return api_base or get_secret_str("TARS_API_BASE") or "https://api.router.tetrate.ai/v1"
+
+ @staticmethod
+ def get_base_model(model: str) -> Optional[str]:
+ """Remove tars/ prefix from model name."""
+ return model.replace("tars/", "")
+
+ def get_models(
+ self, api_key: Optional[str] = None, api_base: Optional[str] = None
+ ) -> List[str]:
+ """
+ Fetch available models from TARS API.
+
+ Args:
+ api_key: TARS API key (optional, will use TARS_API_KEY env var if not provided)
+ api_base: TARS API base URL (optional, defaults to https://api.router.tetrate.ai/v1)
+
+ Returns:
+ List of model names prefixed with "tars/"
+ """
+ api_base = self.get_api_base(api_base)
+ api_key = self.get_api_key(api_key)
+
+ if api_key is None:
+ raise ValueError(
+ "TARS_API_KEY is not set. Please set the environment variable to query TARS's /models endpoint."
+ )
+
+ try:
+ # Use a fresh httpx client to avoid any global configuration issues
+ url = f"{api_base}/models"
+ with httpx.Client() as client:
+ response = client.get(
+ url=url,
+ headers={"Authorization": f"Bearer {api_key}"},
+ timeout=10.0
+ )
+ response.raise_for_status()
+ except httpx.HTTPStatusError as e:
+ raise ValueError(
+ f"Failed to fetch models from TARS. Status code: {e.response.status_code}, Response: {e.response.text}"
+ )
+ except Exception as e:
+ raise ValueError(f"Failed to fetch models from TARS. Error: {e}")
+
+ models_data = response.json().get("data", [])
+
+ # Extract model IDs and prefix with "tars/"
+ litellm_model_names = []
+ for model in models_data:
+ if isinstance(model, dict) and "id" in model:
+ model_id = model["id"]
+ litellm_model_name = f"tars/{model_id}"
+ litellm_model_names.append(litellm_model_name)
+
+ return sorted(litellm_model_names)
+
+ def validate_environment(
+ self,
+ headers: dict,
+ model: str,
+ messages: list,
+ optional_params: dict,
+ litellm_params: dict,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ ) -> dict:
+ """Validate TARS environment and add authentication headers."""
+ api_key = self.get_api_key(api_key)
+ api_base = self.get_api_base(api_base)
+
+ if api_key is None:
+ raise ValueError(
+ "TARS_API_KEY is not set. Please set the environment variable."
+ )
+
+ headers["Authorization"] = f"Bearer {api_key}"
+ return headers
\ No newline at end of file
diff --git a/litellm/llms/tars/cost_calculator.py b/litellm/llms/tars/cost_calculator.py
new file mode 100644
index 000000000000..d88ba8cb48ef
--- /dev/null
+++ b/litellm/llms/tars/cost_calculator.py
@@ -0,0 +1,55 @@
+"""
+Helper util for handling TARS-specific cost calculation.
+- Uses the generic cost calculator which already handles tiered pricing correctly.
+- Adds a 5% margin to the base model costs.
+- Returns (0.0, 0.0) when no pricing is available.
+"""
+
+from typing import Tuple
+
+from litellm.types.utils import Usage
+from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
+from litellm.utils import get_model_info
+
+
+def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
+ """
+ Calculates the cost per token for a given TARS model with a 5% margin.
+ Uses the generic cost calculator for all pricing logic.
+
+ Input:
+ - model: str, the model name without provider prefix.
+ - usage: LiteLLM Usage block, containing usage information.
+
+ Returns:
+ Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd.
+ Returns (0.0, 0.0) if no pricing is available.
+ """
+ try:
+ # Check if pricing is available for this model.
+ model_info = get_model_info(model=model, custom_llm_provider="tars")
+
+ # If no pricing is available, return (0.0, 0.0).
+ if not model_info or (
+ model_info.get("input_cost_per_token", 0) == 0 and
+ model_info.get("output_cost_per_token", 0) == 0
+ ):
+ return (0.0, 0.0)
+
+ # Calculate base cost using generic calculator.
+ prompt_cost, completion_cost = generic_cost_per_token(
+ model=model,
+ usage=usage,
+ custom_llm_provider="tars"
+ )
+
+ # Add 5% margin to both costs.
+ margin_multiplier = 1.05
+ prompt_cost_with_margin = prompt_cost * margin_multiplier
+ completion_cost_with_margin = completion_cost * margin_multiplier
+
+ return prompt_cost_with_margin, completion_cost_with_margin
+ except Exception:
+ # If any error occurs (e.g., model not found), return (0.0, 0.0).
+ return (0.0, 0.0)
+
diff --git a/litellm/llms/tars/embedding/transformation.py b/litellm/llms/tars/embedding/transformation.py
new file mode 100644
index 000000000000..9cb3e4460dc7
--- /dev/null
+++ b/litellm/llms/tars/embedding/transformation.py
@@ -0,0 +1,52 @@
+"""
+Support for the TARS OpenAI-compatible `/v1/embeddings` endpoint.
+
+TARS (Tetrate Agent Router Service) is OpenAI-compatible for embeddings.
+
+Docs: https://router.tetrate.ai
+API: https://api.router.tetrate.ai/v1
+"""
+
+from typing import Optional, Union
+
+import httpx
+
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.llms.openai.embedding.transformation import OpenAIEmbeddingConfig
+from litellm.secret_managers.main import get_secret_str
+
+from ..common_utils import TarsException
+
+
+class TarsEmbeddingConfig(OpenAIEmbeddingConfig):
+ """
+ Configuration for TARS embeddings.
+
+ TARS supports embeddings through OpenAI-compatible API.
+ """
+
+ def get_error_class(
+ self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+ ) -> BaseLLMException:
+ return TarsException(
+ message=error_message,
+ status_code=status_code,
+ headers=headers,
+ )
+
+ @staticmethod
+ def get_api_base(api_base: Optional[str] = None) -> str:
+ """
+ Get TARS API base URL from parameter or environment variable.
+ Override to use TARS-specific defaults instead of OpenAI defaults.
+ """
+ return api_base or get_secret_str("TARS_API_BASE") or "https://api.router.tetrate.ai/v1"
+
+ @staticmethod
+ def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
+ """
+ Get TARS API key from parameter or environment variable.
+ Override to use TARS-specific API key instead of OpenAI key.
+ """
+ return api_key or get_secret_str("TARS_API_KEY")
+
diff --git a/litellm/main.py b/litellm/main.py
index 8e3f9a0b3d8f..e335939ef68d 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2013,6 +2013,49 @@ def completion( # type: ignore # noqa: PLR0915
provider_config=provider_config,
)
+ ## LOGGING
+ logging.post_call(
+ input=messages, api_key=api_key, original_response=response
+ )
+ elif custom_llm_provider == "tars":
+ api_base = (
+ api_base
+ or litellm.api_base
+ or get_secret_str("TARS_API_BASE")
+ or "https://api.router.tetrate.ai/v1"
+ )
+
+ api_key = (
+ api_key
+ or litellm.api_key
+ or get_secret("TARS_API_KEY")
+ )
+
+ ## Load Config
+ config = litellm.TarsConfig.get_config()
+ for k, v in config.items():
+ if k not in optional_params:
+ optional_params[k] = v
+
+ ## COMPLETION CALL
+ response = base_llm_http_handler.completion(
+ model=model,
+ stream=stream,
+ messages=messages,
+ acompletion=acompletion,
+ api_base=api_base,
+ model_response=model_response,
+ optional_params=optional_params,
+ litellm_params=litellm_params,
+ shared_session=shared_session,
+ custom_llm_provider="tars",
+ timeout=timeout,
+ headers=headers,
+ encoding=encoding,
+ api_key=api_key,
+ logging_obj=logging,
+ client=client,
+ )
## LOGGING
logging.post_call(
input=messages, api_key=api_key, original_response=response
@@ -4494,6 +4537,27 @@ def embedding( # noqa: PLR0915
or "api.studio.nebius.ai/v1"
)
+ response = openai_chat_completions.embedding(
+ model=model,
+ input=input,
+ api_base=api_base,
+ api_key=api_key,
+ logging_obj=logging,
+ timeout=timeout,
+ model_response=EmbeddingResponse(),
+ optional_params=optional_params,
+ client=client,
+ aembedding=aembedding,
+ )
+ elif custom_llm_provider == "tars":
+ api_key = api_key or litellm.api_key or get_secret("TARS_API_KEY")
+ api_base = (
+ api_base
+ or litellm.api_base
+ or get_secret_str("TARS_API_BASE")
+ or "https://api.router.tetrate.ai/v1"
+ )
+
response = openai_chat_completions.embedding(
model=model,
input=input,
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 4804ae1de742..cf0f93d3b0cc 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -23832,5 +23832,1894 @@
"supports_tool_choice": true,
"supports_vision": true,
"supports_web_search": true
+ },
+ "tars/claude-haiku-4-5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000005,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-haiku-4-5-20251001": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000005,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/gpt-4o-2024-11-20": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000025,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 0.00000125,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000003,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/claude-sonnet-4-5-20250929": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-sonnet-4-5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-sonnet-4-20250514": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-sonnet-4-0": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-opus-4-20250514": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.000075,
+ "cache_read_input_token_cost": 0.0000015,
+ "cache_write_input_token_cost": 0.00001875,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 32000,
+ "max_tokens": 32000
+ },
+ "tars/claude-opus-4-1-20250805": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.000075,
+ "cache_read_input_token_cost": 0.0000015,
+ "cache_write_input_token_cost": 0.00001875,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 32000,
+ "max_tokens": 32000
+ },
+ "tars/claude-opus-4-1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.000075,
+ "cache_read_input_token_cost": 0.0000015,
+ "cache_write_input_token_cost": 0.00001875,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 32000,
+ "max_tokens": 32000
+ },
+ "tars/claude-opus-4-0": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.000075,
+ "cache_read_input_token_cost": 0.0000015,
+ "cache_write_input_token_cost": 0.00001875,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 32000,
+ "max_tokens": 32000
+ },
+ "tars/claude-3-haiku-20240307": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 0.00000125,
+ "cache_read_input_token_cost": 3e-8,
+ "cache_write_input_token_cost": 3e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/claude-3-7-sonnet-20250219": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-3-5-haiku-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 8e-7,
+ "output_cost_per_token": 0.000004,
+ "cache_read_input_token_cost": 8e-8,
+ "cache_write_input_token_cost": 0.000001,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/claude-3-5-haiku-20241022": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 8e-7,
+ "output_cost_per_token": 0.000004,
+ "cache_read_input_token_cost": 8e-8,
+ "cache_write_input_token_cost": 0.000001,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/xai/grok-code-fast-1-0825": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 0.0000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-code-fast-1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 0.0000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-code-fast": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 0.0000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-fast-reasoning-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 5e-7,
+ "cache_read_input_token_cost": 5e-8,
+ "cache_write_input_token_cost": 5e-8,
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-fast-reasoning": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 5e-7,
+ "cache_read_input_token_cost": 5e-8,
+ "cache_write_input_token_cost": 5e-8,
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-fast-non-reasoning-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-fast-non-reasoning": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-fast": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 5e-7,
+ "cache_read_input_token_cost": 5e-8,
+ "cache_write_input_token_cost": 5e-8,
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-0709": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini-fast-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini-fast-beta": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini-fast": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini-beta": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-fast-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-fast-beta": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-fast": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-beta": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-2-vision-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.00001,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-2-vision-1212": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.00001,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-2-vision": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.00001,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/zai-org/GLM-4.6": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 6e-7,
+ "output_cost_per_token": 0.000002,
+ "max_input_tokens": 202752,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/zai-org/GLM-4.5-Air": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 0.0000011,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/zai-org/GLM-4.5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3.8e-7,
+ "output_cost_per_token": 0.0000016,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/thenlper/gte-large": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/thenlper/gte-base": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/shibing624/text2vec-base-chinese": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/paraphrase-MiniLM-L6-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/multi-qa-mpnet-base-dot-v1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/clip-ViT-B-32-multilingual-v1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/clip-ViT-B-32": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 77,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/all-mpnet-base-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/all-MiniLM-L6-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/all-MiniLM-L12-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Sao10K/L3.3-70B-Euryale-v2.3": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 6.5e-7,
+ "output_cost_per_token": 7.5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Sao10K/L3.1-70B-Euryale-v2.2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 6.5e-7,
+ "output_cost_per_token": 7.5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Sao10K/L3-8B-Lunaris-v1-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-8,
+ "output_cost_per_token": 5e-8,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/QwQ-32B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Next-80B-A3B-Thinking": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.4e-7,
+ "output_cost_per_token": 0.0000014,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.4e-7,
+ "output_cost_per_token": 0.0000014,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Embedding-8B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Embedding-4B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Embedding-0.6B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-9,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.9e-7,
+ "output_cost_per_token": 0.0000012,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 0.0000016,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-32B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 2.8e-7,
+ "max_input_tokens": 40960,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-30B-A3B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 8e-8,
+ "output_cost_per_token": 2.9e-7,
+ "max_input_tokens": 40960,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 0.0000029,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 9e-8,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-235B-A22B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.8e-7,
+ "output_cost_per_token": 5.4e-7,
+ "max_input_tokens": 40960,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-14B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 6e-8,
+ "output_cost_per_token": 2.4e-7,
+ "max_input_tokens": 40960,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen2.5-VL-32B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen2.5-72B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.2e-7,
+ "output_cost_per_token": 3.9e-7,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/openai/gpt-oss-20b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-8,
+ "output_cost_per_token": 1.4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/openai/gpt-oss-120b-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/openai/gpt-oss-120b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 4.5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-8,
+ "output_cost_per_token": 1.6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 6e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/NousResearch/Hermes-3-Llama-3.1-70B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 3e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/NousResearch/Hermes-3-Llama-3.1-405B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000001,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/moonshotai/Kimi-K2-Instruct-0905": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-7,
+ "output_cost_per_token": 0.000002,
+ "cache_read_input_token_cost": 4e-7,
+ "cache_write_input_token_cost": 4e-7,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/moonshotai/Kimi-K2-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-7,
+ "output_cost_per_token": 0.000002,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/mistralai/Mistral-Small-3.2-24B-Instruct-2506": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 7.5e-8,
+ "output_cost_per_token": 2e-7,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/mistralai/Mistral-Small-24B-Instruct-2501": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 8e-8,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/microsoft/WizardLM-2-8x22B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4.8e-7,
+ "output_cost_per_token": 4.8e-7,
+ "max_input_tokens": 65536,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/microsoft/phi-4": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 7e-8,
+ "output_cost_per_token": 1.4e-7,
+ "max_input_tokens": 16384,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-8,
+ "output_cost_per_token": 3e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-8,
+ "output_cost_per_token": 5e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-8,
+ "output_cost_per_token": 6e-8,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-Guard-4-12B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.8e-7,
+ "output_cost_per_token": 1.8e-7,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-Guard-3-8B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5.5e-8,
+ "output_cost_per_token": 5.5e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-4-Scout-17B-16E-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 8e-8,
+ "output_cost_per_token": 3e-7,
+ "max_input_tokens": 327680,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.3e-7,
+ "output_cost_per_token": 3.9e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-3.3-70B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.3e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-3.2-3B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-8,
+ "output_cost_per_token": 2e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4.9e-8,
+ "output_cost_per_token": 4.9e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Gryphe/MythoMax-L2-13b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 9e-8,
+ "max_input_tokens": 4096,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemma-3-4b-it": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-8,
+ "output_cost_per_token": 8e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemma-3-27b-it": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 9e-8,
+ "output_cost_per_token": 1.6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemma-3-12b-it": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 1e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemini-2.5-pro": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemini-2.5-flash": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 0.0000025,
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemini-2.0-flash-001": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/embeddinggemma-300m": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-9,
+ "max_input_tokens": 2048,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-V3.2-Exp": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.7e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.7e-7,
+ "output_cost_per_token": 0.000001,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-V3.1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.7e-7,
+ "output_cost_per_token": 0.000001,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-V3-0324": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 8.8e-7,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-V3": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3.8e-7,
+ "output_cost_per_token": 8.9e-7,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-R1-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000003,
+ "max_input_tokens": 40960,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-R1-0528": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-7,
+ "output_cost_per_token": 0.00000215,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-R1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 7e-7,
+ "output_cost_per_token": 0.0000024,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/BAAI/bge-m3-multi": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/BAAI/bge-m3": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/BAAI/bge-large-en-v1.5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/BAAI/bge-en-icl": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/BAAI/bge-base-en-v1.5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/anthropic/claude-4-sonnet": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000033,
+ "output_cost_per_token": 0.0000165,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/anthropic/claude-4-opus": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000165,
+ "output_cost_per_token": 0.0000825,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/anthropic/claude-3-7-sonnet-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000033,
+ "output_cost_per_token": 0.0000165,
+ "cache_write_input_token_cost": 3.3e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/allenai/olmOCR-7B-0825": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.7e-7,
+ "output_cost_per_token": 0.0000015,
+ "max_input_tokens": 16384,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/groq/qwen/qwen3-32b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.9e-7,
+ "output_cost_per_token": 5.9e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 40960,
+ "max_tokens": 40960
+ },
+ "tars/groq/openai/gpt-oss-20b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536
+ },
+ "tars/groq/openai/gpt-oss-120b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 7.5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536
+ },
+ "tars/groq/moonshotai/kimi-k2-instruct-0905": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000003,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/groq/meta-llama/llama-prompt-guard-2-86m": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-8,
+ "output_cost_per_token": 4e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 512,
+ "max_tokens": 512
+ },
+ "tars/groq/meta-llama/llama-prompt-guard-2-22m": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-8,
+ "output_cost_per_token": 3e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 512,
+ "max_tokens": 512
+ },
+ "tars/groq/meta-llama/llama-guard-4-12b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 2e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 1024,
+ "max_tokens": 1024
+ },
+ "tars/groq/meta-llama/llama-4-scout-17b-16e-instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.1e-7,
+ "output_cost_per_token": 3.4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/groq/meta-llama/llama-4-maverick-17b-128e-instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/groq/llama-3.3-70b-versatile": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5.9e-7,
+ "output_cost_per_token": 7.9e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/groq/llama-3.1-8b-instant": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 8e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072
+ },
+ "tars/gemini-2.0-flash": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/deepinfra/intfloat/multilingual-e5-large-instruct": {
+ "litellm_provider": "tars",
+ "mode": "embedding",
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/intfloat/multilingual-e5-large": {
+ "litellm_provider": "tars",
+ "mode": "embedding",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/intfloat/e5-large-v2": {
+ "litellm_provider": "tars",
+ "mode": "embedding",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/intfloat/e5-base-v2": {
+ "litellm_provider": "tars",
+ "mode": "embedding",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gemini-2.0-flash-001": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/gemini-2.0-flash-exp": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/gemini-2.0-flash-lite": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 7.5e-8,
+ "output_cost_per_token": 3e-7,
+ "cache_read_input_token_cost": 1.9e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/gemini-2.0-flash-lite-preview-02-05": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 7.5e-8,
+ "output_cost_per_token": 3e-7,
+ "cache_read_input_token_cost": 1.9e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/gemini-2.0-flash-thinking-exp": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536
+ },
+ "tars/gemini-2.0-flash-thinking-exp-01-21": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536
+ },
+ "tars/gemini-2.5-flash": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 0.0000025,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-flash-lite": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-flash-lite-preview-06-17": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-flash-preview-05-20": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 0.0000025,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-pro": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 3.13e-7,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-pro-preview-03-25": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 3.13e-7,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-pro-preview-05-06": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 3.13e-7,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-pro-preview-06-05": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 3.13e-7,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/chatgpt-4o-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000005,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-3.5-turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000015,
+ "output_cost_per_token": 0.000002,
+ "max_input_tokens": 16385,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-3.5-turbo-0125": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-7,
+ "output_cost_per_token": 0.0000015,
+ "max_input_tokens": 16385,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-3.5-turbo-1106": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000002,
+ "max_input_tokens": 16385,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-3.5-turbo-16k": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000004,
+ "max_input_tokens": 16385,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00003,
+ "output_cost_per_token": 0.00006,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-0125-preview": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00001,
+ "output_cost_per_token": 0.00003,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-0613": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00003,
+ "output_cost_per_token": 0.00006,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-1106-preview": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00001,
+ "output_cost_per_token": 0.00003,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00001,
+ "output_cost_per_token": 0.00003,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-turbo-2024-04-09": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00001,
+ "output_cost_per_token": 0.00003,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-turbo-preview": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00001,
+ "output_cost_per_token": 0.00003,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4.1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000008,
+ "cache_read_input_token_cost": 5e-7,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4.1-2025-04-14": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000008,
+ "cache_read_input_token_cost": 5e-7,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4.1-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 0.0000016,
+ "cache_read_input_token_cost": 1e-7,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4.1-mini-2025-04-14": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 0.0000016,
+ "cache_read_input_token_cost": 1e-7,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4.1-nano": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4.1-nano-2025-04-14": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4o": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000025,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 0.00000125,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-2024-05-13": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000005,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4o-2024-08-06": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000025,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 0.00000125,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-mini-2024-07-18": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-mini-search-preview": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-mini-search-preview-2025-03-11": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-search-preview": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000025,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 0.00000125,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-search-preview-2025-03-11": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000025,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 0.00000125,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 1.25e-7,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-2025-08-07": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 1.25e-7,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-chat-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 1.25e-7,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 0.000002,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-mini-2025-08-07": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 0.000002,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-nano": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 5e-9,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-nano-2025-08-07": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 5e-9,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/o1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.00006,
+ "cache_read_input_token_cost": 0.0000075,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o1-2024-12-17": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.00006,
+ "cache_read_input_token_cost": 0.0000075,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o3": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000008,
+ "cache_read_input_token_cost": 5e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o3-2025-04-16": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000008,
+ "cache_read_input_token_cost": 5e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o3-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000011,
+ "output_cost_per_token": 0.0000044,
+ "cache_read_input_token_cost": 5.5e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o4-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000011,
+ "output_cost_per_token": 0.0000044,
+ "cache_read_input_token_cost": 2.75e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o4-mini-2025-04-16": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000011,
+ "output_cost_per_token": 0.0000044,
+ "cache_read_input_token_cost": 2.75e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/text-embedding-3-large": {
+ "litellm_provider": "tars",
+ "mode": "embedding",
+ "input_cost_per_token": 1.3e-7,
+ "max_input_tokens": 8191,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/text-embedding-3-small": {
+ "litellm_provider": "tars",
+ "mode": "embedding",
+ "input_cost_per_token": 2e-8,
+ "max_input_tokens": 8191,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/text-embedding-ada-002": {
+ "litellm_provider": "tars",
+ "mode": "embedding",
+ "input_cost_per_token": 1e-7,
+ "max_input_tokens": 8191,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/claude-3-7-sonnet-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/deepinfra/mistralai/Mistral-Nemo-Instruct-2407": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-8,
+ "output_cost_per_token": 4e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/o3-mini-2025-01-31": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000011,
+ "output_cost_per_token": 0.0000044,
+ "cache_read_input_token_cost": 5.5e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
}
}
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index c87bf75c8c68..46df7b019b61 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -2504,6 +2504,7 @@ class LlmProviders(str, Enum):
COMPACTIFAI = "compactifai"
CUSTOM = "custom"
LITELLM_PROXY = "litellm_proxy"
+ TARS = "tars"
HOSTED_VLLM = "hosted_vllm"
LLAMAFILE = "llamafile"
LM_STUDIO = "lm_studio"
diff --git a/litellm/utils.py b/litellm/utils.py
index f1e56954fe1a..01467fbf4b65 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7103,6 +7103,8 @@ def get_provider_chat_config( # noqa: PLR0915
return litellm.NovitaConfig()
elif litellm.LlmProviders.NEBIUS == provider:
return litellm.NebiusConfig()
+ elif litellm.LlmProviders.TARS == provider:
+ return litellm.TarsConfig()
elif litellm.LlmProviders.WANDB == provider:
return litellm.WandbConfig()
elif litellm.LlmProviders.DASHSCOPE == provider:
@@ -7373,6 +7375,8 @@ def get_provider_model_info(
return litellm.LemonadeChatConfig()
elif LlmProviders.CLARIFAI == provider:
return litellm.ClarifaiConfig()
+ elif LlmProviders.TARS == provider:
+ return litellm.TarsConfig()
return None
@staticmethod
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 4804ae1de742..cf0f93d3b0cc 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -23832,5 +23832,1894 @@
"supports_tool_choice": true,
"supports_vision": true,
"supports_web_search": true
+ },
+ "tars/claude-haiku-4-5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000005,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-haiku-4-5-20251001": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000005,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/gpt-4o-2024-11-20": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000025,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 0.00000125,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000003,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/claude-sonnet-4-5-20250929": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-sonnet-4-5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-sonnet-4-20250514": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-sonnet-4-0": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-opus-4-20250514": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.000075,
+ "cache_read_input_token_cost": 0.0000015,
+ "cache_write_input_token_cost": 0.00001875,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 32000,
+ "max_tokens": 32000
+ },
+ "tars/claude-opus-4-1-20250805": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.000075,
+ "cache_read_input_token_cost": 0.0000015,
+ "cache_write_input_token_cost": 0.00001875,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 32000,
+ "max_tokens": 32000
+ },
+ "tars/claude-opus-4-1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.000075,
+ "cache_read_input_token_cost": 0.0000015,
+ "cache_write_input_token_cost": 0.00001875,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 32000,
+ "max_tokens": 32000
+ },
+ "tars/claude-opus-4-0": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.000075,
+ "cache_read_input_token_cost": 0.0000015,
+ "cache_write_input_token_cost": 0.00001875,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 32000,
+ "max_tokens": 32000
+ },
+ "tars/claude-3-haiku-20240307": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 0.00000125,
+ "cache_read_input_token_cost": 3e-8,
+ "cache_write_input_token_cost": 3e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/claude-3-7-sonnet-20250219": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/claude-3-5-haiku-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 8e-7,
+ "output_cost_per_token": 0.000004,
+ "cache_read_input_token_cost": 8e-8,
+ "cache_write_input_token_cost": 0.000001,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/claude-3-5-haiku-20241022": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 8e-7,
+ "output_cost_per_token": 0.000004,
+ "cache_read_input_token_cost": 8e-8,
+ "cache_write_input_token_cost": 0.000001,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/xai/grok-code-fast-1-0825": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 0.0000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-code-fast-1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 0.0000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-code-fast": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 0.0000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-fast-reasoning-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 5e-7,
+ "cache_read_input_token_cost": 5e-8,
+ "cache_write_input_token_cost": 5e-8,
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-fast-reasoning": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 5e-7,
+ "cache_read_input_token_cost": 5e-8,
+ "cache_write_input_token_cost": 5e-8,
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-fast-non-reasoning-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-fast-non-reasoning": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-fast": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 5e-7,
+ "cache_read_input_token_cost": 5e-8,
+ "cache_write_input_token_cost": 5e-8,
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4-0709": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-4": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini-fast-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini-fast-beta": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini-fast": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini-beta": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-fast-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-fast-beta": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-fast": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3-beta": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-3": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-2-vision-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.00001,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-2-vision-1212": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.00001,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/xai/grok-2-vision": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.00001,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/zai-org/GLM-4.6": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 6e-7,
+ "output_cost_per_token": 0.000002,
+ "max_input_tokens": 202752,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/zai-org/GLM-4.5-Air": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 0.0000011,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/zai-org/GLM-4.5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3.8e-7,
+ "output_cost_per_token": 0.0000016,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/thenlper/gte-large": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/thenlper/gte-base": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/shibing624/text2vec-base-chinese": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/paraphrase-MiniLM-L6-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/multi-qa-mpnet-base-dot-v1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/clip-ViT-B-32-multilingual-v1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/clip-ViT-B-32": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 77,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/all-mpnet-base-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/all-MiniLM-L6-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/sentence-transformers/all-MiniLM-L12-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Sao10K/L3.3-70B-Euryale-v2.3": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 6.5e-7,
+ "output_cost_per_token": 7.5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Sao10K/L3.1-70B-Euryale-v2.2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 6.5e-7,
+ "output_cost_per_token": 7.5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Sao10K/L3-8B-Lunaris-v1-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-8,
+ "output_cost_per_token": 5e-8,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/QwQ-32B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Next-80B-A3B-Thinking": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.4e-7,
+ "output_cost_per_token": 0.0000014,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.4e-7,
+ "output_cost_per_token": 0.0000014,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Embedding-8B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Embedding-4B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Embedding-0.6B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-9,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.9e-7,
+ "output_cost_per_token": 0.0000012,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 0.0000016,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-32B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 2.8e-7,
+ "max_input_tokens": 40960,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-30B-A3B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 8e-8,
+ "output_cost_per_token": 2.9e-7,
+ "max_input_tokens": 40960,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 0.0000029,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 9e-8,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-235B-A22B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.8e-7,
+ "output_cost_per_token": 5.4e-7,
+ "max_input_tokens": 40960,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen3-14B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 6e-8,
+ "output_cost_per_token": 2.4e-7,
+ "max_input_tokens": 40960,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen2.5-VL-32B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Qwen/Qwen2.5-72B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.2e-7,
+ "output_cost_per_token": 3.9e-7,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/openai/gpt-oss-20b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-8,
+ "output_cost_per_token": 1.4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/openai/gpt-oss-120b-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/openai/gpt-oss-120b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 4.5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-8,
+ "output_cost_per_token": 1.6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 6e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/NousResearch/Hermes-3-Llama-3.1-70B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 3e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/NousResearch/Hermes-3-Llama-3.1-405B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000001,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/moonshotai/Kimi-K2-Instruct-0905": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-7,
+ "output_cost_per_token": 0.000002,
+ "cache_read_input_token_cost": 4e-7,
+ "cache_write_input_token_cost": 4e-7,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/moonshotai/Kimi-K2-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-7,
+ "output_cost_per_token": 0.000002,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/mistralai/Mistral-Small-3.2-24B-Instruct-2506": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 7.5e-8,
+ "output_cost_per_token": 2e-7,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/mistralai/Mistral-Small-24B-Instruct-2501": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 8e-8,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/microsoft/WizardLM-2-8x22B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4.8e-7,
+ "output_cost_per_token": 4.8e-7,
+ "max_input_tokens": 65536,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/microsoft/phi-4": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 7e-8,
+ "output_cost_per_token": 1.4e-7,
+ "max_input_tokens": 16384,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-8,
+ "output_cost_per_token": 3e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-8,
+ "output_cost_per_token": 5e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-8,
+ "output_cost_per_token": 6e-8,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-Guard-4-12B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.8e-7,
+ "output_cost_per_token": 1.8e-7,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-Guard-3-8B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5.5e-8,
+ "output_cost_per_token": 5.5e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-4-Scout-17B-16E-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 8e-8,
+ "output_cost_per_token": 3e-7,
+ "max_input_tokens": 327680,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.3e-7,
+ "output_cost_per_token": 3.9e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-3.3-70B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.3e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-3.2-3B-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-8,
+ "output_cost_per_token": 2e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4.9e-8,
+ "output_cost_per_token": 4.9e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/Gryphe/MythoMax-L2-13b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 9e-8,
+ "max_input_tokens": 4096,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemma-3-4b-it": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-8,
+ "output_cost_per_token": 8e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemma-3-27b-it": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 9e-8,
+ "output_cost_per_token": 1.6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemma-3-12b-it": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 1e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemini-2.5-pro": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemini-2.5-flash": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 0.0000025,
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/gemini-2.0-flash-001": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/google/embeddinggemma-300m": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-9,
+ "max_input_tokens": 2048,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-V3.2-Exp": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.7e-7,
+ "output_cost_per_token": 4e-7,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.7e-7,
+ "output_cost_per_token": 0.000001,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-V3.1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.7e-7,
+ "output_cost_per_token": 0.000001,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-V3-0324": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 8.8e-7,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-V3": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3.8e-7,
+ "output_cost_per_token": 8.9e-7,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-R1-Turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000003,
+ "max_input_tokens": 40960,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-R1-0528": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-7,
+ "output_cost_per_token": 0.00000215,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/deepseek-ai/DeepSeek-R1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 7e-7,
+ "output_cost_per_token": 0.0000024,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/BAAI/bge-m3-multi": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/BAAI/bge-m3": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/BAAI/bge-large-en-v1.5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/BAAI/bge-en-icl": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/BAAI/bge-base-en-v1.5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/anthropic/claude-4-sonnet": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000033,
+ "output_cost_per_token": 0.0000165,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/anthropic/claude-4-opus": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000165,
+ "output_cost_per_token": 0.0000825,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/anthropic/claude-3-7-sonnet-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000033,
+ "output_cost_per_token": 0.0000165,
+ "cache_write_input_token_cost": 3.3e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/allenai/olmOCR-7B-0825": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.7e-7,
+ "output_cost_per_token": 0.0000015,
+ "max_input_tokens": 16384,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/groq/qwen/qwen3-32b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.9e-7,
+ "output_cost_per_token": 5.9e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 40960,
+ "max_tokens": 40960
+ },
+ "tars/groq/openai/gpt-oss-20b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536
+ },
+ "tars/groq/openai/gpt-oss-120b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 7.5e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536
+ },
+ "tars/groq/moonshotai/kimi-k2-instruct-0905": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000003,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/groq/meta-llama/llama-prompt-guard-2-86m": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-8,
+ "output_cost_per_token": 4e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 512,
+ "max_tokens": 512
+ },
+ "tars/groq/meta-llama/llama-prompt-guard-2-22m": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-8,
+ "output_cost_per_token": 3e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 512,
+ "max_tokens": 512
+ },
+ "tars/groq/meta-llama/llama-guard-4-12b": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 2e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 1024,
+ "max_tokens": 1024
+ },
+ "tars/groq/meta-llama/llama-4-scout-17b-16e-instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.1e-7,
+ "output_cost_per_token": 3.4e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/groq/meta-llama/llama-4-maverick-17b-128e-instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-7,
+ "output_cost_per_token": 6e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/groq/llama-3.3-70b-versatile": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5.9e-7,
+ "output_cost_per_token": 7.9e-7,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/groq/llama-3.1-8b-instant": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 8e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072
+ },
+ "tars/gemini-2.0-flash": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/deepinfra/intfloat/multilingual-e5-large-instruct": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/intfloat/multilingual-e5-large": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/intfloat/e5-large-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-8,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/deepinfra/intfloat/e5-base-v2": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-9,
+ "max_input_tokens": 512,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gemini-2.0-flash-001": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/gemini-2.0-flash-exp": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/gemini-2.0-flash-lite": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 7.5e-8,
+ "output_cost_per_token": 3e-7,
+ "cache_read_input_token_cost": 1.9e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/gemini-2.0-flash-lite-preview-02-05": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 7.5e-8,
+ "output_cost_per_token": 3e-7,
+ "cache_read_input_token_cost": 1.9e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192
+ },
+ "tars/gemini-2.0-flash-thinking-exp": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536
+ },
+ "tars/gemini-2.0-flash-thinking-exp-01-21": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536
+ },
+ "tars/gemini-2.5-flash": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 0.0000025,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-flash-lite": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-flash-lite-preview-06-17": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-flash-preview-05-20": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 0.0000025,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-pro": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 3.13e-7,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-pro-preview-03-25": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 3.13e-7,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-pro-preview-05-06": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 3.13e-7,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/gemini-2.5-pro-preview-06-05": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 3.13e-7,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65535,
+ "max_tokens": 65535
+ },
+ "tars/chatgpt-4o-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000005,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-3.5-turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000015,
+ "output_cost_per_token": 0.000002,
+ "max_input_tokens": 16385,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-3.5-turbo-0125": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-7,
+ "output_cost_per_token": 0.0000015,
+ "max_input_tokens": 16385,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-3.5-turbo-1106": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000002,
+ "max_input_tokens": 16385,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-3.5-turbo-16k": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000004,
+ "max_input_tokens": 16385,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00003,
+ "output_cost_per_token": 0.00006,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-0125-preview": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00001,
+ "output_cost_per_token": 0.00003,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-0613": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00003,
+ "output_cost_per_token": 0.00006,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-1106-preview": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00001,
+ "output_cost_per_token": 0.00003,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-turbo": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00001,
+ "output_cost_per_token": 0.00003,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-turbo-2024-04-09": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00001,
+ "output_cost_per_token": 0.00003,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4-turbo-preview": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00001,
+ "output_cost_per_token": 0.00003,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4.1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000008,
+ "cache_read_input_token_cost": 5e-7,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4.1-2025-04-14": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000008,
+ "cache_read_input_token_cost": 5e-7,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4.1-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 0.0000016,
+ "cache_read_input_token_cost": 1e-7,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4.1-mini-2025-04-14": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 4e-7,
+ "output_cost_per_token": 0.0000016,
+ "cache_read_input_token_cost": 1e-7,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4.1-nano": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4.1-nano-2025-04-14": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 1047576,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768
+ },
+ "tars/gpt-4o": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000025,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 0.00000125,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-2024-05-13": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000005,
+ "output_cost_per_token": 0.000015,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/gpt-4o-2024-08-06": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000025,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 0.00000125,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-mini-2024-07-18": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-mini-search-preview": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-mini-search-preview-2025-03-11": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "cache_read_input_token_cost": 7.5e-8,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-search-preview": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000025,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 0.00000125,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-4o-search-preview-2025-03-11": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000025,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 0.00000125,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384
+ },
+ "tars/gpt-5": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 1.25e-7,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-2025-08-07": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 1.25e-7,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-chat-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 1.25e-7,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 0.000002,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-mini-2025-08-07": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 0.000002,
+ "cache_read_input_token_cost": 2.5e-8,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-nano": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 5e-9,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/gpt-5-nano-2025-08-07": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 5e-8,
+ "output_cost_per_token": 4e-7,
+ "cache_read_input_token_cost": 5e-9,
+ "max_input_tokens": 400000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000
+ },
+ "tars/o1": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.00006,
+ "cache_read_input_token_cost": 0.0000075,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o1-2024-12-17": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.00006,
+ "cache_read_input_token_cost": 0.0000075,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o3": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000008,
+ "cache_read_input_token_cost": 5e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o3-2025-04-16": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000008,
+ "cache_read_input_token_cost": 5e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o3-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000011,
+ "output_cost_per_token": 0.0000044,
+ "cache_read_input_token_cost": 5.5e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o4-mini": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000011,
+ "output_cost_per_token": 0.0000044,
+ "cache_read_input_token_cost": 2.75e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/o4-mini-2025-04-16": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000011,
+ "output_cost_per_token": 0.0000044,
+ "cache_read_input_token_cost": 2.75e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
+ },
+ "tars/text-embedding-3-large": {
+ "litellm_provider": "tars",
+ "mode": "embedding",
+ "input_cost_per_token": 1.3e-7,
+ "max_input_tokens": 8191,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/text-embedding-3-small": {
+ "litellm_provider": "tars",
+ "mode": "embedding",
+ "input_cost_per_token": 2e-8,
+ "max_input_tokens": 8191,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/text-embedding-ada-002": {
+ "litellm_provider": "tars",
+ "mode": "embedding",
+ "input_cost_per_token": 1e-7,
+ "max_input_tokens": 8191,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/claude-3-7-sonnet-latest": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "cache_write_input_token_cost": 0.00000375,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000
+ },
+ "tars/deepinfra/mistralai/Mistral-Nemo-Instruct-2407": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 2e-8,
+ "output_cost_per_token": 4e-8,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096
+ },
+ "tars/o3-mini-2025-01-31": {
+ "litellm_provider": "tars",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000011,
+ "output_cost_per_token": 0.0000044,
+ "cache_read_input_token_cost": 5.5e-7,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 100000,
+ "max_tokens": 100000
}
}
diff --git a/tests/test_litellm/llms/tars/test_tars.py b/tests/test_litellm/llms/tars/test_tars.py
new file mode 100644
index 000000000000..590b43fbe8cb
--- /dev/null
+++ b/tests/test_litellm/llms/tars/test_tars.py
@@ -0,0 +1,394 @@
+import json
+import os
+import sys
+
+import pytest
+
+sys.path.insert(
+ 0, os.path.abspath("../../../../..")
+) # Prepends the directory five levels above the current working directory to the import path.
+from unittest.mock import MagicMock, patch
+
+from litellm.llms.tars.chat.transformation import TarsConfig
+from litellm.llms.tars.common_utils import TarsException, TarsModelInfo
+from litellm.llms.tars.embedding.transformation import TarsEmbeddingConfig
+from litellm.llms.tars.cost_calculator import cost_per_token
+from litellm.types.utils import ModelResponse, Usage
+import httpx
+
+
+def test_tars_config_initialization():
+ """Check that TarsConfig exposes the temperature, max_tokens and top_p it was constructed with."""
+ config = TarsConfig(
+ temperature=0.7,
+ max_tokens=100,
+ top_p=0.9,
+ )
+
+ assert config.temperature == 0.7
+ assert config.max_tokens == 100
+ assert config.top_p == 0.9
+
+
+def test_tars_get_api_base():
+ """Check that get_api_base(api_base=None) yields the default https://api.router.tetrate.ai/v1."""
+ api_base = TarsConfig.get_api_base(api_base=None)
+ assert api_base == "https://api.router.tetrate.ai/v1"
+
+
+def test_tars_get_api_base_custom():
+ """Check that get_api_base returns an explicitly supplied api_base unchanged."""
+ custom_api_base = "https://custom.tars.ai/v1"
+ api_base = TarsConfig.get_api_base(api_base=custom_api_base)
+ assert api_base == custom_api_base
+
+
+def test_tars_get_api_key():
+ """Check that get_api_key returns an explicitly supplied api_key unchanged."""
+ test_key = "test-tars-key"
+ api_key = TarsConfig.get_api_key(api_key=test_key)
+ assert api_key == test_key
+
+
+def test_tars_get_complete_url():
+ """Check that get_complete_url turns a base URL ending in /v1 into <base>/chat/completions."""
+ config = TarsConfig()
+
+ url = config.get_complete_url(
+ api_base="https://api.router.tetrate.ai/v1",
+ api_key="test-key",
+ model="claude-sonnet-4-20250514",
+ optional_params={},
+ litellm_params={},
+ stream=False
+ )
+
+ assert url == "https://api.router.tetrate.ai/v1/chat/completions"
+
+
+def test_tars_get_complete_url_no_duplicate():
+ """Check that a base URL already ending in /chat/completions is returned without a second suffix."""
+ config = TarsConfig()
+
+ url = config.get_complete_url(
+ api_base="https://api.router.tetrate.ai/v1/chat/completions",
+ api_key="test-key",
+ model="gpt-4o",
+ optional_params={},
+ litellm_params={},
+ stream=False
+ )
+
+ assert url == "https://api.router.tetrate.ai/v1/chat/completions"
+ assert url.count("chat/completions") == 1
+
+
+def test_tars_exception():
+ """Check that TarsException exposes the status_code and message it was constructed with."""
+ exception = TarsException(
+ message="Test error",
+ status_code=400,
+ headers={"content-type": "application/json"}
+ )
+
+ assert exception.status_code == 400
+ assert exception.message == "Test error"
+
+
+def test_tars_model_info_get_base_model():
+ """Check that get_base_model strips the leading tars/ prefix, using two sample model names."""
+ model_info = TarsModelInfo()
+
+ base_model = model_info.get_base_model("tars/claude-sonnet-4-20250514")
+ assert base_model == "claude-sonnet-4-20250514"
+
+ base_model = model_info.get_base_model("tars/gpt-4o")
+ assert base_model == "gpt-4o"
+
+
+def test_tars_model_info_validate_environment():
+ """Check that validate_environment returns headers with Authorization set to 'Bearer <api_key>'."""
+ model_info = TarsModelInfo()
+
+ headers = {}
+ test_api_key = "test-tars-api-key"
+
+ result_headers = model_info.validate_environment(
+ headers=headers,
+ model="tars/gpt-4o",
+ messages=[{"role": "user", "content": "Hello"}],
+ optional_params={},
+ litellm_params={},
+ api_key=test_api_key,
+ api_base="https://api.router.tetrate.ai/v1"
+ )
+
+ assert "Authorization" in result_headers
+ assert result_headers["Authorization"] == f"Bearer {test_api_key}"
+
+
+def test_tars_model_info_validate_environment_no_key():
+ """Check that validate_environment raises ValueError matching 'TARS_API_KEY is not set' when api_key is None."""
+ model_info = TarsModelInfo()
+
+ headers = {}
+
+ with pytest.raises(ValueError, match="TARS_API_KEY is not set"):
+ model_info.validate_environment(
+ headers=headers,
+ model="tars/gpt-4o",
+ messages=[{"role": "user", "content": "Hello"}],
+ optional_params={},
+ litellm_params={},
+ api_key=None,  # NOTE(review): presumably relies on TARS_API_KEY being unset in the environment - confirm.
+ api_base="https://api.router.tetrate.ai/v1"
+ )
+
+
+@patch('httpx.Client')
+def test_tars_get_models_success(mock_client_class):
+ """Check that get_models, with httpx.Client mocked to list three ids, returns them tars/-prefixed and sorted."""
+ mock_client = MagicMock()
+ mock_response = MagicMock()
+ mock_response.json.return_value = {
+ "data": [
+ {"id": "claude-sonnet-4-20250514", "object": "model"},
+ {"id": "gpt-4o", "object": "model"},
+ {"id": "gpt-4o-mini", "object": "model"},
+ ]
+ }
+ mock_client.get.return_value = mock_response
+ mock_client.__enter__.return_value = mock_client  # Entering the mock as a context manager yields the same mock.
+ mock_client.__exit__.return_value = False  # A falsy __exit__ result does not suppress exceptions.
+ mock_client_class.return_value = mock_client
+
+ model_info = TarsModelInfo()
+ models = model_info.get_models(api_key="test-key", api_base="https://api.router.tetrate.ai/v1")
+
+ assert len(models) == 3
+ assert "tars/claude-sonnet-4-20250514" in models
+ assert "tars/gpt-4o" in models
+ assert "tars/gpt-4o-mini" in models
+ assert models == sorted(models) # Verify models are sorted.
+
+
+@patch('httpx.Client')
+def test_tars_get_models_no_api_key(mock_client_class):
+ """Check that get_models raises ValueError matching 'TARS_API_KEY is not set' when api_key is None."""
+ model_info = TarsModelInfo()
+
+ with pytest.raises(ValueError, match="TARS_API_KEY is not set"):
+ model_info.get_models(api_key=None, api_base="https://api.router.tetrate.ai/v1")  # NOTE(review): presumably relies on TARS_API_KEY being unset in the environment - confirm.
+
+
+@patch('httpx.Client')
+def test_tars_get_models_http_error(mock_client_class):
+ """Check that an httpx.HTTPStatusError from the mocked client's get() surfaces as ValueError 'Failed to fetch models from TARS'."""
+ mock_client = MagicMock()
+ mock_response = MagicMock()
+ mock_response.status_code = 401
+ mock_response.text = "Unauthorized"
+
+ http_error = httpx.HTTPStatusError(
+ "Unauthorized",
+ request=MagicMock(),
+ response=mock_response
+ )
+ mock_client.get.side_effect = http_error  # Every get() call on the mock raises the prepared error.
+ mock_client.__enter__.return_value = mock_client  # Entering the mock as a context manager yields the same mock.
+ mock_client.__exit__.return_value = False  # A falsy __exit__ result does not suppress exceptions.
+ mock_client_class.return_value = mock_client
+
+ model_info = TarsModelInfo()
+
+ with pytest.raises(ValueError, match="Failed to fetch models from TARS"):
+ model_info.get_models(api_key="invalid-key", api_base="https://api.router.tetrate.ai/v1")
+
+
+def test_tars_embedding_config():
+ """Check that TarsEmbeddingConfig can be constructed with no arguments."""
+ config = TarsEmbeddingConfig()
+
+ assert config is not None
+
+
+def test_tars_embedding_get_error_class():
+ """Check that TarsEmbeddingConfig.get_error_class returns a TarsException carrying the given status code."""
+ config = TarsEmbeddingConfig()
+
+ error = config.get_error_class(
+ error_message="Test error",
+ status_code=500,
+ headers={"content-type": "application/json"}
+ )
+
+ assert isinstance(error, TarsException)
+ assert error.status_code == 500
+
+
+def test_tars_config_get_error_class():
+ """Check that TarsConfig.get_error_class returns a TarsException carrying the given status code."""
+ config = TarsConfig()
+
+ error = config.get_error_class(
+ error_message="Test error",
+ status_code=429,
+ headers={"content-type": "application/json"}
+ )
+
+ assert isinstance(error, TarsException)
+ assert error.status_code == 429
+
+
+def test_config_get_config():
+ """Check that the class-level call TarsConfig.get_config() returns a dict."""
+ config_dict = TarsConfig.get_config()
+ assert isinstance(config_dict, dict)
+
+
+def test_response_format_support():
+ """Check that TarsConfig accepts a response_format dict and exposes it as an attribute."""
+ response_format = {
+ "type": "json_object"
+ }
+
+ config = TarsConfig(response_format=response_format)
+ assert config.response_format == response_format
+
+
+def test_tools_support():
+ """Check that TarsConfig accepts a tools list and exposes it as an attribute."""
+ tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "description": "Get weather information"
+ }
+ }
+ ]
+
+ config = TarsConfig(tools=tools)
+ assert config.tools == tools
+
+
+def test_functions_support():
+ """Check that TarsConfig accepts a functions list and exposes it as an attribute."""
+ functions = [
+ {
+ "name": "get_weather",
+ "description": "Get weather information",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ ]
+
+ config = TarsConfig(functions=functions)
+ assert config.functions == functions
+
+
+def test_stop_parameter_support():
+ """Check that TarsConfig exposes stop unchanged whether it is given as a single string or a list of strings."""
+ # Single stop sequence passed as a plain string.
+ config1 = TarsConfig(stop="STOP")
+ assert config1.stop == "STOP"
+
+ # Several stop sequences passed as a list.
+ config2 = TarsConfig(stop=["STOP", "END"])
+ assert config2.stop == ["STOP", "END"]
+
+
+def test_logit_bias_support():
+ """Check that TarsConfig accepts a logit_bias dict and exposes it as an attribute."""
+ logit_bias = {"50256": -100}
+
+ config = TarsConfig(logit_bias=logit_bias)
+ assert config.logit_bias == logit_bias
+
+
+def test_presence_penalty_support():
+ """Check that TarsConfig accepts presence_penalty and exposes it as an attribute."""
+ config = TarsConfig(presence_penalty=0.5)
+ assert config.presence_penalty == 0.5
+
+
+def test_n_parameter_support():
+ """Check that TarsConfig accepts n (number of completions) and exposes it as an attribute."""
+ config = TarsConfig(n=3)
+ assert config.n == 3
+
+
+def test_max_completion_tokens_support():
+ """Check that TarsConfig accepts max_completion_tokens and exposes it as an attribute."""
+ config = TarsConfig(max_completion_tokens=150)
+ assert config.max_completion_tokens == 150
+
+
+def test_tars_config_inherits_openai():
+ """Check that a TarsConfig instance is also an instance of OpenAIGPTConfig."""
+ from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+
+ config = TarsConfig()
+ assert isinstance(config, OpenAIGPTConfig)
+
+
+def test_tars_embedding_config_inherits_openai():
+ """Check that a TarsEmbeddingConfig instance is also an instance of OpenAIEmbeddingConfig."""
+ from litellm.llms.openai.embedding.transformation import OpenAIEmbeddingConfig
+
+ config = TarsEmbeddingConfig()
+ assert isinstance(config, OpenAIEmbeddingConfig)
+
+
+def test_tars_cost_calculator_with_5_percent_margin():
+ """Check that cost_per_token returns positive prompt and completion costs for gpt-4o (the 5% margin is not asserted)."""
+ usage = Usage(
+ prompt_tokens=1000,
+ completion_tokens=500,
+ total_tokens=1500
+ )
+
+ # Use a model name that is expected to have pricing available.
+ prompt_cost, completion_cost = cost_per_token(model="gpt-4o", usage=usage)
+
+ # Both costs must be strictly positive for non-zero token counts.
+ assert prompt_cost > 0
+ assert completion_cost > 0
+
+ # NOTE(review): the 5% margin named in this test is never asserted; only positivity is checked above.
+ # Asserting exact values would need the base prices for gpt-4o, which this test does not look up.
+
+
+def test_tars_cost_calculator_no_pricing_fallback():
+ """Check that cost_per_token returns exactly (0.0, 0.0) for a model name with no pricing entry."""
+ usage = Usage(
+ prompt_tokens=1000,
+ completion_tokens=500,
+ total_tokens=1500
+ )
+
+ # The model name is deliberately made up so that no pricing entry is expected to match.
+ prompt_cost, completion_cost = cost_per_token(model="nonexistent-model-12345", usage=usage)
+
+ # Expect the zero-cost fallback for both components.
+ assert prompt_cost == 0.0
+ assert completion_cost == 0.0
+
+
+def test_tars_cost_calculator_with_zero_tokens():
+ """Check that cost_per_token returns (0.0, 0.0) for gpt-4o when the usage has zero prompt and completion tokens."""
+ usage = Usage(
+ prompt_tokens=0,
+ completion_tokens=0,
+ total_tokens=0
+ )
+
+ # Same model name as the positive-cost test, so only the token counts differ.
+ prompt_cost, completion_cost = cost_per_token(model="gpt-4o", usage=usage)
+
+ # Zero tokens must yield zero cost for both components.
+ assert prompt_cost == 0.0
+ assert completion_cost == 0.0
+
diff --git a/ui/litellm-dashboard/src/components/add_model/provider_specific_fields.tsx b/ui/litellm-dashboard/src/components/add_model/provider_specific_fields.tsx
index 2d11569ab956..bf55319c2741 100644
--- a/ui/litellm-dashboard/src/components/add_model/provider_specific_fields.tsx
+++ b/ui/litellm-dashboard/src/components/add_model/provider_specific_fields.tsx
@@ -460,6 +460,22 @@ const PROVIDER_CREDENTIAL_FIELDS: Record =
required: true,
},
],
+ [Providers.TARS]: [
+ {
+ key: "api_base",
+ label: "API Base",
+ placeholder: "https://api.router.tetrate.ai/v1",
+ defaultValue: "https://api.router.tetrate.ai/v1",
+ tooltip: "The base URL for TARS API. Default: https://api.router.tetrate.ai/v1",
+ },
+ {
+ key: "api_key",
+ label: "TARS API Key",
+ type: "password",
+ required: true,
+ tooltip: "Get your API key from https://router.tetrate.ai",
+ },
+ ],
[Providers.FireworksAI]: [
{
key: "api_key",
diff --git a/ui/litellm-dashboard/src/components/provider_info_helpers.tsx b/ui/litellm-dashboard/src/components/provider_info_helpers.tsx
index fb09b9fa82f8..1f1433f48a66 100644
--- a/ui/litellm-dashboard/src/components/provider_info_helpers.tsx
+++ b/ui/litellm-dashboard/src/components/provider_info_helpers.tsx
@@ -30,6 +30,7 @@ export enum Providers {
Openrouter = "Openrouter",
Oracle = "Oracle Cloud Infrastructure (OCI)",
Perplexity = "Perplexity",
+ TARS = "TARS (Tetrate Agent Router Service)",
Sambanova = "Sambanova",
Snowflake = "Snowflake",
TogetherAI = "TogetherAI",
@@ -66,6 +67,7 @@ export const provider_map: Record = {
Perplexity: "perplexity",
TogetherAI: "together_ai",
Openrouter: "openrouter",
+ TARS: "tars",
Oracle: "oci",
Snowflake: "snowflake",
FireworksAI: "fireworks_ai",
@@ -124,6 +126,7 @@ export const providerLogoMap: Record = {
[Providers.JinaAI]: `${asset_logos_folder}jina.png`,
[Providers.VolcEngine]: `${asset_logos_folder}volcengine.png`,
[Providers.DeepInfra]: `${asset_logos_folder}deepinfra.png`,
+ [Providers.TARS]: `${asset_logos_folder}openai_small.svg`,
};
export const getProviderLogoAndName = (providerValue: string): { logo: string; displayName: string } => {