diff --git a/docs/my-website/docs/providers/tars.md b/docs/my-website/docs/providers/tars.md new file mode 100644 index 000000000000..00041b7cf4e8 --- /dev/null +++ b/docs/my-website/docs/providers/tars.md @@ -0,0 +1,656 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# TARS (Tetrate Agent Router Service) + +https://router.tetrate.ai + +TARS is an AI Gateway-as-a-Service from Tetrate that provides intelligent routing for GenAI applications. It's OpenAI-compatible and routes to multiple LLM providers. + +## API Key + +```python +# env variable +os.environ['TARS_API_KEY'] +``` + +## Quick Start + +```python showLineNumbers +import litellm +import os + +# Set your TARS API key. +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +# Chat Completions. +response = litellm.completion( + model="tars/claude-haiku-4-5", + messages=[{"role": "user", "content": "Hello, how are you?"}] +) +print(response.choices[0].message.content) + +# Vision (Image Analysis). +response = litellm.completion( + model="tars/gpt-4o", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "What do you see?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}} + ] + } + ] +) +print(response.choices[0].message.content) + +# Embeddings. +response = litellm.embedding( + model="tars/text-embedding-3-small", + input=["Hello world"] +) +print(response.data[0].embedding) +``` + +## Features + +TARS supports: + +- ✅ Chat Completions +- ✅ Embeddings +- ✅ Vision (Multi-modal image analysis) +- ✅ Streaming +- ✅ Async calls +- ✅ Function/Tool calling + +## API Configuration + +### Required Environment Variables + +```bash +export TARS_API_KEY="your-tars-api-key" +``` + +### Optional Configuration + +```bash +# Override the default API base URL. 
+export TARS_API_BASE="https://api.router.tetrate.ai/v1" +``` + +## Supported Models + +TARS provides access to models from multiple providers including: + +### Recommended Models + +**Claude Haiku 4.5** (`tars/claude-haiku-4-5`) - Fast, cost-effective model with vision support. Great for most use cases. + +**Claude Sonnet 4.5** (`tars/claude-sonnet-4-5`) - Balanced performance and cost for complex tasks. + +**GPT-4o** (`tars/gpt-4o`) - OpenAI's flagship multimodal model with strong vision capabilities. + +### Vision Models + +- OpenAI (GPT-4o, GPT-4o-mini, GPT-4.1, GPT-5, etc.) +- Anthropic (Claude 4.5, Claude 4.5 Haiku, Claude 4, Claude 3.7 Sonnet, etc.) +- xAI (Grok 4, Grok 3, etc.) +- Google (Gemini 2.5 Pro, Gemini 2.0 Flash, etc.) +- DeepSeek, Qwen, and many more + +### Chat Models + +- OpenAI (GPT-4o, GPT-4o-mini, GPT-4.1, GPT-5, O1, O3, etc.) +- Anthropic (Claude 4.5, Claude 4.5 Haiku, Claude 4, Claude 3.7 Sonnet, Claude 3.5 Haiku, etc.) +- xAI (Grok 4, Grok 3, etc.) +- Google (Gemini 2.5 Pro, Gemini 2.0 Flash, etc.) 
+- DeepSeek, Qwen, and many more + +### Embedding Models + +- OpenAI (text-embedding-3-small, text-embedding-3-large, text-embedding-ada-002) +- Custom embedding models from various providers + +To see the full list of available models, visit: https://api.router.tetrate.ai/v1/models + +## Usage Examples + +### Chat Completions + +```python showLineNumbers title="LiteLLM python sdk usage - Non-streaming" +import litellm +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +response = litellm.completion( + model="tars/claude-haiku-4-5", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Explain quantum computing"} + ], + temperature=0.7, + max_tokens=500 +) +print(response.choices[0].message.content) +``` + +### Streaming + +```python showLineNumbers title="LiteLLM python sdk usage - Streaming" +import litellm +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +response = litellm.completion( + model="tars/gpt-4o", + messages=[{"role": "user", "content": "Write a short poem"}], + stream=True +) + +for chunk in response: + if chunk.choices[0].delta.content: + print(chunk.choices[0].delta.content, end="") +``` + +### Async Chat Completions + +```python showLineNumbers title="LiteLLM python sdk usage - Async" +import litellm +import asyncio +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +async def test_async(): + response = await litellm.acompletion( + model="tars/claude-haiku-4-5", + messages=[{"role": "user", "content": "Hello!"}] + ) + print(response.choices[0].message.content) + +asyncio.run(test_async()) +``` + +### Function/Tool Calling + +```python showLineNumbers title="LiteLLM python sdk usage - Function calling" +import litellm +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather", + "parameters": { + "type": "object", + "properties": 
{ + "location": {"type": "string"} + }, + "required": ["location"] + } + } + } +] + +response = litellm.completion( + model="tars/gpt-4o", + messages=[{"role": "user", "content": "What's the weather in SF?"}], + tools=tools +) +print(response.choices[0].message.tool_calls) +``` + +### Vision (Multi-modal) + +```python showLineNumbers title="LiteLLM python sdk usage - Vision" +import litellm +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +# Vision with image URL +response = litellm.completion( + model="tars/gpt-4o", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What do you see in this image?" + }, + { + "type": "image_url", + "image_url": { + "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + } + } + ] + } + ], + temperature=0.7, + max_tokens=150 +) + +print(response.choices[0].message.content) +``` + +### Vision with Base64 Image + +```python showLineNumbers title="LiteLLM python sdk usage - Vision with Base64" +import litellm +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +# Vision with base64 encoded image +response = litellm.completion( + model="tars/claude-haiku-4-5", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image." 
+ }, + { + "type": "image_url", + "image_url": { + "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkY+g8AAwEB/6P/4AAAAASUVORK5CYII=" + } + } + ] + } + ] +) + +print(response.choices[0].message.content) +``` + +### Vision Function Calling + +```python showLineNumbers title="LiteLLM python sdk usage - Vision with Function Calling" +import litellm +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +tools = [ + { + "type": "function", + "function": { + "name": "analyze_image", + "description": "Analyze image content", + "parameters": { + "type": "object", + "properties": { + "objects_detected": {"type": "array", "items": {"type": "string"}}, + "scene_type": {"type": "string"} + } + } + } + } +] + +response = litellm.completion( + model="tars/gpt-4o", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Analyze this image and call the analyze_image function." + }, + { + "type": "image_url", + "image_url": { + "url": "https://example.com/image.jpg" + } + } + ] + } + ], + tools=tools +) + +if response.choices[0].message.tool_calls: + tool_call = response.choices[0].message.tool_calls[0] + print(f"Tool called: {tool_call.function.name}") + print(f"Arguments: {tool_call.function.arguments}") +``` + +### Streaming Vision + +```python showLineNumbers title="LiteLLM python sdk usage - Streaming Vision" +import litellm +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +response = litellm.completion( + model="tars/gpt-4o", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in detail." 
+ }, + { + "type": "image_url", + "image_url": { + "url": "https://example.com/image.jpg" + } + } + ] + } + ], + stream=True +) + +for chunk in response: + if chunk.choices[0].delta.content: + print(chunk.choices[0].delta.content, end="") +``` + +### Async Vision + +```python showLineNumbers title="LiteLLM python sdk usage - Async Vision" +import litellm +import asyncio +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +async def test_vision(): + response = await litellm.acompletion( + model="tars/claude-haiku-4-5", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What do you see in this image?" + }, + { + "type": "image_url", + "image_url": { + "url": "https://example.com/image.jpg" + } + } + ] + } + ] + ) + print(response.choices[0].message.content) + +asyncio.run(test_vision()) +``` + +### Embeddings + +```python showLineNumbers title="LiteLLM python sdk usage - Embeddings" +import litellm +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +response = litellm.embedding( + model="tars/text-embedding-3-large", + input=["Hello world", "Goodbye world"] +) + +for embedding in response.data: + print(f"Embedding {embedding.index}: {len(embedding.embedding)} dimensions") +``` + +## Platform Features + +TARS provides intelligent routing and platform features that enable: + +- Intelligent routing and load balancing +- Automatic fallback to alternative models +- Cost optimization features +- Performance monitoring +- Unified API access + +For detailed pricing and platform information, see: https://router.tetrate.ai/models + +## Getting Your API Key + +1. Sign up at https://router.tetrate.ai +2. Get $5 free credit with a business email +3. Generate your API key from the dashboard +4. Set the `TARS_API_KEY` environment variable + +## Usage with LiteLLM Proxy Server + +Here's how to call TARS models with the LiteLLM Proxy Server + +### 1. 
Save key in your environment + +```bash +export TARS_API_KEY="your-tars-api-key" +``` + +### 2. Start the proxy + +<Tabs> +<TabItem value="config" label="config.yaml"> + +```yaml +model_list: + - model_name: claude-haiku + litellm_params: + model: tars/claude-haiku-4-5 + api_key: os.environ/TARS_API_KEY + + - model_name: claude-sonnet + litellm_params: + model: tars/claude-sonnet-4-5 + api_key: os.environ/TARS_API_KEY + + - model_name: gpt-4o + litellm_params: + model: tars/gpt-4o + api_key: os.environ/TARS_API_KEY + + - model_name: embeddings + litellm_params: + model: tars/text-embedding-3-large + api_key: os.environ/TARS_API_KEY +``` + +```bash +litellm --config /path/to/config.yaml +``` + +</TabItem> +<TabItem value="cli" label="CLI"> + +```bash +$ litellm --model tars/claude-haiku-4-5 + +# Server running on http://0.0.0.0:4000 +``` + +</TabItem> +</Tabs> + +### 3. Test it + +<Tabs> +<TabItem value="Curl" label="Curl Request"> + +```shell +curl --location 'http://0.0.0.0:4000/chat/completions' \ +--header 'Content-Type: application/json' \ +--data ' { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "what llm are you" + } + ] + } +' +``` + +</TabItem> +<TabItem value="openai" label="OpenAI v1.0.0+"> + +```python +import openai +client = openai.OpenAI( + api_key="anything", + base_url="http://0.0.0.0:4000" +) + +response = client.chat.completions.create( + model="gpt-4o", + messages=[ + { + "role": "user", + "content": "this is a test request, write a short poem" + } + ] +) + +print(response) +``` + +</TabItem> +<TabItem value="langchain" label="Langchain"> + +```python +from langchain.chat_models import ChatOpenAI +from langchain.prompts.chat import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +) +from langchain.schema import HumanMessage, SystemMessage + +chat = ChatOpenAI( + openai_api_base="http://0.0.0.0:4000", + model="gpt-4o", + temperature=0.1 +) + +messages = [ + SystemMessage( + content="You are a helpful assistant that im using to make a test request to." + ), + HumanMessage( + content="test from litellm. 
tell me why it's amazing in 1 sentence" + ), +] +response = chat(messages) + +print(response) +``` + +</TabItem> +</Tabs> + +## Advanced Features + +### Cost Tracking + +LiteLLM automatically tracks costs for TARS models with a 5% margin added to the base model costs. This margin accounts for TARS routing and platform overhead. + +**Note:** Cost tracking is only available for models with pricing information in LiteLLM's model catalog. If a model doesn't have pricing information, its cost is reported as 0. + +```python showLineNumbers +import litellm +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +response = litellm.completion( + model="tars/gpt-4o", + messages=[{"role": "user", "content": "Hello!"}] +) + +# Cost is automatically calculated with 5% margin. +print(f"Response cost: ${response._hidden_params.get('response_cost', 0):.6f}") +``` + +### Cost Optimization + +TARS automatically routes requests to optimize for cost while maintaining performance: + +```python showLineNumbers +import litellm +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +# TARS can automatically switch to cheaper models. +response = litellm.completion( + model="tars/gpt-4o-mini", # Use cost-effective model. + messages=[{"role": "user", "content": "Simple question"}] +) +``` + +### Automatic Fallback + +TARS provides automatic fallback to alternative models when primary models are unavailable: + +```python showLineNumbers +import litellm +import os + +os.environ["TARS_API_KEY"] = "your-tars-api-key" + +# If the specified model is down, TARS routes to alternatives. +response = litellm.completion( + model="tars/claude-haiku-4-5", + messages=[{"role": "user", "content": "Hello"}], + # TARS handles fallback automatically. +) +``` + +## Support + +- Dashboard: https://router.tetrate.ai +- Documentation: https://docs.tetrate.io +- Support: Contact through the TARS dashboard + +## Troubleshooting + +### Authentication Errors + +If you get authentication errors: + +1. 
Verify your API key is set correctly: `echo $TARS_API_KEY` +2. Check your key hasn't expired in the dashboard +3. Ensure you have sufficient credits + +### Rate Limits + +TARS respects the rate limits of underlying providers. If you hit rate limits: + +1. Check your usage in the dashboard +2. Consider upgrading to Enterprise plan for higher limits +3. Implement exponential backoff in your code + +### Model Not Found + +If a model isn't available: + +1. Check the latest model list: https://router.tetrate.ai/models +2. Verify the model ID is correct (e.g., `tars/claude-sonnet-4-20250514`) +3. Some models may require specific account permissions diff --git a/litellm/__init__.py b/litellm/__init__.py index d1ecc3c76783..3470bbb5017a 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -528,6 +528,7 @@ def identify(event_details): ovhcloud_models: Set = set() ovhcloud_embedding_models: Set = set() lemonade_models: Set = set() +tars_models: Set = set() def is_bedrock_pricing_only_model(key: str) -> bool: @@ -752,6 +753,8 @@ def add_known_models(): ovhcloud_embedding_models.add(key) elif value.get("litellm_provider") == "lemonade": lemonade_models.add(key) + elif value.get("litellm_provider") == "tars": + tars_models.add(key) add_known_models() @@ -854,6 +857,7 @@ def add_known_models(): | ovhcloud_models | lemonade_models | set(clarifai_models) + | tars_models ) model_list_set = set(model_list) @@ -940,6 +944,7 @@ def add_known_models(): "ovhcloud": ovhcloud_models | ovhcloud_embedding_models, "lemonade": lemonade_models, "clarifai": clarifai_models, + "tars": tars_models, } # mapping for those models which have larger equivalents @@ -1284,6 +1289,7 @@ def add_known_models(): from .llms.watsonx.embed.transformation import IBMWatsonXEmbeddingConfig from .llms.github_copilot.chat.transformation import GithubCopilotConfig from .llms.nebius.chat.transformation import NebiusConfig +from .llms.tars.chat.transformation import TarsConfig from 
.llms.wandb.chat.transformation import WandbConfig from .llms.dashscope.chat.transformation import DashScopeChatConfig from .llms.moonshot.chat.transformation import MoonshotChatConfig diff --git a/litellm/constants.py b/litellm/constants.py index 8553ca6ced66..9f7d935634c6 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -361,7 +361,8 @@ "vercel_ai_gateway", "wandb", "ovhcloud", - "lemonade" + "lemonade", + "tars" ] LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS = [ @@ -547,6 +548,7 @@ "wandb", "cometapi", "clarifai", + "tars", ] openai_text_completion_compatible_providers: List = ( [ # providers that support `/v1/completions` diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 37c76e5584bb..4604e75af008 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -375,6 +375,11 @@ def cost_per_token( # noqa: PLR0915 cost_per_token as dashscope_cost_per_token, ) return dashscope_cost_per_token(model=model, usage=usage_block) + elif custom_llm_provider == "tars": + from litellm.llms.tars.cost_calculator import ( + cost_per_token as tars_cost_per_token, + ) + return tars_cost_per_token(model=model, usage=usage_block) else: model_info = _cached_get_model_info_helper( model=model, custom_llm_provider=custom_llm_provider diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py index 275f2a63a1ba..5c06d14a5657 100644 --- a/litellm/litellm_core_utils/get_llm_provider_logic.py +++ b/litellm/litellm_core_utils/get_llm_provider_logic.py @@ -803,6 +803,14 @@ def _get_openai_compatible_provider_info( # noqa: PLR0915 ) = litellm.ClarifaiConfig()._get_openai_compatible_provider_info( api_base, api_key ) + elif custom_llm_provider == "tars": + # TARS (Tetrate Agent Router Service) is OpenAI compatible + api_base = ( + api_base + or get_secret_str("TARS_API_BASE") + or "https://api.router.tetrate.ai/v1" + ) # type: ignore + dynamic_api_key = api_key 
or get_secret_str("TARS_API_KEY") if api_base is not None and not isinstance(api_base, str): raise Exception("api base needs to be a string. api_base={}".format(api_base)) diff --git a/litellm/llms/tars/chat/transformation.py b/litellm/llms/tars/chat/transformation.py new file mode 100644 index 000000000000..f54d835464e1 --- /dev/null +++ b/litellm/llms/tars/chat/transformation.py @@ -0,0 +1,79 @@ +""" +Support for OpenAI's `/v1/chat/completions` endpoint. + +TARS (Tetrate Agent Router Service) is OpenAI-compatible. + +Docs: https://router.tetrate.ai +API: https://api.router.tetrate.ai/v1 +""" + +from typing import Optional, Union + +import httpx + +from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig + +from ..common_utils import TarsException, TarsModelInfo + + +class TarsConfig(OpenAIGPTConfig, TarsModelInfo): + """ + Configuration for TARS (Tetrate Agent Router Service). + + TARS is OpenAI-compatible and routes to multiple LLM providers. + Supports dynamic model fetching from the TARS API. + """ + + def get_error_class( + self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + return TarsException( + message=error_message, + status_code=status_code, + headers=headers, + ) + + def get_complete_url( + self, + api_base: Optional[str], + api_key: Optional[str], + model: str, + optional_params: dict, + litellm_params: dict, + stream: Optional[bool] = None, + ) -> str: + if not api_base: + api_base = "https://api.router.tetrate.ai/v1" + + endpoint = "chat/completions" + api_base = api_base.rstrip("/") + + if endpoint in api_base: + result = api_base + else: + result = f"{api_base}/{endpoint}" + + return result + + def get_models(self, api_key: Optional[str] = None, api_base: Optional[str] = None): + """ + Override OpenAIGPTConfig.get_models() to use TARS API instead of OpenAI API. 
+ """ + # Use TarsModelInfo.get_models() method instead of OpenAIGPTConfig.get_models() + return TarsModelInfo.get_models(self, api_key=api_key, api_base=api_base) + + @staticmethod + def get_api_base(api_base: Optional[str] = None) -> str: + """ + Override OpenAIGPTConfig.get_api_base() to use TARS API base instead of OpenAI API base. + """ + return TarsModelInfo.get_api_base(api_base) + + @staticmethod + def get_api_key(api_key: Optional[str] = None) -> Optional[str]: + """ + Override OpenAIGPTConfig.get_api_key() to use TARS API key instead of OpenAI API key. + """ + return TarsModelInfo.get_api_key(api_key) + diff --git a/litellm/llms/tars/common_utils.py b/litellm/llms/tars/common_utils.py new file mode 100644 index 000000000000..e4d323f7e24e --- /dev/null +++ b/litellm/llms/tars/common_utils.py @@ -0,0 +1,111 @@ +""" +TARS (Tetrate Agent Router Service) common utilities and model info. +""" + +from typing import List, Optional + +import httpx + +from litellm.llms.base_llm.base_utils import BaseLLMModelInfo +from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm.secret_managers.main import get_secret_str + + +class TarsException(BaseLLMException): + """Exception class for TARS provider errors.""" + pass + + +class TarsModelInfo(BaseLLMModelInfo): + """ + Model info for TARS (Tetrate Agent Router Service) provider. + + Supports dynamic model fetching from the TARS API. 
+""" + + @staticmethod + def get_api_key(api_key: Optional[str] = None) -> Optional[str]: + """Get TARS API key from parameter or environment variable.""" + return api_key or get_secret_str("TARS_API_KEY") + + @staticmethod + def get_api_base(api_base: Optional[str] = None) -> str: + """Get TARS API base URL from parameter or environment variable.""" + return api_base or get_secret_str("TARS_API_BASE") or "https://api.router.tetrate.ai/v1" + + @staticmethod + def get_base_model(model: str) -> Optional[str]: + """Remove only a leading tars/ prefix from the model name; later occurrences are kept.""" + return model[len("tars/"):] if model.startswith("tars/") else model + + def get_models( + self, api_key: Optional[str] = None, api_base: Optional[str] = None + ) -> List[str]: + """ + Fetch available models from TARS API. + + Args: + api_key: TARS API key (optional, will use TARS_API_KEY env var if not provided) + api_base: TARS API base URL (optional, defaults to https://api.router.tetrate.ai/v1) + + Returns: + List of model names prefixed with "tars/" + """ + api_base = self.get_api_base(api_base) + api_key = self.get_api_key(api_key) + + if api_key is None: + raise ValueError( + "TARS_API_KEY is not set. Please set the environment variable to query TARS's /models endpoint." + ) + + try: + # Use a fresh httpx client to avoid any global configuration issues + url = f"{api_base.rstrip('/')}/models" # tolerate a trailing slash in the configured base URL + with httpx.Client() as client: + response = client.get( + url=url, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=10.0 + ) + response.raise_for_status() + except httpx.HTTPStatusError as e: + raise ValueError( + f"Failed to fetch models from TARS. Status code: {e.response.status_code}, Response: {e.response.text}" + ) + except Exception as e: + raise ValueError(f"Failed to fetch models from TARS. 
Error: {e}") + + models_data = response.json().get("data", []) + + # Extract model IDs and prefix with "tars/" + litellm_model_names = [] + for model in models_data: + if isinstance(model, dict) and "id" in model: + model_id = model["id"] + litellm_model_name = f"tars/{model_id}" + litellm_model_names.append(litellm_model_name) + + return sorted(litellm_model_names) + + def validate_environment( + self, + headers: dict, + model: str, + messages: list, + optional_params: dict, + litellm_params: dict, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + ) -> dict: + """Validate TARS environment and add authentication headers.""" + api_key = self.get_api_key(api_key) + api_base = self.get_api_base(api_base) + + if api_key is None: + raise ValueError( + "TARS_API_KEY is not set. Please set the environment variable." + ) + + headers["Authorization"] = f"Bearer {api_key}" + return headers \ No newline at end of file diff --git a/litellm/llms/tars/cost_calculator.py b/litellm/llms/tars/cost_calculator.py new file mode 100644 index 000000000000..d88ba8cb48ef --- /dev/null +++ b/litellm/llms/tars/cost_calculator.py @@ -0,0 +1,55 @@ +""" +Helper util for handling TARS-specific cost calculation. +- Uses the generic cost calculator which already handles tiered pricing correctly. +- Adds a 5% margin to the base model costs. +- Returns (0.0, 0.0) when no pricing is available. +""" + +from typing import Tuple + +from litellm.types.utils import Usage +from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token +from litellm.utils import get_model_info + + +def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]: + """ + Calculates the cost per token for a given TARS model with a 5% margin. + Uses the generic cost calculator for all pricing logic. + + Input: + - model: str, the model name without provider prefix. + - usage: LiteLLM Usage block, containing usage information. 
+ + Returns: + Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd. + Returns (0.0, 0.0) if no pricing is available. + """ + try: + # Check if pricing is available for this model. + model_info = get_model_info(model=model, custom_llm_provider="tars") + + # If no pricing is available, return (0.0, 0.0). + if not model_info or ( + model_info.get("input_cost_per_token", 0) == 0 and + model_info.get("output_cost_per_token", 0) == 0 + ): + return (0.0, 0.0) + + # Calculate base cost using generic calculator. + prompt_cost, completion_cost = generic_cost_per_token( + model=model, + usage=usage, + custom_llm_provider="tars" + ) + + # Add 5% margin to both costs. + margin_multiplier = 1.05 + prompt_cost_with_margin = prompt_cost * margin_multiplier + completion_cost_with_margin = completion_cost * margin_multiplier + + return prompt_cost_with_margin, completion_cost_with_margin + except Exception: + # If any error occurs (e.g., model not found), return (0.0, 0.0). + return (0.0, 0.0) + diff --git a/litellm/llms/tars/embedding/transformation.py b/litellm/llms/tars/embedding/transformation.py new file mode 100644 index 000000000000..9cb3e4460dc7 --- /dev/null +++ b/litellm/llms/tars/embedding/transformation.py @@ -0,0 +1,52 @@ +""" +Support for OpenAI's `/v1/embeddings` endpoint. + +TARS (Tetrate Agent Router Service) is OpenAI-compatible for embeddings. + +Docs: https://router.tetrate.ai +API: https://api.router.tetrate.ai/v1 +""" + +from typing import Optional, Union + +import httpx + +from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm.llms.openai.embedding.transformation import OpenAIEmbeddingConfig +from litellm.secret_managers.main import get_secret_str + +from ..common_utils import TarsException + + +class TarsEmbeddingConfig(OpenAIEmbeddingConfig): + """ + Configuration for TARS embeddings. + + TARS supports embeddings through OpenAI-compatible API. 
+ """ + + def get_error_class( + self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + return TarsException( + message=error_message, + status_code=status_code, + headers=headers, + ) + + @staticmethod + def get_api_base(api_base: Optional[str] = None) -> str: + """ + Get TARS API base URL from parameter or environment variable. + Override to use TARS-specific defaults instead of OpenAI defaults. + """ + return api_base or get_secret_str("TARS_API_BASE") or "https://api.router.tetrate.ai/v1" + + @staticmethod + def get_api_key(api_key: Optional[str] = None) -> Optional[str]: + """ + Get TARS API key from parameter or environment variable. + Override to use TARS-specific API key instead of OpenAI key. + """ + return api_key or get_secret_str("TARS_API_KEY") + diff --git a/litellm/main.py b/litellm/main.py index 8e3f9a0b3d8f..e335939ef68d 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -2013,6 +2013,49 @@ def completion( # type: ignore # noqa: PLR0915 provider_config=provider_config, ) + ## LOGGING + logging.post_call( + input=messages, api_key=api_key, original_response=response + ) + elif custom_llm_provider == "tars": + api_base = ( + api_base + or litellm.api_base + or get_secret_str("TARS_API_BASE") + or "https://api.router.tetrate.ai/v1" + ) + + api_key = ( + api_key + or litellm.api_key + or get_secret("TARS_API_KEY") + ) + + ## Load Config + config = litellm.TarsConfig.get_config() + for k, v in config.items(): + if k not in optional_params: + optional_params[k] = v + + ## COMPLETION CALL + response = base_llm_http_handler.completion( + model=model, + stream=stream, + messages=messages, + acompletion=acompletion, + api_base=api_base, + model_response=model_response, + optional_params=optional_params, + litellm_params=litellm_params, + shared_session=shared_session, + custom_llm_provider="tars", + timeout=timeout, + headers=headers, + encoding=encoding, + api_key=api_key, + logging_obj=logging, + 
client=client, + ) ## LOGGING logging.post_call( input=messages, api_key=api_key, original_response=response @@ -4494,6 +4537,27 @@ def embedding( # noqa: PLR0915 or "api.studio.nebius.ai/v1" ) + response = openai_chat_completions.embedding( + model=model, + input=input, + api_base=api_base, + api_key=api_key, + logging_obj=logging, + timeout=timeout, + model_response=EmbeddingResponse(), + optional_params=optional_params, + client=client, + aembedding=aembedding, + ) + elif custom_llm_provider == "tars": + api_key = api_key or litellm.api_key or get_secret("TARS_API_KEY") + api_base = ( + api_base + or litellm.api_base + or get_secret_str("TARS_API_BASE") + or "https://api.router.tetrate.ai/v1" + ) + response = openai_chat_completions.embedding( model=model, input=input, diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 4804ae1de742..cf0f93d3b0cc 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -23832,5 +23832,1894 @@ "supports_tool_choice": true, "supports_vision": true, "supports_web_search": true + }, + "tars/claude-haiku-4-5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-haiku-4-5-20251001": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/gpt-4o-2024-11-20": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo": { + 
"litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/claude-sonnet-4-5-20250929": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-sonnet-4-5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-sonnet-4-20250514": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-sonnet-4-0": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-opus-4-20250514": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "cache_read_input_token_cost": 0.0000015, + "cache_write_input_token_cost": 0.00001875, + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000 + }, + "tars/claude-opus-4-1-20250805": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 
0.000075, + "cache_read_input_token_cost": 0.0000015, + "cache_write_input_token_cost": 0.00001875, + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000 + }, + "tars/claude-opus-4-1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "cache_read_input_token_cost": 0.0000015, + "cache_write_input_token_cost": 0.00001875, + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000 + }, + "tars/claude-opus-4-0": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "cache_read_input_token_cost": 0.0000015, + "cache_write_input_token_cost": 0.00001875, + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000 + }, + "tars/claude-3-haiku-20240307": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, + "cache_read_input_token_cost": 3e-8, + "cache_write_input_token_cost": 3e-7, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/claude-3-7-sonnet-20250219": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-3-5-haiku-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.000004, + "cache_read_input_token_cost": 8e-8, + "cache_write_input_token_cost": 0.000001, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/claude-3-5-haiku-20241022": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.000004, + 
"cache_read_input_token_cost": 8e-8, + "cache_write_input_token_cost": 0.000001, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/xai/grok-code-fast-1-0825": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-code-fast-1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-code-fast": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-fast-reasoning-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "cache_read_input_token_cost": 5e-8, + "cache_write_input_token_cost": 5e-8, + "max_input_tokens": 2000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-fast-reasoning": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "cache_read_input_token_cost": 5e-8, + "cache_write_input_token_cost": 5e-8, + "max_input_tokens": 2000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-fast-non-reasoning-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 2000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, 
+ "tars/xai/grok-4-fast-non-reasoning": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 2000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-fast": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "cache_read_input_token_cost": 5e-8, + "cache_write_input_token_cost": 5e-8, + "max_input_tokens": 2000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-0709": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-mini-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-mini-fast-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-mini-fast-beta": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-mini-fast": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-mini-beta": { + 
"litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-fast-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-fast-beta": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-fast": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-beta": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-2-vision-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.00001, + 
"max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-2-vision-1212": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.00001, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-2-vision": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.00001, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/zai-org/GLM-4.6": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.000002, + "max_input_tokens": 202752, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/zai-org/GLM-4.5-Air": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.0000011, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/zai-org/GLM-4.5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 0.0000016, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/thenlper/gte-large": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/thenlper/gte-base": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/shibing624/text2vec-base-chinese": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/paraphrase-MiniLM-L6-v2": { + "litellm_provider": 
"tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/multi-qa-mpnet-base-dot-v1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/clip-ViT-B-32-multilingual-v1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/clip-ViT-B-32": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 77, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/all-mpnet-base-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/all-MiniLM-L6-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/all-MiniLM-L12-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Sao10K/L3.3-70B-Euryale-v2.3": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 7.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Sao10K/L3.1-70B-Euryale-v2.2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 7.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 
+ }, + "tars/deepinfra/Sao10K/L3-8B-Lunaris-v1-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/QwQ-32B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Next-80B-A3B-Thinking": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 0.0000014, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 0.0000014, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Embedding-8B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Embedding-4B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Embedding-0.6B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-9, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.9e-7, + "output_cost_per_token": 0.0000012, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct": { + "litellm_provider": "tars", + 
"mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-32B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2.8e-7, + "max_input_tokens": 40960, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-30B-A3B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 8e-8, + "output_cost_per_token": 2.9e-7, + "max_input_tokens": 40960, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000029, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 9e-8, + "output_cost_per_token": 6e-7, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-235B-A22B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 5.4e-7, + "max_input_tokens": 40960, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-14B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2.4e-7, + "max_input_tokens": 40960, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen2.5-VL-32B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen2.5-72B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + 
"input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3.9e-7, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/openai/gpt-oss-20b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 1.4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/openai/gpt-oss-120b-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/openai/gpt-oss-120b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.6e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/NousResearch/Hermes-3-Llama-3.1-70B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/NousResearch/Hermes-3-Llama-3.1-405B": { + "litellm_provider": 
"tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/moonshotai/Kimi-K2-Instruct-0905": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 4e-7, + "cache_write_input_token_cost": 4e-7, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/moonshotai/Kimi-K2-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.000002, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/mistralai/Mistral-Small-3.2-24B-Instruct-2506": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 2e-7, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/mistralai/Mistral-Small-24B-Instruct-2501": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 8e-8, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/microsoft/WizardLM-2-8x22B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4.8e-7, + "output_cost_per_token": 4.8e-7, + "max_input_tokens": 65536, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/microsoft/phi-4": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 1.4e-7, + "max_input_tokens": 16384, + 
"max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-8, + "output_cost_per_token": 3e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 6e-8, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-Guard-4-12B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 1.8e-7, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-Guard-3-8B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5.5e-8, + "output_cost_per_token": 5.5e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 
8e-8, + "output_cost_per_token": 3e-7, + "max_input_tokens": 327680, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 3.9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-3.3-70B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.3e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-3.2-3B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-8, + "output_cost_per_token": 2e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4.9e-8, + "output_cost_per_token": 4.9e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Gryphe/MythoMax-L2-13b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 9e-8, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemma-3-4b-it": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 8e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemma-3-27b-it": { + "litellm_provider": "tars", + 
"mode": "chat", + "input_cost_per_token": 9e-8, + "output_cost_per_token": 1.6e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemma-3-12b-it": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 1e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemini-2.5-pro": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "max_input_tokens": 1000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemini-2.5-flash": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "max_input_tokens": 1000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemini-2.0-flash-001": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 1000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/embeddinggemma-300m": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-9, + "max_input_tokens": 2048, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-V3.2-Exp": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.000001, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-V3.1": { + "litellm_provider": "tars", + "mode": "chat", + 
"input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.000001, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-V3-0324": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 8.8e-7, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-V3": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 8.9e-7, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-R1-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "max_input_tokens": 40960, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-R1-0528": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.00000215, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-R1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 7e-7, + "output_cost_per_token": 0.0000024, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/BAAI/bge-m3-multi": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/BAAI/bge-m3": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + 
"max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/BAAI/bge-large-en-v1.5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/BAAI/bge-en-icl": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/BAAI/bge-base-en-v1.5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/anthropic/claude-4-sonnet": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000033, + "output_cost_per_token": 0.0000165, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/anthropic/claude-4-opus": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000165, + "output_cost_per_token": 0.0000825, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/anthropic/claude-3-7-sonnet-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000033, + "output_cost_per_token": 0.0000165, + "cache_write_input_token_cost": 3.3e-7, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/allenai/olmOCR-7B-0825": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/groq/qwen/qwen3-32b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.9e-7, + "output_cost_per_token": 5.9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 40960, + "max_tokens": 40960 + }, + 
"tars/groq/openai/gpt-oss-20b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536 + }, + "tars/groq/openai/gpt-oss-120b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 7.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536 + }, + "tars/groq/moonshotai/kimi-k2-instruct-0905": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/groq/meta-llama/llama-prompt-guard-2-86m": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 4e-8, + "max_input_tokens": 512, + "max_output_tokens": 512, + "max_tokens": 512 + }, + "tars/groq/meta-llama/llama-prompt-guard-2-22m": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 3e-8, + "max_input_tokens": 512, + "max_output_tokens": 512, + "max_tokens": 512 + }, + "tars/groq/meta-llama/llama-guard-4-12b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "max_input_tokens": 131072, + "max_output_tokens": 1024, + "max_tokens": 1024 + }, + "tars/groq/meta-llama/llama-4-scout-17b-16e-instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.1e-7, + "output_cost_per_token": 3.4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/groq/meta-llama/llama-4-maverick-17b-128e-instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + 
"tars/groq/llama-3.3-70b-versatile": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5.9e-7, + "output_cost_per_token": 7.9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/groq/llama-3.1-8b-instant": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 8e-8, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072 + }, + "tars/gemini-2.0-flash": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/deepinfra/intfloat/multilingual-e5-large-instruct": { + "litellm_provider": "tars", + "mode": "chat", + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/intfloat/multilingual-e5-large": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/intfloat/e5-large-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/intfloat/e5-base-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gemini-2.0-flash-001": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/gemini-2.0-flash-exp": { + "litellm_provider": "tars", + "mode": "chat", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 
8192 + }, + "tars/gemini-2.0-flash-lite": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 3e-7, + "cache_read_input_token_cost": 1.9e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/gemini-2.0-flash-lite-preview-02-05": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 3e-7, + "cache_read_input_token_cost": 1.9e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/gemini-2.0-flash-thinking-exp": { + "litellm_provider": "tars", + "mode": "chat", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536 + }, + "tars/gemini-2.0-flash-thinking-exp-01-21": { + "litellm_provider": "tars", + "mode": "chat", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536 + }, + "tars/gemini-2.5-flash": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "cache_read_input_token_cost": 7.5e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-flash-lite": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-flash-lite-preview-06-17": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-flash-preview-05-20": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "cache_read_input_token_cost": 7.5e-8, + 
"max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-pro": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 3.13e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-pro-preview-03-25": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 3.13e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-pro-preview-05-06": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 3.13e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-pro-preview-06-05": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 3.13e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/chatgpt-4o-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-3.5-turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-3.5-turbo-0125": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-3.5-turbo-1106": { + "litellm_provider": 
"tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000002, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-3.5-turbo-16k": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-0125-preview": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-0613": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-1106-preview": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-turbo-2024-04-09": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-turbo-preview": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "max_input_tokens": 128000, + 
"max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4.1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "cache_read_input_token_cost": 5e-7, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4.1-2025-04-14": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "cache_read_input_token_cost": 5e-7, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4.1-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "cache_read_input_token_cost": 1e-7, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4.1-mini-2025-04-14": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "cache_read_input_token_cost": 1e-7, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4.1-nano": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4.1-nano-2025-04-14": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4o": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-2024-05-13": { + 
"litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4o-2024-08-06": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "cache_read_input_token_cost": 7.5e-8, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-mini-2024-07-18": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "cache_read_input_token_cost": 7.5e-8, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-mini-search-preview": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "cache_read_input_token_cost": 7.5e-8, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-mini-search-preview-2025-03-11": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "cache_read_input_token_cost": 7.5e-8, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-search-preview": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-search-preview-2025-03-11": { + "litellm_provider": "tars", + "mode": "chat", + 
"input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-2025-08-07": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-chat-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-mini-2025-08-07": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-nano": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 5e-9, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-nano-2025-08-07": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 
4e-7, + "cache_read_input_token_cost": 5e-9, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/o1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o1-2024-12-17": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o3": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "cache_read_input_token_cost": 5e-7, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o3-2025-04-16": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "cache_read_input_token_cost": 5e-7, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o3-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 5.5e-7, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o4-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 2.75e-7, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o4-mini-2025-04-16": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 2.75e-7, + "max_input_tokens": 200000, + 
"max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/text-embedding-3-large": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.3e-7, + "max_input_tokens": 8191, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/text-embedding-3-small": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-8, + "max_input_tokens": 8191, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/text-embedding-ada-002": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "max_input_tokens": 8191, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/claude-3-7-sonnet-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/deepinfra/mistralai/Mistral-Nemo-Instruct-2407": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-8, + "output_cost_per_token": 4e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/o3-mini-2025-01-31": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 5.5e-7, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 } } diff --git a/litellm/types/utils.py b/litellm/types/utils.py index c87bf75c8c68..46df7b019b61 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -2504,6 +2504,7 @@ class LlmProviders(str, Enum): COMPACTIFAI = "compactifai" CUSTOM = "custom" LITELLM_PROXY = "litellm_proxy" + TARS = "tars" HOSTED_VLLM = "hosted_vllm" LLAMAFILE = "llamafile" LM_STUDIO = "lm_studio" diff --git a/litellm/utils.py b/litellm/utils.py index f1e56954fe1a..01467fbf4b65 100644 --- 
a/litellm/utils.py +++ b/litellm/utils.py @@ -7103,6 +7103,8 @@ def get_provider_chat_config( # noqa: PLR0915 return litellm.NovitaConfig() elif litellm.LlmProviders.NEBIUS == provider: return litellm.NebiusConfig() + elif litellm.LlmProviders.TARS == provider: + return litellm.TarsConfig() elif litellm.LlmProviders.WANDB == provider: return litellm.WandbConfig() elif litellm.LlmProviders.DASHSCOPE == provider: @@ -7373,6 +7375,8 @@ def get_provider_model_info( return litellm.LemonadeChatConfig() elif LlmProviders.CLARIFAI == provider: return litellm.ClarifaiConfig() + elif LlmProviders.TARS == provider: + return litellm.TarsConfig() return None @staticmethod diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 4804ae1de742..cf0f93d3b0cc 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -23832,5 +23832,1894 @@ "supports_tool_choice": true, "supports_vision": true, "supports_web_search": true + }, + "tars/claude-haiku-4-5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-haiku-4-5-20251001": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/gpt-4o-2024-11-20": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "max_input_tokens": 32768, + "max_output_tokens": 4096, 
+ "max_tokens": 4096 + }, + "tars/claude-sonnet-4-5-20250929": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-sonnet-4-5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-sonnet-4-20250514": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-sonnet-4-0": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-opus-4-20250514": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "cache_read_input_token_cost": 0.0000015, + "cache_write_input_token_cost": 0.00001875, + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000 + }, + "tars/claude-opus-4-1-20250805": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "cache_read_input_token_cost": 0.0000015, + "cache_write_input_token_cost": 0.00001875, + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000 + 
}, + "tars/claude-opus-4-1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "cache_read_input_token_cost": 0.0000015, + "cache_write_input_token_cost": 0.00001875, + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000 + }, + "tars/claude-opus-4-0": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "cache_read_input_token_cost": 0.0000015, + "cache_write_input_token_cost": 0.00001875, + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000 + }, + "tars/claude-3-haiku-20240307": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, + "cache_read_input_token_cost": 3e-8, + "cache_write_input_token_cost": 3e-7, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/claude-3-7-sonnet-20250219": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/claude-3-5-haiku-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.000004, + "cache_read_input_token_cost": 8e-8, + "cache_write_input_token_cost": 0.000001, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/claude-3-5-haiku-20241022": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.000004, + "cache_read_input_token_cost": 8e-8, + "cache_write_input_token_cost": 0.000001, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/xai/grok-code-fast-1-0825": { + 
"litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-code-fast-1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-code-fast": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-fast-reasoning-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "cache_read_input_token_cost": 5e-8, + "cache_write_input_token_cost": 5e-8, + "max_input_tokens": 2000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-fast-reasoning": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "cache_read_input_token_cost": 5e-8, + "cache_write_input_token_cost": 5e-8, + "max_input_tokens": 2000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-fast-non-reasoning-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 2000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-fast-non-reasoning": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 2000000, + 
"max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-fast": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "cache_read_input_token_cost": 5e-8, + "cache_write_input_token_cost": 5e-8, + "max_input_tokens": 2000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4-0709": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-4": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 256000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-mini-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-mini-fast-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-mini-fast-beta": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-mini-fast": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-mini-beta": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + 
"tars/xai/grok-3-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-fast-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-fast-beta": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-fast": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3-beta": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-3": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-2-vision-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.00001, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-2-vision-1212": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + 
"output_cost_per_token": 0.00001, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/xai/grok-2-vision": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.00001, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/zai-org/GLM-4.6": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.000002, + "max_input_tokens": 202752, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/zai-org/GLM-4.5-Air": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.0000011, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/zai-org/GLM-4.5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 0.0000016, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/thenlper/gte-large": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/thenlper/gte-base": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/shibing624/text2vec-base-chinese": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/paraphrase-MiniLM-L6-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + 
"tars/deepinfra/sentence-transformers/multi-qa-mpnet-base-dot-v1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/clip-ViT-B-32-multilingual-v1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/clip-ViT-B-32": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 77, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/all-mpnet-base-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/all-MiniLM-L6-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/sentence-transformers/all-MiniLM-L12-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Sao10K/L3.3-70B-Euryale-v2.3": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 7.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Sao10K/L3.1-70B-Euryale-v2.2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 7.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Sao10K/L3-8B-Lunaris-v1-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-8, + 
"output_cost_per_token": 5e-8, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/QwQ-32B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Next-80B-A3B-Thinking": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 0.0000014, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 0.0000014, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Embedding-8B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Embedding-4B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Embedding-0.6B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-9, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.9e-7, + "output_cost_per_token": 0.0000012, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "max_input_tokens": 262144, + "max_output_tokens": 
4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-32B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2.8e-7, + "max_input_tokens": 40960, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-30B-A3B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 8e-8, + "output_cost_per_token": 2.9e-7, + "max_input_tokens": 40960, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000029, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 9e-8, + "output_cost_per_token": 6e-7, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-235B-A22B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 5.4e-7, + "max_input_tokens": 40960, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen3-14B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2.4e-7, + "max_input_tokens": 40960, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen2.5-VL-32B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Qwen/Qwen2.5-72B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3.9e-7, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + 
}, + "tars/deepinfra/openai/gpt-oss-20b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 1.4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/openai/gpt-oss-120b-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/openai/gpt-oss-120b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.6e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/NousResearch/Hermes-3-Llama-3.1-70B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/NousResearch/Hermes-3-Llama-3.1-405B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "max_input_tokens": 131072, + "max_output_tokens": 
4096, + "max_tokens": 4096 + }, + "tars/deepinfra/moonshotai/Kimi-K2-Instruct-0905": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 4e-7, + "cache_write_input_token_cost": 4e-7, + "max_input_tokens": 262144, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/moonshotai/Kimi-K2-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.000002, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/mistralai/Mistral-Small-3.2-24B-Instruct-2506": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 2e-7, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/mistralai/Mistral-Small-24B-Instruct-2501": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 8e-8, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/microsoft/WizardLM-2-8x22B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4.8e-7, + "output_cost_per_token": 4.8e-7, + "max_input_tokens": 65536, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/microsoft/phi-4": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 1.4e-7, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { + "litellm_provider": "tars", + "mode": 
"chat", + "input_cost_per_token": 2e-8, + "output_cost_per_token": 3e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 6e-8, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-Guard-4-12B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 1.8e-7, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-Guard-3-8B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5.5e-8, + "output_cost_per_token": 5.5e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 8e-8, + "output_cost_per_token": 3e-7, + "max_input_tokens": 327680, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + 
"tars/deepinfra/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 3.9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-3.3-70B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.3e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-3.2-3B-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-8, + "output_cost_per_token": 2e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4.9e-8, + "output_cost_per_token": 4.9e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/Gryphe/MythoMax-L2-13b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 9e-8, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemma-3-4b-it": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 8e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemma-3-27b-it": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 9e-8, + "output_cost_per_token": 1.6e-7, + "max_input_tokens": 131072, + 
"max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemma-3-12b-it": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 1e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemini-2.5-pro": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "max_input_tokens": 1000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemini-2.5-flash": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "max_input_tokens": 1000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/gemini-2.0-flash-001": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 1000000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/google/embeddinggemma-300m": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-9, + "max_input_tokens": 2048, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-V3.2-Exp": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.000001, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-V3.1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.000001, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + 
"max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-V3-0324": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 8.8e-7, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-V3": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 8.9e-7, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-R1-Turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "max_input_tokens": 40960, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-R1-0528": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.00000215, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/deepseek-ai/DeepSeek-R1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 7e-7, + "output_cost_per_token": 0.0000024, + "max_input_tokens": 163840, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/BAAI/bge-m3-multi": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/BAAI/bge-m3": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/BAAI/bge-large-en-v1.5": { + 
"litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/BAAI/bge-en-icl": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/BAAI/bge-base-en-v1.5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/anthropic/claude-4-sonnet": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000033, + "output_cost_per_token": 0.0000165, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/anthropic/claude-4-opus": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000165, + "output_cost_per_token": 0.0000825, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/anthropic/claude-3-7-sonnet-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000033, + "output_cost_per_token": 0.0000165, + "cache_write_input_token_cost": 3.3e-7, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/allenai/olmOCR-7B-0825": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/groq/qwen/qwen3-32b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.9e-7, + "output_cost_per_token": 5.9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 40960, + "max_tokens": 40960 + }, + "tars/groq/openai/gpt-oss-20b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + 
"output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536 + }, + "tars/groq/openai/gpt-oss-120b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 7.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536 + }, + "tars/groq/moonshotai/kimi-k2-instruct-0905": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/groq/meta-llama/llama-prompt-guard-2-86m": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 4e-8, + "max_input_tokens": 512, + "max_output_tokens": 512, + "max_tokens": 512 + }, + "tars/groq/meta-llama/llama-prompt-guard-2-22m": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 3e-8, + "max_input_tokens": 512, + "max_output_tokens": 512, + "max_tokens": 512 + }, + "tars/groq/meta-llama/llama-guard-4-12b": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "max_input_tokens": 131072, + "max_output_tokens": 1024, + "max_tokens": 1024 + }, + "tars/groq/meta-llama/llama-4-scout-17b-16e-instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.1e-7, + "output_cost_per_token": 3.4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/groq/meta-llama/llama-4-maverick-17b-128e-instruct": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/groq/llama-3.3-70b-versatile": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5.9e-7, + 
"output_cost_per_token": 7.9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/groq/llama-3.1-8b-instant": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 8e-8, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072 + }, + "tars/gemini-2.0-flash": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/deepinfra/intfloat/multilingual-e5-large-instruct": { + "litellm_provider": "tars", + "mode": "chat", + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/intfloat/multilingual-e5-large": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/intfloat/e5-large-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-8, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/deepinfra/intfloat/e5-base-v2": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-9, + "max_input_tokens": 512, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gemini-2.0-flash-001": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/gemini-2.0-flash-exp": { + "litellm_provider": "tars", + "mode": "chat", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/gemini-2.0-flash-lite": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 
7.5e-8, + "output_cost_per_token": 3e-7, + "cache_read_input_token_cost": 1.9e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/gemini-2.0-flash-lite-preview-02-05": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 3e-7, + "cache_read_input_token_cost": 1.9e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192 + }, + "tars/gemini-2.0-flash-thinking-exp": { + "litellm_provider": "tars", + "mode": "chat", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536 + }, + "tars/gemini-2.0-flash-thinking-exp-01-21": { + "litellm_provider": "tars", + "mode": "chat", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536 + }, + "tars/gemini-2.5-flash": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "cache_read_input_token_cost": 7.5e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-flash-lite": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-flash-lite-preview-06-17": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-flash-preview-05-20": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "cache_read_input_token_cost": 7.5e-8, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-pro": { + 
"litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 3.13e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-pro-preview-03-25": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 3.13e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-pro-preview-05-06": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 3.13e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/gemini-2.5-pro-preview-06-05": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 3.13e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535 + }, + "tars/chatgpt-4o-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-3.5-turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-3.5-turbo-0125": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-3.5-turbo-1106": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000002, + 
"max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-3.5-turbo-16k": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-0125-preview": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-0613": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-1106-preview": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-turbo": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-turbo-2024-04-09": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4-turbo-preview": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4.1": { + "litellm_provider": "tars", + "mode": 
"chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "cache_read_input_token_cost": 5e-7, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4.1-2025-04-14": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "cache_read_input_token_cost": 5e-7, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4.1-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "cache_read_input_token_cost": 1e-7, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4.1-mini-2025-04-14": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "cache_read_input_token_cost": 1e-7, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4.1-nano": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4.1-nano-2025-04-14": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "tars/gpt-4o": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-2024-05-13": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000005, + "output_cost_per_token": 
0.000015, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/gpt-4o-2024-08-06": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "cache_read_input_token_cost": 7.5e-8, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-mini-2024-07-18": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "cache_read_input_token_cost": 7.5e-8, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-mini-search-preview": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "cache_read_input_token_cost": 7.5e-8, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-mini-search-preview-2025-03-11": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "cache_read_input_token_cost": 7.5e-8, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-search-preview": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-4o-search-preview-2025-03-11": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, + 
"max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384 + }, + "tars/gpt-5": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-2025-08-07": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-chat-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-mini-2025-08-07": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 2.5e-8, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-nano": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 5e-9, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000 + }, + "tars/gpt-5-nano-2025-08-07": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 5e-9, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + 
"max_tokens": 128000 + }, + "tars/o1": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o1-2024-12-17": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o3": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "cache_read_input_token_cost": 5e-7, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o3-2025-04-16": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "cache_read_input_token_cost": 5e-7, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o3-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 5.5e-7, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o4-mini": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 2.75e-7, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/o4-mini-2025-04-16": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 2.75e-7, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 + }, + "tars/text-embedding-3-large": { + 
"litellm_provider": "tars", + "mode": "embedding", + "input_cost_per_token": 1.3e-7, + "max_input_tokens": 8191, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/text-embedding-3-small": { + "litellm_provider": "tars", + "mode": "embedding", + "input_cost_per_token": 2e-8, + "max_input_tokens": 8191, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/text-embedding-ada-002": { + "litellm_provider": "tars", + "mode": "embedding", + "input_cost_per_token": 1e-7, + "max_input_tokens": 8191, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/claude-3-7-sonnet-latest": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 3e-7, + "cache_write_input_token_cost": 0.00000375, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000 + }, + "tars/deepinfra/mistralai/Mistral-Nemo-Instruct-2407": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 2e-8, + "output_cost_per_token": 4e-8, + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "tars/o3-mini-2025-01-31": { + "litellm_provider": "tars", + "mode": "chat", + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 5.5e-7, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000 } } diff --git a/tests/test_litellm/llms/tars/test_tars.py b/tests/test_litellm/llms/tars/test_tars.py new file mode 100644 index 000000000000..590b43fbe8cb --- /dev/null +++ b/tests/test_litellm/llms/tars/test_tars.py @@ -0,0 +1,394 @@ +import json +import os +import sys + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../../../..") +) # Adds the parent directory to the system path. 
+from unittest.mock import MagicMock, patch + +from litellm.llms.tars.chat.transformation import TarsConfig +from litellm.llms.tars.common_utils import TarsException, TarsModelInfo +from litellm.llms.tars.embedding.transformation import TarsEmbeddingConfig +from litellm.llms.tars.cost_calculator import cost_per_token +from litellm.types.utils import ModelResponse, Usage +import httpx + + +def test_tars_config_initialization(): + """Test that TarsConfig can be initialized with various parameters.""" + config = TarsConfig( + temperature=0.7, + max_tokens=100, + top_p=0.9, + ) + + assert config.temperature == 0.7 + assert config.max_tokens == 100 + assert config.top_p == 0.9 + + +def test_tars_get_api_base(): + """Test the get_api_base method returns correct default.""" + api_base = TarsConfig.get_api_base(api_base=None) + assert api_base == "https://api.router.tetrate.ai/v1" + + +def test_tars_get_api_base_custom(): + """Test the get_api_base method with custom API base.""" + custom_api_base = "https://custom.tars.ai/v1" + api_base = TarsConfig.get_api_base(api_base=custom_api_base) + assert api_base == custom_api_base + + +def test_tars_get_api_key(): + """Test the get_api_key method.""" + test_key = "test-tars-key" + api_key = TarsConfig.get_api_key(api_key=test_key) + assert api_key == test_key + + +def test_tars_get_complete_url(): + """Test the get_complete_url method generates correct endpoint URL.""" + config = TarsConfig() + + url = config.get_complete_url( + api_base="https://api.router.tetrate.ai/v1", + api_key="test-key", + model="claude-sonnet-4-20250514", + optional_params={}, + litellm_params={}, + stream=False + ) + + assert url == "https://api.router.tetrate.ai/v1/chat/completions" + + +def test_tars_get_complete_url_no_duplicate(): + """Test that get_complete_url doesn't duplicate endpoint in URL.""" + config = TarsConfig() + + url = config.get_complete_url( + api_base="https://api.router.tetrate.ai/v1/chat/completions", + api_key="test-key", + 
model="gpt-4o", + optional_params={}, + litellm_params={}, + stream=False + ) + + assert url == "https://api.router.tetrate.ai/v1/chat/completions" + assert url.count("chat/completions") == 1 + + +def test_tars_exception(): + """Test TarsException can be instantiated properly.""" + exception = TarsException( + message="Test error", + status_code=400, + headers={"content-type": "application/json"} + ) + + assert exception.status_code == 400 + assert exception.message == "Test error" + + +def test_tars_model_info_get_base_model(): + """Test get_base_model removes tars/ prefix.""" + model_info = TarsModelInfo() + + base_model = model_info.get_base_model("tars/claude-sonnet-4-20250514") + assert base_model == "claude-sonnet-4-20250514" + + base_model = model_info.get_base_model("tars/gpt-4o") + assert base_model == "gpt-4o" + + +def test_tars_model_info_validate_environment(): + """Test validate_environment adds proper authentication headers.""" + model_info = TarsModelInfo() + + headers = {} + test_api_key = "test-tars-api-key" + + result_headers = model_info.validate_environment( + headers=headers, + model="tars/gpt-4o", + messages=[{"role": "user", "content": "Hello"}], + optional_params={}, + litellm_params={}, + api_key=test_api_key, + api_base="https://api.router.tetrate.ai/v1" + ) + + assert "Authorization" in result_headers + assert result_headers["Authorization"] == f"Bearer {test_api_key}" + + +def test_tars_model_info_validate_environment_no_key(): + """Test validate_environment raises error when no API key provided.""" + model_info = TarsModelInfo() + + headers = {} + + with pytest.raises(ValueError, match="TARS_API_KEY is not set"): + model_info.validate_environment( + headers=headers, + model="tars/gpt-4o", + messages=[{"role": "user", "content": "Hello"}], + optional_params={}, + litellm_params={}, + api_key=None, + api_base="https://api.router.tetrate.ai/v1" + ) + + +@patch('httpx.Client') +def test_tars_get_models_success(mock_client_class): + """Test 
get_models fetches and formats models correctly.""" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.json.return_value = { + "data": [ + {"id": "claude-sonnet-4-20250514", "object": "model"}, + {"id": "gpt-4o", "object": "model"}, + {"id": "gpt-4o-mini", "object": "model"}, + ] + } + mock_client.get.return_value = mock_response + mock_client.__enter__.return_value = mock_client + mock_client.__exit__.return_value = False + mock_client_class.return_value = mock_client + + model_info = TarsModelInfo() + models = model_info.get_models(api_key="test-key", api_base="https://api.router.tetrate.ai/v1") + + assert len(models) == 3 + assert "tars/claude-sonnet-4-20250514" in models + assert "tars/gpt-4o" in models + assert "tars/gpt-4o-mini" in models + assert models == sorted(models) # Verify models are sorted. + + +@patch('httpx.Client') +def test_tars_get_models_no_api_key(mock_client_class): + """Test get_models raises error when no API key provided.""" + model_info = TarsModelInfo() + + with pytest.raises(ValueError, match="TARS_API_KEY is not set"): + model_info.get_models(api_key=None, api_base="https://api.router.tetrate.ai/v1") + + +@patch('httpx.Client') +def test_tars_get_models_http_error(mock_client_class): + """Test get_models handles HTTP errors gracefully.""" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 401 + mock_response.text = "Unauthorized" + + http_error = httpx.HTTPStatusError( + "Unauthorized", + request=MagicMock(), + response=mock_response + ) + mock_client.get.side_effect = http_error + mock_client.__enter__.return_value = mock_client + mock_client.__exit__.return_value = False + mock_client_class.return_value = mock_client + + model_info = TarsModelInfo() + + with pytest.raises(ValueError, match="Failed to fetch models from TARS"): + model_info.get_models(api_key="invalid-key", api_base="https://api.router.tetrate.ai/v1") + + +def test_tars_embedding_config(): + """Test 
TarsEmbeddingConfig initialization.""" + config = TarsEmbeddingConfig() + + assert config is not None + + +def test_tars_embedding_get_error_class(): + """Test TarsEmbeddingConfig returns correct error class.""" + config = TarsEmbeddingConfig() + + error = config.get_error_class( + error_message="Test error", + status_code=500, + headers={"content-type": "application/json"} + ) + + assert isinstance(error, TarsException) + assert error.status_code == 500 + + +def test_tars_config_get_error_class(): + """Test TarsConfig returns correct error class.""" + config = TarsConfig() + + error = config.get_error_class( + error_message="Test error", + status_code=429, + headers={"content-type": "application/json"} + ) + + assert isinstance(error, TarsException) + assert error.status_code == 429 + + +def test_config_get_config(): + """Test that get_config method returns the configuration.""" + config_dict = TarsConfig.get_config() + assert isinstance(config_dict, dict) + + +def test_response_format_support(): + """Test that response_format parameter is supported.""" + response_format = { + "type": "json_object" + } + + config = TarsConfig(response_format=response_format) + assert config.response_format == response_format + + +def test_tools_support(): + """Test that tools parameter is supported.""" + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather information" + } + } + ] + + config = TarsConfig(tools=tools) + assert config.tools == tools + + +def test_functions_support(): + """Test that functions parameter is supported.""" + functions = [ + { + "name": "get_weather", + "description": "Get weather information", + "parameters": { + "type": "object", + "properties": {} + } + } + ] + + config = TarsConfig(functions=functions) + assert config.functions == functions + + +def test_stop_parameter_support(): + """Test that stop parameter supports both string and list.""" + # Test with string. 
+ config1 = TarsConfig(stop="STOP") + assert config1.stop == "STOP" + + # Test with list. + config2 = TarsConfig(stop=["STOP", "END"]) + assert config2.stop == ["STOP", "END"] + + +def test_logit_bias_support(): + """Test that logit_bias parameter is supported.""" + logit_bias = {"50256": -100} + + config = TarsConfig(logit_bias=logit_bias) + assert config.logit_bias == logit_bias + + +def test_presence_penalty_support(): + """Test that presence_penalty parameter is supported.""" + config = TarsConfig(presence_penalty=0.5) + assert config.presence_penalty == 0.5 + + +def test_n_parameter_support(): + """Test that n parameter (number of completions) is supported.""" + config = TarsConfig(n=3) + assert config.n == 3 + + +def test_max_completion_tokens_support(): + """Test that max_completion_tokens parameter is supported.""" + config = TarsConfig(max_completion_tokens=150) + assert config.max_completion_tokens == 150 + + +def test_tars_config_inherits_openai(): + """Test that TarsConfig properly inherits from OpenAIGPTConfig.""" + from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig + + config = TarsConfig() + assert isinstance(config, OpenAIGPTConfig) + + +def test_tars_embedding_config_inherits_openai(): + """Test that TarsEmbeddingConfig properly inherits from OpenAIEmbeddingConfig.""" + from litellm.llms.openai.embedding.transformation import OpenAIEmbeddingConfig + + config = TarsEmbeddingConfig() + assert isinstance(config, OpenAIEmbeddingConfig) + + +def test_tars_cost_calculator_with_5_percent_margin(): + """Test that TARS cost calculator adds 5% margin to base costs.""" + usage = Usage( + prompt_tokens=1000, + completion_tokens=500, + total_tokens=1500 + ) + + # Test with a known model that has pricing. + prompt_cost, completion_cost = cost_per_token(model="gpt-4o", usage=usage) + + # Verify that costs are calculated (non-zero). + assert prompt_cost > 0 + assert completion_cost > 0 + + # The costs should include the 5% margin. 
+ # We can't test exact values without knowing the base prices, but we can verify they're positive. + + +def test_tars_cost_calculator_no_pricing_fallback(): + """Test that TARS cost calculator returns (0.0, 0.0) when no pricing is available.""" + usage = Usage( + prompt_tokens=1000, + completion_tokens=500, + total_tokens=1500 + ) + + # Test with a model that doesn't exist in the pricing catalog. + prompt_cost, completion_cost = cost_per_token(model="nonexistent-model-12345", usage=usage) + + # Should return (0.0, 0.0) as fallback. + assert prompt_cost == 0.0 + assert completion_cost == 0.0 + + +def test_tars_cost_calculator_with_zero_tokens(): + """Test that TARS cost calculator handles zero tokens correctly.""" + usage = Usage( + prompt_tokens=0, + completion_tokens=0, + total_tokens=0 + ) + + # Test with a known model. + prompt_cost, completion_cost = cost_per_token(model="gpt-4o", usage=usage) + + # Should return 0.0 for both when no tokens are used. + assert prompt_cost == 0.0 + assert completion_cost == 0.0 + diff --git a/ui/litellm-dashboard/src/components/add_model/provider_specific_fields.tsx b/ui/litellm-dashboard/src/components/add_model/provider_specific_fields.tsx index 2d11569ab956..bf55319c2741 100644 --- a/ui/litellm-dashboard/src/components/add_model/provider_specific_fields.tsx +++ b/ui/litellm-dashboard/src/components/add_model/provider_specific_fields.tsx @@ -460,6 +460,22 @@ const PROVIDER_CREDENTIAL_FIELDS: Record = required: true, }, ], + [Providers.TARS]: [ + { + key: "api_base", + label: "API Base", + placeholder: "https://api.router.tetrate.ai/v1", + defaultValue: "https://api.router.tetrate.ai/v1", + tooltip: "The base URL for TARS API. 
Default: https://api.router.tetrate.ai/v1", + }, + { + key: "api_key", + label: "TARS API Key", + type: "password", + required: true, + tooltip: "Get your API key from https://router.tetrate.ai", + }, + ], [Providers.FireworksAI]: [ { key: "api_key", diff --git a/ui/litellm-dashboard/src/components/provider_info_helpers.tsx b/ui/litellm-dashboard/src/components/provider_info_helpers.tsx index fb09b9fa82f8..1f1433f48a66 100644 --- a/ui/litellm-dashboard/src/components/provider_info_helpers.tsx +++ b/ui/litellm-dashboard/src/components/provider_info_helpers.tsx @@ -30,6 +30,7 @@ export enum Providers { Openrouter = "Openrouter", Oracle = "Oracle Cloud Infrastructure (OCI)", Perplexity = "Perplexity", + TARS = "TARS (Tetrate Agent Router Service)", Sambanova = "Sambanova", Snowflake = "Snowflake", TogetherAI = "TogetherAI", @@ -66,6 +67,7 @@ export const provider_map: Record = { Perplexity: "perplexity", TogetherAI: "together_ai", Openrouter: "openrouter", + TARS: "tars", Oracle: "oci", Snowflake: "snowflake", FireworksAI: "fireworks_ai", @@ -124,6 +126,7 @@ export const providerLogoMap: Record = { [Providers.JinaAI]: `${asset_logos_folder}jina.png`, [Providers.VolcEngine]: `${asset_logos_folder}volcengine.png`, [Providers.DeepInfra]: `${asset_logos_folder}deepinfra.png`, + [Providers.TARS]: `${asset_logos_folder}openai_small.svg`, }; export const getProviderLogoAndName = (providerValue: string): { logo: string; displayName: string } => {