From dcb7f513ee99bc31c79000f76bd466a7ff0bb3b7 Mon Sep 17 00:00:00 2001 From: Shawn Craver Date: Mon, 15 Dec 2025 10:38:47 -0500 Subject: [PATCH 1/4] feat: add Vultr serverless inference provider support - Add Vultr API configuration with 5 supported models - Register vultrProvider in provider registry - Supports OpenAI-compatible endpoint for Kimi K2, Llama 3.1, Mistral, DeepSeek, and Qwen models - Uses correct API endpoint: https://api.vultrinference.com/ - Kimi K2 set as default large model (256k context window) --- internal/providers/configs/vultr.json | 53 +++++++++++++++++++++++++++ internal/providers/providers.go | 8 ++++ 2 files changed, 61 insertions(+) create mode 100644 internal/providers/configs/vultr.json diff --git a/internal/providers/configs/vultr.json b/internal/providers/configs/vultr.json new file mode 100644 index 00000000..02b33293 --- /dev/null +++ b/internal/providers/configs/vultr.json @@ -0,0 +1,53 @@ +{ + "name": "Vultr", + "id": "vultr", + "type": "openai-compat", + "api_key": "$VULTR_API_KEY", + "api_endpoint": "https://api.vultrinference.com/", + "default_large_model_id": "kimi-k2-instruct", + "default_small_model_id": "qwen2.5-32b-instruct", + "models": [ + { + "id": "kimi-k2-instruct", + "name": "Kimi K2 Instruct", + "cost_per_1m_in": 1, + "cost_per_1m_out": 3, + "context_window": 262144, + "default_max_tokens": 8192, + "can_reason": true + }, + { + "id": "llama-3.1-70b-instruct-fp8", + "name": "Llama 3.1 70B Instruct FP8", + "cost_per_1m_in": 0.35, + "cost_per_1m_out": 0.35, + "context_window": 131072, + "default_max_tokens": 4096 + }, + { + "id": "mistral-7b-v0.3", + "name": "Mistral 7B v0.3", + "cost_per_1m_in": 0.05, + "cost_per_1m_out": 0.05, + "context_window": 32768, + "default_max_tokens": 4096 + }, + { + "id": "deepseek-r1", + "name": "DeepSeek R1", + "cost_per_1m_in": 0.55, + "cost_per_1m_out": 2.19, + "context_window": 128000, + "default_max_tokens": 4096, + "can_reason": true + }, + { + "id": "qwen2.5-32b-instruct", + "name": "Qwen 2.5 32B Instruct", + "cost_per_1m_in": 0.14, + "cost_per_1m_out": 0.42, + "context_window": 32768, + "default_max_tokens": 4096 + } + ] +} diff --git a/internal/providers/providers.go b/internal/providers/providers.go index c24a9768..5202961e 100644 --- a/internal/providers/providers.go +++ b/internal/providers/providers.go @@ -63,6 +63,9 @@ var aiHubMixConfig []byte //go:embed configs/kimi.json var kimiCodingConfig []byte +//go:embed configs/vultr.json +var vultrConfig []byte + // ProviderFunc is a function that returns a Provider. type ProviderFunc func() catwalk.Provider @@ -84,6 +87,7 @@ var providerRegistry = []ProviderFunc{ deepSeekProvider, huggingFaceProvider, aiHubMixProvider, + vultrProvider, syntheticProvider, } @@ -176,3 +180,7 @@ func aiHubMixProvider() catwalk.Provider { func kimiCodingProvider() catwalk.Provider { return loadProviderFromConfig(kimiCodingConfig) } + +func vultrProvider() catwalk.Provider { + return loadProviderFromConfig(vultrConfig) +} From c75fee00e3e0dd895fd946a649559150b8a2d854 Mon Sep 17 00:00:00 2001 From: Charm <124303983+charmcli@users.noreply.github.com> Date: Tue, 16 Dec 2025 02:41:05 +0000 Subject: [PATCH 2/4] chore: auto-update generated files --- internal/providers/configs/openrouter.json | 186 ++++++++++++--------- 1 file changed, 109 insertions(+), 77 deletions(-) diff --git a/internal/providers/configs/openrouter.json b/internal/providers/configs/openrouter.json index 99a7e933..110d9a93 100644 --- a/internal/providers/configs/openrouter.json +++ b/internal/providers/configs/openrouter.json @@ -193,8 +193,8 @@ "name": "Anthropic: Claude 3.5 Sonnet", "cost_per_1m_in": 6, "cost_per_1m_out": 30, - "cost_per_1m_in_cached": 7.5, - "cost_per_1m_out_cached": 0.6, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, "context_window": 200000, "default_max_tokens": 4096, "can_reason": false, @@ -554,12 +554,12 @@ { "id": "deepseek/deepseek-v3.1-terminus", "name": "DeepSeek: DeepSeek V3.1 Terminus", - "cost_per_1m_in": 0.22999999999999998, - "cost_per_1m_out": 0.8999999999999999, + "cost_per_1m_in": 0.21, + "cost_per_1m_out": 0.7899999999999999, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_out_cached": 0.16799999999999998, "context_window": 163840, - "default_max_tokens": 81920, + "default_max_tokens": 16384, "can_reason": true, "reasoning_levels": [ "low", @@ -573,11 +573,11 @@ { "id": "deepseek/deepseek-v3.1-terminus:exacto", "name": "DeepSeek: DeepSeek V3.1 Terminus (exacto)", - "cost_per_1m_in": 0.3, - "cost_per_1m_out": 1.2, + "cost_per_1m_in": 0.21, + "cost_per_1m_out": 0.7899999999999999, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, - "context_window": 131072, + "cost_per_1m_out_cached": 0.16799999999999998, + "context_window": 163840, "default_max_tokens": 16384, "can_reason": true, "reasoning_levels": [ @@ -592,12 +592,12 @@ { "id": "deepseek/deepseek-v3.2", "name": "DeepSeek: DeepSeek V3.2", - "cost_per_1m_in": 0.24, + "cost_per_1m_in": 0.26, "cost_per_1m_out": 0.38, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.19, + "cost_per_1m_out_cached": 0, "context_window": 163840, - "default_max_tokens": 81920, + "default_max_tokens": 32768, "can_reason": true, "reasoning_levels": [ "low", @@ -611,8 +611,8 @@ { "id": "deepseek/deepseek-v3.2-exp", "name": "DeepSeek: DeepSeek V3.2 Exp", - "cost_per_1m_in": 0.28, - "cost_per_1m_out": 0.39999999999999997, + "cost_per_1m_in": 0.21, + "cost_per_1m_out": 0.32, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 163840, @@ -649,12 +649,12 @@ { "id": "deepseek/deepseek-r1-0528", "name": "DeepSeek: R1 0528", - "cost_per_1m_in": 0.39999999999999997, - "cost_per_1m_out": 1.75, + "cost_per_1m_in": 0.56, + "cost_per_1m_out": 2, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_out_cached": 0.28, "context_window": 163840, - "default_max_tokens": 81920, + "default_max_tokens": 16384, "can_reason": true, "reasoning_levels": [ "low", @@ -769,7 +769,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 1048576, - "default_max_tokens": 32768, + "default_max_tokens": 32767, "can_reason": true, "reasoning_levels": [ "low", @@ -788,7 +788,7 @@ "cost_per_1m_in_cached": 0.3833, "cost_per_1m_out_cached": 0.075, "context_window": 1048576, - "default_max_tokens": 32767, + "default_max_tokens": 32768, "can_reason": true, "reasoning_levels": [ "low", @@ -943,12 +943,12 @@ { "id": "meta-llama/llama-3.1-70b-instruct", "name": "Meta: Llama 3.1 70B Instruct", - "cost_per_1m_in": 0.88, - "cost_per_1m_out": 0.88, + "cost_per_1m_in": 0.39999999999999997, + "cost_per_1m_out": 0.39999999999999997, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 13107, + "default_max_tokens": 8192, "can_reason": false, "supports_attachments": false, "options": {} @@ -956,12 +956,12 @@ { "id": "meta-llama/llama-3.1-8b-instruct", "name": "Meta: Llama 3.1 8B Instruct", - "cost_per_1m_in": 0.049999999999999996, - "cost_per_1m_out": 0.08, + "cost_per_1m_in": 0.03, + "cost_per_1m_out": 0.049999999999999996, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 65536, + "default_max_tokens": 8192, "can_reason": false, "supports_attachments": false, "options": {} @@ -982,12 +982,12 @@ { "id": "meta-llama/llama-3.3-70b-instruct", "name": "Meta: Llama 3.3 70B Instruct", - "cost_per_1m_in": 0.25, - "cost_per_1m_out": 0.75, + "cost_per_1m_in": 0.09999999999999999, + "cost_per_1m_out": 0.32, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 13107, + "default_max_tokens": 8192, "can_reason": false, "supports_attachments": false, "options": {} @@ -1417,12 +1417,12 @@ { "id": "mistralai/mixtral-8x7b-instruct", "name": "Mistral: Mixtral 8x7B Instruct", - "cost_per_1m_in": 0.6, - "cost_per_1m_out": 0.6, + "cost_per_1m_in": 0.54, + "cost_per_1m_out": 0.54, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 32768, - "default_max_tokens": 1024, + "default_max_tokens": 8192, "can_reason": false, "supports_attachments": false, "options": {} @@ -1482,12 +1482,12 @@ { "id": "moonshotai/kimi-k2-0905", "name": "MoonshotAI: Kimi K2 0905", - "cost_per_1m_in": 0.6, - "cost_per_1m_out": 2.5, + "cost_per_1m_in": 0.59, + "cost_per_1m_out": 2.99, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 262144, - "default_max_tokens": 26214, + "default_max_tokens": 131072, "can_reason": false, "supports_attachments": false, "options": {} @@ -1556,6 +1556,25 @@ "supports_attachments": false, "options": {} }, + { + "id": "nvidia/nemotron-3-nano-30b-a3b:free", + "name": "NVIDIA: Nemotron 3 Nano 30B A3B (free)", + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 25600, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, { "id": "nvidia/nemotron-nano-12b-v2-vl:free", "name": "NVIDIA: Nemotron Nano 12B 2 VL (free)", @@ -1711,7 +1730,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0.5, "context_window": 1047576, - "default_max_tokens": 16384, + "default_max_tokens": 104757, "can_reason": false, "supports_attachments": true, "options": {} @@ -1735,9 +1754,9 @@ "cost_per_1m_in": 0.09999999999999999, "cost_per_1m_out": 0.39999999999999997, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.024999999999999998, + "cost_per_1m_out_cached": 0.03, "context_window": 1047576, - "default_max_tokens": 16384, + "default_max_tokens": 104757, "can_reason": false, "supports_attachments": true, "options": {} @@ -1928,9 +1947,9 @@ "cost_per_1m_in": 0.25, "cost_per_1m_out": 2, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.024999999999999998, + "cost_per_1m_out_cached": 0.03, "context_window": 400000, - "default_max_tokens": 64000, + "default_max_tokens": 40000, "can_reason": true, "reasoning_levels": [ "low", @@ -1947,9 +1966,9 @@ "cost_per_1m_in": 0.049999999999999996, "cost_per_1m_out": 0.39999999999999997, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.005, + "cost_per_1m_out_cached": 0.01, "context_window": 400000, - "default_max_tokens": 64000, + "default_max_tokens": 40000, "can_reason": true, "reasoning_levels": [ "low", @@ -2122,8 +2141,8 @@ { "id": "openai/gpt-oss-120b", "name": "OpenAI: gpt-oss-120b", - "cost_per_1m_in": 0.15, - "cost_per_1m_out": 0.6, + "cost_per_1m_in": 0.09999999999999999, + "cost_per_1m_out": 0.49, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, @@ -2179,8 +2198,8 @@ { "id": "openai/gpt-oss-20b", "name": "OpenAI: gpt-oss-20b", - "cost_per_1m_in": 0.049999999999999996, - "cost_per_1m_out": 0.19999999999999998, + "cost_per_1m_in": 0.04, + "cost_per_1m_out": 0.15, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, @@ -2203,7 +2222,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 65536, + "default_max_tokens": 13107, "can_reason": true, "reasoning_levels": [ "low", @@ -2408,12 +2427,12 @@ { "id": "qwen/qwen-2.5-72b-instruct", "name": "Qwen2.5 72B Instruct", - "cost_per_1m_in": 0.12, - "cost_per_1m_out": 0.39, + "cost_per_1m_in": 0.07, + "cost_per_1m_out": 0.26, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 32768, - "default_max_tokens": 8192, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": false, "options": {} @@ -2505,8 +2524,8 @@ { "id": "qwen/qwen3-14b", "name": "Qwen: Qwen3 14B", - "cost_per_1m_in": 0.08, - "cost_per_1m_out": 0.24, + "cost_per_1m_in": 0.049999999999999996, + "cost_per_1m_out": 0.22, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 40960, @@ -2562,12 +2581,12 @@ { "id": "qwen/qwen3-235b-a22b-2507", "name": "Qwen: Qwen3 235B A22B Instruct 2507", - "cost_per_1m_in": 0.09999999999999999, - "cost_per_1m_out": 0.09999999999999999, + "cost_per_1m_in": 0.19999999999999998, + "cost_per_1m_out": 0.6, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 262144, - "default_max_tokens": 131072, + "default_max_tokens": 26214, "can_reason": false, "supports_attachments": false, "options": {} @@ -2626,12 +2645,12 @@ { "id": "qwen/qwen3-32b", "name": "Qwen: Qwen3 32B", - "cost_per_1m_in": 0.39999999999999997, - "cost_per_1m_out": 0.7999999999999999, + "cost_per_1m_in": 0.15, + "cost_per_1m_out": 0.5, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 16384, + "default_max_tokens": 4000, "can_reason": true, "reasoning_levels": [ "low", @@ -2683,8 +2702,8 @@ { "id": "qwen/qwen3-coder-30b-a3b-instruct", "name": "Qwen: Qwen3 Coder 30B A3B Instruct", - "cost_per_1m_in": 0.06, - "cost_per_1m_out": 0.25, + "cost_per_1m_in": 0.07, + "cost_per_1m_out": 0.28, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 262144, @@ -2696,12 +2715,12 @@ { "id": "qwen/qwen3-coder", "name": "Qwen: Qwen3 Coder 480B A35B", - "cost_per_1m_in": 0.39999999999999997, - "cost_per_1m_out": 1.7999999999999998, + "cost_per_1m_in": 0.28, + "cost_per_1m_out": 1.2, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 262144, - "default_max_tokens": 131072, + "default_max_tokens": 26214, "can_reason": false, "supports_attachments": false, "options": {} @@ -2774,12 +2793,12 @@ { "id": "qwen/qwen3-next-80b-a3b-instruct", "name": "Qwen: Qwen3 Next 80B A3B Instruct", - "cost_per_1m_in": 0.14, - "cost_per_1m_out": 1.4, + "cost_per_1m_in": 0.09, + "cost_per_1m_out": 1.1, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 262144, - "default_max_tokens": 131072, + "default_max_tokens": 26214, "can_reason": false, "supports_attachments": false, "options": {} @@ -2787,8 +2806,8 @@ { "id": "qwen/qwen3-next-80b-a3b-thinking", "name": "Qwen: Qwen3 Next 80B A3B Thinking", - "cost_per_1m_in": 0.3, - "cost_per_1m_out": 0.3, + "cost_per_1m_in": 0.15, + "cost_per_1m_out": 1.2, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 262144, @@ -2803,6 +2822,19 @@ "supports_attachments": false, "options": {} }, + { + "id": "qwen/qwen3-vl-235b-a22b-instruct", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "cost_per_1m_in": 0.39999999999999997, + "cost_per_1m_out": 1.5999999999999999, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 16384, + "can_reason": false, + "supports_attachments": true, + "options": {} + }, { "id": "qwen/qwen3-vl-30b-a3b-instruct", "name": "Qwen: Qwen3 VL 30B A3B Instruct", @@ -3153,9 +3185,9 @@ "cost_per_1m_in": 0.3, "cost_per_1m_out": 0.8999999999999999, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_out_cached": 0.049999999999999996, "context_window": 131072, - "default_max_tokens": 13107, + "default_max_tokens": 12000, "can_reason": true, "reasoning_levels": [ "low", @@ -3169,10 +3201,10 @@ { "id": "x-ai/grok-3", "name": "xAI: Grok 3", - "cost_per_1m_in": 3, - "cost_per_1m_out": 15, + "cost_per_1m_in": 5, + "cost_per_1m_out": 25, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.75, + "cost_per_1m_out_cached": 1.25, "context_window": 131072, "default_max_tokens": 13107, "can_reason": false, @@ -3182,10 +3214,10 @@ { "id": "x-ai/grok-3-beta", "name": "xAI: Grok 3 Beta", - "cost_per_1m_in": 3, - "cost_per_1m_out": 15, + "cost_per_1m_in": 5, + "cost_per_1m_out": 25, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.75, + "cost_per_1m_out_cached": 1.25, "context_window": 131072, "default_max_tokens": 13107, "can_reason": false, From 79fde0f57c5c29b783e0579db53ec1dbbcc17be2 Mon Sep 17 00:00:00 2001 From: Kujtim Hoxha Date: Tue, 16 Dec 2025 14:17:33 +0100 Subject: [PATCH 3/4] chore: update deepseek models --- internal/providers/configs/deepseek.json | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/providers/configs/deepseek.json b/internal/providers/configs/deepseek.json index bb54cc2a..7a572b4d 100644 --- a/internal/providers/configs/deepseek.json +++ b/internal/providers/configs/deepseek.json @@ -9,11 +9,11 @@ "models": [ { "id": "deepseek-chat", - "name": "DeepSeek-V3.1 (Non-thinking Mode)", - "cost_per_1m_in": 0.56, - "cost_per_1m_out": 1.68, - "cost_per_1m_in_cached": 0.07, - "cost_per_1m_out_cached": 1.68, + "name": "DeepSeek-V3.2 (Non-thinking Mode)", + "cost_per_1m_in": 0.28, + "cost_per_1m_out": 0.42, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0.028, "context_window": 128000, "default_max_tokens": 4000, "can_reason": false, @@ -21,11 +21,11 @@ }, { "id": "deepseek-reasoner", - "name": "DeepSeek-V3.1 (Thinking Mode)", - "cost_per_1m_in": 0.56, - "cost_per_1m_out": 1.68, - "cost_per_1m_in_cached": 0.07, - "cost_per_1m_out_cached": 1.68, + "name": "DeepSeek-V3.2 (Thinking Mode)", + "cost_per_1m_in": 0.28, + "cost_per_1m_out": 0.42, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0.028, "context_window": 128000, "default_max_tokens": 32000, "can_reason": true, From 3cc5d532457eed079d3e6c76a2d353918ed3581c Mon Sep 17 00:00:00 2001 From: Charm <124303983+charmcli@users.noreply.github.com> Date: Wed, 17 Dec 2025 02:39:29 +0000 Subject: [PATCH 4/4] chore: auto-update generated files --- internal/providers/configs/openrouter.json | 152 +++++++++++++-------- 1 file changed, 92 insertions(+), 60 deletions(-) diff --git a/internal/providers/configs/openrouter.json b/internal/providers/configs/openrouter.json index 110d9a93..e86218e2 100644 --- a/internal/providers/configs/openrouter.json +++ b/internal/providers/configs/openrouter.json @@ -193,8 +193,8 @@ "name": "Anthropic: Claude 3.5 Sonnet", "cost_per_1m_in": 6, "cost_per_1m_out": 30, - "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_in_cached": 7.5, + "cost_per_1m_out_cached": 0.6, "context_window": 200000, "default_max_tokens": 4096, "can_reason": false, @@ -209,7 +209,7 @@ "cost_per_1m_in_cached": 3.75, "cost_per_1m_out_cached": 0.3, "context_window": 200000, - "default_max_tokens": 32000, + "default_max_tokens": 64000, "can_reason": true, "reasoning_levels": [ "low", @@ -592,12 +592,12 @@ { "id": "deepseek/deepseek-v3.2", "name": "DeepSeek: DeepSeek V3.2", - "cost_per_1m_in": 0.26, + "cost_per_1m_in": 0.24, "cost_per_1m_out": 0.38, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_out_cached": 0.19, "context_window": 163840, - "default_max_tokens": 32768, + "default_max_tokens": 81920, "can_reason": true, "reasoning_levels": [ "low", @@ -614,9 +614,9 @@ "cost_per_1m_in": 0.21, "cost_per_1m_out": 0.32, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_out_cached": 0.16799999999999998, "context_window": 163840, - "default_max_tokens": 32768, + "default_max_tokens": 16384, "can_reason": true, "reasoning_levels": [ "low", @@ -649,12 +649,12 @@ { "id": "deepseek/deepseek-r1-0528", "name": "DeepSeek: R1 0528", - "cost_per_1m_in": 0.56, - "cost_per_1m_out": 2, + "cost_per_1m_in": 0.39999999999999997, + "cost_per_1m_out": 1.75, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.28, + "cost_per_1m_out_cached": 0, "context_window": 163840, - "default_max_tokens": 16384, + "default_max_tokens": 81920, "can_reason": true, "reasoning_levels": [ "low", @@ -788,7 +788,7 @@ "cost_per_1m_in_cached": 0.3833, "cost_per_1m_out_cached": 0.075, "context_window": 1048576, - "default_max_tokens": 32768, + "default_max_tokens": 32767, "can_reason": true, "reasoning_levels": [ "low", @@ -943,12 +943,12 @@ { "id": "meta-llama/llama-3.1-70b-instruct", "name": "Meta: Llama 3.1 70B Instruct", - "cost_per_1m_in": 0.39999999999999997, - "cost_per_1m_out": 0.39999999999999997, + "cost_per_1m_in": 0.88, + "cost_per_1m_out": 0.88, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 8192, + "default_max_tokens": 13107, "can_reason": false, "supports_attachments": false, "options": {} @@ -956,12 +956,12 @@ { "id": "meta-llama/llama-3.1-8b-instruct", "name": "Meta: Llama 3.1 8B Instruct", - "cost_per_1m_in": 0.03, - "cost_per_1m_out": 0.049999999999999996, + "cost_per_1m_in": 0.02, + "cost_per_1m_out": 0.06, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 8192, + "default_max_tokens": 13107, "can_reason": false, "supports_attachments": false, "options": {} @@ -1008,12 +1008,12 @@ { "id": "meta-llama/llama-4-maverick", "name": "Meta: Llama 4 Maverick", - "cost_per_1m_in": 0.27, - "cost_per_1m_out": 0.85, + "cost_per_1m_in": 0.35, + "cost_per_1m_out": 1.15, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 1048576, - "default_max_tokens": 104857, + "context_window": 524288, + "default_max_tokens": 4096, "can_reason": false, "supports_attachments": true, "options": {} @@ -1401,6 +1401,19 @@ "supports_attachments": true, "options": {} }, + { + "id": "mistralai/mistral-small-creative", + "name": "Mistral: Mistral Small Creative", + "cost_per_1m_in": 0.09999999999999999, + "cost_per_1m_out": 0.3, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 32768, + "default_max_tokens": 3276, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, { "id": "mistralai/mixtral-8x22b-instruct", "name": "Mistral: Mixtral 8x22B Instruct", @@ -1469,10 +1482,10 @@ { "id": "moonshotai/kimi-k2", "name": "MoonshotAI: Kimi K2 0711", - "cost_per_1m_in": 0.5, - "cost_per_1m_out": 2.4, + "cost_per_1m_in": 0.6, + "cost_per_1m_out": 2.5, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_out_cached": 0.15, "context_window": 131072, "default_max_tokens": 13107, "can_reason": false, @@ -2141,8 +2154,8 @@ { "id": "openai/gpt-oss-120b", "name": "OpenAI: gpt-oss-120b", - "cost_per_1m_in": 0.09999999999999999, - "cost_per_1m_out": 0.49, + "cost_per_1m_in": 0.039, + "cost_per_1m_out": 0.19, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, @@ -2198,8 +2211,8 @@ { "id": "openai/gpt-oss-20b", "name": "OpenAI: gpt-oss-20b", - "cost_per_1m_in": 0.04, - "cost_per_1m_out": 0.15, + "cost_per_1m_in": 0.03, + "cost_per_1m_out": 0.14, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, @@ -2222,7 +2235,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 13107, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -2581,12 +2594,12 @@ { "id": "qwen/qwen3-235b-a22b-2507", "name": "Qwen: Qwen3 235B A22B Instruct 2507", - "cost_per_1m_in": 0.19999999999999998, - "cost_per_1m_out": 0.6, + "cost_per_1m_in": 0.08, + "cost_per_1m_out": 0.55, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 262144, - "default_max_tokens": 26214, + "default_max_tokens": 131072, "can_reason": false, "supports_attachments": false, "options": {} @@ -2594,12 +2607,12 @@ { "id": "qwen/qwen3-235b-a22b-thinking-2507", "name": "Qwen: Qwen3 235B A22B Thinking 2507", - "cost_per_1m_in": 0.11, - "cost_per_1m_out": 0.6, + "cost_per_1m_in": 0.65, + "cost_per_1m_out": 3, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 262144, - "default_max_tokens": 131072, + "default_max_tokens": 26214, "can_reason": true, "reasoning_levels": [ "low", @@ -2613,8 +2626,8 @@ { "id": "qwen/qwen3-30b-a3b", "name": "Qwen: Qwen3 30B A3B", - "cost_per_1m_in": 0.09, - "cost_per_1m_out": 0.44999999999999996, + "cost_per_1m_in": 0.08, + "cost_per_1m_out": 0.28, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, @@ -2703,11 +2716,11 @@ "id": "qwen/qwen3-coder-30b-a3b-instruct", "name": "Qwen: Qwen3 Coder 30B A3B Instruct", "cost_per_1m_in": 0.07, - "cost_per_1m_out": 0.28, + "cost_per_1m_out": 0.27, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 262144, - "default_max_tokens": 131072, + "context_window": 160000, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": false, "options": {} @@ -2793,12 +2806,12 @@ { "id": "qwen/qwen3-next-80b-a3b-instruct", "name": "Qwen: Qwen3 Next 80B A3B Instruct", - "cost_per_1m_in": 0.09, - "cost_per_1m_out": 1.1, + "cost_per_1m_in": 0.09999999999999999, + "cost_per_1m_out": 0.7999999999999999, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 262144, - "default_max_tokens": 26214, + "default_max_tokens": 131072, "can_reason": false, "supports_attachments": false, "options": {} @@ -2806,8 +2819,8 @@ { "id": "qwen/qwen3-next-80b-a3b-thinking", "name": "Qwen: Qwen3 Next 80B A3B Thinking", - "cost_per_1m_in": 0.15, - "cost_per_1m_out": 1.2, + "cost_per_1m_in": 0.3, + "cost_per_1m_out": 0.3, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 262144, @@ -2825,12 +2838,12 @@ { "id": "qwen/qwen3-vl-235b-a22b-instruct", "name": "Qwen: Qwen3 VL 235B A22B Instruct", - "cost_per_1m_in": 0.39999999999999997, - "cost_per_1m_out": 1.5999999999999999, + "cost_per_1m_in": 0.3, + "cost_per_1m_out": 1.2, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 131072, - "default_max_tokens": 16384, + "context_window": 262144, + "default_max_tokens": 131072, "can_reason": false, "supports_attachments": true, "options": {} @@ -3018,7 +3031,7 @@ "id": "alibaba/tongyi-deepresearch-30b-a3b", "name": "Tongyi DeepResearch 30B A3B", "cost_per_1m_in": 0.09, - "cost_per_1m_out": 0.44999999999999996, + "cost_per_1m_out": 0.39999999999999997, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, @@ -3052,6 +3065,25 @@ "supports_attachments": false, "options": {} }, + { + "id": "xiaomi/mimo-v2-flash:free", + "name": "Xiaomi: MiMo-V2-Flash (free)", + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 131072, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, { "id": "z-ai/glm-4-32b", "name": "Z.AI: GLM 4 32B ", @@ -3185,9 +3217,9 @@ "cost_per_1m_in": 0.3, "cost_per_1m_out": 0.8999999999999999, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.049999999999999996, + "cost_per_1m_out_cached": 0.055, "context_window": 131072, - "default_max_tokens": 12000, + "default_max_tokens": 16384, "can_reason": true, "reasoning_levels": [ "low", @@ -3227,10 +3259,10 @@ { "id": "x-ai/grok-3-mini", "name": "xAI: Grok 3 Mini", - "cost_per_1m_in": 0.3, - "cost_per_1m_out": 0.5, + "cost_per_1m_in": 0.6, + "cost_per_1m_out": 4, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.075, + "cost_per_1m_out_cached": 0.15, "context_window": 131072, "default_max_tokens": 13107, "can_reason": true, @@ -3246,10 +3278,10 @@ { "id": "x-ai/grok-3-mini-beta", "name": "xAI: Grok 3 Mini Beta", - "cost_per_1m_in": 0.3, - "cost_per_1m_out": 0.5, + "cost_per_1m_in": 0.6, + "cost_per_1m_out": 4, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.075, + "cost_per_1m_out_cached": 0.15, "context_window": 131072, "default_max_tokens": 13107, "can_reason": true,