diff --git a/providers/infinity/logo.svg b/providers/infinity/logo.svg new file mode 100644 index 000000000..be7ced745 --- /dev/null +++ b/providers/infinity/logo.svg @@ -0,0 +1,3 @@ + + + diff --git a/providers/infinity/models/QuantTrio/DeepSeek-V3.2-AWQ.toml b/providers/infinity/models/QuantTrio/DeepSeek-V3.2-AWQ.toml new file mode 100644 index 000000000..65c50b3a5 --- /dev/null +++ b/providers/infinity/models/QuantTrio/DeepSeek-V3.2-AWQ.toml @@ -0,0 +1,23 @@ +# DeepSeek-V3.2-AWQ - High-performance MoE model +name = "DeepSeek-V3.2-AWQ" +description = "A next-generation open language model that balances strong reasoning, efficient long-context processing and everyday agent tasks with high performance and practical inference efficiency" +family = "deepseek" +release_date = "2025-10-01" +last_updated = "2026-01-15" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.28 +output = 0.42 + +[limit] +context = 32_768 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/infinity/models/QuantTrio/GLM-4.7-AWQ.toml b/providers/infinity/models/QuantTrio/GLM-4.7-AWQ.toml new file mode 100644 index 000000000..5f2dcb55e --- /dev/null +++ b/providers/infinity/models/QuantTrio/GLM-4.7-AWQ.toml @@ -0,0 +1,28 @@ +# GLM-4.7-AWQ - 358B parameter reasoning model +name = "GLM-4.7-AWQ" +description = "GLM 4.7 with AWQ quantization (358B params)" +family = "glm" +release_date = "2025-12-22" +last_updated = "2026-01-15" +knowledge = "2025-04" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[interleaved] +field = "reasoning_content" + +[cost] +input = 0.40 +output = 1.50 +cache_read = 0.20 + +[limit] +context = 65_536 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/infinity/models/nvidia/Kimi-K2-Thinking-NVFP4.toml b/providers/infinity/models/nvidia/Kimi-K2-Thinking-NVFP4.toml new file mode 100644 index 000000000..c9251d940 --- /dev/null +++ b/providers/infinity/models/nvidia/Kimi-K2-Thinking-NVFP4.toml @@ -0,0 +1,26 @@ +# Kimi-K2-Thinking-NVFP4 - Moonshot's reasoning model in NVFP4 format +name = "Kimi-K2-Thinking-NVFP4" +description = "Built for long documents and deep analysis, Kimi K2 excels at reading, summarization, and multi-step reasoning across massive contexts." +family = "kimi-thinking" +release_date = "2025-11-06" +last_updated = "2026-01-15" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[interleaved] +field = "reasoning_content" + +[cost] +input = 0.40 +output = 1.75 + +[limit] +context = 32_768 +output = 32_768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/infinity/models/openai/gpt-oss-120b.toml b/providers/infinity/models/openai/gpt-oss-120b.toml new file mode 100644 index 000000000..66b5fdd71 --- /dev/null +++ b/providers/infinity/models/openai/gpt-oss-120b.toml @@ -0,0 +1,23 @@ +# GPT-OSS-120B - OpenAI's open-source reasoning model +name = "GPT-OSS-120B" +description = "A large-scale open model designed for high-quality general intelligence. Ideal for advanced reasoning, rich text generation, and enterprise-grade applications." +family = "gpt-oss" +release_date = "2025-08-05" +last_updated = "2026-01-15" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.04 +output = 0.19 + +[limit] +context = 32_768 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/infinity/provider.toml b/providers/infinity/provider.toml new file mode 100644 index 000000000..154602a7c --- /dev/null +++ b/providers/infinity/provider.toml @@ -0,0 +1,6 @@ +name = "Infinity" +env = ["INFINITY_API_KEY"] +npm = "@ai-sdk/openai-compatible" +api = "https://api.infinity.inc/v1" +doc = "https://infinity.inc/docs" +