From 5f1028f8e4fdf28b839f3b3b6d0b893f713df835 Mon Sep 17 00:00:00 2001 From: "Matthew (BlueT) Lien" Date: Tue, 27 Jan 2026 06:12:23 +0800 Subject: [PATCH 1/2] feat(models): add google-vertex-openai provider for partner models Add support for Vertex AI partner models that use the OpenAI-compatible endpoint: - GLM-4.7: Advanced multilingual MoE model with interleaved thinking - Llama 4 Maverick: Multimodal model with 1M context - Llama 3.3 70B: Text-only instruction model - Qwen3 235B Instruct (2507): Large-scale MoE instruction-tuned model - DeepSeek V3.1: Advanced reasoning model These models use the OpenAI-compatible Chat Completions API at: /v1/projects/{project}/locations/{location}/endpoints/openapi/chat/completions The provider uses @ai-sdk/openai-compatible with Google OAuth injection. --- .../models/deepseek-v3.1-maas.toml | 22 ++++++++++++++++ .../models/glm-4.7-maas.toml | 25 +++++++++++++++++++ .../models/llama-3.3-70b-instruct-maas.toml | 22 ++++++++++++++++ ...ama-4-maverick-17b-128e-instruct-maas.toml | 22 ++++++++++++++++ .../qwen3-235b-a22b-instruct-2507-maas.toml | 22 ++++++++++++++++ providers/google-vertex-openai/provider.toml | 4 +++ 6 files changed, 117 insertions(+) create mode 100644 providers/google-vertex-openai/models/deepseek-v3.1-maas.toml create mode 100644 providers/google-vertex-openai/models/glm-4.7-maas.toml create mode 100644 providers/google-vertex-openai/models/llama-3.3-70b-instruct-maas.toml create mode 100644 providers/google-vertex-openai/models/llama-4-maverick-17b-128e-instruct-maas.toml create mode 100644 providers/google-vertex-openai/models/qwen3-235b-a22b-instruct-2507-maas.toml create mode 100644 providers/google-vertex-openai/provider.toml diff --git a/providers/google-vertex-openai/models/deepseek-v3.1-maas.toml b/providers/google-vertex-openai/models/deepseek-v3.1-maas.toml new file mode 100644 index 000000000..22b1b3b28 --- /dev/null +++ b/providers/google-vertex-openai/models/deepseek-v3.1-maas.toml @@ -0,0 +1,22 @@ +name = "DeepSeek V3.1" 
+family = "deepseek" +release_date = "2025-06-01" +last_updated = "2025-06-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-05" +open_weights = true + +[cost] +input = 0.27 +output = 1.10 + +[limit] +context = 131072 +output = 32768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/google-vertex-openai/models/glm-4.7-maas.toml b/providers/google-vertex-openai/models/glm-4.7-maas.toml new file mode 100644 index 000000000..f0dcd2c15 --- /dev/null +++ b/providers/google-vertex-openai/models/glm-4.7-maas.toml @@ -0,0 +1,25 @@ +name = "GLM-4.7" +family = "glm" +release_date = "2025-12-22" +last_updated = "2025-12-22" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[interleaved] +field = "reasoning_content" + +[cost] +input = 0.6 +output = 2.2 + +[limit] +context = 204800 +output = 131072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/google-vertex-openai/models/llama-3.3-70b-instruct-maas.toml b/providers/google-vertex-openai/models/llama-3.3-70b-instruct-maas.toml new file mode 100644 index 000000000..984220fb1 --- /dev/null +++ b/providers/google-vertex-openai/models/llama-3.3-70b-instruct-maas.toml @@ -0,0 +1,22 @@ +name = "Llama 3.3 70B Instruct" +family = "llama" +release_date = "2024-12-06" +last_updated = "2024-12-06" +attachment = false +reasoning = false +temperature = true +tool_call = true +knowledge = "2023-12" +open_weights = true + +[cost] +input = 0.20 +output = 0.20 + +[limit] +context = 131072 +output = 32768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/google-vertex-openai/models/llama-4-maverick-17b-128e-instruct-maas.toml b/providers/google-vertex-openai/models/llama-4-maverick-17b-128e-instruct-maas.toml new file mode 100644 index 000000000..8d3242a26 --- /dev/null +++ 
b/providers/google-vertex-openai/models/llama-4-maverick-17b-128e-instruct-maas.toml @@ -0,0 +1,22 @@ +name = "Llama 4 Maverick 17B-128E" +family = "llama" +release_date = "2025-04-05" +last_updated = "2025-04-05" +attachment = true +reasoning = false +temperature = true +tool_call = true +knowledge = "2024-08" +open_weights = true + +[cost] +input = 0.18 +output = 0.59 + +[limit] +context = 1048576 +output = 131072 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/google-vertex-openai/models/qwen3-235b-a22b-instruct-2507-maas.toml b/providers/google-vertex-openai/models/qwen3-235b-a22b-instruct-2507-maas.toml new file mode 100644 index 000000000..ad27a64c1 --- /dev/null +++ b/providers/google-vertex-openai/models/qwen3-235b-a22b-instruct-2507-maas.toml @@ -0,0 +1,22 @@ +name = "Qwen3 235B Instruct" +family = "qwen" +release_date = "2025-07-25" +last_updated = "2025-07-25" +attachment = false +reasoning = false +temperature = true +tool_call = true +knowledge = "2025-06" +open_weights = true + +[cost] +input = 0.30 +output = 1.20 + +[limit] +context = 131072 +output = 32768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/google-vertex-openai/provider.toml b/providers/google-vertex-openai/provider.toml new file mode 100644 index 000000000..fef5aab69 --- /dev/null +++ b/providers/google-vertex-openai/provider.toml @@ -0,0 +1,4 @@ +name = "Vertex (OpenAI-Compatible)" +env = ["GOOGLE_CLOUD_PROJECT", "GOOGLE_VERTEX_PROJECT", "GCP_PROJECT", "GCLOUD_PROJECT", "GOOGLE_CLOUD_LOCATION", "VERTEX_LOCATION", "GOOGLE_APPLICATION_CREDENTIALS"] +npm = "@ai-sdk/openai-compatible" +doc = "https://cloud.google.com/vertex-ai/generative-ai/docs/maas/call-open-model-apis" From adeba4ee50bb4600e9a3f317524074100a213c88 Mon Sep 17 00:00:00 2001 From: "Matthew (BlueT) Lien" Date: Tue, 27 Jan 2026 06:12:24 +0800 Subject: [PATCH 2/2] fix(models): remove non-working models from google-vertex provider Remove GLM and GPT-OSS models 
from the google-vertex provider because the @ai-sdk/google-vertex SDK only supports Gemini models. GLM-4.7 has been moved to the new google-vertex-openai provider, which uses the correct OpenAI-compatible endpoint; the GPT-OSS models are removed here without a replacement entry in this series. --- .../models/openai/gpt-oss-120b-maas.toml | 21 ---------------- .../models/openai/gpt-oss-20b-maas.toml | 21 ---------------- .../models/zai-org/glm-4.7-maas.toml | 25 ------------------- 3 files changed, 67 deletions(-) delete mode 100644 providers/google-vertex/models/openai/gpt-oss-120b-maas.toml delete mode 100644 providers/google-vertex/models/openai/gpt-oss-20b-maas.toml delete mode 100644 providers/google-vertex/models/zai-org/glm-4.7-maas.toml diff --git a/providers/google-vertex/models/openai/gpt-oss-120b-maas.toml b/providers/google-vertex/models/openai/gpt-oss-120b-maas.toml deleted file mode 100644 index aeab25caf..000000000 --- a/providers/google-vertex/models/openai/gpt-oss-120b-maas.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "GPT OSS 120B" -family = "gpt-oss" -release_date = "2025-08-05" -last_updated = "2025-08-05" -attachment = false -reasoning = true -temperature = true -tool_call = true -open_weights = true - -[cost] -input = 0.09 -output = 0.36 - -[limit] -context = 131_072 -output = 32_768 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/google-vertex/models/openai/gpt-oss-20b-maas.toml b/providers/google-vertex/models/openai/gpt-oss-20b-maas.toml deleted file mode 100644 index 6c943fd99..000000000 --- a/providers/google-vertex/models/openai/gpt-oss-20b-maas.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "GPT OSS 20B" -family = "gpt-oss" -release_date = "2025-08-05" -last_updated = "2025-08-05" -attachment = false -reasoning = true -temperature = true -tool_call = true -open_weights = true - -[cost] -input = 0.07 -output = 0.25 - -[limit] -context = 131_072 -output = 32_768 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/google-vertex/models/zai-org/glm-4.7-maas.toml 
b/providers/google-vertex/models/zai-org/glm-4.7-maas.toml deleted file mode 100644 index f0dcd2c15..000000000 --- a/providers/google-vertex/models/zai-org/glm-4.7-maas.toml +++ /dev/null @@ -1,25 +0,0 @@ -name = "GLM-4.7" -family = "glm" -release_date = "2025-12-22" -last_updated = "2025-12-22" -attachment = false -reasoning = true -temperature = true -tool_call = true -knowledge = "2025-04" -open_weights = true - -[interleaved] -field = "reasoning_content" - -[cost] -input = 0.6 -output = 2.2 - -[limit] -context = 204800 -output = 131072 - -[modalities] -input = ["text"] -output = ["text"]