From ab97aa4b84dff6d7e241e171048f71872b99cc77 Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Mon, 6 Oct 2025 15:37:27 +0000 Subject: [PATCH 1/2] Add Google Model Garden support for completion and chat_completion tasks --- output/openapi/elasticsearch-openapi.json | 62 +++++- .../elasticsearch-serverless-openapi.json | 62 +++++- output/schema/schema.json | 198 ++++++++++++++---- output/typescript/types.ts | 14 +- specification/inference/_types/CommonTypes.ts | 56 ++++- specification/inference/_types/TaskType.ts | 2 + .../PutGoogleVertexAiRequestExample3.yaml | 17 ++ .../PutGoogleVertexAiRequestExample4.yaml | 17 ++ .../PutGoogleVertexAiRequestExample5.yaml | 14 ++ .../PutGoogleVertexAiRequestExample6.yaml | 14 ++ 10 files changed, 388 insertions(+), 68 deletions(-) create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index ff5cbaf31d..9fca4bba7b 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22729,6 +22729,26 @@ "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "value": "{\n 
\"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -102554,22 +102574,38 @@ "inference._types.GoogleVertexAIServiceSettings": { "type": "object", "properties": { + "provider": { + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" + } + ] + }, + "url": { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, + "streaming_url": { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, "location": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" }, - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", 
"type": "string" }, "model_id": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" }, - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "type": "string" }, "project_id": { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored.", "type": "string" }, "rate_limit": { @@ -102593,12 +102629,20 @@ } }, "required": [ - "location", - "model_id", - "project_id", "service_account_json" ] }, + "inference._types.GoogleModelGardenProvider": { + "type": "string", + "enum": [ + "google", + "anthropic", + "meta", + "hugging_face", + "mistral", + "ai21" + ] + }, "inference._types.GoogleVertexAITaskSettings": { "type": "object", "properties": { @@ -102620,6 +102664,10 @@ "$ref": "#/components/schemas/inference._types.ThinkingConfig" } ] + }, + "max_tokens": { + "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "type": "number" } } }, @@ -102663,6 +102711,8 @@ "inference._types.TaskTypeGoogleVertexAI": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding", "rerank" ] diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 62ef662a4f..e7214b8641 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13713,6 +13713,26 @@ "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT 
_inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -66618,22 +66638,38 @@ "inference._types.GoogleVertexAIServiceSettings": { "type": "object", "properties": { + "provider": { + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" + } + ] + }, + "url": { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, + "streaming_url": { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, "location": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" }, - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "type": "string" }, "model_id": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" }, - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "type": "string" }, "project_id": { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored.", "type": "string" }, "rate_limit": { @@ -66657,12 +66693,20 @@ } }, "required": [ - "location", - "model_id", - "project_id", "service_account_json" ] }, + "inference._types.GoogleModelGardenProvider": { + "type": "string", + "enum": [ + "google", + "anthropic", + "meta", + "hugging_face", + "mistral", + "ai21" + ] + }, "inference._types.GoogleVertexAITaskSettings": { "type": "object", 
"properties": { @@ -66684,6 +66728,10 @@ "$ref": "#/components/schemas/inference._types.ThinkingConfig" } ] + }, + "max_tokens": { + "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "type": "number" } } }, @@ -66727,6 +66775,8 @@ "inference._types.TaskTypeGoogleVertexAI": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding", "rerank" ] diff --git a/output/schema/schema.json b/output/schema/schema.json index 42aa410e93..3330a4154e 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -172049,6 +172049,34 @@ }, "specLocation": "inference/_types/CommonTypes.ts#L1383-L1386" }, + { + "kind": "enum", + "members": [ + { + "name": "google" + }, + { + "name": "anthropic" + }, + { + "name": "meta" + }, + { + "name": "hugging_face" + }, + { + "name": "mistral" + }, + { + "name": "ai21" + } + ], + "name": { + "name": "GoogleModelGardenProvider", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1457-L1464" + }, { "kind": "interface", "name": { @@ -172057,11 +172085,47 @@ }, "properties": [ { - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "name": "provider", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleModelGardenProvider", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "name": "url", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "name": "streaming_url", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "extDocId": "googlevertexai-locations", "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations", "name": "location", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172071,11 +172135,11 @@ } }, { - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "extDocId": "googlevertexai-models", "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api", "name": "model_id", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172085,9 +172149,9 @@ } }, { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is 
ignored.", "name": "project_id", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172135,7 +172199,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1392-L1425" + "specLocation": "inference/_types/CommonTypes.ts#L1392-L1455" }, { "kind": "enum", @@ -172148,7 +172212,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1458-L1460" + "specLocation": "inference/_types/CommonTypes.ts#L1502-L1504" }, { "kind": "interface", @@ -172194,9 +172258,21 @@ "namespace": "inference._types" } } + }, + { + "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "name": "max_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1427-L1442" + "specLocation": "inference/_types/CommonTypes.ts#L1466-L1486" }, { "kind": "enum", @@ -172218,7 +172294,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1451-L1456" + "specLocation": "inference/_types/CommonTypes.ts#L1495-L1500" }, { "kind": "interface", @@ -172280,7 +172356,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1462-L1494" + "specLocation": "inference/_types/CommonTypes.ts#L1506-L1538" }, { "kind": "enum", @@ -172293,7 +172369,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1515-L1517" + "specLocation": "inference/_types/CommonTypes.ts#L1559-L1561" }, { "kind": "interface", @@ -172327,7 +172403,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1496-L1506" + "specLocation": "inference/_types/CommonTypes.ts#L1540-L1550" }, { "kind": "enum", @@ -172349,7 +172425,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1508-L1513" + "specLocation": "inference/_types/CommonTypes.ts#L1552-L1557" }, { "kind": "interface", @@ -173581,7 +173657,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1519-L1548" + "specLocation": "inference/_types/CommonTypes.ts#L1563-L1592" }, { "kind": "enum", @@ -173594,7 +173670,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1578-L1580" + "specLocation": "inference/_types/CommonTypes.ts#L1622-L1624" }, { "kind": "enum", @@ -173613,7 +173689,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1582-L1586" + "specLocation": "inference/_types/CommonTypes.ts#L1626-L1630" }, { "kind": "interface", @@ -173659,7 +173735,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1550-L1571" + "specLocation": "inference/_types/CommonTypes.ts#L1594-L1615" }, { "kind": "enum", @@ -173675,7 +173751,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1573-L1576" + "specLocation": "inference/_types/CommonTypes.ts#L1617-L1620" }, { "kind": "enum", @@ -173697,7 +173773,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1588-L1593" + "specLocation": 
"inference/_types/CommonTypes.ts#L1632-L1637" }, { "kind": "interface", @@ -173769,7 +173845,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1595-L1625" + "specLocation": "inference/_types/CommonTypes.ts#L1639-L1669" }, { "kind": "enum", @@ -173782,7 +173858,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1633-L1635" + "specLocation": "inference/_types/CommonTypes.ts#L1677-L1679" }, { "kind": "enum", @@ -173801,7 +173877,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1637-L1641" + "specLocation": "inference/_types/CommonTypes.ts#L1681-L1685" }, { "kind": "enum", @@ -173820,7 +173896,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1627-L1631" + "specLocation": "inference/_types/CommonTypes.ts#L1671-L1675" }, { "kind": "interface", @@ -173978,7 +174054,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1643-L1670" + "specLocation": "inference/_types/CommonTypes.ts#L1687-L1714" }, { "kind": "enum", @@ -173991,7 +174067,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1678-L1680" + "specLocation": "inference/_types/CommonTypes.ts#L1722-L1724" }, { "kind": "enum", @@ -174010,7 +174086,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1672-L1676" + "specLocation": "inference/_types/CommonTypes.ts#L1716-L1720" }, { "kind": "interface", @@ -174097,7 +174173,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1682-L1724" + "specLocation": "inference/_types/CommonTypes.ts#L1726-L1768" }, { "kind": "enum", @@ -174110,7 +174186,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1740-L1742" + "specLocation": "inference/_types/CommonTypes.ts#L1784-L1786" }, { "kind": "interface", @@ -174132,7 +174208,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1726-L1732" + "specLocation": "inference/_types/CommonTypes.ts#L1770-L1776" }, { "kind": "enum", @@ -174151,7 +174227,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1734-L1738" + "specLocation": "inference/_types/CommonTypes.ts#L1778-L1782" }, { "kind": "interface", @@ -174728,6 +174804,12 @@ { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" }, @@ -174739,7 +174821,7 @@ "name": "TaskTypeGoogleVertexAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L113-L116" + "specLocation": "inference/_types/TaskType.ts#L113-L118" }, { "kind": "enum", @@ -174761,7 +174843,7 @@ "name": "TaskTypeHuggingFace", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L118-L123" + "specLocation": "inference/_types/TaskType.ts#L120-L125" }, { "kind": "enum", @@ -174796,7 +174878,7 @@ "name": "TaskTypeLlama", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L125-L129" + "specLocation": "inference/_types/TaskType.ts#L127-L131" }, { "kind": "enum", @@ -174815,7 +174897,7 @@ "name": "TaskTypeMistral", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L131-L135" + "specLocation": "inference/_types/TaskType.ts#L133-L137" }, { "kind": 
"enum", @@ -174834,7 +174916,7 @@ "name": "TaskTypeOpenAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L137-L141" + "specLocation": "inference/_types/TaskType.ts#L139-L143" }, { "kind": "enum", @@ -174850,7 +174932,7 @@ "name": "TaskTypeVoyageAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L143-L146" + "specLocation": "inference/_types/TaskType.ts#L145-L148" }, { "kind": "enum", @@ -174869,7 +174951,7 @@ "name": "TaskTypeWatsonx", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L148-L152" + "specLocation": "inference/_types/TaskType.ts#L150-L154" }, { "kind": "interface", @@ -174991,7 +175073,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1444-L1449" + "specLocation": "inference/_types/CommonTypes.ts#L1488-L1493" }, { "kind": "interface", @@ -175137,7 +175219,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1744-L1775" + "specLocation": "inference/_types/CommonTypes.ts#L1788-L1819" }, { "kind": "enum", @@ -175150,7 +175232,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1808-L1810" + "specLocation": "inference/_types/CommonTypes.ts#L1852-L1854" }, { "kind": "interface", @@ -175210,7 +175292,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1777-L1801" + "specLocation": "inference/_types/CommonTypes.ts#L1821-L1845" }, { "kind": "enum", @@ -175226,7 +175308,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1803-L1806" + "specLocation": "inference/_types/CommonTypes.ts#L1847-L1850" }, { "kind": "interface", @@ -175314,7 +175396,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1812-L1850" + "specLocation": "inference/_types/CommonTypes.ts#L1856-L1894" }, { "kind": "enum", @@ -175327,7 +175409,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1858-L1860" + "specLocation": "inference/_types/CommonTypes.ts#L1902-L1904" }, { "kind": "enum", @@ -175346,7 +175428,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1852-L1856" + "specLocation": "inference/_types/CommonTypes.ts#L1896-L1900" }, { "kind": "request", @@ -179107,6 +179189,30 @@ "method_request": "PUT _inference/rerank/google_vertex_ai_rerank", "summary": "A rerank task", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "method_request": "PUT _inference/completion/google_model_garden_anthropic_completion", + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "description": "Run `PUT 
_inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "method_request": "PUT _inference/completion/google_model_garden_meta_completion", + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion", + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 4d84479dd5..19fc293aa1 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14122,10 +14122,15 @@ export interface InferenceGoogleAiStudioServiceSettings { export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding' +export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' | 'meta' | 'hugging_face' | 'mistral' | 'ai21' + export interface InferenceGoogleVertexAIServiceSettings { - location: string - model_id: string - project_id: string + provider?: InferenceGoogleModelGardenProvider + url?: string + streaming_url?: string + location?: string + model_id?: string + project_id?: string rate_limit?: InferenceRateLimitSetting service_account_json: string dimensions?: integer @@ -14137,6 +14142,7 @@ export interface InferenceGoogleVertexAITaskSettings { auto_truncate?: boolean top_n?: integer thinking_config?: InferenceThinkingConfig + max_tokens?: integer } export type InferenceGoogleVertexAITaskType = 'rerank' | 'text_embedding' | 'completion' | 'chat_completion' @@ -14438,7 +14444,7 @@ export type InferenceTaskTypeElasticsearch = 'sparse_embedding' | 'text_embeddin export type InferenceTaskTypeGoogleAIStudio = 'text_embedding' | 'completion' -export type 
InferenceTaskTypeGoogleVertexAI = 'text_embedding' | 'rerank' +export type InferenceTaskTypeGoogleVertexAI = 'chat_completion' | 'completion' | 'text_embedding' | 'rerank' export type InferenceTaskTypeHuggingFace = 'chat_completion' | 'completion' | 'rerank' | 'text_embedding' diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 25d1be939a..de7e267504 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1391,21 +1391,51 @@ export enum GoogleAiServiceType { export class GoogleVertexAIServiceSettings { /** - * The name of the location to use for the inference task. + * The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks. + * In order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`. + * Modes: + * - Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`. + * - Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters. + */ + provider?: GoogleModelGardenProvider + /** + * The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint. + * If both `url` and `streaming_url` are provided, each is used for its respective mode. + * If `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`. + * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`). + * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage. + */ + url?: string + /** + * The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint. + * If both `streaming_url` and `url` are provided, each is used for its respective mode. + * If `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests. + * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`). + * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage. + */ + streaming_url?: string + /** + * The name of the location to use for the inference task for the Google Vertex AI inference task. + * For Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored. * Refer to the Google documentation for the list of supported locations. * @ext_doc_id googlevertexai-locations */ - location: string + location?: string /** * The name of the model to use for the inference task. - * Refer to the Google documentation for the list of supported models. + * For Google Vertex AI `model_id` is mandatory. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored. + * Refer to the Google documentation for the list of supported models for Google Vertex AI. * @ext_doc_id googlevertexai-models */ - model_id: string + model_id?: string /** - * The name of the project to use for the inference task. 
+ * The name of the project to use for the Google Vertex AI inference task. + * For Google Vertex AI `project_id` is mandatory. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored. */ - project_id: string + project_id?: string /** * This setting helps to minimize the number of rate limit errors returned from Google Vertex AI. * By default, the `googlevertexai` service sets the number of requests allowed per minute to 30.000. @@ -1424,6 +1454,15 @@ export class GoogleVertexAIServiceSettings { dimensions?: integer } +export enum GoogleModelGardenProvider { + google, + anthropic, + meta, + hugging_face, + mistral, + ai21 +} + export class GoogleVertexAITaskSettings { /** * For a `text_embedding` task, truncate inputs longer than the maximum token length automatically. @@ -1439,6 +1478,11 @@ export class GoogleVertexAITaskSettings { * @ext_doc_id googlevertexai-thinking */ thinking_config?: ThinkingConfig + /** + * For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider. + * If `max_tokens` is specified - it must be a positive integer. + */ + max_tokens?: integer } export class ThinkingConfig { diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index e0e5882eb3..c5d56e439f 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -111,6 +111,8 @@ export enum TaskTypeGoogleAIStudio { } export enum TaskTypeGoogleVertexAI { + chat_completion, + completion, text_embedding, rerank } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml new file mode 100644 index 0000000000..5cb79753dc --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml @@ -0,0 +1,17 @@ +summary: A completion task for Google Model Garden Anthropic endpoint +description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden. +method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "anthropic", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://streaming_url:streamRawPredict" + }, + "task_settings": { + "max_tokens": 128 + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml new file mode 100644 index 0000000000..52b7ececd8 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml @@ -0,0 +1,17 @@ +summary: A chat_completion task for Google Model Garden Anthropic endpoint +description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden. 
+method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "anthropic", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://streaming_url:streamRawPredict" + }, + "task_settings": { + "max_tokens": 128 + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml new file mode 100644 index 0000000000..750fc0eaf7 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml @@ -0,0 +1,14 @@ +summary: A completion task for Google Model Garden Meta endpoint with single URL provided +description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided. +method_request: 'PUT _inference/completion/google_model_garden_meta_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "meta", + "model_id": "meta/llama-3.3-70b-instruct-maas", + "service_account_json": "service-account-json", + "url": "https://url/openapi/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml new file mode 100644 index 0000000000..8d38a23ca5 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml @@ -0,0 +1,14 @@ +summary: A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided. 
+method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "meta", + "model_id": "meta/llama-3.3-70b-instruct-maas", + "service_account_json": "service-account-json", + "streaming_url": "https://url/openapi/chat/completions" + } + } From 738a415945485fbf66f7941c410fb7c69d1c8aa8 Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Wed, 8 Oct 2025 08:30:55 +0000 Subject: [PATCH 2/2] Fix comments --- output/openapi/elasticsearch-openapi.json | 23 ++--- .../elasticsearch-serverless-openapi.json | 23 ++--- output/schema/schema.json | 96 +++++++------------ output/typescript/types.ts | 2 +- specification/_doc_ids/table.csv | 1 + specification/inference/_types/CommonTypes.ts | 15 ++- .../PutGoogleVertexAiRequestExample5.yaml | 14 --- .../PutGoogleVertexAiRequestExample6.yaml | 14 --- 8 files changed, 58 insertions(+), 130 deletions(-) delete mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml delete mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 9fca4bba7b..804ce7e3ee 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22739,16 +22739,6 @@ "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" - }, - "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" - }, - "PutGoogleVertexAiRequestExample6": { - "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -102575,7 +102565,7 @@ "type": "object", "properties": { "provider": { - "description": "The name of the Google Model Garden Provider 
for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", "allOf": [ { "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" @@ -102636,11 +102626,7 @@ "type": "string", "enum": [ "google", - "anthropic", - "meta", - "hugging_face", - "mistral", - "ai21" + "anthropic" ] }, "inference._types.GoogleVertexAITaskSettings": { @@ -102666,7 +102652,10 @@ ] }, "max_tokens": { - "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "externalDocs": { + "url": "https://docs.claude.com/en/api/messages#body-max-tokens" + }, + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. 
Please refer to the Anthropic documentation for more information.", "type": "number" } } diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index e7214b8641..4507e85164 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13723,16 +13723,6 @@ "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" - }, - "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" - }, - "PutGoogleVertexAiRequestExample6": { - "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -66639,7 +66629,7 @@ "type": "object", "properties": { "provider": { - "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", "allOf": [ { "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" @@ -66700,11 +66690,7 @@ "type": "string", "enum": [ "google", - "anthropic", - "meta", - "hugging_face", - "mistral", - "ai21" + "anthropic" ] }, "inference._types.GoogleVertexAITaskSettings": { @@ -66730,7 +66716,10 @@ ] }, "max_tokens": { - "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "externalDocs": { + "url": "https://docs.claude.com/en/api/messages#body-max-tokens" + }, + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", "type": "number" } } diff --git a/output/schema/schema.json b/output/schema/schema.json index 3330a4154e..22b2236ae1 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -172057,25 +172057,13 @@ }, { "name": "anthropic" - }, - { - "name": "meta" - }, - { - "name": "hugging_face" - }, - { - "name": "mistral" - }, - { - "name": "ai21" } ], "name": { "name": "GoogleModelGardenProvider", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1457-L1464" + "specLocation": "inference/_types/CommonTypes.ts#L1457-L1460" }, { "kind": "interface", @@ -172085,7 +172073,7 @@ }, "properties": [ { - "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", "name": "provider", "required": false, "type": { @@ -172212,7 +172200,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1502-L1504" + "specLocation": "inference/_types/CommonTypes.ts#L1501-L1503" }, { "kind": "interface", @@ -172260,7 +172248,9 @@ } }, { - "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", + "extDocId": "anthropic-max-tokens", + "extDocUrl": "https://docs.claude.com/en/api/messages#body-max-tokens", "name": "max_tokens", "required": false, "type": { @@ -172272,7 +172262,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1466-L1486" + "specLocation": "inference/_types/CommonTypes.ts#L1462-L1485" }, { "kind": "enum", @@ -172294,7 +172284,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1495-L1500" + "specLocation": "inference/_types/CommonTypes.ts#L1494-L1499" }, { "kind": "interface", @@ -172356,7 +172346,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1506-L1538" + "specLocation": "inference/_types/CommonTypes.ts#L1505-L1537" }, { "kind": "enum", @@ -172369,7 +172359,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1559-L1561" + "specLocation": "inference/_types/CommonTypes.ts#L1558-L1560" }, { "kind": "interface", @@ -172403,7 +172393,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1540-L1550" + "specLocation": "inference/_types/CommonTypes.ts#L1539-L1549" }, { "kind": "enum", @@ -172425,7 +172415,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1552-L1557" + "specLocation": "inference/_types/CommonTypes.ts#L1551-L1556" }, { "kind": "interface", @@ -173657,7 +173647,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1563-L1592" + "specLocation": "inference/_types/CommonTypes.ts#L1562-L1591" }, { "kind": "enum", @@ -173670,7 +173660,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1622-L1624" + "specLocation": "inference/_types/CommonTypes.ts#L1621-L1623" }, { "kind": "enum", @@ -173689,7 +173679,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1626-L1630" + "specLocation": "inference/_types/CommonTypes.ts#L1625-L1629" }, { "kind": "interface", @@ -173735,7 +173725,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1594-L1615" + "specLocation": "inference/_types/CommonTypes.ts#L1593-L1614" }, { "kind": "enum", @@ -173751,7 +173741,7 @@ "name": "JinaAITaskType", 
"namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1617-L1620" + "specLocation": "inference/_types/CommonTypes.ts#L1616-L1619" }, { "kind": "enum", @@ -173773,7 +173763,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1632-L1637" + "specLocation": "inference/_types/CommonTypes.ts#L1631-L1636" }, { "kind": "interface", @@ -173845,7 +173835,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1639-L1669" + "specLocation": "inference/_types/CommonTypes.ts#L1638-L1668" }, { "kind": "enum", @@ -173858,7 +173848,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1677-L1679" + "specLocation": "inference/_types/CommonTypes.ts#L1676-L1678" }, { "kind": "enum", @@ -173877,7 +173867,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1681-L1685" + "specLocation": "inference/_types/CommonTypes.ts#L1680-L1684" }, { "kind": "enum", @@ -173896,7 +173886,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1671-L1675" + "specLocation": "inference/_types/CommonTypes.ts#L1670-L1674" }, { "kind": "interface", @@ -174054,7 +174044,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1687-L1714" + "specLocation": "inference/_types/CommonTypes.ts#L1686-L1713" }, { "kind": "enum", @@ -174067,7 +174057,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1722-L1724" + "specLocation": "inference/_types/CommonTypes.ts#L1721-L1723" }, { "kind": "enum", @@ -174086,7 +174076,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1716-L1720" + "specLocation": "inference/_types/CommonTypes.ts#L1715-L1719" }, { "kind": "interface", @@ -174173,7 +174163,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1726-L1768" + "specLocation": "inference/_types/CommonTypes.ts#L1725-L1767" }, { "kind": "enum", @@ -174186,7 +174176,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1784-L1786" + "specLocation": "inference/_types/CommonTypes.ts#L1783-L1785" }, { "kind": "interface", @@ -174208,7 +174198,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1770-L1776" + "specLocation": "inference/_types/CommonTypes.ts#L1769-L1775" }, { "kind": "enum", @@ -174227,7 +174217,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1778-L1782" + "specLocation": "inference/_types/CommonTypes.ts#L1777-L1781" }, { "kind": "interface", @@ -175073,7 +175063,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1488-L1493" + "specLocation": "inference/_types/CommonTypes.ts#L1487-L1492" }, { "kind": "interface", @@ -175219,7 +175209,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1788-L1819" + "specLocation": "inference/_types/CommonTypes.ts#L1787-L1818" }, { "kind": "enum", @@ -175232,7 +175222,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1852-L1854" + "specLocation": "inference/_types/CommonTypes.ts#L1851-L1853" }, { "kind": "interface", @@ -175292,7 +175282,7 @@ } } ], - "specLocation": 
"inference/_types/CommonTypes.ts#L1821-L1845" + "specLocation": "inference/_types/CommonTypes.ts#L1820-L1844" }, { "kind": "enum", @@ -175308,7 +175298,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1847-L1850" + "specLocation": "inference/_types/CommonTypes.ts#L1846-L1849" }, { "kind": "interface", @@ -175396,7 +175386,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1856-L1894" + "specLocation": "inference/_types/CommonTypes.ts#L1855-L1893" }, { "kind": "enum", @@ -175409,7 +175399,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1902-L1904" + "specLocation": "inference/_types/CommonTypes.ts#L1901-L1903" }, { "kind": "enum", @@ -175428,7 +175418,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1896-L1900" + "specLocation": "inference/_types/CommonTypes.ts#L1895-L1899" }, { "kind": "request", @@ -179201,18 +179191,6 @@ "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" - }, - "PutGoogleVertexAiRequestExample5": { - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", - "method_request": "PUT _inference/completion/google_model_garden_meta_completion", - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" - }, - "PutGoogleVertexAiRequestExample6": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", - "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion", - "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 19fc293aa1..d90ec3fd29 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14122,7 +14122,7 @@ export interface InferenceGoogleAiStudioServiceSettings { export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding' -export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' | 'meta' | 'hugging_face' | 'mistral' | 'ai21' +export type 
InferenceGoogleModelGardenProvider = 'google' | 'anthropic'
 
 export interface InferenceGoogleVertexAIServiceSettings {
   provider?: InferenceGoogleModelGardenProvider
diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv
index 23941a5133..6d15e8cf37 100644
--- a/specification/_doc_ids/table.csv
+++ b/specification/_doc_ids/table.csv
@@ -22,6 +22,7 @@ analysis,https://www.elastic.co/docs/manage-data/data-store/text-analysis,,
 analyze-repository,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-snapshot-repository-analyze,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/repo-analysis-api.html,
 analyzer-anatomy,https://www.elastic.co/docs/manage-data/data-store/text-analysis/anatomy-of-an-analyzer,,
 analyzer-update-existing,https://www.elastic.co/docs/manage-data/data-store/text-analysis/specify-an-analyzer#update-analyzers-on-existing-indices,,
+anthropic-max-tokens,https://docs.claude.com/en/api/messages#body-max-tokens,,
 anthropic-messages,https://docs.anthropic.com/en/api/messages,,
 anthropic-models,https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names,,
 api-date-math-index-names,https://www.elastic.co/docs/reference/elasticsearch/rest-apis/api-conventions#api-date-math-index-names,,
diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
index de7e267504..f725891fab 100644
--- a/specification/inference/_types/CommonTypes.ts
+++ b/specification/inference/_types/CommonTypes.ts
@@ -1392,7 +1392,7 @@ export enum GoogleAiServiceType {
 export class GoogleVertexAIServiceSettings {
   /**
    * The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.
-   * In order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.
+   * In order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.
    * Modes:
    * - Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.
    * - Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.
@@ -1456,11 +1456,7 @@ export class GoogleVertexAIServiceSettings {
 
 export enum GoogleModelGardenProvider {
   google,
-  anthropic,
-  meta,
-  hugging_face,
-  mistral,
-  ai21
+  anthropic
 }
 
 export class GoogleVertexAITaskSettings {
@@ -1479,8 +1475,11 @@ export class GoogleVertexAITaskSettings {
    */
   thinking_config?: ThinkingConfig
   /**
-   * For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.
-   * If `max_tokens` is specified - it must be a positive integer.
+   * For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.
+   * If `provider` is not set to `anthropic`, this field is ignored.
+   * If `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.
+   * Anthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.
+   * @ext_doc_id anthropic-max-tokens
    */
   max_tokens?: integer
 }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
deleted file mode 100644
index 750fc0eaf7..0000000000
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-summary: A completion task for Google Model Garden Meta endpoint with single URL provided
-description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.
-method_request: 'PUT _inference/completion/google_model_garden_meta_completion'
-# type: "request"
-value: |-
-  {
-    "service": "googlevertexai",
-    "service_settings": {
-      "provider": "meta",
-      "model_id": "meta/llama-3.3-70b-instruct-maas",
-      "service_account_json": "service-account-json",
-      "url": "https://url/openapi/chat/completions"
-    }
-  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml
deleted file mode 100644
index 8d38a23ca5..0000000000
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-summary: A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.
-method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion'
-# type: "request"
-value: |-
-  {
-    "service": "googlevertexai",
-    "service_settings": {
-      "provider": "meta",
-      "model_id": "meta/llama-3.3-70b-instruct-maas",
-      "service_account_json": "service-account-json",
-      "streaming_url": "https://url/openapi/chat/completions"
-    }
-  }
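
Editor's note (illustrative, not part of the patch or the generated output): the interaction between `provider`, `url`, and `streaming_url` that the `GoogleVertexAIServiceSettings` descriptions spell out is easier to follow as code. The TypeScript sketch below is one possible reading of those descriptions; the helper name `resolveModelGardenUrls`, its error messages, and the choice to throw are invented here and do not reflect the actual Elasticsearch implementation.

```typescript
// Sketch of the documented `provider`/`url`/`streaming_url` rules, assuming the
// narrowed provider enum from this patch ('google' | 'anthropic').
type GoogleModelGardenProvider = 'google' | 'anthropic'

interface ModelGardenUrlSettings {
  provider?: GoogleModelGardenProvider
  url?: string // non-streaming `completion` requests
  streaming_url?: string // streaming `completion` and `chat_completion` requests
}

function resolveModelGardenUrls(s: ModelGardenUrlSettings): { blocking: string; streaming: string } {
  const isModelGarden = s.provider !== undefined && s.provider !== 'google'
  if (!isModelGarden) {
    // Google Vertex AI mode: neither URL may be set; the endpoint is built from
    // `location`, `model_id`, and `project_id` instead.
    throw new Error('url and streaming_url are only valid for non-google Model Garden providers')
  }
  if (s.url === undefined && s.streaming_url === undefined) {
    throw new Error('at least one of url or streaming_url is required for a Model Garden endpoint')
  }
  // When only one URL is given, it is used for both modes.
  return {
    blocking: s.url ?? s.streaming_url!,
    streaming: s.streaming_url ?? s.url!
  }
}

// With PutGoogleVertexAiRequestExample4 (both URLs set) each URL serves its own mode;
// an endpoint configured with only `streaming_url` would route both modes through it.
```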
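A second, equally hypothetical sketch covers the reworked `max_tokens` task setting: the new description states the value is ignored unless `provider` is `anthropic`, must be a positive integer when given, and defaults to 1024 because Anthropic models require `max_tokens` on every request. The function and parameter names below are invented for illustration only.

```typescript
// Sketch of the documented `max_tokens` behaviour for the Google Model Garden
// `anthropic` provider; not the code Elasticsearch actually runs.
function resolveMaxTokens(provider: 'google' | 'anthropic' | undefined, maxTokens?: number): number | undefined {
  if (provider !== 'anthropic') {
    return undefined // documented as ignored for every other configuration
  }
  if (maxTokens === undefined) {
    return 1024 // documented default; Anthropic requires max_tokens on each request
  }
  if (!Number.isInteger(maxTokens) || maxTokens <= 0) {
    throw new Error('max_tokens must be a positive integer')
  }
  return maxTokens
}

// PutGoogleVertexAiRequestExample4 sets "max_tokens": 128, so 128 is forwarded;
// omitting the task setting would fall back to 1024.
```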