From ab97aa4b84dff6d7e241e171048f71872b99cc77 Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Mon, 6 Oct 2025 15:37:27 +0000 Subject: [PATCH 1/2] Add Google Model Garden support for completion and chat_completion tasks --- output/openapi/elasticsearch-openapi.json | 62 +++++- .../elasticsearch-serverless-openapi.json | 62 +++++- output/schema/schema.json | 198 ++++++++++++++---- output/typescript/types.ts | 14 +- specification/inference/_types/CommonTypes.ts | 56 ++++- specification/inference/_types/TaskType.ts | 2 + .../PutGoogleVertexAiRequestExample3.yaml | 17 ++ .../PutGoogleVertexAiRequestExample4.yaml | 17 ++ .../PutGoogleVertexAiRequestExample5.yaml | 14 ++ .../PutGoogleVertexAiRequestExample6.yaml | 14 ++ 10 files changed, 388 insertions(+), 68 deletions(-) create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index ff5cbaf31d..9fca4bba7b 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22729,6 +22729,26 @@ "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "value": "{\n 
\"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -102554,22 +102574,38 @@ "inference._types.GoogleVertexAIServiceSettings": { "type": "object", "properties": { + "provider": { + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" + } + ] + }, + "url": { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, + "streaming_url": { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, "location": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" }, - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", 
"type": "string" }, "model_id": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" }, - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "type": "string" }, "project_id": { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored.", "type": "string" }, "rate_limit": { @@ -102593,12 +102629,20 @@ } }, "required": [ - "location", - "model_id", - "project_id", "service_account_json" ] }, + "inference._types.GoogleModelGardenProvider": { + "type": "string", + "enum": [ + "google", + "anthropic", + "meta", + "hugging_face", + "mistral", + "ai21" + ] + }, "inference._types.GoogleVertexAITaskSettings": { "type": "object", "properties": { @@ -102620,6 +102664,10 @@ "$ref": "#/components/schemas/inference._types.ThinkingConfig" } ] + }, + "max_tokens": { + "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "type": "number" } } }, @@ -102663,6 +102711,8 @@ "inference._types.TaskTypeGoogleVertexAI": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding", "rerank" ] diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 62ef662a4f..e7214b8641 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13713,6 +13713,26 @@ "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT 
_inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -66618,22 +66638,38 @@ "inference._types.GoogleVertexAIServiceSettings": { "type": "object", "properties": { + "provider": { + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" + } + ] + }, + "url": { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, + "streaming_url": { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, "location": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" }, - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "type": "string" }, "model_id": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" }, - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "type": "string" }, "project_id": { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored.", "type": "string" }, "rate_limit": { @@ -66657,12 +66693,20 @@ } }, "required": [ - "location", - "model_id", - "project_id", "service_account_json" ] }, + "inference._types.GoogleModelGardenProvider": { + "type": "string", + "enum": [ + "google", + "anthropic", + "meta", + "hugging_face", + "mistral", + "ai21" + ] + }, "inference._types.GoogleVertexAITaskSettings": { "type": "object", 
"properties": { @@ -66684,6 +66728,10 @@ "$ref": "#/components/schemas/inference._types.ThinkingConfig" } ] + }, + "max_tokens": { + "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "type": "number" } } }, @@ -66727,6 +66775,8 @@ "inference._types.TaskTypeGoogleVertexAI": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding", "rerank" ] diff --git a/output/schema/schema.json b/output/schema/schema.json index 42aa410e93..3330a4154e 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -172049,6 +172049,34 @@ }, "specLocation": "inference/_types/CommonTypes.ts#L1383-L1386" }, + { + "kind": "enum", + "members": [ + { + "name": "google" + }, + { + "name": "anthropic" + }, + { + "name": "meta" + }, + { + "name": "hugging_face" + }, + { + "name": "mistral" + }, + { + "name": "ai21" + } + ], + "name": { + "name": "GoogleModelGardenProvider", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1457-L1464" + }, { "kind": "interface", "name": { @@ -172057,11 +172085,47 @@ }, "properties": [ { - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "name": "provider", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleModelGardenProvider", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "name": "url", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "name": "streaming_url", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "extDocId": "googlevertexai-locations", "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations", "name": "location", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172071,11 +172135,11 @@ } }, { - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "extDocId": "googlevertexai-models", "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api", "name": "model_id", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172085,9 +172149,9 @@ } }, { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is 
ignored.", "name": "project_id", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172135,7 +172199,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1392-L1425" + "specLocation": "inference/_types/CommonTypes.ts#L1392-L1455" }, { "kind": "enum", @@ -172148,7 +172212,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1458-L1460" + "specLocation": "inference/_types/CommonTypes.ts#L1502-L1504" }, { "kind": "interface", @@ -172194,9 +172258,21 @@ "namespace": "inference._types" } } + }, + { + "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "name": "max_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1427-L1442" + "specLocation": "inference/_types/CommonTypes.ts#L1466-L1486" }, { "kind": "enum", @@ -172218,7 +172294,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1451-L1456" + "specLocation": "inference/_types/CommonTypes.ts#L1495-L1500" }, { "kind": "interface", @@ -172280,7 +172356,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1462-L1494" + "specLocation": "inference/_types/CommonTypes.ts#L1506-L1538" }, { "kind": "enum", @@ -172293,7 +172369,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1515-L1517" + "specLocation": "inference/_types/CommonTypes.ts#L1559-L1561" }, { "kind": "interface", @@ -172327,7 +172403,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1496-L1506" + "specLocation": "inference/_types/CommonTypes.ts#L1540-L1550" }, { "kind": "enum", @@ -172349,7 +172425,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1508-L1513" + "specLocation": "inference/_types/CommonTypes.ts#L1552-L1557" }, { "kind": "interface", @@ -173581,7 +173657,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1519-L1548" + "specLocation": "inference/_types/CommonTypes.ts#L1563-L1592" }, { "kind": "enum", @@ -173594,7 +173670,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1578-L1580" + "specLocation": "inference/_types/CommonTypes.ts#L1622-L1624" }, { "kind": "enum", @@ -173613,7 +173689,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1582-L1586" + "specLocation": "inference/_types/CommonTypes.ts#L1626-L1630" }, { "kind": "interface", @@ -173659,7 +173735,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1550-L1571" + "specLocation": "inference/_types/CommonTypes.ts#L1594-L1615" }, { "kind": "enum", @@ -173675,7 +173751,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1573-L1576" + "specLocation": "inference/_types/CommonTypes.ts#L1617-L1620" }, { "kind": "enum", @@ -173697,7 +173773,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1588-L1593" + "specLocation": 
"inference/_types/CommonTypes.ts#L1632-L1637" }, { "kind": "interface", @@ -173769,7 +173845,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1595-L1625" + "specLocation": "inference/_types/CommonTypes.ts#L1639-L1669" }, { "kind": "enum", @@ -173782,7 +173858,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1633-L1635" + "specLocation": "inference/_types/CommonTypes.ts#L1677-L1679" }, { "kind": "enum", @@ -173801,7 +173877,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1637-L1641" + "specLocation": "inference/_types/CommonTypes.ts#L1681-L1685" }, { "kind": "enum", @@ -173820,7 +173896,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1627-L1631" + "specLocation": "inference/_types/CommonTypes.ts#L1671-L1675" }, { "kind": "interface", @@ -173978,7 +174054,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1643-L1670" + "specLocation": "inference/_types/CommonTypes.ts#L1687-L1714" }, { "kind": "enum", @@ -173991,7 +174067,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1678-L1680" + "specLocation": "inference/_types/CommonTypes.ts#L1722-L1724" }, { "kind": "enum", @@ -174010,7 +174086,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1672-L1676" + "specLocation": "inference/_types/CommonTypes.ts#L1716-L1720" }, { "kind": "interface", @@ -174097,7 +174173,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1682-L1724" + "specLocation": "inference/_types/CommonTypes.ts#L1726-L1768" }, { "kind": "enum", @@ -174110,7 +174186,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1740-L1742" + "specLocation": "inference/_types/CommonTypes.ts#L1784-L1786" }, { "kind": "interface", @@ -174132,7 +174208,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1726-L1732" + "specLocation": "inference/_types/CommonTypes.ts#L1770-L1776" }, { "kind": "enum", @@ -174151,7 +174227,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1734-L1738" + "specLocation": "inference/_types/CommonTypes.ts#L1778-L1782" }, { "kind": "interface", @@ -174728,6 +174804,12 @@ { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" }, @@ -174739,7 +174821,7 @@ "name": "TaskTypeGoogleVertexAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L113-L116" + "specLocation": "inference/_types/TaskType.ts#L113-L118" }, { "kind": "enum", @@ -174761,7 +174843,7 @@ "name": "TaskTypeHuggingFace", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L118-L123" + "specLocation": "inference/_types/TaskType.ts#L120-L125" }, { "kind": "enum", @@ -174796,7 +174878,7 @@ "name": "TaskTypeLlama", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L125-L129" + "specLocation": "inference/_types/TaskType.ts#L127-L131" }, { "kind": "enum", @@ -174815,7 +174897,7 @@ "name": "TaskTypeMistral", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L131-L135" + "specLocation": "inference/_types/TaskType.ts#L133-L137" }, { "kind": 
"enum", @@ -174834,7 +174916,7 @@ "name": "TaskTypeOpenAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L137-L141" + "specLocation": "inference/_types/TaskType.ts#L139-L143" }, { "kind": "enum", @@ -174850,7 +174932,7 @@ "name": "TaskTypeVoyageAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L143-L146" + "specLocation": "inference/_types/TaskType.ts#L145-L148" }, { "kind": "enum", @@ -174869,7 +174951,7 @@ "name": "TaskTypeWatsonx", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L148-L152" + "specLocation": "inference/_types/TaskType.ts#L150-L154" }, { "kind": "interface", @@ -174991,7 +175073,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1444-L1449" + "specLocation": "inference/_types/CommonTypes.ts#L1488-L1493" }, { "kind": "interface", @@ -175137,7 +175219,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1744-L1775" + "specLocation": "inference/_types/CommonTypes.ts#L1788-L1819" }, { "kind": "enum", @@ -175150,7 +175232,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1808-L1810" + "specLocation": "inference/_types/CommonTypes.ts#L1852-L1854" }, { "kind": "interface", @@ -175210,7 +175292,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1777-L1801" + "specLocation": "inference/_types/CommonTypes.ts#L1821-L1845" }, { "kind": "enum", @@ -175226,7 +175308,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1803-L1806" + "specLocation": "inference/_types/CommonTypes.ts#L1847-L1850" }, { "kind": "interface", @@ -175314,7 +175396,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1812-L1850" + "specLocation": "inference/_types/CommonTypes.ts#L1856-L1894" }, { "kind": "enum", @@ -175327,7 +175409,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1858-L1860" + "specLocation": "inference/_types/CommonTypes.ts#L1902-L1904" }, { "kind": "enum", @@ -175346,7 +175428,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1852-L1856" + "specLocation": "inference/_types/CommonTypes.ts#L1896-L1900" }, { "kind": "request", @@ -179107,6 +179189,30 @@ "method_request": "PUT _inference/rerank/google_vertex_ai_rerank", "summary": "A rerank task", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "method_request": "PUT _inference/completion/google_model_garden_anthropic_completion", + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "description": "Run `PUT 
_inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "method_request": "PUT _inference/completion/google_model_garden_meta_completion", + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion", + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 4d84479dd5..19fc293aa1 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14122,10 +14122,15 @@ export interface InferenceGoogleAiStudioServiceSettings { export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding' +export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' | 'meta' | 'hugging_face' | 'mistral' | 'ai21' + export interface InferenceGoogleVertexAIServiceSettings { - location: string - model_id: string - project_id: string + provider?: InferenceGoogleModelGardenProvider + url?: string + streaming_url?: string + location?: string + model_id?: string + project_id?: string rate_limit?: InferenceRateLimitSetting service_account_json: string dimensions?: integer @@ -14137,6 +14142,7 @@ export interface InferenceGoogleVertexAITaskSettings { auto_truncate?: boolean top_n?: integer thinking_config?: InferenceThinkingConfig + max_tokens?: integer } export type InferenceGoogleVertexAITaskType = 'rerank' | 'text_embedding' | 'completion' | 'chat_completion' @@ -14438,7 +14444,7 @@ export type InferenceTaskTypeElasticsearch = 'sparse_embedding' | 'text_embeddin export type InferenceTaskTypeGoogleAIStudio = 'text_embedding' | 'completion' -export type 
InferenceTaskTypeGoogleVertexAI = 'text_embedding' | 'rerank' +export type InferenceTaskTypeGoogleVertexAI = 'chat_completion' | 'completion' | 'text_embedding' | 'rerank' export type InferenceTaskTypeHuggingFace = 'chat_completion' | 'completion' | 'rerank' | 'text_embedding' diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 25d1be939a..de7e267504 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1391,21 +1391,51 @@ export enum GoogleAiServiceType { export class GoogleVertexAIServiceSettings { /** - * The name of the location to use for the inference task. + * The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks. + * In order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`. + * Modes: + * - Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`. + * - Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters. + */ + provider?: GoogleModelGardenProvider + /** + * The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint. + * If both `url` and `streaming_url` are provided, each is used for its respective mode. + * If `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`. + * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`). + * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage. + */ + url?: string + /** + * The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint. + * If both `streaming_url` and `url` are provided, each is used for its respective mode. + * If `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests. + * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`). + * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage. + */ + streaming_url?: string + /** + * The name of the location to use for the inference task for the Google Vertex AI inference task. + * For Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored. * Refer to the Google documentation for the list of supported locations. * @ext_doc_id googlevertexai-locations */ - location: string + location?: string /** * The name of the model to use for the inference task. - * Refer to the Google documentation for the list of supported models. + * For Google Vertex AI `model_id` is mandatory. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored. + * Refer to the Google documentation for the list of supported models for Google Vertex AI. * @ext_doc_id googlevertexai-models */ - model_id: string + model_id?: string /** - * The name of the project to use for the inference task. 
+ * The name of the project to use for the Google Vertex AI inference task. + * For Google Vertex AI `project_id` is mandatory. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored. */ - project_id: string + project_id?: string /** * This setting helps to minimize the number of rate limit errors returned from Google Vertex AI. * By default, the `googlevertexai` service sets the number of requests allowed per minute to 30.000. @@ -1424,6 +1454,15 @@ export class GoogleVertexAIServiceSettings { dimensions?: integer } +export enum GoogleModelGardenProvider { + google, + anthropic, + meta, + hugging_face, + mistral, + ai21 +} + export class GoogleVertexAITaskSettings { /** * For a `text_embedding` task, truncate inputs longer than the maximum token length automatically. @@ -1439,6 +1478,11 @@ export class GoogleVertexAITaskSettings { * @ext_doc_id googlevertexai-thinking */ thinking_config?: ThinkingConfig + /** + * For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider. + * If `max_tokens` is specified - it must be a positive integer. + */ + max_tokens?: integer } export class ThinkingConfig { diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index e0e5882eb3..c5d56e439f 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -111,6 +111,8 @@ export enum TaskTypeGoogleAIStudio { } export enum TaskTypeGoogleVertexAI { + chat_completion, + completion, text_embedding, rerank } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml new file mode 100644 index 0000000000..5cb79753dc --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml @@ -0,0 +1,17 @@ +summary: A completion task for Google Model Garden Anthropic endpoint +description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden. +method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "anthropic", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://streaming_url:streamRawPredict" + }, + "task_settings": { + "max_tokens": 128 + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml new file mode 100644 index 0000000000..52b7ececd8 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml @@ -0,0 +1,17 @@ +summary: A chat_completion task for Google Model Garden Anthropic endpoint +description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden. 
+method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "anthropic", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://streaming_url:streamRawPredict" + }, + "task_settings": { + "max_tokens": 128 + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml new file mode 100644 index 0000000000..750fc0eaf7 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml @@ -0,0 +1,14 @@ +summary: A completion task for Google Model Garden Meta endpoint with single URL provided +description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided. +method_request: 'PUT _inference/completion/google_model_garden_meta_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "meta", + "model_id": "meta/llama-3.3-70b-instruct-maas", + "service_account_json": "service-account-json", + "url": "https://url/openapi/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml new file mode 100644 index 0000000000..8d38a23ca5 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml @@ -0,0 +1,14 @@ +summary: A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided. 
+method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "meta", + "model_id": "meta/llama-3.3-70b-instruct-maas", + "service_account_json": "service-account-json", + "streaming_url": "https://url/openapi/chat/completions" + } + } From 738a415945485fbf66f7941c410fb7c69d1c8aa8 Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Wed, 8 Oct 2025 08:30:55 +0000 Subject: [PATCH 2/2] Fix comments --- output/openapi/elasticsearch-openapi.json | 23 ++--- .../elasticsearch-serverless-openapi.json | 23 ++--- output/schema/schema.json | 96 +++++++------------ output/typescript/types.ts | 2 +- specification/_doc_ids/table.csv | 1 + specification/inference/_types/CommonTypes.ts | 15 ++- .../PutGoogleVertexAiRequestExample5.yaml | 14 --- .../PutGoogleVertexAiRequestExample6.yaml | 14 --- 8 files changed, 58 insertions(+), 130 deletions(-) delete mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml delete mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 9fca4bba7b..804ce7e3ee 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22739,16 +22739,6 @@ "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" - }, - "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" - }, - "PutGoogleVertexAiRequestExample6": { - "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -102575,7 +102565,7 @@ "type": "object", "properties": { "provider": { - "description": "The name of the Google Model Garden Provider 
for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", "allOf": [ { "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" @@ -102636,11 +102626,7 @@ "type": "string", "enum": [ "google", - "anthropic", - "meta", - "hugging_face", - "mistral", - "ai21" + "anthropic" ] }, "inference._types.GoogleVertexAITaskSettings": { @@ -102666,7 +102652,10 @@ ] }, "max_tokens": { - "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "externalDocs": { + "url": "https://docs.claude.com/en/api/messages#body-max-tokens" + }, + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. 
Please refer to the Anthropic documentation for more information.", "type": "number" } } diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index e7214b8641..4507e85164 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13723,16 +13723,6 @@ "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" - }, - "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" - }, - "PutGoogleVertexAiRequestExample6": { - "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -66639,7 +66629,7 @@ "type": "object", "properties": { "provider": { - "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", "allOf": [ { "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" @@ -66700,11 +66690,7 @@ "type": "string", "enum": [ "google", - "anthropic", - "meta", - "hugging_face", - "mistral", - "ai21" + "anthropic" ] }, "inference._types.GoogleVertexAITaskSettings": { @@ -66730,7 +66716,10 @@ ] }, "max_tokens": { - "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "externalDocs": { + "url": "https://docs.claude.com/en/api/messages#body-max-tokens" + }, + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", "type": "number" } } diff --git a/output/schema/schema.json b/output/schema/schema.json index 3330a4154e..22b2236ae1 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -172057,25 +172057,13 @@ }, { "name": "anthropic" - }, - { - "name": "meta" - }, - { - "name": "hugging_face" - }, - { - "name": "mistral" - }, - { - "name": "ai21" } ], "name": { "name": "GoogleModelGardenProvider", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1457-L1464" + "specLocation": "inference/_types/CommonTypes.ts#L1457-L1460" }, { "kind": "interface", @@ -172085,7 +172073,7 @@ }, "properties": [ { - "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", "name": "provider", "required": false, "type": { @@ -172212,7 +172200,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1502-L1504" + "specLocation": "inference/_types/CommonTypes.ts#L1501-L1503" }, { "kind": "interface", @@ -172260,7 +172248,9 @@ } }, { - "description": "For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.\nIf `max_tokens` is specified - it must be a positive integer.", + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", + "extDocId": "anthropic-max-tokens", + "extDocUrl": "https://docs.claude.com/en/api/messages#body-max-tokens", "name": "max_tokens", "required": false, "type": { @@ -172272,7 +172262,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1466-L1486" + "specLocation": "inference/_types/CommonTypes.ts#L1462-L1485" }, { "kind": "enum", @@ -172294,7 +172284,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1495-L1500" + "specLocation": "inference/_types/CommonTypes.ts#L1494-L1499" }, { "kind": "interface", @@ -172356,7 +172346,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1506-L1538" + "specLocation": "inference/_types/CommonTypes.ts#L1505-L1537" }, { "kind": "enum", @@ -172369,7 +172359,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1559-L1561" + "specLocation": "inference/_types/CommonTypes.ts#L1558-L1560" }, { "kind": "interface", @@ -172403,7 +172393,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1540-L1550" + "specLocation": "inference/_types/CommonTypes.ts#L1539-L1549" }, { "kind": "enum", @@ -172425,7 +172415,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1552-L1557" + "specLocation": "inference/_types/CommonTypes.ts#L1551-L1556" }, { "kind": "interface", @@ -173657,7 +173647,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1563-L1592" + "specLocation": "inference/_types/CommonTypes.ts#L1562-L1591" }, { "kind": "enum", @@ -173670,7 +173660,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1622-L1624" + "specLocation": "inference/_types/CommonTypes.ts#L1621-L1623" }, { "kind": "enum", @@ -173689,7 +173679,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1626-L1630" + "specLocation": "inference/_types/CommonTypes.ts#L1625-L1629" }, { "kind": "interface", @@ -173735,7 +173725,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1594-L1615" + "specLocation": "inference/_types/CommonTypes.ts#L1593-L1614" }, { "kind": "enum", @@ -173751,7 +173741,7 @@ "name": "JinaAITaskType", 
"namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1617-L1620" + "specLocation": "inference/_types/CommonTypes.ts#L1616-L1619" }, { "kind": "enum", @@ -173773,7 +173763,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1632-L1637" + "specLocation": "inference/_types/CommonTypes.ts#L1631-L1636" }, { "kind": "interface", @@ -173845,7 +173835,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1639-L1669" + "specLocation": "inference/_types/CommonTypes.ts#L1638-L1668" }, { "kind": "enum", @@ -173858,7 +173848,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1677-L1679" + "specLocation": "inference/_types/CommonTypes.ts#L1676-L1678" }, { "kind": "enum", @@ -173877,7 +173867,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1681-L1685" + "specLocation": "inference/_types/CommonTypes.ts#L1680-L1684" }, { "kind": "enum", @@ -173896,7 +173886,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1671-L1675" + "specLocation": "inference/_types/CommonTypes.ts#L1670-L1674" }, { "kind": "interface", @@ -174054,7 +174044,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1687-L1714" + "specLocation": "inference/_types/CommonTypes.ts#L1686-L1713" }, { "kind": "enum", @@ -174067,7 +174057,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1722-L1724" + "specLocation": "inference/_types/CommonTypes.ts#L1721-L1723" }, { "kind": "enum", @@ -174086,7 +174076,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1716-L1720" + "specLocation": "inference/_types/CommonTypes.ts#L1715-L1719" }, { "kind": "interface", @@ -174173,7 +174163,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1726-L1768" + "specLocation": "inference/_types/CommonTypes.ts#L1725-L1767" }, { "kind": "enum", @@ -174186,7 +174176,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1784-L1786" + "specLocation": "inference/_types/CommonTypes.ts#L1783-L1785" }, { "kind": "interface", @@ -174208,7 +174198,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1770-L1776" + "specLocation": "inference/_types/CommonTypes.ts#L1769-L1775" }, { "kind": "enum", @@ -174227,7 +174217,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1778-L1782" + "specLocation": "inference/_types/CommonTypes.ts#L1777-L1781" }, { "kind": "interface", @@ -175073,7 +175063,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1488-L1493" + "specLocation": "inference/_types/CommonTypes.ts#L1487-L1492" }, { "kind": "interface", @@ -175219,7 +175209,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1788-L1819" + "specLocation": "inference/_types/CommonTypes.ts#L1787-L1818" }, { "kind": "enum", @@ -175232,7 +175222,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1852-L1854" + "specLocation": "inference/_types/CommonTypes.ts#L1851-L1853" }, { "kind": "interface", @@ -175292,7 +175282,7 @@ } } ], - "specLocation": 
"inference/_types/CommonTypes.ts#L1821-L1845" + "specLocation": "inference/_types/CommonTypes.ts#L1820-L1844" }, { "kind": "enum", @@ -175308,7 +175298,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1847-L1850" + "specLocation": "inference/_types/CommonTypes.ts#L1846-L1849" }, { "kind": "interface", @@ -175396,7 +175386,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1856-L1894" + "specLocation": "inference/_types/CommonTypes.ts#L1855-L1893" }, { "kind": "enum", @@ -175409,7 +175399,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1902-L1904" + "specLocation": "inference/_types/CommonTypes.ts#L1901-L1903" }, { "kind": "enum", @@ -175428,7 +175418,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1896-L1900" + "specLocation": "inference/_types/CommonTypes.ts#L1895-L1899" }, { "kind": "request", @@ -179201,18 +179191,6 @@ "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" - }, - "PutGoogleVertexAiRequestExample5": { - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", - "method_request": "PUT _inference/completion/google_model_garden_meta_completion", - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" - }, - "PutGoogleVertexAiRequestExample6": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", - "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion", - "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 19fc293aa1..d90ec3fd29 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14122,7 +14122,7 @@ export interface InferenceGoogleAiStudioServiceSettings { export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding' -export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' | 'meta' | 'hugging_face' | 'mistral' | 'ai21' +export type 
InferenceGoogleModelGardenProvider = 'google' | 'anthropic'
 
 export interface InferenceGoogleVertexAIServiceSettings {
   provider?: InferenceGoogleModelGardenProvider
diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv
index 23941a5133..6d15e8cf37 100644
--- a/specification/_doc_ids/table.csv
+++ b/specification/_doc_ids/table.csv
@@ -22,6 +22,7 @@ analysis,https://www.elastic.co/docs/manage-data/data-store/text-analysis,,
 analyze-repository,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-snapshot-repository-analyze,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/repo-analysis-api.html,
 analyzer-anatomy,https://www.elastic.co/docs/manage-data/data-store/text-analysis/anatomy-of-an-analyzer,,
 analyzer-update-existing,https://www.elastic.co/docs/manage-data/data-store/text-analysis/specify-an-analyzer#update-analyzers-on-existing-indices,,
+anthropic-max-tokens,https://docs.claude.com/en/api/messages#body-max-tokens,,
 anthropic-messages,https://docs.anthropic.com/en/api/messages,,
 anthropic-models,https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names,,
 api-date-math-index-names,https://www.elastic.co/docs/reference/elasticsearch/rest-apis/api-conventions#api-date-math-index-names,,
diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
index de7e267504..f725891fab 100644
--- a/specification/inference/_types/CommonTypes.ts
+++ b/specification/inference/_types/CommonTypes.ts
@@ -1392,7 +1392,7 @@ export enum GoogleAiServiceType {
 export class GoogleVertexAIServiceSettings {
   /**
    * The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.
-   * In order for Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.
+   * In order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.
    * Modes:
    * - Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.
    * - Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.
@@ -1456,11 +1456,7 @@ export class GoogleVertexAIServiceSettings {
 
 export enum GoogleModelGardenProvider {
   google,
-  anthropic,
-  meta,
-  hugging_face,
-  mistral,
-  ai21
+  anthropic
 }
 
 export class GoogleVertexAITaskSettings {
@@ -1479,8 +1475,11 @@ export class GoogleVertexAITaskSettings {
    */
   thinking_config?: ThinkingConfig
   /**
-   * For a `completion` or `chat_completion` task, allows setting up the `max_tokens` field for request to the Google Model Garden's Anthropic provider.
-   * If `max_tokens` is specified - it must be a positive integer.
+   * For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.
+   * If `provider` is not set to `anthropic`, this field is ignored.
+   * If `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.
+   * Anthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.
+   * @ext_doc_id anthropic-max-tokens
    */
   max_tokens?: integer
 }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
deleted file mode 100644
index 750fc0eaf7..0000000000
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-summary: A completion task for Google Model Garden Meta endpoint with single URL provided
-description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.
-method_request: 'PUT _inference/completion/google_model_garden_meta_completion'
-# type: "request"
-value: |-
-  {
-    "service": "googlevertexai",
-    "service_settings": {
-      "provider": "meta",
-      "model_id": "meta/llama-3.3-70b-instruct-maas",
-      "service_account_json": "service-account-json",
-      "url": "https://url/openapi/chat/completions"
-    }
-  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml
deleted file mode 100644
index 8d38a23ca5..0000000000
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-summary: A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.
-method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion'
-# type: "request"
-value: |-
-  {
-    "service": "googlevertexai",
-    "service_settings": {
-      "provider": "meta",
-      "model_id": "meta/llama-3.3-70b-instruct-maas",
-      "service_account_json": "service-account-json",
-      "streaming_url": "https://url/openapi/chat/completions"
-    }
-  }
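
Editor's note (illustrative, not part of the patch or the generated output): the interaction between `provider`, `url`, and `streaming_url` that the `GoogleVertexAIServiceSettings` descriptions spell out is easier to follow as code. The TypeScript sketch below is one possible reading of those descriptions; the helper name `resolveModelGardenUrls`, its error messages, and the choice to throw are invented here and do not reflect the actual Elasticsearch implementation.

```typescript
// Sketch of the documented `provider`/`url`/`streaming_url` rules, assuming the
// narrowed provider enum from this patch ('google' | 'anthropic').
type GoogleModelGardenProvider = 'google' | 'anthropic'

interface ModelGardenUrlSettings {
  provider?: GoogleModelGardenProvider
  url?: string // non-streaming `completion` requests
  streaming_url?: string // streaming `completion` and `chat_completion` requests
}

function resolveModelGardenUrls(s: ModelGardenUrlSettings): { blocking: string; streaming: string } {
  const isModelGarden = s.provider !== undefined && s.provider !== 'google'
  if (!isModelGarden) {
    // Google Vertex AI mode: neither URL may be set; the endpoint is built from
    // `location`, `model_id`, and `project_id` instead.
    throw new Error('url and streaming_url are only valid for non-google Model Garden providers')
  }
  if (s.url === undefined && s.streaming_url === undefined) {
    throw new Error('at least one of url or streaming_url is required for a Model Garden endpoint')
  }
  // When only one URL is given, it is used for both modes.
  return {
    blocking: s.url ?? s.streaming_url!,
    streaming: s.streaming_url ?? s.url!
  }
}

// With PutGoogleVertexAiRequestExample4 (both URLs set) each URL serves its own mode;
// an endpoint configured with only `streaming_url` would route both modes through it.
```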
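A second, equally hypothetical sketch covers the reworked `max_tokens` task setting: the new description states the value is ignored unless `provider` is `anthropic`, must be a positive integer when given, and defaults to 1024 because Anthropic models require `max_tokens` on every request. The function and parameter names below are invented for illustration only.

```typescript
// Sketch of the documented `max_tokens` behaviour for the Google Model Garden
// `anthropic` provider; not the code Elasticsearch actually runs.
function resolveMaxTokens(provider: 'google' | 'anthropic' | undefined, maxTokens?: number): number | undefined {
  if (provider !== 'anthropic') {
    return undefined // documented as ignored for every other configuration
  }
  if (maxTokens === undefined) {
    return 1024 // documented default; Anthropic requires max_tokens on each request
  }
  if (!Number.isInteger(maxTokens) || maxTokens <= 0) {
    throw new Error('max_tokens must be a positive integer')
  }
  return maxTokens
}

// PutGoogleVertexAiRequestExample4 sets "max_tokens": 128, so 128 is forwarded;
// omitting the task setting would fall back to 1024.
```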