diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 1569db6aa0..2d2b1bf4d2 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22750,6 +22750,16 @@ "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" } } } @@ -102943,22 +102953,38 @@ "inference._types.GoogleVertexAIServiceSettings": { "type": "object", "properties": { + "provider": { + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" + } + ] + }, + "url": { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, + "streaming_url": { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, "location": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" }, - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "type": "string" }, "model_id": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" }, - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "type": "string" }, "project_id": { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored.", "type": "string" }, "rate_limit": { @@ -102982,12 +103008,16 @@ } }, "required": [ - "location", - "model_id", - "project_id", "service_account_json" ] }, + "inference._types.GoogleModelGardenProvider": { + "type": "string", + "enum": [ + "google", + "anthropic" + ] + }, "inference._types.GoogleVertexAITaskSettings": { "type": "object", "properties": { @@ -103009,6 +103039,13 @@ "$ref": 
"#/components/schemas/inference._types.ThinkingConfig" } ] + }, + "max_tokens": { + "externalDocs": { + "url": "https://docs.claude.com/en/api/messages#body-max-tokens" + }, + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", + "type": "number" } } }, @@ -103052,6 +103089,8 @@ "inference._types.TaskTypeGoogleVertexAI": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding", "rerank" ] diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index dd4e4fed8b..7ce70fcc14 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13732,6 +13732,16 @@ "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" } } } @@ -67023,22 +67033,38 @@ "inference._types.GoogleVertexAIServiceSettings": { "type": "object", "properties": { + "provider": { + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" + } + ] + }, + "url": { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, + "streaming_url": { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, "location": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" }, - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "type": "string" }, "model_id": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" }, - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "type": "string" }, "project_id": { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored.", "type": "string" }, "rate_limit": { @@ -67062,12 +67088,16 @@ } }, "required": [ - "location", - "model_id", - "project_id", "service_account_json" ] }, + "inference._types.GoogleModelGardenProvider": { + "type": "string", + "enum": [ + "google", + "anthropic" + ] + }, "inference._types.GoogleVertexAITaskSettings": { "type": "object", "properties": { @@ -67089,6 +67119,13 @@ "$ref": 
"#/components/schemas/inference._types.ThinkingConfig" } ] + }, + "max_tokens": { + "externalDocs": { + "url": "https://docs.claude.com/en/api/messages#body-max-tokens" + }, + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", + "type": "number" } } }, @@ -67132,6 +67169,8 @@ "inference._types.TaskTypeGoogleVertexAI": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding", "rerank" ] diff --git a/output/schema/schema.json b/output/schema/schema.json index d1d9e8b128..3081b4bcce 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -173211,6 +173211,22 @@ }, "specLocation": "inference/_types/CommonTypes.ts#L1424-L1427" }, + { + "kind": "enum", + "members": [ + { + "name": "google" + }, + { + "name": "anthropic" + } + ], + "name": { + "name": "GoogleModelGardenProvider", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1498-L1501" + }, { "kind": "interface", "name": { @@ -173219,11 +173235,47 @@ }, "properties": [ { - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "name": "provider", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleModelGardenProvider", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "name": "url", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "name": "streaming_url", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "extDocId": "googlevertexai-locations", "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations", "name": "location", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -173233,11 +173285,11 @@ } }, { - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "extDocId": "googlevertexai-models", "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api", "name": "model_id", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -173247,9 +173299,9 @@ } }, { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is 
ignored.", "name": "project_id", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -173297,7 +173349,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1433-L1466" + "specLocation": "inference/_types/CommonTypes.ts#L1433-L1496" }, { "kind": "enum", @@ -173310,7 +173362,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1499-L1501" + "specLocation": "inference/_types/CommonTypes.ts#L1542-L1544" }, { "kind": "interface", @@ -173356,9 +173408,23 @@ "namespace": "inference._types" } } + }, + { + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", + "extDocId": "anthropic-max-tokens", + "extDocUrl": "https://docs.claude.com/en/api/messages#body-max-tokens", + "name": "max_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1468-L1483" + "specLocation": "inference/_types/CommonTypes.ts#L1503-L1526" }, { "kind": "enum", @@ -173380,7 +173446,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1492-L1497" + "specLocation": "inference/_types/CommonTypes.ts#L1535-L1540" }, { "kind": "interface", @@ -173442,7 +173508,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1503-L1535" + "specLocation": "inference/_types/CommonTypes.ts#L1546-L1578" }, { "kind": "enum", @@ -173455,7 +173521,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1556-L1558" + "specLocation": "inference/_types/CommonTypes.ts#L1599-L1601" }, { "kind": "interface", @@ -173489,7 +173555,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1537-L1547" + "specLocation": "inference/_types/CommonTypes.ts#L1580-L1590" }, { "kind": "enum", @@ -173511,7 +173577,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1549-L1554" + "specLocation": "inference/_types/CommonTypes.ts#L1592-L1597" }, { "kind": "interface", @@ -174743,7 +174809,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1560-L1589" + "specLocation": "inference/_types/CommonTypes.ts#L1603-L1632" }, { "kind": "enum", @@ -174756,7 +174822,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1619-L1621" + "specLocation": "inference/_types/CommonTypes.ts#L1662-L1664" }, { "kind": "enum", @@ -174775,7 +174841,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1623-L1627" + "specLocation": "inference/_types/CommonTypes.ts#L1666-L1670" }, { "kind": "interface", @@ -174821,7 +174887,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1591-L1612" + "specLocation": "inference/_types/CommonTypes.ts#L1634-L1655" }, { "kind": "enum", @@ -174837,7 +174903,7 @@ "name": "JinaAITaskType", "namespace": 
"inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1614-L1617" + "specLocation": "inference/_types/CommonTypes.ts#L1657-L1660" }, { "kind": "enum", @@ -174859,7 +174925,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1629-L1634" + "specLocation": "inference/_types/CommonTypes.ts#L1672-L1677" }, { "kind": "interface", @@ -174931,7 +174997,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1636-L1666" + "specLocation": "inference/_types/CommonTypes.ts#L1679-L1709" }, { "kind": "enum", @@ -174944,7 +175010,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1674-L1676" + "specLocation": "inference/_types/CommonTypes.ts#L1717-L1719" }, { "kind": "enum", @@ -174963,7 +175029,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1678-L1682" + "specLocation": "inference/_types/CommonTypes.ts#L1721-L1725" }, { "kind": "enum", @@ -174982,7 +175048,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1668-L1672" + "specLocation": "inference/_types/CommonTypes.ts#L1711-L1715" }, { "kind": "interface", @@ -175140,7 +175206,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1684-L1711" + "specLocation": "inference/_types/CommonTypes.ts#L1727-L1754" }, { "kind": "enum", @@ -175153,7 +175219,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1719-L1721" + "specLocation": "inference/_types/CommonTypes.ts#L1762-L1764" }, { "kind": "enum", @@ -175172,7 +175238,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1713-L1717" + "specLocation": "inference/_types/CommonTypes.ts#L1756-L1760" }, { "kind": "interface", @@ -175259,7 +175325,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1723-L1765" + "specLocation": "inference/_types/CommonTypes.ts#L1766-L1808" }, { "kind": "enum", @@ -175272,7 +175338,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1792-L1794" + "specLocation": "inference/_types/CommonTypes.ts#L1835-L1837" }, { "kind": "interface", @@ -175302,7 +175368,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1767-L1784" + "specLocation": "inference/_types/CommonTypes.ts#L1810-L1827" }, { "kind": "enum", @@ -175321,7 +175387,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1786-L1790" + "specLocation": "inference/_types/CommonTypes.ts#L1829-L1833" }, { "kind": "interface", @@ -175898,6 +175964,12 @@ { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" }, @@ -175909,7 +175981,7 @@ "name": "TaskTypeGoogleVertexAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L113-L116" + "specLocation": "inference/_types/TaskType.ts#L113-L118" }, { "kind": "enum", @@ -175931,7 +176003,7 @@ "name": "TaskTypeHuggingFace", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L118-L123" + "specLocation": "inference/_types/TaskType.ts#L120-L125" }, { "kind": "enum", @@ -175966,7 +176038,7 @@ "name": "TaskTypeLlama", "namespace": "inference._types" }, - 
"specLocation": "inference/_types/TaskType.ts#L125-L129" + "specLocation": "inference/_types/TaskType.ts#L127-L131" }, { "kind": "enum", @@ -175985,7 +176057,7 @@ "name": "TaskTypeMistral", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L131-L135" + "specLocation": "inference/_types/TaskType.ts#L133-L137" }, { "kind": "enum", @@ -176004,7 +176076,7 @@ "name": "TaskTypeOpenAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L137-L141" + "specLocation": "inference/_types/TaskType.ts#L139-L143" }, { "kind": "enum", @@ -176020,7 +176092,7 @@ "name": "TaskTypeVoyageAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L143-L146" + "specLocation": "inference/_types/TaskType.ts#L145-L148" }, { "kind": "enum", @@ -176039,7 +176111,7 @@ "name": "TaskTypeWatsonx", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L148-L152" + "specLocation": "inference/_types/TaskType.ts#L150-L154" }, { "kind": "interface", @@ -176161,7 +176233,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1485-L1490" + "specLocation": "inference/_types/CommonTypes.ts#L1528-L1533" }, { "kind": "interface", @@ -176307,7 +176379,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1796-L1827" + "specLocation": "inference/_types/CommonTypes.ts#L1839-L1870" }, { "kind": "enum", @@ -176320,7 +176392,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1860-L1862" + "specLocation": "inference/_types/CommonTypes.ts#L1903-L1905" }, { "kind": "interface", @@ -176380,7 +176452,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1829-L1853" + "specLocation": "inference/_types/CommonTypes.ts#L1872-L1896" }, { "kind": "enum", @@ -176396,7 +176468,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1855-L1858" + "specLocation": "inference/_types/CommonTypes.ts#L1898-L1901" }, { "kind": "interface", @@ -176484,7 +176556,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1864-L1902" + "specLocation": "inference/_types/CommonTypes.ts#L1907-L1945" }, { "kind": "enum", @@ -176497,7 +176569,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1910-L1912" + "specLocation": "inference/_types/CommonTypes.ts#L1953-L1955" }, { "kind": "enum", @@ -176516,7 +176588,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1904-L1908" + "specLocation": "inference/_types/CommonTypes.ts#L1947-L1951" }, { "kind": "request", @@ -180277,6 +180349,18 @@ "method_request": "PUT _inference/rerank/google_vertex_ai_rerank", "summary": "A rerank task", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "method_request": "PUT _inference/completion/google_model_garden_anthropic_completion", + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n 
\"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 2ce5631967..f04b4076bc 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14248,10 +14248,15 @@ export interface InferenceGoogleAiStudioServiceSettings { export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding' +export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' + export interface InferenceGoogleVertexAIServiceSettings { - location: string - model_id: string - project_id: string + provider?: InferenceGoogleModelGardenProvider + url?: string + streaming_url?: string + location?: string + model_id?: string + project_id?: string rate_limit?: InferenceRateLimitSetting service_account_json: string dimensions?: integer @@ -14263,6 +14268,7 @@ export interface InferenceGoogleVertexAITaskSettings { auto_truncate?: boolean top_n?: integer thinking_config?: InferenceThinkingConfig + max_tokens?: integer } export type InferenceGoogleVertexAITaskType = 'rerank' | 'text_embedding' | 'completion' | 'chat_completion' @@ -14565,7 +14571,7 @@ export type InferenceTaskTypeElasticsearch = 'sparse_embedding' | 'text_embeddin export type InferenceTaskTypeGoogleAIStudio = 'text_embedding' | 'completion' -export type InferenceTaskTypeGoogleVertexAI = 'text_embedding' | 'rerank' +export type InferenceTaskTypeGoogleVertexAI = 'chat_completion' | 'completion' | 'text_embedding' | 'rerank' export type InferenceTaskTypeHuggingFace = 'chat_completion' | 'completion' | 'rerank' | 'text_embedding' diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index c8eaad7102..ecc06c3d61 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -22,6 +22,7 @@ analysis,https://www.elastic.co/docs/manage-data/data-store/text-analysis,, analyze-repository,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-snapshot-repository-analyze,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/repo-analysis-api.html, analyzer-anatomy,https://www.elastic.co/docs/manage-data/data-store/text-analysis/anatomy-of-an-analyzer,, analyzer-update-existing,https://www.elastic.co/docs/manage-data/data-store/text-analysis/specify-an-analyzer#update-analyzers-on-existing-indices,, +anthropic-max-tokens,https://docs.claude.com/en/api/messages#body-max-tokens,, anthropic-messages,https://docs.anthropic.com/en/api/messages,, 
anthropic-models,https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names,, api-date-math-index-names,https://www.elastic.co/docs/reference/elasticsearch/rest-apis/api-conventions#api-date-math-index-names,, diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 0cb66254d5..10f45fd4d7 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1432,21 +1432,51 @@ export enum GoogleAiServiceType { export class GoogleVertexAIServiceSettings { /** - * The name of the location to use for the inference task. + * The name of the Google Model Garden provider for `completion` and `chat_completion` tasks. + * To use a Google Model Garden endpoint, `provider` must be defined and set to a value other than `google`. + * Modes: + * - Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`. + * - Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url`; Elasticsearch constructs the endpoint URL from the `location`, `model_id`, and `project_id` parameters. + */ + provider?: GoogleModelGardenProvider + /** + * The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint. + * If both `url` and `streaming_url` are provided, each is used for its respective mode. + * If `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`. + * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`). + * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage. + */ + url?: string + /** + * The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint. + * If both `streaming_url` and `url` are provided, each is used for its respective mode. + * If `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests. + * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`). + * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage. + */ + streaming_url?: string + /** + * The name of the location to use for the Google Vertex AI inference task. + * For Google Vertex AI (when `provider` is omitted or set to `google`), `location` is mandatory. + * For Google Model Garden `completion` and `chat_completion` tasks (when `provider` is a supported non-`google` value), `location` is ignored. * Refer to the Google documentation for the list of supported locations. * @ext_doc_id googlevertexai-locations */ - location: string + location?: string /** * The name of the model to use for the inference task. - * Refer to the Google documentation for the list of supported models. + * For Google Vertex AI, `model_id` is mandatory. + * For Google Model Garden `completion` and `chat_completion` tasks (when `provider` is a supported non-`google` value), `model_id` is used only by providers that require it and is otherwise ignored. + * Refer to the Google documentation for the list of supported Google Vertex AI models. * @ext_doc_id googlevertexai-models */ - model_id: string + model_id?: string /** - * The name of the project to use for the inference task. + * The name of the project to use for the Google Vertex AI inference task.
+ * For Google Vertex AI, `project_id` is mandatory. + * For Google Model Garden `completion` and `chat_completion` tasks (when `provider` is a supported non-`google` value), `project_id` is ignored. */ - project_id: string + project_id?: string /** * This setting helps to minimize the number of rate limit errors returned from Google Vertex AI. * By default, the `googlevertexai` service sets the number of requests allowed per minute to 30.000. @@ -1465,6 +1495,11 @@ export class GoogleVertexAIServiceSettings { dimensions?: integer } +export enum GoogleModelGardenProvider { + google, + anthropic +} + export class GoogleVertexAITaskSettings { /** * For a `text_embedding` task, truncate inputs longer than the maximum token length automatically. @@ -1480,6 +1515,14 @@ * @ext_doc_id googlevertexai-thinking */ thinking_config?: ThinkingConfig + /** + * For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider. + * If `provider` is not set to `anthropic`, this field is ignored. + * If specified, `max_tokens` must be a positive integer; if not specified, a default value of 1024 is used. + * Anthropic models require `max_tokens` to be set for each request. Refer to the Anthropic documentation for more information. + * @ext_doc_id anthropic-max-tokens + */ + max_tokens?: integer } export class ThinkingConfig { diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index e0e5882eb3..c5d56e439f 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -111,6 +111,8 @@ } export enum TaskTypeGoogleVertexAI { + chat_completion, + completion, text_embedding, rerank } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml new file mode 100644 index 0000000000..5cb79753dc --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml @@ -0,0 +1,17 @@ +summary: A completion task for Google Model Garden Anthropic endpoint +description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.
+method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "anthropic", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://streaming_url:streamRawPredict" + }, + "task_settings": { + "max_tokens": 128 + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml new file mode 100644 index 0000000000..52b7ececd8 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml @@ -0,0 +1,17 @@ +summary: A chat_completion task for Google Model Garden Anthropic endpoint +description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden. +method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "anthropic", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://streaming_url:streamRawPredict" + }, + "task_settings": { + "max_tokens": 128 + } + }
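
Usage sketch: once the endpoints from PutGoogleVertexAiRequestExample3 and PutGoogleVertexAiRequestExample4 are created, they are invoked through the existing inference APIs (`POST _inference/<task_type>/<inference_id>` and `POST _inference/chat_completion/<inference_id>/_stream`), which this change does not modify. The endpoint ids below come from the two examples above and the request bodies follow those generic APIs; treat this as an illustrative sketch rather than additional generated examples.

POST _inference/completion/google_model_garden_anthropic_completion
{
  "input": "Summarize the benefits of semantic search in one sentence."
}

POST _inference/chat_completion/google_model_garden_anthropic_chat_completion/_stream
{
  "messages": [
    {
      "role": "user",
      "content": "Summarize the benefits of semantic search in one sentence."
    }
  ]
}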