Skip to content

Commit 545412e

Browse files
github-actions[bot], kosabogi, pquentin
authored
Adds new parameters to the elasticsearch inference API for the rerank task type (#5476) (#5519)
* Adds new parameters to the elasticsearch inference rerank API
* Adds unique inference chunking settings for elasticsearch
* Addresses suggestions

(cherry picked from commit a53ef07)

Co-authored-by: kosabogi <[email protected]>
Co-authored-by: Quentin Pradet <[email protected]>
1 parent 3009faf commit 545412e

File tree

3 files changed: 23 additions (+23) and 1 deletion (-1).

specification/inference/_types/CommonTypes.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1322,6 +1322,24 @@ export class ElasticsearchServiceSettings {
13221322
* The maximum value is 32.
13231323
*/
13241324
num_threads: integer
1325+
/**
1326+
* Available only for the `rerank` task type using the Elastic reranker model.
1327+
* Controls the strategy used for processing long documents during inference.
1328+
*
1329+
* Possible values:
1330+
* - `truncate` (default): Processes only the beginning of each document.
1331+
* - `chunk`: Splits long documents into smaller parts (chunks) before inference.
1332+
*
1333+
* When `long_document_strategy` is set to `chunk`, Elasticsearch splits each document into smaller parts but still returns a single score per document.
1334+
* That score reflects the highest relevance score among all chunks.
1335+
*/
1336+
long_document_strategy?: string
1337+
/**
1338+
* Only for the `rerank` task type.
1339+
* Limits the number of chunks per document that are sent for inference when chunking is enabled.
1340+
* If not set, all chunks generated for the document are processed.
1341+
*/
1342+
max_chunks_per_doc?: integer
13251343
}
13261344

13271345
export class ElasticsearchTaskSettings {

specification/inference/_types/Services.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,9 @@ import {
5050
*/
5151
export class InferenceEndpoint {
5252
/**
53-
* Chunking configuration object
53+
* The chunking configuration object.
54+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
55+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
5456
*/
5557
chunking_settings?: InferenceChunkingSettings
5658
/**

specification/inference/put_elasticsearch/PutElasticsearchRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ export interface Request extends RequestBase {
7979
body: {
8080
/**
8181
* The chunking configuration object.
82+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
83+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
8284
* @ext_doc_id inference-chunking
8385
*/
8486
chunking_settings?: InferenceChunkingSettings

0 commit comments

Comments
 (0)