Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion elasticsearch/dsl/_async/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class AsyncDocument(DocumentBase, metaclass=AsyncIndexMeta):

@classmethod
def _get_using(cls, using: Optional[AsyncUsingType] = None) -> AsyncUsingType:
return cast(AsyncUsingType, using or cls._index._using)
return using or cls._index._using

@classmethod
def _get_connection(
Expand Down
2 changes: 1 addition & 1 deletion elasticsearch/dsl/_sync/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class Document(DocumentBase, metaclass=IndexMeta):

@classmethod
def _get_using(cls, using: Optional[UsingType] = None) -> UsingType:
return cast(UsingType, using or cls._index._using)
return using or cls._index._using

@classmethod
def _get_connection(cls, using: Optional[UsingType] = None) -> "Elasticsearch":
Expand Down
12 changes: 11 additions & 1 deletion elasticsearch/dsl/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -1290,7 +1290,7 @@ def _deserialize(self, data: Any) -> Union[datetime, date]:
if isinstance(data, datetime):
if self._default_timezone and data.tzinfo is None:
data = data.replace(tzinfo=self._default_timezone)
return data
return cast(datetime, data)
if isinstance(data, date):
return data
if isinstance(data, int):
Expand Down Expand Up @@ -3689,6 +3689,11 @@ class SemanticText(Field):
by using the Update mapping API. Use the Create inference API to
create the endpoint. If not specified, the inference endpoint
defined by inference_id will be used at both index and query time.
:arg chunking_settings: Settings for chunking text into smaller
passages. If specified, these will override the chunking settings
sent in the inference endpoint associated with inference_id. If
chunking settings are updated, they will not be applied to
existing documents until they are reindexed.
"""

name = "semantic_text"
Expand All @@ -3699,6 +3704,9 @@ def __init__(
meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
inference_id: Union[str, "DefaultType"] = DEFAULT,
search_inference_id: Union[str, "DefaultType"] = DEFAULT,
chunking_settings: Union[
"types.ChunkingSettings", Dict[str, Any], "DefaultType"
] = DEFAULT,
**kwargs: Any,
):
if meta is not DEFAULT:
Expand All @@ -3707,6 +3715,8 @@ def __init__(
kwargs["inference_id"] = inference_id
if search_inference_id is not DEFAULT:
kwargs["search_inference_id"] = search_inference_id
if chunking_settings is not DEFAULT:
kwargs["chunking_settings"] = chunking_settings
super().__init__(*args, **kwargs)


Expand Down
44 changes: 43 additions & 1 deletion elasticsearch/dsl/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -1382,7 +1382,49 @@ def __init__(
min_term_freq: Union[int, "DefaultType"] = DEFAULT,
min_word_length: Union[int, "DefaultType"] = DEFAULT,
routing: Union[str, "DefaultType"] = DEFAULT,
stop_words: Union[str, Sequence[str], "DefaultType"] = DEFAULT,
stop_words: Union[
Literal[
"_arabic_",
"_armenian_",
"_basque_",
"_bengali_",
"_brazilian_",
"_bulgarian_",
"_catalan_",
"_cjk_",
"_czech_",
"_danish_",
"_dutch_",
"_english_",
"_estonian_",
"_finnish_",
"_french_",
"_galician_",
"_german_",
"_greek_",
"_hindi_",
"_hungarian_",
"_indonesian_",
"_irish_",
"_italian_",
"_latvian_",
"_lithuanian_",
"_norwegian_",
"_persian_",
"_portuguese_",
"_romanian_",
"_russian_",
"_serbian_",
"_sorani_",
"_spanish_",
"_swedish_",
"_thai_",
"_turkish_",
"_none_",
],
Sequence[str],
"DefaultType",
] = DEFAULT,
unlike: Union[
Union[str, "types.LikeDocument"],
Sequence[Union[str, "types.LikeDocument"]],
Expand Down
54 changes: 44 additions & 10 deletions elasticsearch/dsl/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,48 @@ def __init__(
super().__init__(kwargs)


class ChunkingSettings(AttrDict[Any]):
    """Configuration controlling how text is split into chunks for inference.

    :arg strategy: (required) The chunking strategy: `sentence` or `word`.
        Defaults to `sentence` if omitted.
    :arg max_chunk_size: (required) The maximum size of a chunk in words.
        This value cannot be higher than `300` or lower than `20` (for
        `sentence` strategy) or `10` (for `word` strategy). Defaults to
        `250` if omitted.
    :arg overlap: The number of overlapping words for chunks. It is
        applicable only to a `word` chunking strategy. This value cannot
        be higher than half the `max_chunk_size` value. Defaults to `100`
        if omitted.
    :arg sentence_overlap: The number of overlapping sentences for chunks.
        It is applicable only for a `sentence` chunking strategy. It can
        be either `1` or `0`. Defaults to `1` if omitted.
    """

    strategy: Union[str, DefaultType]
    max_chunk_size: Union[int, DefaultType]
    overlap: Union[int, DefaultType]
    sentence_overlap: Union[int, DefaultType]

    def __init__(
        self,
        *,
        strategy: Union[str, DefaultType] = DEFAULT,
        max_chunk_size: Union[int, DefaultType] = DEFAULT,
        overlap: Union[int, DefaultType] = DEFAULT,
        sentence_overlap: Union[int, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        # Copy each explicitly-provided option into kwargs, preserving the
        # declaration order; DEFAULT sentinels are left out entirely.
        explicit = (
            ("strategy", strategy),
            ("max_chunk_size", max_chunk_size),
            ("overlap", overlap),
            ("sentence_overlap", sentence_overlap),
        )
        for option, value in explicit:
            if value is not DEFAULT:
                kwargs[option] = value
        super().__init__(kwargs)


class ClassificationInferenceOptions(AttrDict[Any]):
"""
:arg num_top_classes: Specifies the number of top class predictions to
Expand Down Expand Up @@ -1617,11 +1659,7 @@ class InnerHits(AttrDict[Any]):
DefaultType,
]
seq_no_primary_term: Union[bool, DefaultType]
fields: Union[
Union[str, InstrumentedField],
Sequence[Union[str, InstrumentedField]],
DefaultType,
]
fields: Union[Sequence[Union[str, InstrumentedField]], DefaultType]
sort: Union[
Union[Union[str, InstrumentedField], "SortOptions"],
Sequence[Union[Union[str, InstrumentedField], "SortOptions"]],
Expand Down Expand Up @@ -1656,11 +1694,7 @@ def __init__(
DefaultType,
] = DEFAULT,
seq_no_primary_term: Union[bool, DefaultType] = DEFAULT,
fields: Union[
Union[str, InstrumentedField],
Sequence[Union[str, InstrumentedField]],
DefaultType,
] = DEFAULT,
fields: Union[Sequence[Union[str, InstrumentedField]], DefaultType] = DEFAULT,
sort: Union[
Union[Union[str, InstrumentedField], "SortOptions"],
Sequence[Union[Union[str, InstrumentedField], "SortOptions"]],
Expand Down