Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion elasticsearch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

# Ensure that a compatible version of elastic-transport is installed.
_version_groups = tuple(int(x) for x in re.search(r"^(\d+)\.(\d+)\.(\d+)", _elastic_transport_version).groups()) # type: ignore[union-attr]
if _version_groups < (8, 0, 0) or _version_groups > (9, 0, 0):
if _version_groups < (9, 1, 0) or _version_groups > (10, 0, 0):
raise ImportError(
"An incompatible version of elastic-transport is installed. Must be between "
"v8.0.0 and v9.0.0. Install the correct version with the following command: "
Expand Down
10 changes: 5 additions & 5 deletions elasticsearch/_otel.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ def span(
span_name = endpoint_id or method
with self.tracer.start_as_current_span(span_name) as otel_span:
otel_span.set_attribute("http.request.method", method)
otel_span.set_attribute("db.system", "elasticsearch")
otel_span.set_attribute("db.system.name", "elasticsearch")
if endpoint_id is not None:
otel_span.set_attribute("db.operation", endpoint_id)
otel_span.set_attribute("db.operation.name", endpoint_id)
for key, value in path_parts.items():
otel_span.set_attribute(f"db.elasticsearch.path_parts.{key}", value)
otel_span.set_attribute(f"db.operation.parameter.{key}", value)

yield OpenTelemetrySpan(
otel_span,
Expand All @@ -94,8 +94,8 @@ def helpers_span(self, span_name: str) -> Generator[OpenTelemetrySpan, None, Non
return

with self.tracer.start_as_current_span(span_name) as otel_span:
otel_span.set_attribute("db.system", "elasticsearch")
otel_span.set_attribute("db.operation", span_name)
otel_span.set_attribute("db.system.name", "elasticsearch")
otel_span.set_attribute("db.operation.name", span_name)
# Without a request method, Elastic APM does not display the traces
otel_span.set_attribute("http.request.method", "null")
yield OpenTelemetrySpan(otel_span)
Expand Down
8 changes: 8 additions & 0 deletions elasticsearch/dsl/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -4081,6 +4081,9 @@ def __init__(
class SparseVector(Field):
"""
:arg store:
:arg index_options: Additional index options for the sparse vector
field that controls the token pruning behavior of the sparse
vector field.
:arg meta: Metadata about the field.
:arg properties:
:arg ignore_above:
Expand All @@ -4099,6 +4102,9 @@ def __init__(
self,
*args: Any,
store: Union[bool, "DefaultType"] = DEFAULT,
index_options: Union[
"types.SparseVectorIndexOptions", Dict[str, Any], "DefaultType"
] = DEFAULT,
meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
properties: Union[Mapping[str, Field], "DefaultType"] = DEFAULT,
ignore_above: Union[int, "DefaultType"] = DEFAULT,
Expand All @@ -4113,6 +4119,8 @@ def __init__(
):
if store is not DEFAULT:
kwargs["store"] = store
if index_options is not DEFAULT:
kwargs["index_options"] = index_options
if meta is not DEFAULT:
kwargs["meta"] = meta
if properties is not DEFAULT:
Expand Down
64 changes: 62 additions & 2 deletions elasticsearch/dsl/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,26 @@ def __init__(

class ChunkingSettings(AttrDict[Any]):
"""
:arg strategy: (required) The chunking strategy: `sentence` or `word`.
Defaults to `sentence` if omitted.
:arg strategy: (required) The chunking strategy: `sentence`, `word`,
`none` or `recursive`. * If `strategy` is set to `recursive`,
you must also specify: - `max_chunk_size` - either `separators`
or `separator_group` Learn more about different chunking
strategies in the linked documentation. Defaults to `sentence` if
omitted.
:arg separator_group: (required) This parameter is only applicable
when using the `recursive` chunking strategy. Sets a predefined
list of separators in the saved chunking settings based on the
selected text type. Values can be `markdown` or `plaintext`.
Using this parameter is an alternative to manually specifying a
custom `separators` list.
:arg separators: (required) A list of strings used as possible split
points when chunking text with the `recursive` strategy. Each
string can be a plain string or a regular expression (regex)
pattern. The system tries each separator in order to split the
text, starting from the first item in the list. After splitting,
it attempts to recombine smaller pieces into larger chunks that
stay within the `max_chunk_size` limit, to reduce the total number
of chunks generated.
:arg max_chunk_size: (required) The maximum size of a chunk in words.
This value cannot be higher than `300` or lower than `20` (for
`sentence` strategy) or `10` (for `word` strategy). Defaults to
Expand All @@ -160,6 +178,8 @@ class ChunkingSettings(AttrDict[Any]):
"""

strategy: Union[str, DefaultType]
separator_group: Union[str, DefaultType]
separators: Union[Sequence[str], DefaultType]
max_chunk_size: Union[int, DefaultType]
overlap: Union[int, DefaultType]
sentence_overlap: Union[int, DefaultType]
Expand All @@ -168,13 +188,19 @@ def __init__(
self,
*,
strategy: Union[str, DefaultType] = DEFAULT,
separator_group: Union[str, DefaultType] = DEFAULT,
separators: Union[Sequence[str], DefaultType] = DEFAULT,
max_chunk_size: Union[int, DefaultType] = DEFAULT,
overlap: Union[int, DefaultType] = DEFAULT,
sentence_overlap: Union[int, DefaultType] = DEFAULT,
**kwargs: Any,
):
if strategy is not DEFAULT:
kwargs["strategy"] = strategy
if separator_group is not DEFAULT:
kwargs["separator_group"] = separator_group
if separators is not DEFAULT:
kwargs["separators"] = separators
if max_chunk_size is not DEFAULT:
kwargs["max_chunk_size"] = max_chunk_size
if overlap is not DEFAULT:
Expand Down Expand Up @@ -3723,6 +3749,38 @@ def __init__(
super().__init__(kwargs)


class SparseVectorIndexOptions(AttrDict[Any]):
    """
    Options controlling token pruning, as accepted by this type.

    :arg prune: Whether pruning is performed, i.e. whether the
        non-significant tokens are omitted from the query to improve
        query performance. When prune is true and no pruning_config is
        given, pruning still occurs using default values. Default: false
    :arg pruning_config: Optional pruning configuration. When enabled,
        non-significant tokens are omitted from the query in order to
        improve query performance. Only used if prune is set to true;
        if prune is true and pruning_config is not specified, default
        values are used.
    """

    prune: Union[bool, DefaultType]
    pruning_config: Union["TokenPruningConfig", Dict[str, Any], DefaultType]

    def __init__(
        self,
        *,
        prune: Union[bool, DefaultType] = DEFAULT,
        pruning_config: Union[
            "TokenPruningConfig", Dict[str, Any], DefaultType
        ] = DEFAULT,
        **kwargs: Any,
    ):
        # Only options the caller actually supplied are forwarded; anything
        # still holding the DEFAULT sentinel stays out of the mapping.
        supplied = (("prune", prune), ("pruning_config", pruning_config))
        kwargs.update(
            (name, value) for name, value in supplied if value is not DEFAULT
        )
        super().__init__(kwargs)


class SuggestContext(AttrDict[Any]):
"""
:arg name: (required)
Expand Down Expand Up @@ -5166,9 +5224,11 @@ def buckets_as_dict(self) -> Mapping[str, "FiltersBucket"]:
class FiltersBucket(AttrDict[Any]):
    """
    Attribute-style container for a single filters bucket. It only
    declares typed fields and adds no behavior of its own.

    :arg doc_count: (required)
    :arg key:
    """

    # Declared as required in the docstring above.
    doc_count: int
    # Not marked as required in the docstring above.
    key: str


class FrequentItemSetsAggregate(AttrDict[Any]):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ keywords = [
]
dynamic = ["version"]
dependencies = [
"elastic-transport>=8.15.1,<9",
"elastic-transport>=9.1.0,<10",
"python-dateutil",
"typing-extensions",
]
Expand Down
12 changes: 6 additions & 6 deletions test_elasticsearch/test_otel.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_minimal_span():
assert spans[0].name == "GET"
assert spans[0].attributes == {
"http.request.method": "GET",
"db.system": "elasticsearch",
"db.system.name": "elasticsearch",
}


Expand All @@ -92,11 +92,11 @@ def test_detailed_span():
assert spans[0].name == "ml.open_job"
assert spans[0].attributes == {
"http.request.method": "GET",
"db.system": "elasticsearch",
"db.operation": "ml.open_job",
"db.elasticsearch.path_parts.job_id": "my-job",
"db.elasticsearch.cluster.name": "e9106fc68e3044f0b1475b04bf4ffd5f",
"db.elasticsearch.node.name": "instance-0000000001",
"db.system.name": "elasticsearch",
"db.operation.name": "ml.open_job",
"db.operation.parameter.job_id": "my-job",
"db.namespace": "e9106fc68e3044f0b1475b04bf4ffd5f",
"elasticsearch.node.name": "instance-0000000001",
}


Expand Down
16 changes: 8 additions & 8 deletions test_elasticsearch/test_server/test_otel.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ def test_otel_end_to_end(sync_client):
assert spans[0].name == "search"
expected_attributes = {
"http.request.method": "POST",
"db.system": "elasticsearch",
"db.operation": "search",
"db.elasticsearch.path_parts.index": "logs-*",
"db.system.name": "elasticsearch",
"db.operation.name": "search",
"db.operation.parameter.index": "logs-*",
}
# Assert expected attributes are here, but allow other attributes too
# to make this test robust to elastic-transport changes
Expand Down Expand Up @@ -89,8 +89,8 @@ def test_otel_bulk(sync_client, elasticsearch_url, bulk_helper_name):
parent_span = spans.pop()
assert parent_span.name == f"helpers.{bulk_helper_name}"
assert parent_span.attributes == {
"db.system": "elasticsearch",
"db.operation": f"helpers.{bulk_helper_name}",
"db.system.name": "elasticsearch",
"db.operation.name": f"helpers.{bulk_helper_name}",
"http.request.method": "null",
}

Expand All @@ -99,9 +99,9 @@ def test_otel_bulk(sync_client, elasticsearch_url, bulk_helper_name):
assert span.name == "bulk"
expected_attributes = {
"http.request.method": "PUT",
"db.system": "elasticsearch",
"db.operation": "bulk",
"db.elasticsearch.path_parts.index": "test-index",
"db.system.name": "elasticsearch",
"db.operation.name": "bulk",
"db.operation.parameter.index": "test-index",
}
# Assert expected attributes are here, but allow other attributes too
# to make this test robust to elastic-transport changes
Expand Down