Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions pinecone/db_data/dataclasses/search_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,19 @@ class SearchQuery:
The unique ID of the vector to be used as a query vector.
"""

match_terms: Optional[Dict[str, Any]] = None
"""
Specifies which terms must be present in the text of each search hit based on the specified strategy.
The match is performed against the text field specified in the integrated index field_map configuration.
Terms are normalized and tokenized into single tokens before matching, and order does not matter.
Expected format: {"strategy": "all", "terms": ["term1", "term2", ...]}
Currently only the "all" strategy is supported, which means all specified terms must be present.

**Limitations:** match_terms is only supported for sparse indexes with integrated embedding
configured to use the pinecone-sparse-english-v0 model.
Optional.
"""

def __post_init__(self):
"""
Converts `vector` to a `SearchQueryVectorTypedDict` instance if an enum is provided.
Expand All @@ -55,5 +68,6 @@ def as_dict(self) -> Dict[str, Any]:
"filter": self.filter,
"vector": self.vector,
"id": self.id,
"match_terms": self.match_terms,
}
return {k: v for k, v in d.items() if v is not None}
8 changes: 7 additions & 1 deletion pinecone/db_data/index_asyncio_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,13 @@ async def search(
"""
:param namespace: The namespace in the index to search.
:type namespace: str, required
:param query: The SearchQuery to use for the search.
:param query: The SearchQuery to use for the search. The query can include a ``match_terms`` field
to specify which terms must be present in the text of each search hit. The match_terms
should be a dict with ``strategy`` (str) and ``terms`` (List[str]) keys, e.g.
``{"strategy": "all", "terms": ["term1", "term2"]}``. Currently only the "all" strategy
is supported, which means all specified terms must be present.
**Note:** match_terms is only supported for sparse indexes with integrated embedding
configured to use the pinecone-sparse-english-v0 model.
:type query: Union[Dict, SearchQuery], required
:param rerank: The SearchRerank to use with the search request.
:type rerank: Union[Dict, SearchRerank], optional
Expand Down
8 changes: 7 additions & 1 deletion pinecone/db_data/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,13 @@ def search(
"""
:param namespace: The namespace in the index to search.
:type namespace: str, required
:param query: The SearchQuery to use for the search.
:param query: The SearchQuery to use for the search. The query can include a ``match_terms`` field
to specify which terms must be present in the text of each search hit. The match_terms
should be a dict with ``strategy`` (str) and ``terms`` (List[str]) keys, e.g.
``{"strategy": "all", "terms": ["term1", "term2"]}``. Currently only the "all" strategy
is supported, which means all specified terms must be present.
**Note:** match_terms is only supported for sparse indexes with integrated embedding
configured to use the pinecone-sparse-english-v0 model.
:type query: Union[Dict, SearchQuery], required
:param rerank: The SearchRerank to use with the search request.
:type rerank: Union[Dict, SearchRerank], optional
Expand Down
8 changes: 8 additions & 0 deletions pinecone/db_data/request_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
SearchRecordsRequest,
SearchRecordsRequestQuery,
SearchRecordsRequestRerank,
SearchMatchTerms,
VectorValues,
SearchRecordsVector,
UpsertRecord,
Expand Down Expand Up @@ -218,11 +219,18 @@ def _parse_search_query(
if isinstance(query_dict.get("vector", None), SearchQueryVector):
query_dict["vector"] = query_dict["vector"].as_dict()

# Extract match_terms for conversion if present
match_terms = query_dict.pop("match_terms", None)
if match_terms is not None and isinstance(match_terms, dict):
match_terms = SearchMatchTerms(**match_terms)

srrq = SearchRecordsRequestQuery(
**{k: v for k, v in query_dict.items() if k not in {"vector"}}
)
if query_dict.get("vector", None) is not None:
srrq.vector = IndexRequestFactory._parse_search_vector(query_dict["vector"])
if match_terms is not None:
srrq.match_terms = match_terms
return srrq

@staticmethod
Expand Down
13 changes: 13 additions & 0 deletions pinecone/db_data/types/search_query_typed_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,16 @@ class SearchQueryTypedDict(TypedDict):
"""
The unique ID of the vector to be used as a query vector.
"""

match_terms: Optional[Dict[str, Any]]
"""
Specifies which terms must be present in the text of each search hit based on the specified strategy.
The match is performed against the text field specified in the integrated index field_map configuration.
Terms are normalized and tokenized into single tokens before matching, and order does not matter.
Expected format: {"strategy": "all", "terms": ["term1", "term2", ...]}
Currently only the "all" strategy is supported, which means all specified terms must be present.

**Limitations:** match_terms is only supported for sparse indexes with integrated embedding
configured to use the pinecone-sparse-english-v0 model.
Optional.
"""
60 changes: 60 additions & 0 deletions tests/integration/data/test_search_and_upsert_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,66 @@ def test_search_with_rerank_query(self, model_idx, records_to_upsert):
assert len(response.result.hits) == 3
assert response.usage is not None

def test_search_with_match_terms_dict(self, model_idx, records_to_upsert):
    """Check that a plain-dict query carrying ``match_terms`` reaches the API.

    The records are upserted into a fresh random namespace and polled until
    fetchable, then the same dict query is sent through ``search_records``
    and its ``search`` alias. Both responses must be equal.

    ``match_terms`` is only supported for the pinecone-sparse-english-v0
    model, so a ``PineconeApiException`` that mentions ``match_terms`` or
    that model name is accepted as proof the parameter was forwarded.
    """
    from pinecone import PineconeApiException

    namespace = random_string(10)
    model_idx.upsert_records(namespace=namespace, records=records_to_upsert)

    record_ids = [record["id"] for record in records_to_upsert]
    poll_until_fetchable(model_idx, namespace, record_ids, timeout=180)

    # Both calls share the exact same keyword arguments (and query object).
    search_kwargs = {
        "namespace": namespace,
        "query": {
            "inputs": {"text": "Apple corporation"},
            "top_k": 3,
            "match_terms": {"strategy": "all", "terms": ["Apple", "corporation"]},
        },
    }

    try:
        primary = model_idx.search_records(**search_kwargs)
        assert primary.usage is not None
        # The ``search`` alias must return the same result as ``search_records``.
        via_alias = model_idx.search(**search_kwargs)
        assert primary == via_alias
    except PineconeApiException as err:
        # A rejection is acceptable only when it is about model compatibility,
        # not about the shape of the parameter we sent.
        message = str(err)
        assert "match_terms" in message or "pinecone-sparse-english-v0" in message

def test_search_with_match_terms_searchquery(self, model_idx, records_to_upsert):
    """Check that a ``SearchQuery`` carrying ``match_terms`` reaches the API.

    The records are upserted into a fresh random namespace and polled until
    fetchable, then the same ``SearchQuery`` instance is sent through
    ``search_records`` and its ``search`` alias. Both responses must be equal.

    ``match_terms`` is only supported for the pinecone-sparse-english-v0
    model, so a ``PineconeApiException`` that mentions ``match_terms`` or
    that model name is accepted as proof the parameter was forwarded.
    """
    from pinecone import SearchQuery, PineconeApiException

    namespace = random_string(10)
    model_idx.upsert_records(namespace=namespace, records=records_to_upsert)

    record_ids = [record["id"] for record in records_to_upsert]
    poll_until_fetchable(model_idx, namespace, record_ids, timeout=180)

    # Build the query through the dataclass rather than a raw dict.
    search_query = SearchQuery(
        inputs={"text": "Apple corporation"},
        top_k=3,
        match_terms={"strategy": "all", "terms": ["Apple", "corporation"]},
    )

    try:
        primary = model_idx.search_records(namespace=namespace, query=search_query)
        assert primary.usage is not None
        # The ``search`` alias must return the same result as ``search_records``.
        via_alias = model_idx.search(namespace=namespace, query=search_query)
        assert primary == via_alias
    except PineconeApiException as err:
        # A rejection is acceptable only when it is about model compatibility,
        # not about the shape of the parameter we sent.
        message = str(err)
        assert "match_terms" in message or "pinecone-sparse-english-v0" in message


@pytest.mark.skipif(
os.getenv("USE_GRPC") != "false", reason="These actions are not supported in gRPC"
Expand Down
60 changes: 60 additions & 0 deletions tests/integration/data_asyncio/test_search_and_upsert_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,66 @@ async def test_search_with_rerank_query(self, model_index_host, records_to_upser
assert response.usage is not None
await model_idx.close()

async def test_search_with_match_terms_dict(self, model_index_host, records_to_upsert):
    """Test that match_terms can be passed via dict query (asyncio client).

    The records are upserted into a fresh random namespace, freshness is
    awaited, and the same dict query is sent through ``search_records`` and
    its ``search`` alias. Both responses must be equal.

    ``match_terms`` is only supported for the pinecone-sparse-english-v0
    model, so a ``PineconeApiException`` that mentions ``match_terms`` or
    that model name is accepted as proof the parameter was forwarded.

    The client is closed in a ``finally`` clause so that a failing assertion
    or an unexpected error does not leave it open.
    """
    from pinecone import PineconeApiException

    model_idx = build_asyncioindex_client(model_index_host)
    try:
        target_namespace = random_string(10)
        await model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert)

        await poll_for_freshness(model_idx, target_namespace, len(records_to_upsert))

        # Search with match_terms using dict
        query_dict = {
            "inputs": {"text": "Apple corporation"},
            "top_k": 3,
            "match_terms": {"strategy": "all", "terms": ["Apple", "corporation"]},
        }
        # match_terms is only supported for pinecone-sparse-english-v0 model
        # If the API rejects it due to model incompatibility, that's expected
        # and shows our code is correctly passing the parameter
        try:
            response = await model_idx.search_records(
                namespace=target_namespace, query=query_dict
            )
            assert response.usage is not None
            # Test search alias
            response2 = await model_idx.search(namespace=target_namespace, query=query_dict)
            assert response == response2
        except PineconeApiException as e:
            # Verify the error is about model compatibility, not parameter format
            message = str(e)
            assert "match_terms" in message or "pinecone-sparse-english-v0" in message
    finally:
        # Always release the client, even when the test body fails.
        await model_idx.close()

async def test_search_with_match_terms_searchquery(self, model_index_host, records_to_upsert):
    """Test that match_terms can be passed via SearchQuery dataclass (asyncio client).

    The records are upserted into a fresh random namespace, freshness is
    awaited, and the same ``SearchQuery`` instance is sent through
    ``search_records`` and its ``search`` alias. Both responses must be equal.

    ``match_terms`` is only supported for the pinecone-sparse-english-v0
    model, so a ``PineconeApiException`` that mentions ``match_terms`` or
    that model name is accepted as proof the parameter was forwarded.

    The client is closed in a ``finally`` clause so that a failing assertion
    or an unexpected error does not leave it open.
    """
    from pinecone import SearchQuery, PineconeApiException

    model_idx = build_asyncioindex_client(model_index_host)
    try:
        target_namespace = random_string(10)
        await model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert)

        await poll_for_freshness(model_idx, target_namespace, len(records_to_upsert))

        # Search with match_terms using SearchQuery dataclass
        query = SearchQuery(
            inputs={"text": "Apple corporation"},
            top_k=3,
            match_terms={"strategy": "all", "terms": ["Apple", "corporation"]},
        )
        # match_terms is only supported for pinecone-sparse-english-v0 model
        # If the API rejects it due to model incompatibility, that's expected
        # and shows our code is correctly passing the parameter
        try:
            response = await model_idx.search_records(namespace=target_namespace, query=query)
            assert response.usage is not None
            # Test search alias
            response2 = await model_idx.search(namespace=target_namespace, query=query)
            assert response == response2
        except PineconeApiException as e:
            # Verify the error is about model compatibility, not parameter format
            message = str(e)
            assert "match_terms" in message or "pinecone-sparse-english-v0" in message
    finally:
        # Always release the client, even when the test body fails.
        await model_idx.close()


@pytest.mark.asyncio
class TestUpsertAndSearchRecordsErrorCases:
Expand Down
Loading