From 9fcb8dbff6440c1ffe4d2d6af5c78813b5a24ea9 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 30 Jan 2025 08:43:47 -0500 Subject: [PATCH 1/5] Implement search, search_records, and upsert_records --- codegen/build-oas.sh | 39 ++ pinecone/config/__init__.py | 2 +- pinecone/control/pinecone.py | 30 +- pinecone/core/openapi/db_data/model/hit.py | 28 +- .../openapi/db_data/model/upsert_record.py | 16 +- pinecone/data/dataclasses/__init__.py | 3 + pinecone/data/dataclasses/search_query.py | 59 +++ .../data/dataclasses/search_query_vector.py | 38 ++ pinecone/data/dataclasses/search_rerank.py | 61 +++ .../inference/inference_request_builder.py | 4 +- pinecone/data/index.py | 49 ++- pinecone/data/interfaces.py | 52 +++ pinecone/data/request_factory.py | 100 ++++- pinecone/data/types/__init__.py | 3 + .../data/types/search_query_typed_dict.py | 36 ++ .../types/search_query_vector_typed_dict.py | 25 ++ .../data/types/search_rerank_typed_dict.py | 41 ++ pinecone/openapi_support/api_client.py | 3 +- .../openapi_support/asyncio_api_client.py | 3 +- pinecone/openapi_support/configuration.py | 8 +- pinecone/openapi_support/exceptions.py | 2 +- pinecone/openapi_support/model_utils.py | 6 +- pinecone/openapi_support/rest_urllib3.py | 24 +- pinecone/openapi_support/rest_utils.py | 5 - pinecone/utils/__init__.py | 2 + pinecone/utils/convert_enum_to_string.py | 8 + poetry.lock | 29 +- pyproject.toml | 1 + tests/__init__.py | 5 + tests/integration/data/conftest.py | 49 ++- .../data/test_search_and_upsert_records.py | 221 ++++++++++ tests/integration/helpers/__init__.py | 1 + tests/integration/helpers/helpers.py | 5 + tests/perf/test_re_vs_in.py | 46 ++ tests/unit/data/test_request_factory.py | 401 ++++++++++++++++++ 35 files changed, 1326 insertions(+), 79 deletions(-) create mode 100644 pinecone/data/dataclasses/search_query.py create mode 100644 pinecone/data/dataclasses/search_query_vector.py create mode 100644 pinecone/data/dataclasses/search_rerank.py create mode 100644 
pinecone/data/types/search_query_typed_dict.py create mode 100644 pinecone/data/types/search_query_vector_typed_dict.py create mode 100644 pinecone/data/types/search_rerank_typed_dict.py create mode 100644 pinecone/utils/convert_enum_to_string.py create mode 100644 tests/integration/data/test_search_and_upsert_records.py create mode 100644 tests/perf/test_re_vs_in.py create mode 100644 tests/unit/data/test_request_factory.py diff --git a/codegen/build-oas.sh b/codegen/build-oas.sh index 738b12703..98321d6aa 100755 --- a/codegen/build-oas.sh +++ b/codegen/build-oas.sh @@ -134,6 +134,44 @@ remove_shared_classes() { done } +# Generated Python code attempts to internally map OpenAPI fields that begin +# with "_" to a non-underscored alternative. Along with a polymorphic object, +# this causes collisions and headaches. We massage the generated models to +# maintain the original field names from the OpenAPI spec and circumvent +# the remapping behavior as this is simpler for now than creating a fully +# custom java generator class. 
+clean_oas_underscore_manipulation() { + temp_file="$(mktemp)" + + db_data_destination="${destination}/db_data" + + # echo "Cleaning up upsert_record.py" + sed -i '' \ + -e "s/'id'/'_id'/g" \ + -e 's/self.id/self._id/g' \ + -e 's/id (/_id (/g' \ + -e 's/= id/= _id/g' \ + -e 's/id,/_id,/g' \ + -e "s/'vector\'/'_vector'/g" \ + -e "s/'embed\'/'_embed'/g" \ + -e 's/vector (/_vector (/g' \ + -e 's/embed (/_embed (/g' \ + "${db_data_destination}/model/upsert_record.py" + + # echo "Cleaning up hit.py" + sed -i '' \ + -e "s/'id'/'_id'/g" \ + -e "s/'score'/'_score'/g" \ + -e 's/ id, score,/ _id, _score,/g' \ + -e 's/id (/_id (/g' \ + -e 's/score (/_score (/g' \ + -e 's/self.id/self._id/g' \ + -e 's/self.score/self._score/g' \ + -e 's/= id/= _id/g' \ + -e 's/= score/= _score/g' \ + "${db_data_destination}/model/hit.py" +} + update_apis_repo update_templates_repo verify_spec_version $version @@ -144,6 +182,7 @@ mkdir -p "${destination}" for module in "${modules[@]}"; do generate_client $module done +clean_oas_underscore_manipulation # This also exists in the generated module code, but we need to reference it # in the pinecone.openapi_support package as well without creating a circular diff --git a/pinecone/config/__init__.py b/pinecone/config/__init__.py index 236260820..7abb7278e 100644 --- a/pinecone/config/__init__.py +++ b/pinecone/config/__init__.py @@ -5,4 +5,4 @@ from .pinecone_config import PineconeConfig if os.getenv("PINECONE_DEBUG") is not None: - logging.basicConfig(level=logging.DEBUG) + logging.getLogger("pinecone").setLevel(level=logging.DEBUG) diff --git a/pinecone/control/pinecone.py b/pinecone/control/pinecone.py index 569625685..5bf594e01 100644 --- a/pinecone/control/pinecone.py +++ b/pinecone/control/pinecone.py @@ -1,7 +1,6 @@ import time import logging from typing import Optional, Dict, Any, Union -from enum import Enum from .index_host_store import IndexHostStore from .pinecone_interface import PineconeDBControlInterface @@ -12,7 +11,12 @@ from 
pinecone.openapi_support.api_client import ApiClient -from pinecone.utils import normalize_host, setup_openapi_client, build_plugin_setup_client +from pinecone.utils import ( + normalize_host, + setup_openapi_client, + build_plugin_setup_client, + convert_enum_to_string, +) from pinecone.core.openapi.db_control.models import ( CreateCollectionRequest, CreateIndexForModelRequest, @@ -156,17 +160,12 @@ def __parse_tags(self, tags: Optional[Dict[str, str]]) -> IndexTags: def __parse_deletion_protection( self, deletion_protection: Union[DeletionProtection, str] ) -> DeletionProtectionModel: - deletion_protection = self.__parse_enum_to_string(deletion_protection) + deletion_protection = convert_enum_to_string(deletion_protection) if deletion_protection in ["enabled", "disabled"]: return DeletionProtectionModel(deletion_protection) else: raise ValueError("deletion_protection must be either 'enabled' or 'disabled'") - def __parse_enum_to_string(self, value: Union[Enum, str]) -> str: - if isinstance(value, Enum): - return value.value - return value - def __parse_index_spec(self, spec: Union[Dict, ServerlessSpec, PodSpec]) -> IndexSpec: if isinstance(spec, dict): if "serverless" in spec: @@ -227,9 +226,9 @@ def create_index( tags: Optional[Dict[str, str]] = None, ) -> IndexModel: if metric is not None: - metric = self.__parse_enum_to_string(metric) + metric = convert_enum_to_string(metric) if vector_type is not None: - vector_type = self.__parse_enum_to_string(vector_type) + vector_type = convert_enum_to_string(vector_type) if deletion_protection is not None: dp = self.__parse_deletion_protection(deletion_protection) else: @@ -270,8 +269,8 @@ def create_index_for_model( deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, timeout: Optional[int] = None, ) -> IndexModel: - cloud = self.__parse_enum_to_string(cloud) - region = self.__parse_enum_to_string(region) + cloud = convert_enum_to_string(cloud) + region = 
convert_enum_to_string(region) if deletion_protection is not None: dp = self.__parse_deletion_protection(deletion_protection) else: @@ -289,10 +288,7 @@ def create_index_for_model( raise ValueError(f"{field} is required in embed") parsed_embed = {} for key, value in embed.items(): - if isinstance(value, Enum): - parsed_embed[key] = value.value - else: - parsed_embed[key] = value + parsed_embed[key] = convert_enum_to_string(value) args = parse_non_empty_args( [ @@ -325,7 +321,7 @@ def is_ready(): # Wait indefinitely while not is_ready(): logger.debug( - f"Waiting for index {name} to be ready. Total wait time: {total_wait_time}" + f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." ) total_wait_time += 5 time.sleep(5) diff --git a/pinecone/core/openapi/db_data/model/hit.py b/pinecone/core/openapi/db_data/model/hit.py index 93583fdba..448ff2b12 100644 --- a/pinecone/core/openapi/db_data/model/hit.py +++ b/pinecone/core/openapi/db_data/model/hit.py @@ -78,8 +78,8 @@ def openapi_types(): and the value is attribute type. """ return { - "id": (str,), # noqa: E501 - "score": (float,), # noqa: E501 + "_id": (str,), # noqa: E501 + "_score": (float,), # noqa: E501 "fields": ({str: (bool, dict, float, int, list, str, none_type)},), # noqa: E501 } @@ -88,8 +88,8 @@ def discriminator(): return None attribute_map = { - "id": "_id", # noqa: E501 - "score": "_score", # noqa: E501 + "_id": "_id", # noqa: E501 + "_score": "_score", # noqa: E501 "fields": "fields", # noqa: E501 } @@ -99,12 +99,12 @@ def discriminator(): @classmethod @convert_js_args_to_python_args - def _from_openapi_data(cls, id, score, fields, *args, **kwargs): # noqa: E501 + def _from_openapi_data(cls, _id, _score, fields, *args, **kwargs): # noqa: E501 """Hit - a model defined in OpenAPI Args: - id (str): The record id of the search hit. - score (float): The similarity score of the returned record. + _id (str): The record id of the search hit. 
+ _score (float): The similarity score of the returned record. fields ({str: (bool, dict, float, int, list, str, none_type)}): The selected record fields associated with the search hit. Keyword Args: @@ -163,8 +163,8 @@ def _from_openapi_data(cls, id, score, fields, *args, **kwargs): # noqa: E501 self._configuration = _configuration self._visited_composed_classes = _visited_composed_classes + (self.__class__,) - self.id = id - self.score = score + self._id = _id + self._score = _score self.fields = fields for var_name, var_value in kwargs.items(): if ( @@ -190,12 +190,12 @@ def _from_openapi_data(cls, id, score, fields, *args, **kwargs): # noqa: E501 ) @convert_js_args_to_python_args - def __init__(self, id, score, fields, *args, **kwargs): # noqa: E501 + def __init__(self, _id, _score, fields, *args, **kwargs): # noqa: E501 """Hit - a model defined in OpenAPI Args: - id (str): The record id of the search hit. - score (float): The similarity score of the returned record. + _id (str): The record id of the search hit. + _score (float): The similarity score of the returned record. fields ({str: (bool, dict, float, int, list, str, none_type)}): The selected record fields associated with the search hit. Keyword Args: @@ -252,8 +252,8 @@ def __init__(self, id, score, fields, *args, **kwargs): # noqa: E501 self._configuration = _configuration self._visited_composed_classes = _visited_composed_classes + (self.__class__,) - self.id = id - self.score = score + self._id = _id + self._score = _score self.fields = fields for var_name, var_value in kwargs.items(): if ( diff --git a/pinecone/core/openapi/db_data/model/upsert_record.py b/pinecone/core/openapi/db_data/model/upsert_record.py index e76b9f65a..5fe322b27 100644 --- a/pinecone/core/openapi/db_data/model/upsert_record.py +++ b/pinecone/core/openapi/db_data/model/upsert_record.py @@ -78,7 +78,7 @@ def openapi_types(): and the value is attribute type. 
""" return { - "id": (str,) # noqa: E501 + "_id": (str,) # noqa: E501 } @cached_property @@ -86,7 +86,7 @@ def discriminator(): return None attribute_map = { - "id": "_id" # noqa: E501 + "_id": "_id" # noqa: E501 } read_only_vars = {} @@ -95,11 +95,11 @@ def discriminator(): @classmethod @convert_js_args_to_python_args - def _from_openapi_data(cls, id, *args, **kwargs): # noqa: E501 + def _from_openapi_data(cls, _id, *args, **kwargs): # noqa: E501 """UpsertRecord - a model defined in OpenAPI Args: - id (str): The unique ID of the record to upsert. Note that `id` can be used as an alias for `_id`. + _id (str): The unique ID of the record to upsert. Note that `id` can be used as an alias for `_id`. Keyword Args: _check_type (bool): if True, values for parameters in openapi_types @@ -157,7 +157,7 @@ def _from_openapi_data(cls, id, *args, **kwargs): # noqa: E501 self._configuration = _configuration self._visited_composed_classes = _visited_composed_classes + (self.__class__,) - self.id = id + self._id = _id for var_name, var_value in kwargs.items(): if ( var_name not in self.attribute_map @@ -182,11 +182,11 @@ def _from_openapi_data(cls, id, *args, **kwargs): # noqa: E501 ) @convert_js_args_to_python_args - def __init__(self, id, *args, **kwargs): # noqa: E501 + def __init__(self, _id, *args, **kwargs): # noqa: E501 """UpsertRecord - a model defined in OpenAPI Args: - id (str): The unique ID of the record to upsert. Note that `id` can be used as an alias for `_id`. + _id (str): The unique ID of the record to upsert. Note that `id` can be used as an alias for `_id`. 
Keyword Args: _check_type (bool): if True, values for parameters in openapi_types @@ -242,7 +242,7 @@ def __init__(self, id, *args, **kwargs): # noqa: E501 self._configuration = _configuration self._visited_composed_classes = _visited_composed_classes + (self.__class__,) - self.id = id + self._id = _id for var_name, var_value in kwargs.items(): if ( var_name not in self.attribute_map diff --git a/pinecone/data/dataclasses/__init__.py b/pinecone/data/dataclasses/__init__.py index c481d1ab4..0183eb74e 100644 --- a/pinecone/data/dataclasses/__init__.py +++ b/pinecone/data/dataclasses/__init__.py @@ -1,3 +1,6 @@ from .sparse_values import SparseValues from .vector import Vector from .fetch_response import FetchResponse +from .search_query import SearchQuery +from .search_query_vector import SearchQueryVector +from .search_rerank import SearchRerank diff --git a/pinecone/data/dataclasses/search_query.py b/pinecone/data/dataclasses/search_query.py new file mode 100644 index 000000000..3adb80829 --- /dev/null +++ b/pinecone/data/dataclasses/search_query.py @@ -0,0 +1,59 @@ +from dataclasses import dataclass +from typing import Optional, Any, Dict, Union +from .search_query_vector import SearchQueryVector +from ..types.search_query_vector_typed_dict import SearchQueryVectorTypedDict + + +@dataclass +class SearchQuery: + """ + SearchQuery represents the query when searching within a specific namespace. + """ + + inputs: Dict[str, Any] + """ + The input data to search with. + Required. + """ + + top_k: int + """ + The number of results to return with each search. + Required. + """ + + filter: Optional[Dict[str, Any]] = None + """ + The filter to apply to the search. + Optional. + """ + + vector: Optional[Union[SearchQueryVectorTypedDict, SearchQueryVector]] = None + """ + The vector values to search with. If provided, it overwrites the inputs. + """ + + id: Optional[str] = None + """ + The unique ID of the vector to be used as a query vector. 
+ """ + + def __post_init__(self): + """ + Converts `vector` to a `SearchQueryVectorTypedDict` instance if an enum is provided. + """ + if isinstance(self.vector, SearchQueryVector): + self.vector = self.vector.as_dict() + + def as_dict(self) -> Dict[str, Any]: + """ + Returns the SearchQuery as a dictionary. + """ + d = { + "inputs": self.inputs, + "top_k": self.top_k, + "filter": self.filter, + "vector": self.vector, + "id": self.id, + } + return {k: v for k, v in d.items() if v is not None} diff --git a/pinecone/data/dataclasses/search_query_vector.py b/pinecone/data/dataclasses/search_query_vector.py new file mode 100644 index 000000000..d829102f6 --- /dev/null +++ b/pinecone/data/dataclasses/search_query_vector.py @@ -0,0 +1,38 @@ +from dataclasses import dataclass +from typing import Optional, List + + +@dataclass +class SearchQueryVector: + """ + SearchQueryVector represents the vector values used to query. + """ + + values: Optional[List[float]] = None + """ + The vector data included in the search request. + Optional. + """ + + sparse_values: Optional[List[float]] = None + """ + The sparse embedding values to search with. + Optional. + """ + + sparse_indices: Optional[List[int]] = None + """ + The sparse embedding indices to search with. + Optional. + """ + + def as_dict(self) -> dict: + """ + Returns the SearchQueryVector as a dictionary. 
+ """ + d = { + "values": self.values, + "sparse_values": self.sparse_values, + "sparse_indices": self.sparse_indices, + } + return {k: v for k, v in d.items() if v is not None} diff --git a/pinecone/data/dataclasses/search_rerank.py b/pinecone/data/dataclasses/search_rerank.py new file mode 100644 index 000000000..1b9534ba3 --- /dev/null +++ b/pinecone/data/dataclasses/search_rerank.py @@ -0,0 +1,61 @@ +from dataclasses import dataclass +from typing import Optional, Dict, Any, List +from ..features.inference import RerankModel + + +@dataclass +class SearchRerank: + """ + SearchRerank represents a rerank request when searching within a specific namespace. + """ + + model: str + """ + The name of the [reranking model](https://docs.pinecone.io/guides/inference/understanding-inference#reranking-models) to use. + Required. + """ + + rank_fields: List[str] + """ + The fields to use for reranking. + Required. + """ + + top_n: Optional[int] = None + """ + The number of top results to return after reranking. Defaults to top_k. + Optional. + """ + + parameters: Optional[Dict[str, Any]] = None + """ + Additional model-specific parameters. Refer to the [model guide](https://docs.pinecone.io/guides/inference/understanding-inference#models) + for available model parameters. + Optional. + """ + + query: Optional[str] = None + """ + The query to rerank documents against. If a specific rerank query is specified, it overwrites + the query input that was provided at the top level. + """ + + def __post_init__(self): + """ + Converts `model` to a string if an instance of `RerankEnum` is provided. + """ + if isinstance(self.model, RerankModel): + self.model = self.model.value # Convert Enum to string + + def as_dict(self) -> Dict[str, Any]: + """ + Returns the SearchRerank as a dictionary. 
+ """ + d = { + "model": self.model, + "rank_fields": self.rank_fields, + "top_n": self.top_n, + "parameters": self.parameters, + "query": self.query, + } + return {k: v for k, v in d.items() if v is not None} diff --git a/pinecone/data/features/inference/inference_request_builder.py b/pinecone/data/features/inference/inference_request_builder.py index 1c4e7e12a..0a8611390 100644 --- a/pinecone/data/features/inference/inference_request_builder.py +++ b/pinecone/data/features/inference/inference_request_builder.py @@ -7,6 +7,7 @@ Document, RerankRequest, ) +from pinecone.utils import convert_enum_to_string class EmbedModel(Enum): @@ -27,8 +28,7 @@ def embed_request( inputs: Union[str, List[Dict], List[str]], parameters: Optional[Dict[str, Any]] = None, ) -> EmbedRequest: - if isinstance(model, EmbedModel): - model = model.value + model = convert_enum_to_string(model) embeddings_inputs: List[EmbedRequestInputs] = [] if isinstance(inputs, str): embeddings_inputs = [EmbedRequestInputs(text=inputs)] diff --git a/pinecone/data/index.py b/pinecone/data/index.py index 7958e808f..bb57df262 100644 --- a/pinecone/data/index.py +++ b/pinecone/data/index.py @@ -14,8 +14,10 @@ IndexDescription as DescribeIndexStatsResponse, UpsertResponse, ListResponse, + UpsertRecord, + SearchRecordsResponse, ) -from .dataclasses import Vector, SparseValues, FetchResponse +from .dataclasses import Vector, SparseValues, FetchResponse, SearchQuery, SearchRerank from .interfaces import IndexInterface from .request_factory import IndexRequestFactory from .features.bulk_import import ImportFeatureMixin @@ -26,6 +28,8 @@ VectorTuple, VectorTupleWithMetadata, FilterTypedDict, + SearchRerankTypedDict, + SearchQueryTypedDict, ) from ..utils import ( setup_openapi_client, @@ -201,6 +205,49 @@ def upsert_from_dataframe( return UpsertResponse(upserted_count=upserted_count) + def upsert_records(self, namespace: str, records: List[Dict]): + if namespace is None: + raise ValueError("namespace is required when 
upserting records") + if not records or len(records) == 0: + raise ValueError("No records provided") + + records_to_upsert = [] + for record in records: + if not record.get("_id") and not record.get("id"): + raise ValueError("Each record must have an '_id' or 'id' value") + + records_to_upsert.append( + UpsertRecord( + record.get("_id", record.get("id")), + **{k: v for k, v in record.items() if k not in {"_id", "id"}}, + ) + ) + + self._vector_api.upsert_records_namespace(namespace, records_to_upsert) + + def search( + self, + namespace: str, + query: Union[SearchQueryTypedDict, SearchQuery], + rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, + fields: Optional[List[str]] = ["*"], # Default to returning all fields + ) -> SearchRecordsResponse: + if not namespace: + raise Exception("Namespace is required when searching records") + + request = IndexRequestFactory.search_request(query=query, rerank=rerank, fields=fields) + + return self._vector_api.search_records_namespace(namespace, request) + + def search_records( + self, + namespace: str, + query: Union[SearchQueryTypedDict, SearchQuery], + rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, + fields: Optional[List[str]] = ["*"], # Default to returning all fields + ) -> SearchRecordsResponse: + return self.search(namespace, query=query, rerank=rerank, fields=fields) + @validate_and_convert_errors def delete( self, diff --git a/pinecone/data/interfaces.py b/pinecone/data/interfaces.py index 5dd2bd968..2930e2754 100644 --- a/pinecone/data/interfaces.py +++ b/pinecone/data/interfaces.py @@ -9,6 +9,7 @@ Vector, ListResponse, SparseValues, + SearchRecordsResponse, ) from .query_results_aggregator import QueryNamespacesResults from multiprocessing.pool import ApplyResult @@ -20,6 +21,7 @@ VectorTuple, VectorTupleWithMetadata, ) +from .dataclasses import SearchQuery, SearchRerank class IndexInterface(ABC): @@ -102,6 +104,56 @@ def upsert_from_dataframe( """ pass + @abstractmethod + def 
upsert_records(self, namespace: str, records: List[Dict]): + """ + Upsert records to a namespace. + + Converts records into embeddings and upserts them into a namespacce in the index. + + :param namespace: The namespace of the index to upsert records to. + :type namespace: str, required + :param records: The records to upsert into the index. + :type records: List[Dict], required + """ + pass + + @abstractmethod + def search( + self, + namespace: str, + query: Union[Dict, SearchQuery], + rerank: Optional[Union[Dict, SearchRerank]] = None, + fields: Optional[List[str]] = ["*"], # Default to returning all fields + ) -> SearchRecordsResponse: + """ + Search for records. + + This operation converts a query to a vector embedding and then searches a namespace. You + can optionally provide a reranking operation as part of the search. + + :param namespace: The namespace in the index to search. + :type namespace: str, required + :param query: The SearchQuery to use for the search. + :type query: Union[Dict, SearchQuery], required + :param rerank: The SearchRerank to use with the search request. + :type rerank: Union[Dict, SearchRerank], optional + :return: The records that match the search. 
+ :rtype: RecordModel + """ + pass + + @abstractmethod + def search_records( + self, + namespace: str, + query: Union[Dict, SearchQuery], + rerank: Optional[Union[Dict, SearchRerank]] = None, + fields: Optional[List[str]] = ["*"], # Default to returning all fields + ) -> SearchRecordsResponse: + """Alias of the search() method.""" + pass + @abstractmethod def delete( self, diff --git a/pinecone/data/request_factory.py b/pinecone/data/request_factory.py index a17965fa9..270963928 100644 --- a/pinecone/data/request_factory.py +++ b/pinecone/data/request_factory.py @@ -7,8 +7,13 @@ DeleteRequest, UpdateRequest, DescribeIndexStatsRequest, + SearchRecordsRequest, + SearchRecordsRequestQuery, + SearchRecordsRequestRerank, + VectorValues, + SearchRecordsVector, ) -from ..utils import parse_non_empty_args +from ..utils import parse_non_empty_args, convert_enum_to_string from .vector_factory import VectorFactory from .sparse_values_factory import SparseValuesFactory from pinecone.openapi_support import OPENAPI_ENDPOINT_PARAMS @@ -19,8 +24,12 @@ VectorTuple, VectorTupleWithMetadata, FilterTypedDict, + SearchQueryTypedDict, + SearchRerankTypedDict, + SearchQueryVectorTypedDict, ) -from .dataclasses import Vector, SparseValues + +from .dataclasses import Vector, SparseValues, SearchQuery, SearchRerank, SearchQueryVector logger = logging.getLogger(__name__) @@ -150,3 +159,90 @@ def list_paginated_args( ("pagination_token", pagination_token), ] ) + + @staticmethod + def search_request( + query: Union[SearchQueryTypedDict, SearchQuery], + rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, + fields: Optional[List[str]] = ["*"], # Default to returning all fields + ) -> SearchRecordsRequest: + request_args = parse_non_empty_args( + [ + ("query", IndexRequestFactory._parse_search_query(query)), + ("fields", fields), + ("rerank", IndexRequestFactory._parse_search_rerank(rerank)), + ] + ) + + return SearchRecordsRequest(**request_args) + + @staticmethod + def 
_parse_search_query( + query: Union[SearchQueryTypedDict, SearchQuery], + ) -> SearchRecordsRequestQuery: + if isinstance(query, SearchQuery): + query_dict = query.as_dict() + else: + query_dict = query + + required_fields = {"top_k"} + for key in required_fields: + if query_dict.get(key, None) is None: + raise ValueError(f"Missing required field '{key}' in search query.") + + # User-provided dict could contain object that need conversion + if isinstance(query_dict.get("vector", None), SearchQueryVector): + query_dict["vector"] = query_dict["vector"].as_dict() + + srrq = SearchRecordsRequestQuery( + **{k: v for k, v in query_dict.items() if k not in {"vector"}} + ) + if query_dict.get("vector", None) is not None: + srrq.vector = IndexRequestFactory._parse_search_vector(query_dict["vector"]) + return srrq + + @staticmethod + def _parse_search_vector( + vector: Optional[Union[SearchQueryVectorTypedDict, SearchQueryVector]], + ): + if vector is None: + return None + + if isinstance(vector, SearchQueryVector): + if vector.values is None and vector.sparse_values is None: + return None + vector_dict = vector.as_dict() + else: + vector_dict = vector + if ( + vector_dict.get("values", None) is None + and vector_dict.get("sparse_values", None) is None + ): + return None + + srv = SearchRecordsVector(**{k: v for k, v in vector_dict.items() if k not in {"values"}}) + + values = vector_dict.get("values", None) + if values is not None: + srv.values = VectorValues(value=values) + + return srv + + @staticmethod + def _parse_search_rerank(rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None): + if rerank is None: + return None + + if isinstance(rerank, SearchRerank): + rerank_dict = rerank.as_dict() + else: + rerank_dict = rerank + + required_fields = {"model", "rank_fields"} + for key in required_fields: + if rerank_dict.get(key, None) is None: + raise ValueError(f"Missing required field '{key}' in rerank.") + + rerank_dict["model"] = 
convert_enum_to_string(rerank_dict["model"]) + + return SearchRecordsRequestRerank(**rerank_dict) diff --git a/pinecone/data/types/__init__.py b/pinecone/data/types/__init__.py index 38f9c0879..ccc9fb50d 100644 --- a/pinecone/data/types/__init__.py +++ b/pinecone/data/types/__init__.py @@ -3,3 +3,6 @@ from .vector_metadata_dict import VectorMetadataTypedDict from .vector_tuple import VectorTuple, VectorTupleWithMetadata from .query_filter import FilterTypedDict +from .search_rerank_typed_dict import SearchRerankTypedDict +from .search_query_typed_dict import SearchQueryTypedDict +from .search_query_vector_typed_dict import SearchQueryVectorTypedDict diff --git a/pinecone/data/types/search_query_typed_dict.py b/pinecone/data/types/search_query_typed_dict.py new file mode 100644 index 000000000..118607ae0 --- /dev/null +++ b/pinecone/data/types/search_query_typed_dict.py @@ -0,0 +1,36 @@ +from typing import TypedDict, Optional, Union, Dict, Any +from .search_query_vector_typed_dict import SearchQueryVectorTypedDict + + +class SearchQueryTypedDict(TypedDict): + """ + SearchQuery represents the query when searching within a specific namespace. + """ + + inputs: Dict[str, Any] + """ + The input data to search with. + Required. + """ + + top_k: int + """ + The number of results to return with each search. + Required. + """ + + filter: Optional[Dict[str, Any]] = None + """ + The filter to apply to the search. + Optional. + """ + + vector: Optional[Union[SearchQueryVectorTypedDict]] + """ + The vector values to search with. If provided, it overwrites the inputs. + """ + + id: Optional[str] + """ + The unique ID of the vector to be used as a query vector. 
+ """ diff --git a/pinecone/data/types/search_query_vector_typed_dict.py b/pinecone/data/types/search_query_vector_typed_dict.py new file mode 100644 index 000000000..4269b904a --- /dev/null +++ b/pinecone/data/types/search_query_vector_typed_dict.py @@ -0,0 +1,25 @@ +from typing import TypedDict, Optional, List + + +class SearchQueryVectorTypedDict(TypedDict): + """ + SearchQueryVector represents the vector values used to query. + """ + + values: Optional[List[float]] + """ + The vector data included in the search request. + Optional. + """ + + sparse_values: Optional[List[float]] + """ + The sparse embedding values to search with. + Optional. + """ + + sparse_indices: Optional[List[int]] + """ + The sparse embedding indices to search with. + Optional. + """ diff --git a/pinecone/data/types/search_rerank_typed_dict.py b/pinecone/data/types/search_rerank_typed_dict.py new file mode 100644 index 000000000..89c4f8d8a --- /dev/null +++ b/pinecone/data/types/search_rerank_typed_dict.py @@ -0,0 +1,41 @@ +from typing import TypedDict, Optional, Union, Dict, Any +from ..features.inference import RerankModel + + +class SearchRerankTypedDict(TypedDict): + # """ + # SearchRerank represents a rerank request when searching within a specific namespace. + # """ + + model: Union[str, RerankModel] + # model: str + # """ + # The name of the [reranking model](https://docs.pinecone.io/guides/inference/understanding-inference#reranking-models) to use. + # Required. + # """ + + rank_fields: list[str] + # rank_fields: List[str] + # """ + # The fields to use for reranking. + # Required. + # """ + + top_n: Optional[int] + # """ + # The number of top results to return after reranking. Defaults to top_k. + # Optional. + # """ + + parameters: Optional[Dict[str, Any]] + # """ + # Additional model-specific parameters. Refer to the [model guide](https://docs.pinecone.io/guides/inference/understanding-inference#models) + # for available model parameters. + # Optional. 
+ # """ + + query: Optional[str] + # """ + # The query to rerank documents against. If a specific rerank query is specified, it overwrites + # the query input that was provided at the top level. + # """ diff --git a/pinecone/openapi_support/api_client.py b/pinecone/openapi_support/api_client.py index e9766b19e..19767a848 100644 --- a/pinecone/openapi_support/api_client.py +++ b/pinecone/openapi_support/api_client.py @@ -557,8 +557,7 @@ def request( ) else: raise PineconeApiValueError( - "http method must be `GET`, `HEAD`, `OPTIONS`," - " `POST`, `PATCH`, `PUT` or `DELETE`." + "http method must be `GET`, `HEAD`, `OPTIONS`, `POST`, `PATCH`, `PUT` or `DELETE`." ) @classmethod diff --git a/pinecone/openapi_support/asyncio_api_client.py b/pinecone/openapi_support/asyncio_api_client.py index d21b0a1cb..068259d59 100644 --- a/pinecone/openapi_support/asyncio_api_client.py +++ b/pinecone/openapi_support/asyncio_api_client.py @@ -552,8 +552,7 @@ async def request( ) else: raise PineconeApiValueError( - "http method must be `GET`, `HEAD`, `OPTIONS`," - " `POST`, `PATCH`, `PUT` or `DELETE`." + "http method must be `GET`, `HEAD`, `OPTIONS`, `POST`, `PATCH`, `PUT` or `DELETE`." ) @classmethod diff --git a/pinecone/openapi_support/configuration.py b/pinecone/openapi_support/configuration.py index fa7dfa8b6..1fdee2ea2 100644 --- a/pinecone/openapi_support/configuration.py +++ b/pinecone/openapi_support/configuration.py @@ -418,8 +418,9 @@ def get_host_from_settings(self, index, variables=None, servers=None): server = servers[index] except IndexError: raise ValueError( - "Invalid index {0} when selecting the host settings. " - "Must be less than {1}".format(index, len(servers)) + "Invalid index {0} when selecting the host settings. 
Must be less than {1}".format( + index, len(servers) + ) ) url = server["url"] @@ -430,8 +431,7 @@ def get_host_from_settings(self, index, variables=None, servers=None): if "enum_values" in variable and used_value not in variable["enum_values"]: raise ValueError( - "The variable `{0}` in the host URL has invalid value " - "{1}. Must be {2}.".format( + "The variable `{0}` in the host URL has invalid value {1}. Must be {2}.".format( variable_name, variables[variable_name], variable["enum_values"] ) ) diff --git a/pinecone/openapi_support/exceptions.py b/pinecone/openapi_support/exceptions.py index cd6045cd3..4107c4e75 100644 --- a/pinecone/openapi_support/exceptions.py +++ b/pinecone/openapi_support/exceptions.py @@ -99,7 +99,7 @@ def __init__(self, status=None, reason=None, http_resp=None): def __str__(self): """Custom error messages for exception""" - error_message = "({0})\n" "Reason: {1}\n".format(self.status, self.reason) + error_message = "({0})\nReason: {1}\n".format(self.status, self.reason) if self.headers: error_message += "HTTP response headers: {0}\n".format(self.headers) diff --git a/pinecone/openapi_support/model_utils.py b/pinecone/openapi_support/model_utils.py index 9db3c72b7..a96e59b8e 100644 --- a/pinecone/openapi_support/model_utils.py +++ b/pinecone/openapi_support/model_utils.py @@ -1653,10 +1653,8 @@ def type_error_message(var_value=None, var_name=None, valid_classes=None, key_ty if key_type: key_or_value = "key" valid_classes_phrase = get_valid_classes_phrase(valid_classes) - msg = ( - "Invalid type for variable '{0}'. Required {1} type {2} and " "passed type was {3}".format( - var_name, key_or_value, valid_classes_phrase, type(var_value).__name__ - ) + msg = "Invalid type for variable '{0}'. 
Required {1} type {2} and passed type was {3}".format( + var_name, key_or_value, valid_classes_phrase, type(var_value).__name__ ) return msg diff --git a/pinecone/openapi_support/rest_urllib3.py b/pinecone/openapi_support/rest_urllib3.py index fef886296..59d3457e5 100644 --- a/pinecone/openapi_support/rest_urllib3.py +++ b/pinecone/openapi_support/rest_urllib3.py @@ -1,6 +1,5 @@ import json import logging -import re import ssl import os from urllib.parse import urlencode, quote @@ -162,12 +161,18 @@ def request( headers["Content-Type"] = "application/json" if query_params: url += "?" + urlencode(query_params, quote_via=quote) - if ("Content-Type" not in headers) or ( - re.search("json", headers["Content-Type"], re.IGNORECASE) - ): - request_body = None - if body is not None: - request_body = json.dumps(body) + + content_type = headers.get("Content-Type", "").lower() + if content_type == "" or ("json" in content_type): + if body is None: + request_body = None + else: + if content_type == "application/x-ndjson": + # for x-ndjson requests, we are expecting an array of elements + # that need to be converted to a newline separated string + request_body = "\n".join(json.dumps(element) for element in body) + else: # content_type == "application/json": + request_body = json.dumps(body) r = self.pool_manager.request( method, url, @@ -176,7 +181,8 @@ def request( timeout=timeout, headers=headers, ) - elif headers["Content-Type"] == "application/x-www-form-urlencoded": # noqa: E501 + + elif content_type == "application/x-www-form-urlencoded": # noqa: E501 r = self.pool_manager.request( method, url, @@ -186,7 +192,7 @@ def request( timeout=timeout, headers=headers, ) - elif headers["Content-Type"] == "multipart/form-data": + elif content_type == "multipart/form-data": # must del headers['Content-Type'], or the correct # Content-Type which generated by urllib3 will be # overwritten. 
diff --git a/pinecone/openapi_support/rest_utils.py b/pinecone/openapi_support/rest_utils.py index b002ef579..85a957ebd 100644 --- a/pinecone/openapi_support/rest_utils.py +++ b/pinecone/openapi_support/rest_utils.py @@ -11,11 +11,6 @@ ) logger = logging.getLogger(__name__) -logging.basicConfig( - format="%(levelname)s [%(asctime)s] %(name)s - %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - level=logging.DEBUG, -) class RESTResponse(io.IOBase): diff --git a/pinecone/utils/__init__.py b/pinecone/utils/__init__.py index 34e1d5aa5..5cd4d36c2 100644 --- a/pinecone/utils/__init__.py +++ b/pinecone/utils/__init__.py @@ -4,6 +4,7 @@ from .deprecation_notice import warn_deprecated from .fix_tuple_length import fix_tuple_length from .convert_to_list import convert_to_list +from .convert_enum_to_string import convert_enum_to_string from .normalize_host import normalize_host from .setup_openapi_client import setup_openapi_client, build_plugin_setup_client from .parse_args import parse_non_empty_args @@ -25,4 +26,5 @@ "docslinks", "install_json_repr_override", "validate_and_convert_errors", + "convert_enum_to_string", ] diff --git a/pinecone/utils/convert_enum_to_string.py b/pinecone/utils/convert_enum_to_string.py new file mode 100644 index 000000000..ba6558c2d --- /dev/null +++ b/pinecone/utils/convert_enum_to_string.py @@ -0,0 +1,8 @@ +from typing import Union +from enum import Enum + + +def convert_enum_to_string(value: Union[Enum, str]) -> str: + if isinstance(value, Enum): + return value.value + return value diff --git a/poetry.lock b/poetry.lock index b2ac36c89..2e5b50ed2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1627,6 +1627,33 @@ urllib3 = ">=1.25.10,<3.0" [package.extras] tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "tomli", "tomli-w", "types-PyYAML", "types-requests"] +[[package]] +name = "ruff" +version = "0.9.3" +description = "An extremely fast Python linter and code formatter, 
written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.9.3-py3-none-linux_armv6l.whl", hash = "sha256:7f39b879064c7d9670197d91124a75d118d00b0990586549949aae80cdc16624"}, + {file = "ruff-0.9.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a187171e7c09efa4b4cc30ee5d0d55a8d6c5311b3e1b74ac5cb96cc89bafc43c"}, + {file = "ruff-0.9.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c59ab92f8e92d6725b7ded9d4a31be3ef42688a115c6d3da9457a5bda140e2b4"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dc153c25e715be41bb228bc651c1e9b1a88d5c6e5ed0194fa0dfea02b026439"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:646909a1e25e0dc28fbc529eab8eb7bb583079628e8cbe738192853dbbe43af5"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a5a46e09355695fbdbb30ed9889d6cf1c61b77b700a9fafc21b41f097bfbba4"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c4bb09d2bbb394e3730d0918c00276e79b2de70ec2a5231cd4ebb51a57df9ba1"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96a87ec31dc1044d8c2da2ebbed1c456d9b561e7d087734336518181b26b3aa5"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bb7554aca6f842645022fe2d301c264e6925baa708b392867b7a62645304df4"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cabc332b7075a914ecea912cd1f3d4370489c8018f2c945a30bcc934e3bc06a6"}, + {file = "ruff-0.9.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:33866c3cc2a575cbd546f2cd02bdd466fed65118e4365ee538a3deffd6fcb730"}, + {file = "ruff-0.9.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:006e5de2621304c8810bcd2ee101587712fa93b4f955ed0985907a36c427e0c2"}, + {file = "ruff-0.9.3-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:ba6eea4459dbd6b1be4e6bfc766079fb9b8dd2e5a35aff6baee4d9b1514ea519"}, + {file = "ruff-0.9.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:90230a6b8055ad47d3325e9ee8f8a9ae7e273078a66401ac66df68943ced029b"}, + {file = "ruff-0.9.3-py3-none-win32.whl", hash = "sha256:eabe5eb2c19a42f4808c03b82bd313fc84d4e395133fb3fc1b1516170a31213c"}, + {file = "ruff-0.9.3-py3-none-win_amd64.whl", hash = "sha256:040ceb7f20791dfa0e78b4230ee9dce23da3b64dd5848e40e3bf3ab76468dcf4"}, + {file = "ruff-0.9.3-py3-none-win_arm64.whl", hash = "sha256:800d773f6d4d33b0a3c60e2c6ae8f4c202ea2de056365acfa519aa48acf28e0b"}, + {file = "ruff-0.9.3.tar.gz", hash = "sha256:8293f89985a090ebc3ed1064df31f3b4b56320cdfcec8b60d3295bddb955c22a"}, +] + [[package]] name = "six" version = "1.16.0" @@ -1910,4 +1937,4 @@ grpc = ["googleapis-common-protos", "grpcio", "grpcio", "lz4", "protobuf", "prot [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "ed686662d6a59d70a89367d84a17857aef9143eeff20610190c95dcb5be7c71b" +content-hash = "b1bd9c6ecec94c1615c1b09c43808fca8344b16b5c15c61ec11775ad70b82448" diff --git a/pyproject.toml b/pyproject.toml index 911cb5ef9..c6f8665c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,6 +88,7 @@ pytest-benchmark = [ ] urllib3_mock = "0.3.3" responses = ">=0.8.1" +ruff = "^0.9.3" [tool.poetry.extras] diff --git a/tests/__init__.py b/tests/__init__.py index e69de29bb..f2dab92a1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,5 @@ +import logging + +logging.basicConfig( + format="%(levelname)s [%(asctime)s] %(name)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S" +) diff --git a/tests/integration/data/conftest.py b/tests/integration/data/conftest.py index 473bf809e..c7498cb81 100644 --- a/tests/integration/data/conftest.py +++ b/tests/integration/data/conftest.py @@ -3,6 +3,7 @@ import json from ..helpers import get_environment_var, generate_index_name import logging +from pinecone import EmbedModel, CloudProvider, AwsRegion, IndexEmbed 
logger = logging.getLogger(__name__) @@ -61,6 +62,11 @@ def sparse_index_name(): return generate_index_name("sparse") +@pytest.fixture(scope="session") +def model_index_name(): + return generate_index_name("embed") + + def build_index_client(client, index_name, index_host): if use_grpc(): return client.Index(name=index_name, host=index_host) @@ -78,20 +84,51 @@ def sparse_idx(client, sparse_index_name, sparse_index_host): return build_index_client(client, sparse_index_name, sparse_index_host) +@pytest.fixture(scope="session") +def model_idx(client, model_index_name, model_index_host): + return build_index_client(client, model_index_name, model_index_host) + + +@pytest.fixture(scope="session") +def model_index_host(model_index_name): + pc = build_client() + + if model_index_name not in pc.list_indexes().names(): + logger.info(f"Creating index {model_index_name}") + pc.create_index_for_model( + name=model_index_name, + cloud=CloudProvider.AWS, + region=AwsRegion.US_WEST_2, + embed=IndexEmbed( + model=EmbedModel.Multilingual_E5_Large, + field_map={"text": "my_text_field"}, + metric="cosine", + ), + ) + else: + logger.info(f"Index {model_index_name} already exists") + + description = pc.describe_index(name=model_index_name) + yield description.host + + logger.info(f"Deleting index {model_index_name}") + pc.delete_index(model_index_name, -1) + + @pytest.fixture(scope="session") def index_host(index_name, metric, spec): pc = build_client() if index_name not in pc.list_indexes().names(): - logger.info("Creating index with name: " + index_name) + logger.info(f"Creating index {index_name}") pc.create_index(name=index_name, dimension=2, metric=metric, spec=spec) else: - logger.info("Index with name " + index_name + " already exists") + logger.info(f"Index {index_name} already exists") description = pc.describe_index(name=index_name) yield description.host - logger.info("Deleting index with name: " + index_name) + logger.info(f"Deleting index {index_name}") 
pc.delete_index(index_name, -1) @@ -100,15 +137,15 @@ def sparse_index_host(sparse_index_name, spec): pc = build_client() if sparse_index_name not in pc.list_indexes().names(): - logger.info("Creating sparse index with name: " + sparse_index_name) + logger.info(f"Creating index {sparse_index_name}") pc.create_index( name=sparse_index_name, metric="dotproduct", spec=spec, vector_type="sparse" ) else: - logger.info("Sparse index with name " + sparse_index_name + " already exists") + logger.info(f"Index {sparse_index_name} already exists") description = pc.describe_index(name=sparse_index_name) yield description.host - logger.info("Deleting sparse index with name: " + sparse_index_name) + logger.info(f"Deleting index {sparse_index_name}") pc.delete_index(sparse_index_name, -1) diff --git a/tests/integration/data/test_search_and_upsert_records.py b/tests/integration/data/test_search_and_upsert_records.py new file mode 100644 index 000000000..0b0f6444c --- /dev/null +++ b/tests/integration/data/test_search_and_upsert_records.py @@ -0,0 +1,221 @@ +import time +import pytest +from typing import List +from ..helpers import random_string, embedding_values +import logging + +from pinecone import RerankModel, PineconeApiException +from pinecone.data import _Index + +logger = logging.getLogger(__name__) + +model_index_dimension = 1024 # Currently controlled by "multilingual-e5-large" + + +def poll_until_fetchable(idx: _Index, namespace: str, ids: List[str], timeout: int): + found = False + total_wait = 0 + interval = 5 + + while not found: + if total_wait > timeout: + logger.debug(f"Failed to fetch records within {timeout} seconds.") + raise TimeoutError(f"Failed to fetch records within {timeout} seconds.") + time.sleep(interval) + total_wait += interval + + response = idx.fetch(ids=ids, namespace=namespace) + logger.debug( + f"Polling {total_wait} seconds for fetch response with ids {ids} in namespace {namespace}" + ) + + if len(response.vectors) == len(ids): + found = True + 
+ +@pytest.fixture +def records_to_upsert(): + return [ + { + "id": "test1", + "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.", + "more_stuff": "more stuff!", + }, + { + "id": "test2", + "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.", + "more_stuff": "more stuff!", + }, + { + "id": "test3", + "my_text_field": "Many people enjoy eating apples as a healthy snack.", + "more_stuff": "more stuff!", + }, + { + "id": "test4", + "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.", + "more_stuff": "more stuff!", + }, + { + "id": "test5", + "my_text_field": "An apple a day keeps the doctor away, as the saying goes.", + "more_stuff": "more stuff!", + }, + { + "id": "test6", + "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.", + "more_stuff": "extra more stuff!", + }, + ] + + +class TestUpsertAndSearchRecords: + def test_search_records(self, model_idx, records_to_upsert): + target_namespace = random_string(10) + model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + poll_until_fetchable( + model_idx, target_namespace, [r["id"] for r in records_to_upsert], timeout=180 + ) + + response = model_idx.search_records( + namespace=target_namespace, query={"inputs": {"text": "Apple corporation"}, "top_k": 3} + ) + assert len(response.result.hits) == 3 + assert response.usage is not None + + # Test search alias + response2 = model_idx.search( + namespace=target_namespace, query={"inputs": {"text": "Apple corporation"}, "top_k": 3} + ) + assert response == response2 + + # validate similar records and contents + similar_record_ids = ["test2", "test4", "test6"] + + for hit in response.result.hits: + assert hit._id in similar_record_ids + similar_record_ids.remove(hit._id) + + assert hit._score > 0 + assert 
hit.fields.get("my_text_field") is not None + assert hit.fields.get("more_stuff") is not None + + # search for records while filtering fields + response_filtered = model_idx.search_records( + namespace="test-namespace", + query={"inputs": {"text": "Apple corporation"}, "top_k": 3}, + fields=["more_stuff"], + ) + + for hit in response_filtered.result.hits: + assert hit.fields.get("my_text_field") is None + assert hit.fields.get("more_stuff") is not None + + def test_search_records_with_vector(self, model_idx, records_to_upsert): + target_namespace = random_string(10) + model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + poll_until_fetchable( + model_idx, target_namespace, [r["id"] for r in records_to_upsert], timeout=180 + ) + + # Search for similar records + search_query = {"top_k": 3, "vector": {"values": embedding_values(model_index_dimension)}} + response = model_idx.search_records(namespace=target_namespace, query=search_query) + assert len(response.result.hits) == 3 + assert response.usage is not None + + # Test search alias + response2 = model_idx.search(namespace=target_namespace, query=search_query) + assert response == response2 + + @pytest.mark.parametrize("rerank_model", ["bge-reranker-v2-m3", RerankModel.Bge_Reranker_V2_M3]) + def test_search_with_rerank(self, model_idx, records_to_upsert, rerank_model): + target_namespace = random_string(10) + model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + poll_until_fetchable( + model_idx, target_namespace, [r["id"] for r in records_to_upsert], timeout=180 + ) + + # Search for similar records + response = model_idx.search_records( + namespace=target_namespace, + query={"inputs": {"text": "Apple corporation"}, "top_k": 3}, + rerank={"model": rerank_model, "rank_fields": ["my_text_field"], "top_n": 3}, + ) + assert len(response.result.hits) == 3 + assert response.usage is not None + + # validate similar records and contents + similar_record_ids = 
["test6", "test4", "test2"] + for hit in response.result.hits: + assert hit._id in similar_record_ids + similar_record_ids.remove(hit._id) + + assert hit._score > 0 + assert hit.fields.get("my_text_field") is not None + assert hit.fields.get("more_stuff") is not None + + def test_search_with_rerank_query(self, model_idx, records_to_upsert): + target_namespace = random_string(10) + model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + # Sleep for freshness + poll_until_fetchable( + model_idx, target_namespace, [r["id"] for r in records_to_upsert], timeout=180 + ) + + # Search for similar records + response = model_idx.search_records( + namespace=target_namespace, + query={"inputs": {"text": "Apple corporation"}, "top_k": 3}, + rerank={ + "model": "bge-reranker-v2-m3", + "rank_fields": ["my_text_field"], + "top_n": 3, + "query": "Apple corporation", + }, + ) + assert len(response.result.hits) == 3 + assert response.usage is not None + + +class TestUpsertAndSearchRecordsErrorCases: + def test_search_with_rerank_nonexistent_model_error(self, model_idx, records_to_upsert): + target_namespace = random_string(10) + model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + poll_until_fetchable( + model_idx, target_namespace, [r["id"] for r in records_to_upsert], timeout=180 + ) + + with pytest.raises(PineconeApiException, match=r"Model 'non-existent-model' not found"): + model_idx.search_records( + namespace=target_namespace, + query={"inputs": {"text": "Apple corporation"}, "top_k": 3}, + rerank={ + "model": "non-existent-model", + "rank_fields": ["my_text_field"], + "top_n": 3, + }, + ) + + @pytest.mark.skip(reason="Possible bug in the API") + def test_search_with_rerank_empty_rank_fields_error(self, model_idx, records_to_upsert): + target_namespace = random_string(10) + model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + poll_until_fetchable( + model_idx, target_namespace, [r["id"] 
for r in records_to_upsert], timeout=180 + ) + + with pytest.raises( + PineconeApiException, match=r"Only one rank field is supported for model" + ): + model_idx.search_records( + namespace="test-namespace", + query={"inputs": {"text": "Apple corporation"}, "top_k": 3}, + rerank={"model": "bge-reranker-v2-m3", "rank_fields": [], "top_n": 3}, + ) diff --git a/tests/integration/helpers/__init__.py b/tests/integration/helpers/__init__.py index 91f99733f..f233d0895 100644 --- a/tests/integration/helpers/__init__.py +++ b/tests/integration/helpers/__init__.py @@ -7,4 +7,5 @@ poll_stats_for_namespace, poll_fetch_for_ids_in_namespace, embedding_values, + jsonprint, ) diff --git a/tests/integration/helpers/helpers.py b/tests/integration/helpers/helpers.py index a2e4d7b92..795f1a5ed 100644 --- a/tests/integration/helpers/helpers.py +++ b/tests/integration/helpers/helpers.py @@ -6,6 +6,7 @@ import logging from typing import Any from datetime import datetime +import json logger = logging.getLogger(__name__) @@ -113,3 +114,7 @@ def poll_fetch_for_ids_in_namespace(idx, ids, namespace): def fake_api_key(): return "-".join([random_string(x) for x in [8, 4, 4, 4, 12]]) + + +def jsonprint(obj): + print(json.dumps(obj.to_dict(), indent=2)) diff --git a/tests/perf/test_re_vs_in.py b/tests/perf/test_re_vs_in.py new file mode 100644 index 000000000..8d32a94c1 --- /dev/null +++ b/tests/perf/test_re_vs_in.py @@ -0,0 +1,46 @@ +import pytest +import re + + +def use_in_match(headers): + ct = headers.get("Content-Type", "") + return "json" in ct + + +def use_re_match(headers): + ct = headers.get("Content-Type", "") + return re.search("json", ct, re.IGNORECASE) + + +class TestStringMatchPerformance: + @pytest.mark.parametrize( + "headers", + [ + {"Content-Type": "application/json"} + # {"Content-Type": "application/JSON"}, + # {"Content-Type": "application/xml"}, + # {"Content-Type": "text/html"}, + # {"Content-Type": "text/plain"}, + # {"Content-Type": "application/xml"}, + # 
{"Content-Type": "text/html"}, + # {"Content-Type": "text/plain"}, + ], + ) + def test_using_in(self, benchmark, headers): + benchmark.pedantic(use_in_match, (headers,), rounds=100, warmup_rounds=1, iterations=50) + + @pytest.mark.parametrize( + "headers", + [ + {"Content-Type": "application/json"} + # {"Content-Type": "application/JSON"}, + # {"Content-Type": "application/xml"}, + # {"Content-Type": "text/html"}, + # {"Content-Type": "text/plain"}, + # {"Content-Type": "application/xml"}, + # {"Content-Type": "text/html"}, + # {"Content-Type": "text/plain"}, + ], + ) + def test_using_re(self, benchmark, headers): + benchmark.pedantic(use_re_match, (headers,), rounds=100, warmup_rounds=1, iterations=50) diff --git a/tests/unit/data/test_request_factory.py b/tests/unit/data/test_request_factory.py new file mode 100644 index 000000000..087436c91 --- /dev/null +++ b/tests/unit/data/test_request_factory.py @@ -0,0 +1,401 @@ +import pytest +from pinecone.data.request_factory import ( + IndexRequestFactory, + SearchQuery, + SearchQueryVector, + SearchRerank, +) + +from pinecone.core.openapi.db_data.models import ( + SearchRecordsRequestQuery, + SearchRecordsRequestRerank, + SearchRecordsVector, + VectorValues, + SearchRecordsRequest, +) + +from pinecone import RerankModel + + +class TestIndexRequestFactory: + def test_parsing_search_query_vector_dict(self): + vector1 = IndexRequestFactory._parse_search_vector(None) + assert vector1 is None + + vector2 = IndexRequestFactory._parse_search_vector({"values": [0.1, 0.2, 0.3]}) + assert vector2 == SearchRecordsVector(values=VectorValues([0.1, 0.2, 0.3])) + + vector3 = IndexRequestFactory._parse_search_vector( + { + "values": [0.1, 0.2, 0.3], + "sparse_values": [0.4, 0.5, 0.6], + "sparse_indices": [1, 2, 3], + } + ) + assert vector3 == SearchRecordsVector( + values=VectorValues([0.1, 0.2, 0.3]), + sparse_indices=[1, 2, 3], + sparse_values=[0.4, 0.5, 0.6], + ) + + vector4 = IndexRequestFactory._parse_search_vector( + {"values": 
[], "sparse_values": [0.4, 0.5, 0.6], "sparse_indices": [1, 2, 3]} + ) + assert vector4 == SearchRecordsVector( + values=VectorValues([]), sparse_indices=[1, 2, 3], sparse_values=[0.4, 0.5, 0.6] + ) + + vector5 = IndexRequestFactory._parse_search_vector( + {"values": [0.1, 0.2, 0.3], "sparse_values": [], "sparse_indices": []} + ) + assert vector5 == SearchRecordsVector( + values=VectorValues([0.1, 0.2, 0.3]), sparse_indices=[], sparse_values=[] + ) + + vector6 = IndexRequestFactory._parse_search_vector( + {"values": [], "sparse_values": [], "sparse_indices": []} + ) + assert vector6 == SearchRecordsVector( + values=VectorValues([]), sparse_indices=[], sparse_values=[] + ) + + vector7 = IndexRequestFactory._parse_search_vector({}) + assert vector7 is None + + vector8 = IndexRequestFactory._parse_search_vector({"values": []}) + assert vector8 == SearchRecordsVector(values=VectorValues([])) + + vector9 = IndexRequestFactory._parse_search_vector( + {"sparse_values": [0.8], "sparse_indices": [100]} + ) + assert vector9 == SearchRecordsVector(sparse_indices=[100], sparse_values=[0.8]) + + def test_parsing_search_query_vector_object(self): + vector2 = IndexRequestFactory._parse_search_vector( + SearchQueryVector(values=[0.1, 0.2, 0.3]) + ) + assert vector2 == SearchRecordsVector(values=VectorValues([0.1, 0.2, 0.3])) + + vector3 = IndexRequestFactory._parse_search_vector( + SearchQueryVector( + values=[0.1, 0.2, 0.3], sparse_values=[0.4, 0.5, 0.6], sparse_indices=[1, 2, 3] + ) + ) + assert vector3 == SearchRecordsVector( + values=VectorValues([0.1, 0.2, 0.3]), + sparse_indices=[1, 2, 3], + sparse_values=[0.4, 0.5, 0.6], + ) + + vector4 = IndexRequestFactory._parse_search_vector( + SearchQueryVector(values=[], sparse_values=[0.4, 0.5, 0.6], sparse_indices=[1, 2, 3]) + ) + assert vector4 == SearchRecordsVector( + values=VectorValues([]), sparse_indices=[1, 2, 3], sparse_values=[0.4, 0.5, 0.6] + ) + + vector5 = IndexRequestFactory._parse_search_vector( + 
SearchQueryVector(values=[0.1, 0.2, 0.3], sparse_values=[], sparse_indices=[]) + ) + assert vector5 == SearchRecordsVector( + values=VectorValues([0.1, 0.2, 0.3]), sparse_indices=[], sparse_values=[] + ) + + vector6 = IndexRequestFactory._parse_search_vector( + SearchQueryVector(values=[], sparse_values=[], sparse_indices=[]) + ) + assert vector6 == SearchRecordsVector( + values=VectorValues([]), sparse_indices=[], sparse_values=[] + ) + + vector7 = IndexRequestFactory._parse_search_vector(SearchQueryVector(values=[])) + assert vector7 == SearchRecordsVector(values=VectorValues([])) + + def test_parse_search_query_dict(self): + query1 = IndexRequestFactory._parse_search_query( + { + "inputs": {"text": "Apple corporation"}, + "top_k": 3, + "vector": {"values": [0.1, 0.2, 0.3]}, + } + ) + assert query1 == SearchRecordsRequestQuery( + inputs={"text": "Apple corporation"}, + top_k=3, + vector=SearchRecordsVector(values=VectorValues([0.1, 0.2, 0.3])), + ) + + query2 = IndexRequestFactory._parse_search_query( + { + "inputs": {"text": "Apple corporation"}, + "top_k": 3, + "vector": { + "values": [0.1, 0.2, 0.3], + "sparse_values": [0.4, 0.5, 0.6], + "sparse_indices": [1, 2, 3], + }, + } + ) + assert query2 == SearchRecordsRequestQuery( + inputs={"text": "Apple corporation"}, + top_k=3, + vector=SearchRecordsVector( + values=VectorValues([0.1, 0.2, 0.3]), + sparse_indices=[1, 2, 3], + sparse_values=[0.4, 0.5, 0.6], + ), + ) + + query3 = IndexRequestFactory._parse_search_query( + { + "inputs": {"text": "Apple corporation"}, + "top_k": 3, + "id": "test_id", + "filter": {"genre": {"$in": ["action"]}}, + } + ) + assert query3 == SearchRecordsRequestQuery( + inputs={"text": "Apple corporation"}, + top_k=3, + id="test_id", + filter={"genre": {"$in": ["action"]}}, + ) + + def test_parse_search_query_missing_required_fields(self): + with pytest.raises(ValueError) as e: + IndexRequestFactory._parse_search_query({"inputs": {"text": "Apple corporation"}}) + assert str(e.value) == 
"Missing required field 'top_k' in search query." + + def test_parse_search_query_with_objects(self): + query = IndexRequestFactory._parse_search_query( + SearchQuery( + inputs={"text": "Apple corporation"}, + top_k=3, + filter={"genre": {"$in": ["action"]}}, + id="test_id", + vector=SearchQueryVector( + values=[0.1, 0.2, 0.3], sparse_indices=[1, 2, 3], sparse_values=[0.4, 0.5, 0.6] + ), + ) + ) + assert query == SearchRecordsRequestQuery( + inputs={"text": "Apple corporation"}, + top_k=3, + id="test_id", + filter={"genre": {"$in": ["action"]}}, + vector=SearchRecordsVector( + values=VectorValues([0.1, 0.2, 0.3]), + sparse_indices=[1, 2, 3], + sparse_values=[0.4, 0.5, 0.6], + ), + ) + + def test_parse_search_query_with_mixed_types(self): + query = IndexRequestFactory._parse_search_query( + SearchQuery( + inputs={"text": "Apple corporation"}, + top_k=3, + filter={"genre": {"$in": ["action"]}}, + id="test_id", + vector={ + "values": [0.1, 0.2, 0.3], + "sparse_indices": [1, 2, 3], + "sparse_values": [0.4, 0.5, 0.6], + }, + ) + ) + assert query == SearchRecordsRequestQuery( + inputs={"text": "Apple corporation"}, + top_k=3, + id="test_id", + filter={"genre": {"$in": ["action"]}}, + vector=SearchRecordsVector( + values=VectorValues([0.1, 0.2, 0.3]), + sparse_indices=[1, 2, 3], + sparse_values=[0.4, 0.5, 0.6], + ), + ) + + def test_parse_search_rerank_dict(self): + rerank = IndexRequestFactory._parse_search_rerank( + {"model": "bge-reranker-v2-m3", "rank_fields": ["my_text_field"], "top_n": 3} + ) + assert rerank == SearchRecordsRequestRerank( + model="bge-reranker-v2-m3", rank_fields=["my_text_field"], top_n=3 + ) + + # Enum used in dict + rerank2 = IndexRequestFactory._parse_search_rerank( + {"model": RerankModel.Bge_Reranker_V2_M3, "rank_fields": ["my_text_field"], "top_n": 3} + ) + assert rerank2 == SearchRecordsRequestRerank( + model="bge-reranker-v2-m3", rank_fields=["my_text_field"], top_n=3 + ) + + rerank3 = IndexRequestFactory._parse_search_rerank( + { + 
"model": "bge-reranker-v2-m3", + "rank_fields": ["my_text_field"], + "top_n": 3, + "parameters": {"key": "value"}, + "query": "foo", + } + ) + assert rerank3 == SearchRecordsRequestRerank( + model="bge-reranker-v2-m3", + rank_fields=["my_text_field"], + top_n=3, + parameters={"key": "value"}, + query="foo", + ) + + def test_parse_search_rerank_missing_required_fields(self): + with pytest.raises(ValueError) as e: + IndexRequestFactory._parse_search_rerank({"rank_fields": ["my_text_field"], "top_n": 3}) + assert str(e.value) == "Missing required field 'model' in rerank." + + with pytest.raises(ValueError) as e: + IndexRequestFactory._parse_search_rerank( + {"rank_fields": ["my_text_field"], "model": None} + ) + assert str(e.value) == "Missing required field 'model' in rerank." + + with pytest.raises(ValueError) as e: + IndexRequestFactory._parse_search_rerank({"rank_fields": None, "model": "foo"}) + assert str(e.value) == "Missing required field 'rank_fields' in rerank." + + with pytest.raises(ValueError) as e: + IndexRequestFactory._parse_search_rerank({"model": "bge-reranker-v2-m3", "top_n": 3}) + assert str(e.value) == "Missing required field 'rank_fields' in rerank." + + with pytest.raises(ValueError) as e: + IndexRequestFactory._parse_search_rerank( + {"model": RerankModel.Bge_Reranker_V2_M3, "top_n": 3} + ) + assert str(e.value) == "Missing required field 'rank_fields' in rerank." 
+ + def test_parse_search_rerank_object(self): + rerank = IndexRequestFactory._parse_search_rerank( + SearchRerank( + model=RerankModel.Bge_Reranker_V2_M3, rank_fields=["my_text_field"], top_n=3 + ) + ) + assert rerank == SearchRecordsRequestRerank( + model="bge-reranker-v2-m3", rank_fields=["my_text_field"], top_n=3 + ) + + rerank2 = IndexRequestFactory._parse_search_rerank( + SearchRerank( + model=RerankModel.Bge_Reranker_V2_M3, + rank_fields=["my_text_field"], + top_n=3, + parameters={"key": "value"}, + query="foo", + ) + ) + assert rerank2 == SearchRecordsRequestRerank( + model="bge-reranker-v2-m3", + rank_fields=["my_text_field"], + top_n=3, + parameters={"key": "value"}, + query="foo", + ) + + rerank3 = IndexRequestFactory._parse_search_rerank( + SearchRerank(model="unknown-model", rank_fields=["my_text_field"]) + ) + assert rerank3 == SearchRecordsRequestRerank( + model="unknown-model", rank_fields=["my_text_field"] + ) + + def test_search_request_with_dicts(self): + request = IndexRequestFactory.search_request( + query={ + "inputs": {"text": "Apple corporation"}, + "top_k": 3, + "vector": {"values": [0.1, 0.2, 0.3]}, + }, + fields=["more_stuff"], + rerank={"model": "bge-reranker-v2-m3", "rank_fields": ["my_text_field"], "top_n": 3}, + ) + + assert request is not None + assert request.query.vector == SearchRecordsVector(values=VectorValues([0.1, 0.2, 0.3])) + + def test_search_request_with_objects_and_enums(self): + factory = IndexRequestFactory() + request = factory.search_request( + query=SearchQuery( + inputs={"text": "Apple corporation"}, + top_k=3, + vector=SearchQueryVector(values=[0.1, 0.2, 0.3]), + filter={"genre": {"$in": ["action"]}}, + id="test_id", + ), + fields=["more_stuff"], + rerank=SearchRerank( + model=RerankModel.Bge_Reranker_V2_M3, + rank_fields=["my_text_field"], + top_n=3, + parameters={"key": "value"}, + query="foo", + ), + ) + + assert request == SearchRecordsRequest( + query=SearchRecordsRequestQuery( + inputs={"text": "Apple 
corporation"}, + top_k=3, + vector=SearchRecordsVector(values=VectorValues([0.1, 0.2, 0.3])), + filter={"genre": {"$in": ["action"]}}, + id="test_id", + ), + fields=["more_stuff"], + rerank=SearchRecordsRequestRerank( + model="bge-reranker-v2-m3", + rank_fields=["my_text_field"], + top_n=3, + parameters={"key": "value"}, + query="foo", + ), + ) + + def test_search_request_with_no_rerank(self): + factory = IndexRequestFactory() + request = factory.search_request( + query=SearchQuery( + inputs={"text": "Apple corporation"}, + top_k=3, + vector=SearchQueryVector(values=[0.1, 0.2, 0.3]), + filter={"genre": {"$in": ["action"]}}, + id="test_id", + ), + fields=["more_stuff"], + ) + + assert request == SearchRecordsRequest( + query=SearchRecordsRequestQuery( + inputs={"text": "Apple corporation"}, + top_k=3, + vector=SearchRecordsVector(values=VectorValues([0.1, 0.2, 0.3])), + filter={"genre": {"$in": ["action"]}}, + id="test_id", + ), + fields=["more_stuff"], + ) + + request2 = IndexRequestFactory.search_request( + query={ + "inputs": {"text": "Apple corporation"}, + "top_k": 3, + "vector": {"values": [0.1, 0.2, 0.3]}, + } + ) + assert request2 == SearchRecordsRequest( + query=SearchRecordsRequestQuery( + inputs={"text": "Apple corporation"}, + top_k=3, + vector=SearchRecordsVector(values=VectorValues([0.1, 0.2, 0.3])), + ), + fields=["*"], + ) From 60a1a60ccf3fdde5bf55fb38f81f3262e578dec8 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 30 Jan 2025 08:50:29 -0500 Subject: [PATCH 2/5] Remove unneeded test --- tests/perf/test_re_vs_in.py | 46 ------------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 tests/perf/test_re_vs_in.py diff --git a/tests/perf/test_re_vs_in.py b/tests/perf/test_re_vs_in.py deleted file mode 100644 index 8d32a94c1..000000000 --- a/tests/perf/test_re_vs_in.py +++ /dev/null @@ -1,46 +0,0 @@ -import pytest -import re - - -def use_in_match(headers): - ct = headers.get("Content-Type", "") - return "json" in ct - - -def 
use_re_match(headers): - ct = headers.get("Content-Type", "") - return re.search("json", ct, re.IGNORECASE) - - -class TestStringMatchPerformance: - @pytest.mark.parametrize( - "headers", - [ - {"Content-Type": "application/json"} - # {"Content-Type": "application/JSON"}, - # {"Content-Type": "application/xml"}, - # {"Content-Type": "text/html"}, - # {"Content-Type": "text/plain"}, - # {"Content-Type": "application/xml"}, - # {"Content-Type": "text/html"}, - # {"Content-Type": "text/plain"}, - ], - ) - def test_using_in(self, benchmark, headers): - benchmark.pedantic(use_in_match, (headers,), rounds=100, warmup_rounds=1, iterations=50) - - @pytest.mark.parametrize( - "headers", - [ - {"Content-Type": "application/json"} - # {"Content-Type": "application/JSON"}, - # {"Content-Type": "application/xml"}, - # {"Content-Type": "text/html"}, - # {"Content-Type": "text/plain"}, - # {"Content-Type": "application/xml"}, - # {"Content-Type": "text/html"}, - # {"Content-Type": "text/plain"}, - ], - ) - def test_using_re(self, benchmark, headers): - benchmark.pedantic(use_re_match, (headers,), rounds=100, warmup_rounds=1, iterations=50) From c447bd4ee8c4292adce8ee38094a1ab556b636c1 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 30 Jan 2025 09:13:47 -0500 Subject: [PATCH 3/5] mypy fixes --- pinecone/control/pinecone.py | 6 +++- pinecone/data/index.py | 21 ++----------- pinecone/data/interfaces.py | 10 ++++--- pinecone/data/request_factory.py | 30 ++++++++++++++++--- .../data/types/search_query_typed_dict.py | 2 +- 5 files changed, 40 insertions(+), 29 deletions(-) diff --git a/pinecone/control/pinecone.py b/pinecone/control/pinecone.py index 5bf594e01..6290b4f11 100644 --- a/pinecone/control/pinecone.py +++ b/pinecone/control/pinecone.py @@ -1,6 +1,7 @@ import time import logging from typing import Optional, Dict, Any, Union +from enum import Enum from .index_host_store import IndexHostStore from .pinecone_interface import PineconeDBControlInterface @@ -288,7 +289,10 @@ 
def create_index_for_model( raise ValueError(f"{field} is required in embed") parsed_embed = {} for key, value in embed.items(): - parsed_embed[key] = convert_enum_to_string(value) + if isinstance(value, Enum): + parsed_embed[key] = convert_enum_to_string(value) + else: + parsed_embed[key] = value args = parse_non_empty_args( [ diff --git a/pinecone/data/index.py b/pinecone/data/index.py index bb57df262..c896a9dda 100644 --- a/pinecone/data/index.py +++ b/pinecone/data/index.py @@ -14,7 +14,6 @@ IndexDescription as DescribeIndexStatsResponse, UpsertResponse, ListResponse, - UpsertRecord, SearchRecordsResponse, ) from .dataclasses import Vector, SparseValues, FetchResponse, SearchQuery, SearchRerank @@ -206,24 +205,8 @@ def upsert_from_dataframe( return UpsertResponse(upserted_count=upserted_count) def upsert_records(self, namespace: str, records: List[Dict]): - if namespace is None: - raise ValueError("namespace is required when upserting records") - if not records or len(records) == 0: - raise ValueError("No records provided") - - records_to_upsert = [] - for record in records: - if not record.get("_id") and not record.get("id"): - raise ValueError("Each record must have an '_id' or 'id' value") - - records_to_upsert.append( - UpsertRecord( - record.get("_id", record.get("id")), - **{k: v for k, v in record.items() if k not in {"_id", "id"}}, - ) - ) - - self._vector_api.upsert_records_namespace(namespace, records_to_upsert) + args = IndexRequestFactory.upsert_records_args(namespace=namespace, records=records) + self._vector_api.upsert_records_namespace(**args) def search( self, diff --git a/pinecone/data/interfaces.py b/pinecone/data/interfaces.py index 2930e2754..fa837add8 100644 --- a/pinecone/data/interfaces.py +++ b/pinecone/data/interfaces.py @@ -20,6 +20,8 @@ FilterTypedDict, VectorTuple, VectorTupleWithMetadata, + SearchQueryTypedDict, + SearchRerankTypedDict, ) from .dataclasses import SearchQuery, SearchRerank @@ -122,8 +124,8 @@ def upsert_records(self, 
namespace: str, records: List[Dict]): def search( self, namespace: str, - query: Union[Dict, SearchQuery], - rerank: Optional[Union[Dict, SearchRerank]] = None, + query: Union[SearchQueryTypedDict, SearchQuery], + rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, fields: Optional[List[str]] = ["*"], # Default to returning all fields ) -> SearchRecordsResponse: """ @@ -147,8 +149,8 @@ def search( def search_records( self, namespace: str, - query: Union[Dict, SearchQuery], - rerank: Optional[Union[Dict, SearchRerank]] = None, + query: Union[SearchQueryTypedDict, SearchQuery], + rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, fields: Optional[List[str]] = ["*"], # Default to returning all fields ) -> SearchRecordsResponse: """Alias of the search() method.""" diff --git a/pinecone/data/request_factory.py b/pinecone/data/request_factory.py index 270963928..3cfe9455c 100644 --- a/pinecone/data/request_factory.py +++ b/pinecone/data/request_factory.py @@ -1,5 +1,5 @@ import logging -from typing import Union, List, Optional, Dict, Any +from typing import Union, List, Optional, Dict, Any, cast from pinecone.core.openapi.db_data.models import ( QueryRequest, @@ -12,6 +12,7 @@ SearchRecordsRequestRerank, VectorValues, SearchRecordsVector, + UpsertRecord, ) from ..utils import parse_non_empty_args, convert_enum_to_string from .vector_factory import VectorFactory @@ -183,7 +184,7 @@ def _parse_search_query( if isinstance(query, SearchQuery): query_dict = query.as_dict() else: - query_dict = query + query_dict = cast(dict[str, Any], query) required_fields = {"top_k"} for key in required_fields: @@ -213,7 +214,7 @@ def _parse_search_vector( return None vector_dict = vector.as_dict() else: - vector_dict = vector + vector_dict = cast(dict[str, Any], vector) if ( vector_dict.get("values", None) is None and vector_dict.get("sparse_values", None) is None @@ -236,7 +237,7 @@ def _parse_search_rerank(rerank: Optional[Union[SearchRerankTypedDict, 
SearchRer if isinstance(rerank, SearchRerank): rerank_dict = rerank.as_dict() else: - rerank_dict = rerank + rerank_dict = cast(dict[str, Any], rerank) required_fields = {"model", "rank_fields"} for key in required_fields: @@ -246,3 +247,24 @@ def _parse_search_rerank(rerank: Optional[Union[SearchRerankTypedDict, SearchRer rerank_dict["model"] = convert_enum_to_string(rerank_dict["model"]) return SearchRecordsRequestRerank(**rerank_dict) + + @staticmethod + def upsert_records_args(namespace: str, records: List[Dict]): + if namespace is None: + raise ValueError("namespace is required when upserting records") + if not records or len(records) == 0: + raise ValueError("No records provided") + + records_to_upsert = [] + for record in records: + if not record.get("_id") and not record.get("id"): + raise ValueError("Each record must have an '_id' or 'id' value") + + records_to_upsert.append( + UpsertRecord( + record.get("_id", record.get("id")), + **{k: v for k, v in record.items() if k not in {"_id", "id"}}, + ) + ) + + return {"namespace": namespace, "upsert_record": records_to_upsert} diff --git a/pinecone/data/types/search_query_typed_dict.py b/pinecone/data/types/search_query_typed_dict.py index 118607ae0..c21ba1202 100644 --- a/pinecone/data/types/search_query_typed_dict.py +++ b/pinecone/data/types/search_query_typed_dict.py @@ -19,7 +19,7 @@ class SearchQueryTypedDict(TypedDict): Required. """ - filter: Optional[Dict[str, Any]] = None + filter: Optional[Dict[str, Any]] """ The filter to apply to the search. Optional. 
From 1840eaa1ddd9fd027e8a9cdb0ac232e2b01411f2 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 30 Jan 2025 09:46:57 -0500 Subject: [PATCH 4/5] Add asyncio implementation --- pinecone/data/index.py | 2 +- pinecone/data/index_asyncio.py | 32 ++- pinecone/data/interfaces.py | 50 +++++ pinecone/openapi_support/rest_aiohttp.py | 27 ++- tests/integration/data_asyncio/conftest.py | 39 +++- .../test_search_and_upsert_records.py | 200 ++++++++++++++++++ 6 files changed, 338 insertions(+), 12 deletions(-) create mode 100644 tests/integration/data_asyncio/test_search_and_upsert_records.py diff --git a/pinecone/data/index.py b/pinecone/data/index.py index c896a9dda..f3841c37a 100644 --- a/pinecone/data/index.py +++ b/pinecone/data/index.py @@ -215,7 +215,7 @@ def search( rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, fields: Optional[List[str]] = ["*"], # Default to returning all fields ) -> SearchRecordsResponse: - if not namespace: + if namespace is None: raise Exception("Namespace is required when searching records") request = IndexRequestFactory.search_request(query=query, rerank=rerank, fields=fields) diff --git a/pinecone/data/index_asyncio.py b/pinecone/data/index_asyncio.py index 5b3c73eb0..47204e18d 100644 --- a/pinecone/data/index_asyncio.py +++ b/pinecone/data/index_asyncio.py @@ -20,6 +20,7 @@ UpsertResponse, DeleteRequest, ListResponse, + SearchRecordsResponse, ) from ..utils import ( @@ -35,8 +36,10 @@ VectorTuple, VectorTupleWithMetadata, FilterTypedDict, + SearchQueryTypedDict, + SearchRerankTypedDict, ) -from .dataclasses import Vector, SparseValues, FetchResponse +from .dataclasses import Vector, SparseValues, FetchResponse, SearchQuery, SearchRerank from pinecone.openapi_support import OPENAPI_ENDPOINT_PARAMS from .index import IndexRequestFactory @@ -400,5 +403,32 @@ async def list(self, **kwargs): else: done = True + async def upsert_records(self, namespace: str, records: List[Dict]): + args = 
IndexRequestFactory.upsert_records_args(namespace=namespace, records=records) + await self._vector_api.upsert_records_namespace(**args) + + async def search( + self, + namespace: str, + query: Union[SearchQueryTypedDict, SearchQuery], + rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, + fields: Optional[List[str]] = ["*"], # Default to returning all fields + ) -> SearchRecordsResponse: + if namespace is None: + raise Exception("Namespace is required when searching records") + + request = IndexRequestFactory.search_request(query=query, rerank=rerank, fields=fields) + + return await self._vector_api.search_records_namespace(namespace, request) + + async def search_records( + self, + namespace: str, + query: Union[SearchQueryTypedDict, SearchQuery], + rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, + fields: Optional[List[str]] = ["*"], # Default to returning all fields + ) -> SearchRecordsResponse: + return await self.search(namespace, query=query, rerank=rerank, fields=fields) + def _openapi_kwargs(self, kwargs): + return {k: v for k, v in kwargs.items() if k in OPENAPI_ENDPOINT_PARAMS} diff --git a/pinecone/data/interfaces.py b/pinecone/data/interfaces.py index fa837add8..9ebe4ad80 100644 --- a/pinecone/data/interfaces.py +++ b/pinecone/data/interfaces.py @@ -856,3 +856,53 @@ async def list(self, **kwargs): namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] """ pass + + @abstractmethod + async def upsert_records(self, namespace: str, records: List[Dict]): + """ + Upsert records to a namespace. + + Converts records into embeddings and upserts them into a namespace in the index. + + :param namespace: The namespace of the index to upsert records to. + :type namespace: str, required + :param records: The records to upsert into the index.
+ :type records: List[Dict], required + """ + pass + + @abstractmethod + async def search( + self, + namespace: str, + query: Union[SearchQueryTypedDict, SearchQuery], + rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, + fields: Optional[List[str]] = ["*"], # Default to returning all fields + ) -> SearchRecordsResponse: + """ + Search for records. + + This operation converts a query to a vector embedding and then searches a namespace. You + can optionally provide a reranking operation as part of the search. + + :param namespace: The namespace in the index to search. + :type namespace: str, required + :param query: The SearchQuery to use for the search. + :type query: Union[Dict, SearchQuery], required + :param rerank: The SearchRerank to use with the search request. + :type rerank: Union[Dict, SearchRerank], optional + :return: The records that match the search. + :rtype: RecordModel + """ + pass + + @abstractmethod + async def search_records( + self, + namespace: str, + query: Union[SearchQueryTypedDict, SearchQuery], + rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, + fields: Optional[List[str]] = ["*"], # Default to returning all fields + ) -> SearchRecordsResponse: + """Alias of the search() method.""" + pass diff --git a/pinecone/openapi_support/rest_aiohttp.py b/pinecone/openapi_support/rest_aiohttp.py index e0e2e40a6..902ca9769 100644 --- a/pinecone/openapi_support/rest_aiohttp.py +++ b/pinecone/openapi_support/rest_aiohttp.py @@ -1,5 +1,6 @@ import weakref import asyncio +import json from .rest_utils import RestClientInterface, RESTResponse, raise_exceptions_or_return @@ -43,13 +44,25 @@ async def request( if method in ["POST", "PUT", "PATCH", "OPTIONS"] and ("Content-Type" not in headers): headers["Content-Type"] = "application/json" - async with self._session.request( - method, url, params=query_params, headers=headers, json=body - ) as resp: - content = await resp.read() - return raise_exceptions_or_return( - 
RESTResponse(resp.status, content, resp.headers, resp.reason) - ) + if "application/x-ndjson" in headers.get("Content-Type", "").lower(): + ndjson_data = "\n".join(json.dumps(record) for record in body) + + async with self._session.request( + method, url, params=query_params, headers=headers, data=ndjson_data + ) as resp: + content = await resp.read() + return raise_exceptions_or_return( + RESTResponse(resp.status, content, resp.headers, resp.reason) + ) + + else: + async with self._session.request( + method, url, params=query_params, headers=headers, json=body + ) as resp: + content = await resp.read() + return raise_exceptions_or_return( + RESTResponse(resp.status, content, resp.headers, resp.reason) + ) async def GET( self, url, headers=None, query_params=None, _preload_content=True, _request_timeout=None diff --git a/tests/integration/data_asyncio/conftest.py b/tests/integration/data_asyncio/conftest.py index 09d796150..fd4c0657d 100644 --- a/tests/integration/data_asyncio/conftest.py +++ b/tests/integration/data_asyncio/conftest.py @@ -7,6 +7,8 @@ import logging from typing import Callable, Optional, Awaitable, Union +from pinecone import CloudProvider, AwsRegion, IndexEmbed, EmbedModel + logger = logging.getLogger(__name__) @@ -69,6 +71,11 @@ def sparse_index_name(): return generate_index_name("sparse") +@pytest.fixture(scope="session") +def model_index_name(): + return generate_index_name("embed") + + def build_asyncioindex_client(client, index_host) -> _AsyncioIndex: return client.AsyncioIndex(host=index_host) @@ -107,20 +114,46 @@ def sparse_index_host(sparse_index_name, spec): pc = build_client() if sparse_index_name not in pc.list_indexes().names(): - logger.info("Creating sparse index with name: " + sparse_index_name) + logger.info(f"Creating index with name {sparse_index_name}") pc.create_index( name=sparse_index_name, metric="dotproduct", spec=spec, vector_type="sparse" ) else: - logger.info("Sparse index with name " + sparse_index_name + " already 
exists") + logger.info(f"Index with name {sparse_index_name} already exists") description = pc.describe_index(name=sparse_index_name) yield description.host - logger.info("Deleting sparse index with name: " + sparse_index_name) + logger.info(f"Deleting index with name {sparse_index_name}") pc.delete_index(sparse_index_name, -1) +@pytest.fixture(scope="session") +def model_index_host(model_index_name): + pc = build_client() + + if model_index_name not in pc.list_indexes().names(): + logger.info(f"Creating index {model_index_name}") + pc.create_index_for_model( + name=model_index_name, + cloud=CloudProvider.AWS, + region=AwsRegion.US_WEST_2, + embed=IndexEmbed( + model=EmbedModel.Multilingual_E5_Large, + field_map={"text": "my_text_field"}, + metric="cosine", + ), + ) + else: + logger.info(f"Index {model_index_name} already exists") + + description = pc.describe_index(name=model_index_name) + yield description.host + + logger.info(f"Deleting index {model_index_name}") + pc.delete_index(model_index_name, -1) + + async def poll_for_freshness(asyncio_idx, target_namespace, target_vector_count): max_wait_time = 60 * 3 # 3 minutes time_waited = 0 diff --git a/tests/integration/data_asyncio/test_search_and_upsert_records.py b/tests/integration/data_asyncio/test_search_and_upsert_records.py new file mode 100644 index 000000000..a9f69f382 --- /dev/null +++ b/tests/integration/data_asyncio/test_search_and_upsert_records.py @@ -0,0 +1,200 @@ +import pytest +import logging +from ..helpers import random_string, embedding_values +from .conftest import build_asyncioindex_client, poll_for_freshness + +from pinecone import RerankModel, PineconeApiException + +logger = logging.getLogger(__name__) + +model_index_dimension = 1024 # Currently controlled by "multilingual-e5-large" + + +@pytest.fixture +def records_to_upsert(): + return [ + { + "id": "test1", + "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.", + "more_stuff": "more stuff!", + }, + { + 
"id": "test2", + "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.", + "more_stuff": "more stuff!", + }, + { + "id": "test3", + "my_text_field": "Many people enjoy eating apples as a healthy snack.", + "more_stuff": "more stuff!", + }, + { + "id": "test4", + "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.", + "more_stuff": "more stuff!", + }, + { + "id": "test5", + "my_text_field": "An apple a day keeps the doctor away, as the saying goes.", + "more_stuff": "more stuff!", + }, + { + "id": "test6", + "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.", + "more_stuff": "extra more stuff!", + }, + ] + + +@pytest.mark.asyncio +class TestUpsertAndSearchRecords: + async def test_search_records(self, pc, model_index_host, records_to_upsert): + model_idx = build_asyncioindex_client(pc, model_index_host) + + target_namespace = random_string(10) + await model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + await poll_for_freshness(model_idx, target_namespace, len(records_to_upsert)) + + response = await model_idx.search_records( + namespace=target_namespace, query={"inputs": {"text": "Apple corporation"}, "top_k": 3} + ) + assert len(response.result.hits) == 3 + assert response.usage is not None + + # Test search alias + response2 = await model_idx.search( + namespace=target_namespace, query={"inputs": {"text": "Apple corporation"}, "top_k": 3} + ) + assert response == response2 + + # validate similar records and contents + similar_record_ids = ["test2", "test4", "test6"] + + for hit in response.result.hits: + assert hit._id in similar_record_ids + similar_record_ids.remove(hit._id) + + assert hit._score > 0 + assert hit.fields.get("my_text_field") is not None + assert hit.fields.get("more_stuff") is not None + + # search for records while 
filtering fields + response_filtered = await model_idx.search_records( + namespace="test-namespace", + query={"inputs": {"text": "Apple corporation"}, "top_k": 3}, + fields=["more_stuff"], + ) + + for hit in response_filtered.result.hits: + assert hit.fields.get("my_text_field") is None + assert hit.fields.get("more_stuff") is not None + + async def test_search_records_with_vector(self, pc, model_index_host, records_to_upsert): + model_idx = build_asyncioindex_client(pc, model_index_host) + + target_namespace = random_string(10) + await model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + await poll_for_freshness(model_idx, target_namespace, len(records_to_upsert)) + + # Search for similar records + search_query = {"top_k": 3, "vector": {"values": embedding_values(model_index_dimension)}} + response = await model_idx.search_records(namespace=target_namespace, query=search_query) + assert len(response.result.hits) == 3 + assert response.usage is not None + + # Test search alias + response2 = await model_idx.search(namespace=target_namespace, query=search_query) + assert response == response2 + + @pytest.mark.parametrize("rerank_model", ["bge-reranker-v2-m3", RerankModel.Bge_Reranker_V2_M3]) + async def test_search_with_rerank(self, pc, model_index_host, records_to_upsert, rerank_model): + model_idx = build_asyncioindex_client(pc, model_index_host) + target_namespace = random_string(10) + await model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + await poll_for_freshness(model_idx, target_namespace, len(records_to_upsert)) + + # Search for similar records + response = await model_idx.search_records( + namespace=target_namespace, + query={"inputs": {"text": "Apple corporation"}, "top_k": 3}, + rerank={"model": rerank_model, "rank_fields": ["my_text_field"], "top_n": 3}, + ) + assert len(response.result.hits) == 3 + assert response.usage is not None + + # validate similar records and contents + 
similar_record_ids = ["test6", "test4", "test2"] + for hit in response.result.hits: + assert hit._id in similar_record_ids + similar_record_ids.remove(hit._id) + + assert hit._score > 0 + assert hit.fields.get("my_text_field") is not None + assert hit.fields.get("more_stuff") is not None + + async def test_search_with_rerank_query(self, pc, model_index_host, records_to_upsert): + model_idx = build_asyncioindex_client(pc, model_index_host) + target_namespace = random_string(10) + await model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + # Sleep for freshness + await poll_for_freshness(model_idx, target_namespace, len(records_to_upsert)) + + # Search for similar records + response = await model_idx.search_records( + namespace=target_namespace, + query={"inputs": {"text": "Apple corporation"}, "top_k": 3}, + rerank={ + "model": "bge-reranker-v2-m3", + "rank_fields": ["my_text_field"], + "top_n": 3, + "query": "Apple corporation", + }, + ) + assert len(response.result.hits) == 3 + assert response.usage is not None + + +@pytest.mark.asyncio +class TestUpsertAndSearchRecordsErrorCases: + async def test_search_with_rerank_nonexistent_model_error( + self, pc, model_index_host, records_to_upsert + ): + model_idx = build_asyncioindex_client(pc, model_index_host) + target_namespace = random_string(10) + await model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + await poll_for_freshness(model_idx, target_namespace, len(records_to_upsert)) + + with pytest.raises(PineconeApiException, match=r"Model 'non-existent-model' not found"): + await model_idx.search_records( + namespace=target_namespace, + query={"inputs": {"text": "Apple corporation"}, "top_k": 3}, + rerank={ + "model": "non-existent-model", + "rank_fields": ["my_text_field"], + "top_n": 3, + }, + ) + + @pytest.mark.skip(reason="Possible bug in the API") + async def test_search_with_rerank_empty_rank_fields_error( + self, pc, model_index_host, 
records_to_upsert + ): + model_idx = build_asyncioindex_client(pc, model_index_host) + target_namespace = random_string(10) + await model_idx.upsert_records(namespace=target_namespace, records=records_to_upsert) + + await poll_for_freshness(model_idx, target_namespace, len(records_to_upsert)) + + with pytest.raises( + PineconeApiException, match=r"Only one rank field is supported for model" + ): + await model_idx.search_records( + namespace="test-namespace", + query={"inputs": {"text": "Apple corporation"}, "top_k": 3}, + rerank={"model": "bge-reranker-v2-m3", "rank_fields": [], "top_n": 3}, + ) From 906b0ae97e79135ed2eb0c3b262cf0aed4495fc2 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 30 Jan 2025 10:07:08 -0500 Subject: [PATCH 5/5] Skip search tests on grpc run --- tests/integration/data/test_search_and_upsert_records.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/data/test_search_and_upsert_records.py b/tests/integration/data/test_search_and_upsert_records.py index 0b0f6444c..e83a5cd88 100644 --- a/tests/integration/data/test_search_and_upsert_records.py +++ b/tests/integration/data/test_search_and_upsert_records.py @@ -3,6 +3,7 @@ from typing import List from ..helpers import random_string, embedding_values import logging +import os from pinecone import RerankModel, PineconeApiException from pinecone.data import _Index @@ -69,6 +70,9 @@ def records_to_upsert(): ] +@pytest.mark.skipif( + os.getenv("USE_GRPC") != "false", reason="These actions are not supported in gRPC" +) class TestUpsertAndSearchRecords: def test_search_records(self, model_idx, records_to_upsert): target_namespace = random_string(10) @@ -182,6 +186,9 @@ def test_search_with_rerank_query(self, model_idx, records_to_upsert): assert response.usage is not None +@pytest.mark.skipif( + os.getenv("USE_GRPC") != "false", reason="These actions are not supported in gRPC" +) class TestUpsertAndSearchRecordsErrorCases: def 
test_search_with_rerank_nonexistent_model_error(self, model_idx, records_to_upsert): target_namespace = random_string(10)