Skip to content

Commit 9b038aa

Browse files
committed
Improve parsing of fetch response
1 parent 53082f1 commit 9b038aa

2 files changed

Lines changed: 266 additions & 47 deletions

File tree

pinecone/grpc/utils.py

Lines changed: 104 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -55,38 +55,49 @@ def parse_sparse_values(sparse_values: dict | None) -> SparseValues:
5555
def parse_fetch_response(
5656
response: Message, initial_metadata: dict[str, str] | None = None
5757
) -> FetchResponse:
58-
json_response = json_format.MessageToDict(response)
58+
"""Parse a FetchResponse protobuf message directly without MessageToDict conversion.
59+
60+
This optimized version directly accesses protobuf fields for better performance.
61+
"""
62+
# Extract response info from initial metadata
63+
from pinecone.utils.response_info import extract_response_info
5964

65+
metadata = initial_metadata or {}
66+
response_info = extract_response_info(metadata)
67+
68+
# Directly access protobuf fields instead of converting entire message to dict
6069
vd = {}
61-
vectors = json_response.get("vectors", {})
62-
namespace = json_response.get("namespace", "")
70+
# namespace is a required string field, so it will always have a value (default empty string)
71+
namespace = response.namespace
6372

64-
for id, vec in vectors.items():
65-
# Convert to Vector dataclass
66-
sparse_vals = vec.get("sparseValues")
73+
# Iterate over vectors map directly
74+
for vec_id, vec in response.vectors.items():
75+
# Convert vector.values (RepeatedScalarFieldContainer) to list
76+
values = list(vec.values) if vec.values else []
77+
78+
# Handle sparse_values if present (check if field is set and not empty)
6779
parsed_sparse = None
68-
if sparse_vals:
80+
if vec.HasField("sparse_values") and vec.sparse_values:
6981
from pinecone.db_data.dataclasses import SparseValues
7082

7183
parsed_sparse = SparseValues(
72-
indices=sparse_vals.get("indices", []), values=sparse_vals.get("values", [])
84+
indices=list(vec.sparse_values.indices), values=list(vec.sparse_values.values)
7385
)
74-
vd[id] = Vector(
75-
id=vec["id"],
76-
values=vec.get("values") or [],
77-
sparse_values=parsed_sparse,
78-
metadata=vec.get("metadata", None),
79-
)
8086

81-
# Extract response info from initial metadata
82-
from pinecone.utils.response_info import extract_response_info
87+
# Convert metadata Struct to dict only when needed
88+
metadata_dict = None
89+
if vec.HasField("metadata") and vec.metadata:
90+
metadata_dict = json_format.MessageToDict(vec.metadata)
8391

84-
metadata = initial_metadata or {}
85-
response_info = extract_response_info(metadata)
92+
vd[vec_id] = Vector(
93+
id=vec.id, values=values, sparse_values=parsed_sparse, metadata=metadata_dict
94+
)
8695

96+
# Parse usage if present (usage is optional, so check HasField)
8797
usage = None
88-
if json_response.get("usage"):
89-
usage = parse_usage(json_response.get("usage", {}))
98+
if response.HasField("usage") and response.usage:
99+
usage = parse_usage({"readUnits": response.usage.read_units})
100+
90101
fetch_response = FetchResponse(
91102
vectors=vd, namespace=namespace, usage=usage, _response_info=response_info
92103
)
@@ -204,40 +215,86 @@ def parse_query_response(
204215
_check_type: bool = False,
205216
initial_metadata: dict[str, str] | None = None,
206217
) -> QueryResponse:
207-
if isinstance(response, Message):
208-
json_response = json_format.MessageToDict(response)
209-
else:
210-
json_response = response
211-
212-
matches = []
213-
for item in json_response.get("matches", []):
214-
sc = ScoredVector(
215-
id=item["id"],
216-
score=item.get("score", 0.0),
217-
values=item.get("values", []),
218-
sparse_values=parse_sparse_values(item.get("sparseValues")),
219-
metadata=item.get("metadata", None),
220-
_check_type=_check_type,
221-
)
222-
matches.append(sc)
223-
224-
# Due to OpenAPI model classes / actual parsing cost, we want to avoid
225-
# creating empty `Usage` objects and then passing them into QueryResponse
226-
# when they are not actually present in the response from the server.
227-
args = {"namespace": json_response.get("namespace", ""), "matches": matches}
228-
usage = json_response.get("usage")
229-
if usage:
230-
args["usage"] = parse_usage(usage)
218+
"""Parse a QueryResponse protobuf message directly without MessageToDict conversion.
231219
220+
This optimized version directly accesses protobuf fields for better performance.
221+
For dict responses (REST API), falls back to the original dict-based parsing.
222+
"""
232223
# Extract response info from initial metadata
233-
# For gRPC, LSN headers are in initial_metadata
234224
from pinecone.utils.response_info import extract_response_info
235225

236226
metadata = initial_metadata or {}
237227
response_info = extract_response_info(metadata)
238228

239-
query_response = QueryResponse(**args, _response_info=response_info)
240-
return query_response
229+
if isinstance(response, Message):
230+
# Optimized path: directly access protobuf fields
231+
matches = []
232+
# namespace is a required string field, so it will always have a value (default empty string)
233+
namespace = response.namespace
234+
235+
# Iterate over matches directly
236+
for match in response.matches:
237+
# Convert match.values (RepeatedScalarFieldContainer) to list
238+
values = list(match.values) if match.values else []
239+
240+
# Handle sparse_values if present (check if field is set and not empty)
241+
parsed_sparse = None
242+
if match.HasField("sparse_values") and match.sparse_values:
243+
parsed_sparse = SparseValues(
244+
indices=list(match.sparse_values.indices),
245+
values=list(match.sparse_values.values),
246+
)
247+
248+
# Convert metadata Struct to dict only when needed
249+
metadata_dict = None
250+
if match.HasField("metadata") and match.metadata:
251+
metadata_dict = json_format.MessageToDict(match.metadata)
252+
253+
sc = ScoredVector(
254+
id=match.id,
255+
score=match.score,
256+
values=values,
257+
sparse_values=parsed_sparse,
258+
metadata=metadata_dict,
259+
_check_type=_check_type,
260+
)
261+
matches.append(sc)
262+
263+
# Parse usage if present (usage is optional, so check HasField)
264+
usage = None
265+
if response.HasField("usage") and response.usage:
266+
usage = parse_usage({"readUnits": response.usage.read_units})
267+
268+
query_response = QueryResponse(
269+
namespace=namespace, matches=matches, usage=usage, _response_info=response_info
270+
)
271+
return query_response
272+
else:
273+
# Fallback for dict responses (REST API)
274+
json_response = response
275+
276+
matches = []
277+
for item in json_response.get("matches", []):
278+
sc = ScoredVector(
279+
id=item["id"],
280+
score=item.get("score", 0.0),
281+
values=item.get("values", []),
282+
sparse_values=parse_sparse_values(item.get("sparseValues")),
283+
metadata=item.get("metadata", None),
284+
_check_type=_check_type,
285+
)
286+
matches.append(sc)
287+
288+
# Due to OpenAPI model classes / actual parsing cost, we want to avoid
289+
# creating empty `Usage` objects and then passing them into QueryResponse
290+
# when they are not actually present in the response from the server.
291+
args = {"namespace": json_response.get("namespace", ""), "matches": matches}
292+
usage = json_response.get("usage")
293+
if usage:
294+
args["usage"] = parse_usage(usage)
295+
296+
query_response = QueryResponse(**args, _response_info=response_info)
297+
return query_response
241298

242299

243300
def parse_stats_response(response: dict) -> "DescribeIndexStatsResponse":
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
"""Performance benchmarks for gRPC response parsing functions.
2+
3+
These tests measure the performance of parse_fetch_response and parse_query_response
4+
to establish baselines and verify optimizations.
5+
"""
6+
7+
import random
8+
import pytest
9+
from google.protobuf import struct_pb2
10+
11+
from pinecone.core.grpc.protos.db_data_2025_10_pb2 import (
12+
FetchResponse,
13+
QueryResponse,
14+
Vector,
15+
ScoredVector,
16+
SparseValues,
17+
Usage,
18+
)
19+
from pinecone.grpc.utils import parse_fetch_response, parse_query_response
20+
21+
22+
def create_vector(id: str, dimension: int, include_sparse: bool = False) -> Vector:
    """Build a Vector protobuf message populated with random dense values.

    When *include_sparse* is True, also attach sparse values covering roughly
    10% of the dimension. A small random metadata Struct is always attached.
    """
    dense = [random.random() for _ in range(dimension)]

    # Optionally generate sparse values (~10% of dimension as non-zero entries).
    sparse = None
    if include_sparse:
        nnz = max(1, dimension // 10)
        idx = sorted(random.sample(range(dimension), nnz))
        sparse = SparseValues(indices=idx, values=[random.random() for _ in range(nnz)])

    # Attach a small random metadata Struct so parsing exercises the Struct path.
    meta = struct_pb2.Struct()
    meta.update({"category": f"cat_{random.randint(1, 10)}", "score": random.random()})

    kwargs = {"id": id, "values": dense, "metadata": meta}
    if sparse is not None:
        kwargs["sparse_values"] = sparse
    return Vector(**kwargs)
46+
47+
48+
def create_scored_vector(id: str, dimension: int, include_sparse: bool = False) -> ScoredVector:
    """Build a ScoredVector protobuf message with random values and score.

    When *include_sparse* is True, also attach sparse values covering roughly
    10% of the dimension. A small random metadata Struct is always attached.
    """
    dense = [random.random() for _ in range(dimension)]

    # Optionally generate sparse values (~10% of dimension as non-zero entries).
    sparse = None
    if include_sparse:
        nnz = max(1, dimension // 10)
        idx = sorted(random.sample(range(dimension), nnz))
        sparse = SparseValues(indices=idx, values=[random.random() for _ in range(nnz)])

    # Attach a small random metadata Struct so parsing exercises the Struct path.
    meta = struct_pb2.Struct()
    meta.update({"category": f"cat_{random.randint(1, 10)}", "score": random.random()})

    kwargs = {"id": id, "score": random.random(), "values": dense, "metadata": meta}
    if sparse is not None:
        kwargs["sparse_values"] = sparse
    return ScoredVector(**kwargs)
78+
79+
80+
def create_fetch_response(
    num_vectors: int, dimension: int, include_sparse: bool = False
) -> FetchResponse:
    """Build a FetchResponse containing *num_vectors* randomly generated vectors.

    The vectors map is keyed "vec_0" .. "vec_{n-1}", and usage.read_units is
    set to the vector count so usage parsing is exercised too.
    """
    vector_map = {
        vid: create_vector(vid, dimension, include_sparse)
        for vid in (f"vec_{i}" for i in range(num_vectors))
    }
    return FetchResponse(
        vectors=vector_map, namespace="test_namespace", usage=Usage(read_units=num_vectors)
    )
92+
93+
94+
def create_query_response(
    num_matches: int, dimension: int, include_sparse: bool = False
) -> QueryResponse:
    """Build a QueryResponse containing *num_matches* randomly generated matches.

    Matches are named "match_0" .. "match_{n-1}", and usage.read_units is set
    to the match count so usage parsing is exercised too.
    """
    scored = []
    for i in range(num_matches):
        scored.append(create_scored_vector(f"match_{i}", dimension, include_sparse))
    return QueryResponse(
        matches=scored, namespace="test_namespace", usage=Usage(read_units=num_matches)
    )
105+
106+
107+
class TestFetchResponseParsingPerf:
    """Performance benchmarks for parse_fetch_response."""

    # Full 3x3 matrix of vector counts and dimensions; the comprehension yields
    # the same (count, dim) tuples, in the same order, as an explicit listing.
    @pytest.mark.parametrize(
        "num_vectors,dimension",
        [(n, d) for n in (10, 100, 1000) for d in (128, 512, 1024)],
    )
    def test_parse_fetch_response_dense(self, benchmark, num_vectors, dimension):
        """Benchmark parse_fetch_response on dense-only vectors."""
        fetch_msg = create_fetch_response(num_vectors, dimension, include_sparse=False)
        benchmark(parse_fetch_response, fetch_msg, None)

    @pytest.mark.parametrize("num_vectors,dimension", [(n, 128) for n in (10, 100, 1000)])
    def test_parse_fetch_response_sparse(self, benchmark, num_vectors, dimension):
        """Benchmark parse_fetch_response on vectors that also carry sparse values."""
        fetch_msg = create_fetch_response(num_vectors, dimension, include_sparse=True)
        benchmark(parse_fetch_response, fetch_msg, None)
134+
135+
136+
class TestQueryResponseParsingPerf:
    """Performance benchmarks for parse_query_response."""

    # Full 3x3 matrix of match counts and dimensions; the comprehension yields
    # the same (count, dim) tuples, in the same order, as an explicit listing.
    @pytest.mark.parametrize(
        "num_matches,dimension",
        [(n, d) for n in (10, 100, 1000) for d in (128, 512, 1024)],
    )
    def test_parse_query_response_dense(self, benchmark, num_matches, dimension):
        """Benchmark parse_query_response on dense-only matches."""
        query_msg = create_query_response(num_matches, dimension, include_sparse=False)
        benchmark(parse_query_response, query_msg, False, None)

    @pytest.mark.parametrize("num_matches,dimension", [(n, 128) for n in (10, 100, 1000)])
    def test_parse_query_response_sparse(self, benchmark, num_matches, dimension):
        """Benchmark parse_query_response on matches that also carry sparse values."""
        query_msg = create_query_response(num_matches, dimension, include_sparse=True)
        benchmark(parse_query_response, query_msg, False, None)

0 commit comments

Comments
 (0)