@@ -55,38 +55,49 @@ def parse_sparse_values(sparse_values: dict | None) -> SparseValues:
5555def parse_fetch_response (
5656 response : Message , initial_metadata : dict [str , str ] | None = None
5757) -> FetchResponse :
58- json_response = json_format .MessageToDict (response )
58+ """Parse a FetchResponse protobuf message directly without MessageToDict conversion.
59+
60+ This optimized version directly accesses protobuf fields for better performance.
61+ """
62+ # Extract response info from initial metadata
63+ from pinecone .utils .response_info import extract_response_info
5964
65+ metadata = initial_metadata or {}
66+ response_info = extract_response_info (metadata )
67+
68+ # Directly access protobuf fields instead of converting entire message to dict
6069 vd = {}
61- vectors = json_response . get ( "vectors" , {} )
62- namespace = json_response . get ( " namespace" , "" )
70+ # namespace is a required string field, so it will always have a value (default empty string )
71+ namespace = response . namespace
6372
64- for id , vec in vectors .items ():
65- # Convert to Vector dataclass
66- sparse_vals = vec .get ("sparseValues" )
73+ # Iterate over vectors map directly
74+ for vec_id , vec in response .vectors .items ():
75+ # Convert vector.values (RepeatedScalarFieldContainer) to list
76+ values = list (vec .values ) if vec .values else []
77+
78+ # Handle sparse_values if present (check if field is set and not empty)
6779 parsed_sparse = None
68- if sparse_vals :
80+ if vec . HasField ( "sparse_values" ) and vec . sparse_values :
6981 from pinecone .db_data .dataclasses import SparseValues
7082
7183 parsed_sparse = SparseValues (
72- indices = sparse_vals . get ( " indices" , [] ), values = sparse_vals . get ( " values" , [] )
84+ indices = list ( vec . sparse_values . indices ), values = list ( vec . sparse_values . values )
7385 )
74- vd [id ] = Vector (
75- id = vec ["id" ],
76- values = vec .get ("values" ) or [],
77- sparse_values = parsed_sparse ,
78- metadata = vec .get ("metadata" , None ),
79- )
8086
81- # Extract response info from initial metadata
82- from pinecone .utils .response_info import extract_response_info
87+ # Convert metadata Struct to dict only when needed
88+ metadata_dict = None
89+ if vec .HasField ("metadata" ) and vec .metadata :
90+ metadata_dict = json_format .MessageToDict (vec .metadata )
8391
84- metadata = initial_metadata or {}
85- response_info = extract_response_info (metadata )
92+ vd [vec_id ] = Vector (
93+ id = vec .id , values = values , sparse_values = parsed_sparse , metadata = metadata_dict
94+ )
8695
96+ # Parse usage if present (usage is optional, so check HasField)
8797 usage = None
88- if json_response .get ("usage" ):
89- usage = parse_usage (json_response .get ("usage" , {}))
98+ if response .HasField ("usage" ) and response .usage :
99+ usage = parse_usage ({"readUnits" : response .usage .read_units })
100+
90101 fetch_response = FetchResponse (
91102 vectors = vd , namespace = namespace , usage = usage , _response_info = response_info
92103 )
@@ -204,40 +215,86 @@ def parse_query_response(
204215 _check_type : bool = False ,
205216 initial_metadata : dict [str , str ] | None = None ,
206217) -> QueryResponse :
207- if isinstance (response , Message ):
208- json_response = json_format .MessageToDict (response )
209- else :
210- json_response = response
211-
212- matches = []
213- for item in json_response .get ("matches" , []):
214- sc = ScoredVector (
215- id = item ["id" ],
216- score = item .get ("score" , 0.0 ),
217- values = item .get ("values" , []),
218- sparse_values = parse_sparse_values (item .get ("sparseValues" )),
219- metadata = item .get ("metadata" , None ),
220- _check_type = _check_type ,
221- )
222- matches .append (sc )
223-
224- # Due to OpenAPI model classes / actual parsing cost, we want to avoid
225- # creating empty `Usage` objects and then passing them into QueryResponse
226- # when they are not actually present in the response from the server.
227- args = {"namespace" : json_response .get ("namespace" , "" ), "matches" : matches }
228- usage = json_response .get ("usage" )
229- if usage :
230- args ["usage" ] = parse_usage (usage )
218+ """Parse a QueryResponse protobuf message directly without MessageToDict conversion.
231219
220+ This optimized version directly accesses protobuf fields for better performance.
221+ For dict responses (REST API), falls back to the original dict-based parsing.
222+ """
232223 # Extract response info from initial metadata
233- # For gRPC, LSN headers are in initial_metadata
234224 from pinecone .utils .response_info import extract_response_info
235225
236226 metadata = initial_metadata or {}
237227 response_info = extract_response_info (metadata )
238228
239- query_response = QueryResponse (** args , _response_info = response_info )
240- return query_response
229+ if isinstance (response , Message ):
230+ # Optimized path: directly access protobuf fields
231+ matches = []
232+ # namespace is a required string field, so it will always have a value (default empty string)
233+ namespace = response .namespace
234+
235+ # Iterate over matches directly
236+ for match in response .matches :
237+ # Convert match.values (RepeatedScalarFieldContainer) to list
238+ values = list (match .values ) if match .values else []
239+
240+ # Handle sparse_values if present (check if field is set and not empty)
241+ parsed_sparse = None
242+ if match .HasField ("sparse_values" ) and match .sparse_values :
243+ parsed_sparse = SparseValues (
244+ indices = list (match .sparse_values .indices ),
245+ values = list (match .sparse_values .values ),
246+ )
247+
248+ # Convert metadata Struct to dict only when needed
249+ metadata_dict = None
250+ if match .HasField ("metadata" ) and match .metadata :
251+ metadata_dict = json_format .MessageToDict (match .metadata )
252+
253+ sc = ScoredVector (
254+ id = match .id ,
255+ score = match .score ,
256+ values = values ,
257+ sparse_values = parsed_sparse ,
258+ metadata = metadata_dict ,
259+ _check_type = _check_type ,
260+ )
261+ matches .append (sc )
262+
263+ # Parse usage if present (usage is optional, so check HasField)
264+ usage = None
265+ if response .HasField ("usage" ) and response .usage :
266+ usage = parse_usage ({"readUnits" : response .usage .read_units })
267+
268+ query_response = QueryResponse (
269+ namespace = namespace , matches = matches , usage = usage , _response_info = response_info
270+ )
271+ return query_response
272+ else :
273+ # Fallback for dict responses (REST API)
274+ json_response = response
275+
276+ matches = []
277+ for item in json_response .get ("matches" , []):
278+ sc = ScoredVector (
279+ id = item ["id" ],
280+ score = item .get ("score" , 0.0 ),
281+ values = item .get ("values" , []),
282+ sparse_values = parse_sparse_values (item .get ("sparseValues" )),
283+ metadata = item .get ("metadata" , None ),
284+ _check_type = _check_type ,
285+ )
286+ matches .append (sc )
287+
288+ # Due to OpenAPI model classes / actual parsing cost, we want to avoid
289+ # creating empty `Usage` objects and then passing them into QueryResponse
290+ # when they are not actually present in the response from the server.
291+ args = {"namespace" : json_response .get ("namespace" , "" ), "matches" : matches }
292+ usage = json_response .get ("usage" )
293+ if usage :
294+ args ["usage" ] = parse_usage (usage )
295+
296+ query_response = QueryResponse (** args , _response_info = response_info )
297+ return query_response
241298
242299
243300def parse_stats_response (response : dict ) -> "DescribeIndexStatsResponse" :
0 commit comments