You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# Handles the edge case where a document is uploaded with a specific user-generated ID, and then identical content is uploaded with a hash-generated ID.
132
+
logger.warning(
133
+
f"""Documents may be ready, the search has found identical content with a different ID and {"identical"ifquery_result[0][0].get("metadata") ==doc.get("metadata") else"different"} metadata. Duplicate ID: {str(query_result[0][0]["_id"])}"""
134
+
)
135
+
else:
136
+
raiseTimeoutError(f"Document {self.index_name} is not ready!")
For large numbers of Documents, insertion is performed in batches.
277
286
287
+
It is recommended that documents do not have an ID field, as the method will generate hashed IDs for them.
288
+
278
289
Args:
279
-
docs: List[Document] | A list of documents. Each document is a TypedDict `Document`.
290
+
docs: List[Document] | A list of documents. Each document is a TypedDict `Document`, which may contain an ID. Documents without IDs will have them generated.
280
291
collection_name: str | The name of the collection. Default is None.
281
292
upsert: bool | Whether to update the document if it exists. Default is False.
282
293
batch_size: Number of documents to be inserted in each batch
294
+
kwargs: Additional keyword arguments. Use `hash_length` to set the length of the hash-generated IDs; use `overwrite_ids` to overwrite existing IDs with hashed values.
283
295
"""
296
+
hash_length=kwargs.get("hash_length")
297
+
overwrite_ids=kwargs.get("overwrite_ids", False)
298
+
299
+
ifany(doc.get("content") isNonefordocindocs):
300
+
raiseValueError("The document content is required.")
301
+
284
302
ifnotdocs:
285
303
logger.info("No documents to insert.")
286
304
return
287
305
306
+
docs=deepcopy(docs)
288
307
collection=self.get_collection(collection_name)
308
+
309
+
assert (
310
+
len({doc.get("id") isNonefordocindocs}) ==1
311
+
), "Documents provided must all have ID's or all not have ID's"
312
+
313
+
ifdocs[0].get("id") isNoneoroverwrite_ids:
314
+
logger.info("No id field in the documents. The documents will be inserted with Hash generated IDs.")
0 commit comments