diff --git a/langchain/vectorstores/__init__.py b/langchain/vectorstores/__init__.py
index 30d1ca7ecdc3e..ed3982ad7e1ab 100644
--- a/langchain/vectorstores/__init__.py
+++ b/langchain/vectorstores/__init__.py
@@ -7,6 +7,8 @@
 from langchain.vectorstores.deeplake import DeepLake
 from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
 from langchain.vectorstores.faiss import FAISS
+from langchain.vectorstores.hnsw_lib import HnswLib
+from langchain.vectorstores.in_memory_exact_search import InMemoryExactSearch
 from langchain.vectorstores.milvus import Milvus
 from langchain.vectorstores.myscale import MyScale, MyScaleSettings
 from langchain.vectorstores.opensearch_vector_search import OpenSearchVectorSearch
@@ -34,4 +36,6 @@
     "MyScaleSettings",
     "SupabaseVectorStore",
     "AnalyticDB",
+    "HnswLib",
+    "InMemoryExactSearch",
 ]
diff --git a/langchain/vectorstores/hnsw_lib.py b/langchain/vectorstores/hnsw_lib.py
new file mode 100644
index 0000000000000..2857248f0f5aa
--- /dev/null
+++ b/langchain/vectorstores/hnsw_lib.py
@@ -0,0 +1,141 @@
+"""Wrapper around HnswLib store."""
+from __future__ import annotations
+
+from typing import List, Optional, Type
+
+from langchain.embeddings.base import Embeddings
+from langchain.vectorstores.base import VST
+from langchain.vectorstores.vector_store_from_doc_index import (
+    VecStoreFromDocIndex,
+    _check_docarray_import,
+)
+
+
+class HnswLib(VecStoreFromDocIndex):
+    """Wrapper around HnswLib storage.
+
+    To use it, you should have the ``docarray[hnswlib]`` package with version
+    >=0.31.0 installed. You can install it with `pip install "langchain[hnswlib]"`.
+    """
+
+    def __init__(
+        self,
+        embedding: Embeddings,
+        work_dir: str,
+        n_dim: int,
+        dist_metric: str = "cosine",
+        max_elements: int = 1024,
+        index: bool = True,
+        ef_construction: int = 200,
+        ef: int = 10,
+        M: int = 16,
+        allow_replace_deleted: bool = True,
+        num_threads: int = 1,
+    ) -> None:
+        """Initialize HnswLib store.
+
+        Args:
+            embedding (Embeddings): Embedding function.
+            work_dir (str): Path to the location where all the data will be stored.
+            n_dim (int): Dimension of an embedding.
+            dist_metric (str): Distance metric for HnswLib; one of "cosine",
+                "ip", or "l2". Defaults to "cosine".
+            max_elements (int): Maximum number of vectors that can be stored.
+                Defaults to 1024.
+            index (bool): Whether an index should be built for this field.
+                Defaults to True.
+            ef_construction (int): Defines a construction time/accuracy trade-off.
+                Defaults to 200.
+            ef (int): Parameter controlling the query time/accuracy trade-off.
+                Defaults to 10.
+            M (int): Parameter that defines the maximum number of outgoing
+                connections in the graph. Defaults to 16.
+            allow_replace_deleted (bool): Enables replacing deleted elements
+                with newly added ones. Defaults to True.
+            num_threads (int): Sets the number of CPU threads to use. Defaults to 1.
+        """
+        _check_docarray_import()
+        from docarray.index import HnswDocumentIndex
+
+        try:
+            import google.protobuf  # noqa: F401
+        except ImportError:
+            raise ImportError(
+                "Could not import all required packages. "
+                "Please install them with `pip install \"langchain[hnswlib]\"`."
+            )
+
+        doc_cls = self._get_doc_cls(
+            {
+                "dim": n_dim,
+                "space": dist_metric,
+                "max_elements": max_elements,
+                "index": index,
+                "ef_construction": ef_construction,
+                "ef": ef,
+                "M": M,
+                "allow_replace_deleted": allow_replace_deleted,
+                "num_threads": num_threads,
+            }
+        )
+        doc_index = HnswDocumentIndex[doc_cls](work_dir=work_dir)
+        super().__init__(doc_index, embedding)
+
+    @classmethod
+    def from_texts(
+        cls: Type[VST],
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        work_dir: Optional[str] = None,
+        n_dim: Optional[int] = None,
+        dist_metric: str = "l2",
+        max_elements: int = 1024,
+        index: bool = True,
+        ef_construction: int = 200,
+        ef: int = 10,
+        M: int = 16,
+        allow_replace_deleted: bool = True,
+        num_threads: int = 1,
+    ) -> HnswLib:
+        """Create an HnswLib store and insert data.
+
+        Args:
+            texts (List[str]): Text data.
+            embedding (Embeddings): Embedding function.
+            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
+                Defaults to None.
+            work_dir (str): Path to the location where all the data will be stored.
+                Required.
+            n_dim (int): Dimension of an embedding. Required.
+            dist_metric (str): Distance metric for HnswLib; one of "cosine",
+                "ip", or "l2". Defaults to "l2".
+            max_elements (int): Maximum number of vectors that can be stored.
+                Defaults to 1024.
+            index (bool): Whether an index should be built for this field.
+                Defaults to True.
+            ef_construction (int): Defines a construction time/accuracy trade-off.
+                Defaults to 200.
+            ef (int): Parameter controlling the query time/accuracy trade-off.
+                Defaults to 10.
+            M (int): Parameter that defines the maximum number of outgoing
+                connections in the graph. Defaults to 16.
+            allow_replace_deleted (bool): Enables replacing deleted elements
+                with newly added ones. Defaults to True.
+            num_threads (int): Sets the number of CPU threads to use. Defaults to 1.
+
+        Returns:
+            HnswLib Vector Store
+        """
+        if work_dir is None:
+            raise ValueError("`work_dir` parameter has not been set.")
+        if n_dim is None:
+            raise ValueError("`n_dim` parameter has not been set.")
+
+        store = cls(
+            work_dir=work_dir,
+            n_dim=n_dim,
+            embedding=embedding,
+            dist_metric=dist_metric,
+            max_elements=max_elements,
+            index=index,
+            ef_construction=ef_construction,
+            ef=ef,
+            M=M,
+            allow_replace_deleted=allow_replace_deleted,
+            num_threads=num_threads,
+        )
+        store.add_texts(texts=texts, metadatas=metadatas)
+        return store
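For reference, a minimal usage sketch of the new `HnswLib` store, reusing the test suite's `FakeEmbeddings` helper (assumed, as in the tests below, to produce 10-dimensional vectors); the `work_dir` value is a placeholder and any writable directory works:

```python
from langchain.vectorstores.hnsw_lib import HnswLib
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings

# Build an on-disk HNSW index under work_dir; n_dim must match the embedding size.
store = HnswLib.from_texts(
    ["foo", "bar", "baz"],
    FakeEmbeddings(),
    work_dir="/tmp/hnswlib_store",  # placeholder path; any writable directory
    n_dim=10,
    dist_metric="cosine",
)

# Approximate nearest-neighbor search over the indexed texts.
docs = store.similarity_search("foo", k=2)
print([d.page_content for d in docs])
```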
diff --git a/langchain/vectorstores/in_memory_exact_search.py b/langchain/vectorstores/in_memory_exact_search.py
new file mode 100644
index 0000000000000..bbaabe7e11c6b
--- /dev/null
+++ b/langchain/vectorstores/in_memory_exact_search.py
@@ -0,0 +1,68 @@
+"""Wrapper around in-memory storage."""
+from __future__ import annotations
+
+from typing import List, Optional, Type
+
+from langchain.embeddings.base import Embeddings
+from langchain.vectorstores.base import VST
+from langchain.vectorstores.vector_store_from_doc_index import (
+    VecStoreFromDocIndex,
+    _check_docarray_import,
+)
+
+
+class InMemoryExactSearch(VecStoreFromDocIndex):
+    """Wrapper around in-memory storage for exact search.
+
+    To use it, you should have the ``docarray`` package with version
+    >=0.31.0 installed. You can install it with
+    `pip install "langchain[in_memory_store]"`.
+    """
+
+    def __init__(
+        self,
+        embedding: Embeddings,
+        metric: str = "cosine_sim",
+    ) -> None:
+        """Initialize InMemoryExactSearch store.
+
+        Args:
+            embedding (Embeddings): Embedding function.
+            metric (str): Metric for exact nearest-neighbor search; one of
+                "cosine_sim", "euclidean_dist", or "sqeuclidean_dist".
+                Defaults to "cosine_sim".
+        """
+        _check_docarray_import()
+        from docarray.index import InMemoryExactNNIndex
+
+        doc_cls = self._get_doc_cls({"space": metric})
+        doc_index = InMemoryExactNNIndex[doc_cls]()
+        super().__init__(doc_index, embedding)
+
+    @classmethod
+    def from_texts(
+        cls: Type[VST],
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        metric: str = "cosine_sim",
+    ) -> InMemoryExactSearch:
+        """Create an InMemoryExactSearch store and insert data.
+
+        Args:
+            texts (List[str]): Text data.
+            embedding (Embeddings): Embedding function.
+            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
+                Defaults to None.
+            metric (str): Metric for exact nearest-neighbor search; one of
+                "cosine_sim", "euclidean_dist", or "sqeuclidean_dist".
+                Defaults to "cosine_sim".
+
+        Returns:
+            InMemoryExactSearch Vector Store
+        """
+        store = cls(
+            embedding=embedding,
+            metric=metric,
+        )
+        store.add_texts(texts=texts, metadatas=metadatas)
+        return store
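A corresponding sketch for `InMemoryExactSearch`, which needs no `work_dir` or dimensionality because it keeps all vectors in memory and searches them exhaustively (same `FakeEmbeddings` stand-in as above):

```python
from langchain.vectorstores.in_memory_exact_search import InMemoryExactSearch
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings

# Exact (brute-force) nearest-neighbor search; nothing is written to disk.
store = InMemoryExactSearch.from_texts(
    ["foo", "bar", "baz"],
    FakeEmbeddings(),
    metric="cosine_sim",  # or "euclidean_dist" / "sqeuclidean_dist"
)

# For "cosine_sim" a higher score means more similar; the "*_dist" metrics
# return distances, where 0.0 is a perfect match.
docs_and_scores = store.similarity_search_with_score("foo", k=2)
```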
diff --git a/langchain/vectorstores/vector_store_from_doc_index.py b/langchain/vectorstores/vector_store_from_doc_index.py
new file mode 100644
index 0000000000000..a471bfe1cd703
--- /dev/null
+++ b/langchain/vectorstores/vector_store_from_doc_index.py
@@ -0,0 +1,189 @@
+from operator import itemgetter
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
+
+try:
+    from docarray import BaseDoc
+    from docarray.index.abstract import BaseDocIndex
+    from docarray.typing import NdArray
+except ImportError:
+    BaseDoc = None
+    BaseDocIndex = None
+    NdArray = None
+
+from langchain.embeddings.base import Embeddings
+from langchain.schema import Document
+from langchain.vectorstores.base import VectorStore
+from langchain.vectorstores.utils import maximal_marginal_relevance
+
+
+def _check_docarray_import() -> None:
+    try:
+        import docarray
+
+        da_version = docarray.__version__.split(".")
+        if int(da_version[0]) == 0 and int(da_version[1]) <= 30:
+            raise ValueError(
+                "To use the DocArray-based vector stores, docarray version "
+                f">=0.31.0 is expected, received: {docarray.__version__}. "
+                "To upgrade, please run: `pip install -U docarray`."
+            )
+    except ImportError:
+        raise ImportError(
+            "Could not import docarray python package. "
+            "Please install it with `pip install \"langchain[docarray]\"`."
+        )
+
+
+class VecStoreFromDocIndex(VectorStore):
+    doc_index: BaseDocIndex
+    doc_cls: Type[BaseDoc]
+    embedding: Embeddings
+
+    def __init__(
+        self,
+        doc_index: BaseDocIndex,
+        embedding: Embeddings,
+    ):
+        """Initialize a vector store from DocArray's DocIndex."""
+        self.doc_index = doc_index
+        self.doc_cls = doc_index._schema
+        self.embedding = embedding
+
+    @staticmethod
+    def _get_doc_cls(embeddings_params: Dict[str, Any]) -> Type[BaseDoc]:
+        """Get docarray Document class describing the schema of DocIndex."""
+        from docarray import BaseDoc
+        from pydantic import Field
+
+        class DocArrayDoc(BaseDoc):
+            text: Optional[str]
+            embedding: Optional[NdArray] = Field(**embeddings_params)
+            metadata: Optional[dict]
+
+        return DocArrayDoc
+
+    def add_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Run more texts through the embeddings and add to the vectorstore.
+
+        Args:
+            texts: Iterable of strings to add to the vectorstore.
+            metadatas: Optional list of metadatas associated with the texts.
+
+        Returns:
+            List of ids from adding the texts into the vectorstore.
+        """
+        # Materialize the iterable once, so it can be embedded and then zipped
+        # with metadatas and embeddings without being consumed twice.
+        texts = list(texts)
+        if metadatas is None:
+            metadatas = [{} for _ in texts]
+
+        ids: List[str] = []
+        embeddings = self.embedding.embed_documents(texts)
+        for t, m, e in zip(texts, metadatas, embeddings):
+            doc = self.doc_cls(text=t, embedding=e, metadata=m)
+            self.doc_index.index([doc])
+            ids.append(str(doc.id))
+
+        return ids
+
+    def similarity_search_with_score(
+        self, query: str, k: int = 4, **kwargs: Any
+    ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query and score for each.
+        """
+        query_embedding = self.embedding.embed_query(query)
+        query_doc = self.doc_cls(embedding=query_embedding)
+        docs, scores = self.doc_index.find(
+            query_doc, search_field="embedding", limit=k
+        )
+
+        result = [
+            (Document(page_content=doc.text, metadata=doc.metadata), score)
+            for doc, score in zip(docs, scores)
+        ]
+        return result
+
+    def similarity_search(
+        self, query: str, k: int = 4, **kwargs: Any
+    ) -> List[Document]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query.
+        """
+        results = self.similarity_search_with_score(query, k)
+        return list(map(itemgetter(0), results))
+
+    def _similarity_search_with_relevance_scores(
+        self,
+        query: str,
+        k: int = 4,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs and relevance scores, normalized on a scale from 0 to 1.
+
+        0 is dissimilar, 1 is most similar.
+        """
+        raise NotImplementedError
+
+    def similarity_search_by_vector(
+        self, embedding: List[float], k: int = 4, **kwargs: Any
+    ) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query vector.
+        """
+        query_doc = self.doc_cls(embedding=embedding)
+        docs = self.doc_index.find(
+            query_doc, search_field="embedding", limit=k
+        ).documents
+
+        result = [
+            Document(page_content=doc.text, metadata=doc.metadata) for doc in docs
+        ]
+        return result
+
+    def max_marginal_relevance_search(
+        self, query: str, k: int = 4, fetch_k: int = 20, **kwargs: Any
+    ) -> List[Document]:
+        """Return docs selected using the maximal marginal relevance.
+
+        Maximal marginal relevance optimizes for similarity to query AND diversity
+        among selected documents.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            fetch_k: Number of Documents to fetch to pass to the MMR algorithm.
+                Defaults to 20.
+
+        Returns:
+            List of Documents selected by maximal marginal relevance.
+        """
+        query_embedding = self.embedding.embed_query(query)
+        query_doc = self.doc_cls(embedding=query_embedding)
+
+        docs = self.doc_index.find(
+            query_doc, search_field="embedding", limit=fetch_k
+        ).documents
+
+        mmr_selected = maximal_marginal_relevance(query_embedding, docs.embedding, k=k)
+        results = [
+            Document(page_content=docs[idx].text, metadata=docs[idx].metadata)
+            for idx in mmr_selected
+        ]
+        return results
diff --git a/poetry.lock b/poetry.lock
index 1138b9196a649..4109d28490ed9 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry and should not be changed by hand.
[[package]] name = "absl-py" @@ -1515,32 +1515,40 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"] [[package]] name = "docarray" -version = "0.21.0" -description = "The data structure for unstructured data" +version = "0.31.0.dev35" +description = "The data structure for multimodal data" category = "main" optional = true -python-versions = "*" +python-versions = ">=3.7,<4.0" files = [ - {file = "docarray-0.21.0.tar.gz", hash = "sha256:3c9f605123800c1b0cdf8c458be3fb19c05e9a81f723e51200ef531b02e689ee"}, + {file = "docarray-0.31.0.dev35-py3-none-any.whl", hash = "sha256:a5c578cbf69853dddd17e845cc3fb2250cb1a0800ef48082d2a40a38bc9a7165"}, + {file = "docarray-0.31.0.dev35.tar.gz", hash = "sha256:f918cc5c35ed2df9b9ad7ef0abcc0bf5f3fe38a8f9e33526a33293d26a956f2e"}, ] [package.dependencies] -jina-hubble-sdk = ">=0.24.0" -numpy = "*" -rich = ">=12.0.0" - -[package.extras] -annlite = ["annlite"] -benchmark = ["h5py", "matplotlib", "pandas", "seaborn"] -common = ["Pillow", "fastapi", "lz4", "matplotlib", "protobuf (>=3.13.0)", "pydantic (>=1.9.0)", "requests", "uvicorn"] -elasticsearch = ["elasticsearch (>=8.2.0)"] -full = ["Pillow", "av", "fastapi", "grpcio (>=1.46.0,<1.48.1)", "grpcio-health-checking (>=1.46.0,<1.48.1)", "grpcio-reflection (>=1.46.0,<1.48.1)", "ipython", "lz4", "matplotlib", "protobuf (>=3.13.0)", "pydantic (>=1.9.0)", "requests", "scipy", "strawberry-graphql", "trimesh[easy]", "uvicorn"] -milvus = ["pymilvus (>=2.1.0,<2.2.0)"] -opensearch = ["opensearch-py (==2.0.1)"] -qdrant = ["qdrant-client (>=0.10.3,<0.11.0)"] -redis = ["redis (>=4.3.0)"] -test = ["annlite", "black (==22.3.0)", "datasets", "elasticsearch (>=8.2.0)", "jina", "jupyterlab", "mock", "onnx", "onnxruntime", "opensearch-py (==2.0.1)", "paddlepaddle", "protobuf (>=3.13.0,<=3.20.0)", "pymilvus (==2.1.3)", "pytest", "pytest-cov (==3.0.0)", "pytest-custom_exit_code", "pytest-mock", "pytest-mock", "pytest-repeat", "pytest-reraise", "pytest-timeout", "redis (>=4.3.0)", "tensorflow (==2.7.0)", "torch (==1.9.0)", "torchvision (==0.10.0)", "transformers (>=4.16.2)", "weaviate-client (>=3.9.0,<3.10.0)"] -weaviate = ["weaviate-client (>=3.9.0,<3.10.0)"] +numpy = ">=1.17.3" +orjson = ">=3.8.2" +pydantic = ">=1.10.2" +rich = ">=13.1.0" +types-requests = ">=2.28.11.6" +typing-inspect = ">=0.8.0" + +[package.extras] +audio = ["pydub (>=0.25.1,<0.26.0)"] +aws = ["smart-open[s3] (>=6.3.0)"] +elasticsearch = ["elastic-transport (>=8.4.0,<9.0.0)", "elasticsearch (>=7.10.1)"] +full = ["av (>=10.0.0)", "lz4 (>=1.0.0)", "pandas (>=1.1.0)", "pillow (>=9.3.0)", "protobuf (>=3.19.0)", "pydub (>=0.25.1,<0.26.0)", "trimesh[easy] (>=3.17.1)", "types-pillow (>=9.3.0.1)"] +hnswlib = ["hnswlib (>=0.6.2)"] +image = ["pillow (>=9.3.0)", "types-pillow (>=9.3.0.1)"] +jac = ["jina-hubble-sdk (>=0.34.0)"] +mesh = ["trimesh[easy] (>=3.17.1)"] +pandas = ["pandas (>=1.1.0)"] +proto = ["lz4 (>=1.0.0)", "protobuf (>=3.19.0)"] +qdrant = ["qdrant-client (>=1.1.4)"] +torch = ["torch (>=1.0.0)"] +video = ["av (>=10.0.0)"] +weaviate = ["weaviate-client (>=3.15)"] +web = ["fastapi (>=0.87.0)"] [[package]] name = "docker" @@ -2018,26 +2026,24 @@ files = [ [[package]] name = "google-api-core" -version = "2.11.0" +version = "2.8.2" description = "Google API client core library" category = "main" optional = true -python-versions = ">=3.7" +python-versions = ">=3.6" files = [ - {file = "google-api-core-2.11.0.tar.gz", hash = "sha256:4b9bb5d5a380a0befa0573b302651b8a9a89262c1730e37bf423cec511804c22"}, - {file = "google_api_core-2.11.0-py3-none-any.whl", hash = 
"sha256:ce222e27b0de0d7bc63eb043b956996d6dccab14cc3b690aaea91c9cc99dc16e"}, + {file = "google-api-core-2.8.2.tar.gz", hash = "sha256:06f7244c640322b508b125903bb5701bebabce8832f85aba9335ec00b3d02edc"}, + {file = "google_api_core-2.8.2-py3-none-any.whl", hash = "sha256:93c6a91ccac79079ac6bbf8b74ee75db970cc899278b97d53bc012f35908cf50"}, ] [package.dependencies] -google-auth = ">=2.14.1,<3.0dev" +google-auth = ">=1.25.0,<3.0dev" googleapis-common-protos = ">=1.56.2,<2.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +protobuf = ">=3.15.0,<5.0.0dev" requests = ">=2.18.0,<3.0.0dev" [package.extras] -grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0dev)", "grpcio-status (>=1.49.1,<2.0dev)"] -grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] -grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio-status (>=1.33.2,<2.0dev)"] [[package]] name = "google-api-python-client" @@ -2151,21 +2157,21 @@ requests = "*" [[package]] name = "googleapis-common-protos" -version = "1.59.0" +version = "1.56.4" description = "Common protobufs used in Google APIs" category = "main" optional = true python-versions = ">=3.7" files = [ - {file = "googleapis-common-protos-1.59.0.tar.gz", hash = "sha256:4168fcb568a826a52f23510412da405abd93f4d23ba544bb68d943b14ba3cb44"}, - {file = "googleapis_common_protos-1.59.0-py2.py3-none-any.whl", hash = "sha256:b287dc48449d1d41af0c69f4ea26242b5ae4c3d7249a38b0984c86a4caffff1f"}, + {file = "googleapis-common-protos-1.56.4.tar.gz", hash = "sha256:c25873c47279387cfdcbdafa36149887901d36202cb645a0e4f29686bf6e4417"}, + {file = "googleapis_common_protos-1.56.4-py2.py3-none-any.whl", hash = "sha256:8eb2cbc91b69feaf23e32452a7ae60e791e09967d81d4fcc7fc388182d1bd394"}, ] [package.dependencies] -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +protobuf = ">=3.15.0,<5.0.0dev" [package.extras] -grpc = ["grpcio (>=1.44.0,<2.0.0dev)"] +grpc = ["grpcio (>=1.0.0,<2.0.0dev)"] [[package]] name = "gptcache" @@ -2483,7 +2489,7 @@ numpy = ">=1.14.5" name = "hnswlib" version = "0.7.0" description = "hnswlib" -category = "dev" +category = "main" optional = false python-versions = "*" files = [ @@ -2955,20 +2961,20 @@ testing = ["Django (<3.1)", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] [[package]] name = "jina" -version = "3.15.0" +version = "3.14.1" description = "Build multimodal AI services via cloud native technologies · Neural Search · Generative AI · MLOps" category = "main" optional = true python-versions = "*" files = [ - {file = "jina-3.15.0.tar.gz", hash = "sha256:18a3be8ddca14ed66a554d8480a277bcb7620ebc6ae11352a9835c91865f9d1e"}, + {file = "jina-3.14.1.tar.gz", hash = "sha256:00b1f5995b13c9a49a2287bd534bd32eb8c05706064752035d569e616a15b411"}, ] [package.dependencies] aiofiles = "*" aiohttp = "*" aiostream = "*" -docarray = ">=0.16.4,<0.30.0" +docarray = ">=0.16.4" docker = "*" fastapi = ">=0.76.0" filelock = "*" @@ -3002,14 +3008,14 @@ websockets = "*" aiofiles = ["aiofiles"] aiohttp = ["aiohttp"] aiostream = ["aiostream"] -all = ["Pillow", "aiofiles", "aiohttp", "aiostream", "black (==22.3.0)", "bs4", "coverage (==6.2)", "docarray (>=0.16.4,<0.30.0)", "docker", "fastapi (>=0.76.0)", "filelock", "flaky", "grpcio (>=1.46.0,<1.48.1)", "grpcio-health-checking 
(>=1.46.0,<1.48.1)", "grpcio-reflection (>=1.46.0,<1.48.1)", "jcloud (>=0.0.35)", "jina-hubble-sdk (>=0.30.4)", "jsonschema", "kubernetes (>=18.20.0)", "mock", "numpy", "opentelemetry-api (>=1.12.0)", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-instrumentation-grpc (>=0.35b0)", "opentelemetry-sdk (>=1.14.0)", "opentelemetry-test-utils (>=0.33b0)", "packaging (>=20.0)", "pathspec", "portforward (>=0.2.4,<0.4.3)", "prometheus-api-client (>=0.5.1)", "prometheus_client (>=0.12.0)", "protobuf (>=3.19.0)", "psutil", "pydantic", "pytest", "pytest-asyncio", "pytest-cov (==3.0.0)", "pytest-custom_exit_code", "pytest-kind (==22.11.1)", "pytest-lazy-fixture", "pytest-mock", "pytest-repeat", "pytest-reraise", "pytest-timeout", "python-multipart", "pyyaml (>=5.3.1)", "requests", "requests-mock", "scipy (>=1.6.1)", "sgqlc", "strawberry-graphql (>=0.96.0)", "tensorflow (>=2.0)", "torch", "uvicorn[standard]", "uvloop", "watchfiles (>=0.18.0)", "websockets"] +all = ["Pillow", "aiofiles", "aiohttp", "aiostream", "black (==22.3.0)", "bs4", "coverage (==6.2)", "docarray (>=0.16.4)", "docker", "fastapi (>=0.76.0)", "filelock", "flaky", "grpcio (>=1.46.0,<1.48.1)", "grpcio-health-checking (>=1.46.0,<1.48.1)", "grpcio-reflection (>=1.46.0,<1.48.1)", "jcloud (>=0.0.35)", "jina-hubble-sdk (>=0.30.4)", "jsonschema", "kubernetes (>=18.20.0)", "mock", "numpy", "opentelemetry-api (>=1.12.0)", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-instrumentation-grpc (>=0.35b0)", "opentelemetry-sdk (>=1.14.0)", "opentelemetry-test-utils (>=0.33b0)", "packaging (>=20.0)", "pathspec", "portforward (>=0.2.4)", "prometheus-api-client (>=0.5.1)", "prometheus_client (>=0.12.0)", "protobuf (>=3.19.0)", "psutil", "pydantic", "pytest", "pytest-asyncio", "pytest-cov (==3.0.0)", "pytest-custom_exit_code", "pytest-kind (==22.11.1)", "pytest-lazy-fixture", "pytest-mock", "pytest-repeat", "pytest-reraise", "pytest-timeout", "python-multipart", "pyyaml (>=5.3.1)", "requests", "requests-mock", "scipy (>=1.6.1)", "sgqlc", "strawberry-graphql (>=0.96.0)", "tensorflow (>=2.0)", "torch", "uvicorn[standard]", "uvloop", "watchfiles (>=0.18.0)", "websockets"] black = ["black (==22.3.0)"] bs4 = ["bs4"] -cicd = ["bs4", "jsonschema", "portforward (>=0.2.4,<0.4.3)", "sgqlc", "strawberry-graphql (>=0.96.0)", "tensorflow (>=2.0)", "torch"] -core = ["aiostream", "docarray (>=0.16.4,<0.30.0)", "grpcio (>=1.46.0,<1.48.1)", "grpcio-health-checking (>=1.46.0,<1.48.1)", "grpcio-reflection (>=1.46.0,<1.48.1)", "jcloud (>=0.0.35)", "jina-hubble-sdk (>=0.30.4)", "numpy", "opentelemetry-api (>=1.12.0)", "opentelemetry-instrumentation-grpc (>=0.35b0)", "packaging (>=20.0)", "protobuf (>=3.19.0)", "pyyaml (>=5.3.1)"] +cicd = ["bs4", "jsonschema", "portforward (>=0.2.4)", "sgqlc", "strawberry-graphql (>=0.96.0)", "tensorflow (>=2.0)", "torch"] +core = ["docarray (>=0.16.4)", "grpcio (>=1.46.0,<1.48.1)", "grpcio-health-checking (>=1.46.0,<1.48.1)", "grpcio-reflection (>=1.46.0,<1.48.1)", "jcloud (>=0.0.35)", "jina-hubble-sdk (>=0.30.4)", "numpy", "opentelemetry-api (>=1.12.0)", "opentelemetry-instrumentation-grpc 
(>=0.35b0)", "packaging (>=20.0)", "protobuf (>=3.19.0)", "pyyaml (>=5.3.1)"] coverage = ["coverage (==6.2)"] -devel = ["aiofiles", "aiohttp", "docker", "fastapi (>=0.76.0)", "filelock", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-sdk (>=1.14.0)", "pathspec", "prometheus_client (>=0.12.0)", "pydantic", "python-multipart", "requests", "sgqlc", "strawberry-graphql (>=0.96.0)", "uvicorn[standard]", "uvloop", "watchfiles (>=0.18.0)", "websockets"] -docarray = ["docarray (>=0.16.4,<0.30.0)"] +devel = ["aiofiles", "aiohttp", "aiostream", "docker", "fastapi (>=0.76.0)", "filelock", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-sdk (>=1.14.0)", "pathspec", "prometheus_client (>=0.12.0)", "pydantic", "python-multipart", "requests", "sgqlc", "strawberry-graphql (>=0.96.0)", "uvicorn[standard]", "uvloop", "watchfiles (>=0.18.0)", "websockets"] +docarray = ["docarray (>=0.16.4)"] docker = ["docker"] fastapi = ["fastapi (>=0.76.0)"] filelock = ["filelock"] @@ -3036,7 +3042,7 @@ packaging = ["packaging (>=20.0)"] pathspec = ["pathspec"] perf = ["opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-sdk (>=1.14.0)", "prometheus_client (>=0.12.0)", "uvloop"] pillow = ["Pillow"] -portforward = ["portforward (>=0.2.4,<0.4.3)"] +portforward = ["portforward (>=0.2.4)"] prometheus-api-client = ["prometheus-api-client (>=0.5.1)"] prometheus-client = ["prometheus_client (>=0.12.0)"] protobuf = ["protobuf (>=3.19.0)"] @@ -3058,7 +3064,7 @@ requests = ["requests"] requests-mock = ["requests-mock"] scipy = ["scipy (>=1.6.1)"] sgqlc = ["sgqlc"] -standard = ["aiofiles", "aiohttp", "docker", "fastapi (>=0.76.0)", "filelock", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-sdk (>=1.14.0)", "pathspec", "prometheus_client (>=0.12.0)", "pydantic", "python-multipart", "requests", "uvicorn[standard]", "uvloop", "websockets"] +standard = ["aiofiles", "aiohttp", "aiostream", "docker", "fastapi (>=0.76.0)", "filelock", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-sdk (>=1.14.0)", "pathspec", "prometheus_client (>=0.12.0)", "pydantic", "python-multipart", "requests", "uvicorn[standard]", "uvloop", "websockets"] standrad = ["opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)"] strawberry-graphql = ["strawberry-graphql (>=0.96.0)"] tensorflow = ["tensorflow (>=2.0)"] @@ -4991,6 +4997,72 @@ numpy = ">=1.7" docs = ["numpydoc", "sphinx (==1.2.3)", "sphinx-rtd-theme", "sphinxcontrib-napoleon"] tests = ["pytest", "pytest-cov", "pytest-pep8"] +[[package]] +name = "orjson" +version = "3.8.10" +description = "Fast, correct Python JSON library 
supporting dataclasses, datetimes, and numpy" +category = "main" +optional = true +python-versions = ">= 3.7" +files = [ + {file = "orjson-3.8.10-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:4dfe0651e26492d5d929bbf4322de9afbd1c51ac2e3947a7f78492b20359711d"}, + {file = "orjson-3.8.10-cp310-cp310-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:bc30de5c7b3a402eb59cc0656b8ee53ca36322fc52ab67739c92635174f88336"}, + {file = "orjson-3.8.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c08b426fae7b9577b528f99af0f7e0ff3ce46858dd9a7d1bf86d30f18df89a4c"}, + {file = "orjson-3.8.10-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bce970f293825e008dbf739268dfa41dfe583aa2a1b5ef4efe53a0e92e9671ea"}, + {file = "orjson-3.8.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9b23fb0264bbdd7218aa685cb6fc71f0dcecf34182f0a8596a3a0dff010c06f9"}, + {file = "orjson-3.8.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0826ad2dc1cea1547edff14ce580374f0061d853cbac088c71162dbfe2e52205"}, + {file = "orjson-3.8.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7bce6e61cea6426309259b04c6ee2295b3f823ea51a033749459fe2dd0423b2"}, + {file = "orjson-3.8.10-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0b470d31244a6f647e5402aac7d2abaf7bb4f52379acf67722a09d35a45c9417"}, + {file = "orjson-3.8.10-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:48824649019a25d3e52f6454435cf19fe1eb3d05ee697e65d257f58ae3aa94d9"}, + {file = "orjson-3.8.10-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:faee89e885796a9cc493c930013fa5cfcec9bfaee431ddf00f0fbfb57166a8b3"}, + {file = "orjson-3.8.10-cp310-none-win_amd64.whl", hash = "sha256:3cfe32b1227fe029a5ad989fbec0b453a34e5e6d9a977723f7c3046d062d3537"}, + {file = "orjson-3.8.10-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:2073b62822738d6740bd2492f6035af5c2fd34aa198322b803dc0e70559a17b7"}, + {file = "orjson-3.8.10-cp311-cp311-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:b2c4faf20b6bb5a2d7ac0c16f58eb1a3800abcef188c011296d1dc2bb2224d48"}, + {file = "orjson-3.8.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c1825997232a324911d11c75d91e1e0338c7b723c149cf53a5fc24496c048a4"}, + {file = "orjson-3.8.10-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f7e85d4682f3ed7321d36846cad0503e944ea9579ef435d4c162e1b73ead8ac9"}, + {file = "orjson-3.8.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8cdaacecb92997916603ab232bb096d0fa9e56b418ca956b9754187d65ca06"}, + {file = "orjson-3.8.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ddabc5e44702d13137949adee3c60b7091e73a664f6e07c7b428eebb2dea7bbf"}, + {file = "orjson-3.8.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27bb26e171e9cfdbec39c7ca4739b6bef8bd06c293d56d92d5e3a3fc017df17d"}, + {file = "orjson-3.8.10-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1810e5446fe68d61732e9743592da0ec807e63972eef076d09e02878c2f5958e"}, + {file = "orjson-3.8.10-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:61e2e51cefe7ef90c4fbbc9fd38ecc091575a3ea7751d56fad95cbebeae2a054"}, + {file = "orjson-3.8.10-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f3e9ac9483c2b4cd794e760316966b7bd1e6afb52b0218f068a4e80c9b2db4f6"}, + {file = "orjson-3.8.10-cp311-none-win_amd64.whl", hash 
= "sha256:26aee557cf8c93b2a971b5a4a8e3cca19780573531493ce6573aa1002f5c4378"}, + {file = "orjson-3.8.10-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:11ae68f995a50724032af297c92f20bcde31005e0bf3653b12bff9356394615b"}, + {file = "orjson-3.8.10-cp37-cp37m-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:35d879b46b8029e1e01e9f6067928b470a4efa1ca749b6d053232b873c2dcf66"}, + {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:345e41abd1d9e3ecfb554e1e75ff818cf42e268bd06ad25a96c34e00f73a327e"}, + {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:45a5afc9cda6b8aac066dd50d8194432fbc33e71f7164f95402999b725232d78"}, + {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad632dc330a7b39da42530c8d146f76f727d476c01b719dc6743c2b5701aaf6b"}, + {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bf2556ba99292c4dc550560384dd22e88b5cdbe6d98fb4e202e902b5775cf9f"}, + {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b88afd662190f19c3bb5036a903589f88b1d2c2608fbb97281ce000db6b08897"}, + {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:abce8d319aae800fd2d774db1106f926dee0e8a5ca85998fd76391fcb58ef94f"}, + {file = "orjson-3.8.10-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e999abca892accada083f7079612307d94dd14cc105a699588a324f843216509"}, + {file = "orjson-3.8.10-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a3fdee68c4bb3c5d6f89ed4560f1384b5d6260e48fbf868bae1a245a3c693d4d"}, + {file = "orjson-3.8.10-cp37-none-win_amd64.whl", hash = "sha256:e5d7f82506212e047b184c06e4bcd48c1483e101969013623cebcf51cf12cad9"}, + {file = "orjson-3.8.10-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:d953e6c2087dcd990e794f8405011369ee11cf13e9aaae3172ee762ee63947f2"}, + {file = "orjson-3.8.10-cp38-cp38-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:81aa3f321d201bff0bd0f4014ea44e51d58a9a02d8f2b0eeab2cee22611be8e1"}, + {file = "orjson-3.8.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d27b6182f75896dd8c10ea0f78b9265a3454be72d00632b97f84d7031900dd4"}, + {file = "orjson-3.8.10-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1486600bc1dd1db26c588dd482689edba3d72d301accbe4301db4b2b28bd7aa4"}, + {file = "orjson-3.8.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:344ea91c556a2ce6423dc13401b83ab0392aa697a97fa4142c2c63a6fd0bbfef"}, + {file = "orjson-3.8.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:979f231e3bad1c835627eef1a30db12a8af58bfb475a6758868ea7e81897211f"}, + {file = "orjson-3.8.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fa3a26dcf0f5f2912a8ce8e87273e68b2a9526854d19fd09ea671b154418e88"}, + {file = "orjson-3.8.10-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:b6e79d8864794635974b18821b49a7f27859d17b93413d4603efadf2e92da7a5"}, + {file = "orjson-3.8.10-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ce49999bcbbc14791c61844bc8a69af44f5205d219be540e074660038adae6bf"}, + {file = "orjson-3.8.10-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c2ef690335b24f9272dbf6639353c1ffc3f196623a92b851063e28e9515cf7dd"}, + {file = "orjson-3.8.10-cp38-none-win_amd64.whl", hash = 
"sha256:5a0b1f4e4fa75e26f814161196e365fc0e1a16e3c07428154505b680a17df02f"}, + {file = "orjson-3.8.10-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:af7601a78b99f0515af2f8ab12c955c0072ffcc1e437fb2556f4465783a4d813"}, + {file = "orjson-3.8.10-cp39-cp39-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:6bbd7b3a3e2030b03c68c4d4b19a2ef5b89081cbb43c05fe2010767ef5e408db"}, + {file = "orjson-3.8.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4355c9aedfefe60904e8bd7901315ebbc8bb828f665e4c9bc94b1432e67cb6f7"}, + {file = "orjson-3.8.10-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b0ba074375e25c1594e770e2215941e2017c3cd121889150737fa1123e8bfe"}, + {file = "orjson-3.8.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:34b6901c110c06ab9e8d7d0496db4bc9a0c162ca8d77f67539d22cb39e0a1ef4"}, + {file = "orjson-3.8.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb62ec16a1c26ad9487727b529103cb6a94a1d4969d5b32dd0eab5c3f4f5a6f2"}, + {file = "orjson-3.8.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:595e1e7d04aaaa3d41113e4eb9f765ab642173c4001182684ae9ddc621bb11c8"}, + {file = "orjson-3.8.10-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:64ffd92328473a2f9af059410bd10c703206a4bbc7b70abb1bedcd8761e39eb8"}, + {file = "orjson-3.8.10-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b1f648ec89c6a426098868460c0ef8c86b457ce1378d7569ff4acb6c0c454048"}, + {file = "orjson-3.8.10-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6a286ad379972e4f46579e772f0477e6b505f1823aabcd64ef097dbb4549e1a4"}, + {file = "orjson-3.8.10-cp39-none-win_amd64.whl", hash = "sha256:d2874cee6856d7c386b596e50bc517d1973d73dc40b2bd6abec057b5e7c76b2f"}, + {file = "orjson-3.8.10.tar.gz", hash = "sha256:dcf6adb4471b69875034afab51a14b64f1026bc968175a2bb02c5f6b358bd413"}, +] + [[package]] name = "packaging" version = "23.1" @@ -5546,37 +5618,36 @@ requests = "*" [[package]] name = "protobuf" -version = "3.19.6" +version = "3.19.0" description = "Protocol Buffers" category = "main" optional = true python-versions = ">=3.5" files = [ - {file = "protobuf-3.19.6-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:010be24d5a44be7b0613750ab40bc8b8cedc796db468eae6c779b395f50d1fa1"}, - {file = "protobuf-3.19.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11478547958c2dfea921920617eb457bc26867b0d1aa065ab05f35080c5d9eb6"}, - {file = "protobuf-3.19.6-cp310-cp310-win32.whl", hash = "sha256:559670e006e3173308c9254d63facb2c03865818f22204037ab76f7a0ff70b5f"}, - {file = "protobuf-3.19.6-cp310-cp310-win_amd64.whl", hash = "sha256:347b393d4dd06fb93a77620781e11c058b3b0a5289262f094379ada2920a3730"}, - {file = "protobuf-3.19.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a8ce5ae0de28b51dff886fb922012dad885e66176663950cb2344c0439ecb473"}, - {file = "protobuf-3.19.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90b0d02163c4e67279ddb6dc25e063db0130fc299aefabb5d481053509fae5c8"}, - {file = "protobuf-3.19.6-cp36-cp36m-win32.whl", hash = "sha256:30f5370d50295b246eaa0296533403961f7e64b03ea12265d6dfce3a391d8992"}, - {file = "protobuf-3.19.6-cp36-cp36m-win_amd64.whl", hash = "sha256:0c0714b025ec057b5a7600cb66ce7c693815f897cfda6d6efb58201c472e3437"}, - {file = "protobuf-3.19.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5057c64052a1f1dd7d4450e9aac25af6bf36cfbfb3a1cd89d16393a036c49157"}, - {file = 
"protobuf-3.19.6-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:bb6776bd18f01ffe9920e78e03a8676530a5d6c5911934c6a1ac6eb78973ecb6"}, - {file = "protobuf-3.19.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84a04134866861b11556a82dd91ea6daf1f4925746b992f277b84013a7cc1229"}, - {file = "protobuf-3.19.6-cp37-cp37m-win32.whl", hash = "sha256:4bc98de3cdccfb5cd769620d5785b92c662b6bfad03a202b83799b6ed3fa1fa7"}, - {file = "protobuf-3.19.6-cp37-cp37m-win_amd64.whl", hash = "sha256:aa3b82ca1f24ab5326dcf4ea00fcbda703e986b22f3d27541654f749564d778b"}, - {file = "protobuf-3.19.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2b2d2913bcda0e0ec9a784d194bc490f5dc3d9d71d322d070b11a0ade32ff6ba"}, - {file = "protobuf-3.19.6-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:d0b635cefebd7a8a0f92020562dead912f81f401af7e71f16bf9506ff3bdbb38"}, - {file = "protobuf-3.19.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a552af4dc34793803f4e735aabe97ffc45962dfd3a237bdde242bff5a3de684"}, - {file = "protobuf-3.19.6-cp38-cp38-win32.whl", hash = "sha256:0469bc66160180165e4e29de7f445e57a34ab68f49357392c5b2f54c656ab25e"}, - {file = "protobuf-3.19.6-cp38-cp38-win_amd64.whl", hash = "sha256:91d5f1e139ff92c37e0ff07f391101df77e55ebb97f46bbc1535298d72019462"}, - {file = "protobuf-3.19.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c0ccd3f940fe7f3b35a261b1dd1b4fc850c8fde9f74207015431f174be5976b3"}, - {file = "protobuf-3.19.6-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:30a15015d86b9c3b8d6bf78d5b8c7749f2512c29f168ca259c9d7727604d0e39"}, - {file = "protobuf-3.19.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:878b4cd080a21ddda6ac6d1e163403ec6eea2e206cf225982ae04567d39be7b0"}, - {file = "protobuf-3.19.6-cp39-cp39-win32.whl", hash = "sha256:5a0d7539a1b1fb7e76bf5faa0b44b30f812758e989e59c40f77a7dab320e79b9"}, - {file = "protobuf-3.19.6-cp39-cp39-win_amd64.whl", hash = "sha256:bbf5cea5048272e1c60d235c7bd12ce1b14b8a16e76917f371c718bd3005f045"}, - {file = "protobuf-3.19.6-py2.py3-none-any.whl", hash = "sha256:14082457dc02be946f60b15aad35e9f5c69e738f80ebbc0900a19bc83734a5a4"}, - {file = "protobuf-3.19.6.tar.gz", hash = "sha256:5f5540d57a43042389e87661c6eaa50f47c19c6176e8cf1c4f287aeefeccb5c4"}, + {file = "protobuf-3.19.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:01a0645ef3acddfbc90237e1cdfae1086130fc7cb480b5874656193afd657083"}, + {file = "protobuf-3.19.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d3861c9721a90ba83ee0936a9cfcc4fa1c4b4144ac9658fb6f6343b38558e9b4"}, + {file = "protobuf-3.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b64be5d7270cf5e76375bac049846e8a9543a2d4368b69afe78ab725380a7487"}, + {file = "protobuf-3.19.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:2f6046b9e2feee0dce994493186e8715b4392ed5f50f356280ad9c2f9f93080a"}, + {file = "protobuf-3.19.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac2f8ec942d414609aba0331952ae12bb823e8f424bbb6b8c422f1cef32dc842"}, + {file = "protobuf-3.19.0-cp36-cp36m-win32.whl", hash = "sha256:3fea09aa04ef2f8b01fcc9bb87f19509934f8a35d177c865b8f9ee5c32b60c1b"}, + {file = "protobuf-3.19.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d1f4277d321f60456845ca9b882c4845736f1f5c1c69eb778eba22a97977d8af"}, + {file = "protobuf-3.19.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8488c2276f14f294e890cc1260ab342a13e90cd20dcc03319d2eea258f1fd321"}, + {file = 
"protobuf-3.19.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:36bf292f44966c67080e535321501717f4f1eba30faef8f2cd4b0c745a027211"}, + {file = "protobuf-3.19.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c99af73ae34c93e0e2ace57ea2e70243f34fc015c8c23fd39ee93652e726f7e7"}, + {file = "protobuf-3.19.0-cp37-cp37m-win32.whl", hash = "sha256:f7a031cf8e2fc14acc0ba694f6dff0a01e06b70d817eba6edc72ee6cc20517ac"}, + {file = "protobuf-3.19.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d4ca5f0c7bc8d2e6966ca3bbd85e9ebe7191b6e21f067896d4af6b28ecff29fe"}, + {file = "protobuf-3.19.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9a8a880593015ef2c83f7af797fa4fbf583b2c98b4bd94e46c5b61fee319d84b"}, + {file = "protobuf-3.19.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:6f16925f5c977dd7787973a50c242e60c22b1d1182aba6bec7bd02862579c10f"}, + {file = "protobuf-3.19.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9097327d277b0aa4a3224e61cd6850aef3269172397715299bcffc9f90293c9"}, + {file = "protobuf-3.19.0-cp38-cp38-win32.whl", hash = "sha256:708d04394a63ee9bdc797938b6e15ed5bf24a1cb37743eb3886fd74a5a67a234"}, + {file = "protobuf-3.19.0-cp38-cp38-win_amd64.whl", hash = "sha256:ee4d07d596357f51316b6ecf1cc1927660e9d5e418385bb1c51fd2496cd9bee7"}, + {file = "protobuf-3.19.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:34a77b8fafdeb8f89fee2b7108ae60d8958d72e33478680cc1e05517892ecc46"}, + {file = "protobuf-3.19.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:4f93e0f6af796ddd1502225ff8ea25340ced186ca05b601c44d5c88b45ba80a0"}, + {file = "protobuf-3.19.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:942dd6bc8bd2a3c6a156d8ab0f80bd45313f22b78e1176283270054dcc8ca4c2"}, + {file = "protobuf-3.19.0-cp39-cp39-win32.whl", hash = "sha256:7b3867795708ac88fde8d6f34f0d9a50af56087e41f624bdb2e9ff808ea5dda7"}, + {file = "protobuf-3.19.0-cp39-cp39-win_amd64.whl", hash = "sha256:a74432e9d28a6072a2359a0f49f81eb14dd718e7dbbfb6c0789b456c49e1f130"}, + {file = "protobuf-3.19.0-py2.py3-none-any.whl", hash = "sha256:c96e94d3e523a82caa3e5f74b35dd1c4884199358d01c950d95c341255ff48bc"}, + {file = "protobuf-3.19.0.tar.gz", hash = "sha256:6a1dc6584d24ef86f5b104bcad64fa0fe06ed36e5687f426e0445d363a041d18"}, ] [[package]] @@ -7507,7 +7578,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and platform_machine == \"aarch64\" or python_version >= \"3\" and platform_machine == \"ppc64le\" or python_version >= \"3\" and platform_machine == \"x86_64\" or python_version >= \"3\" and platform_machine == \"amd64\" or python_version >= \"3\" and platform_machine == \"AMD64\" or python_version >= \"3\" and platform_machine == \"win32\" or python_version >= \"3\" and platform_machine == \"WIN32\""} +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} [package.extras] aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] @@ -7759,18 +7830,18 @@ files = [ [[package]] name = "tensorflow-hub" -version = "0.13.0" +version = "0.12.0" description = "TensorFlow Hub is a library to foster the publication, discovery, and consumption of reusable parts of machine learning models." 
category = "main" optional = true python-versions = "*" files = [ - {file = "tensorflow_hub-0.13.0-py2.py3-none-any.whl", hash = "sha256:3544f4fd9fd99e4eeb6da1b5b5320e4a2dbdef7f9bb778f66f76d6790f32dd65"}, + {file = "tensorflow_hub-0.12.0-py2.py3-none-any.whl", hash = "sha256:822fe5f7338c95efcc3a534011c6689e4309ba2459def87194179c4de8a6e1fc"}, ] [package.dependencies] numpy = ">=1.12.0" -protobuf = ">=3.19.6" +protobuf = ">=3.8.0" [package.extras] make-image-classifier = ["keras-preprocessing[image]"] @@ -8414,7 +8485,7 @@ types-pyOpenSSL = "*" name = "types-requests" version = "2.28.11.17" description = "Typing stubs for requests" -category = "dev" +category = "main" optional = false python-versions = "*" files = [ @@ -8441,7 +8512,7 @@ files = [ name = "types-urllib3" version = "1.26.25.10" description = "Typing stubs for urllib3" -category = "dev" +category = "main" optional = false python-versions = "*" files = [ @@ -9267,13 +9338,16 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "manifest-ml", "networkx", "nlpcloud", "nltk", "nomic", "openai", "opensearch-py", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pyowm", "pypdf", "pytesseract", "qdrant-client", "redis", "sentence-transformers", "spacy", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"] +all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "docarray", "protobuf", "hnswlib"] cohere = ["cohere"] -llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "torch", "transformers"] +embeddings = ["sentence-transformers"] +hnswlib = ["docarray", "protobuf", "hnswlib"] +in-memory-store = ["docarray"] +llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"] openai = ["openai"] qdrant = ["qdrant-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "ab6ea1c53c7a6e792d5bdcf8865b87e5dcfe4c89080c18b356dc4ed8a17cc3a3" +content-hash = "5223e3c6bdf37a28e1ee1cfb26e7f8d84fd6bc94893c96ecaca428fb9e8278eb" diff --git a/pyproject.toml b/pyproject.toml index 0eec46451897e..869d5f8d0d453 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,9 @@ pytesseract = {version = "^0.3.10", optional=true} html2text = {version="^2020.1.16", optional=true} numexpr = "^2.8.4" duckduckgo-search = {version="^2.8.6", optional=true} - +docarray = {version="^0.31.0.dev35", optional=true} +protobuf = {version="3.19", optional=true} +hnswlib = {version="^0.7.0", optional=true} 
 [tool.poetry.group.docs.dependencies]
 autodoc_pydantic = "^1.8.0"
@@ -145,8 +147,10 @@ llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
 qdrant = ["qdrant-client"]
 openai = ["openai"]
 cohere = ["cohere"]
+in_memory_store = ["docarray"]
+hnswlib = ["docarray", "protobuf", "hnswlib"]
 embeddings = ["sentence-transformers"]
-all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect"]
+all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "docarray", "protobuf", "hnswlib"]

 [tool.ruff]
 select = [
diff --git a/tests/integration_tests/vectorstores/test_hnsw_lib.py b/tests/integration_tests/vectorstores/test_hnsw_lib.py
new file mode 100644
index 0000000000000..a4a6441eec779
--- /dev/null
+++ b/tests/integration_tests/vectorstores/test_hnsw_lib.py
@@ -0,0 +1,149 @@
+import numpy as np
+import pytest
+
+from langchain.schema import Document
+from langchain.vectorstores.hnsw_lib import HnswLib
+from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
+
+
+def test_hnswlib_vec_store_from_texts(tmp_path) -> None:
+    """Test end to end construction from texts."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        n_dim=10,
+        dist_metric="cosine",
+    )
+    assert isinstance(docsearch, HnswLib)
+    assert docsearch.doc_index.num_docs() == 3
+
+
+def test_hnswlib_vec_store_add_texts(tmp_path) -> None:
+    """Test end to end construction and adding texts."""
+    docsearch = HnswLib(
+        work_dir=str(tmp_path),
+        n_dim=10,
+        embedding=FakeEmbeddings(),
+        dist_metric="cosine",
+    )
+    assert isinstance(docsearch, HnswLib)
+    assert docsearch.doc_index.num_docs() == 0
+
+    texts = ["foo", "bar", "baz"]
+    docsearch.add_texts(texts=texts)
+    assert docsearch.doc_index.num_docs() == 3
+
+
+@pytest.mark.parametrize("metric", ["cosine", "l2"])
+def test_sim_search(metric, tmp_path) -> None:
+    """Test end to end construction and simple similarity search."""
+    texts = ["foo", "bar", "baz"]
+    hnswlib_vec_store = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        n_dim=10,
+        dist_metric=metric,
+    )
+    output = hnswlib_vec_store.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo")]
+
+
+@pytest.mark.parametrize("metric", ["cosine", "l2"])
+def test_sim_search_all_configurations(metric, tmp_path) -> None:
+    """Test end to end construction with all HNSW parameters set and search."""
+    texts = ["foo", "bar", "baz"]
+    hnswlib_vec_store = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        dist_metric=metric,
+        n_dim=10,
+        max_elements=8,
+        index=False,
+        ef_construction=300,
+        ef=20,
+        M=8,
+        allow_replace_deleted=False,
+        num_threads=2,
+    )
+    output = hnswlib_vec_store.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo")]
+
+
+@pytest.mark.parametrize("metric", ["cosine", "l2"])
+def test_sim_search_by_vector(metric, tmp_path) -> None:
+    """Test end to end construction and similarity search by vector."""
+    texts = ["foo", "bar", "baz"]
+    hnswlib_vec_store = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        n_dim=10,
+        dist_metric=metric,
+    )
+    embedding = [1.0] * 10
+    output = hnswlib_vec_store.similarity_search_by_vector(embedding, k=1)
+
+    assert output == [Document(page_content="bar")]
+
+
+@pytest.mark.parametrize("metric", ["cosine", "l2"])
+def test_sim_search_with_score(metric, tmp_path) -> None:
+    """Test end to end construction and similarity search with score."""
+    texts = ["foo", "bar", "baz"]
+    hnswlib_vec_store = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        n_dim=10,
+        dist_metric=metric,
+    )
+    output = hnswlib_vec_store.similarity_search_with_score("foo", k=1)
+    assert len(output) == 1
+
+    out_doc, out_score = output[0]
+    assert out_doc == Document(page_content="foo")
+    assert np.isclose(out_score, 0.0, atol=1e-6)
+
+
+def test_sim_search_with_score_for_ip_metric(tmp_path) -> None:
+    """
+    Test end to end construction and similarity search with score for ip
+    (inner-product) metric.
+    """
+    texts = ["foo", "bar", "baz"]
+    hnswlib_vec_store = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        n_dim=10,
+        dist_metric="ip",
+    )
+    output = hnswlib_vec_store.similarity_search_with_score("foo", k=3)
+    assert len(output) == 3
+
+    for result in output:
+        assert result[1] == -8.0
+
+
+@pytest.mark.parametrize("metric", ["cosine", "l2"])
+def test_max_marginal_relevance_search(metric, tmp_path) -> None:
+    """Test MMR search."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": i} for i in range(len(texts))]
+    docsearch = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        metadatas=metadatas,
+        dist_metric=metric,
+        work_dir=str(tmp_path),
+        n_dim=10,
+    )
+    output = docsearch.max_marginal_relevance_search("foo", k=2, fetch_k=3)
+    assert output == [
+        Document(page_content="foo", metadata={"page": 0}),
+        Document(page_content="bar", metadata={"page": 1}),
+    ]
diff --git a/tests/integration_tests/vectorstores/test_in_memory_exact_search.py b/tests/integration_tests/vectorstores/test_in_memory_exact_search.py
new file mode 100644
index 0000000000000..7e0142ec8212f
--- /dev/null
+++ b/tests/integration_tests/vectorstores/test_in_memory_exact_search.py
@@ -0,0 +1,97 @@
+import numpy as np
+import pytest
+
+from langchain.schema import Document
+from langchain.vectorstores.in_memory_exact_search import InMemoryExactSearch
+from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
+
+
+def test_in_memory_vec_store_from_texts() -> None:
+    """Test end to end construction from texts."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = InMemoryExactSearch.from_texts(
+        texts,
+        FakeEmbeddings(),
+    )
+    assert isinstance(docsearch, InMemoryExactSearch)
+    assert docsearch.doc_index.num_docs() == 3
+
+
+def test_in_memory_vec_store_add_texts() -> None:
+    """Test end to end construction and adding texts."""
+    docsearch = InMemoryExactSearch(
+        embedding=FakeEmbeddings(),
+    )
+    assert isinstance(docsearch, InMemoryExactSearch)
+    assert docsearch.doc_index.num_docs() == 0
+
+    texts = ["foo", "bar", "baz"]
+    docsearch.add_texts(texts=texts)
+    assert docsearch.doc_index.num_docs() == 3
+
+
+@pytest.mark.parametrize("metric", ["cosine_sim", "euclidean_dist", "sqeuclidean_dist"])
+def test_sim_search(metric) -> None:
+    """Test end to end construction and simple similarity search."""
+    texts = ["foo", "bar", "baz"]
+    in_memory_vec_store = InMemoryExactSearch.from_texts(
+        texts=texts,
+        embedding=FakeEmbeddings(),
+        metric=metric,
+    )
+
+    output = in_memory_vec_store.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo")]
+
+
+@pytest.mark.parametrize("metric", ["cosine_sim", "euclidean_dist", "sqeuclidean_dist"])
+def test_sim_search_with_score(metric) -> None:
+    """Test end to end construction and similarity search with score."""
+    texts = ["foo", "bar", "baz"]
+    in_memory_vec_store = InMemoryExactSearch.from_texts(
+        texts=texts,
+        embedding=FakeEmbeddings(),
+        metric=metric,
+    )
+
+    output = in_memory_vec_store.similarity_search_with_score("foo", k=1)
+
+    out_doc, out_score = output[0]
+    assert out_doc == Document(page_content="foo")
+
+    expected_score = 0.0 if "dist" in metric else 1.0
+    assert np.isclose(out_score, expected_score, atol=1e-6)
+
+
+@pytest.mark.parametrize("metric", ["cosine_sim", "euclidean_dist", "sqeuclidean_dist"])
+def test_sim_search_by_vector(metric) -> None:
+    """Test end to end construction and similarity search by vector."""
+    texts = ["foo", "bar", "baz"]
+    in_memory_vec_store = InMemoryExactSearch.from_texts(
+        texts=texts,
+        embedding=FakeEmbeddings(),
+        metric=metric,
+    )
+
+    embedding = [1.0] * 10
+    output = in_memory_vec_store.similarity_search_by_vector(embedding, k=1)
+
+    assert output == [Document(page_content="bar")]
+
+
+@pytest.mark.parametrize("metric", ["cosine_sim", "euclidean_dist", "sqeuclidean_dist"])
+def test_max_marginal_relevance_search(metric) -> None:
+    """Test MMR search."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": i} for i in range(len(texts))]
+    docsearch = InMemoryExactSearch.from_texts(
+        texts,
+        FakeEmbeddings(),
+        metadatas=metadatas,
+        metric=metric,
+    )
+    output = docsearch.max_marginal_relevance_search("foo", k=2, fetch_k=3)
+    assert output == [
+        Document(page_content="foo", metadata={"page": 0}),
+        Document(page_content="bar", metadata={"page": 1}),
+    ]
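Finally, a short sketch of the MMR behavior the last two tests pin down: `max_marginal_relevance_search` fetches `fetch_k` candidates by plain similarity, then greedily keeps `k` of them, trading similarity to the query against diversity among the documents already selected (again with the test suite's `FakeEmbeddings` as a stand-in for a real embedding model):

```python
from langchain.vectorstores.in_memory_exact_search import InMemoryExactSearch
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings

texts = ["foo", "bar", "baz"]
store = InMemoryExactSearch.from_texts(
    texts,
    FakeEmbeddings(),
    metadatas=[{"page": i} for i in range(len(texts))],
)

# Fetch 3 candidates, keep the 2 that best balance relevance and diversity;
# metadata attached at indexing time is preserved on the returned Documents.
docs = store.max_marginal_relevance_search("foo", k=2, fetch_k=3)
assert [d.metadata["page"] for d in docs] == [0, 1]
```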