77import weaviate .classes as wvc
88from llama_index .core .schema import TextNode
99from llama_index .core .vector_stores .types import (
10- MetadataFilter ,
1110 MetadataFilters ,
1211 VectorStoreQuery ,
1312 VectorStoreQueryMode ,
1918 node_to_metadata_dict ,
2019)
2120from pydantic import Field , PrivateAttr
22- from weaviate .classes .config import Configure , DataType , Property
21+ from weaviate .classes .config import DataType , Property
2322from weaviate .connect .helpers import connect_to_custom , connect_to_wcs
2423
2524from llmstack .data .destinations .base import BaseDestination
2625from llmstack .data .schemas import DataDocument
2726from llmstack .processors .providers .weaviate import (
2827 APIKey ,
29- EmbeddingsProvider ,
3028 WeaviateCloudInstance ,
3129 WeaviateLocalInstance ,
3230 WeaviateProviderConfig ,
@@ -232,40 +230,24 @@ def delete_index(self) -> None:
def create_index(self, schema: Optional[dict]) -> None:
    """Create the Weaviate collection for this index if it does not exist yet.

    No vectorizer is configured on the collection; only the name and the
    property definitions derived from ``schema`` are passed to Weaviate.

    Args:
        schema: Optional Weaviate-style class schema. Only its
            ``"properties"`` list is read. Each entry must provide
            ``"name"`` and ``"dataType"`` (a sequence whose first element
            is the type name); ``"description"`` is optional.

    Returns:
        ``None`` when the collection already exists. Otherwise the value
        returned by ``collections.create`` is passed through unchanged
        (the ``-> None`` annotation is kept for interface stability).
    """
    collections = self._weaviate_client.collections
    if collections.exists(self._index_name):
        return None

    # Type names that denote a list of strings: "string[]" is the legacy
    # spelling used by the default schema, "text[]" the current one.
    text_array_types = ("string[]", "text[]")

    properties = []
    # `schema` may be None/empty and "properties" may be missing or null.
    for prop in (schema or {}).get("properties") or []:
        data_type = DataType.TEXT_ARRAY if prop["dataType"][0] in text_array_types else DataType.TEXT
        properties.append(
            Property(
                name=prop["name"],
                data_type=data_type,
                # Description is optional on Property; do not fail when absent.
                description=prop.get("description"),
            )
        )

    return collections.create(name=self._index_name, properties=properties)
262241
263242 def query (self , query : VectorStoreQuery , ** kwargs : Any ) -> VectorStoreQueryResult :
264243 nodes = []
265244 node_ids = []
266245 similarities = []
267246 filters = None
268247
248+ if query .doc_ids :
249+ filters = wvc .query .Filter .by_property ("datasource_uuid" ).contains_any (query .doc_ids )
250+
269251 if query .filters :
270252 filters = _to_weaviate_filter (query .filters )
271253
@@ -336,15 +318,8 @@ def initialize_client(self, *args, **kwargs):
336318 DEFAULT_SCHEMA = {
337319 "class" : index_name ,
338320 "description" : "Text data source" ,
339- "vectorizer" : "text2vec-openai" ,
340- "moduleConfig" : {"text2vec-openai" : {"model" : "ada" , "type" : "text" }},
341321 "properties" : [
342- {
343- "name" : self .text_key ,
344- "dataType" : ["text" ],
345- "description" : "Text" ,
346- "moduleConfig" : {"text2vec-openai" : {"skip" : False , "vectorizePropertyName" : False }},
347- },
322+ {"name" : self .text_key , "dataType" : ["text" ], "description" : "Text" },
348323 {"name" : "source" , "dataType" : ["text" ], "description" : "Document source" },
349324 {"name" : "metadata" , "dataType" : ["string[]" ], "description" : "Document metadata" },
350325 {"name" : "datasource_uuid" , "dataType" : ["text" ], "description" : "Datasource UUID" },
@@ -356,16 +331,8 @@ def initialize_client(self, *args, **kwargs):
356331 self ._schema_dict = json .loads (self .weaviate_schema ) if self .weaviate_schema else DEFAULT_SCHEMA
357332 except Exception :
358333 pass
359- if self ._deployment_config and self ._deployment_config .module_config :
360- self ._schema_dict ["moduleConfig" ] = json .loads (self ._deployment_config .module_config )
361334
362335 additional_headers = self ._deployment_config .additional_headers_dict or {}
363- if self ._deployment_config .embeddings_provider == EmbeddingsProvider .AZURE_OPENAI :
364- azure_deployment_config = datasource .profile .get_provider_config (provider_slug = "azure" )
365- additional_headers ["X-Azure-Api-Key" ] = azure_deployment_config .api_key
366- elif self ._deployment_config .embeddings_provider == EmbeddingsProvider .OPENAI :
367- openai_deployment_config = datasource .profile .get_provider_config (provider_slug = "openai" )
368- additional_headers ["X-Openai-Api-Key" ] = openai_deployment_config .api_key
369336
370337 auth = None
371338 if isinstance (self ._deployment_config .auth , APIKey ):
@@ -398,17 +365,17 @@ def search(self, query: str, **kwargs):
398365 )
399366
400367 datasource_uuid = kwargs ["datasource_uuid" ]
401- filters = MetadataFilters (filters = [MetadataFilter (key = "datasource_uuid" , value = datasource_uuid )])
402368
403369 vector_store_query = VectorStoreQuery (
370+ doc_ids = [datasource_uuid ],
404371 query_str = query ,
405372 mode = (
406373 VectorStoreQueryMode .HYBRID if kwargs .get ("use_hybrid_search" , False ) else VectorStoreQueryMode .DEFAULT
407374 ),
408375 alpha = kwargs .get ("alpha" , 0.75 ),
409376 hybrid_top_k = kwargs .get ("limit" , 2 ),
410377 query_embedding = kwargs .get ("query_embedding" , None ),
411- filters = filters ,
378+ filters = None ,
412379 )
413380
414381 return self ._client .query (query = vector_store_query )
0 commit comments