Commit 495a009

Delete vectorizer config in weaviate
1 parent 4bc9343 commit 495a009

File tree

1 file changed: +13 −46 lines changed


llmstack/data/destinations/vector_stores/weaviate.py

Lines changed: 13 additions & 46 deletions
@@ -7,7 +7,6 @@
 import weaviate.classes as wvc
 from llama_index.core.schema import TextNode
 from llama_index.core.vector_stores.types import (
-    MetadataFilter,
     MetadataFilters,
     VectorStoreQuery,
     VectorStoreQueryMode,
@@ -19,14 +18,13 @@
     node_to_metadata_dict,
 )
 from pydantic import Field, PrivateAttr
-from weaviate.classes.config import Configure, DataType, Property
+from weaviate.classes.config import DataType, Property
 from weaviate.connect.helpers import connect_to_custom, connect_to_wcs

 from llmstack.data.destinations.base import BaseDestination
 from llmstack.data.schemas import DataDocument
 from llmstack.processors.providers.weaviate import (
     APIKey,
-    EmbeddingsProvider,
     WeaviateCloudInstance,
     WeaviateLocalInstance,
     WeaviateProviderConfig,
@@ -232,40 +230,24 @@ def delete_index(self) -> None:
     def create_index(self, schema: Optional[dict]) -> None:
         if not self._weaviate_client.collections.exists(self._index_name):
             properties = []
-            vectorizer_config = None
             if schema:
-                if "properties" in schema:
-                    for prop in schema["properties"]:
-                        data_type = DataType.TEXT
-                        if prop["dataType"][0] == "string[]":
-                            data_type = DataType.TEXT_ARRAY
-                        properties.append(
-                            Property(
-                                name=prop["name"],
-                                data_type=data_type,
-                                description=prop["description"],
-                                vectorize_property_name=False,
-                            )
-                        )
-
-                if "vectorizer" in schema:
-                    if schema["vectorizer"] == "text2vec-openai":
-                        module_config = schema["moduleConfig"]["text2vec-openai"]
-                        module_config.pop("type", None)
-                        vectorizer_config = Configure.Vectorizer.text2vec_openai(**module_config)
+                for prop in schema.get("properties", []):
+                    data_type = DataType.TEXT
+                    if prop["dataType"][0] == "string[]":
+                        data_type = DataType.TEXT_ARRAY
+                    properties.append(Property(name=prop["name"], data_type=data_type, description=prop["description"]))

-            return self._weaviate_client.collections.create(
-                name=self._index_name,
-                vectorizer_config=vectorizer_config,
-                properties=properties,
-            )
+            return self._weaviate_client.collections.create(name=self._index_name, properties=properties)

     def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
         nodes = []
         node_ids = []
         similarities = []
         filters = None

+        if query.doc_ids:
+            filters = wvc.query.Filter.by_property("datasource_uuid").contains_any(query.doc_ids)
+
         if query.filters:
             filters = _to_weaviate_filter(query.filters)

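With the vectorizer configuration removed, collections are created without any server-side vectorizer, so vectors are expected to be supplied by the caller, and per-datasource scoping is handled by a filter on datasource_uuid. The following is a minimal sketch of that pattern against the Weaviate v4 Python client; the collection name, property list, and embedding values are illustrative placeholders, not taken from this repo.

# Minimal sketch, not from this repo: create a collection without vectorizer_config
# (bring-your-own vectors) and scope a query by datasource id, mirroring the new
# create_index/query behavior.
import weaviate
import weaviate.classes as wvc
from weaviate.classes.config import DataType, Property

client = weaviate.connect_to_local()  # illustrative; the destination also supports custom/WCS connections

if not client.collections.exists("Example_index"):  # placeholder collection name
    client.collections.create(
        name="Example_index",
        properties=[
            Property(name="content", data_type=DataType.TEXT, description="Text"),
            Property(name="datasource_uuid", data_type=DataType.TEXT, description="Datasource UUID"),
        ],
        # no vectorizer_config: Weaviate will not vectorize on the server, so vectors
        # must be supplied explicitly on insert and on query
    )

collection = client.collections.get("Example_index")
result = collection.query.near_vector(
    near_vector=[0.1] * 384,  # placeholder embedding computed outside Weaviate
    filters=wvc.query.Filter.by_property("datasource_uuid").contains_any(["doc-uuid-1"]),
    limit=2,
)
client.close()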
@@ -336,15 +318,8 @@ def initialize_client(self, *args, **kwargs):
         DEFAULT_SCHEMA = {
             "class": index_name,
             "description": "Text data source",
-            "vectorizer": "text2vec-openai",
-            "moduleConfig": {"text2vec-openai": {"model": "ada", "type": "text"}},
             "properties": [
-                {
-                    "name": self.text_key,
-                    "dataType": ["text"],
-                    "description": "Text",
-                    "moduleConfig": {"text2vec-openai": {"skip": False, "vectorizePropertyName": False}},
-                },
+                {"name": self.text_key, "dataType": ["text"], "description": "Text"},
                 {"name": "source", "dataType": ["text"], "description": "Document source"},
                 {"name": "metadata", "dataType": ["string[]"], "description": "Document metadata"},
                 {"name": "datasource_uuid", "dataType": ["text"], "description": "Datasource UUID"},
@@ -356,16 +331,8 @@ def initialize_client(self, *args, **kwargs):
             self._schema_dict = json.loads(self.weaviate_schema) if self.weaviate_schema else DEFAULT_SCHEMA
         except Exception:
             pass
-        if self._deployment_config and self._deployment_config.module_config:
-            self._schema_dict["moduleConfig"] = json.loads(self._deployment_config.module_config)

         additional_headers = self._deployment_config.additional_headers_dict or {}
-        if self._deployment_config.embeddings_provider == EmbeddingsProvider.AZURE_OPENAI:
-            azure_deployment_config = datasource.profile.get_provider_config(provider_slug="azure")
-            additional_headers["X-Azure-Api-Key"] = azure_deployment_config.api_key
-        elif self._deployment_config.embeddings_provider == EmbeddingsProvider.OPENAI:
-            openai_deployment_config = datasource.profile.get_provider_config(provider_slug="openai")
-            additional_headers["X-Openai-Api-Key"] = openai_deployment_config.api_key

         auth = None
         if isinstance(self._deployment_config.auth, APIKey):
@@ -398,17 +365,17 @@ def search(self, query: str, **kwargs):
             )

         datasource_uuid = kwargs["datasource_uuid"]
-        filters = MetadataFilters(filters=[MetadataFilter(key="datasource_uuid", value=datasource_uuid)])

         vector_store_query = VectorStoreQuery(
+            doc_ids=[datasource_uuid],
             query_str=query,
             mode=(
                 VectorStoreQueryMode.HYBRID if kwargs.get("use_hybrid_search", False) else VectorStoreQueryMode.DEFAULT
             ),
             alpha=kwargs.get("alpha", 0.75),
             hybrid_top_k=kwargs.get("limit", 2),
             query_embedding=kwargs.get("query_embedding", None),
-            filters=filters,
+            filters=None,
         )

         return self._client.query(query=vector_store_query)
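Datasource scoping now travels through VectorStoreQuery.doc_ids instead of a MetadataFilters object, and query() converts those ids into the contains_any filter on datasource_uuid shown above. A hedged sketch of constructing such a query; the UUID and query text are placeholders.

from llama_index.core.vector_stores.types import VectorStoreQuery, VectorStoreQueryMode

# Hypothetical values for illustration; query_embedding would normally come from an embedding model.
vector_store_query = VectorStoreQuery(
    doc_ids=["0b6c9d1e-placeholder-uuid"],  # query() turns this into contains_any on datasource_uuid
    query_str="what is llmstack?",
    mode=VectorStoreQueryMode.HYBRID,       # DEFAULT is used when use_hybrid_search is False
    alpha=0.75,                             # weight of the vector component in hybrid scoring
    hybrid_top_k=2,
    query_embedding=None,
    filters=None,                           # metadata filters are no longer used for datasource scoping
)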
