test_chroma.py
#!/usr/bin/env python3
"""
Test script for the Chroma vector database implementation.
"""
import logging
from pathlib import Path

import numpy as np

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import our modules
from src.vector_db.vector_store import VectorStore
from src.llm_interface.ollama_client import OllamaClient


def main():
    """Test the Chroma vector store implementation."""
    # Create a test directory for Chroma
    test_db_path = Path("./test_chroma_db")
    test_db_path.mkdir(exist_ok=True)

    # Initialize the embedding client in mock mode to avoid needing a running
    # Ollama instance
    ollama_client = OllamaClient(
        api_base="http://localhost:11434",
        model_name="llama3.2:latest",
        embedding_model_name="nomic-embed-text",
        mock_mode=True  # Use mock mode to avoid needing Ollama running
    )

    # Initialize the vector store
    vector_store = VectorStore(
        db_path=str(test_db_path),
        embedding_client=ollama_client
    )

    # Create some test embeddings
    test_embeddings = []
    test_chunks = []

    # Create 5 test chunks with deterministic random embeddings
    for i in range(5):
        chunk_id = f"test_chunk_{i}"

        # Create a deterministic random embedding for testing
        np.random.seed(i)
        embedding = np.random.rand(ollama_client.embedding_dimension).astype(np.float32)
        embedding = embedding / np.linalg.norm(embedding)  # Normalize to unit length

        # Create test chunk metadata
        chunk_data = {
            "id": chunk_id,
            "file_path": f"test_file_{i}.py",
            "language": "python",
            "chunk_type": "function",
            "chunk_name": f"test_function_{i}",
            "code": f"def test_function_{i}():\n    return 'This is test function {i}'"
        }

        test_embeddings.append((chunk_id, embedding))
        test_chunks.append(chunk_data)

        # Add to vector store
        logger.info(f"Adding test chunk {chunk_id}")
        vector_store.add_embedding(chunk_id, embedding, chunk_data)

    # Test retrieval of all embeddings
    logger.info("Testing retrieval of all embeddings")
    all_embeddings = vector_store.get_all_embeddings()
    logger.info(f"Retrieved {len(all_embeddings)} embeddings")

    # Test similarity search
    logger.info("Testing similarity search")
    # Use the first embedding as the query
    query_embedding = test_embeddings[0][1]
    similar_chunks = vector_store.find_similar(query_embedding, top_n=3)
    logger.info(f"Found {len(similar_chunks)} similar chunks")
    for i, (chunk, similarity) in enumerate(similar_chunks):
        logger.info(f"Similar chunk {i + 1}: {chunk.get('chunk_name')} (similarity: {similarity:.4f})")
    # Test clearing all embeddings
    logger.info("Testing clearing all embeddings")
    vector_store.clear_all_embeddings()

    # Verify that all embeddings are cleared
    all_embeddings_after_clear = vector_store.get_all_embeddings()
    logger.info(f"After clearing, there are {len(all_embeddings_after_clear)} embeddings")

    logger.info("All tests completed successfully!")


if __name__ == "__main__":
    main()