diff --git a/docs/getting-started/environment-variables.md b/docs/getting-started/environment-variables.md index d2b813df..24c99633 100644 --- a/docs/getting-started/environment-variables.md +++ b/docs/getting-started/environment-variables.md @@ -63,6 +63,7 @@ Claude Context supports a global configuration file at `~/.context/.env` to simp | `SPLITTER_TYPE` | Code splitter type: `ast`, `langchain` | `ast` | | `CUSTOM_EXTENSIONS` | Additional file extensions to include (comma-separated, e.g., `.vue,.svelte,.astro`) | None | | `CUSTOM_IGNORE_PATTERNS` | Additional ignore patterns (comma-separated, e.g., `temp/**,*.backup,private/**`) | None | +| `CONTEXT_ENCRYPTION_KEY` | Encryption key for content encryption. Can be a 64-character hex string or any string (will be hashed). **Note: Only works in semantic search mode (`HYBRID_MODE=false`). Automatically disabled in hybrid mode as BM25 requires plain text.** | None (encryption disabled) | ## šŸš€ Quick Setup diff --git a/examples/basic-usage/index.ts b/examples/basic-usage/index.ts index daec6df0..80a029df 100644 --- a/examples/basic-usage/index.ts +++ b/examples/basic-usage/index.ts @@ -1,4 +1,4 @@ -import { Context, MilvusVectorDatabase, MilvusRestfulVectorDatabase, AstCodeSplitter, LangChainCodeSplitter } from '@zilliz/claude-context-core'; +import { Context, MilvusVectorDatabase, MilvusRestfulVectorDatabase, PostgresVectorDatabase, AstCodeSplitter, LangChainCodeSplitter, OllamaEmbedding, OpenAIEmbedding } from '@zilliz/claude-context-core'; import { envManager } from '@zilliz/claude-context-core'; import * as path from 'path'; @@ -15,29 +15,48 @@ async function main() { try { // 1. Choose Vector Database implementation - // Set to true to use RESTful API (for environments without gRPC support) - // Set to false to use gRPC (default, more efficient) - const useRestfulApi = false; - const milvusAddress = envManager.get('MILVUS_ADDRESS') || 'localhost:19530'; - const milvusToken = envManager.get('MILVUS_TOKEN'); + const vectorDbProvider = envManager.get('VECTOR_DATABASE_PROVIDER')?.toLowerCase() || 'milvus'; + const useRestfulApi = envManager.get('MILVUS_USE_RESTFUL') === 'true'; const splitterType = envManager.get('SPLITTER_TYPE')?.toLowerCase() || 'ast'; - console.log(`šŸ”§ Using ${useRestfulApi ? 'RESTful API' : 'gRPC'} implementation`); - console.log(`šŸ”Œ Connecting to Milvus at: ${milvusAddress}`); + console.log(`šŸ”§ Using vector database provider: ${vectorDbProvider}`); let vectorDatabase; - if (useRestfulApi) { - // Use RESTful implementation (for environments without gRPC support) - vectorDatabase = new MilvusRestfulVectorDatabase({ - address: milvusAddress, - ...(milvusToken && { token: milvusToken }) - }); + if (vectorDbProvider === 'postgres') { + // Use PostgreSQL with pgvector + const postgresConfig = { + connectionString: envManager.get('POSTGRES_CONNECTION_STRING'), + host: envManager.get('POSTGRES_HOST') || 'localhost', + port: envManager.get('POSTGRES_PORT') ? parseInt(envManager.get('POSTGRES_PORT')!) 
: 5432, + database: envManager.get('POSTGRES_DATABASE') || 'postgres', + username: envManager.get('POSTGRES_USERNAME') || 'postgres', + password: envManager.get('POSTGRES_PASSWORD'), + ssl: envManager.get('POSTGRES_SSL') === 'true' + }; + + console.log(`šŸ”Œ Connecting to PostgreSQL at: ${postgresConfig.connectionString || `${postgresConfig.host}:${postgresConfig.port}/${postgresConfig.database}`}`); + vectorDatabase = new PostgresVectorDatabase(postgresConfig); } else { - // Use gRPC implementation (default, more efficient) - vectorDatabase = new MilvusVectorDatabase({ - address: milvusAddress, - ...(milvusToken && { token: milvusToken }) - }); + // Use Milvus (default) + const milvusAddress = envManager.get('MILVUS_ADDRESS') || 'localhost:19530'; + const milvusToken = envManager.get('MILVUS_TOKEN'); + + console.log(`šŸ”§ Using ${useRestfulApi ? 'RESTful API' : 'gRPC'} implementation`); + console.log(`šŸ”Œ Connecting to Milvus at: ${milvusAddress}`); + + if (useRestfulApi) { + // Use RESTful implementation (for environments without gRPC support) + vectorDatabase = new MilvusRestfulVectorDatabase({ + address: milvusAddress, + ...(milvusToken && { token: milvusToken }) + }); + } else { + // Use gRPC implementation (default, more efficient) + vectorDatabase = new MilvusVectorDatabase({ + address: milvusAddress, + ...(milvusToken && { token: milvusToken }) + }); + } } // 2. Create Context instance @@ -47,7 +66,26 @@ async function main() { } else { codeSplitter = new AstCodeSplitter(2500, 300); } + + console.log('šŸ”§ Using embedding provider: ', envManager.get('EMBEDDING_PROVIDER')); + let embedding; + const embeddingProvider = envManager.get('EMBEDDING_PROVIDER')?.toLowerCase() || 'openai'; + if (embeddingProvider === 'ollama') { + console.log('šŸ”§ Using Ollama embedding provider'); + embedding = new OllamaEmbedding({ + host: envManager.get('OLLAMA_HOST') || 'http://127.0.0.1:11434', + model: envManager.get('EMBEDDING_MODEL') || 'all-minilm' + }); + } else { + embedding = new OpenAIEmbedding({ + model: 'text-embedding-3-small', + apiKey: envManager.get('OPENAI_API_KEY') || 'your-openai-api-key', + baseURL: envManager.get('OPENAI_BASE_URL') || 'https://api.openai.com/v1' + }); + } + const context = new Context({ + embedding, vectorDatabase, codeSplitter, supportedExtensions: ['.ts', '.js', '.py', '.java', '.cpp', '.go', '.rs'] @@ -119,8 +157,17 @@ async function main() { console.log('\nšŸ’” Environment Variables:'); console.log(' - OPENAI_API_KEY: Your OpenAI API key (required)'); console.log(' - OPENAI_BASE_URL: Custom OpenAI API endpoint (optional)'); + console.log(' - VECTOR_DATABASE_PROVIDER: Vector database provider - "milvus" or "postgres" (default: milvus)'); console.log(' - MILVUS_ADDRESS: Milvus server address (default: localhost:19530)'); console.log(' - MILVUS_TOKEN: Milvus authentication token (optional)'); + console.log(' - MILVUS_USE_RESTFUL: Use Milvus REST API instead of gRPC (true/false, default: false)'); + console.log(' - POSTGRES_CONNECTION_STRING: PostgreSQL connection string (e.g., postgresql://user:pass@localhost:5432/db)'); + console.log(' - POSTGRES_HOST: PostgreSQL host (default: localhost)'); + console.log(' - POSTGRES_PORT: PostgreSQL port (default: 5432)'); + console.log(' - POSTGRES_DATABASE: PostgreSQL database name (default: postgres)'); + console.log(' - POSTGRES_USERNAME: PostgreSQL username (default: postgres)'); + console.log(' - POSTGRES_PASSWORD: PostgreSQL password'); + console.log(' - POSTGRES_SSL: Enable SSL connection (true/false, default: false)'); 
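+        console.log('   - EMBEDDING_PROVIDER: Embedding provider - "openai" or "ollama" (default: openai)');
+        console.log('   - EMBEDDING_MODEL: Embedding model name (optional; defaults per provider, see the embedding setup above)');
+        console.log('   - OLLAMA_HOST: Ollama server address (default: http://127.0.0.1:11434)');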
console.log(' - SPLITTER_TYPE: Code splitter type - "ast" or "langchain" (default: ast)'); } diff --git a/examples/basic-usage/package.json b/examples/basic-usage/package.json index 2e2bfe72..122c51d8 100644 --- a/examples/basic-usage/package.json +++ b/examples/basic-usage/package.json @@ -9,12 +9,14 @@ "dev": "tsx --watch index.ts" }, "dependencies": { - "@zilliz/claude-context-core": "workspace:*" + "@zilliz/claude-context-core": "workspace:*", + "pg": "^8.11.3" }, "devDependencies": { "tsx": "^4.0.0", "typescript": "^5.0.0", "@types/node": "^20.0.0", + "@types/pg": "^8.10.9", "dotenv": "^16.0.0" }, "private": true diff --git a/packages/core/package.json b/packages/core/package.json index 48b476ce..74845d2f 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -21,6 +21,7 @@ "langchain": "^0.3.27", "ollama": "^0.5.16", "openai": "^5.1.1", + "pg": "^8.11.3", "tree-sitter": "^0.21.1", "tree-sitter-cpp": "^0.22.0", "tree-sitter-go": "^0.21.0", @@ -38,6 +39,7 @@ "@types/fs-extra": "^11.0.0", "@types/jest": "^30.0.0", "@types/mock-fs": "^4.13.4", + "@types/pg": "^8.10.9", "jest": "^30.0.0", "mock-fs": "^5.5.0", "ts-jest": "^29.4.0" diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts index 1af13058..81e377ab 100644 --- a/packages/core/src/context.ts +++ b/packages/core/src/context.ts @@ -18,6 +18,7 @@ import { } from './vectordb'; import { SemanticSearchResult } from './types'; import { envManager } from './utils/env-manager'; +import { EncryptionManager } from './utils/encryption'; import * as fs from 'fs'; import * as path from 'path'; import * as crypto from 'crypto'; @@ -103,12 +104,13 @@ export class Context { private supportedExtensions: string[]; private ignorePatterns: string[]; private synchronizers = new Map(); + private encryptionManager: EncryptionManager; constructor(config: ContextConfig = {}) { // Initialize services this.embedding = config.embedding || new OpenAIEmbedding({ apiKey: envManager.get('OPENAI_API_KEY') || 'your-openai-api-key', - model: 'text-embedding-3-small', + model: envManager.get('EMBEDDING_MODEL') || 'text-embedding-3-small', ...(envManager.get('OPENAI_BASE_URL') && { baseURL: envManager.get('OPENAI_BASE_URL') }) }); @@ -119,6 +121,15 @@ export class Context { this.codeSplitter = config.codeSplitter || new AstCodeSplitter(2500, 300); + // Initialize encryption manager based on hybrid mode + const isHybrid = this.getIsHybrid(); + const enableEncryption = !isHybrid; // Disable encryption if hybrid mode is enabled + this.encryptionManager = new EncryptionManager(enableEncryption); + + if (isHybrid && envManager.get('CONTEXT_ENCRYPTION_KEY')) { + console.log('[Context] āš ļø Content encryption disabled because hybrid mode is enabled (BM25 requires plain text)'); + } + // Load custom extensions from environment variables const envCustomExtensions = this.getCustomExtensionsFromEnv(); @@ -474,7 +485,7 @@ export class Context { // 4. Convert to semantic search result format const results: SemanticSearchResult[] = searchResults.map(result => ({ - content: result.document.content, + content: this.encryptionManager.decrypt(result.document.content), // Decrypt content after retrieval relativePath: result.document.relativePath, startLine: result.document.startLine, endLine: result.document.endLine, @@ -502,7 +513,7 @@ export class Context { // 3. 
Convert to semantic search result format const results: SemanticSearchResult[] = searchResults.map(result => ({ - content: result.document.content, + content: this.encryptionManager.decrypt(result.document.content), // Decrypt content after retrieval relativePath: result.document.relativePath, startLine: result.document.startLine, endLine: result.document.endLine, @@ -828,7 +839,7 @@ export class Context { return { id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content), - content: chunk.content, // Full text content for BM25 and storage + content: this.encryptionManager.encrypt(chunk.content), // Encrypt content before storage vector: embeddings[index].vector, // Dense vector relativePath, startLine: chunk.metadata.startLine || 0, @@ -859,7 +870,7 @@ export class Context { return { id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content), vector: embeddings[index].vector, - content: chunk.content, + content: this.encryptionManager.encrypt(chunk.content), // Encrypt content before storage relativePath, startLine: chunk.metadata.startLine || 0, endLine: chunk.metadata.endLine || 0, @@ -967,6 +978,8 @@ export class Context { } else { console.log('šŸ“„ No ignore files found, keeping existing patterns'); } + + console.log(`[Context] šŸ” Ignore patterns: ${this.ignorePatterns.join(', ')}`); } catch (error) { console.warn(`[Context] āš ļø Failed to load ignore patterns: ${error}`); // Continue with existing patterns on error - don't reset them diff --git a/packages/core/src/utils/encryption.ts b/packages/core/src/utils/encryption.ts new file mode 100644 index 00000000..079ffc70 --- /dev/null +++ b/packages/core/src/utils/encryption.ts @@ -0,0 +1,172 @@ +import * as crypto from 'crypto'; +import { envManager } from './env-manager'; + +/** + * Encryption utility class for content encryption/decryption + * Uses AES-256-CBC for content encryption + */ +export class EncryptionManager { + private static readonly ALGORITHM = 'aes-256-cbc'; + private static readonly KEY_LENGTH = 32; // 256 bits + private static readonly IV_LENGTH = 16; // 128 bits + + private encryptionKey: Buffer | null = null; + private isEncryptionEnabled: boolean = false; + + constructor(enableEncryption: boolean = true) { + this.initializeEncryption(enableEncryption); + } + + /** + * Initialize encryption based on environment variables and enablement flag + * @param enableEncryption Whether encryption should be enabled (controlled by caller) + */ + private initializeEncryption(enableEncryption: boolean): void { + const encryptionKey = envManager.get('CONTEXT_ENCRYPTION_KEY'); + + // If encryption is disabled by caller (e.g., due to hybrid mode), don't enable it + if (!enableEncryption) { + this.isEncryptionEnabled = false; + if (encryptionKey) { + console.log('[Encryption] āš ļø Content encryption disabled by system configuration'); + } else { + console.log('[Encryption] ā„¹ļø Content encryption disabled'); + } + return; + } + + if (encryptionKey) { + try { + // If key is provided as hex string, convert to buffer + if (encryptionKey.length === 64 && /^[0-9a-fA-F]+$/.test(encryptionKey)) { + this.encryptionKey = Buffer.from(encryptionKey, 'hex'); + } else { + // If key is provided as string, hash it to get consistent 32-byte key + this.encryptionKey = crypto.createHash('sha256').update(encryptionKey, 'utf-8').digest(); + } + + this.isEncryptionEnabled = true; + console.log('[Encryption] āœ… Content encryption enabled'); + } 
catch (error) { + console.warn('[Encryption] āš ļø Failed to initialize encryption key:', error); + this.isEncryptionEnabled = false; + } + } else { + this.isEncryptionEnabled = false; + console.log('[Encryption] ā„¹ļø Content encryption disabled (no CONTEXT_ENCRYPTION_KEY found)'); + } + } + + /** + * Check if encryption is enabled + */ + isEnabled(): boolean { + return this.isEncryptionEnabled; + } + + /** + * Encrypt content using AES-256-CBC + * @param content Content to encrypt + * @returns Encrypted content as base64 string with format: iv:encryptedData + */ + encrypt(content: string): string { + if (!this.isEncryptionEnabled || !this.encryptionKey) { + return content; // Return original content if encryption is disabled + } + + try { + // Generate random IV for each encryption + const iv = crypto.randomBytes(EncryptionManager.IV_LENGTH); + + // Create cipher using the modern API + const cipher = crypto.createCipheriv('aes-256-cbc', this.encryptionKey, iv); + + // Encrypt the content + let encrypted = cipher.update(content, 'utf8', 'hex'); + encrypted += cipher.final('hex'); + + // Combine iv and encrypted data + const result = iv.toString('hex') + ':' + encrypted; + + return Buffer.from(result).toString('base64'); + } catch (error) { + console.error('[Encryption] āŒ Failed to encrypt content:', error); + throw new Error('Failed to encrypt content'); + } + } + + /** + * Decrypt content using AES-256-CBC + * @param encryptedContent Encrypted content as base64 string + * @returns Decrypted content as string + */ + decrypt(encryptedContent: string): string { + if (!this.isEncryptionEnabled || !this.encryptionKey) { + return encryptedContent; // Return original content if encryption is disabled + } + + try { + // Parse the encrypted data + const data = Buffer.from(encryptedContent, 'base64').toString(); + const parts = data.split(':'); + + if (parts.length !== 2) { + throw new Error('Invalid encrypted data format'); + } + + // Extract IV and encrypted content + const iv = Buffer.from(parts[0], 'hex'); + const encrypted = parts[1]; + + // Create decipher using the modern API + const decipher = crypto.createDecipheriv('aes-256-cbc', this.encryptionKey, iv); + + // Decrypt the content + let decrypted = decipher.update(encrypted, 'hex', 'utf8'); + decrypted += decipher.final('utf8'); + + return decrypted; + } catch (error) { + console.error('[Encryption] āŒ Failed to decrypt content:', error); + throw new Error('Failed to decrypt content'); + } + } + + /** + * Generate a new encryption key + * @returns New encryption key as hex string + */ + static generateKey(): string { + return crypto.randomBytes(EncryptionManager.KEY_LENGTH).toString('hex'); + } + + /** + * Encrypt content in batch for performance + * @param contents Array of content strings to encrypt + * @returns Array of encrypted content strings + */ + encryptBatch(contents: string[]): string[] { + if (!this.isEncryptionEnabled) { + return contents; // Return original contents if encryption is disabled + } + + return contents.map(content => this.encrypt(content)); + } + + /** + * Decrypt content in batch for performance + * @param encryptedContents Array of encrypted content strings + * @returns Array of decrypted content strings + */ + decryptBatch(encryptedContents: string[]): string[] { + if (!this.isEncryptionEnabled) { + return encryptedContents; // Return original contents if encryption is disabled + } + + return encryptedContents.map(content => this.decrypt(content)); + } +} + +// Export a default instance for convenience 
(backward compatibility)
+// Note: In practice, the Context class should create its own instance with proper hybrid mode detection
+export const encryptionManager = new EncryptionManager();
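+
+// Usage sketch (illustrative only; assumes CONTEXT_ENCRYPTION_KEY is set and hybrid mode is off):
+//   const manager = new EncryptionManager(true);           // reads CONTEXT_ENCRYPTION_KEY from the environment
+//   const stored  = manager.encrypt('function foo() {}');  // base64("ivHex:cipherHex"), fresh random IV per call
+//   const plain   = manager.decrypt(stored);               // original text, or pass-through when disabled
+//   const newKey  = EncryptionManager.generateKey();       // 64-char hex string suitable for CONTEXT_ENCRYPTION_KEY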
diff --git a/packages/core/src/utils/index.ts b/packages/core/src/utils/index.ts
index 32cf1d40..b9ecd241 100644
--- a/packages/core/src/utils/index.ts
+++ b/packages/core/src/utils/index.ts
@@ -1 +1,2 @@
-export { EnvManager, envManager } from './env-manager';
\ No newline at end of file
+export { EnvManager, envManager } from './env-manager';
+export { EncryptionManager, encryptionManager } from './encryption';
\ No newline at end of file
diff --git a/packages/core/src/vectordb/index.ts b/packages/core/src/vectordb/index.ts
index b4231c04..90ef1a30 100644
--- a/packages/core/src/vectordb/index.ts
+++ b/packages/core/src/vectordb/index.ts
@@ -14,6 +14,7 @@ export {
 // Implementation class exports
 export { MilvusRestfulVectorDatabase, MilvusRestfulConfig } from './milvus-restful-vectordb';
 export { MilvusVectorDatabase, MilvusConfig } from './milvus-vectordb';
+export { PostgresVectorDatabase, PostgresConfig } from './postgres-vectordb';
 export {
     ClusterManager,
     ZillizConfig,
diff --git a/packages/core/src/vectordb/postgres-vectordb.ts b/packages/core/src/vectordb/postgres-vectordb.ts
new file mode 100644
index 00000000..837fa865
--- /dev/null
+++ b/packages/core/src/vectordb/postgres-vectordb.ts
@@ -0,0 +1,883 @@
+import { Pool, PoolConfig } from 'pg';
+import {
+    VectorDocument,
+    SearchOptions,
+    VectorSearchResult,
+    VectorDatabase,
+    HybridSearchRequest,
+    HybridSearchOptions,
+    HybridSearchResult,
+} from './types';
+
+export interface PostgresConfig {
+    connectionString?: string;
+    host?: string;
+    port?: number;
+    database?: string;
+    username?: string;
+    password?: string;
+    ssl?: boolean;
+    maxConnections?: number;
+    batchSize?: number; // Number of documents to insert in each batch (default: 100)
+}
+
+/**
+ * PostgreSQL Vector Database implementation using the pgvector extension
+ * This implementation provides vector storage and similarity search using PostgreSQL with pgvector
+ * Uses separate tables for each collection (codebase) for better data isolation and performance
+ */
+export class PostgresVectorDatabase implements VectorDatabase {
+    protected config: PostgresConfig;
+    private pool: Pool | null = null;
+    protected initializationPromise: Promise<void>;
+
+    constructor(config: PostgresConfig) {
+        this.config = config;
+        this.initializationPromise = this.initialize();
+    }
+
+    private async initialize(): Promise<void> {
+        await this.initializeClient();
+    }
+
+    private async initializeClient(): Promise<void> {
+        let poolConfig: PoolConfig;
+
+        if (this.config.connectionString) {
+            poolConfig = {
+                connectionString: this.config.connectionString,
+                max: this.config.maxConnections || 10,
+            };
+        } else {
+            poolConfig = {
+                host: this.config.host || 'localhost',
+                port: this.config.port || 5432,
+                database: this.config.database || 'postgres',
+                user: this.config.username || 'postgres',
+                password: this.config.password,
+                ssl: this.config.ssl || false,
+                max: this.config.maxConnections || 10,
+            };
+        }
+
+        console.log('šŸ”Œ Connecting to PostgreSQL vector database...');
+        this.pool = new Pool(poolConfig);
+
+        // Test connection and ensure a vector extension is available
+        await this.ensurePgvectorExtension();
+    }
+
+    private async ensurePgvectorExtension(): Promise<void> {
+        if (!this.pool) {
+            throw new Error('PostgreSQL pool not initialized');
+        }
+
+        try {
+            // Check for the 'vectors' extension first (pgvecto.rs, which uses the newer HNSW index syntax below)
+            let result = await this.pool.query(
+                "SELECT 1 FROM pg_extension WHERE extname = 'vectors'"
+            );
+
+            if (result.rows.length === 0) {
+                // Fall back to the standard pgvector extension, whose name is 'vector'
+                result = await this.pool.query(
+                    "SELECT 1 FROM pg_extension WHERE extname = 'vector'"
+                );
+
+                if (result.rows.length === 0) {
+                    console.error('āŒ pgvector extension is not installed in this PostgreSQL database');
+                    throw new Error(`pgvector extension is required but not installed. Please install it first`);
+                } else {
+                    console.log('āœ… pgvector extension (vector) is available');
+                }
+            } else {
+                console.log('āœ… pgvector extension (vectors) is available');
+            }
+        } catch (error) {
+            if (error instanceof Error && error.message.includes('pgvector extension is required')) {
+                throw error; // Re-throw our custom error
+            }
+            console.error('āŒ Failed to check pgvector extension:', error);
+            throw new Error(`Failed to verify pgvector extension. Please ensure:
+1. PostgreSQL is accessible
+2. You have proper permissions
+3. pgvector extension is installed and enabled`);
+        }
+    }
+
+    protected async ensureInitialized(): Promise<void> {
+        await this.initializationPromise;
+        if (!this.pool) {
+            throw new Error('PostgreSQL pool not initialized');
+        }
+    }
+
+    private getTableName(collectionName: string): string {
+        return collectionName.toLowerCase();
+    }
+
+    private async tableExists(tableName: string): Promise<boolean> {
+        try {
+            const result = await this.pool!.query(
+                'SELECT 1 FROM information_schema.tables WHERE table_name = $1 AND table_schema = $2',
+                [tableName, 'public']
+            );
+            return result.rows.length > 0;
+        } catch (error) {
+            console.error('āŒ Failed to check table existence:', error);
+            return false;
+        }
+    }
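+
+    // NOTE (reviewer sketch, assumption): collection names are interpolated directly into SQL identifiers
+    // throughout this class, so callers are assumed to pass safe names (e.g. matching /^[a-z_][a-z0-9_]*$/).
+    // A hardened variant could quote identifiers, e.g. via PostgreSQL's format('%I', name).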
+    async createCollection(collectionName: string, dimension: number, description?: string): Promise<void> {
+        await this.ensureInitialized();
+        const tableName = this.getTableName(collectionName);
+
+        try {
+            // Create collection-specific table
+            const createTableQuery = `
+                CREATE TABLE IF NOT EXISTS ${tableName} (
+                    id TEXT PRIMARY KEY,
+                    vector vector(${dimension}),
+                    content TEXT NOT NULL,
+                    relative_path TEXT NOT NULL,
+                    start_line INTEGER NOT NULL,
+                    end_line INTEGER NOT NULL,
+                    file_extension TEXT NOT NULL,
+                    metadata text DEFAULT '{}'::text,
+                    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
+                )
+            `;
+
+            await this.pool!.query(createTableQuery);
+
+            // Create HNSW vector similarity index (pgvecto.rs syntax first, with fallback to standard pgvector syntax)
+            const vectorIndexName = `${tableName}_vector_idx`;
+
+            try {
+                // Try the pgvecto.rs 'vectors' index syntax first
+                const createIndexQuery = `
+                    CREATE INDEX IF NOT EXISTS ${vectorIndexName}
+                    ON ${tableName}
+                    USING vectors (vector vector_cos_ops)
+                    WITH (options = $$
+[indexing.hnsw]
+m = 30
+ef_construction = 500
+$$)
+                `;
+                await this.pool!.query(createIndexQuery);
+                console.log(`āœ… Created HNSW index using 'vectors' extension for ${tableName}`);
+            } catch (error) {
+                console.log(`āš ļø Failed to create index with 'vectors' extension, falling back to 'vector' extension...`);
+                // Fall back to the standard pgvector index syntax
+                const fallbackIndexQuery = `
+                    CREATE INDEX IF NOT EXISTS ${vectorIndexName}
+                    ON ${tableName}
+                    USING hnsw (vector vector_cosine_ops)
+                    WITH (m = 30, ef_construction = 500)
+                `;
+                await this.pool!.query(fallbackIndexQuery);
+                console.log(`āœ… Created HNSW index using 'vector' extension for ${tableName}`);
+            }
+
+            // Create additional indexes for common query patterns
+            await this.pool!.query(`CREATE INDEX IF NOT EXISTS ${tableName}_path_idx ON ${tableName} (relative_path)`);
+            await this.pool!.query(`CREATE INDEX IF NOT EXISTS ${tableName}_ext_idx ON ${tableName} (file_extension)`);
+
+            console.log(`āœ… PostgreSQL collection '${collectionName}' created successfully with vector dimension ${dimension}`);
+        } catch (error) {
+            console.error(`āŒ Failed to create collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
+
+    async createHybridCollection(collectionName: string, dimension: number, description?: string): Promise<void> {
+        await this.ensureInitialized();
+        const tableName = this.getTableName(collectionName);
+
+        console.log(`[PostgresDB] šŸ“ Creating hybrid collection '${collectionName}' with full-text search support`);
+
+        try {
+            // Create collection-specific table with full-text search support
+            const createTableQuery = `
+                CREATE TABLE IF NOT EXISTS ${tableName} (
+                    id TEXT PRIMARY KEY,
+                    vector vector(${dimension}),
+                    content TEXT NOT NULL,
+                    relative_path TEXT NOT NULL,
+                    start_line INTEGER NOT NULL,
+                    end_line INTEGER NOT NULL,
+                    file_extension TEXT NOT NULL,
+                    metadata text DEFAULT '{}'::text,
+                    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
+                    -- Add full-text search vector column
+                    content_tsvector tsvector
+                )
+            `;
+
+            await this.pool!.query(createTableQuery);
+
+            // Create HNSW vector similarity index (pgvecto.rs syntax first, with fallback to standard pgvector syntax)
+            const vectorIndexName = `${tableName}_vector_idx`;
+
+            try {
+                // Try the pgvecto.rs 'vectors' index syntax first
+                const createIndexQuery = `
+                    CREATE INDEX IF NOT EXISTS ${vectorIndexName}
+                    ON ${tableName}
+                    USING vectors (vector vector_cos_ops)
+                    WITH (options = $$
+[indexing.hnsw]
+m = 30
+ef_construction = 500
+$$)
+                `;
+                await this.pool!.query(createIndexQuery);
+                console.log(`āœ… Created HNSW index using 'vectors' extension for ${tableName}`);
+            } catch (error) {
+                console.log(`āš ļø Failed to create index with 'vectors' extension, falling back to 'vector' extension...`);
+                // Fall back to the standard pgvector index syntax
+                const fallbackIndexQuery = `
+                    CREATE INDEX IF NOT EXISTS ${vectorIndexName}
+                    ON ${tableName}
+                    USING hnsw (vector vector_cosine_ops)
+                    WITH (m = 30, ef_construction = 500)
+                `;
+                await this.pool!.query(fallbackIndexQuery);
+                console.log(`āœ… Created HNSW index using 'vector' extension for ${tableName}`);
+            }
+
+            // Create full-text search index using GIN
+            const fulltextIndexName = `${tableName}_fulltext_idx`;
+            await this.pool!.query(`
+                CREATE INDEX IF NOT EXISTS ${fulltextIndexName}
+                ON ${tableName}
+                USING gin(content_tsvector)
+            `);
+
+            // Note: No trigger needed - tsvector is populated directly during insert/update operations
+
+            // Create additional indexes for common query patterns
+            await this.pool!.query(`CREATE INDEX IF NOT EXISTS ${tableName}_path_idx ON ${tableName} (relative_path)`);
+            await this.pool!.query(`CREATE INDEX IF NOT EXISTS ${tableName}_ext_idx ON ${tableName} (file_extension)`);
+
+            // Update existing records to populate tsvector column if any exist
+            await this.pool!.query(`
+                UPDATE ${tableName}
+                SET content_tsvector = to_tsvector('english', content)
+                WHERE content_tsvector IS NULL
+            `);
+
+            console.log(`āœ… PostgreSQL hybrid collection '${collectionName}' created successfully with vector dimension ${dimension} and full-text search support`);
+        } catch (error) {
+            console.error(`āŒ Failed to create hybrid collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
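+
+    // Usage sketch (collection name and dimension are illustrative):
+    //   await db.createHybridCollection('code_chunks_myrepo', 1536); // e.g. OpenAI text-embedding-3-small
+    // One call creates the table plus the HNSW vector index and the GIN full-text index.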
+    async dropCollection(collectionName: string): Promise<void> {
+        await this.ensureInitialized();
+        const tableName = this.getTableName(collectionName);
+
+        try {
+            // Drop the entire collection table
+            const dropQuery = `DROP TABLE IF EXISTS ${tableName}`;
+            await this.pool!.query(dropQuery);
+            console.log(`āœ… PostgreSQL collection '${collectionName}' dropped successfully (table ${tableName} removed)`);
+        } catch (error) {
+            console.error(`āŒ Failed to drop collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
+
+    async hasCollection(collectionName: string): Promise<boolean> {
+        await this.ensureInitialized();
+        const tableName = this.getTableName(collectionName);
+
+        try {
+            // Check if the collection table exists
+            return await this.tableExists(tableName);
+        } catch (error) {
+            console.error(`āŒ Failed to check collection existence '${collectionName}':`, error);
+            return false;
+        }
+    }
+
+    async listCollections(): Promise<string[]> {
+        await this.ensureInitialized();
+
+        try {
+            // Get all tables that have vector columns (our vector database tables)
+            const result = await this.pool!.query(`
+                SELECT t.table_name
+                FROM information_schema.tables t
+                JOIN information_schema.columns c ON t.table_name = c.table_name
+                WHERE t.table_schema = 'public'
+                AND c.table_schema = 'public'
+                AND c.data_type = 'USER-DEFINED'
+                AND c.udt_name = 'vector'
+                ORDER BY t.table_name
+            `);
+
+            return result.rows.map(row => row.table_name);
+        } catch (error) {
+            console.error('āŒ Failed to list collections:', error);
+            throw error;
+        }
+    }
+
+    async insert(collectionName: string, documents: VectorDocument[]): Promise<void> {
+        await this.ensureInitialized();
+        const tableName = this.getTableName(collectionName);
+
+        if (documents.length === 0) {
+            return;
+        }
+
+        // Deduplicate documents by id (keep the last occurrence)
+        // This is necessary because the batch INSERT uses ON CONFLICT DO UPDATE, which requires each id to appear at most once per statement.
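+        // e.g. documents with ids [a, b, a] reduce to two entries; the Map keeps the last value written for 'a'.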
+        const deduplicatedDocs = Array.from(
+            new Map(documents.map(doc => [doc.id, doc])).values()
+        );
+
+        const originalCount = documents.length;
+        const deduplicatedCount = deduplicatedDocs.length;
+
+        if (originalCount !== deduplicatedCount) {
+            console.log(`[PostgresDB] šŸ”„ Deduplicated ${originalCount - deduplicatedCount} duplicate documents (${originalCount} → ${deduplicatedCount})`);
+        }
+
+        const startTime = Date.now();
+        console.log(`[PostgresDB] šŸ“ Starting to insert ${deduplicatedCount} documents into collection '${collectionName}'...`);
+
+        try {
+            const client = await this.pool!.connect();
+            try {
+                await client.query('BEGIN');
+
+                // Batch insert using VALUES with multiple rows
+                // PostgreSQL parameter limit is ~65535, with 8 params per doc, max ~8000 docs per batch
+                const batchSize = Math.min(this.config.batchSize || 100, 1000); // Cap at 1000 for safety
+                for (let i = 0; i < deduplicatedDocs.length; i += batchSize) {
+                    const batchStartTime = Date.now();
+                    const batch = deduplicatedDocs.slice(i, i + batchSize);
+
+                    // Build VALUES clause with placeholders and explicit type casting
+                    const valuesClauses: string[] = [];
+                    const allParams: any[] = []; // No collection name param needed
+
+                    batch.forEach((doc, index) => {
+                        const baseIndex = index * 8 + 1; // Each doc uses 8 params, starting from $1
+                        valuesClauses.push(`($${baseIndex}::text, $${baseIndex + 1}::vector, $${baseIndex + 2}::text, $${baseIndex + 3}::text, $${baseIndex + 4}::integer, $${baseIndex + 5}::integer, $${baseIndex + 6}::text, $${baseIndex + 7}::text)`);
+
+                        allParams.push(
+                            doc.id,                       // $baseIndex
+                            `[${doc.vector.join(',')}]`,  // $baseIndex + 1
+                            doc.content,                  // $baseIndex + 2
+                            doc.relativePath,             // $baseIndex + 3
+                            doc.startLine,                // $baseIndex + 4
+                            doc.endLine,                  // $baseIndex + 5
+                            doc.fileExtension,            // $baseIndex + 6
+                            JSON.stringify(doc.metadata)  // $baseIndex + 7
+                        );
+                    });
+
+                    const batchInsertQuery = `
+                        INSERT INTO ${tableName}
+                        (id, vector, content, relative_path, start_line, end_line, file_extension, metadata)
+                        VALUES ${valuesClauses.join(', ')}
+                        ON CONFLICT (id) DO UPDATE SET
+                            vector = EXCLUDED.vector,
+                            content = EXCLUDED.content,
+                            relative_path = EXCLUDED.relative_path,
+                            start_line = EXCLUDED.start_line,
+                            end_line = EXCLUDED.end_line,
+                            file_extension = EXCLUDED.file_extension,
+                            metadata = EXCLUDED.metadata
+                    `;
+
+                    await client.query(batchInsertQuery, allParams);
+                    const batchDuration = Date.now() - batchStartTime;
+                    console.log(`āœ… Batch inserted ${batch.length} documents (${i + 1}-${Math.min(i + batchSize, deduplicatedDocs.length)} of ${deduplicatedDocs.length}) in ${batchDuration}ms`);
+                }
+
+                await client.query('COMMIT');
+                const totalDuration = Date.now() - startTime;
+                const docsPerSecond = Math.round((deduplicatedDocs.length / totalDuration) * 1000);
+                console.log(`āœ… Successfully inserted ${deduplicatedDocs.length} documents into PostgreSQL collection '${collectionName}' in ${totalDuration}ms (${docsPerSecond} docs/sec)`);
+            } catch (error) {
+                await client.query('ROLLBACK');
+                throw error;
+            } finally {
+                client.release();
+            }
+        } catch (error) {
+            console.error(`āŒ Failed to insert documents into collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
+
+    async insertHybrid(collectionName: string, documents: VectorDocument[]): Promise<void> {
+        await this.ensureInitialized();
+        const tableName = this.getTableName(collectionName);
+
+        if (documents.length === 0) {
+            return;
+        }
+
+        // Deduplicate documents by id (keep the last occurrence)
+        const deduplicatedDocs = Array.from(
+            new Map(documents.map(doc => [doc.id, doc])).values()
+        );
+
+        const originalCount = documents.length;
+        const deduplicatedCount = deduplicatedDocs.length;
+
+        if (originalCount !== deduplicatedCount) {
+            console.log(`[PostgresDB] šŸ”„ Deduplicated ${originalCount - deduplicatedCount} duplicate documents (${originalCount} → ${deduplicatedCount})`);
+        }
+
+        const startTime = Date.now();
+        console.log(`[PostgresDB] šŸ“ Starting to insert ${deduplicatedCount} documents for hybrid collection '${collectionName}' with full-text indexing...`);
+
+        try {
+            const client = await this.pool!.connect();
+            try {
+                await client.query('BEGIN');
+
+                // Batch insert using VALUES with multiple rows
+                // PostgreSQL parameter limit is ~65535; 8 bound params per doc (the tsvector is computed inline from the content param)
+                const batchSize = Math.min(this.config.batchSize || 100, 1000); // Cap at 1000 for safety
+                for (let i = 0; i < deduplicatedDocs.length; i += batchSize) {
+                    const batchStartTime = Date.now();
+                    const batch = deduplicatedDocs.slice(i, i + batchSize);
+
+                    // Build VALUES clause with placeholders and explicit type casting
+                    const valuesClauses: string[] = [];
+                    const allParams: any[] = []; // No collection name param needed
+
+                    batch.forEach((doc, index) => {
+                        // Bug fix: each doc binds 8 params, so the stride must be 8 (not 9);
+                        // the 9th column is a tsvector generated inline from $baseIndex + 2
+                        const baseIndex = index * 8 + 1;
+                        valuesClauses.push(`($${baseIndex}::text, $${baseIndex + 1}::vector, $${baseIndex + 2}::text, $${baseIndex + 3}::text, $${baseIndex + 4}::integer, $${baseIndex + 5}::integer, $${baseIndex + 6}::text, $${baseIndex + 7}::text, to_tsvector('english', $${baseIndex + 2}))`);
+                        allParams.push(
+                            doc.id,                       // $baseIndex
+                            `[${doc.vector.join(',')}]`,  // $baseIndex + 1
+                            doc.content,                  // $baseIndex + 2
+                            doc.relativePath,             // $baseIndex + 3
+                            doc.startLine,                // $baseIndex + 4
+                            doc.endLine,                  // $baseIndex + 5
+                            doc.fileExtension,            // $baseIndex + 6
+                            JSON.stringify(doc.metadata)  // $baseIndex + 7
+                            // the tsvector column is generated inline, no extra param
+                        );
+                    });
+
+                    const batchInsertQuery = `
+                        INSERT INTO ${tableName}
+                        (id, vector, content, relative_path, start_line, end_line, file_extension, metadata, content_tsvector)
+                        VALUES ${valuesClauses.join(', ')}
+                        ON CONFLICT (id) DO UPDATE SET
+                            vector = EXCLUDED.vector,
+                            content = EXCLUDED.content,
+                            relative_path = EXCLUDED.relative_path,
+                            start_line = EXCLUDED.start_line,
+                            end_line = EXCLUDED.end_line,
+                            file_extension = EXCLUDED.file_extension,
+                            metadata = EXCLUDED.metadata,
+                            content_tsvector = to_tsvector('english', EXCLUDED.content)
+                    `;
+
+                    await client.query(batchInsertQuery, allParams);
+                    const batchDuration = Date.now() - batchStartTime;
+                    console.log(`āœ… Batch inserted ${batch.length} hybrid documents (${i + 1}-${Math.min(i + batchSize, deduplicatedDocs.length)} of ${deduplicatedDocs.length}) in ${batchDuration}ms`);
+                }
+
+                await client.query('COMMIT');
+                const totalDuration = Date.now() - startTime;
+                const docsPerSecond = Math.round((deduplicatedDocs.length / totalDuration) * 1000);
+                console.log(`āœ… Successfully inserted ${deduplicatedDocs.length} documents into PostgreSQL hybrid collection '${collectionName}' with full-text indexing in ${totalDuration}ms (${docsPerSecond} docs/sec)`);
+            } catch (error) {
+                await client.query('ROLLBACK');
+                throw error;
+            } finally {
+                client.release();
+            }
+        } catch (error) {
+            console.error(`āŒ Failed to insert hybrid documents into collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
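+
+    // Usage sketch: scores below are cosine similarities (1 - pgvector cosine distance), e.g.
+    //   const hits = await db.search('code_chunks_myrepo', queryVector, { topK: 5, threshold: 0.3 });
+    // returns at most 5 chunks whose similarity to queryVector is >= 0.3.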
+    async search(collectionName: string, queryVector: number[], options?: SearchOptions): Promise<VectorSearchResult[]> {
+        await this.ensureInitialized();
+
+        const topK = options?.topK || 10;
+        const threshold = options?.threshold || 0.0;
+
+        try {
+            const tableName = this.getTableName(collectionName);
+            let whereClause = '';
+            const queryParams: any[] = [`[${queryVector.join(',')}]`, topK];
+            let paramIndex = 3;
+
+            // Add additional filter conditions if provided
+            if (options?.filterExpr) {
+                whereClause = whereClause ? `${whereClause} AND (${options.filterExpr})` : `WHERE (${options.filterExpr})`;
+            }
+
+            // Add threshold filter to WHERE clause
+            if (threshold > 0) {
+                const thresholdCondition = `1 - (vector <=> $1) >= $${paramIndex++}`;
+                whereClause = whereClause ? `${whereClause} AND ${thresholdCondition}` : `WHERE ${thresholdCondition}`;
+                queryParams.push(threshold);
+            }
+
+            const searchQuery = `
+                SELECT
+                    id,
+                    content,
+                    relative_path,
+                    start_line,
+                    end_line,
+                    file_extension,
+                    metadata,
+                    1 - (vector <=> $1) AS score
+                FROM ${tableName}
+                ${whereClause}
+                ORDER BY vector <=> $1
+                LIMIT $2
+            `;
+
+            const result = await this.pool!.query(searchQuery, queryParams);
+
+            return result.rows.map((row: any) => ({
+                document: {
+                    id: row.id,
+                    vector: queryVector,
+                    content: row.content,
+                    relativePath: row.relative_path,
+                    startLine: row.start_line,
+                    endLine: row.end_line,
+                    fileExtension: row.file_extension,
+                    metadata: typeof row.metadata === 'string' ? JSON.parse(row.metadata) : row.metadata,
+                },
+                score: parseFloat(row.score)
+            }));
+        } catch (error) {
+            console.error(`āŒ Failed to search collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
+
+    async hybridSearch(collectionName: string, searchRequests: HybridSearchRequest[], options?: HybridSearchOptions): Promise<HybridSearchResult[]> {
+        await this.ensureInitialized();
+
+        const limit = options?.limit || 10;
+        const rerankStrategy = options?.rerank?.strategy || 'rrf';
+        const rrfK = options?.rerank?.params?.k || 60;
+        const tableName = this.getTableName(collectionName);
+
+        console.log(`[PostgresDB] šŸ” Performing hybrid search on collection '${collectionName}' with ${searchRequests.length} search requests`);
+
+        try {
+            // Find dense vector search request
+            const denseRequest = searchRequests.find(req => req.anns_field === 'vector');
+            // Find sparse/text search request
+            const sparseRequest = searchRequests.find(req => req.anns_field === 'sparse_vector');
+
+            if (!denseRequest) {
+                throw new Error('Dense vector search request is required for hybrid search');
+            }
+
+            let hybridQuery: string;
+            let queryParams: any[];
+
+            if (sparseRequest && typeof sparseRequest.data === 'string') {
+                console.log(`[PostgresDB] šŸ” Hybrid search: vector + full-text search using ${rerankStrategy} reranking`);
+
+                // Enhanced hybrid search with better full-text search and reranking
+                if (rerankStrategy === 'rrf') {
+                    // RRF (Reciprocal Rank Fusion) reranking
+                    hybridQuery = `
+                        WITH vector_search AS (
+                            SELECT
+                                id, content, relative_path, start_line, end_line, file_extension, metadata,
+                                1 - (vector <=> $1) AS vector_score,
+                                ROW_NUMBER() OVER (ORDER BY vector <=> $1) AS vector_rank
+                            FROM ${tableName}
+                            ORDER BY vector <=> $1
+                            LIMIT $2
+                        ),
+                        text_search AS (
+                            SELECT
+                                id, content, relative_path, start_line, end_line, file_extension, metadata,
+                                ts_rank_cd(content_tsvector, plainto_tsquery('english', $3), 32) AS text_score,
+                                ROW_NUMBER() OVER (ORDER BY ts_rank_cd(content_tsvector, plainto_tsquery('english', $3), 32) DESC) AS text_rank
+                            FROM ${tableName}
+                            WHERE content_tsvector @@ plainto_tsquery('english', $3)
+                            ORDER BY ts_rank_cd(content_tsvector,
plainto_tsquery('english', $3), 32) DESC + LIMIT $2 + ), + combined AS ( + SELECT + COALESCE(v.id, t.id) as id, + COALESCE(v.content, t.content) as content, + COALESCE(v.relative_path, t.relative_path) as relative_path, + COALESCE(v.start_line, t.start_line) as start_line, + COALESCE(v.end_line, t.end_line) as end_line, + COALESCE(v.file_extension, t.file_extension) as file_extension, + COALESCE(v.metadata, t.metadata) as metadata, + COALESCE(v.vector_score, 0) as vector_score, + COALESCE(t.text_score, 0) as text_score, + COALESCE(v.vector_rank, 999999) as vector_rank, + COALESCE(t.text_rank, 999999) as text_rank, + -- RRF Score: 1/(k + rank) for each ranking + (1.0 / ($4 + COALESCE(v.vector_rank, 999999))) + + (1.0 / ($4 + COALESCE(t.text_rank, 999999))) as rrf_score + FROM vector_search v + FULL OUTER JOIN text_search t ON v.id = t.id + ) + SELECT + id, content, relative_path, start_line, end_line, file_extension, metadata, + vector_score, text_score, rrf_score as final_score + FROM combined + ORDER BY rrf_score DESC + LIMIT $5 + `; + queryParams = [ + `[${(denseRequest.data as number[]).join(',')}]`, + Math.max(denseRequest.limit, sparseRequest.limit), + sparseRequest.data as string, + rrfK, + limit + ]; + } else { + // Weighted reranking + const vectorWeight = options?.rerank?.params?.vector_weight || 0.7; + const textWeight = options?.rerank?.params?.text_weight || 0.3; + + hybridQuery = ` + WITH vector_search AS ( + SELECT + id, content, relative_path, start_line, end_line, file_extension, metadata, + 1 - (vector <=> $1) AS vector_score + FROM ${tableName} + ORDER BY vector <=> $1 + LIMIT $2 + ), + text_search AS ( + SELECT + id, content, relative_path, start_line, end_line, file_extension, metadata, + ts_rank_cd(content_tsvector, plainto_tsquery('english', $3), 32) AS text_score + FROM ${tableName} + WHERE content_tsvector @@ plainto_tsquery('english', $3) + ORDER BY ts_rank_cd(content_tsvector, plainto_tsquery('english', $3), 32) DESC + LIMIT $2 + ), + combined AS ( + SELECT + COALESCE(v.id, t.id) as id, + COALESCE(v.content, t.content) as content, + COALESCE(v.relative_path, t.relative_path) as relative_path, + COALESCE(v.start_line, t.start_line) as start_line, + COALESCE(v.end_line, t.end_line) as end_line, + COALESCE(v.file_extension, t.file_extension) as file_extension, + COALESCE(v.metadata, t.metadata) as metadata, + COALESCE(v.vector_score, 0) as vector_score, + COALESCE(t.text_score, 0) as text_score, + -- Weighted combination + COALESCE(v.vector_score, 0) * $4 + COALESCE(t.text_score, 0) * $5 as weighted_score + FROM vector_search v + FULL OUTER JOIN text_search t ON v.id = t.id + ) + SELECT + id, content, relative_path, start_line, end_line, file_extension, metadata, + vector_score, text_score, weighted_score as final_score + FROM combined + ORDER BY weighted_score DESC + LIMIT $6 + `; + queryParams = [ + `[${(denseRequest.data as number[]).join(',')}]`, + Math.max(denseRequest.limit, sparseRequest.limit), + sparseRequest.data as string, + vectorWeight, + textWeight, + limit + ]; + } + } else { + console.log(`[PostgresDB] šŸ” Vector-only search (no text query provided)`); + // Fallback to dense vector search only + hybridQuery = ` + SELECT + id, content, relative_path, start_line, end_line, file_extension, metadata, + 1 - (vector <=> $1) AS final_score + FROM ${tableName} + ORDER BY vector <=> $1 + LIMIT $2 + `; + queryParams = [ + `[${(denseRequest.data as number[]).join(',')}]`, + limit + ]; + } + + // Add filter expression if provided + if (options?.filterExpr && 
options.filterExpr.trim()) {
+                // For CTE queries, add WHERE clause if not present, or extend existing WHERE
+                // Known limitation (reviewer note): this filters only the text-search CTE; the vector CTE is left unfiltered
+                if (hybridQuery.includes('WHERE content_tsvector')) {
+                    hybridQuery = hybridQuery.replace(
+                        /WHERE content_tsvector/g,
+                        `WHERE (${options.filterExpr}) AND content_tsvector`
+                    );
+                } else {
+                    // For simple queries without WHERE clauses, add WHERE
+                    hybridQuery = hybridQuery.replace(
+                        /ORDER BY/g,
+                        `WHERE (${options.filterExpr}) ORDER BY`
+                    );
+                }
+            }
+
+            console.log(`[PostgresDB] šŸ” Executing hybrid search query with ${queryParams.length} parameters`);
+            const result = await this.pool!.query(hybridQuery, queryParams);
+
+            console.log(`[PostgresDB] āœ… Hybrid search completed: ${result.rows.length} results found`);
+
+            return result.rows.map((row: any) => ({
+                document: {
+                    id: row.id,
+                    vector: denseRequest.data as number[],
+                    content: row.content,
+                    relativePath: row.relative_path,
+                    startLine: row.start_line,
+                    endLine: row.end_line,
+                    fileExtension: row.file_extension,
+                    metadata: typeof row.metadata === 'string' ? JSON.parse(row.metadata) : row.metadata,
+                },
+                score: parseFloat(row.final_score)
+            }));
+        } catch (error) {
+            console.error(`āŒ Failed to perform hybrid search on collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
+
+    async delete(collectionName: string, ids: string[]): Promise<void> {
+        await this.ensureInitialized();
+
+        const tableName = this.getTableName(collectionName);
+
+        if (ids.length === 0) {
+            return;
+        }
+
+        try {
+            const placeholders = ids.map((_, index) => `$${index + 1}`).join(', ');
+            const deleteQuery = `DELETE FROM ${tableName} WHERE id IN (${placeholders})`;
+
+            const result = await this.pool!.query(deleteQuery, ids);
+            console.log(`āœ… Deleted ${result.rowCount} documents from PostgreSQL collection '${collectionName}'`);
+        } catch (error) {
+            console.error(`āŒ Failed to delete documents from collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
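+
+    // Usage sketch (the filter string is raw SQL over this table's columns):
+    //   await db.query('code_chunks_myrepo', "relative_path LIKE 'src/%'", ['relativePath', 'startLine', 'endLine'], 50);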
+    async query(collectionName: string, filter: string, outputFields: string[], limit?: number): Promise<Record<string, any>[]> {
+        await this.ensureInitialized();
+
+        const queryLimit = limit || 100;
+
+        try {
+            // Map output fields to database columns
+            const fieldMapping: Record<string, string> = {
+                'id': 'id',
+                'content': 'content',
+                'relativePath': 'relative_path',
+                'startLine': 'start_line',
+                'endLine': 'end_line',
+                'fileExtension': 'file_extension',
+                'metadata': 'metadata'
+            };
+
+            const dbFields = outputFields.map(field => fieldMapping[field] || field);
+            const selectClause = dbFields.length > 0 ? dbFields.join(', ') : '*';
+
+            const tableName = this.getTableName(collectionName);
+            let queryText = `SELECT ${selectClause} FROM ${tableName}`;
+            const queryParams: any[] = [];
+
+            if (filter && filter.trim()) {
+                queryText += ` WHERE (${filter})`;
+            }
+
+            queryText += ` LIMIT $${queryParams.length + 1}`;
+            queryParams.push(queryLimit);
+
+            const result = await this.pool!.query(queryText, queryParams);
+
+            return result.rows.map(row => {
+                const mapped: Record<string, any> = {};
+                for (const [originalField, dbField] of Object.entries(fieldMapping)) {
+                    if (outputFields.includes(originalField) && row[dbField] !== undefined) {
+                        mapped[originalField] = row[dbField];
+                    }
+                }
+                return mapped;
+            });
+        } catch (error) {
+            console.error(`āŒ Failed to query collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
+
+    async checkCollectionLimit(): Promise<boolean> {
+        await this.ensureInitialized();
+
+        try {
+            // PostgreSQL doesn't have collection limits like cloud services
+            // Check if we can create a test table (basic connectivity test)
+            const testTableName = 'claude_context_limit_test_' + Date.now();
+            await this.pool!.query(`CREATE TEMP TABLE ${testTableName} (id TEXT)`);
+            await this.pool!.query(`DROP TABLE ${testTableName}`);
+            return true;
+        } catch (error) {
+            console.error('āŒ Failed to check collection limit:', error);
+            return false;
+        }
+    }
+
+    async getCollectionStats(collectionName: string): Promise<{ entityCount: number }> {
+        await this.ensureInitialized();
+        const tableName = this.getTableName(collectionName);
+
+        try {
+            const result = await this.pool!.query(
+                `SELECT COUNT(*) as count FROM ${tableName}`
+            );
+            return {
+                entityCount: parseInt(result.rows[0].count)
+            };
+        } catch (error) {
+            console.error(`āŒ Failed to get collection stats for '${collectionName}':`, error);
+            throw error;
+        }
+    }
+
+    /**
+     * Clean up resources
+     */
+    async close(): Promise<void> {
+        if (this.pool) {
+            await this.pool.end();
+            this.pool = null;
+            console.log('šŸ”Œ PostgreSQL connection pool closed');
+        }
+    }
+}
diff --git a/packages/mcp/package.json b/packages/mcp/package.json
index 40f65e94..9f0cb586 100644
--- a/packages/mcp/package.json
+++ b/packages/mcp/package.json
@@ -20,10 +20,12 @@
     "dependencies": {
         "@zilliz/claude-context-core": "workspace:*",
         "@modelcontextprotocol/sdk": "^1.12.1",
+        "pg": "^8.11.3",
         "zod": "^3.25.55"
     },
     "devDependencies": {
         "@types/node": "^20.0.0",
+        "@types/pg": "^8.10.9",
         "tsx": "^4.19.4",
         "typescript": "^5.0.0"
     },
diff --git a/packages/mcp/src/config.ts b/packages/mcp/src/config.ts
index 428f9474..d390c567 100644
--- a/packages/mcp/src/config.ts
+++ b/packages/mcp/src/config.ts
@@ -16,8 +16,17 @@
     ollamaModel?: string;
     ollamaHost?: string;
     // Vector database configuration
+    vectorDatabaseProvider: 'milvus' | 'postgres';
     milvusAddress?: string; // Optional, can be auto-resolved from token
     milvusToken?: string;
+    // PostgreSQL configuration
+    postgresConnectionString?: string;
+    postgresHost?: string;
+    postgresPort?: number;
+    postgresDatabase?: string;
+    postgresUsername?: string;
+    postgresPassword?: string;
+    postgresSSL?: boolean;
 }

 // Legacy format (v1) - for backward compatibility
@@ -110,7 +119,9 @@
     console.log(`[DEBUG] OLLAMA_MODEL: ${envManager.get('OLLAMA_MODEL') || 'NOT SET'}`);
     console.log(`[DEBUG] GEMINI_API_KEY: ${envManager.get('GEMINI_API_KEY') ?
'SET (length: ' + envManager.get('GEMINI_API_KEY')!.length + ')' : 'NOT SET'}`); console.log(`[DEBUG] OPENAI_API_KEY: ${envManager.get('OPENAI_API_KEY') ? 'SET (length: ' + envManager.get('OPENAI_API_KEY')!.length + ')' : 'NOT SET'}`); + console.log(`[DEBUG] VECTOR_DATABASE_PROVIDER: ${envManager.get('VECTOR_DATABASE_PROVIDER') || 'NOT SET'}`); console.log(`[DEBUG] MILVUS_ADDRESS: ${envManager.get('MILVUS_ADDRESS') || 'NOT SET'}`); + console.log(`[DEBUG] POSTGRES_CONNECTION_STRING: ${envManager.get('POSTGRES_CONNECTION_STRING') ? 'SET' : 'NOT SET'}`); console.log(`[DEBUG] NODE_ENV: ${envManager.get('NODE_ENV') || 'NOT SET'}`); const config: ContextMcpConfig = { @@ -128,9 +139,18 @@ export function createMcpConfig(): ContextMcpConfig { // Ollama configuration ollamaModel: envManager.get('OLLAMA_MODEL'), ollamaHost: envManager.get('OLLAMA_HOST'), - // Vector database configuration - address can be auto-resolved from token + // Vector database configuration + vectorDatabaseProvider: (envManager.get('VECTOR_DATABASE_PROVIDER') as 'milvus' | 'postgres') || 'milvus', milvusAddress: envManager.get('MILVUS_ADDRESS'), // Optional, can be resolved from token - milvusToken: envManager.get('MILVUS_TOKEN') + milvusToken: envManager.get('MILVUS_TOKEN'), + // PostgreSQL configuration + postgresConnectionString: envManager.get('POSTGRES_CONNECTION_STRING'), + postgresHost: envManager.get('POSTGRES_HOST'), + postgresPort: envManager.get('POSTGRES_PORT') ? parseInt(envManager.get('POSTGRES_PORT')!) : undefined, + postgresDatabase: envManager.get('POSTGRES_DATABASE'), + postgresUsername: envManager.get('POSTGRES_USERNAME'), + postgresPassword: envManager.get('POSTGRES_PASSWORD'), + postgresSSL: envManager.get('POSTGRES_SSL') === 'true' }; return config; @@ -143,7 +163,15 @@ export function logConfigurationSummary(config: ContextMcpConfig): void { console.log(`[MCP] Server: ${config.name} v${config.version}`); console.log(`[MCP] Embedding Provider: ${config.embeddingProvider}`); console.log(`[MCP] Embedding Model: ${config.embeddingModel}`); - console.log(`[MCP] Milvus Address: ${config.milvusAddress || (config.milvusToken ? '[Auto-resolve from token]' : '[Not configured]')}`); + console.log(`[MCP] Vector Database Provider: ${config.vectorDatabaseProvider}`); + + // Log vector database configuration + if (config.vectorDatabaseProvider === 'postgres') { + console.log(`[MCP] PostgreSQL Connection: ${config.postgresConnectionString ? 'āœ… Connection string configured' : + (config.postgresHost ? `${config.postgresHost}:${config.postgresPort || 5432}/${config.postgresDatabase || 'postgres'}` : 'āŒ Not configured')}`); + } else { + console.log(`[MCP] Milvus Address: ${config.milvusAddress || (config.milvusToken ? 
'[Auto-resolve from token]' : '[Not configured]')}`); + } // Log provider-specific configuration without exposing sensitive data switch (config.embeddingProvider) { @@ -200,13 +228,28 @@ Environment Variables: OLLAMA_MODEL Ollama model name (alternative to EMBEDDING_MODEL for Ollama) Vector Database Configuration: + VECTOR_DATABASE_PROVIDER Vector database provider: milvus, postgres (default: milvus) + + Milvus Configuration: MILVUS_ADDRESS Milvus address (optional, can be auto-resolved from token) MILVUS_TOKEN Milvus token (optional, used for authentication and address resolution) + + PostgreSQL Configuration: + POSTGRES_CONNECTION_STRING PostgreSQL connection string (e.g., postgresql://user:pass@localhost:5432/db) + POSTGRES_HOST PostgreSQL host (default: localhost) + POSTGRES_PORT PostgreSQL port (default: 5432) + POSTGRES_DATABASE PostgreSQL database name (default: postgres) + POSTGRES_USERNAME PostgreSQL username (default: postgres) + POSTGRES_PASSWORD PostgreSQL password + POSTGRES_SSL Enable SSL connection (true/false, default: false) Examples: # Start MCP server with OpenAI (default) and explicit Milvus address OPENAI_API_KEY=sk-xxx MILVUS_ADDRESS=localhost:19530 npx @zilliz/claude-context-mcp@latest + # Start MCP server with OpenAI and PostgreSQL + OPENAI_API_KEY=sk-xxx VECTOR_DATABASE_PROVIDER=postgres POSTGRES_CONNECTION_STRING=postgresql://user:pass@localhost:5432/db npx @zilliz/claude-context-mcp@latest + # Start MCP server with OpenAI and specific model OPENAI_API_KEY=sk-xxx EMBEDDING_MODEL=text-embedding-3-large MILVUS_TOKEN=your-token npx @zilliz/claude-context-mcp@latest diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 8c4c3b28..6bc43827 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -2,18 +2,46 @@ // CRITICAL: Redirect console outputs to stderr IMMEDIATELY to avoid interfering with MCP JSON protocol // Only MCP protocol messages should go to stdout +import * as fs from 'fs'; +import * as path from 'path'; + const originalConsoleLog = console.log; const originalConsoleWarn = console.warn; +const originalConsoleError = console.error; + +// Create a log file for debugging (optional, controlled by environment variable) +const enableFileLogging = process.env.MCP_LOG_FILE === 'true'; +const logFilePath = path.join(process.cwd(), 'claude_context_mcp.log'); + +function writeToLogFile(level: string, message: string) { + if (enableFileLogging) { + const timestamp = new Date().toISOString(); + const logEntry = `[${timestamp}] ${level}: ${message}\n`; + try { + fs.appendFileSync(logFilePath, logEntry); + } catch (error) { + // Ignore file write errors to avoid breaking the server + } + } +} console.log = (...args: any[]) => { - process.stderr.write('[LOG] ' + args.join(' ') + '\n'); + const message = args.join(' '); + process.stderr.write('[LOG] ' + message + '\n'); + writeToLogFile('LOG', message); }; console.warn = (...args: any[]) => { - process.stderr.write('[WARN] ' + args.join(' ') + '\n'); + const message = args.join(' '); + process.stderr.write('[WARN] ' + message + '\n'); + writeToLogFile('WARN', message); }; -// console.error already goes to stderr by default +console.error = (...args: any[]) => { + const message = args.join(' '); + originalConsoleError('[ERROR]', message); + writeToLogFile('ERROR', message); +}; import { Server } from "@modelcontextprotocol/sdk/server/index.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; @@ -21,8 +49,7 @@ import { ListToolsRequestSchema, 
CallToolRequestSchema } from "@modelcontextprotocol/sdk/types.js"; -import { Context } from "@zilliz/claude-context-core"; -import { MilvusVectorDatabase } from "@zilliz/claude-context-core"; +import { Context, MilvusVectorDatabase, PostgresVectorDatabase, VectorDatabase } from "@zilliz/claude-context-core"; // Import our modular components import { createMcpConfig, logConfigurationSummary, showHelpMessage, ContextMcpConfig } from "./config.js"; @@ -59,11 +86,26 @@ class ContextMcpServer { const embedding = createEmbeddingInstance(config); logEmbeddingProviderInfo(config, embedding); - // Initialize vector database - const vectorDatabase = new MilvusVectorDatabase({ - address: config.milvusAddress, - ...(config.milvusToken && { token: config.milvusToken }) - }); + // Initialize vector database based on provider + let vectorDatabase: VectorDatabase; + if (config.vectorDatabaseProvider === 'postgres') { + console.log(`[VECTOR_DB] Initializing PostgreSQL vector database...`); + vectorDatabase = new PostgresVectorDatabase({ + connectionString: config.postgresConnectionString, + host: config.postgresHost, + port: config.postgresPort, + database: config.postgresDatabase, + username: config.postgresUsername, + password: config.postgresPassword, + ssl: config.postgresSSL + }); + } else { + console.log(`[VECTOR_DB] Initializing Milvus vector database...`); + vectorDatabase = new MilvusVectorDatabase({ + address: config.milvusAddress, + ...(config.milvusToken && { token: config.milvusToken }) + }); + } // Initialize Claude Context this.context = new Context({ diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 11b1f019..30659217 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -56,10 +56,16 @@ importers: '@zilliz/claude-context-core': specifier: workspace:* version: link:../../packages/core + pg: + specifier: ^8.11.3 + version: 8.16.3 devDependencies: '@types/node': specifier: ^20.0.0 version: 20.19.0 + '@types/pg': + specifier: ^8.10.9 + version: 8.15.5 dotenv: specifier: ^16.0.0 version: 16.5.0 @@ -166,6 +172,9 @@ importers: openai: specifier: ^5.1.1 version: 5.1.1(ws@8.18.3)(zod@3.25.55) + pg: + specifier: ^8.11.3 + version: 8.16.3 tree-sitter: specifier: ^0.21.1 version: 0.21.1 @@ -212,6 +221,9 @@ importers: '@types/mock-fs': specifier: ^4.13.4 version: 4.13.4 + '@types/pg': + specifier: ^8.10.9 + version: 8.15.5 jest: specifier: ^30.0.0 version: 30.0.1(@types/node@20.19.0) @@ -230,6 +242,9 @@ importers: '@zilliz/claude-context-core': specifier: workspace:* version: link:../core + pg: + specifier: ^8.11.3 + version: 8.16.3 zod: specifier: ^3.25.55 version: 3.25.55 @@ -237,6 +252,9 @@ importers: '@types/node': specifier: ^20.0.0 version: 20.19.0 + '@types/pg': + specifier: ^8.10.9 + version: 8.15.5 tsx: specifier: ^4.19.4 version: 4.19.4 @@ -1107,6 +1125,9 @@ packages: '@types/normalize-package-data@2.4.4': resolution: {integrity: sha512-37i+OaWTh9qeK4LSHPsyRC7NahnGotNuZvjLSgcPzblpHB3rrCJxAOgI5gCdKm7coonsaX1Of0ILiTcnZjbfxA==} + '@types/pg@8.15.5': + resolution: {integrity: sha512-LF7lF6zWEKxuT3/OR8wAZGzkg4ENGXFNyiV/JeOt9z5B+0ZVwbql9McqX5c/WStFq1GaGso7H1AzP/qSzmlCKQ==} + '@types/retry@0.12.0': resolution: {integrity: sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==} @@ -1236,49 +1257,41 @@ packages: resolution: {integrity: sha512-vdqBh911wc5awE2bX2zx3eflbyv8U9xbE/jVKAm425eRoOVv/VseGZsqi3A3SykckSpF4wSROkbQPvbQFn8EsA==} cpu: [arm64] os: [linux] - libc: [glibc] '@unrs/resolver-binding-linux-arm64-musl@1.9.0': resolution: {integrity: 
     resolution: {integrity: sha512-/8JFZ/SnuDr1lLEVsxsuVwrsGquTvT51RZGvyDB/dOK3oYK2UqeXzgeyq6Otp8FZXQcEYqJwxb9v+gtdXn03eQ==}
     cpu: [arm64]
     os: [linux]
-    libc: [musl]
 
   '@unrs/resolver-binding-linux-ppc64-gnu@1.9.0':
     resolution: {integrity: sha512-FkJjybtrl+rajTw4loI3L6YqSOpeZfDls4SstL/5lsP2bka9TiHUjgMBjygeZEis1oC8LfJTS8FSgpKPaQx2tQ==}
     cpu: [ppc64]
     os: [linux]
-    libc: [glibc]
 
   '@unrs/resolver-binding-linux-riscv64-gnu@1.9.0':
     resolution: {integrity: sha512-w/NZfHNeDusbqSZ8r/hp8iL4S39h4+vQMc9/vvzuIKMWKppyUGKm3IST0Qv0aOZ1rzIbl9SrDeIqK86ZpUK37w==}
     cpu: [riscv64]
     os: [linux]
-    libc: [glibc]
 
   '@unrs/resolver-binding-linux-riscv64-musl@1.9.0':
     resolution: {integrity: sha512-bEPBosut8/8KQbUixPry8zg/fOzVOWyvwzOfz0C0Rw6dp+wIBseyiHKjkcSyZKv/98edrbMknBaMNJfA/UEdqw==}
     cpu: [riscv64]
     os: [linux]
-    libc: [musl]
 
   '@unrs/resolver-binding-linux-s390x-gnu@1.9.0':
     resolution: {integrity: sha512-LDtMT7moE3gK753gG4pc31AAqGUC86j3AplaFusc717EUGF9ZFJ356sdQzzZzkBk1XzMdxFyZ4f/i35NKM/lFA==}
     cpu: [s390x]
     os: [linux]
-    libc: [glibc]
 
   '@unrs/resolver-binding-linux-x64-gnu@1.9.0':
     resolution: {integrity: sha512-WmFd5KINHIXj8o1mPaT8QRjA9HgSXhN1gl9Da4IZihARihEnOylu4co7i/yeaIpcfsI6sYs33cNZKyHYDh0lrA==}
     cpu: [x64]
     os: [linux]
-    libc: [glibc]
 
   '@unrs/resolver-binding-linux-x64-musl@1.9.0':
     resolution: {integrity: sha512-CYuXbANW+WgzVRIl8/QvZmDaZxrqvOldOwlbUjIM4pQ46FJ0W5cinJ/Ghwa/Ng1ZPMJMk1VFdsD/XwmCGIXBWg==}
     cpu: [x64]
     os: [linux]
-    libc: [musl]
 
   '@unrs/resolver-binding-wasm32-wasi@1.9.0':
     resolution: {integrity: sha512-6Rp2WH0OoitMYR57Z6VE8Y6corX8C6QEMWLgOV6qXiJIeZ1F9WGXY/yQ8yDC4iTraotyLOeJ2Asea0urWj2fKQ==}
@@ -3432,6 +3445,40 @@ packages:
   pend@1.2.0:
     resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==}
 
+  pg-cloudflare@1.2.7:
+    resolution: {integrity: sha512-YgCtzMH0ptvZJslLM1ffsY4EuGaU0cx4XSdXLRFae8bPP4dS5xL1tNB3k2o/N64cHJpwU7dxKli/nZ2lUa5fLg==}
+
+  pg-connection-string@2.9.1:
+    resolution: {integrity: sha512-nkc6NpDcvPVpZXxrreI/FOtX3XemeLl8E0qFr6F2Lrm/I8WOnaWNhIPK2Z7OHpw7gh5XJThi6j6ppgNoaT1w4w==}
+
+  pg-int8@1.0.1:
+    resolution: {integrity: sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==}
+    engines: {node: '>=4.0.0'}
+
+  pg-pool@3.10.1:
+    resolution: {integrity: sha512-Tu8jMlcX+9d8+QVzKIvM/uJtp07PKr82IUOYEphaWcoBhIYkoHpLXN3qO59nAI11ripznDsEzEv8nUxBVWajGg==}
+    peerDependencies:
+      pg: '>=8.0'
+
+  pg-protocol@1.10.3:
+    resolution: {integrity: sha512-6DIBgBQaTKDJyxnXaLiLR8wBpQQcGWuAESkRBX/t6OwA8YsqP+iVSiond2EDy6Y/dsGk8rh/jtax3js5NeV7JQ==}
+
+  pg-types@2.2.0:
+    resolution: {integrity: sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==}
+    engines: {node: '>=4'}
+
+  pg@8.16.3:
+    resolution: {integrity: sha512-enxc1h0jA/aq5oSDMvqyW3q89ra6XIIDZgCX9vkMrnz5DFTw/Ny3Li2lFQ+pt3L6MCgm/5o2o8HW9hiJji+xvw==}
+    engines: {node: '>= 16.0.0'}
+    peerDependencies:
+      pg-native: '>=3.0.1'
+    peerDependenciesMeta:
+      pg-native:
+        optional: true
+
+  pgpass@1.0.5:
+    resolution: {integrity: sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==}
+
   picocolors@1.1.1:
     resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==}
 
@@ -3466,6 +3513,22 @@ packages:
     resolution: {integrity: sha512-/+5VFTchJDoVj3bhoqi6UeymcD00DAwb1nJwamzPvHEszJ4FpF6SNNbUbOS8yI56qHzdV8eK0qEfOSiodkTdxg==}
     engines: {node: '>= 0.4'}
 
+  postgres-array@2.0.0:
+    resolution: {integrity: sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==}
+    engines: {node: '>=4'}
+
+  postgres-bytea@1.0.0:
+    resolution: {integrity: sha512-xy3pmLuQqRBZBXDULy7KbaitYqLcmxigw14Q5sj8QBVLqEwXfeybIKVWiqAXTlcvdvb0+xkOtDbfQMOf4lST1w==}
+    engines: {node: '>=0.10.0'}
+
+  postgres-date@1.0.7:
+    resolution: {integrity: sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==}
+    engines: {node: '>=0.10.0'}
+
+  postgres-interval@1.2.0:
+    resolution: {integrity: sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==}
+    engines: {node: '>=0.10.0'}
+
   prebuild-install@7.1.3:
     resolution: {integrity: sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==}
     engines: {node: '>=10'}
@@ -3790,6 +3853,10 @@ packages:
   spdx-license-ids@3.0.21:
     resolution: {integrity: sha512-Bvg/8F5XephndSK3JffaRqdT+gyhfqIPwDHpX80tJrF8QQRYMo8sNMeaZ2Dp5+jhwKnUmIOyFFQfHRkjJm5nXg==}
 
+  split2@4.2.0:
+    resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==}
+    engines: {node: '>= 10.x'}
+
   sprintf-js@1.0.3:
     resolution: {integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==}
 
@@ -5388,6 +5455,12 @@ snapshots:
 
   '@types/normalize-package-data@2.4.4': {}
 
+  '@types/pg@8.15.5':
+    dependencies:
+      '@types/node': 20.19.0
+      pg-protocol: 1.10.3
+      pg-types: 2.2.0
+
   '@types/retry@0.12.0': {}
 
   '@types/sarif@2.1.7': {}
@@ -8039,6 +8112,41 @@ snapshots:
 
   pend@1.2.0: {}
 
+  pg-cloudflare@1.2.7:
+    optional: true
+
+  pg-connection-string@2.9.1: {}
+
+  pg-int8@1.0.1: {}
+
+  pg-pool@3.10.1(pg@8.16.3):
+    dependencies:
+      pg: 8.16.3
+
+  pg-protocol@1.10.3: {}
+
+  pg-types@2.2.0:
+    dependencies:
+      pg-int8: 1.0.1
+      postgres-array: 2.0.0
+      postgres-bytea: 1.0.0
+      postgres-date: 1.0.7
+      postgres-interval: 1.2.0
+
+  pg@8.16.3:
+    dependencies:
+      pg-connection-string: 2.9.1
+      pg-pool: 3.10.1(pg@8.16.3)
+      pg-protocol: 1.10.3
+      pg-types: 2.2.0
+      pgpass: 1.0.5
+    optionalDependencies:
+      pg-cloudflare: 1.2.7
+
+  pgpass@1.0.5:
+    dependencies:
+      split2: 4.2.0
+
   picocolors@1.1.1: {}
 
   picomatch@2.3.1: {}
@@ -8059,6 +8167,16 @@ snapshots:
 
   possible-typed-array-names@1.1.0: {}
 
+  postgres-array@2.0.0: {}
+
+  postgres-bytea@1.0.0: {}
+
+  postgres-date@1.0.7: {}
+
+  postgres-interval@1.2.0:
+    dependencies:
+      xtend: 4.0.2
+
   prebuild-install@7.1.3:
     dependencies:
       detect-libc: 2.0.4
@@ -8453,6 +8571,8 @@ snapshots:
 
   spdx-license-ids@3.0.21: {}
 
+  split2@4.2.0: {}
+
   sprintf-js@1.0.3: {}
 
   stack-trace@0.0.10: {}
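
Reviewer note on the `packages/mcp/src/index.ts` logging change: stdout must carry only MCP JSON-RPC messages, so the patch reroutes `console.log`/`console.warn` to stderr and mirrors all three console methods into an opt-in log file. Below is a minimal standalone sketch of that pattern; the `MCP_LOG_FILE` flag, the log path, and the output formats are taken from the patch, while the expected-output comments at the end are illustrative only.

```ts
import * as fs from 'fs';
import * as path from 'path';

// File logging is opt-in so a stray log file never surprises users.
const enableFileLogging = process.env.MCP_LOG_FILE === 'true';
const logFilePath = path.join(process.cwd(), 'claude_context_mcp.log');

function writeToLogFile(level: string, message: string): void {
    if (!enableFileLogging) return;
    try {
        fs.appendFileSync(logFilePath, `[${new Date().toISOString()}] ${level}: ${message}\n`);
    } catch {
        // A failed append must never take the server down.
    }
}

console.log = (...args: unknown[]) => {
    const message = args.join(' ');
    process.stderr.write(`[LOG] ${message}\n`); // stderr, never stdout
    writeToLogFile('LOG', message);
};

console.log('indexing started');
// stderr:                  [LOG] indexing started
// claude_context_mcp.log:  [2025-01-01T00:00:00.000Z] LOG: indexing started  (only when MCP_LOG_FILE=true)
```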
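Reviewer note on the vector-database selection: the server now picks its backend from `VECTOR_DATABASE_PROVIDER`, falling back to Milvus. The sketch below restates that switch as a standalone factory, reading `process.env` directly instead of the parsed `ContextMcpConfig` the patch uses; the `PostgresVectorDatabase` option shape is assumed from the fields shown in the diff, and the defaults follow the documented fallbacks.

```ts
import {
    MilvusVectorDatabase,
    PostgresVectorDatabase,
    VectorDatabase
} from '@zilliz/claude-context-core';

function createVectorDatabase(): VectorDatabase {
    const provider = (process.env.VECTOR_DATABASE_PROVIDER ?? 'milvus').toLowerCase();

    if (provider === 'postgres') {
        // A full connection string takes precedence; otherwise the discrete fields apply.
        const portEnv = process.env.POSTGRES_PORT;
        return new PostgresVectorDatabase({
            connectionString: process.env.POSTGRES_CONNECTION_STRING,
            host: process.env.POSTGRES_HOST ?? 'localhost',
            port: portEnv ? parseInt(portEnv, 10) : 5432,
            database: process.env.POSTGRES_DATABASE ?? 'postgres',
            username: process.env.POSTGRES_USERNAME ?? 'postgres',
            password: process.env.POSTGRES_PASSWORD,
            ssl: process.env.POSTGRES_SSL === 'true'
        });
    }

    // Milvus remains the default provider; the token is only passed when set.
    return new MilvusVectorDatabase({
        address: process.env.MILVUS_ADDRESS ?? 'localhost:19530',
        ...(process.env.MILVUS_TOKEN && { token: process.env.MILVUS_TOKEN })
    });
}
```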