diff --git a/README.md b/README.md index cb5527da..ad334a41 100644 --- a/README.md +++ b/README.md @@ -60,10 +60,20 @@ Copy your key and use it in the configuration examples below as `your-openai-api Use the command line interface to add the Claude Context MCP server: ```bash +# Add the Claude Context MCP server claude mcp add claude-context \ -e OPENAI_API_KEY=sk-your-openai-api-key \ -e MILVUS_TOKEN=your-zilliz-cloud-api-key \ -- npx @zilliz/claude-context-mcp@latest + +# OAPI Forwarding: Use OpenAI-compatible API that forwards to Ollama +claude mcp add claude-context-oapi \ + -e OPENAI_API_KEY=ollama-key \ + -e OPENAI_BASE_URL=http://localhost:8080/v1 \ + -e EMBEDDING_MODEL=nomic-embed-text \ + -e OPENAI_CUSTOM_BASE_USING_OLLAMA_MODEL=true \ + -e MILVUS_TOKEN=your-zilliz-cloud-api-key \ + -- npx @zilliz/claude-context-mcp@latest ``` diff --git a/docs/getting-started/environment-variables.md b/docs/getting-started/environment-variables.md index c2ce8fd3..7a76b24a 100644 --- a/docs/getting-started/environment-variables.md +++ b/docs/getting-started/environment-variables.md @@ -53,6 +53,11 @@ Claude Context supports a global configuration file at `~/.context/.env` to simp | `OLLAMA_MODEL`(alternative to `EMBEDDING_MODEL`) | Model name | | +### OpenAI Custom Base (Ollama Forwarding) +| Variable | Description | Default | +|----------|-------------|---------| +| `OPENAI_CUSTOM_BASE_USING_OLLAMA_MODEL` | Enable OAPI forwarding for Ollama models via OpenAI provider. 
Set to `true` when using OpenAI-compatible API endpoints that forward to Ollama | `false` | + ### Advanced Configuration | Variable | Description | Default | |----------|-------------|---------| diff --git a/packages/core/README.md b/packages/core/README.md index 3ea03911..57955783 100644 --- a/packages/core/README.md +++ b/packages/core/README.md @@ -59,6 +59,14 @@ const embedding = new OpenAIEmbedding({ model: 'text-embedding-3-small' }); +// OAPI Forwarding: Use OpenAI provider with Ollama models +const ollamaEmbedding = new OpenAIEmbedding({ + apiKey: 'ollama-key', + baseURL: 'http://localhost:8080/v1', + model: 'nomic-embed-text', + useOllamaModel: true // Enable OAPI forwarding for Ollama models +}); + // Initialize vector database const vectorDatabase = new MilvusVectorDatabase({ address: process.env.MILVUS_ADDRESS || 'localhost:19530', diff --git a/packages/core/jest.config.js b/packages/core/jest.config.js new file mode 100644 index 00000000..080908fa --- /dev/null +++ b/packages/core/jest.config.js @@ -0,0 +1,17 @@ +module.exports = { + preset: 'ts-jest', + testEnvironment: 'node', + roots: ['<rootDir>/src'], + testMatch: ['**/__tests__/**/*.ts', '**/?(*.)+(spec|test).ts'], + transform: { + '^.+\\.ts$': 'ts-jest', + }, + moduleFileExtensions: ['ts', 'js', 'json', 'node'], + collectCoverageFrom: [ + 'src/**/*.ts', + '!src/**/*.d.ts', + '!src/**/*.test.ts', + ], + coverageDirectory: 'coverage', + coverageReporters: ['text', 'lcov', 'html'], +}; \ No newline at end of file diff --git a/packages/core/package.json b/packages/core/package.json index a6bd3cc6..dbb8399b 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -10,7 +10,10 @@ "clean": "rm -rf dist", "lint": "eslint src --ext .ts", "lint:fix": "eslint src --ext .ts --fix", - "typecheck": "tsc --noEmit" + "typecheck": "tsc --noEmit", + "test": "jest", + "test:watch": "jest --watch", + "test:coverage": "jest --coverage" }, "dependencies": { "@google/genai": "^1.9.0",
"ollama": "^0.5.16", "openai": "^5.1.1", "tree-sitter": "^0.21.1", + "tree-sitter-c-sharp": "^0.21.0", "tree-sitter-cpp": "^0.22.0", "tree-sitter-go": "^0.21.0", "tree-sitter-java": "^0.21.0", "tree-sitter-javascript": "^0.21.0", "tree-sitter-python": "^0.21.0", - "tree-sitter-c-sharp": "^0.21.0", "tree-sitter-rust": "^0.21.0", - "tree-sitter-typescript": "^0.21.0", "tree-sitter-scala": "^0.24.0", + "tree-sitter-typescript": "^0.21.0", "typescript": "^5.0.0", "voyageai": "^0.0.4" }, @@ -55,4 +58,4 @@ "publishConfig": { "access": "public" } -} \ No newline at end of file +} diff --git a/packages/core/src/embedding/ollama-embedding.ts b/packages/core/src/embedding/ollama-embedding.ts index ef3ba070..1b7b4b3d 100644 --- a/packages/core/src/embedding/ollama-embedding.ts +++ b/packages/core/src/embedding/ollama-embedding.ts @@ -16,6 +16,7 @@ export class OllamaEmbedding extends Embedding { private config: OllamaEmbeddingConfig; private dimension: number = 768; // Default dimension for many embedding models private dimensionDetected: boolean = false; // Track if dimension has been detected + private detectionPromise: Promise<number> | null = null; // Track detection process for race condition safety protected maxTokens: number = 2048; // Default context window for Ollama constructor(config: OllamaEmbeddingConfig) { @@ -43,6 +44,29 @@ export class OllamaEmbedding extends Embedding { // If no dimension is provided, it will be detected in the first embed call } + /** + * Ensure dimension is detected (race condition safe) + */ + private async ensureDimensionDetected(): Promise<void> { + if (this.dimensionDetected || this.config.dimension) { + return; + } + + if (this.detectionPromise) { + await this.detectionPromise; + return; + } + + this.detectionPromise = this.detectDimension(); + try { + this.dimension = await this.detectionPromise; + this.dimensionDetected = true; + console.log(`[OllamaEmbedding] 📏 Detected Ollama embedding dimension: ${this.dimension} for model: 
${this.config.model}`); + } finally { + this.detectionPromise = null; + } + } + private setDefaultMaxTokensForModel(model: string): void { // Set different max tokens based on known models if (model?.includes('nomic-embed-text')) { @@ -59,11 +83,7 @@ export class OllamaEmbedding extends Embedding { const processedText = this.preprocessText(text); // Detect dimension on first use if not configured - if (!this.dimensionDetected && !this.config.dimension) { - this.dimension = await this.detectDimension(); - this.dimensionDetected = true; - console.log(`[OllamaEmbedding] 📏 Detected Ollama embedding dimension: ${this.dimension} for model: ${this.config.model}`); - } + await this.ensureDimensionDetected(); const embedOptions: any = { model: this.config.model, @@ -92,12 +112,8 @@ export class OllamaEmbedding extends Embedding { // Preprocess all texts const processedTexts = this.preprocessTexts(texts); - // Detect dimension on first use if not configured - if (!this.dimensionDetected && !this.config.dimension) { - this.dimension = await this.detectDimension(); - this.dimensionDetected = true; - console.log(`[OllamaEmbedding] 📏 Detected Ollama embedding dimension: ${this.dimension} for model: ${this.config.model}`); - } + // Detect dimension on first use if not already detected + await this.ensureDimensionDetected(); // Use Ollama's native batch embedding API const embedOptions: any = { @@ -140,14 +156,13 @@ export class OllamaEmbedding extends Embedding { this.config.model = model; // Reset dimension detection when model changes this.dimensionDetected = false; + this.detectionPromise = null; // Update max tokens for new model this.setDefaultMaxTokensForModel(model); if (!this.config.dimension) { - this.dimension = await this.detectDimension(); - this.dimensionDetected = true; - console.log(`[OllamaEmbedding] 📏 Detected Ollama embedding dimension: ${this.dimension} for model: ${this.config.model}`); + await this.ensureDimensionDetected(); } else { - 
console.log('[OllamaEmbedding] Dimension already detected for model ' + this.config.model); + console.log(`[OllamaEmbedding] Using configured dimension ${this.dimension} for model ${this.config.model}`); } } diff --git a/packages/core/src/embedding/openai-embedding.test.ts b/packages/core/src/embedding/openai-embedding.test.ts new file mode 100644 index 00000000..04d7f74c --- /dev/null +++ b/packages/core/src/embedding/openai-embedding.test.ts @@ -0,0 +1,235 @@ +import OpenAI from 'openai'; +import { OpenAIEmbedding } from './openai-embedding'; +import type { EmbeddingVector } from './base-embedding'; + +// Mock the OpenAI client module +const mockEmbeddingsCreate = jest.fn(); +jest.mock('openai', () => { + return jest.fn().mockImplementation(() => ({ + embeddings: { + create: mockEmbeddingsCreate, + }, + })); +}); + +const MockOpenAI = OpenAI as unknown as jest.Mock; + +describe('OpenAIEmbedding OAPI Forwarding', () => { + const originalEnv = process.env; + let consoleLogSpy: jest.SpyInstance; + + beforeEach(() => { + jest.resetModules(); + process.env = { ...originalEnv }; + mockEmbeddingsCreate.mockClear(); + MockOpenAI.mockClear(); + consoleLogSpy = jest.spyOn(console, 'log').mockImplementation(() => {}); + }); + + afterEach(() => { + process.env = originalEnv; + consoleLogSpy.mockRestore(); + }); + + describe('Constructor and Configuration', () => { + it('should initialize for standard OpenAI API by default', () => { + const embedding = new OpenAIEmbedding({ model: 'text-embedding-3-small', apiKey: 'test-key' }); + expect(embedding['isOllamaViaOAPI']).toBe(false); + expect(embedding.getDimension()).toBe(1536); + expect(consoleLogSpy).not.toHaveBeenCalledWith(expect.stringContaining('Configured for Ollama model')); + }); + + it('should enable OAPI forwarding via config flag useOllamaModel: true', () => { + const embedding = new OpenAIEmbedding({ + model: 'nomic-embed-text', + apiKey: 'ollama-key', + useOllamaModel: true, + }); + 
expect(embedding['isOllamaViaOAPI']).toBe(true); + expect(embedding.getDimension()).toBe(768); + expect(consoleLogSpy).toHaveBeenCalledWith('[OpenAI] Configured for Ollama model nomic-embed-text via OAPI forwarding'); + }); + + it.each([ + ['true'], + ['True'], + ])('should enable OAPI forwarding when OPENAI_CUSTOM_BASE_USING_OLLAMA_MODEL is "%s"', (envValue) => { + process.env.OPENAI_CUSTOM_BASE_USING_OLLAMA_MODEL = envValue; + const embedding = new OpenAIEmbedding({ model: 'nomic-embed-text', apiKey: 'ollama-key' }); + expect(embedding['isOllamaViaOAPI']).toBe(true); + expect(embedding.getDimension()).toBe(768); + }); + + it('should not enable OAPI forwarding for other env var values', () => { + process.env.OPENAI_CUSTOM_BASE_USING_OLLAMA_MODEL = 'false'; + const embedding = new OpenAIEmbedding({ model: 'text-embedding-3-small', apiKey: 'test-key' }); + expect(embedding['isOllamaViaOAPI']).toBe(false); + }); + }); + + describe('baseURL Correction', () => { + it('should append /v1 to baseURL if missing', () => { + new OpenAIEmbedding({ model: 'any-model', apiKey: 'key', baseURL: 'http://localhost:8080' }); + expect(MockOpenAI).toHaveBeenCalledWith({ apiKey: 'key', baseURL: 'http://localhost:8080/v1' }); + expect(consoleLogSpy).toHaveBeenCalledWith('[OpenAI] Auto-correcting baseURL: http://localhost:8080 → http://localhost:8080/v1'); + }); + + it('should append /v1 to baseURL with trailing slash', () => { + new OpenAIEmbedding({ model: 'any-model', apiKey: 'key', baseURL: 'http://localhost:8080/' }); + expect(MockOpenAI).toHaveBeenCalledWith({ apiKey: 'key', baseURL: 'http://localhost:8080/v1' }); + }); + + it('should not modify baseURL if it already contains /v1', () => { + new OpenAIEmbedding({ model: 'any-model', apiKey: 'key', baseURL: 'http://localhost:8080/v1' }); + expect(MockOpenAI).toHaveBeenCalledWith({ apiKey: 'key', baseURL: 'http://localhost:8080/v1' }); + expect(consoleLogSpy).not.toHaveBeenCalledWith(expect.stringContaining('Auto-correcting 
baseURL')); + }); + + it('should not modify official OpenAI API URLs', () => { + const officialURL = 'https://api.openai.com/v1'; + new OpenAIEmbedding({ model: 'any-model', apiKey: 'key', baseURL: officialURL }); + expect(MockOpenAI).toHaveBeenCalledWith({ apiKey: 'key', baseURL: officialURL }); + }); + + it('should pass undefined baseURL if not provided', () => { + new OpenAIEmbedding({ model: 'any-model', apiKey: 'key' }); + expect(MockOpenAI).toHaveBeenCalledWith({ apiKey: 'key', baseURL: undefined }); + }); + }); + + describe('OAPI Forwarding (Ollama)', () => { + const ollamaConfig = { model: 'nomic-embed-text', apiKey: 'ollama-key', useOllamaModel: true }; + + it('should use OAPI-specific logic for embed()', async () => { + const embedding = new OpenAIEmbedding(ollamaConfig); + const mockVector = Array(768).fill(0.1); + mockEmbeddingsCreate.mockResolvedValue({ data: [{ embedding: mockVector }] }); + + const result = await embedding.embed('hello ollama'); + + expect(result.vector).toEqual(mockVector); + expect(result.dimension).toBe(768); + expect(mockEmbeddingsCreate).toHaveBeenCalledWith({ + model: 'nomic-embed-text', + input: 'hello ollama', + encoding_format: 'float', + }); + }); + + it('should detect dimension on first call if default is present', async () => { + const embedding = new OpenAIEmbedding(ollamaConfig); + embedding['dimension'] = 1536; + + const detectionVector = Array(768).fill(0.2); + const embedVector = Array(768).fill(0.3); + mockEmbeddingsCreate + .mockResolvedValueOnce({ data: [{ embedding: detectionVector }] }) + .mockResolvedValueOnce({ data: [{ embedding: embedVector }] }); + + await embedding.embed('test text'); + + expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2); + expect(embedding.getDimension()).toBe(768); + }); + + it('should throw OAPI-specific error on empty response for embed()', async () => { + const embedding = new OpenAIEmbedding(ollamaConfig); + mockEmbeddingsCreate.mockResolvedValue({ data: [] }); + + await 
expect(embedding.embed('test')).rejects.toThrow( + 'Failed to detect Ollama dimension via OAPI for nomic-embed-text' + ); + }); + + it('should throw OAPI-specific error on batch mismatch', async () => { + const embedding = new OpenAIEmbedding(ollamaConfig); + mockEmbeddingsCreate.mockResolvedValue({ data: [{ embedding: [1,2,3] }] }); + + await expect(embedding.embedBatch(['text1', 'text2'])).rejects.toThrow( + 'OAPI forwarding returned 1 embeddings but expected 2 for Ollama model nomic-embed-text.' + ); + }); + }); + + describe('Standard OpenAI Embedding', () => { + const openaiConfig = { model: 'text-embedding-3-small', apiKey: 'openai-key' }; + + it('should generate embedding for a known model', async () => { + const embedding = new OpenAIEmbedding(openaiConfig); + const mockVector = Array(1536).fill(0.5); + mockEmbeddingsCreate.mockResolvedValue({ data: [{ embedding: mockVector }] }); + + const result = await embedding.embed('hello openai'); + + expect(result.vector).toEqual(mockVector); + expect(result.dimension).toBe(1536); + expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1); + expect(consoleLogSpy).not.toHaveBeenCalledWith(expect.stringContaining('Detecting')); + }); + + it('should detect dimension for unknown model before embedding', async () => { + const customModelConfig = { model: 'my-custom-model', apiKey: 'openai-key' }; + const embedding = new OpenAIEmbedding(customModelConfig); + + const detectionVector = Array(512).fill(0.3); + const embedVector = Array(512).fill(0.4); + mockEmbeddingsCreate + .mockResolvedValueOnce({ data: [{ embedding: detectionVector }] }) + .mockResolvedValueOnce({ data: [{ embedding: embedVector }] }); + + const result = await embedding.embed('test'); + + expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2); + expect(embedding.getDimension()).toBe(512); + expect(result.dimension).toBe(512); + expect(result.vector).toEqual(embedVector); + }); + + it('should throw specific error for empty API response', async () => { + const 
embedding = new OpenAIEmbedding(openaiConfig); + mockEmbeddingsCreate.mockResolvedValue({ data: [] }); + + await expect(embedding.embed('test')).rejects.toThrow( + 'API returned empty response.' + ); + }); + + it('should handle batch embeddings correctly', async () => { + const embedding = new OpenAIEmbedding(openaiConfig); + const vectors = [Array(1536).fill(0.1), Array(1536).fill(0.2)]; + mockEmbeddingsCreate.mockResolvedValue({ + data: [ + { embedding: vectors[0] }, + { embedding: vectors[1] }, + ] + }); + + const results = await embedding.embedBatch(['text1', 'text2']); + expect(results.length).toBe(2); + expect(results[0].vector).toEqual(vectors[0]); + expect(results[1].dimension).toBe(1536); + }); + }); + + describe('Backward Compatibility', () => { + it('should maintain existing OpenAI interface without OAPI features', () => { + const embedding = new OpenAIEmbedding({ + model: 'text-embedding-3-small', + apiKey: 'test-key' + }); + + // Verify all existing methods still work + expect(embedding.getProvider()).toBe('OpenAI'); + expect(embedding.getDimension()).toBe(1536); + expect(typeof embedding.getClient()).toBe('object'); + expect(typeof embedding.setModel).toBe('function'); + }); + + it('should support all existing static methods', () => { + const models = OpenAIEmbedding.getSupportedModels(); + expect(models['text-embedding-3-small']).toBeDefined(); + expect(models['text-embedding-3-large']).toBeDefined(); + expect(models['text-embedding-ada-002']).toBeDefined(); + }); + }); +}); \ No newline at end of file diff --git a/packages/core/src/embedding/openai-embedding.ts b/packages/core/src/embedding/openai-embedding.ts index be0de117..d2a1e6db 100644 --- a/packages/core/src/embedding/openai-embedding.ts +++ b/packages/core/src/embedding/openai-embedding.ts @@ -5,25 +5,93 @@ export interface OpenAIEmbeddingConfig { model: string; apiKey: string; baseURL?: string; // OpenAI supports custom baseURL + useOllamaModel?: boolean; // Whether this is actually an 
Ollama model via OAPI forwarding } +// Constants +const DEFAULT_OLLAMA_DIMENSION = 768; +const OPENAI_API_DOMAIN = 'api.openai.com'; + export class OpenAIEmbedding extends Embedding { private client: OpenAI; private config: OpenAIEmbeddingConfig; private dimension: number = 1536; // Default dimension for text-embedding-3-small + private isDimensionDetected: boolean = false; // Track if dimension has been detected for any provider type + private dimensionDetectionPromise: Promise<number> | null = null; // Track detection process (unified for all providers) protected maxTokens: number = 8192; // Maximum tokens for OpenAI embedding models + private isOllamaViaOAPI: boolean = false; // Whether using Ollama model via OAPI constructor(config: OpenAIEmbeddingConfig) { super(); this.config = config; + + // Check environment variable for Ollama via OAPI + this.isOllamaViaOAPI = config.useOllamaModel || + (process.env.OPENAI_CUSTOM_BASE_USING_OLLAMA_MODEL || '').toLowerCase() === 'true'; + + // Auto-correct baseURL if needed + const correctedBaseURL = this.correctBaseURL(config.baseURL); + this.client = new OpenAI({ apiKey: config.apiKey, - baseURL: config.baseURL, + baseURL: correctedBaseURL, }); + + if (this.isOllamaViaOAPI) { + this.log(`Configured for Ollama model ${config.model} via OAPI forwarding`); + // Reset dimension since Ollama models have different dimensions + this.dimension = DEFAULT_OLLAMA_DIMENSION; // Common Ollama embedding dimension + } else { + // Set dimension detection flag for known models + const knownModels = OpenAIEmbedding.getSupportedModels(); + if (knownModels[config.model]) { + this.dimension = knownModels[config.model].dimension; + this.isDimensionDetected = true; + } + } + } + + /** + * Internal logging method that can be easily controlled + */ + private log(message: string): void { + // In production, this could be replaced with proper logging library + // For now, only log if debugging is enabled + if (process.env.NODE_ENV !== 'production' || 
process.env.DEBUG_EMBEDDINGS) { + console.log(`[OpenAI] ${message}`); + } + } + + /** + * Correct baseURL by adding /v1 if needed for OpenAI compatibility + */ + private correctBaseURL(baseURL?: string): string | undefined { + if (!baseURL) return baseURL; + + // If it's the official OpenAI API, don't modify + if (baseURL.includes(OPENAI_API_DOMAIN)) { + return baseURL; + } + + // For custom endpoints, ensure /v1 path is present + if (!baseURL.endsWith('/v1') && !baseURL.includes('/v1/')) { + const normalizedURL = baseURL.endsWith('/') ? baseURL.slice(0, -1) : baseURL; + this.log(`Auto-correcting baseURL: ${baseURL} → ${normalizedURL}/v1`); + return `${normalizedURL}/v1`; + } + + return baseURL; } async detectDimension(testText: string = "test"): Promise<number> { const model = this.config.model || 'text-embedding-3-small'; + + // Special handling for Ollama models via OAPI + if (this.isOllamaViaOAPI) { + return this.detectOllamaDimensionViaOAPI(testText, model); + } + + // Standard OpenAI dimension detection const knownModels = OpenAIEmbedding.getSupportedModels(); // Use known dimension for standard models @@ -39,6 +107,11 @@ export class OpenAIEmbedding extends Embedding { input: processedText, encoding_format: 'float', }); + + if (!response.data || response.data.length === 0) { + throw new Error(OpenAIEmbedding.getEmptyResponseError()); + } + return response.data[0].embedding.length; } catch (error) { const errorMessage = error instanceof Error ? 
error.message : 'Unknown error'; @@ -52,16 +125,50 @@ throw new Error(`Failed to detect dimension for model ${model}: ${errorMessage}`); } } + + /** + * Detect dimension for Ollama models accessed via OAPI + */ + private async detectOllamaDimensionViaOAPI(testText: string, model: string): Promise<number> { + this.log(`Detecting Ollama model dimension via OAPI for ${model}...`); + + try { + const processedText = this.preprocessText(testText); + const response = await this.client.embeddings.create({ + model: model, + input: processedText, + encoding_format: 'float', + }); + + if (!response.data || response.data.length === 0) { + throw new Error(OpenAIEmbedding.getOllamaEmptyResponseError(model)); + } + + const dimension = response.data[0].embedding.length; + this.log(`Detected Ollama dimension via OAPI: ${dimension}`); + return dimension; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : 'Unknown error'; + throw new Error(`Failed to detect Ollama dimension via OAPI for ${model}: ${errorMessage}. 
Ensure OPENAI_CUSTOM_BASE_USING_OLLAMA_MODEL=true is set correctly.`); + } + } async embed(text: string): Promise<EmbeddingVector> { + // Special handling for Ollama models via OAPI + if (this.isOllamaViaOAPI) { + return this.embedOllamaViaOAPI(text); + } + + // Standard OpenAI embedding logic const processedText = this.preprocessText(text); const model = this.config.model || 'text-embedding-3-small'; const knownModels = OpenAIEmbedding.getSupportedModels(); if (knownModels[model] && this.dimension !== knownModels[model].dimension) { this.dimension = knownModels[model].dimension; - } else if (!knownModels[model]) { - this.dimension = await this.detectDimension(); + this.isDimensionDetected = true; + } else if (!knownModels[model] && !this.isDimensionDetected) { + await this.ensureDimensionDetected(model); } try { @@ -71,9 +178,11 @@ export class OpenAIEmbedding extends Embedding { encoding_format: 'float', }); - // Update dimension from actual response - this.dimension = response.data[0].embedding.length; - + // Validate response before accessing data + if (!response.data || response.data.length === 0) { + throw new Error(OpenAIEmbedding.getEmptyResponseError()); + } + return { vector: response.data[0].embedding, dimension: this.dimension @@ -83,16 +192,56 @@ throw new Error(`Failed to generate OpenAI embedding: ${errorMessage}`); } } + + /** + * Embed text using Ollama model via OAPI forwarding + */ + private async embedOllamaViaOAPI(text: string): Promise<EmbeddingVector> { + const processedText = this.preprocessText(text); + const model = this.config.model; + + // Detect dimension if not already detected for Ollama + if (!this.isDimensionDetected) { + await this.ensureOllamaDimensionDetected(model); + } + + try { + const response = await this.client.embeddings.create({ + model: model, + input: processedText, + encoding_format: 'float', + }); + + if (!response.data || response.data.length === 0) { + throw new Error(`OAPI forwarding returned empty 
response for Ollama model ${model}. Check OAPI service and Ollama model availability.`); + } + + return { + vector: response.data[0].embedding, + dimension: this.dimension + }; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : 'Unknown error'; + throw new Error(`Failed to embed via OAPI for Ollama model ${model}: ${errorMessage}`); + } + } async embedBatch(texts: string[]): Promise<EmbeddingVector[]> { + // Special handling for Ollama models via OAPI + if (this.isOllamaViaOAPI) { + return this.embedBatchOllamaViaOAPI(texts); + } + + // Standard OpenAI batch embedding const processedTexts = this.preprocessTexts(texts); const model = this.config.model || 'text-embedding-3-small'; const knownModels = OpenAIEmbedding.getSupportedModels(); if (knownModels[model] && this.dimension !== knownModels[model].dimension) { this.dimension = knownModels[model].dimension; - } else if (!knownModels[model]) { - this.dimension = await this.detectDimension(); + this.isDimensionDetected = true; + } else if (!knownModels[model] && !this.isDimensionDetected) { + await this.ensureDimensionDetected(model); } try { @@ -102,7 +251,10 @@ encoding_format: 'float', }); - this.dimension = response.data[0].embedding.length; + // Validate response array length matches input + if (!response.data || response.data.length !== processedTexts.length) { + throw new Error(OpenAIEmbedding.getBatchMismatchError(response.data?.length || 0, processedTexts.length)); + } return response.data.map((item) => ({ vector: item.embedding, @@ -113,6 +265,42 @@ throw new Error(`Failed to generate OpenAI batch embeddings: ${errorMessage}`); } } + + /** + * Batch embed using Ollama model via OAPI forwarding + */ + private async embedBatchOllamaViaOAPI(texts: string[]): Promise<EmbeddingVector[]> { + this.log(`Batch embedding ${texts.length} texts with Ollama model ${this.config.model} via OAPI...`); + + const processedTexts = 
this.preprocessTexts(texts); + const model = this.config.model; + + // Detect dimension if not already detected for Ollama + if (!this.isDimensionDetected) { + await this.ensureOllamaDimensionDetected(model); + } + + try { + const response = await this.client.embeddings.create({ + model: model, + input: processedTexts, + encoding_format: 'float', + }); + + // Critical validation for OAPI forwarding to Ollama + if (!response.data || response.data.length !== processedTexts.length) { + throw new Error(OpenAIEmbedding.getOllamaBatchMismatchError(response.data?.length || 0, processedTexts.length, model)); + } + + return response.data.map((item) => ({ + vector: item.embedding, + dimension: this.dimension + })); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : 'Unknown error'; + throw new Error(`Failed to batch embed via OAPI for Ollama model ${model}: ${errorMessage}`); + } + } getDimension(): number { // For custom models, we need to detect the dimension first @@ -140,11 +328,24 @@ */ async setModel(model: string): Promise<void> { this.config.model = model; + + // Reset all detection states + this.isDimensionDetected = false; + this.dimensionDetectionPromise = null; + const knownModels = OpenAIEmbedding.getSupportedModels(); - if (knownModels[model]) { + + if (knownModels[model] && !this.isOllamaViaOAPI) { + // Known OpenAI model this.dimension = knownModels[model].dimension; + this.isDimensionDetected = true; } else { - this.dimension = await this.detectDimension(); + // Unknown model or OAPI model - detect dimension + if (this.isOllamaViaOAPI) { + await this.ensureOllamaDimensionDetected(model); + } else { + await this.ensureDimensionDetected(model); + } } } @@ -174,4 +375,67 @@ } }; } + + /** + * Error message generators for consistent error reporting + */ + private static getEmptyResponseError(): string { + return `API returned empty 
response. This might indicate: 1) Incorrect baseURL (missing /v1?), 2) Invalid API key, 3) Model not available, or 4) Input text was filtered out`; + } + + private static getOllamaEmptyResponseError(model: string): string { + return `OAPI forwarding returned empty response for Ollama model ${model}. Check: 1) OAPI service is running, 2) Ollama model is available, 3) API key is valid for OAPI service`; + } + + private static getBatchMismatchError(actual: number, expected: number): string { + return `API returned ${actual} embeddings but expected ${expected}. This might indicate: 1) Some texts were filtered/rejected, 2) API rate limiting, 3) Invalid API key, or 4) OAPI forwarding issues`; + } + + private static getOllamaBatchMismatchError(actual: number, expected: number, model: string): string { + return `OAPI forwarding returned ${actual} embeddings but expected ${expected} for Ollama model ${model}. This indicates: 1) Some texts were rejected by Ollama, 2) OAPI service issues, 3) Ollama model capacity limits. 
Check OAPI logs and Ollama status.`; + } + + /** + * Ensure dimension is detected for standard OpenAI models (race condition safe) + */ + private async ensureDimensionDetected(model: string): Promise<void> { + if (this.isDimensionDetected) { + return; + } + + if (this.dimensionDetectionPromise) { + await this.dimensionDetectionPromise; + return; + } + + this.dimensionDetectionPromise = this.detectDimension(); + try { + this.dimension = await this.dimensionDetectionPromise; + this.isDimensionDetected = true; + } finally { + this.dimensionDetectionPromise = null; + } + } + + /** + * Ensure OAPI dimension is detected for Ollama models (race condition safe) + */ + private async ensureOllamaDimensionDetected(model: string): Promise<void> { + if (this.isDimensionDetected) { + return; + } + + if (this.dimensionDetectionPromise) { + await this.dimensionDetectionPromise; + return; + } + + this.dimensionDetectionPromise = this.detectOllamaDimensionViaOAPI('test', model); + try { + this.dimension = await this.dimensionDetectionPromise; + this.isDimensionDetected = true; + } finally { + this.dimensionDetectionPromise = null; + } + } } \ No newline at end of file diff --git a/packages/mcp/README.md b/packages/mcp/README.md index 7021f1e3..e71e83bf 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -317,6 +317,26 @@ Pasting the following configuration into your Cursor `~/.cursor/mcp.json` file i } ``` +**OAPI Forwarding Configuration (OpenAI-compatible API → Ollama):** +```json +{ + "mcpServers": { + "claude-context": { + "command": "npx", + "args": ["-y", "@zilliz/claude-context-mcp@latest"], + "env": { + "EMBEDDING_PROVIDER": "OpenAI", + "OPENAI_API_KEY": "ollama-key", + "OPENAI_BASE_URL": "http://localhost:8080/v1", + "EMBEDDING_MODEL": "nomic-embed-text", + "OPENAI_CUSTOM_BASE_USING_OLLAMA_MODEL": "true", + "MILVUS_TOKEN": "your-zilliz-cloud-api-key" + } + } + } +} +``` + diff --git a/packages/vscode-extension/src/config/configManager.ts 
b/packages/vscode-extension/src/config/configManager.ts index 4be832ab..7f8347ea 100644 --- a/packages/vscode-extension/src/config/configManager.ts +++ b/packages/vscode-extension/src/config/configManager.ts @@ -40,7 +40,7 @@ type FieldDefinition = { name: string; type: string; description: string; - inputType?: 'text' | 'password' | 'url' | 'select' | 'select-with-custom'; + inputType?: 'text' | 'password' | 'url' | 'select' | 'select-with-custom' | 'checkbox'; placeholder?: string; required?: boolean; }; @@ -55,7 +55,8 @@ const EMBEDDING_PROVIDERS = { { name: 'apiKey', type: 'string', description: 'OpenAI API key', inputType: 'password', required: true } ] as FieldDefinition[], optionalFields: [ - { name: 'baseURL', type: 'string', description: 'Custom API endpoint URL (optional)', inputType: 'url', placeholder: 'https://api.openai.com/v1' } + { name: 'baseURL', type: 'string', description: 'Custom API endpoint URL (optional)', inputType: 'url', placeholder: 'https://api.openai.com/v1' }, + { name: 'useOllamaModel', type: 'boolean', description: 'Enable OAPI forwarding for Ollama models via OpenAI-compatible APIs', inputType: 'checkbox' } ] as FieldDefinition[], defaultConfig: { model: 'text-embedding-3-small'