-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathembedder.ts
More file actions
120 lines (106 loc) · 3.91 KB
/
embedder.ts
File metadata and controls
120 lines (106 loc) · 3.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/**
* Local embedding client for hybrid BM25+vector search.
*
* SECURITY DESIGN:
* - Uses Ollama at hardcoded 127.0.0.1:11434 — NOT user-supplied, no SSRF risk
* - Only sends text content to Ollama — no credentials, no code, no env vars
* - Completely optional: gracefully returns null if Ollama is not running
* - Vectors are float arrays — cannot contain injection payloads
*
* MODEL VERSION TRACKING:
* - Returns model name and dimensions alongside the vector
* - knowledge.ts stores model_name in embeddings table
* - On model change, stale embeddings are excluded from cosine reranking
* - Switch models with: ZC_OLLAMA_MODEL=mxbai-embed-large
*
* Supported models (via Ollama):
* nomic-embed-text (768d, MIT license, default)
* mxbai-embed-large (1024d, Apache 2.0)
* all-minilm (384d, Apache 2.0, fastest)
*/
import { Config } from "./config.js";
/**
 * The currently active model name — exported so knowledge.ts can filter stale vectors.
 *
 * Read once from `Config.OLLAMA_MODEL` when this module is evaluated; it is a
 * plain constant and is not re-read afterwards.
 */
export const ACTIVE_MODEL = Config.OLLAMA_MODEL;
/** An embedding vector together with the metadata identifying the model that produced it. */
export interface EmbeddingResult {
/** The embedding values, stored as 32-bit floats. */
vector: Float32Array;
/** Name of the model used for the request (getEmbedding fills this from `Config.OLLAMA_MODEL`). */
modelName: string;
/** Element count of the vector (getEmbedding sets this to `vector.length`). */
dimensions: number;
}
// Module-level availability cache — avoid hammering Ollama on every call
// `null` means no probe has run yet; `true`/`false` is the outcome of the most
// recent probe (a failed embedding request also forces it to `false`).
let ollamaAvailable: boolean | null = null;
// `Date.now()` value (milliseconds) captured at the start of the most recent
// probe; stays 0 until the first probe completes.
let lastAvailabilityCheck = 0;
/**
 * Report whether the Ollama server answered its tags endpoint successfully.
 *
 * The answer is cached in module state: while fewer than
 * `Config.EMBED_AVAIL_TTL` milliseconds have passed since the last probe, the
 * cached value is returned without touching the network. Otherwise a GET is
 * issued to `Config.OLLAMA_TAGS_URL` and aborted after 2 seconds.
 *
 * @returns `true` when the probe got an OK (2xx) response; `false` on any
 *          other status, network error, or timeout. Never rejects.
 */
async function isOllamaAvailable(): Promise<boolean> {
  const now = Date.now();
  if (ollamaAvailable !== null && now - lastAvailabilityCheck < Config.EMBED_AVAIL_TTL) {
    return ollamaAvailable;
  }

  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), 2_000);
  let reachable = false;
  try {
    const resp = await fetch(Config.OLLAMA_TAGS_URL, { signal: ctrl.signal });
    reachable = resp.ok;
  } catch {
    // Connection refused, DNS failure, abort on timeout, … — all mean "not available".
    reachable = false;
  } finally {
    // Always disarm the timer; previously it stayed pending for up to 2 s
    // whenever fetch rejected (the common case when Ollama is not running).
    clearTimeout(timer);
  }

  ollamaAvailable = reachable;
  // Stamp with the time captured before the request, as the TTL is measured from probe start.
  lastAvailabilityCheck = now;
  return reachable;
}
/**
 * Compute an embedding vector for the given text.
 *
 * The text is truncated to `Config.EMBED_MAX_CHARS` characters and POSTed as
 * JSON (`{ model, prompt }`) to `Config.OLLAMA_URL`. The whole exchange —
 * request and response-body read — is aborted after `Config.EMBED_TIMEOUT_MS`.
 *
 * @param text - Text to embed; only the leading `Config.EMBED_MAX_CHARS` characters are sent.
 * @returns The vector with model metadata for version-tracking, or `null` if
 *          Ollama is not available, the request fails or times out, or the
 *          response carries no usable `embedding` array — caller falls back
 *          to BM25-only. Never rejects.
 */
export async function getEmbedding(text: string): Promise<EmbeddingResult | null> {
  if (!(await isOllamaAvailable())) return null;

  const truncated = text.slice(0, Config.EMBED_MAX_CHARS);
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), Config.EMBED_TIMEOUT_MS);
  try {
    const resp = await fetch(Config.OLLAMA_URL, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model: Config.OLLAMA_MODEL, prompt: truncated }),
      signal: controller.signal,
    });
    if (!resp.ok) {
      // Treat an error status as "server unusable" so later calls skip the request.
      ollamaAvailable = false;
      return null;
    }

    // The timer is still armed here on purpose: a stalled body read is aborted too.
    const data = (await resp.json()) as { embedding?: unknown };
    const embedding = data.embedding;
    if (!Array.isArray(embedding) || embedding.length === 0) return null;
    // Reject payloads with non-numeric or non-finite entries; they would
    // silently turn into NaN inside the Float32Array and poison similarity scores.
    if (!embedding.every((x) => typeof x === "number" && Number.isFinite(x))) return null;

    const vector = new Float32Array(embedding as number[]);
    return {
      vector,
      modelName: Config.OLLAMA_MODEL,
      dimensions: vector.length,
    };
  } catch {
    // Network error, abort, or unparsable body: mark unavailable and degrade gracefully.
    ollamaAvailable = false;
    return null;
  } finally {
    // Disarm the timeout on every exit path (success, early return, or exception).
    clearTimeout(timer);
  }
}
/**
 * Compute the cosine of the angle between two equally sized float vectors.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns `dot(a, b) / (|a| * |b|)`, or 0 when the vectors are empty, have
 *          different lengths, or either one has zero magnitude.
 */
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  const size = a.length;
  if (size === 0 || size !== b.length) return 0;

  let dotProduct = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  let idx = 0;
  while (idx < size) {
    const x = a[idx]!;
    const y = b[idx]!;
    dotProduct += x * y;
    sumSqA += x * x;
    sumSqB += y * y;
    idx += 1;
  }

  const magnitude = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
  if (magnitude === 0) {
    return 0;
  }
  return dotProduct / magnitude;
}
/**
 * Serialize Float32Array to Buffer for SQLite BLOB storage.
 *
 * Only the bytes the view actually covers are exposed: `v.byteOffset` and
 * `v.byteLength` are honoured, so a subarray or a view over a larger
 * ArrayBuffer serializes to exactly `v.length * 4` bytes instead of the whole
 * backing buffer.
 *
 * @param v - Vector to serialize.
 * @returns A Buffer over the vector's bytes in platform byte order. No copy is
 *          made — the Buffer shares memory with `v`.
 */
export function serializeVector(v: Float32Array): Buffer {
  return Buffer.from(v.buffer, v.byteOffset, v.byteLength);
}
/**
 * Deserialize Buffer from SQLite BLOB back to Float32Array.
 *
 * A Float32Array view can only start at a byte offset that is a multiple of 4.
 * When the Buffer is suitably aligned, a zero-copy view over its memory is
 * returned. When it is not (for example a slice of a larger Buffer), the bytes
 * are copied into fresh aligned storage instead of throwing a RangeError.
 *
 * @param buf - Raw bytes holding 32-bit floats in platform byte order.
 * @returns A vector of `floor(buf.byteLength / 4)` elements; trailing bytes
 *          that do not form a complete float are ignored.
 */
export function deserializeVector(buf: Buffer): Float32Array {
  const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
  const count = Math.floor(buf.byteLength / bytesPerFloat);

  if (buf.byteOffset % bytesPerFloat === 0) {
    // Aligned: view the existing memory directly, no copy.
    return new Float32Array(buf.buffer, buf.byteOffset, count);
  }

  // Misaligned: copy the payload to offset 0 of a new ArrayBuffer.
  const aligned = new Uint8Array(count * bytesPerFloat);
  aligned.set(buf.subarray(0, count * bytesPerFloat));
  return new Float32Array(aligned.buffer);
}