diff --git a/src/memory/SemanticMemory.ts b/src/memory/SemanticMemory.ts
index a1f19d23a..1ca0c4468 100644
--- a/src/memory/SemanticMemory.ts
+++ b/src/memory/SemanticMemory.ts
@@ -567,6 +567,25 @@ export class SemanticMemory {
 
     this.db = constructor(this.config.dbPath) as Database;
 
+    // Pre-load sqlite-vec so vec0 virtual tables are queryable during the probe
+    // below. Without this, an existing entity_embeddings (vec0) table — which is
+    // recreated asynchronously by initializeVectorSearch() the first time vector
+    // search runs — causes the probe to throw "no such module: vec0" and triggers a
+    // false-positive corruption quarantine. The probe runs synchronously in open(),
+    // before the async vector-search init has had a chance to load the extension.
+    //
+    // sqlite-vec is loaded directly here (not via EmbeddingProvider) because:
+    //   1. EmbeddingProvider may not be attached at open() time.
+    //   2. The probe must succeed regardless of whether vector search is wired up,
+    //      since the on-disk DB schema doesn't know about that runtime choice.
+    //
+    // Failure is deliberately swallowed: the probe loop below has its own
+    // missing-module guard that skips virtual tables whose extension isn't available.
+    try {
+      const vec = await import('sqlite-vec');
+      vec.load(this.db);
+    } catch { /* @silent-fallback-ok: sqlite-vec unavailable — probe will skip vec0 tables */ }
+
     // Integrity check — auto-recover from corruption (JSONL is source of truth).
     // Corrupt DBs are quarantined (renamed) not deleted, and a marker file is written
     // so operators can notice the recovery after the fact.
@@ -589,9 +608,23 @@ export class SemanticMemory {
     if (!this._needsRebuild) {
       try {
         const tables = this.db!.prepare(
-          "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'fts%' AND name NOT LIKE 'sqlite%'"
-        ).all() as Array<{ name: string }>;
+          "SELECT name, sql FROM sqlite_master WHERE type='table' AND name NOT LIKE 'fts%' AND name NOT LIKE 'sqlite%'"
+        ).all() as Array<{ name: string; sql: string | null }>;
         for (const t of tables) {
+          // Virtual tables require their module to be loaded into the connection.
+          // If a virtual-table probe throws "no such module", the table isn't
+          // corrupt — its extension just isn't available (sqlite-vec missing,
+          // a custom module not yet registered, etc.). Treating that as
+          // corruption produces an infinite quarantine loop on every open.
+          if (t.sql && /^\s*CREATE\s+VIRTUAL\s+TABLE/i.test(t.sql)) {
+            try {
+              this.db!.prepare(`SELECT * FROM "${t.name}" LIMIT 100`).all();
+            } catch (vErr) {
+              if (/no such module/i.test((vErr as Error).message)) continue;
+              throw vErr;
+            }
+            continue;
+          }
           this.db!.prepare(`SELECT * FROM "${t.name}" LIMIT 100`).all();
         }
       } catch (err) {
diff --git a/tests/unit/semantic-memory-corruption-recovery.test.ts b/tests/unit/semantic-memory-corruption-recovery.test.ts
index 438e03bdf..e8267a5c5 100644
--- a/tests/unit/semantic-memory-corruption-recovery.test.ts
+++ b/tests/unit/semantic-memory-corruption-recovery.test.ts
@@ -22,6 +22,7 @@ import fs from 'node:fs';
 import path from 'node:path';
 import os from 'node:os';
 import { SemanticMemory } from '../../src/memory/SemanticMemory.js';
+import { EmbeddingProvider } from '../../src/memory/EmbeddingProvider.js';
 
 interface Setup {
   dir: string;
@@ -310,3 +311,106 @@ describe('SemanticMemory corruption auto-recovery', () => {
     expect(afterSecond.corruptFiles.length).toBe(beforeSecond.corruptFiles.length);
   });
 });
+
+/**
+ * Virtual-table probe load-order tests.
+ *
+ * Contract:
+ *   The secondary probe-read MUST NOT treat "no such module: <name>" on a
+ *   virtual table as corruption. Virtual tables require their extension to be
+ *   loaded into the connection before they're queryable, and the probe runs
+ *   synchronously in open() before any deferred extension load could happen.
+ *   Treating a missing-module error as corruption produces an infinite
+ *   quarantine loop on every reopen (see the upstream gap report).
+ *
+ * Concretely for sqlite-vec: open() pre-loads the extension when available,
+ * so a healthy DB with an `entity_embeddings` (vec0) virtual table opens
+ * without quarantine.
+ */
+describe('SemanticMemory probe — virtual tables and load order', () => {
+  let setup: Setup;
+  beforeEach(() => { setup = makeSetup(); });
+  afterEach(() => setup.cleanup());
+
+  async function seedDbWithVec0VirtualTable(dbPath: string): Promise<void> {
+    // Seed the DB exactly the way SemanticMemory's hybrid-search path does: open
+    // through SemanticMemory with an EmbeddingProvider attached, initialize vector
+    // search (which loads sqlite-vec and creates the `entity_embeddings` vec0
+    // virtual table via VectorSearch.createTable), then close. The on-disk schema
+    // now has a vec0 virtual table that requires the sqlite-vec module to query.
+    const memory = new SemanticMemory({ dbPath, decayHalfLifeDays: 30, lessonDecayHalfLifeDays: 90, staleThreshold: 0.2 });
+    memory.setEmbeddingProvider(new EmbeddingProvider());
+    await memory.open();
+    await memory.initializeVectorSearch();
+    memory.close();
+  }
+
+  it('does not quarantine a healthy DB that contains a vec0 virtual table', async () => {
+    await seedDbWithVec0VirtualTable(setup.dbPath);
+
+    // Sanity: schema actually contains a CREATE VIRTUAL TABLE for entity_embeddings.
+    const BetterSqlite3 = (await import('better-sqlite3')).default;
+    const inspect = new BetterSqlite3(setup.dbPath, { readonly: true });
+    const row = inspect.prepare(
+      "SELECT name, sql FROM sqlite_master WHERE name='entity_embeddings'"
+    ).get() as { name: string; sql: string } | undefined;
+    inspect.close();
+    expect(row).toBeDefined();
+    expect(row!.sql).toMatch(/CREATE\s+VIRTUAL\s+TABLE/i);
+
+    // Reopen via SemanticMemory — must not throw and must not quarantine.
+    const mem = new SemanticMemory({ dbPath: setup.dbPath, decayHalfLifeDays: 30, lessonDecayHalfLifeDays: 90, staleThreshold: 0.2 });
+    await expect(mem.open()).resolves.not.toThrow();
+    mem.close();
+
+    const { corruptFiles, markerFiles } = listCorruptArtifacts(setup.dir);
+    expect(corruptFiles).toEqual([]);
+    expect(markerFiles).toEqual([]);
+  });
+
+  it('opening repeatedly does not accumulate quarantine artifacts (no probe-loop regression)', async () => {
+    await seedDbWithVec0VirtualTable(setup.dbPath);
+
+    for (let i = 0; i < 3; i++) {
+      const mem = new SemanticMemory({ dbPath: setup.dbPath, decayHalfLifeDays: 30, lessonDecayHalfLifeDays: 90, staleThreshold: 0.2 });
+      await mem.open();
+      mem.close();
+    }
+
+    const { corruptFiles, markerFiles } = listCorruptArtifacts(setup.dir);
+    expect(corruptFiles).toEqual([]);
+    expect(markerFiles).toEqual([]);
+  });
+
+  it('opens cleanly with no embedding provider attached (probe still survives the vec0 schema)', async () => {
+    // The deferred-attachment shape: vector search may not be wired up on every
+    // reopen, but the on-disk schema still contains the vec0 virtual table from
+    // a previous run. The pre-load + per-table missing-module guard must cover
+    // this path so we don't quarantine.
+    await seedDbWithVec0VirtualTable(setup.dbPath);
+
+    const mem = new SemanticMemory({ dbPath: setup.dbPath, decayHalfLifeDays: 30, lessonDecayHalfLifeDays: 90, staleThreshold: 0.2 });
+    // Intentionally no setEmbeddingProvider() — exercise the "vec0 table exists
+    // on disk but caller hasn't asked for vector search" reopen path.
+    await mem.open();
+    mem.close();
+
+    const { corruptFiles, markerFiles } = listCorruptArtifacts(setup.dir);
+    expect(corruptFiles).toEqual([]);
+    expect(markerFiles).toEqual([]);
+  });
+
+  it('genuine probe-detected corruption still quarantines (regression guard for the virtual-table carve-out)', async () => {
+    // Make sure the virtual-table skip didn't accidentally widen the probe's
+    // tolerance for actual corruption in non-virtual tables.
+    await writePartiallyCorruptDb(setup.dbPath);
+
+    const mem = new SemanticMemory({ dbPath: setup.dbPath, decayHalfLifeDays: 30, lessonDecayHalfLifeDays: 90, staleThreshold: 0.2 });
+    await mem.open();
+    mem.close();
+
+    const { corruptFiles, markerFiles } = listCorruptArtifacts(setup.dir);
+    expect(corruptFiles.length).toBeGreaterThanOrEqual(1);
+    expect(markerFiles.length).toBe(1);
+  });
+});