diff --git a/Cargo.lock b/Cargo.lock index caec34b..c51638f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1253,7 +1253,7 @@ dependencies = [ [[package]] name = "icm-cli" -version = "0.10.2" +version = "0.10.4" dependencies = [ "anyhow", "chrono", @@ -1311,6 +1311,7 @@ dependencies = [ "serde_json", "sqlite-vec", "tempfile", + "tracing", "zerocopy", ] diff --git a/crates/icm-cli/src/cloud.rs b/crates/icm-cli/src/cloud.rs index ed1e796..dce09c4 100644 --- a/crates/icm-cli/src/cloud.rs +++ b/crates/icm-cli/src/cloud.rs @@ -77,6 +77,14 @@ pub fn save_credentials(creds: &Credentials) -> Result<()> { } let json = serde_json::to_string_pretty(creds)?; std::fs::write(&path, json)?; + + // Restrict file permissions to owner-only on Unix (0o600) + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o600))?; + } + Ok(()) } @@ -459,51 +467,6 @@ pub fn pull_memories( Ok(memories) } -/// Delete a memory from RTK Cloud. -/// DELETE {endpoint}/api/icm/memories/{id} -pub fn delete_cloud_memory(creds: &Credentials, memory_id: &str) -> Result<()> { - let url = format!( - "{}/api/icm/memories/{}", - creds.endpoint.trim_end_matches('/'), - memory_id - ); - - let resp = ureq::delete(&url) - .set("Authorization", &format!("Bearer {}", creds.token)) - .set("X-Org-Id", &creds.org_id) - .timeout(std::time::Duration::from_secs(5)) - .call() - .context("Failed to delete cloud memory")?; - - let status = resp.status(); - if status != 200 && status != 204 { - let body = resp.into_string().unwrap_or_default(); - anyhow::bail!("Cloud delete failed ({}): {}", status, body); - } - - Ok(()) -} - -/// Fire-and-forget sync: push memory in a background thread. -/// Used after store/update operations to sync without blocking. -pub fn sync_memory_background(memory: Memory) { - let creds = match load_credentials() { - Some(c) => c, - None => return, - }; - - // Only sync project/org scoped memories - if memory.scope == Scope::User { - return; - } - - std::thread::spawn(move || { - if let Err(e) = sync_memory(&creds, &memory) { - tracing::warn!("Cloud sync failed: {}", e); - } - }); -} - /// Check if cloud sync is available (credentials exist and scope requires it). pub fn requires_cloud(scope: Scope) -> bool { scope != Scope::User @@ -541,6 +504,28 @@ mod tests { assert_eq!(url_decode("no_encoding"), "no_encoding"); } + #[cfg(unix)] + #[test] + fn test_credentials_file_permissions_0600() { + use std::os::unix::fs::PermissionsExt; + + // Create a temp file, apply the same permission logic as save_credentials + let dir = std::env::temp_dir().join(format!("icm-test-{}", std::process::id())); + std::fs::create_dir_all(&dir).unwrap(); + let path = dir.join("test-credentials.json"); + + std::fs::write(&path, "{}").unwrap(); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o600)).unwrap(); + + let metadata = std::fs::metadata(&path).unwrap(); + let mode = metadata.permissions().mode() & 0o777; + assert_eq!(mode, 0o600, "credentials file should be owner-only (0o600)"); + + // Cleanup + let _ = std::fs::remove_file(&path); + let _ = std::fs::remove_dir(&dir); + } + #[test] fn test_requires_cloud() { assert!(!requires_cloud(Scope::User)); diff --git a/crates/icm-cli/src/main.rs b/crates/icm-cli/src/main.rs index 0685ee5..ccf1a3f 100644 --- a/crates/icm-cli/src/main.rs +++ b/crates/icm-cli/src/main.rs @@ -14,8 +14,8 @@ use clap::{Parser, Subcommand, ValueEnum}; use serde_json::Value; use icm_core::{ - Concept, ConceptLink, Feedback, FeedbackStore, Importance, Label, Memoir, MemoirStore, Memory, - MemoryStore, Relation, + keyword_matches, topic_matches, Concept, ConceptLink, Feedback, FeedbackStore, Importance, + Label, Memoir, MemoirStore, Memory, MemoryStore, Relation, MSG_NO_MEMORIES, }; use icm_store::SqliteStore; @@ -678,7 +678,7 @@ fn main() -> Result<()> { use icm_core::Embedder; e.dimensions() }) - .unwrap_or(384); + .unwrap_or(icm_core::DEFAULT_EMBEDDING_DIMS); let db_path = cli.db.clone().unwrap_or_else(default_db_path); let store = open_store(cli.db, embedding_dims)?; @@ -774,7 +774,10 @@ fn main() -> Result<()> { } => { #[cfg(feature = "embeddings")] { - let emb = embedder.as_ref().expect("embeddings feature enabled"); + let emb = match embedder.as_ref() { + Some(e) => e, + None => bail!("embeddings not available — check your configuration"), + }; cmd_embed(&store, emb, topic.as_deref(), force, batch_size) } #[cfg(not(feature = "embeddings"))] @@ -919,7 +922,9 @@ fn cmd_recall( keyword: Option<&str>, ) -> Result<()> { // Auto-decay if >24h since last decay - let _ = store.maybe_auto_decay(); + if let Err(e) = store.maybe_auto_decay() { + tracing::warn!(error = %e, "auto-decay failed during recall"); + } // Try hybrid search if embedder is available if let Some(emb) = embedder { @@ -927,19 +932,20 @@ fn cmd_recall( if let Ok(results) = store.search_hybrid(query, &query_emb, limit) { let mut scored = results; if let Some(t) = topic { - scored.retain(|(m, _)| m.topic == t); + scored.retain(|(m, _)| topic_matches(&m.topic, t)); } if let Some(kw) = keyword { - scored.retain(|(m, _)| m.keywords.iter().any(|k| k.contains(kw))); + scored.retain(|(m, _)| keyword_matches(&m.keywords, kw)); } if scored.is_empty() { - println!("No memories found."); + println!("{MSG_NO_MEMORIES}"); return Ok(()); } + let ids: Vec<&str> = scored.iter().map(|(m, _)| m.id.as_str()).collect(); + let _ = store.batch_update_access(&ids); for (mem, score) in &scored { - let _ = store.update_access(&mem.id); print_memory_detail(mem, Some(*score)); } return Ok(()); @@ -956,19 +962,20 @@ fn cmd_recall( } if let Some(t) = topic { - results.retain(|m| m.topic == t); + results.retain(|m| topic_matches(&m.topic, t)); } if let Some(kw) = keyword { - results.retain(|m| m.keywords.iter().any(|k| k.contains(kw))); + results.retain(|m| keyword_matches(&m.keywords, kw)); } if results.is_empty() { - println!("No memories found."); + println!("{MSG_NO_MEMORIES}"); return Ok(()); } + let ids: Vec<&str> = results.iter().map(|m| m.id.as_str()).collect(); + let _ = store.batch_update_access(&ids); for mem in &results { - let _ = store.update_access(&mem.id); print_memory_detail(mem, None); } @@ -992,7 +999,7 @@ fn cmd_list(store: &SqliteStore, topic: Option<&str>, all: bool, sort: SortField } if memories.is_empty() { - println!("No memories found."); + println!("{MSG_NO_MEMORIES}"); return Ok(()); } diff --git a/crates/icm-core/src/lib.rs b/crates/icm-core/src/lib.rs index 75835d5..10dcc85 100644 --- a/crates/icm-core/src/lib.rs +++ b/crates/icm-core/src/lib.rs @@ -9,6 +9,9 @@ pub mod memoir_store; pub mod memory; pub mod store; +/// Default embedding vector dimensions (used when no embedder is configured). +pub const DEFAULT_EMBEDDING_DIMS: usize = 384; + pub use embedder::Embedder; pub use error::{IcmError, IcmResult}; #[cfg(feature = "embeddings")] @@ -21,3 +24,16 @@ pub use memory::{ Importance, Memory, MemorySource, PatternCluster, Scope, StoreStats, TopicHealth, }; pub use store::MemoryStore; + +/// Common message for empty search results. +pub const MSG_NO_MEMORIES: &str = "No memories found."; + +/// Check if a memory's topic matches a filter (supports prefix with ':'). +pub fn topic_matches(memory_topic: &str, filter: &str) -> bool { + memory_topic == filter || memory_topic.starts_with(&format!("{filter}:")) +} + +/// Check if any keyword contains the filter string. +pub fn keyword_matches(keywords: &[String], filter: &str) -> bool { + keywords.iter().any(|k| k.contains(filter)) +} diff --git a/crates/icm-mcp/src/tools.rs b/crates/icm-mcp/src/tools.rs index 7c2ae60..31e0845 100644 --- a/crates/icm-mcp/src/tools.rs +++ b/crates/icm-mcp/src/tools.rs @@ -2,8 +2,8 @@ use chrono::Utc; use serde_json::{json, Value}; use icm_core::{ - Concept, ConceptLink, Embedder, Feedback, FeedbackStore, Label, Memoir, MemoirStore, Memory, - MemoryStore, Relation, + keyword_matches, topic_matches, Concept, ConceptLink, Embedder, Feedback, FeedbackStore, Label, + Memoir, MemoirStore, Memory, MemoryStore, Relation, MSG_NO_MEMORIES, }; use icm_store::SqliteStore; @@ -12,6 +12,15 @@ use crate::protocol::ToolResult; /// Default threshold for auto-consolidation (can be overridden by config). const AUTO_CONSOLIDATE_THRESHOLD: usize = 10; +/// Similarity score above which a new memory is considered a duplicate of an existing one. +const DEDUP_SIMILARITY_THRESHOLD: f32 = 0.85; + +/// Maximum allowed length for topic names. +const MAX_TOPIC_LEN: usize = 255; + +/// Maximum allowed length for content/summary text. +const MAX_CONTENT_LEN: usize = 100_000; + /// Parse a JSON keywords array from tool arguments. fn parse_keywords(args: &Value) -> Vec { args.get("keywords") @@ -24,16 +33,6 @@ fn parse_keywords(args: &Value) -> Vec { .unwrap_or_default() } -/// Check if a memory topic matches a filter (exact or prefix match). -fn topic_matches(memory_topic: &str, filter: &str) -> bool { - memory_topic == filter || memory_topic.starts_with(&format!("{filter}:")) -} - -/// Check if memory keywords contain a substring match. -fn keyword_matches(keywords: &[String], filter: &str) -> bool { - keywords.iter().any(|k| k.contains(filter)) -} - /// Try to auto-consolidate a topic if it exceeds the threshold. /// Returns a human-readable message if consolidation happened, or empty string. fn try_auto_consolidate(store: &SqliteStore, topic: &str, threshold: usize) -> String { @@ -602,6 +601,21 @@ fn tool_store( Some(c) => c, None => return ToolResult::error("missing required field: content".into()), }; + + // Input length validation + if topic.len() > MAX_TOPIC_LEN { + return ToolResult::error(format!( + "topic exceeds maximum length ({} > {MAX_TOPIC_LEN} chars)", + topic.len() + )); + } + if content.len() > MAX_CONTENT_LEN { + return ToolResult::error(format!( + "content exceeds maximum length ({} > {MAX_CONTENT_LEN} chars)", + content.len() + )); + } + let importance_str = get_str(args, "importance").unwrap_or("medium"); let importance = importance_str .parse() @@ -640,21 +654,34 @@ fn tool_store( if let Some(ref query_emb) = embed_vec { if let Ok(similar) = store.search_hybrid(&embed_text, query_emb, 1) { if let Some((existing, score)) = similar.first() { - if score > &0.85 && existing.topic == topic { + if *score > DEDUP_SIMILARITY_THRESHOLD && existing.topic == topic { // Very similar content in same topic — update instead of duplicate - let mut updated = existing.clone(); - updated.summary = content.to_string(); - updated.updated_at = Utc::now(); - updated.weight = 1.0; // Reset weight on update - if let Some(raw) = get_str(args, "raw_excerpt") { - updated.raw_excerpt = Some(raw.into()); - } - let kw = parse_keywords(args); - if !kw.is_empty() { - updated.keywords = kw; - } - updated.importance = importance; - updated.embedding = Some(query_emb.clone()); + let updated = Memory { + id: existing.id.clone(), + created_at: existing.created_at, + last_accessed: existing.last_accessed, + access_count: existing.access_count, + weight: 1.0, // Reset weight on update + topic: existing.topic.clone(), + summary: content.to_string(), + raw_excerpt: get_str(args, "raw_excerpt") + .map(|r| r.into()) + .or_else(|| existing.raw_excerpt.clone()), + keywords: { + let kw = parse_keywords(args); + if kw.is_empty() { + existing.keywords.clone() + } else { + kw + } + }, + embedding: Some(query_emb.clone()), + importance, + source: existing.source.clone(), + related_ids: existing.related_ids.clone(), + updated_at: Utc::now(), + scope: existing.scope, + }; if let Err(e) = store.update(&updated) { return ToolResult::error(format!("failed to update: {e}")); } @@ -746,7 +773,9 @@ fn tool_recall( compact: bool, ) -> ToolResult { // Auto-decay if >24h since last decay - let _ = store.maybe_auto_decay(); + if let Err(e) = store.maybe_auto_decay() { + tracing::warn!(error = %e, "auto-decay failed during recall"); + } let query = match get_str(args, "query") { Some(q) => q, @@ -773,7 +802,7 @@ fn tool_recall( let _ = store.batch_update_access(&ids); if scored_results.is_empty() { - return ToolResult::text("No memories found.".into()); + return ToolResult::text(MSG_NO_MEMORIES.into()); } return ToolResult::text(format_memory_output(&scored_results, compact)); @@ -807,7 +836,7 @@ fn tool_recall( let _ = store.batch_update_access(&ids); if results.is_empty() { - return ToolResult::text("No memories found.".into()); + return ToolResult::text(MSG_NO_MEMORIES.into()); } // Convert to scored format (no score = -1.0) @@ -2069,9 +2098,9 @@ mod tests { } #[test] - fn test_store_very_large_content() { + fn test_store_very_large_content_rejected() { let store = test_store(); - let huge = "x".repeat(500_000); + let huge = "x".repeat(MAX_CONTENT_LEN + 1); let result = call_tool( &store, None, @@ -2079,6 +2108,23 @@ mod tests { &json!({"topic": "big", "content": huge}), false, ); + assert!(result.is_error); + assert!(result.content[0] + .text + .contains("content exceeds maximum length")); + } + + #[test] + fn test_store_large_content_within_limit_ok() { + let store = test_store(); + let big = "x".repeat(MAX_CONTENT_LEN); + let result = call_tool( + &store, + None, + "icm_memory_store", + &json!({"topic": "big", "content": big}), + false, + ); assert!(!result.is_error); } @@ -2369,6 +2415,20 @@ mod tests { } } + #[test] + fn test_recall_empty_query() { + let store = test_store(); + let result = call_tool( + &store, + None, + "icm_memory_recall", + &json!({"query": ""}), + false, + ); + // Should return empty results, not error + assert!(!result.is_error); + } + // === Feedback tool tests === #[test] @@ -2499,4 +2559,54 @@ mod tests { assert!(result.content[0].text.contains("triage")); assert!(result.content[0].text.contains("pr-review")); } + + // === Input validation tests === + + #[test] + fn test_store_topic_too_long() { + let store = test_store(); + let long_topic = "a".repeat(MAX_TOPIC_LEN + 1); + let result = call_tool( + &store, + None, + "icm_memory_store", + &json!({"topic": long_topic, "content": "hello"}), + false, + ); + assert!(result.is_error); + assert!(result.content[0] + .text + .contains("topic exceeds maximum length")); + } + + #[test] + fn test_store_content_too_long() { + let store = test_store(); + let long_content = "x".repeat(MAX_CONTENT_LEN + 1); + let result = call_tool( + &store, + None, + "icm_memory_store", + &json!({"topic": "test", "content": long_content}), + false, + ); + assert!(result.is_error); + assert!(result.content[0] + .text + .contains("content exceeds maximum length")); + } + + #[test] + fn test_store_topic_at_max_length_ok() { + let store = test_store(); + let max_topic = "a".repeat(MAX_TOPIC_LEN); + let result = call_tool( + &store, + None, + "icm_memory_store", + &json!({"topic": max_topic, "content": "hello"}), + false, + ); + assert!(!result.is_error); + } } diff --git a/crates/icm-store/Cargo.toml b/crates/icm-store/Cargo.toml index 931b7bc..3c760ab 100644 --- a/crates/icm-store/Cargo.toml +++ b/crates/icm-store/Cargo.toml @@ -10,6 +10,7 @@ sqlite-vec = { workspace = true } zerocopy = { workspace = true } serde_json = { workspace = true } chrono = { workspace = true } +tracing = { workspace = true } [dev-dependencies] tempfile = "3" diff --git a/crates/icm-store/src/schema.rs b/crates/icm-store/src/schema.rs index 18f5cda..763671a 100644 --- a/crates/icm-store/src/schema.rs +++ b/crates/icm-store/src/schema.rs @@ -15,6 +15,12 @@ fn fts_table_exists(conn: &Connection, name: &str) -> Result { } fn create_vec_table(conn: &Connection, embedding_dims: usize) -> Result<(), IcmError> { + if !(64..=4096).contains(&embedding_dims) { + return Err(IcmError::Config(format!( + "embedding_dims must be between 64 and 4096, got {embedding_dims}" + ))); + } + conn.execute_batch(&format!( "CREATE VIRTUAL TABLE vec_memories USING vec0( memory_id TEXT PRIMARY KEY, @@ -33,7 +39,7 @@ fn create_vec_table(conn: &Connection, embedding_dims: usize) -> Result<(), IcmE /// Initialize the database schema. `embedding_dims` controls the sqlite-vec vector size. /// Pass `None` to skip vector table creation (no embeddings feature). pub fn init_db(conn: &Connection) -> Result<(), IcmError> { - init_db_with_dims(conn, 384) + init_db_with_dims(conn, icm_core::DEFAULT_EMBEDDING_DIMS) } pub fn init_db_with_dims(conn: &Connection, embedding_dims: usize) -> Result<(), IcmError> { @@ -282,7 +288,9 @@ pub fn init_db_with_dims(conn: &Connection, embedding_dims: usize) -> Result<(), |row| row.get(0), ) .ok(); - let stored: usize = stored_dims.and_then(|s| s.parse().ok()).unwrap_or(384); + let stored: usize = stored_dims + .and_then(|s| s.parse().ok()) + .unwrap_or(icm_core::DEFAULT_EMBEDDING_DIMS); if stored != embedding_dims { // Model changed — drop vec table and clear embeddings conn.execute_batch("DROP TABLE IF EXISTS vec_memories") @@ -352,6 +360,40 @@ mod tests { init_db(&conn).unwrap(); } + #[test] + fn test_embedding_dims_too_small() { + ensure_vec_init(); + let conn = Connection::open_in_memory().unwrap(); + // dims < 64 should fail + let result = init_db_with_dims(&conn, 32); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("embedding_dims"), + "error should mention embedding_dims: {err}" + ); + } + + #[test] + fn test_embedding_dims_too_large() { + ensure_vec_init(); + let conn = Connection::open_in_memory().unwrap(); + // dims > 4096 should fail + let result = init_db_with_dims(&conn, 8192); + assert!(result.is_err()); + } + + #[test] + fn test_embedding_dims_boundary_valid() { + ensure_vec_init(); + let conn = Connection::open_in_memory().unwrap(); + // 64 and 4096 are valid boundary values + assert!(init_db_with_dims(&conn, 64).is_ok()); + + let conn2 = Connection::open_in_memory().unwrap(); + assert!(init_db_with_dims(&conn2, 4096).is_ok()); + } + #[test] fn test_memoir_tables_exist() { ensure_vec_init(); diff --git a/crates/icm-store/src/store.rs b/crates/icm-store/src/store.rs index 7e5e8f9..1065e15 100644 --- a/crates/icm-store/src/store.rs +++ b/crates/icm-store/src/store.rs @@ -43,7 +43,7 @@ pub struct SqliteStore { impl SqliteStore { pub fn new(path: &Path) -> IcmResult { - Self::with_dims(path, 384) + Self::with_dims(path, icm_core::DEFAULT_EMBEDDING_DIMS) } /// Open or create a store with a specific embedding dimension. @@ -145,9 +145,9 @@ fn embedding_to_blob(embedding: &[f32]) -> Vec { fn blob_to_embedding(blob: &[u8]) -> Vec { if !blob.len().is_multiple_of(4) { - eprintln!( - "[icm] warning: embedding blob size {} not divisible by 4, truncating", - blob.len() + tracing::warn!( + blob_size = blob.len(), + "embedding blob size not divisible by 4, truncating" ); } blob.chunks_exact(4) @@ -218,6 +218,13 @@ const SELECT_COLS: &str = "id, created_at, updated_at, last_accessed, access_cou /// /// This function strips special chars and wraps each token in double quotes. fn sanitize_fts_query(query: &str) -> String { + // Limit input length to prevent abuse + let query = if query.len() > 10_000 { + &query[..10_000] + } else { + query + }; + // Replace FTS5 operator chars with spaces, then quote each resulting token. // FTS5 tokenizer (unicode61) splits on `-` too, so we must keep tokens separate. let cleaned: String = query @@ -237,7 +244,12 @@ fn sanitize_fts_query(query: &str) -> String { let tokens: Vec = cleaned .split_whitespace() .filter(|w| !w.is_empty()) - .map(|w| format!("\"{w}\"")) + .take(100) // Limit token count to prevent excessive query complexity + .map(|w| { + // Strip any remaining quotes from tokens before wrapping in quotes + let stripped = w.replace('"', ""); + format!("\"{stripped}\"") + }) .collect(); tokens.join(" ") } @@ -386,7 +398,7 @@ impl MemoryStore for SqliteStore { // Cap keywords to avoid massive SQL generation let keywords = &keywords[..keywords.len().min(50)]; - let limit = limit.min(1000); + let limit = limit.min(100); let where_parts: Vec = (0..keywords.len()) .map(|i| { @@ -420,6 +432,7 @@ impl MemoryStore for SqliteStore { } fn search_fts(&self, query: &str, limit: usize) -> IcmResult> { + let limit = limit.min(100); let sanitized = sanitize_fts_query(query); if sanitized.is_empty() { return Ok(Vec::new()); @@ -557,12 +570,13 @@ impl MemoryStore for SqliteStore { } // 3. Combine scores: 30% FTS + 70% vector - let mut scored: Vec<(String, f32)> = Vec::with_capacity(all_memories.len()); - for id in all_memories.keys() { - let fts_score = fts_scores.get(id).copied().unwrap_or(0.0); - let vec_score = vec_scores.get(id).copied().unwrap_or(0.0); + let keys: Vec = all_memories.keys().cloned().collect(); + let mut scored: Vec<(String, f32)> = Vec::with_capacity(keys.len()); + for id in keys { + let fts_score = fts_scores.get(&id).copied().unwrap_or(0.0); + let vec_score = vec_scores.get(&id).copied().unwrap_or(0.0); let combined = 0.3 * fts_score + 0.7 * vec_score; - scored.push((id.clone(), combined)); + scored.push((id, combined)); } // Sort by combined score descending @@ -716,6 +730,7 @@ impl MemoryStore for SqliteStore { )", params![topic], ) { + tracing::warn!(topic, error = %e, "consolidate_topic: rolling back after vec_memories delete failed"); let _ = self.conn.execute_batch("ROLLBACK;"); return Err(IcmError::Database(e.to_string())); } @@ -724,11 +739,13 @@ impl MemoryStore for SqliteStore { .conn .execute("DELETE FROM memories WHERE topic = ?1", params![topic]) { + tracing::warn!(topic, error = %e, "consolidate_topic: rolling back after memories delete failed"); let _ = self.conn.execute_batch("ROLLBACK;"); return Err(IcmError::Database(e.to_string())); } if let Err(e) = self.store(consolidated) { + tracing::warn!(topic, error = %e, "consolidate_topic: rolling back after store failed"); let _ = self.conn.execute_batch("ROLLBACK;"); return Err(e); } @@ -740,6 +757,7 @@ impl MemoryStore for SqliteStore { .conn .execute_batch("INSERT INTO memories_fts(memories_fts) VALUES('rebuild');") { + tracing::warn!(topic, error = %e, "consolidate_topic: rolling back after FTS rebuild failed"); let _ = self.conn.execute_batch("ROLLBACK;"); return Err(IcmError::Database(e.to_string())); } @@ -811,9 +829,13 @@ impl MemoryStore for SqliteStore { } let parse_dt = |s: &str| -> Option> { - DateTime::parse_from_rfc3339(s) - .ok() - .map(|d| d.with_timezone(&Utc)) + match DateTime::parse_from_rfc3339(s) { + Ok(d) => Some(d.with_timezone(&Utc)), + Err(e) => { + tracing::warn!("invalid timestamp '{}': {}", s, e); + None + } + } }; Ok(TopicHealth { @@ -1882,15 +1904,15 @@ impl SqliteStore { } // Representative = the highest-weight memory in the cluster - let best_idx = *indices - .iter() - .max_by(|&&a, &&b| { - memories[a] - .weight - .partial_cmp(&memories[b].weight) - .unwrap_or(std::cmp::Ordering::Equal) - }) - .unwrap(); + let best_idx = match indices.iter().max_by(|&&a, &&b| { + memories[a] + .weight + .partial_cmp(&memories[b].weight) + .unwrap_or(std::cmp::Ordering::Equal) + }) { + Some(&idx) => idx, + None => continue, // empty cluster, skip + }; // Collect all unique keywords from the cluster let mut all_kw: Vec = Vec::new(); @@ -3558,4 +3580,203 @@ mod tests { assert_eq!(stats.most_applied.len(), 1); assert_eq!(stats.most_applied[0].1, 1); } + + // === sanitize_fts_query tests === + + #[test] + fn test_sanitize_fts_empty() { + assert_eq!(sanitize_fts_query(""), ""); + assert_eq!(sanitize_fts_query(" "), ""); + } + + #[test] + fn test_sanitize_fts_special_chars() { + // All FTS5 operators should be stripped + assert_eq!(sanitize_fts_query("hello-world"), "\"hello\" \"world\""); + assert_eq!(sanitize_fts_query("foo*bar"), "\"foo\" \"bar\""); + assert_eq!(sanitize_fts_query("a:b"), "\"a\" \"b\""); + assert_eq!(sanitize_fts_query("(test)"), "\"test\""); + assert_eq!(sanitize_fts_query("x^y+z~w"), "\"x\" \"y\" \"z\" \"w\""); + } + + #[test] + fn test_sanitize_fts_quotes_stripped() { + // Embedded quotes must be removed before wrapping in quotes + assert_eq!(sanitize_fts_query("say \"hello\""), "\"say\" \"hello\""); + } + + #[test] + fn test_sanitize_fts_unicode() { + assert_eq!(sanitize_fts_query("café résumé"), "\"café\" \"résumé\""); + assert_eq!(sanitize_fts_query("日本語テスト"), "\"日本語テスト\""); + } + + #[test] + fn test_sanitize_fts_long_input_truncated() { + let long = "a ".repeat(6000); // 12000 chars + let result = sanitize_fts_query(&long); + // Input is truncated to 10_000 chars, then tokens are capped at 100 + let token_count = result.split_whitespace().count(); + assert!(token_count <= 100); + } + + #[test] + fn test_sanitize_fts_many_tokens_capped() { + // 200 tokens should be capped to 100 + let many_tokens: String = (0..200) + .map(|i| format!("word{i}")) + .collect::>() + .join(" "); + let result = sanitize_fts_query(&many_tokens); + let token_count = result.split_whitespace().count(); + assert_eq!(token_count, 100); + } + + // === search limit cap tests === + + #[test] + fn test_search_fts_limit_capped() { + let store = test_store(); + // Store a memory so search has something to find + store.store(make_memory("test", "hello world")).unwrap(); + + // Even with a huge limit, it should not error (capped internally) + let results = store.search_fts("hello", 999_999).unwrap(); + assert!(results.len() <= 100); + } + + #[test] + fn test_search_by_keywords_limit_capped() { + let store = test_store(); + let mut mem = make_memory("test", "keyword search test"); + mem.keywords = vec!["findme".into()]; + store.store(mem).unwrap(); + + let results = store.search_by_keywords(&["findme"], 999_999).unwrap(); + assert!(results.len() <= 100); + } + + // === Additional MemoryStore coverage === + + #[test] + fn test_search_fts_empty_query() { + let store = test_store(); + store.store(make_memory("topic", "hello world")).unwrap(); + let results = store.search_fts("", 10).unwrap(); + assert!(results.is_empty()); + } + + #[test] + fn test_search_by_keywords_empty() { + let store = test_store(); + let results = store.search_by_keywords(&[], 10).unwrap(); + assert!(results.is_empty()); + } + + #[test] + fn test_update_nonexistent_memory() { + let store = test_store(); + let mut mem = make_memory("t", "s"); + mem.id = "nonexistent-id".to_string(); + let result = store.update(&mem); + assert!(result.is_err()); + } + + #[test] + fn test_delete_nonexistent_memory() { + let store = test_store(); + let result = store.delete("nonexistent-id"); + assert!(result.is_err()); + } + + #[test] + fn test_batch_update_access() { + let store = test_store(); + let id1 = store.store(make_memory("t", "one")).unwrap(); + let id2 = store.store(make_memory("t", "two")).unwrap(); + store.batch_update_access(&[&id1, &id2]).unwrap(); + let m1 = store.get(&id1).unwrap().unwrap(); + let m2 = store.get(&id2).unwrap().unwrap(); + assert_eq!(m1.access_count, 1); + assert_eq!(m2.access_count, 1); + } + + #[test] + fn test_auto_consolidate_below_threshold() { + let store = test_store(); + store.store(make_memory("t", "one")).unwrap(); + store.store(make_memory("t", "two")).unwrap(); + // Threshold is 10, so no consolidation + let result = store.auto_consolidate("t", 10).unwrap(); + assert!(!result); + assert_eq!(store.count_by_topic("t").unwrap(), 2); + } + + #[test] + fn test_auto_consolidate_above_threshold() { + let store = test_store(); + for i in 0..12 { + store + .store(make_memory("bulk", &format!("entry {i}"))) + .unwrap(); + } + let result = store.auto_consolidate("bulk", 10).unwrap(); + assert!(result); + assert_eq!(store.count_by_topic("bulk").unwrap(), 1); + } + + #[test] + fn test_apply_decay_with_aggressive_factor() { + let store = test_store(); + store.store(make_memory("t", "decayable")).unwrap(); + let affected = store.apply_decay(0.5).unwrap(); + assert!(affected > 0); + let mems = store.get_by_topic("t").unwrap(); + assert!(mems[0].weight < 1.0); + } + + #[test] + fn test_prune_low_weight() { + let store = test_store(); + store.store(make_memory("t", "will be pruned")).unwrap(); + // Apply aggressive decay + store.apply_decay(0.01).unwrap(); + let pruned = store.prune(0.5).unwrap(); + assert!(pruned > 0); + assert_eq!(store.count().unwrap(), 0); + } + + #[test] + fn test_list_topics_multiple() { + let store = test_store(); + store.store(make_memory("alpha", "a")).unwrap(); + store.store(make_memory("beta", "b")).unwrap(); + store.store(make_memory("alpha", "c")).unwrap(); + let topics = store.list_topics().unwrap(); + assert_eq!(topics.len(), 2); + } + + #[test] + fn test_stats_multi_topic() { + let store = test_store(); + store.store(make_memory("t1", "one")).unwrap(); + store.store(make_memory("t2", "two")).unwrap(); + let stats = store.stats().unwrap(); + assert_eq!(stats.total_memories, 2); + assert_eq!(stats.total_topics, 2); + } + + #[test] + fn test_get_by_topic_prefix() { + let store = test_store(); + store + .store(make_memory("project:web", "web stuff")) + .unwrap(); + store + .store(make_memory("project:api", "api stuff")) + .unwrap(); + store.store(make_memory("other", "unrelated")).unwrap(); + let results = store.get_by_topic_prefix("project:*").unwrap(); + assert_eq!(results.len(), 2); + } }