diff --git a/platform/daemon-rs/crates/signet-daemon/src/routes/hooks.rs b/platform/daemon-rs/crates/signet-daemon/src/routes/hooks.rs index ff00d8ef0..9190707b6 100644 --- a/platform/daemon-rs/crates/signet-daemon/src/routes/hooks.rs +++ b/platform/daemon-rs/crates/signet-daemon/src/routes/hooks.rs @@ -4,6 +4,7 @@ //! session lifecycle: session-start, prompt-submit, session-end, //! remember, recall, pre-compaction, and compaction-complete. +use std::collections::{HashMap, HashSet}; use std::fs; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -855,24 +856,147 @@ fn cap_prompt_inject(text: &str, limit: usize) -> String { } fn normalize_prompt_entity_text(text: &str) -> String { - let mut out = String::new(); - let mut prev_space = true; - for ch in text.to_lowercase().chars() { - if ch.is_ascii_alphanumeric() { - out.push(ch); - prev_space = false; - } else if !prev_space { - out.push(' '); - prev_space = true; + let mut terms = Vec::new(); + let mut current = String::new(); + for ch in text.to_lowercase().replace('’', "'").chars() { + if ch.is_ascii_alphanumeric() || ch == '\'' { + current.push(ch); + } else if !current.is_empty() { + push_prompt_entity_term(&mut terms, ¤t); + current.clear(); } } - out.trim().to_string() + if !current.is_empty() { + push_prompt_entity_term(&mut terms, ¤t); + } + terms.join(" ") +} + +fn push_prompt_entity_term(terms: &mut Vec, raw: &str) { + let token = raw + .strip_suffix("'s") + .or_else(|| raw.strip_suffix('\'')) + .unwrap_or(raw); + for part in token.split('\'') { + if !part.is_empty() { + terms.push(part.to_string()); + } + } +} + +fn prompt_entity_terms(text: &str) -> Vec { + normalize_prompt_entity_text(text) + .split_whitespace() + .map(str::to_string) + .collect() +} + +fn prompt_bare_possessive_allowed(phrase_term: &str) -> bool { + phrase_term.len() >= 4 + && !matches!( + phrase_term, + "agent" + | "artifact" + | "concept" + | "connector" + | "document" + | "event" + | "memory" + | "policy" + | "preference" + | "product" + | "project" + | "skill" + | "source" + | "system" + | "task" + | "tool" + | "workflow" + ) +} + +fn prompt_entity_term_matches(prompt_term: &str, phrase_term: &str) -> bool { + prompt_term == phrase_term + || (prompt_bare_possessive_allowed(phrase_term) && prompt_term == format!("{phrase_term}s")) +} + +fn prompt_phrase_span(prompt: &str, phrase: &str) -> Option<(usize, usize)> { + let prompt_terms = prompt_entity_terms(prompt); + let phrase_terms = prompt_entity_terms(phrase); + if phrase_terms.join(" ").len() < MIN_PROMPT_ENTITY_MATCH_CHARS + || phrase_terms.is_empty() + || phrase_terms.len() > prompt_terms.len() + { + return None; + } + for start in 0..=(prompt_terms.len() - phrase_terms.len()) { + if phrase_terms + .iter() + .enumerate() + .all(|(offset, term)| prompt_entity_term_matches(&prompt_terms[start + offset], term)) + { + return Some((start, start + phrase_terms.len())); + } + } + None +} + +fn prompt_spans_overlap(a: (usize, usize), b: (usize, usize)) -> bool { + a.0 < b.1 && b.0 < a.1 +} + +fn prompt_entity_context_type_allowed(entity_type: &str) -> bool { + matches!( + entity_type.to_ascii_lowercase().as_str(), + "person" | "project" + ) +} + +fn prompt_generic_entity_phrase(phrase_terms: &[String]) -> bool { + if phrase_terms.len() != 1 { + return false; + } + let term = phrase_terms[0].as_str(); + if !prompt_bare_possessive_allowed(term) { + return true; + } + term.strip_suffix('s') + .is_some_and(|singular| !prompt_bare_possessive_allowed(singular)) +} + +fn score_prompt_entity_candidate( + match_source: &str, + matched_text: &str, + mentions: i64, + pinned: i64, +) -> f64 { + let phrase = normalize_prompt_entity_text(matched_text); + let phrase_terms = prompt_entity_terms(matched_text); + phrase_terms.len() as f64 * 8.0 + + phrase.len() as f64 * 0.35 + + (mentions.max(0) as f64).ln_1p() + + pinned.clamp(0, 1) as f64 * 8.0 + + if match_source == "alias" { -0.25 } else { 0.0 } } -fn phrase_appears_in_prompt(prompt: &str, phrase: &str) -> bool { - let prompt = format!(" {} ", normalize_prompt_entity_text(prompt)); - let phrase = normalize_prompt_entity_text(phrase); - phrase.len() >= MIN_PROMPT_ENTITY_MATCH_CHARS && prompt.contains(&format!(" {phrase} ")) +#[derive(Clone)] +struct PromptEntityRow { + entity_id: String, + entity_name: String, + entity_type: String, + matched_text: String, + match_source: String, + mentions: i64, + pinned: i64, +} + +#[derive(Clone)] +struct PromptEntityCandidate { + row: PromptEntityRow, + normalized_phrase: String, + span_start: usize, + span_end: usize, + score: f64, } fn is_low_signal_prompt(prompt: &str) -> bool { @@ -1274,14 +1398,18 @@ pub async fn prompt_submit( } let entity_rows = match conn.prepare( - "SELECT id, name, COALESCE(canonical_name, LOWER(name)) AS matched_text, - 'name' AS source, COALESCE(mentions, 0) AS mentions + "SELECT id, name, COALESCE(entity_type, 'unknown') AS entity_type, + COALESCE(canonical_name, LOWER(name)) AS matched_text, + 'name' AS source, COALESCE(mentions, 0) AS mentions, + COALESCE(pinned, 0) AS pinned FROM entities WHERE agent_id = ?1 AND COALESCE(status, 'active') = 'active' UNION ALL - SELECT e.id, e.name, a.alias AS matched_text, - 'alias' AS source, COALESCE(e.mentions, 0) AS mentions + SELECT e.id, e.name, COALESCE(e.entity_type, 'unknown') AS entity_type, + a.alias AS matched_text, + 'alias' AS source, COALESCE(e.mentions, 0) AS mentions, + COALESCE(e.pinned, 0) AS pinned FROM entity_aliases a JOIN entities e ON e.id = a.entity_id AND e.agent_id = a.agent_id WHERE a.agent_id = ?1 @@ -1290,13 +1418,15 @@ pub async fn prompt_submit( ) { Ok(mut stmt) => stmt .query_map([agent_id.clone()], |row| { - Ok(( - row.get::<_, String>(0)?, - row.get::<_, String>(1)?, - row.get::<_, String>(2)?, - row.get::<_, String>(3)?, - row.get::<_, i64>(4)?, - )) + Ok(PromptEntityRow { + entity_id: row.get::<_, String>(0)?, + entity_name: row.get::<_, String>(1)?, + entity_type: row.get::<_, String>(2)?, + matched_text: row.get::<_, String>(3)?, + match_source: row.get::<_, String>(4)?, + mentions: row.get::<_, i64>(5)?, + pinned: row.get::<_, i64>(6)?, + }) }) .ok() .map(|rows| rows.filter_map(|r| r.ok()).collect::>()) @@ -1304,41 +1434,86 @@ pub async fn prompt_submit( Err(_) => vec![], }; - let mut matches = entity_rows - .into_iter() - .filter(|(_, _, matched_text, _, _)| { - phrase_appears_in_prompt(&cleaned, matched_text) + let mut candidates_by_phrase = HashMap::>::new(); + for row in entity_rows { + if !prompt_entity_context_type_allowed(&row.entity_type) + || prompt_generic_entity_phrase(&prompt_entity_terms(&row.matched_text)) + { + continue; + } + let Some((span_start, span_end)) = prompt_phrase_span(&cleaned, &row.matched_text) else { + continue; + }; + let normalized_phrase = normalize_prompt_entity_text(&row.matched_text); + let score = score_prompt_entity_candidate( + &row.match_source, + &row.matched_text, + row.mentions, + row.pinned, + ); + candidates_by_phrase + .entry(normalized_phrase.clone()) + .or_default() + .push(PromptEntityCandidate { + row, + normalized_phrase, + span_start, + span_end, + score, + }); + } + + let mut phrase_winners = candidates_by_phrase + .into_values() + .filter_map(|mut candidates| { + candidates.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| b.row.mentions.cmp(&a.row.mentions)) + .then_with(|| b.normalized_phrase.len().cmp(&a.normalized_phrase.len())) + .then_with(|| a.row.entity_name.cmp(&b.row.entity_name)) + }); + candidates.into_iter().next() }) .collect::>(); - matches.sort_by(|a, b| { - normalize_prompt_entity_text(&b.2) - .len() - .cmp(&normalize_prompt_entity_text(&a.2).len()) - .then_with(|| b.4.cmp(&a.4)) + let top_score = phrase_winners + .iter() + .map(|candidate| candidate.score) + .fold(0.0, f64::max); + let minimum_score = 12.0_f64.max(top_score * 0.45); + phrase_winners.retain(|candidate| candidate.score >= minimum_score); + phrase_winners.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| { + (b.span_end - b.span_start).cmp(&(a.span_end - a.span_start)) + }) + .then_with(|| b.row.mentions.cmp(&a.row.mentions)) + .then_with(|| a.row.entity_name.cmp(&b.row.entity_name)) }); - let mut phrase_entities = std::collections::HashMap::>::new(); - for (id, _, matched_text, _, _) in &matches { - phrase_entities - .entry(normalize_prompt_entity_text(matched_text)) - .or_default() - .insert(id.clone()); - } - if phrase_entities.values().any(|ids| ids.len() > 1) { - return Ok(serde_json::json!({ - "inject": "", - "memoryCount": 0, - "queryTerms": query_terms_for_resp, - "engine": "ambiguous-entity", - })); + let mut seen = HashSet::new(); + let mut selected_spans = Vec::new(); + let mut entity_matches = Vec::new(); + for candidate in phrase_winners { + if !seen.insert(candidate.row.entity_id.clone()) { + continue; + } + let span = (candidate.span_start, candidate.span_end); + if selected_spans + .iter() + .any(|selected| prompt_spans_overlap(*selected, span)) + { + continue; + } + selected_spans.push(span); + entity_matches.push(candidate.row); + if entity_matches.len() >= 2 { + break; + } } - - let mut seen = std::collections::HashSet::new(); - let entity_matches = matches - .into_iter() - .filter(|(id, _, _, _, _)| seen.insert(id.clone())) - .take(2) - .collect::>(); if entity_matches.is_empty() { return Ok(serde_json::json!({ "inject": "", @@ -1352,18 +1527,25 @@ pub async fn prompt_submit( .split_whitespace() .filter(|term| is_prompt_context_term(term)) .map(str::to_string) - .collect::>(); + .collect::>(); let mut lines = Vec::new(); - for (entity_id, entity_name, matched_text, _, _) in entity_matches { - let entity_terms = normalize_prompt_entity_text(&format!("{entity_name} {matched_text}")) + for entity in entity_matches { + let entity_terms = normalize_prompt_entity_text(&format!( + "{} {}", + entity.entity_name, entity.matched_text + )) .split_whitespace() .map(str::to_string) - .collect::>(); + .collect::>(); let context_terms = prompt_terms .iter() - .filter(|term| !entity_terms.contains(*term)) + .filter(|term| { + !entity_terms + .iter() + .any(|entity_term| prompt_entity_term_matches(term, entity_term)) + }) .cloned() - .collect::>(); + .collect::>(); let aspects = match conn.prepare( "SELECT id, name, canonical_name, weight FROM entity_aspects @@ -1374,7 +1556,7 @@ pub async fn prompt_submit( LIMIT 12", ) { Ok(mut stmt) => stmt - .query_map(rusqlite::params![entity_id, agent_id.clone()], |row| { + .query_map(rusqlite::params![&entity.entity_id, agent_id.clone()], |row| { Ok(( row.get::<_, String>(0)?, row.get::<_, String>(1)?, @@ -1513,7 +1695,8 @@ pub async fn prompt_submit( format!("v{version}") }; lines.push(format!( - "- [{kind}] {entity_name} / {aspect_name} / {group} / {claim}: {} ({source})", + "- [{kind}] {} / {aspect_name} / {group} / {claim}: {} ({source})", + entity.entity_name, trim_for_inject(&content, 240) )); if lines.len() >= 8 { @@ -3827,6 +4010,345 @@ mod tests { let _ = writer.await; } + #[tokio::test] + async fn prompt_submit_prefers_canonical_entity_over_possessive_duplicate() { + let (state, writer, _tmp) = test_state("hooks-prompt-submit-possessive-entity"); + state + .pool + .write(Priority::Low, move |conn| { + let now = "2026-05-27T00:00:00Z"; + conn.execute( + "INSERT INTO entities (id, name, canonical_name, entity_type, description, agent_id, mentions, created_at, updated_at) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?8)", + rusqlite::params![ + "entity-signet", + "Signet", + "signet", + "project", + "Source-backed agent continuity substrate", + "agent-a", + 10_i64, + now, + ], + )?; + conn.execute( + "INSERT INTO entity_aspects (id, entity_id, agent_id, name, canonical_name, weight, created_at, updated_at) + VALUES ('aspect-preferences', 'entity-signet', 'agent-a', 'preferences', 'preferences', 0.9, ?1, ?1)", + rusqlite::params![now], + )?; + conn.execute( + "INSERT INTO entity_attributes + (id, aspect_id, agent_id, memory_id, kind, content, normalized_content, confidence, importance, + status, group_key, claim_key, created_at, updated_at) + VALUES ('attr-preferences-pen', 'aspect-preferences', 'agent-a', NULL, 'attribute', + 'Favorite pen is a Pilot G-2.', + 'favorite pen is a pilot g 2', 0.95, 0.9, 'active', + 'writing', 'favorite_pen', ?1, ?1)", + rusqlite::params![now], + )?; + conn.execute( + "INSERT INTO entities (id, name, canonical_name, entity_type, description, agent_id, mentions, created_at, updated_at) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?8)", + rusqlite::params![ + "entity-signet-possessive", + "Signet's", + "signet's", + "tool", + "Possessive duplicate", + "agent-a", + 2_i64, + now, + ], + )?; + conn.execute( + "INSERT INTO entity_aspects (id, entity_id, agent_id, name, canonical_name, weight, created_at, updated_at) + VALUES ('aspect-possessive-noise', 'entity-signet-possessive', 'agent-a', 'noise', 'noise', 1.0, ?1, ?1)", + rusqlite::params![now], + )?; + conn.execute( + "INSERT INTO entity_attributes + (id, aspect_id, agent_id, memory_id, kind, content, normalized_content, confidence, importance, + status, group_key, claim_key, created_at, updated_at) + VALUES ('attr-possessive-noise', 'aspect-possessive-noise', 'agent-a', NULL, 'attribute', + 'Possessive duplicate entity should not win prompt matching.', + 'possessive duplicate entity should not win prompt matching', 0.95, 0.9, 'active', + 'runtime', 'duplicate_guard', ?1, ?1)", + rusqlite::params![now], + )?; + Ok(serde_json::Value::Null) + }) + .await + .unwrap(); + + let resp = prompt_submit( + State(state.clone()), + HeaderMap::new(), + Json(PromptSubmitBody { + harness: Some("test".to_string()), + project: Some("platform/daemon-rs".to_string()), + agent_id: Some("agent-a".to_string()), + user_message: Some("What are Signet's favorite pens?".to_string()), + user_prompt: None, + last_assistant_message: None, + session_key: Some("sess-prompt-possessive-entity".to_string()), + transcript: None, + transcript_path: None, + runtime_path: None, + }), + ) + .await; + + assert_eq!(resp.status(), StatusCode::OK); + let body = test_json(resp).await; + let inject = body["inject"].as_str().unwrap_or_default(); + assert_eq!( + body["engine"], + serde_json::Value::String("entity-context".to_string()) + ); + assert!(inject.contains("Signet / preferences / writing / favorite_pen")); + assert!(!inject.contains("Signet's / noise")); + assert!(!inject.contains("Possessive duplicate entity should not win")); + + let bare_resp = prompt_submit( + State(state.clone()), + HeaderMap::new(), + Json(PromptSubmitBody { + harness: Some("test".to_string()), + project: Some("platform/daemon-rs".to_string()), + agent_id: Some("agent-a".to_string()), + user_message: Some("What are Signets favorite pens?".to_string()), + user_prompt: None, + last_assistant_message: None, + session_key: Some("sess-prompt-bare-possessive-entity".to_string()), + transcript: None, + transcript_path: None, + runtime_path: None, + }), + ) + .await; + + assert_eq!(bare_resp.status(), StatusCode::OK); + let bare_body = test_json(bare_resp).await; + let bare_inject = bare_body["inject"].as_str().unwrap_or_default(); + assert_eq!( + bare_body["engine"], + serde_json::Value::String("entity-context".to_string()) + ); + assert!(bare_inject.contains("Signet / preferences / writing / favorite_pen")); + + drop(state); + let _ = writer.await; + } + + #[tokio::test] + async fn prompt_submit_prefers_longest_non_overlapping_entity_span() { + let (state, writer, _tmp) = test_state("hooks-prompt-submit-longest-entity-span"); + state + .pool + .write(Priority::Low, move |conn| { + let now = "2026-05-27T00:00:00Z"; + for (id, name, canonical, mentions) in [ + ( + "entity-claude-code-connector", + "Claude Code connector", + "claude code connector", + 8_i64, + ), + ("entity-claude-code", "Claude Code", "claude code", 135_i64), + ("entity-claude", "Claude", "claude", 113_i64), + ("entity-code", "code", "code", 15_i64), + ("entity-connector", "connector", "connector", 5_i64), + ] { + conn.execute( + "INSERT INTO entities (id, name, canonical_name, entity_type, description, agent_id, mentions, created_at, updated_at) + VALUES (?1, ?2, ?3, 'project', 'Prompt test entity', 'agent-a', ?4, ?5, ?5)", + rusqlite::params![id, name, canonical, mentions, now], + )?; + conn.execute( + "INSERT INTO entity_aspects (id, entity_id, agent_id, name, canonical_name, weight, created_at, updated_at) + VALUES (?1, ?2, 'agent-a', 'runtime', 'runtime', 1.0, ?3, ?3)", + rusqlite::params![format!("aspect-{id}"), id, now], + )?; + conn.execute( + "INSERT INTO entity_attributes + (id, aspect_id, agent_id, memory_id, kind, content, normalized_content, confidence, importance, + status, group_key, claim_key, created_at, updated_at) + VALUES (?1, ?2, 'agent-a', NULL, 'attribute', ?3, ?4, 0.95, 0.9, 'active', + 'setup', 'routing', ?5, ?5)", + rusqlite::params![ + format!("attr-{id}"), + format!("aspect-{id}"), + format!("{name} setup context."), + format!("{canonical} setup context"), + now, + ], + )?; + } + Ok(serde_json::Value::Null) + }) + .await + .unwrap(); + + let resp = prompt_submit( + State(state.clone()), + HeaderMap::new(), + Json(PromptSubmitBody { + harness: Some("test".to_string()), + project: Some("platform/daemon-rs".to_string()), + agent_id: Some("agent-a".to_string()), + user_message: Some("Claude Code connector setup".to_string()), + user_prompt: None, + last_assistant_message: None, + session_key: Some("sess-prompt-longest-entity-span".to_string()), + transcript: None, + transcript_path: None, + runtime_path: None, + }), + ) + .await; + + assert_eq!(resp.status(), StatusCode::OK); + let body = test_json(resp).await; + let inject = body["inject"].as_str().unwrap_or_default(); + assert_eq!( + body["engine"], + serde_json::Value::String("entity-context".to_string()) + ); + assert!(inject.contains("Claude Code connector / runtime / setup / routing")); + assert!(!inject.contains("- [attribute] Claude Code / runtime")); + assert!(!inject.contains("- [attribute] connector / runtime")); + + drop(state); + let _ = writer.await; + } + + #[tokio::test] + async fn prompt_submit_ignores_disallowed_entity_types() { + let (state, writer, _tmp) = test_state("hooks-prompt-submit-disallowed-entity-type"); + state + .pool + .write(Priority::Low, move |conn| { + let now = "2026-05-27T00:00:00Z"; + conn.execute( + "INSERT INTO entities (id, name, canonical_name, entity_type, description, agent_id, mentions, created_at, updated_at) + VALUES ('entity-claude-code-connector', 'Claude Code connector', 'claude code connector', + 'tool', 'Prompt test entity', 'agent-a', 80, ?1, ?1)", + rusqlite::params![now], + )?; + conn.execute( + "INSERT INTO entity_aspects (id, entity_id, agent_id, name, canonical_name, weight, created_at, updated_at) + VALUES ('aspect-claude-code-connector-runtime', 'entity-claude-code-connector', + 'agent-a', 'runtime', 'runtime', 1.0, ?1, ?1)", + rusqlite::params![now], + )?; + conn.execute( + "INSERT INTO entity_attributes + (id, aspect_id, agent_id, memory_id, kind, content, normalized_content, confidence, importance, + status, group_key, claim_key, created_at, updated_at) + VALUES ('attr-claude-code-connector-runtime', 'aspect-claude-code-connector-runtime', + 'agent-a', NULL, 'attribute', + 'Claude Code connector setup context should not inject.', + 'claude code connector setup context should not inject', + 0.95, 0.9, 'active', 'setup', 'routing', ?1, ?1)", + rusqlite::params![now], + )?; + Ok(serde_json::Value::Null) + }) + .await + .unwrap(); + + let resp = prompt_submit( + State(state.clone()), + HeaderMap::new(), + Json(PromptSubmitBody { + harness: Some("test".to_string()), + project: Some("platform/daemon-rs".to_string()), + agent_id: Some("agent-a".to_string()), + user_message: Some("Claude Code connector setup".to_string()), + user_prompt: None, + last_assistant_message: None, + session_key: Some("sess-prompt-disallowed-entity-type".to_string()), + transcript: None, + transcript_path: None, + runtime_path: None, + }), + ) + .await; + + assert_eq!(resp.status(), StatusCode::OK); + let body = test_json(resp).await; + assert_eq!( + body["engine"], + serde_json::Value::String("no-entity".to_string()) + ); + assert_eq!(body["inject"], serde_json::Value::String(String::new())); + + drop(state); + let _ = writer.await; + } + + #[tokio::test] + async fn prompt_submit_does_not_match_generic_plural_entity_terms() { + let (state, writer, _tmp) = test_state("hooks-prompt-submit-generic-plural"); + state + .pool + .write(Priority::Low, move |conn| { + let now = "2026-05-27T00:00:00Z"; + conn.execute( + "INSERT INTO entities (id, name, canonical_name, entity_type, description, agent_id, mentions, created_at, updated_at) + VALUES ('entity-project', 'Project', 'project', 'project', 'Generic project entity', 'agent-a', 50, ?1, ?1)", + rusqlite::params![now], + )?; + conn.execute( + "INSERT INTO entity_aspects (id, entity_id, agent_id, name, canonical_name, weight, created_at, updated_at) + VALUES ('aspect-project-roadmap', 'entity-project', 'agent-a', 'roadmap', 'roadmap', 1.0, ?1, ?1)", + rusqlite::params![now], + )?; + conn.execute( + "INSERT INTO entity_attributes + (id, aspect_id, agent_id, memory_id, kind, content, normalized_content, confidence, importance, + status, group_key, claim_key, created_at, updated_at) + VALUES ('attr-project-roadmap', 'aspect-project-roadmap', 'agent-a', NULL, 'attribute', + 'Generic project roadmap context should not inject for plural projects.', + 'generic project roadmap context should not inject for plural projects', + 0.95, 0.9, 'active', 'general', 'roadmap', ?1, ?1)", + rusqlite::params![now], + )?; + Ok(serde_json::Value::Null) + }) + .await + .unwrap(); + + let resp = prompt_submit( + State(state.clone()), + HeaderMap::new(), + Json(PromptSubmitBody { + harness: Some("test".to_string()), + project: Some("platform/daemon-rs".to_string()), + agent_id: Some("agent-a".to_string()), + user_message: Some("projects roadmap".to_string()), + user_prompt: None, + last_assistant_message: None, + session_key: Some("sess-prompt-generic-plural".to_string()), + transcript: None, + transcript_path: None, + runtime_path: None, + }), + ) + .await; + + assert_eq!(resp.status(), StatusCode::OK); + let body = test_json(resp).await; + assert_eq!( + body["engine"], + serde_json::Value::String("no-entity".to_string()) + ); + assert_eq!(body["inject"], serde_json::Value::String(String::new())); + + drop(state); + let _ = writer.await; + } + #[tokio::test] async fn prompt_submit_entity_only_alias_without_aspect_hit_stays_silent() { let (state, writer, _tmp) = test_state("hooks-prompt-submit-entity-only"); diff --git a/platform/daemon/src/hooks.prompt-submit.test.ts b/platform/daemon/src/hooks.prompt-submit.test.ts index b29c808f2..0988b0e17 100644 --- a/platform/daemon/src/hooks.prompt-submit.test.ts +++ b/platform/daemon/src/hooks.prompt-submit.test.ts @@ -304,6 +304,244 @@ describe("handleUserPromptSubmit entity context", () => { expect(result.inject).not.toContain("## Relevant Memory"); }); + it("normalizes possessive entity matches to the dominant canonical entity", async () => { + seedEntityContext(); + getDbAccessor().withWriteTx((db) => { + const now = "2026-05-27T00:00:00.000Z"; + db.prepare( + `INSERT INTO entities + (id, name, canonical_name, entity_type, agent_id, mentions, created_at, updated_at) + VALUES ('entity-signet-possessive', 'Signet''s', 'signet''s', 'tool', 'default', 2, ?, ?)`, + ).run(now, now); + db.prepare( + `INSERT INTO entity_aspects + (id, entity_id, agent_id, name, canonical_name, weight, created_at, updated_at) + VALUES ('aspect-signet-possessive-noise', 'entity-signet-possessive', 'default', + 'noise', 'noise', 1, ?, ?)`, + ).run(now, now); + db.prepare( + `INSERT INTO entity_attributes + (id, aspect_id, agent_id, kind, content, normalized_content, group_key, claim_key, + confidence, importance, status, created_at, updated_at) + VALUES ('attr-signet-possessive-noise', 'aspect-signet-possessive-noise', 'default', + 'attribute', 'Possessive duplicate entity should not win prompt matching.', + 'possessive duplicate entity should not win prompt matching', + 'runtime', 'duplicate_guard', 0.9, 0.9, 'active', ?, ?)`, + ).run(now, now); + }); + fetchEmbeddingMock.mockImplementationOnce(async () => [1, 0]); + + const result = await handleUserPromptSubmit( + { + harness: "codex", + userMessage: "What are Signet's favorite pens?", + sessionKey: "session-possessive-entity", + }, + makeDeps(), + ); + + expect(fetchEmbeddingMock.mock.calls.at(-1)?.[0]).toBe("favorite pens"); + expect(result.engine).toBe("entity-context"); + expect(result.inject).toContain("Signet / preferences / writing / favorite_pen"); + expect(result.inject).not.toContain("Signet's / noise"); + expect(result.inject).not.toContain("Possessive duplicate entity should not win"); + }); + + it("matches missing-apostrophe possessive entity mentions", async () => { + seedEntityContext(); + fetchEmbeddingMock.mockImplementationOnce(async () => [1, 0]); + + const result = await handleUserPromptSubmit( + { + harness: "codex", + userMessage: "What are Signets favorite pens?", + sessionKey: "session-bare-possessive-entity", + }, + makeDeps(), + ); + + expect(fetchEmbeddingMock.mock.calls.at(-1)?.[0]).toBe("favorite pens"); + expect(result.engine).toBe("entity-context"); + expect(result.inject).toContain("Signet / preferences / writing / favorite_pen"); + }); + + it("does not treat generic plural ontology nouns as possessive entity mentions", async () => { + getDbAccessor().withWriteTx((db) => { + const now = "2026-05-27T00:00:00.000Z"; + db.prepare( + `INSERT INTO entities + (id, name, canonical_name, entity_type, agent_id, mentions, created_at, updated_at) + VALUES ('entity-project', 'Project', 'project', 'project', 'default', 50, ?, ?)`, + ).run(now, now); + db.prepare( + `INSERT INTO entities + (id, name, canonical_name, entity_type, agent_id, mentions, created_at, updated_at) + VALUES ('entity-agents', 'Agents', 'agents', 'concept', 'default', 200, ?, ?)`, + ).run(now, now); + db.prepare( + `INSERT INTO entity_aspects + (id, entity_id, agent_id, name, canonical_name, weight, created_at, updated_at) + VALUES ('aspect-project-roadmap', 'entity-project', 'default', 'roadmap', 'roadmap', 1, ?, ?)`, + ).run(now, now); + db.prepare( + `INSERT INTO entity_attributes + (id, aspect_id, agent_id, kind, content, normalized_content, group_key, claim_key, + confidence, importance, status, created_at, updated_at) + VALUES ('attr-project-roadmap', 'aspect-project-roadmap', 'default', 'attribute', + 'Generic project roadmap context should not inject for plural projects.', + 'generic project roadmap context should not inject for plural projects', + 'general', 'roadmap', 0.9, 0.9, 'active', ?, ?)`, + ).run(now, now); + }); + + const result = await handleUserPromptSubmit( + { + harness: "codex", + userMessage: "projects roadmap", + sessionKey: "session-generic-plural-entity", + }, + makeDeps(), + ); + + expect(result.engine).toBe("no-entity"); + expect(result.inject).not.toContain("Project / roadmap"); + + const pluralResult = await handleUserPromptSubmit( + { + harness: "codex", + userMessage: "agents are useful", + sessionKey: "session-generic-plural-entity-exact", + }, + makeDeps(), + ); + + expect(pluralResult.engine).toBe("no-entity"); + }); + + it("ignores disallowed entity types for prompt context", async () => { + getDbAccessor().withWriteTx((db) => { + const now = "2026-05-27T00:00:00.000Z"; + db.prepare( + `INSERT INTO entities + (id, name, canonical_name, entity_type, agent_id, mentions, created_at, updated_at) + VALUES ('entity-claude-code-connector', 'Claude Code connector', 'claude code connector', + 'tool', 'default', 80, ?, ?)`, + ).run(now, now); + db.prepare( + `INSERT INTO entity_aspects + (id, entity_id, agent_id, name, canonical_name, weight, created_at, updated_at) + VALUES ('aspect-claude-code-connector-runtime', 'entity-claude-code-connector', 'default', + 'runtime', 'runtime', 1, ?, ?)`, + ).run(now, now); + db.prepare( + `INSERT INTO entity_attributes + (id, aspect_id, agent_id, kind, content, normalized_content, group_key, claim_key, + confidence, importance, status, created_at, updated_at) + VALUES ('attr-claude-code-connector-runtime', 'aspect-claude-code-connector-runtime', + 'default', 'attribute', 'Claude Code connector setup context should not inject.', + 'claude code connector setup context should not inject', + 'setup', 'routing', 0.9, 0.9, 'active', ?, ?)`, + ).run(now, now); + }); + + const result = await handleUserPromptSubmit( + { + harness: "codex", + userMessage: "Claude Code connector setup", + sessionKey: "session-disallowed-entity-type", + }, + makeDeps(), + ); + + expect(result.engine).toBe("no-entity"); + expect(result.inject).not.toContain("Claude Code connector / runtime"); + }); + + it("ignores low-quality generic entity collisions when a stronger entity is present", async () => { + seedEntityContext(); + getDbAccessor().withWriteTx((db) => { + const now = "2026-05-27T00:00:00.000Z"; + db.prepare( + `INSERT INTO entities + (id, name, canonical_name, entity_type, agent_id, mentions, created_at, updated_at) + VALUES ('entity-favorite', 'Favorite', 'favorite', 'extracted', 'default', 3, ?, ?)`, + ).run(now, now); + db.prepare( + `INSERT INTO entity_aspects + (id, entity_id, agent_id, name, canonical_name, weight, created_at, updated_at) + VALUES ('aspect-favorite-noise', 'entity-favorite', 'default', 'noise', 'noise', 1, ?, ?)`, + ).run(now, now); + db.prepare( + `INSERT INTO entity_attributes + (id, aspect_id, agent_id, kind, content, normalized_content, group_key, claim_key, + confidence, importance, status, created_at, updated_at) + VALUES ('attr-favorite-noise', 'aspect-favorite-noise', 'default', 'attribute', + 'Favorite pens from generic extracted entities should not inject.', + 'favorite pens from generic extracted entities should not inject', + 'runtime', 'generic_collision', 0.9, 0.9, 'active', ?, ?)`, + ).run(now, now); + }); + fetchEmbeddingMock.mockImplementationOnce(async () => [1, 0]); + + const result = await handleUserPromptSubmit( + { + harness: "codex", + userMessage: "What are Signet favorite pens?", + sessionKey: "session-generic-collision", + }, + makeDeps(), + ); + + expect(result.engine).toBe("entity-context"); + expect(result.inject).toContain("Signet / preferences / writing / favorite_pen"); + expect(result.inject).not.toContain("Favorite / noise"); + expect(result.inject).not.toContain("generic extracted entities"); + }); + + it("prefers the longest non-overlapping entity span", async () => { + getDbAccessor().withWriteTx((db) => { + const now = "2026-05-27T00:00:00.000Z"; + for (const [id, name, canonical, mentions] of [ + ["entity-claude-code-connector", "Claude Code connector", "claude code connector", 8], + ["entity-claude-code", "Claude Code", "claude code", 135], + ["entity-claude", "Claude", "claude", 113], + ["entity-code", "code", "code", 15], + ["entity-connector", "connector", "connector", 5], + ] as const) { + db.prepare( + `INSERT INTO entities + (id, name, canonical_name, entity_type, agent_id, mentions, created_at, updated_at) + VALUES (?, ?, ?, 'project', 'default', ?, ?, ?)`, + ).run(id, name, canonical, mentions, now, now); + db.prepare( + `INSERT INTO entity_aspects + (id, entity_id, agent_id, name, canonical_name, weight, created_at, updated_at) + VALUES (?, ?, 'default', 'runtime', 'runtime', 1, ?, ?)`, + ).run(`aspect-${id}`, id, now, now); + db.prepare( + `INSERT INTO entity_attributes + (id, aspect_id, agent_id, kind, content, normalized_content, group_key, claim_key, + confidence, importance, status, created_at, updated_at) + VALUES (?, ?, 'default', 'attribute', ?, ?, 'setup', 'routing', 0.9, 0.9, 'active', ?, ?)`, + ).run(`attr-${id}`, `aspect-${id}`, `${name} setup context.`, `${canonical} setup context`, now, now); + } + }); + + const result = await handleUserPromptSubmit( + { + harness: "codex", + userMessage: "Claude Code connector setup", + sessionKey: "session-longest-span", + }, + makeDeps(), + ); + + expect(result.engine).toBe("entity-context"); + expect(result.inject).toContain("Claude Code connector / runtime / setup / routing"); + expect(result.inject).not.toContain("Claude Code / runtime"); + expect(result.inject).not.toContain("- [attribute] connector / runtime"); + }); + it("keeps semantic attribute scoring scoped to the current agent", async () => { seedEntityContext(); getDbAccessor().withWriteTx((db) => { diff --git a/platform/daemon/src/hooks.ts b/platform/daemon/src/hooks.ts index db90a39ab..716fecfe7 100644 --- a/platform/daemon/src/hooks.ts +++ b/platform/daemon/src/hooks.ts @@ -687,6 +687,13 @@ type PromptEntityMatch = { readonly mentions: number; }; +type PromptEntityCandidate = PromptEntityMatch & { + readonly normalizedPhrase: string; + readonly spanStart: number; + readonly spanEnd: number; + readonly score: number; +}; + type PromptEntityContextLine = { readonly entityName: string; readonly aspectName: string; @@ -706,7 +713,7 @@ type PromptEntityContextLine = { type PromptEntityContextResult = { readonly lines: readonly string[]; readonly memoryCount: number; - readonly engine: "entity-context" | "low-signal" | "no-entity" | "ambiguous-entity" | "no-aspect-hit"; + readonly engine: "entity-context" | "low-signal" | "no-entity" | "no-aspect-hit"; }; const LOW_SIGNAL_PROMPTS = new Set([ @@ -737,15 +744,64 @@ const MIN_PROMPT_ENTITY_MATCH_CHARS = 3; function normalizePromptEntityText(value: string): string { return value .toLowerCase() + .replace(/[’]/g, "'") + .replace(/\b([a-z0-9]+)'s\b/g, "$1") + .replace(/\b([a-z0-9]+)s'\b/g, "$1s") .replace(/[^a-z0-9]+/g, " ") .replace(/\s+/g, " ") .trim(); } -function phraseAppearsInPrompt(prompt: string, phrase: string): boolean { - const normalizedPrompt = ` ${normalizePromptEntityText(prompt)} `; - const normalizedPhrase = normalizePromptEntityText(phrase); - return normalizedPhrase.length >= MIN_PROMPT_ENTITY_MATCH_CHARS && normalizedPrompt.includes(` ${normalizedPhrase} `); +function promptEntityTerms(value: string): string[] { + const normalized = normalizePromptEntityText(value); + return normalized.length > 0 ? normalized.split(" ") : []; +} + +const PROMPT_BARE_POSSESSIVE_DENY_TERMS = new Set([ + "agent", + "artifact", + "concept", + "connector", + "document", + "event", + "memory", + "policy", + "preference", + "product", + "project", + "skill", + "source", + "system", + "task", + "tool", + "workflow", +]); + +function promptEntityTermMatches(promptTerm: string, phraseTerm: string): boolean { + return ( + promptTerm === phraseTerm || + (phraseTerm.length >= 4 && !PROMPT_BARE_POSSESSIVE_DENY_TERMS.has(phraseTerm) && promptTerm === `${phraseTerm}s`) + ); +} + +function promptPhraseSpan(prompt: string, phrase: string): { readonly start: number; readonly end: number } | null { + const promptTerms = promptEntityTerms(prompt); + const phraseTerms = promptEntityTerms(phrase); + if (phraseTerms.join(" ").length < MIN_PROMPT_ENTITY_MATCH_CHARS) return null; + if (phraseTerms.length === 0 || phraseTerms.length > promptTerms.length) return null; + for (let start = 0; start <= promptTerms.length - phraseTerms.length; start += 1) { + if (phraseTerms.every((term, offset) => promptEntityTermMatches(promptTerms[start + offset] ?? "", term))) { + return { start, end: start + phraseTerms.length }; + } + } + return null; +} + +function spansOverlap( + a: { readonly start: number; readonly end: number }, + b: { readonly start: number; readonly end: number }, +): boolean { + return a.start < b.end && b.start < a.end; } function isLowSignalPrompt(userMessage: string): boolean { @@ -773,11 +829,37 @@ function entityContextTablesAvailable(db: ReadDb): boolean { ); } -function resolvePromptEntityMatches( - db: ReadDb, - agentId: string, - userMessage: string, -): PromptEntityMatch[] | "ambiguous" { +const PROMPT_ENTITY_CONTEXT_ALLOWED_TYPES = new Set(["person", "project"]); + +function isPromptEntityContextTypeAllowed(entityType: string): boolean { + return PROMPT_ENTITY_CONTEXT_ALLOWED_TYPES.has(entityType.toLowerCase()); +} + +function isPromptGenericEntityPhrase(phraseTerms: readonly string[]): boolean { + if (phraseTerms.length !== 1) return false; + const term = phraseTerms[0] ?? ""; + if (PROMPT_BARE_POSSESSIVE_DENY_TERMS.has(term)) return true; + return term.endsWith("s") && PROMPT_BARE_POSSESSIVE_DENY_TERMS.has(term.slice(0, -1)); +} + +function scorePromptEntityCandidate(row: { + readonly match_source: "name" | "alias"; + readonly matched_text: string; + readonly mentions: number; + readonly pinned: number; +}): number { + const phrase = normalizePromptEntityText(row.matched_text); + const phraseTerms = promptEntityTerms(row.matched_text); + return ( + phraseTerms.length * 8 + + phrase.length * 0.35 + + Math.log1p(Math.max(0, row.mentions)) + + Math.min(Math.max(0, row.pinned), 1) * 8 + + (row.match_source === "alias" ? -0.25 : 0) + ); +} + +function resolvePromptEntityMatches(db: ReadDb, agentId: string, userMessage: string): PromptEntityMatch[] { if (!entityContextTablesAvailable(db)) return []; const rows = db .prepare( @@ -788,7 +870,8 @@ function resolvePromptEntityMatches( e.description AS description, COALESCE(e.canonical_name, LOWER(e.name)) AS matched_text, 'name' AS match_source, - COALESCE(e.mentions, 0) AS mentions + COALESCE(e.mentions, 0) AS mentions, + COALESCE(e.pinned, 0) AS pinned FROM entities e WHERE e.agent_id = ? AND COALESCE(e.status, 'active') = 'active' @@ -800,7 +883,8 @@ function resolvePromptEntityMatches( e.description AS description, a.alias AS matched_text, 'alias' AS match_source, - COALESCE(e.mentions, 0) AS mentions + COALESCE(e.mentions, 0) AS mentions, + COALESCE(e.pinned, 0) AS pinned FROM entity_aliases a JOIN entities e ON e.id = a.entity_id AND e.agent_id = a.agent_id WHERE a.agent_id = ? @@ -815,30 +899,17 @@ function resolvePromptEntityMatches( matched_text: string; match_source: "name" | "alias"; mentions: number; + pinned: number; }>; - const matched = rows - .filter((row) => phraseAppearsInPrompt(userMessage, row.matched_text)) - .sort((a, b) => { - const lengthDelta = - normalizePromptEntityText(b.matched_text).length - normalizePromptEntityText(a.matched_text).length; - if (lengthDelta !== 0) return lengthDelta; - return b.mentions - a.mentions; - }); - const entityIdsByPhrase = new Map>(); - for (const row of matched) { - const phrase = normalizePromptEntityText(row.matched_text); - if (!entityIdsByPhrase.has(phrase)) entityIdsByPhrase.set(phrase, new Set()); - entityIdsByPhrase.get(phrase)?.add(row.entity_id); - } - if ([...entityIdsByPhrase.values()].some((ids) => ids.size > 1)) return "ambiguous"; - - const seen = new Set(); - const result: PromptEntityMatch[] = []; - for (const row of matched) { - if (seen.has(row.entity_id)) continue; - seen.add(row.entity_id); - result.push({ + const candidatesByPhrase = new Map(); + for (const row of rows) { + if (!isPromptEntityContextTypeAllowed(row.entity_type)) continue; + if (isPromptGenericEntityPhrase(promptEntityTerms(row.matched_text))) continue; + const span = promptPhraseSpan(userMessage, row.matched_text); + if (!span) continue; + const normalizedPhrase = normalizePromptEntityText(row.matched_text); + const candidate: PromptEntityCandidate = { entityId: row.entity_id, entityName: row.entity_name, entityType: row.entity_type, @@ -846,6 +917,53 @@ function resolvePromptEntityMatches( matchedText: row.matched_text, matchSource: row.match_source, mentions: row.mentions, + normalizedPhrase, + spanStart: span.start, + spanEnd: span.end, + score: scorePromptEntityCandidate(row), + }; + candidatesByPhrase.set(normalizedPhrase, [...(candidatesByPhrase.get(normalizedPhrase) ?? []), candidate]); + } + + const phraseWinners = [...candidatesByPhrase.values()] + .map( + (candidates) => + [...candidates].sort( + (a, b) => + b.score - a.score || + b.mentions - a.mentions || + b.normalizedPhrase.length - a.normalizedPhrase.length || + a.entityName.localeCompare(b.entityName), + )[0], + ) + .filter((candidate): candidate is PromptEntityCandidate => !!candidate); + const topScore = phraseWinners.reduce((max, candidate) => Math.max(max, candidate.score), 0); + const minimumScore = Math.max(12, topScore * 0.45); + const ranked = phraseWinners + .filter((candidate) => candidate.score >= minimumScore) + .sort( + (a, b) => + b.score - a.score || + b.spanEnd - b.spanStart - (a.spanEnd - a.spanStart) || + b.mentions - a.mentions || + a.entityName.localeCompare(b.entityName), + ); + const seen = new Set(); + const selectedSpans: Array<{ readonly start: number; readonly end: number }> = []; + const result: PromptEntityMatch[] = []; + for (const row of ranked) { + if (seen.has(row.entityId)) continue; + if (selectedSpans.some((span) => spansOverlap(span, { start: row.spanStart, end: row.spanEnd }))) continue; + seen.add(row.entityId); + selectedSpans.push({ start: row.spanStart, end: row.spanEnd }); + result.push({ + entityId: row.entityId, + entityName: row.entityName, + entityType: row.entityType, + description: row.description, + matchedText: row.matchedText, + matchSource: row.matchSource, + mentions: row.mentions, }); if (result.length >= ENTITY_CONTEXT_MAX_ENTITIES) break; } @@ -864,7 +982,7 @@ function queryWithoutPromptEntities(userMessage: string, entities: ReadonlyArray entities.flatMap((entity) => extractSubstantiveWords(`${entity.entityName} ${entity.matchedText}`)), ); return extractSubstantiveWords(userMessage) - .filter((term) => !entityTerms.has(term)) + .filter((term) => ![...entityTerms].some((entityTerm) => promptEntityTermMatches(term, entityTerm))) .join(" "); } @@ -921,12 +1039,11 @@ function loadEntityContextLines( db: ReadDb, entity: PromptEntityMatch, agentId: string, - userMessage: string, + semanticQuery: string, minScore: number, queryVector: Float32Array | null, ): PromptEntityContextLine[] { - const entityTerms = new Set(extractSubstantiveWords(`${entity.entityName} ${entity.matchedText}`)); - const promptTerms = extractSubstantiveWords(userMessage).filter((term) => !entityTerms.has(term)); + const promptTerms = extractSubstantiveWords(semanticQuery); if (promptTerms.length === 0) return []; const candidateRows = db .prepare( @@ -1115,22 +1232,37 @@ async function buildEntityPromptContext( if (!existsSync(getMemoryDbPath())) return { lines: [], memoryCount: 0, engine: "no-entity" }; if (!hasDbAccessor()) return { lines: [], memoryCount: 0, engine: "no-entity" }; const matches = getDbAccessor().withReadDb((db) => resolvePromptEntityMatches(db, agentId, userMessage)); - if (matches === "ambiguous") return { lines: [], memoryCount: 0, engine: "ambiguous-entity" }; if (matches.length === 0) return { lines: [], memoryCount: 0, engine: "no-entity" }; - const semanticQuery = queryWithoutPromptEntities(userMessage, matches); - if (!semanticQuery) return { lines: [], memoryCount: 0, engine: "no-aspect-hit" }; - let queryVector: Float32Array | null = null; - try { - const vector = await embedFn(semanticQuery, embeddingCfg); - if (vector) queryVector = new Float32Array(vector); - } catch (error) { - logger.warn("hooks", "Entity attribute semantic scoring failed; using lexical attribute scoring", { - error: error instanceof Error ? error.message : String(error), - }); + + const vectorsByEntity = new Map< + string, + { readonly semanticQuery: string; readonly queryVector: Float32Array | null } + >(); + for (const entity of matches) { + const semanticQuery = queryWithoutPromptEntities(userMessage, [entity]); + if (!semanticQuery) continue; + let queryVector: Float32Array | null = null; + try { + const vector = await embedFn(semanticQuery, embeddingCfg); + if (vector) queryVector = new Float32Array(vector); + } catch (error) { + logger.warn("hooks", "Entity attribute semantic scoring failed; using lexical attribute scoring", { + error: error instanceof Error ? error.message : String(error), + }); + } + vectorsByEntity.set(entity.entityId, { semanticQuery, queryVector }); } + if (vectorsByEntity.size === 0) return { lines: [], memoryCount: 0, engine: "no-aspect-hit" }; return getDbAccessor().withReadDb((db) => { const lines = matches.flatMap((entity) => - loadEntityContextLines(db, entity, agentId, userMessage, minScore, queryVector), + loadEntityContextLines( + db, + entity, + agentId, + vectorsByEntity.get(entity.entityId)?.semanticQuery ?? "", + minScore, + vectorsByEntity.get(entity.entityId)?.queryVector ?? null, + ), ); if (lines.length === 0) return { lines: [], memoryCount: 0, engine: "no-aspect-hit" }; const selected = selectWithBudgetSkippingOversized(