diff --git a/openviking/prompts/templates/retrieval/intent_analysis.yaml b/openviking/prompts/templates/retrieval/intent_analysis.yaml index 08ec6c420..68f7580e5 100644 --- a/openviking/prompts/templates/retrieval/intent_analysis.yaml +++ b/openviking/prompts/templates/retrieval/intent_analysis.yaml @@ -190,7 +190,13 @@ template: | - Queries should be short, specific, and retrievable - Avoid lengthy descriptions - 5. **Priority setting**: + 5. **Do not leak conversation transcript into query text**: + - Query text must stay close to the user's current message and the concrete retrieval target + - Do not include speaker roles, chat history summaries, review workflow narration, or multi-step task decomposition in query text + - Recent conversation is for disambiguation only, not for copying into the final query + - If the current message is already a focused search intent, prefer minimal rewrite or keep it unchanged + + 6. **Priority setting**: - 1 = Highest priority (core requirement) - 3 = Medium priority (helpful) - 5 = Lowest priority (optional) diff --git a/openviking/storage/viking_fs.py b/openviking/storage/viking_fs.py index 3cbdfa439..4c2062d73 100644 --- a/openviking/storage/viking_fs.py +++ b/openviking/storage/viking_fs.py @@ -1047,6 +1047,22 @@ async def search( target_abstract=target_abstract, ) typed_queries = query_plan.queries + + # Guardrail: keep retrieval queries semantically anchored to the current + # user query. Intent analysis may expand into task-planning language + # using recent conversation context, which hurts retrieval precision and + # can pollute rerank inputs with session-level narration. + original_query = (query or "").strip() + if original_query: + for tq in typed_queries: + rewritten = (tq.query or "").strip() + if not rewritten: + tq.query = original_query + continue + token_overlap = set(original_query.lower().split()) & set(rewritten.lower().split()) + if len(token_overlap) < 2 and len(original_query) <= 200: + tq.query = original_query + # Set target_directories if target_uri_list: for tq in typed_queries: