Skip to content

Commit 5185a1e

Browse files
committed
feat: 중복 문서 제거
1 parent b20fe22 commit 5185a1e

1 file changed

Lines changed: 13 additions & 7 deletions

File tree

rag_handler_milvus.py

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -75,12 +75,18 @@ async def stream_rag_answer(user_id: str, query: str, is_new_topic: bool):
7575
raw_history = await get_chat_history(user_id)
7676
history = [parse_message(msg) for msg in raw_history]
7777

78-
# 유사 문서 검색
79-
retrieved_docs = vectorstore.similarity_search(query, k=10)
80-
docs_content = "\n---------------------------\n".join([
81-
f"[출처: {doc.metadata.get('urlTitle', '제목 없음')}] {doc.page_content}\n링크: {doc.metadata.get('scrapUrl', '링크 없음')}"
82-
for doc in retrieved_docs
83-
])
78+
# 유사 문서 검색 (중복 제거)
79+
raw_docs = vectorstore.similarity_search(query, k=20)
80+
unique_docs = []
81+
seen_ids = set()
82+
83+
for doc in raw_docs:
84+
doc_id = doc.metadata.get("id")
85+
if doc_id and doc_id not in seen_ids:
86+
seen_ids.add(doc_id)
87+
unique_docs.append(doc)
88+
if len(unique_docs) == 10:
89+
break
8490

8591
# Prompt 구성
8692
prompt = PromptTemplate(
@@ -110,7 +116,7 @@ async def stream_rag_answer(user_id: str, query: str, is_new_topic: bool):
110116
input_variables=["documents", "query"]
111117
)
112118

113-
user_prompt = prompt.format(documents=docs_content, query=query)
119+
user_prompt = prompt.format(documents=unique_docs, query=query)
114120
messages = history[-MAX_MESSAGES:] + [HumanMessage(content=user_prompt)]
115121

116122
async def generator():

0 commit comments

Comments
 (0)