Skip to content

Commit 7598009

Browse files
committed
add LLM StreamingMode, change Editor, add ReloadButton
2 parents 4a6615a + d4c6ba5 commit 7598009

File tree

5 files changed

+88
-123
lines changed

5 files changed

+88
-123
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ nohup.out
44
__pycache__/
55
venv
66
./venv
7-
uploads/*
7+
uploads/*

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# Makefile
22
run:
3-
uvicorn main:app --host 0.0.0.0 --port 8080 --reload
3+
uvicorn main:app --host 0.0.0.0 --port 8080 --reload --http h11

routers/file.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -177,14 +177,18 @@ def download_file(
177177
if not os.path.exists(file_path):
178178
raise HTTPException(status_code=404, detail="서버에 파일이 존재하지 않습니다.")
179179

180-
# original_name 을 percent-encoding 해서 ASCII 만으로 헤더 구성
181-
filename_quoted = quote(file_obj.original_name)
182-
content_disposition = f"inline; filename*=UTF-8''{filename_quoted}"
183-
180+
# filename_star = file_obj.original_name
181+
# return FileResponse(
182+
# path=file_path,
183+
# media_type=file_obj.content_type,
184+
# headers={"Content-Disposition": f"inline; filename*=UTF-8''{filename_star}"}
185+
# )
186+
# FastAPI가 내부에서 UTF-8로 인코딩된 Content-Disposition 헤더를 생성해 줌
184187
return FileResponse(
185188
path=file_path,
186189
media_type=file_obj.content_type,
187-
headers={"Content-Disposition": content_disposition}
190+
filename=file_obj.original_name,
191+
background=None
188192
)
189193

190194

routers/note.py

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
import os
22
from dotenv import load_dotenv
3-
from fastapi import APIRouter, Depends, HTTPException
3+
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
4+
from fastapi.responses import StreamingResponse
45
from sqlalchemy.orm import Session
56
from typing import List
67
from datetime import datetime
8+
import traceback
79

8-
from db import get_db
10+
from db import get_db, SessionLocal
911
from models.note import Note
1012
from schemas.note import NoteCreate, NoteUpdate, NoteResponse, FavoriteUpdate
1113
from utils.jwt_utils import get_current_user
14+
from fastapi.responses import StreamingResponse
15+
from utils.llm import stream_summary_with_langchain
1216

1317
load_dotenv()
1418
HF_TOKEN = os.getenv("HF_API_TOKEN")
@@ -140,35 +144,39 @@ def toggle_favorite(
140144
db.refresh(note)
141145
return note
142146

143-
144-
# 8) 노트 요약 (LLM 호출)
145-
@router.post("/notes/{note_id}/summarize", response_model=NoteResponse)
146-
def summarize_note(
147+
def save_summary(note_id: int, text: str):
    """Overwrite a note's content with *text* using a dedicated DB session.

    A fresh ``SessionLocal`` session is opened here rather than reusing a
    request-scoped one. If no note with ``note_id`` exists, nothing is
    written.

    Args:
        note_id: Primary key of the note to update.
        text: New content to store on the note.
    """
    session = SessionLocal()
    try:
        target = session.query(Note).filter(Note.id == note_id).first()
        if not target:
            # Nothing to update.
            return
        target.content = text
        target.updated_at = datetime.utcnow()
        session.commit()
    finally:
        # Always release the session, including on the early return above.
        session.close()
157+
158+
@router.post("/notes/{note_id}/summarize")
159+
async def summarize_stream_langchain(
147160
note_id: int,
161+
background_tasks: BackgroundTasks,
148162
db: Session = Depends(get_db),
149163
user = Depends(get_current_user)
150164
):
151-
note = db.query(Note).filter(
152-
Note.id == note_id, Note.user_id == user.u_id
153-
).first()
154-
if not note:
155-
raise HTTPException(status_code=404, detail="Note not found")
156-
157-
original = note.content or ""
158-
if not original.strip():
159-
raise HTTPException(status_code=400, detail="내용이 비어 있어 요약할 수 없습니다.")
160-
161-
# ────────────────────────────────────────────────────────────────────
162-
# 실제 요약 함수 호출 (지연 임포트)
163-
try:
164-
from utils.llm import summarize_with_qwen3
165-
summary_text = summarize_with_qwen3(original)
166-
except Exception as e:
167-
raise HTTPException(status_code=500, detail=f"요약 중 오류 발생: {e}")
168-
# ────────────────────────────────────────────────────────────────────
169-
170-
note.content = summary_text
171-
note.updated_at = datetime.utcnow()
172-
db.commit()
173-
db.refresh(note)
174-
return note
165+
note = db.query(Note).filter(Note.id == note_id, Note.user_id == user.u_id).first()
166+
if not note or not (note.content or "").strip():
167+
raise HTTPException(status_code=404, detail="요약 대상 없음")
168+
169+
async def event_gen():
170+
parts = []
171+
async for sse in stream_summary_with_langchain(note.content):
172+
parts.append(sse.removeprefix("data: ").strip())
173+
yield sse.encode()
174+
full = "".join(parts).strip()
175+
if full:
176+
background_tasks.add_task(save_summary, note.id, full)
177+
178+
return StreamingResponse(
179+
event_gen(),
180+
media_type="text/event-stream",
181+
headers={"Cache-Control": "no-cache"}
182+
)

utils/llm.py

Lines changed: 39 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,97 +1,50 @@
1-
# ~/noteflow/Backend/utils/llm.py
1+
from langchain.callbacks import AsyncIteratorCallbackHandler
2+
from langchain_ollama import ChatOllama
3+
from langchain.schema import HumanMessage, SystemMessage
4+
import re, asyncio
25

3-
import torch
4-
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM
6+
_THOUGHT_PAT = re.compile(
7+
r"^\s*(okay|let\s*me|i\s*need\s*to|first[, ]|then[, ]|next[, ]|in summary|먼저|그\s*다음|요약하면)",
8+
re.I,
9+
)
510

6-
_MODEL_NAME = "Qwen/Qwen3-8B"
7-
8-
# 전역 변수: 최초에는 토크나이저/모델이 None
9-
_tokenizer = None
10-
_model = None
11-
12-
def _load_model():
13-
"""
14-
summarize_with_qwen3()가 최초 호출될 때만 Qwen3-8B 모델과 토크나이저를 메모리에 로드합니다.
15-
"""
16-
global _tokenizer, _model
17-
if _model is None or _tokenizer is None:
18-
# 1) Config 불러와서 parallel_style 지정
19-
config = AutoConfig.from_pretrained(
20-
_MODEL_NAME,
21-
trust_remote_code=True
22-
)
23-
# 반드시 "auto"로 지정 (NoneType 오류 방지)
24-
config.parallel_style = "auto"
25-
26-
# 2) 토크나이저 로드
27-
_tokenizer = AutoTokenizer.from_pretrained(
28-
_MODEL_NAME,
29-
trust_remote_code=True
30-
)
31-
32-
# 3) 모델 로드 시 config 인자 추가
33-
_model = AutoModelForCausalLM.from_pretrained(
34-
_MODEL_NAME,
35-
config=config, # custom config 전달
36-
torch_dtype="auto",
37-
device_map="auto",
38-
trust_remote_code=True
39-
)
40-
_model.eval()
41-
42-
43-
def summarize_with_qwen3(
44-
text: str,
45-
max_new_tokens: int = 256,
46-
temperature: float = 0.6
47-
) -> str:
11+
async def stream_summary_with_langchain(text: str):
    """Stream a summary of *text* as Server-Sent Event strings.

    Tokens are requested from an Ollama chat model through LangChain and
    accumulated in a buffer. Whenever the buffer ends with a newline or one
    of the sentence terminators below, it is stripped and yielded as
    ``"data: <chunk>\\n\\n"``, unless the chunk matches ``_THOUGHT_PAT``, in
    which case it is dropped. Text still buffered when the token stream ends
    is flushed the same way so the final sentence is not lost.

    Args:
        text: The document to summarize.

    Yields:
        SSE-formatted strings, one per completed chunk.

    Raises:
        Exception: Any exception raised by the generation task, which is
            awaited after the token stream has ended.
    """
    # 1) Callback handler whose `aiter()` is consumed below for tokens.
    cb = AsyncIteratorCallbackHandler()

    # 2) Ollama chat model (streaming=True)
    llm = ChatOllama(
        base_url="http://localhost:11434",
        model="qwen3:8b",
        streaming=True,
        callbacks=[cb],
        temperature=0.6,
    )

    # 3) Prompt
    messages = [
        SystemMessage(
            content="다음 텍스트를 한국어로 간결하게 요약하세요. "
                    "사고 과정(Chain‑of‑Thought)은 절대 출력하지 마세요./no_think"
        ),
        HumanMessage(content=text),
    ]

    # 4) Start generation as a task so tokens can be consumed while it runs.
    task = asyncio.create_task(llm.agenerate([messages]))

    terminators = ("\n", "。", ".", "…")
    buffer = ""
    try:
        async for token in cb.aiter():
            buffer += token
            if buffer.endswith(terminators):
                line = buffer.strip()
                buffer = ""

                if not _THOUGHT_PAT.match(line):
                    yield f"data: {line}\n\n"  # send one SSE chunk

        # Flush text that never reached a terminator before the stream ended.
        tail = buffer.strip()
        if tail and not _THOUGHT_PAT.match(tail):
            yield f"data: {tail}\n\n"

        await task  # propagate any exception from generation
    finally:
        # If the consumer stopped early or an error interrupted the loop,
        # do not leave the generation task running in the background.
        if not task.done():
            task.cancel()

0 commit comments

Comments
 (0)