Skip to content

Commit 4f511e8

Browse files
authored
Merge pull request #164 from Open-Finance-Lab/fingpt_backend_prod
Web info fetch rewritten
2 parents f10a7d6 + 5da32a6 commit 4f511e8

File tree

13 files changed

+338
-415
lines changed

13 files changed

+338
-415
lines changed

Main/backend/api/views.py

Lines changed: 16 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -121,9 +121,9 @@ def _build_status_frame(label: str, detail: Optional[str] = None, url: Optional[
121121
@csrf_exempt
122122
def chat_response(request: HttpRequest) -> JsonResponse:
123123
"""
124-
Normal Mode: Help user understand the CURRENT website using Playwright navigation.
125-
Agent stays within the current domain and navigates to find information.
126-
Now uses Unified Context Manager for full conversation history.
124+
Thinking Mode: Process user questions using LLM with available MCP tools.
125+
Note: Browser automation has been removed. For web research, use Research mode.
126+
Uses Unified Context Manager for full conversation history.
127127
"""
128128
try:
129129
question = request.GET.get('question', '')
@@ -134,13 +134,7 @@ def chat_response(request: HttpRequest) -> JsonResponse:
134134
if not question:
135135
return JsonResponse({'error': 'No question provided'}, status=400)
136136

137-
# Extract domain from current URL for restriction
138-
restricted_domain = None
139-
if current_url:
140-
parsed = urlparse(current_url)
141-
restricted_domain = parsed.netloc
142-
143-
logger.info(f"Chat request: question='{question[:50]}...', domain={restricted_domain}")
137+
logger.info(f"Chat request: question='{question[:50]}...'")
144138

145139
# Get session ID
146140
session_id = _get_session_id(request)
@@ -173,34 +167,23 @@ def chat_response(request: HttpRequest) -> JsonResponse:
173167
import time
174168
start_time = time.time()
175169

176-
# Use agent with Playwright for domain navigation
170+
# Use agent with MCP tools (SEC-EDGAR, filesystem)
177171
response = ds.create_agent_response(
178172
user_input=question,
179173
message_list=messages,
180174
model=model,
181-
use_playwright=True,
182-
restricted_domain=restricted_domain,
183-
current_url=current_url,
184-
auto_fetch_page=True
175+
current_url=current_url
185176
)
186177

187178
responses[model] = response
188-
# Persist Playwright scraped context, if any
189-
for entry in ds.get_last_playwright_context() or []:
190-
integration.add_playwright_content(
191-
session_id=session_id,
192-
content=entry.get("content", ""),
193-
url=entry.get("url") or current_url,
194-
action=entry.get("action")
195-
)
196179

197180
# Add response to context
198181
response_time_ms = int((time.time() - start_time) * 1000)
199182
context_mgr.add_assistant_message(
200183
session_id=session_id,
201184
content=response,
202185
model=model,
203-
tools_used=["playwright"] if restricted_domain else [],
186+
tools_used=[],
204187
response_time_ms=response_time_ms
205188
)
206189

@@ -354,14 +337,14 @@ def adv_response(request: HttpRequest) -> JsonResponse:
354337
@csrf_exempt
355338
def agent_chat_response(request: HttpRequest) -> JsonResponse:
356339
"""
357-
Process chat response via Agent with optional tools (Playwright, etc.)
358-
Now uses Unified Context Manager for full conversation history.
340+
Process chat response via Agent with MCP tools (SEC-EDGAR, filesystem).
341+
Note: Browser automation has been removed. For web research, use Research mode.
342+
Uses Unified Context Manager for full conversation history.
359343
"""
360344
try:
361345
question = request.GET.get('question', '')
362346
selected_models = request.GET.get('models', 'gpt-4o-mini')
363347
current_url = request.GET.get('current_url', '')
364-
use_playwright = request.GET.get('use_playwright', 'false').lower() == 'true'
365348

366349
if not question:
367350
return JsonResponse({'error': 'No question provided'}, status=400)
@@ -397,35 +380,23 @@ def agent_chat_response(request: HttpRequest) -> JsonResponse:
397380
import time
398381
start_time = time.time()
399382

400-
# Create agent response
383+
# Create agent response with MCP tools
401384
response = ds.create_agent_response(
402385
user_input=question,
403386
message_list=messages,
404387
model=model,
405-
use_playwright=use_playwright,
406-
restricted_domain=None, # No restriction in agent mode
407-
current_url=current_url,
408-
auto_fetch_page=True
388+
current_url=current_url
409389
)
410390

411391
responses[model] = response
412-
# Persist Playwright scraped context, if any
413-
for entry in ds.get_last_playwright_context() or []:
414-
integration.add_playwright_content(
415-
session_id=session_id,
416-
content=entry.get("content", ""),
417-
url=entry.get("url") or current_url,
418-
action=entry.get("action")
419-
)
420392

421393
# Add response to context
422394
response_time_ms = int((time.time() - start_time) * 1000)
423-
tools_used = ["playwright"] if use_playwright else []
424395
context_mgr.add_assistant_message(
425396
session_id=session_id,
426397
content=response,
427398
model=model,
428-
tools_used=tools_used,
399+
tools_used=[],
429400
response_time_ms=response_time_ms
430401
)
431402

@@ -466,8 +437,8 @@ def agent_chat_response(request: HttpRequest) -> JsonResponse:
466437
@csrf_exempt
467438
def chat_response_stream(request: HttpRequest) -> StreamingHttpResponse:
468439
"""
469-
Normal Mode Streaming: Help user understand the CURRENT website using Playwright navigation.
470-
Agent stays within the current domain and navigates to find information.
440+
Thinking Mode Streaming: Process user questions using LLM with available MCP tools.
441+
Note: Browser automation has been removed. For web research, use Research mode.
471442
"""
472443
try:
473444
question = request.GET.get('question', '')
@@ -477,12 +448,6 @@ def chat_response_stream(request: HttpRequest) -> StreamingHttpResponse:
477448
if not question:
478449
return JsonResponse({'error': 'No question provided'}, status=400)
479450

480-
# Extract domain
481-
restricted_domain = None
482-
if current_url:
483-
parsed = urlparse(current_url)
484-
restricted_domain = parsed.netloc
485-
486451
# Get session ID
487452
session_id = _get_session_id(request)
488453

@@ -515,9 +480,6 @@ def event_stream():
515480
yield b'event: connected\ndata: {"status": "connected"}\n\n'
516481
yield _build_status_frame("Preparing context")
517482

518-
if restricted_domain:
519-
yield _build_status_frame("Navigating site", restricted_domain)
520-
521483
import time
522484
start_time = time.time()
523485
aggregated_chunks: List[str] = []
@@ -526,10 +488,7 @@ def event_stream():
526488
user_input=question,
527489
message_list=messages,
528490
model=model,
529-
use_playwright=True,
530-
restricted_domain=restricted_domain,
531491
current_url=current_url,
532-
auto_fetch_page=True,
533492
user_timezone=user_timezone,
534493
user_time=user_time
535494
)
@@ -572,22 +531,13 @@ def event_stream():
572531
if not final_response and aggregated_chunks:
573532
final_response = "".join(aggregated_chunks)
574533

575-
# Persist Playwright scraped context, if any
576-
for entry in ds.get_last_playwright_context() or []:
577-
integration.add_playwright_content(
578-
session_id=session_id,
579-
content=entry.get("content", ""),
580-
url=entry.get("url") or current_url,
581-
action=entry.get("action")
582-
)
583-
584534
# Add to context
585535
response_time_ms = int((time.time() - start_time) * 1000)
586536
context_mgr.add_assistant_message(
587537
session_id=session_id,
588538
content=final_response,
589539
model=model,
590-
tools_used=["playwright"] if restricted_domain else [],
540+
tools_used=[],
591541
response_time_ms=response_time_ms
592542
)
593543

Main/backend/data/site_map.json

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
{
2+
"version": "1.0",
3+
"description": "Route definitions for financial data sources - used by resolve_url tool",
4+
"routes": {
5+
"yahoo_quote": {
6+
"description": "Yahoo Finance stock quote page - shows current price, daily change, volume, market cap",
7+
"url_pattern": "https://finance.yahoo.com/quote/{ticker}",
8+
"params": ["ticker"],
9+
"example": "yahoo_quote with ticker=AAPL"
10+
},
11+
"yahoo_news": {
12+
"description": "Yahoo Finance news page for a company - shows latest news articles",
13+
"url_pattern": "https://finance.yahoo.com/quote/{ticker}/news",
14+
"params": ["ticker"],
15+
"example": "yahoo_news with ticker=AAPL"
16+
},
17+
"yahoo_financials": {
18+
"description": "Yahoo Finance financials page - shows income statement, balance sheet, cash flow",
19+
"url_pattern": "https://finance.yahoo.com/quote/{ticker}/financials",
20+
"params": ["ticker"],
21+
"example": "yahoo_financials with ticker=AAPL"
22+
},
23+
"yahoo_holders": {
24+
"description": "Yahoo Finance holders page - shows major institutional and mutual fund holders",
25+
"url_pattern": "https://finance.yahoo.com/quote/{ticker}/holders",
26+
"params": ["ticker"],
27+
"example": "yahoo_holders with ticker=AAPL"
28+
},
29+
"yahoo_analysis": {
30+
"description": "Yahoo Finance analysis page - shows analyst recommendations, price targets, EPS estimates",
31+
"url_pattern": "https://finance.yahoo.com/quote/{ticker}/analysis",
32+
"params": ["ticker"],
33+
"example": "yahoo_analysis with ticker=AAPL"
34+
},
35+
"yahoo_history": {
36+
"description": "Yahoo Finance historical data page - shows historical prices and dividends",
37+
"url_pattern": "https://finance.yahoo.com/quote/{ticker}/history",
38+
"params": ["ticker"],
39+
"example": "yahoo_history with ticker=AAPL"
40+
},
41+
"yahoo_options": {
42+
"description": "Yahoo Finance options page - shows options chain with calls and puts",
43+
"url_pattern": "https://finance.yahoo.com/quote/{ticker}/options",
44+
"params": ["ticker"],
45+
"example": "yahoo_options with ticker=AAPL"
46+
},
47+
"yahoo_profile": {
48+
"description": "Yahoo Finance company profile - shows company description, sector, industry, executives",
49+
"url_pattern": "https://finance.yahoo.com/quote/{ticker}/profile",
50+
"params": ["ticker"],
51+
"example": "yahoo_profile with ticker=AAPL"
52+
},
53+
"yahoo_statistics": {
54+
"description": "Yahoo Finance statistics page - shows valuation metrics, financial highlights, trading info",
55+
"url_pattern": "https://finance.yahoo.com/quote/{ticker}/key-statistics",
56+
"params": ["ticker"],
57+
"example": "yahoo_statistics with ticker=AAPL"
58+
},
59+
"sec_filings_search": {
60+
"description": "SEC EDGAR filing search - search for company filings by CIK or ticker",
61+
"url_pattern": "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={ticker}&type={filing_type}&dateb=&owner=include&count=40",
62+
"params": ["ticker", "filing_type"],
63+
"param_defaults": {
64+
"filing_type": ""
65+
},
66+
"example": "sec_filings_search with ticker=AAPL and filing_type=10-K"
67+
},
68+
"sec_full_text_search": {
69+
"description": "SEC EDGAR full-text search - search filing content",
70+
"url_pattern": "https://efts.sec.gov/LATEST/search-index?q={query}&dateRange=custom&startdt=2020-01-01&enddt=2025-12-31",
71+
"params": ["query"],
72+
"example": "sec_full_text_search with query=revenue guidance"
73+
},
74+
"bloomberg_quote": {
75+
"description": "Bloomberg quote page - shows stock price and basic info",
76+
"url_pattern": "https://www.bloomberg.com/quote/{ticker}:US",
77+
"params": ["ticker"],
78+
"example": "bloomberg_quote with ticker=AAPL"
79+
},
80+
"marketwatch_quote": {
81+
"description": "MarketWatch stock quote page - shows price, news, analysis",
82+
"url_pattern": "https://www.marketwatch.com/investing/stock/{ticker}",
83+
"params": ["ticker"],
84+
"example": "marketwatch_quote with ticker=AAPL"
85+
},
86+
"marketwatch_financials": {
87+
"description": "MarketWatch financials page - shows income statement, balance sheet",
88+
"url_pattern": "https://www.marketwatch.com/investing/stock/{ticker}/financials",
89+
"params": ["ticker"],
90+
"example": "marketwatch_financials with ticker=AAPL"
91+
},
92+
"finviz_quote": {
93+
"description": "Finviz stock quote page - shows technical chart, fundamentals, news",
94+
"url_pattern": "https://finviz.com/quote.ashx?t={ticker}",
95+
"params": ["ticker"],
96+
"example": "finviz_quote with ticker=AAPL"
97+
},
98+
"cnbc_quote": {
99+
"description": "CNBC stock quote page - shows price, news, analysis",
100+
"url_pattern": "https://www.cnbc.com/quotes/{ticker}",
101+
"params": ["ticker"],
102+
"example": "cnbc_quote with ticker=AAPL"
103+
},
104+
"seeking_alpha": {
105+
"description": "Seeking Alpha stock page - shows analysis, news, earnings",
106+
"url_pattern": "https://seekingalpha.com/symbol/{ticker}",
107+
"params": ["ticker"],
108+
"example": "seeking_alpha with ticker=AAPL"
109+
},
110+
"tipranks_stock": {
111+
"description": "TipRanks stock page - shows analyst ratings, price targets, insider trades",
112+
"url_pattern": "https://www.tipranks.com/stocks/{ticker}",
113+
"params": ["ticker"],
114+
"example": "tipranks_stock with ticker=aapl (lowercase)"
115+
},
116+
"generic_url": {
117+
"description": "Fallback for any custom URL - use when no predefined route matches",
118+
"url_pattern": "{url}",
119+
"params": ["url"],
120+
"example": "generic_url with url=https://example.com/page"
121+
}
122+
},
123+
"route_categories": {
124+
"stock_price": ["yahoo_quote", "bloomberg_quote", "marketwatch_quote", "finviz_quote", "cnbc_quote"],
125+
"company_news": ["yahoo_news", "seeking_alpha"],
126+
"financials": ["yahoo_financials", "marketwatch_financials"],
127+
"analysis": ["yahoo_analysis", "tipranks_stock", "seeking_alpha"],
128+
"sec_filings": ["sec_filings_search", "sec_full_text_search"],
129+
"company_info": ["yahoo_profile", "yahoo_holders", "yahoo_statistics"]
130+
}
131+
}

Main/backend/datascraper/context_integration.py

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def _determine_mode(self, request: HttpRequest, endpoint: str) -> ContextMode:
5858
# Determine based on endpoint
5959
if 'adv' in endpoint or 'advanced' in endpoint:
6060
return ContextMode.RESEARCH
61-
elif 'agent' in endpoint or request.GET.get('use_playwright') == 'true':
61+
elif 'agent' in endpoint:
6262
return ContextMode.THINKING
6363
else:
6464
return ContextMode.NORMAL
@@ -165,26 +165,6 @@ def add_search_results(
165165
}
166166
)
167167

168-
def add_playwright_content(
169-
self,
170-
session_id: str,
171-
content: str,
172-
url: str,
173-
action: Optional[str] = None
174-
) -> None:
175-
"""Add content scraped by Playwright"""
176-
extracted_data = {}
177-
if action:
178-
extracted_data['action'] = action
179-
180-
self.context_manager.add_fetched_context(
181-
session_id=session_id,
182-
source_type="playwright",
183-
content=content,
184-
url=url,
185-
extracted_data=extracted_data
186-
)
187-
188168
def clear_messages(
189169
self,
190170
request: HttpRequest,

0 commit comments

Comments
 (0)