Open-Finance-Lab
diff --git a/Main/backend/api/views.py b/Main/backend/api/views.py
Lines changed: 16 additions & 66 deletions
diff --git a/Main/backend/data/site_map.json b/Main/backend/data/site_map.json
Lines changed: 131 additions & 0 deletions
diff --git a/Main/backend/datascraper/context_integration.py b/Main/backend/datascraper/context_integration.py
Lines changed: 1 addition & 21 deletions
@@ -121,9 +121,9 @@ def _build_status_frame(label: str, detail: Optional[str] = None, url: Optional[
 @csrf_exempt
 def chat_response(request: HttpRequest) -> JsonResponse:
     """
-    Normal Mode: Help user understand the CURRENT website using Playwright navigation.
-    Agent stays within the current domain and navigates to find information.
-    Now uses Unified Context Manager for full conversation history.
+    Thinking Mode: Process user questions using LLM with available MCP tools.
+    Note: Browser automation has been removed. For web research, use Research mode.
+    Uses Unified Context Manager for full conversation history.
     """
     try:
         question = request.GET.get('question', '')
@@ -134,13 +134,7 @@ def chat_response(request: HttpRequest) -> JsonResponse:
         if not question:
             return JsonResponse({'error': 'No question provided'}, status=400)
 
-        # Extract domain from current URL for restriction
-        restricted_domain = None
-        if current_url:
-            parsed = urlparse(current_url)
-            restricted_domain = parsed.netloc
-
-        logger.info(f"Chat request: question='{question[:50]}...', domain={restricted_domain}")
+        logger.info(f"Chat request: question='{question[:50]}...'")
 
         # Get session ID
         session_id = _get_session_id(request)
@@ -173,34 +167,23 @@ def chat_response(request: HttpRequest) -> JsonResponse:
                 import time
                 start_time = time.time()
 
-                # Use agent with Playwright for domain navigation
+                # Use agent with MCP tools (SEC-EDGAR, filesystem)
                 response = ds.create_agent_response(
                     user_input=question,
                     message_list=messages,
                     model=model,
-                    use_playwright=True,
-                    restricted_domain=restricted_domain,
-                    current_url=current_url,
-            auto_fetch_page=True
+                    current_url=current_url
                 )
 
                 responses[model] = response
-                # Persist Playwright scraped context, if any
-                for entry in ds.get_last_playwright_context() or []:
-                    integration.add_playwright_content(
-                        session_id=session_id,
-                        content=entry.get("content", ""),
-                        url=entry.get("url") or current_url,
-                        action=entry.get("action")
-                    )
 
                 # Add response to context
                 response_time_ms = int((time.time() - start_time) * 1000)
                 context_mgr.add_assistant_message(
                     session_id=session_id,
                     content=response,
                     model=model,
-                    tools_used=["playwright"] if restricted_domain else [],
+                    tools_used=[],
                     response_time_ms=response_time_ms
                 )
 
@@ -354,14 +337,14 @@ def adv_response(request: HttpRequest) -> JsonResponse:
 @csrf_exempt
 def agent_chat_response(request: HttpRequest) -> JsonResponse:
     """
-    Process chat response via Agent with optional tools (Playwright, etc.)
-    Now uses Unified Context Manager for full conversation history.
+    Process chat response via Agent with MCP tools (SEC-EDGAR, filesystem).
+    Note: Browser automation has been removed. For web research, use Research mode.
+    Uses Unified Context Manager for full conversation history.
     """
     try:
         question = request.GET.get('question', '')
         selected_models = request.GET.get('models', 'gpt-4o-mini')
         current_url = request.GET.get('current_url', '')
-        use_playwright = request.GET.get('use_playwright', 'false').lower() == 'true'
 
         if not question:
             return JsonResponse({'error': 'No question provided'}, status=400)
@@ -397,35 +380,23 @@ def agent_chat_response(request: HttpRequest) -> JsonResponse:
                 import time
                 start_time = time.time()
 
-                # Create agent response
+                # Create agent response with MCP tools
                 response = ds.create_agent_response(
                     user_input=question,
                     message_list=messages,
                     model=model,
-                    use_playwright=use_playwright,
-                    restricted_domain=None,  # No restriction in agent mode
-                    current_url=current_url,
-                    auto_fetch_page=True
+                    current_url=current_url
                 )
 
                 responses[model] = response
-                # Persist Playwright scraped context, if any
-                for entry in ds.get_last_playwright_context() or []:
-                    integration.add_playwright_content(
-                        session_id=session_id,
-                        content=entry.get("content", ""),
-                        url=entry.get("url") or current_url,
-                        action=entry.get("action")
-                    )
 
                 # Add response to context
                 response_time_ms = int((time.time() - start_time) * 1000)
-                tools_used = ["playwright"] if use_playwright else []
                 context_mgr.add_assistant_message(
                     session_id=session_id,
                     content=response,
                     model=model,
-                    tools_used=tools_used,
+                    tools_used=[],
                     response_time_ms=response_time_ms
                 )
 
@@ -466,8 +437,8 @@ def agent_chat_response(request: HttpRequest) -> JsonResponse:
 @csrf_exempt
 def chat_response_stream(request: HttpRequest) -> StreamingHttpResponse:
     """
-    Normal Mode Streaming: Help user understand the CURRENT website using Playwright navigation.
-    Agent stays within the current domain and navigates to find information.
+    Thinking Mode Streaming: Process user questions using LLM with available MCP tools.
+    Note: Browser automation has been removed. For web research, use Research mode.
     """
     try:
         question = request.GET.get('question', '')
@@ -477,12 +448,6 @@ def chat_response_stream(request: HttpRequest) -> StreamingHttpResponse:
         if not question:
             return JsonResponse({'error': 'No question provided'}, status=400)
 
-        # Extract domain
-        restricted_domain = None
-        if current_url:
-            parsed = urlparse(current_url)
-            restricted_domain = parsed.netloc
-
         # Get session ID
         session_id = _get_session_id(request)
 
@@ -515,9 +480,6 @@ def event_stream():
                 yield b'event: connected\ndata: {"status": "connected"}\n\n'
                 yield _build_status_frame("Preparing context")
 
-                if restricted_domain:
-                    yield _build_status_frame("Navigating site", restricted_domain)
-
                 import time
                 start_time = time.time()
                 aggregated_chunks: List[str] = []
@@ -526,10 +488,7 @@ def event_stream():
                     user_input=question,
                     message_list=messages,
                     model=model,
-                    use_playwright=True,
-                    restricted_domain=restricted_domain,
                     current_url=current_url,
-                    auto_fetch_page=True,
                     user_timezone=user_timezone,
                     user_time=user_time
                 )
@@ -572,22 +531,13 @@ def event_stream():
                 if not final_response and aggregated_chunks:
                     final_response = "".join(aggregated_chunks)
 
-                # Persist Playwright scraped context, if any
-                for entry in ds.get_last_playwright_context() or []:
-                    integration.add_playwright_content(
-                        session_id=session_id,
-                        content=entry.get("content", ""),
-                        url=entry.get("url") or current_url,
-                        action=entry.get("action")
-                    )
-
                 # Add to context
                 response_time_ms = int((time.time() - start_time) * 1000)
                 context_mgr.add_assistant_message(
                     session_id=session_id,
                     content=final_response,
                     model=model,
-                    tools_used=["playwright"] if restricted_domain else [],
+                    tools_used=[],
                     response_time_ms=response_time_ms
                 )
 
 
@@ -0,0 +1,131 @@
+{
+  "version": "1.0",
+  "description": "Route definitions for financial data sources - used by resolve_url tool",
+  "routes": {
+    "yahoo_quote": {
+      "description": "Yahoo Finance stock quote page - shows current price, daily change, volume, market cap",
+      "url_pattern": "https://finance.yahoo.com/quote/{ticker}",
+      "params": ["ticker"],
+      "example": "yahoo_quote with ticker=AAPL"
+    },
+    "yahoo_news": {
+      "description": "Yahoo Finance news page for a company - shows latest news articles",
+      "url_pattern": "https://finance.yahoo.com/quote/{ticker}/news",
+      "params": ["ticker"],
+      "example": "yahoo_news with ticker=AAPL"
+    },
+    "yahoo_financials": {
+      "description": "Yahoo Finance financials page - shows income statement, balance sheet, cash flow",
+      "url_pattern": "https://finance.yahoo.com/quote/{ticker}/financials",
+      "params": ["ticker"],
+      "example": "yahoo_financials with ticker=AAPL"
+    },
+    "yahoo_holders": {
+      "description": "Yahoo Finance holders page - shows major institutional and mutual fund holders",
+      "url_pattern": "https://finance.yahoo.com/quote/{ticker}/holders",
+      "params": ["ticker"],
+      "example": "yahoo_holders with ticker=AAPL"
+    },
+    "yahoo_analysis": {
+      "description": "Yahoo Finance analysis page - shows analyst recommendations, price targets, EPS estimates",
+      "url_pattern": "https://finance.yahoo.com/quote/{ticker}/analysis",
+      "params": ["ticker"],
+      "example": "yahoo_analysis with ticker=AAPL"
+    },
+    "yahoo_history": {
+      "description": "Yahoo Finance historical data page - shows historical prices and dividends",
+      "url_pattern": "https://finance.yahoo.com/quote/{ticker}/history",
+      "params": ["ticker"],
+      "example": "yahoo_history with ticker=AAPL"
+    },
+    "yahoo_options": {
+      "description": "Yahoo Finance options page - shows options chain with calls and puts",
+      "url_pattern": "https://finance.yahoo.com/quote/{ticker}/options",
+      "params": ["ticker"],
+      "example": "yahoo_options with ticker=AAPL"
+    },
+    "yahoo_profile": {
+      "description": "Yahoo Finance company profile - shows company description, sector, industry, executives",
+      "url_pattern": "https://finance.yahoo.com/quote/{ticker}/profile",
+      "params": ["ticker"],
+      "example": "yahoo_profile with ticker=AAPL"
+    },
+    "yahoo_statistics": {
+      "description": "Yahoo Finance statistics page - shows valuation metrics, financial highlights, trading info",
+      "url_pattern": "https://finance.yahoo.com/quote/{ticker}/key-statistics",
+      "params": ["ticker"],
+      "example": "yahoo_statistics with ticker=AAPL"
+    },
+    "sec_filings_search": {
+      "description": "SEC EDGAR filing search - search for company filings by CIK or ticker",
+      "url_pattern": "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={ticker}&type={filing_type}&dateb=&owner=include&count=40",
+      "params": ["ticker", "filing_type"],
+      "param_defaults": {
+        "filing_type": ""
+      },
+      "example": "sec_filings_search with ticker=AAPL and filing_type=10-K"
+    },
+    "sec_full_text_search": {
+      "description": "SEC EDGAR full-text search - search filing content",
+      "url_pattern": "https://efts.sec.gov/LATEST/search-index?q={query}&dateRange=custom&startdt=2020-01-01&enddt=2025-12-31",
+      "params": ["query"],
+      "example": "sec_full_text_search with query=revenue guidance"
+    },
+    "bloomberg_quote": {
+      "description": "Bloomberg quote page - shows stock price and basic info",
+      "url_pattern": "https://www.bloomberg.com/quote/{ticker}:US",
+      "params": ["ticker"],
+      "example": "bloomberg_quote with ticker=AAPL"
+    },
+    "marketwatch_quote": {
+      "description": "MarketWatch stock quote page - shows price, news, analysis",
+      "url_pattern": "https://www.marketwatch.com/investing/stock/{ticker}",
+      "params": ["ticker"],
+      "example": "marketwatch_quote with ticker=AAPL"
+    },
+    "marketwatch_financials": {
+      "description": "MarketWatch financials page - shows income statement, balance sheet",
+      "url_pattern": "https://www.marketwatch.com/investing/stock/{ticker}/financials",
+      "params": ["ticker"],
+      "example": "marketwatch_financials with ticker=AAPL"
+    },
+    "finviz_quote": {
+      "description": "Finviz stock quote page - shows technical chart, fundamentals, news",
+      "url_pattern": "https://finviz.com/quote.ashx?t={ticker}",
+      "params": ["ticker"],
+      "example": "finviz_quote with ticker=AAPL"
+    },
+    "cnbc_quote": {
+      "description": "CNBC stock quote page - shows price, news, analysis",
+      "url_pattern": "https://www.cnbc.com/quotes/{ticker}",
+      "params": ["ticker"],
+      "example": "cnbc_quote with ticker=AAPL"
+    },
+    "seeking_alpha": {
+      "description": "Seeking Alpha stock page - shows analysis, news, earnings",
+      "url_pattern": "https://seekingalpha.com/symbol/{ticker}",
+      "params": ["ticker"],
+      "example": "seeking_alpha with ticker=AAPL"
+    },
+    "tipranks_stock": {
+      "description": "TipRanks stock page - shows analyst ratings, price targets, insider trades",
+      "url_pattern": "https://www.tipranks.com/stocks/{ticker}",
+      "params": ["ticker"],
+      "example": "tipranks_stock with ticker=aapl (lowercase)"
+    },
+    "generic_url": {
+      "description": "Fallback for any custom URL - use when no predefined route matches",
+      "url_pattern": "{url}",
+      "params": ["url"],
+      "example": "generic_url with url=https://example.com/page"
+    }
+  },
+  "route_categories": {
+    "stock_price": ["yahoo_quote", "bloomberg_quote", "marketwatch_quote", "finviz_quote", "cnbc_quote"],
+    "company_news": ["yahoo_news", "seeking_alpha"],
+    "financials": ["yahoo_financials", "marketwatch_financials"],
+    "analysis": ["yahoo_analysis", "tipranks_stock", "seeking_alpha"],
+    "sec_filings": ["sec_filings_search", "sec_full_text_search"],
+    "company_info": ["yahoo_profile", "yahoo_holders", "yahoo_statistics"]
+  }
+}
@@ -58,7 +58,7 @@ def _determine_mode(self, request: HttpRequest, endpoint: str) -> ContextMode:
         # Determine based on endpoint
         if 'adv' in endpoint or 'advanced' in endpoint:
             return ContextMode.RESEARCH
-        elif 'agent' in endpoint or request.GET.get('use_playwright') == 'true':
+        elif 'agent' in endpoint:
             return ContextMode.THINKING
         else:
             return ContextMode.NORMAL
@@ -165,26 +165,6 @@ def add_search_results(
                 }
             )
 
-    def add_playwright_content(
-        self,
-        session_id: str,
-        content: str,
-        url: str,
-        action: Optional[str] = None
-    ) -> None:
-        """Add content scraped by Playwright"""
-        extracted_data = {}
-        if action:
-            extracted_data['action'] = action
-
-        self.context_manager.add_fetched_context(
-            session_id=session_id,
-            source_type="playwright",
-            content=content,
-            url=url,
-            extracted_data=extracted_data
-        )
-
     def clear_messages(
         self,
         request: HttpRequest,