diff --git a/README.md b/README.md index 23599b3cf8..6556e72afb 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ In a bit more detail, here is what happens when you submit a query: This project was 99% vibe coded as a fun Saturday hack because I wanted to explore and evaluate a number of LLMs side by side in the process of [reading books together with LLMs](https://x.com/karpathy/status/1990577951671509438). It's nice and useful to see multiple responses side by side, and also the cross-opinions of all LLMs on each other's outputs. I'm not going to support it in any way, it's provided here as is for other people's inspiration and I don't intend to improve it. Code is ephemeral now and libraries are over, ask your LLM to change it in whatever way you like. -## Setup +## Setup ### 1. Install Dependencies diff --git a/backend/config.py b/backend/config.py index a9cf7c473e..4b03a84fa7 100644 --- a/backend/config.py +++ b/backend/config.py @@ -1,6 +1,7 @@ """Configuration for the LLM Council.""" import os +import json from dotenv import load_dotenv load_dotenv() @@ -8,19 +9,46 @@ # OpenRouter API key OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") -# Council members - list of OpenRouter model identifiers -COUNCIL_MODELS = [ - "openai/gpt-5.1", - "google/gemini-3-pro-preview", - "anthropic/claude-sonnet-4.5", - "x-ai/grok-4", -] - -# Chairman model - synthesizes final response -CHAIRMAN_MODEL = "google/gemini-3-pro-preview" - # OpenRouter API endpoint OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions" # Data directory for conversation storage DATA_DIR = "data/conversations" + +# Config file for dynamic model settings +CONFIG_FILE = "data/council_config.json" + +# Default models +DEFAULT_COUNCIL_MODELS = [ + "google/gemini-2.5-flash-lite", + "deepseek/deepseek-chat-v3.1", + "openai/gpt-5-mini", +] +DEFAULT_CHAIRMAN_MODEL = "openai/gpt-5-mini" + + +def load_config(): + """Load config from file, fallback to defaults.""" + if 
os.path.exists(CONFIG_FILE): + with open(CONFIG_FILE, "r") as f: + return json.load(f) + return { + "council_models": DEFAULT_COUNCIL_MODELS, + "chairman_model": DEFAULT_CHAIRMAN_MODEL, + } + + +def save_config(council_models, chairman_model): + """Save config to file.""" + os.makedirs(os.path.dirname(CONFIG_FILE), exist_ok=True) + with open(CONFIG_FILE, "w") as f: + json.dump({ + "council_models": council_models, + "chairman_model": chairman_model, + }, f) + + +# Load on startup +_config = load_config() +COUNCIL_MODELS = _config["council_models"] +CHAIRMAN_MODEL = _config["chairman_model"] diff --git a/backend/council.py b/backend/council.py index 5069abec98..95807dedb9 100644 --- a/backend/council.py +++ b/backend/council.py @@ -1,29 +1,26 @@ """3-stage LLM Council orchestration.""" +import logging from typing import List, Dict, Any, Tuple from .openrouter import query_models_parallel, query_model -from .config import COUNCIL_MODELS, CHAIRMAN_MODEL +from .config import load_config +from . import storage + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) async def stage1_collect_responses(user_query: str) -> List[Dict[str, Any]]: """ Stage 1: Collect individual responses from all council models. 
- - Args: - user_query: The user's question - - Returns: - List of dicts with 'model' and 'response' keys """ messages = [{"role": "user", "content": user_query}] - # Query all models in parallel - responses = await query_models_parallel(COUNCIL_MODELS, messages) + responses = await query_models_parallel(load_config()["council_models"], messages) - # Format results stage1_results = [] for model, response in responses.items(): - if response is not None: # Only include successful responses + if response is not None: stage1_results.append({ "model": model, "response": response.get('content', '') @@ -38,24 +35,14 @@ async def stage2_collect_rankings( ) -> Tuple[List[Dict[str, Any]], Dict[str, str]]: """ Stage 2: Each model ranks the anonymized responses. - - Args: - user_query: The original user query - stage1_results: Results from Stage 1 - - Returns: - Tuple of (rankings list, label_to_model mapping) """ - # Create anonymized labels for responses (Response A, Response B, etc.) - labels = [chr(65 + i) for i in range(len(stage1_results))] # A, B, C, ... + labels = [chr(65 + i) for i in range(len(stage1_results))] - # Create mapping from label to model name label_to_model = { f"Response {label}": result['model'] for label, result in zip(labels, stage1_results) } - # Build the ranking prompt responses_text = "\n\n".join([ f"Response {label}:\n{result['response']}" for label, result in zip(labels, stage1_results) @@ -94,10 +81,8 @@ async def stage2_collect_rankings( messages = [{"role": "user", "content": ranking_prompt}] - # Get rankings from all council models in parallel - responses = await query_models_parallel(COUNCIL_MODELS, messages) + responses = await query_models_parallel(load_config()["council_models"], messages) - # Format results stage2_results = [] for model, response in responses.items(): if response is not None: @@ -119,16 +104,9 @@ async def stage3_synthesize_final( ) -> Dict[str, Any]: """ Stage 3: Chairman synthesizes final response. 
- - Args: - user_query: The original user query - stage1_results: Individual model responses from Stage 1 - stage2_results: Rankings from Stage 2 - - Returns: - Dict with 'model' and 'response' keys """ - # Build comprehensive context for chairman + chairman_model = load_config()["chairman_model"] + stage1_text = "\n\n".join([ f"Model: {result['model']}\nResponse: {result['response']}" for result in stage1_results @@ -158,18 +136,16 @@ async def stage3_synthesize_final( messages = [{"role": "user", "content": chairman_prompt}] - # Query the chairman model - response = await query_model(CHAIRMAN_MODEL, messages) + response = await query_model(chairman_model, messages) if response is None: - # Fallback if chairman fails return { - "model": CHAIRMAN_MODEL, + "model": chairman_model, "response": "Error: Unable to generate final synthesis." } return { - "model": CHAIRMAN_MODEL, + "model": chairman_model, "response": response.get('content', '') } @@ -177,33 +153,19 @@ async def stage3_synthesize_final( def parse_ranking_from_text(ranking_text: str) -> List[str]: """ Parse the FINAL RANKING section from the model's response. - - Args: - ranking_text: The full text response from the model - - Returns: - List of response labels in ranked order """ import re - # Look for "FINAL RANKING:" section if "FINAL RANKING:" in ranking_text: - # Extract everything after "FINAL RANKING:" parts = ranking_text.split("FINAL RANKING:") if len(parts) >= 2: ranking_section = parts[1] - # Try to extract numbered list format (e.g., "1. 
Response A") - # This pattern looks for: number, period, optional space, "Response X" numbered_matches = re.findall(r'\d+\.\s*Response [A-Z]', ranking_section) if numbered_matches: - # Extract just the "Response X" part return [re.search(r'Response [A-Z]', m).group() for m in numbered_matches] - - # Fallback: Extract all "Response X" patterns in order matches = re.findall(r'Response [A-Z]', ranking_section) return matches - # Fallback: try to find any "Response X" patterns in order matches = re.findall(r'Response [A-Z]', ranking_text) return matches @@ -214,23 +176,13 @@ def calculate_aggregate_rankings( ) -> List[Dict[str, Any]]: """ Calculate aggregate rankings across all models. - - Args: - stage2_results: Rankings from each model - label_to_model: Mapping from anonymous labels to model names - - Returns: - List of dicts with model name and average rank, sorted best to worst """ from collections import defaultdict - # Track positions for each model model_positions = defaultdict(list) for ranking in stage2_results: ranking_text = ranking['ranking'] - - # Parse the ranking from the structured format parsed_ranking = parse_ranking_from_text(ranking_text) for position, label in enumerate(parsed_ranking, start=1): @@ -238,7 +190,6 @@ def calculate_aggregate_rankings( model_name = label_to_model[label] model_positions[model_name].append(position) - # Calculate average position for each model aggregate = [] for model, positions in model_positions.items(): if positions: @@ -249,7 +200,6 @@ def calculate_aggregate_rankings( "rankings_count": len(positions) }) - # Sort by average rank (lower is better) aggregate.sort(key=lambda x: x['average_rank']) return aggregate @@ -258,12 +208,6 @@ def calculate_aggregate_rankings( async def generate_conversation_title(user_query: str) -> str: """ Generate a short title for a conversation based on the first user message. 
- - Args: - user_query: The first user message - - Returns: - A short title (3-5 words) """ title_prompt = f"""Generate a very short title (3-5 words maximum) that summarizes the following question. The title should be concise and descriptive. Do not use quotes or punctuation in the title. @@ -274,19 +218,14 @@ async def generate_conversation_title(user_query: str) -> str: messages = [{"role": "user", "content": title_prompt}] - # Use gemini-2.5-flash for title generation (fast and cheap) response = await query_model("google/gemini-2.5-flash", messages, timeout=30.0) if response is None: - # Fallback to a generic title return "New Conversation" title = response.get('content', 'New Conversation').strip() - - # Clean up the title - remove quotes, limit length title = title.strip('"\'') - # Truncate if too long if len(title) > 50: title = title[:47] + "..." @@ -296,37 +235,35 @@ async def generate_conversation_title(user_query: str) -> str: async def run_full_council(user_query: str) -> Tuple[List, List, Dict, Dict]: """ Run the complete 3-stage council process. - - Args: - user_query: The user's question - - Returns: - Tuple of (stage1_results, stage2_results, stage3_result, metadata) """ - # Stage 1: Collect individual responses stage1_results = await stage1_collect_responses(user_query) - # If no models responded successfully, return error if not stage1_results: return [], [], { "model": "error", "response": "All models failed to respond. Please try again." 
}, {} - # Stage 2: Collect rankings stage2_results, label_to_model = await stage2_collect_rankings(user_query, stage1_results) - # Calculate aggregate rankings aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model) - # Stage 3: Synthesize final answer + # Record model stats in Supabase + logger.info(f"DEBUG: Recording stats for models: {[r['model'] for r in stage1_results]}") + logger.info(f"DEBUG: aggregate_rankings: {aggregate_rankings}") + try: + all_models = [r["model"] for r in stage1_results] + storage.record_model_appearances(all_models, aggregate_rankings) + logger.info("DEBUG: Stats recorded successfully") + except Exception as e: + logger.exception(f"STATS ERROR: {e}") + stage3_result = await stage3_synthesize_final( user_query, stage1_results, stage2_results ) - # Prepare metadata metadata = { "label_to_model": label_to_model, "aggregate_rankings": aggregate_rankings diff --git a/backend/main.py b/backend/main.py index e33ce59a6d..55816e1e53 100644 --- a/backend/main.py +++ b/backend/main.py @@ -1,5 +1,6 @@ """FastAPI backend for LLM Council.""" - +import logging +from . import config as app_config from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse @@ -12,18 +13,19 @@ from . 
import storage from .council import run_full_council, generate_conversation_title, stage1_collect_responses, stage2_collect_rankings, stage3_synthesize_final, calculate_aggregate_rankings +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + app = FastAPI(title="LLM Council API") # Enable CORS for local development app.add_middleware( CORSMiddleware, - allow_origins=["http://localhost:5173", "http://localhost:3000"], - allow_credentials=True, + allow_origins=["*"], allow_methods=["*"], allow_headers=["*"], ) - class CreateConversationRequest(BaseModel): """Request to create a new conversation.""" pass @@ -85,28 +87,21 @@ async def send_message(conversation_id: str, request: SendMessageRequest): Send a message and run the 3-stage council process. Returns the complete response with all stages. """ - # Check if conversation exists conversation = storage.get_conversation(conversation_id) if conversation is None: raise HTTPException(status_code=404, detail="Conversation not found") - # Check if this is the first message is_first_message = len(conversation["messages"]) == 0 - - # Add user message storage.add_user_message(conversation_id, request.content) - # If this is the first message, generate a title if is_first_message: title = await generate_conversation_title(request.content) storage.update_conversation_title(conversation_id, title) - # Run the 3-stage council process stage1_results, stage2_results, stage3_result, metadata = await run_full_council( request.content ) - # Add assistant message with all stages storage.add_assistant_message( conversation_id, stage1_results, @@ -114,7 +109,6 @@ async def send_message(conversation_id: str, request: SendMessageRequest): stage3_result ) - # Return the complete response with metadata return { "stage1": stage1_results, "stage2": stage2_results, @@ -129,47 +123,49 @@ async def send_message_stream(conversation_id: str, request: SendMessageRequest) Send a message and stream the 3-stage council 
process. Returns Server-Sent Events as each stage completes. """ - # Check if conversation exists conversation = storage.get_conversation(conversation_id) if conversation is None: raise HTTPException(status_code=404, detail="Conversation not found") - # Check if this is the first message is_first_message = len(conversation["messages"]) == 0 async def event_generator(): try: - # Add user message storage.add_user_message(conversation_id, request.content) - # Start title generation in parallel (don't await yet) title_task = None if is_first_message: title_task = asyncio.create_task(generate_conversation_title(request.content)) - # Stage 1: Collect responses + # Stage 1 yield f"data: {json.dumps({'type': 'stage1_start'})}\n\n" stage1_results = await stage1_collect_responses(request.content) yield f"data: {json.dumps({'type': 'stage1_complete', 'data': stage1_results})}\n\n" - # Stage 2: Collect rankings + # Stage 2 yield f"data: {json.dumps({'type': 'stage2_start'})}\n\n" stage2_results, label_to_model = await stage2_collect_rankings(request.content, stage1_results) aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model) yield f"data: {json.dumps({'type': 'stage2_complete', 'data': stage2_results, 'metadata': {'label_to_model': label_to_model, 'aggregate_rankings': aggregate_rankings}})}\n\n" - # Stage 3: Synthesize final answer + # ← NEW: Record model stats after Stage 2 + try: + all_models = [r["model"] for r in stage1_results] + storage.record_model_appearances(all_models, aggregate_rankings) + logger.info(f"Stats recorded for models: {all_models}") + except Exception as e: + logger.exception(f"STATS ERROR: {e}") + + # Stage 3 yield f"data: {json.dumps({'type': 'stage3_start'})}\n\n" stage3_result = await stage3_synthesize_final(request.content, stage1_results, stage2_results) yield f"data: {json.dumps({'type': 'stage3_complete', 'data': stage3_result})}\n\n" - # Wait for title generation if it was started if title_task: title = await 
title_task storage.update_conversation_title(conversation_id, title) yield f"data: {json.dumps({'type': 'title_complete', 'data': {'title': title}})}\n\n" - # Save complete assistant message storage.add_assistant_message( conversation_id, stage1_results, @@ -177,11 +173,9 @@ async def event_generator(): stage3_result ) - # Send completion event yield f"data: {json.dumps({'type': 'complete'})}\n\n" except Exception as e: - # Send error event yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n" return StreamingResponse( @@ -194,6 +188,34 @@ async def event_generator(): ) +@app.get("/api/config") +async def get_config(): + """Get current council configuration.""" + cfg = app_config.load_config() + return cfg + + +@app.post("/api/config") +async def update_config(data: dict): + """Update council models configuration.""" + council_models = data.get("council_models", app_config.DEFAULT_COUNCIL_MODELS) + chairman_model = data.get("chairman_model", app_config.DEFAULT_CHAIRMAN_MODEL) + + app_config.save_config(council_models, chairman_model) + + app_config.COUNCIL_MODELS = council_models + app_config.CHAIRMAN_MODEL = chairman_model + + return {"status": "ok", "council_models": council_models, "chairman_model": chairman_model} + + +@app.get("/api/stats") +async def get_stats(): + """Get model performance stats.""" + from . 
import storage + return storage.get_model_stats() + + if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8001) diff --git a/backend/storage.py b/backend/storage.py index 180111da4c..013edb3b19 100644 --- a/backend/storage.py +++ b/backend/storage.py @@ -1,35 +1,20 @@ -"""JSON-based storage for conversations.""" +"""Supabase-based storage for conversations.""" -import json import os from datetime import datetime from typing import List, Dict, Any, Optional -from pathlib import Path +from supabase import create_client, Client from .config import DATA_DIR -def ensure_data_dir(): - """Ensure the data directory exists.""" - Path(DATA_DIR).mkdir(parents=True, exist_ok=True) - - -def get_conversation_path(conversation_id: str) -> str: - """Get the file path for a conversation.""" - return os.path.join(DATA_DIR, f"{conversation_id}.json") +def get_client() -> Client: + url = os.environ.get("SUPABASE_URL") + key = os.environ.get("SUPABASE_KEY") + return create_client(url, key) def create_conversation(conversation_id: str) -> Dict[str, Any]: - """ - Create a new conversation. - - Args: - conversation_id: Unique identifier for the conversation - - Returns: - New conversation dict - """ - ensure_data_dir() - + """Create a new conversation.""" conversation = { "id": conversation_id, "created_at": datetime.utcnow().isoformat(), @@ -37,84 +22,55 @@ def create_conversation(conversation_id: str) -> Dict[str, Any]: "messages": [] } - # Save to file - path = get_conversation_path(conversation_id) - with open(path, 'w') as f: - json.dump(conversation, f, indent=2) - + get_client().table("conversations").insert(conversation).execute() return conversation def get_conversation(conversation_id: str) -> Optional[Dict[str, Any]]: - """ - Load a conversation from storage. 
- - Args: - conversation_id: Unique identifier for the conversation - - Returns: - Conversation dict or None if not found - """ - path = get_conversation_path(conversation_id) - - if not os.path.exists(path): + """Load a conversation from Supabase.""" + result = ( + get_client() + .table("conversations") + .select("*") + .eq("id", conversation_id) + .execute() + ) + + if not result.data: return None - with open(path, 'r') as f: - return json.load(f) + return result.data[0] def save_conversation(conversation: Dict[str, Any]): - """ - Save a conversation to storage. - - Args: - conversation: Conversation dict to save - """ - ensure_data_dir() - - path = get_conversation_path(conversation['id']) - with open(path, 'w') as f: - json.dump(conversation, f, indent=2) + """Save a conversation to Supabase.""" + get_client().table("conversations").upsert(conversation).execute() def list_conversations() -> List[Dict[str, Any]]: - """ - List all conversations (metadata only). - - Returns: - List of conversation metadata dicts - """ - ensure_data_dir() + """List all conversations (metadata only).""" + result = ( + get_client() + .table("conversations") + .select("id, created_at, title, messages") + .order("created_at", desc=True) + .execute() + ) conversations = [] - for filename in os.listdir(DATA_DIR): - if filename.endswith('.json'): - path = os.path.join(DATA_DIR, filename) - with open(path, 'r') as f: - data = json.load(f) - # Return metadata only - conversations.append({ - "id": data["id"], - "created_at": data["created_at"], - "title": data.get("title", "New Conversation"), - "message_count": len(data["messages"]) - }) - - # Sort by creation time, newest first - conversations.sort(key=lambda x: x["created_at"], reverse=True) + for data in result.data: + conversations.append({ + "id": data["id"], + "created_at": data["created_at"], + "title": data.get("title", "New Conversation"), + "message_count": len(data["messages"]) + }) return conversations def 
add_user_message(conversation_id: str, content: str): - """ - Add a user message to a conversation. - - Args: - conversation_id: Conversation identifier - content: User message content - """ + """Add a user message to a conversation.""" conversation = get_conversation(conversation_id) if conversation is None: raise ValueError(f"Conversation {conversation_id} not found") @@ -133,15 +89,7 @@ def add_assistant_message( stage2: List[Dict[str, Any]], stage3: Dict[str, Any] ): - """ - Add an assistant message with all 3 stages to a conversation. - - Args: - conversation_id: Conversation identifier - stage1: List of individual model responses - stage2: List of model rankings - stage3: Final synthesized response - """ + """Add an assistant message with all 3 stages to a conversation.""" conversation = get_conversation(conversation_id) if conversation is None: raise ValueError(f"Conversation {conversation_id} not found") @@ -157,16 +105,60 @@ def add_assistant_message( def update_conversation_title(conversation_id: str, title: str): - """ - Update the title of a conversation. + """Update the title of a conversation.""" + get_client().table("conversations").update( + {"title": title} + ).eq("id", conversation_id).execute() - Args: - conversation_id: Conversation identifier - title: New title for the conversation - """ - conversation = get_conversation(conversation_id) - if conversation is None: - raise ValueError(f"Conversation {conversation_id} not found") - conversation["title"] = title - save_conversation(conversation) +def record_model_appearances(models: list, rankings: list): + """ + Record appearances and wins for all models after a council round. 
+ """ + client = get_client() + + for model in models: + rank_entry = next((r for r in rankings if r["model"] == model), None) + avg_rank = float(rank_entry["average_rank"]) if rank_entry else 0.0 + is_winner = rankings[0]["model"] == model if rankings else False + + result = client.table("model_stats").select("*").eq("model", model).execute() + + if result.data: + existing = result.data[0] + new_appearances = existing["total_appearances"] + 1 + new_wins = existing["wins"] + (1 if is_winner else 0) + new_rank_points = float(existing["total_rank_points"]) + avg_rank + new_avg_rank = round(new_rank_points / new_appearances, 2) + new_win_rate = round(new_wins / new_appearances, 2) + + client.table("model_stats").update({ + "wins": new_wins, + "total_appearances": new_appearances, + "win_rate": new_win_rate, + "avg_rank": new_avg_rank, + "total_rank_points": new_rank_points, + "last_updated": datetime.utcnow().isoformat() + }).eq("model", model).execute() + else: + client.table("model_stats").insert({ + "model": model, + "wins": 1 if is_winner else 0, + "total_appearances": 1, + "win_rate": 1.0 if is_winner else 0.0, + "avg_rank": avg_rank, + "total_rank_points": avg_rank, + "last_updated": datetime.utcnow().isoformat() + }).execute() + + +def get_model_stats() -> list: + """Get all model stats sorted by win rate.""" + result = ( + get_client() + .table("model_stats") + .select("*") + .order("win_rate", desc=True) + .execute() + ) + return result.data diff --git a/frontend/src/App.css b/frontend/src/App.css index 6863cd40b4..2d6e693f6c 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -4,8 +4,8 @@ .app { display: flex; - height: 100vh; - width: 100vw; + min-height: 100vh; + width: 100%; overflow: hidden; background: #ffffff; color: #333; @@ -13,3 +13,90 @@ 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif; } + +.app-toolbar { + position: fixed; + top: 16px; + right: 16px; + left: auto; + z-index: 1001; + display: flex; + 
align-items: center; + gap: 10px; +} + +.toolbar-actions { + display: flex; + align-items: center; + gap: 10px; +} + +.toolbar-button { + display: inline-flex; + align-items: center; + justify-content: center; + width: 44px; + height: 44px; + background: rgba(42, 42, 62, 0.95); + border: 1px solid #444; + border-radius: 12px; + color: #fff; + cursor: pointer; + font-size: 20px; + box-shadow: 0 10px 30px rgba(0, 0, 0, 0.18); +} + +.toolbar-button:hover { + background: #222235; +} + +.mobile-only { + display: none; +} + +.sidebar-backdrop { + position: fixed; + inset: 0; + background: rgba(15, 23, 42, 0.35); + opacity: 0; + pointer-events: none; + transition: opacity 0.2s ease; + z-index: 998; +} + +@media (max-width: 900px) { + .app-toolbar { + top: 12px; + left: 12px; + right: 12px; + justify-content: space-between; + } + + .mobile-only { + display: inline-flex; + } + + .app.sidebar-open .sidebar-backdrop { + opacity: 1; + pointer-events: auto; + } +} + +@media (max-width: 640px) { + .app-toolbar { + top: 10px; + left: 10px; + right: 10px; + } + + .toolbar-button { + width: 40px; + height: 40px; + font-size: 18px; + border-radius: 10px; + } + + .toolbar-actions { + gap: 8px; + } +} diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 1954155981..6fbc863faf 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -1,6 +1,8 @@ import { useState, useEffect } from 'react'; import Sidebar from './components/Sidebar'; import ChatInterface from './components/ChatInterface'; +import Settings from './Settings'; +import Leaderboard from './Leaderboard'; import { api } from './api'; import './App.css'; @@ -9,36 +11,54 @@ function App() { const [currentConversationId, setCurrentConversationId] = useState(null); const [currentConversation, setCurrentConversation] = useState(null); const [isLoading, setIsLoading] = useState(false); + const [showSettings, setShowSettings] = useState(false); + const [showLeaderboard, setShowLeaderboard] = useState(false); + const 
[isSidebarOpen, setIsSidebarOpen] = useState(false); + - // Load conversations on mount useEffect(() => { - loadConversations(); + let isActive = true; + + const fetchConversations = async () => { + try { + const convs = await api.listConversations(); + if (isActive) { + setConversations(convs); + } + } catch (error) { + console.error('Failed to load conversations:', error); + } + }; + + fetchConversations(); + + return () => { + isActive = false; + }; }, []); - // Load conversation details when selected useEffect(() => { - if (currentConversationId) { - loadConversation(currentConversationId); - } - }, [currentConversationId]); + if (!currentConversationId) return undefined; - const loadConversations = async () => { - try { - const convs = await api.listConversations(); - setConversations(convs); - } catch (error) { - console.error('Failed to load conversations:', error); - } - }; + let isActive = true; - const loadConversation = async (id) => { - try { - const conv = await api.getConversation(id); - setCurrentConversation(conv); - } catch (error) { - console.error('Failed to load conversation:', error); - } - }; + const fetchConversation = async () => { + try { + const conv = await api.getConversation(currentConversationId); + if (isActive) { + setCurrentConversation(conv); + } + } catch (error) { + console.error('Failed to load conversation:', error); + } + }; + + fetchConversation(); + + return () => { + isActive = false; + }; + }, [currentConversationId]); const handleNewConversation = async () => { try { @@ -48,6 +68,7 @@ function App() { ...conversations, ]); setCurrentConversationId(newConv.id); + setIsSidebarOpen(false); } catch (error) { console.error('Failed to create conversation:', error); } @@ -55,6 +76,7 @@ function App() { const handleSelectConversation = (id) => { setCurrentConversationId(id); + setIsSidebarOpen(false); }; const handleSendMessage = async (content) => { @@ -62,14 +84,12 @@ function App() { setIsLoading(true); try { - // 
Optimistically add user message to UI const userMessage = { role: 'user', content }; setCurrentConversation((prev) => ({ ...prev, messages: [...prev.messages, userMessage], })); - // Create a partial assistant message that will be updated progressively const assistantMessage = { role: 'assistant', stage1: null, @@ -83,13 +103,11 @@ function App() { }, }; - // Add the partial assistant message setCurrentConversation((prev) => ({ ...prev, messages: [...prev.messages, assistantMessage], })); - // Send message with streaming await api.sendMessageStream(currentConversationId, content, (eventType, event) => { switch (eventType) { case 'stage1_start': @@ -151,13 +169,15 @@ function App() { break; case 'title_complete': - // Reload conversations to get updated title - loadConversations(); + api.listConversations().then(setConversations).catch((error) => { + console.error('Failed to load conversations:', error); + }); break; case 'complete': - // Stream complete, reload conversations list - loadConversations(); + api.listConversations().then(setConversations).catch((error) => { + console.error('Failed to load conversations:', error); + }); setIsLoading(false); break; @@ -172,7 +192,6 @@ function App() { }); } catch (error) { console.error('Failed to send message:', error); - // Remove optimistic messages on error setCurrentConversation((prev) => ({ ...prev, messages: prev.messages.slice(0, -2), @@ -182,12 +201,54 @@ function App() { }; return ( -
Loading stats...
+ ) : stats.length === 0 ? ( +No data yet — run some queries first!
+ ) : ( +| Rank | +Model | +Win Rate | +Wins | +Appearances | +Avg Rank | +
|---|---|---|---|---|---|
| {medal(i)} | +{s.model.split("/")[1] || s.model} | +
+
+
+
+
+
+ {Math.round(s.win_rate * 100)}%
+ |
+ {s.wins} | +{s.total_appearances} | +{s.avg_rank} | +
+ Based on peer rankings across all council sessions. Lower avg rank = better. +
+