diff --git a/.gitignore b/.gitignore index a1b82c165..5423a7e3e 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,6 @@ next-env.d.ts # docs /docs tests/generation/*.eval.result.md + +# Eval results +eval/whiteboard-layout/results/ diff --git a/app/eval/whiteboard/page.tsx b/app/eval/whiteboard/page.tsx new file mode 100644 index 000000000..209ca2f27 --- /dev/null +++ b/app/eval/whiteboard/page.tsx @@ -0,0 +1,107 @@ +'use client'; + +import { useEffect, useState } from 'react'; +import { ScreenElement } from '@/components/slide-renderer/Editor/ScreenElement'; +import { SceneProvider } from '@/lib/contexts/scene-context'; +import { useStageStore } from '@/lib/store/stage'; +import type { PPTElement } from '@/lib/types/slides'; + +const EVAL_STAGE_ID = '__eval_stage__'; +const EVAL_SCENE_ID = '__eval_scene__'; +const CANVAS_WIDTH = 1000; +const CANVAS_HEIGHT = 562.5; + +function WhiteboardCanvas() { + const [elements, setElements] = useState([]); + const [ready, setReady] = useState(false); + + useEffect(() => { + // Bootstrap store with a synthetic stage + scene + const store = useStageStore.getState(); + store.setStage({ + id: EVAL_STAGE_ID, + name: 'eval', + createdAt: 0, + updatedAt: 0, + }); + store.setScenes([ + { + id: EVAL_SCENE_ID, + stageId: EVAL_STAGE_ID, + type: 'slide', + title: 'eval', + order: 0, + content: { + type: 'slide', + canvas: { + id: EVAL_SCENE_ID, + viewportSize: CANVAS_WIDTH, + viewportRatio: CANVAS_HEIGHT / CANVAS_WIDTH, + theme: { + backgroundColor: '#ffffff', + themeColors: ['#5b9bd5'], + fontColor: '#333333', + fontName: 'Microsoft YaHei', + }, + elements: [], + }, + }, + }, + ]); + store.setCurrentSceneId(EVAL_SCENE_ID); + + // Expose setter for Playwright + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (window as any).__setElements = (incoming: PPTElement[]) => { + setElements(incoming); + // Also update the store so SceneProvider/ScreenElement reads the theme + useStageStore.getState().updateScene(EVAL_SCENE_ID, 
{ + content: { + type: 'slide', + canvas: { + id: EVAL_SCENE_ID, + viewportSize: CANVAS_WIDTH, + viewportRatio: CANVAS_HEIGHT / CANVAS_WIDTH, + theme: { + backgroundColor: '#ffffff', + themeColors: ['#5b9bd5'], + fontColor: '#333333', + fontName: 'Microsoft YaHei', + }, + elements: incoming, + }, + }, + }); + }; + + // Signal readiness + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (window as any).__evalReady = true; + // Defer setReady to avoid cascading render warning + queueMicrotask(() => setReady(true)); + }, []); + + if (!ready) return null; + + return ( + +
+ {elements.map((element, index) => ( + + ))} +
+
+ ); +} + +export default function EvalWhiteboardPage() { + return ; +} diff --git a/components/chat/use-chat-sessions.ts b/components/chat/use-chat-sessions.ts index 8b30343a6..33d8f6b3e 100644 --- a/components/chat/use-chat-sessions.ts +++ b/components/chat/use-chat-sessions.ts @@ -22,6 +22,11 @@ import { USER_AVATAR } from '@/lib/types/roundtable'; import { processSSEStream } from './process-sse-stream'; import { StreamBuffer } from '@/lib/buffer/stream-buffer'; import type { AgentStartItem, ActionItem } from '@/lib/buffer/stream-buffer'; +import { + runAgentLoop, + type AgentLoopIterationResult, + type AgentLoopStoreState, +} from '@/lib/chat/agent-loop'; import { ActionEngine } from '@/lib/action/engine'; import { toast } from 'sonner'; import { createLogger } from '@/lib/logger'; @@ -389,7 +394,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { agentHadContent: data.agentHadContent ?? true, cueUserReceived: loopDoneDataRef.current?.cueUserReceived ?? false, }; - // Session completion is handled by runAgentLoop, not here + // Session completion is handled by runAgentLoopFn, not here // (Lectures don't use the agent loop and complete via endSession) }, @@ -428,15 +433,12 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { ); /** - * Frontend-driven agent loop. Sends per-agent requests until: - * - Director returns END (no agent spoke, no cue_user) - * - Director returns USER (cue_user event received) - * - maxTurns reached - * - Request aborted + * Frontend-driven agent loop. Delegates to the shared runAgentLoop + * from lib/chat/agent-loop.ts, wiring StreamBuffer for UI pacing. * * Each iteration: POST /api/chat → process SSE → wait for buffer drain → check outcome. */ - const runAgentLoop = useCallback( + const runAgentLoopFn = useCallback( async ( sessionId: string, requestTemplate: { @@ -475,110 +477,124 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { ? 
parseInt(settingsState.maxTurns, 10) || defaultMaxTurns : defaultMaxTurns; - let directorState: DirectorState | undefined = undefined; - let turnCount = 0; - let currentMessages = requestTemplate.messages; - let consecutiveEmptyTurns = 0; - - while (turnCount < maxTurns) { - if (controller.signal.aborted) break; - - // Reset loop state for this iteration - loopDoneDataRef.current = null; - - // Refresh store state each iteration — agent actions may have changed - // whiteboard, scene, or mode between turns - const freshState = useStageStore.getState(); - const freshStoreState = { - stage: freshState.stage, - scenes: freshState.scenes, - currentSceneId: freshState.currentSceneId, - mode: freshState.mode, - whiteboardOpen: useCanvasStore.getState().whiteboardOpen, - }; + // Per-iteration buffer reference — set in onEvent, used in onIterationEnd + let currentBuffer: StreamBuffer | null = null; - const response = await fetch('/api/chat', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - ...requestTemplate, - messages: currentMessages, - storeState: freshStoreState, - directorState, - }), - signal: controller.signal, - }); + const outcome = await runAgentLoop( + { + config: requestTemplate.config, + userProfile: requestTemplate.userProfile, + apiKey: requestTemplate.apiKey, + baseUrl: requestTemplate.baseUrl, + model: requestTemplate.model, + providerType: requestTemplate.providerType, + }, + { + getStoreState: (): AgentLoopStoreState => { + const freshState = useStageStore.getState(); + return { + stage: freshState.stage, + scenes: freshState.scenes, + currentSceneId: freshState.currentSceneId, + mode: freshState.mode, + whiteboardOpen: useCanvasStore.getState().whiteboardOpen, + }; + }, - if (!response.ok) { - const errorText = await response.text(); - throw new Error(`API error: ${response.status} - ${errorText}`); - } + getMessages: () => { + const currentSession = sessionsRef.current.find((s) => s.id === sessionId); + 
return currentSession?.messages ?? requestTemplate.messages; + }, - const buffer = createBufferForSession(sessionId, sessionType); - await processSSEStream(response, sessionId, buffer, controller.signal); + fetchChat: (body, signal) => + fetch('/api/chat', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + signal, + }), - // Wait for buffer to finish playing all items (character animations, delays) - try { - await buffer.waitUntilDrained(); - } catch { - // Buffer was disposed/shutdown (abort or session end) — exit loop - break; - } + onEvent: (event) => { + // Create buffer on first event of each iteration + if (!currentBuffer) { + currentBuffer = createBufferForSession(sessionId, sessionType); + } - if (controller.signal.aborted) break; - - // Read loop outcome from done data. - // loopDoneDataRef is mutated by StreamBuffer callbacks (onDone, onCueUser); - // TypeScript's CFA can't track cross-callback mutations. - const doneData = loopDoneDataRef.current as { - directorState?: DirectorState; - totalAgents: number; - agentHadContent?: boolean; - cueUserReceived: boolean; - } | null; - if (!doneData) break; // No done event — something went wrong - - // Update accumulated director state - directorState = doneData.directorState; - turnCount = directorState?.turnCount ?? turnCount + 1; - - // Check outcome - if (doneData.cueUserReceived) { - // Director said USER — stop loop, wait for user input - break; - } - if (doneData.totalAgents === 0) { - // Director said END — no agent spoke, conversation complete - break; - } + // Pipe SSE events into StreamBuffer (mirrors processSSEStream) + switch (event.type) { + case 'agent_start': { + const { messageId, agentId, agentName, agentAvatar, agentColor } = event.data; + currentBuffer.pushAgentStart({ + messageId, + agentId, + agentName, + avatar: agentAvatar, + color: agentColor, + }); + break; + } + case 'text_delta': + currentBuffer.pushText(event.data.messageId ?? 
'', event.data.content); + break; + case 'action': + currentBuffer.pushAction({ + actionId: event.data.actionId, + actionName: event.data.actionName, + params: event.data.params, + messageId: event.data.messageId ?? '', + agentId: event.data.agentId, + }); + break; + case 'thinking': + currentBuffer.pushThinking(event.data); + break; + case 'cue_user': + currentBuffer.pushCueUser(event.data); + break; + case 'done': + currentBuffer.pushDone(event.data); + break; + case 'error': + currentBuffer.pushError(event.data.message); + break; + } + }, - // Track consecutive empty responses (agent dispatched but produced no content) - if (doneData.agentHadContent === false) { - consecutiveEmptyTurns++; - if (consecutiveEmptyTurns >= 2) { - log.warn( - `[AgentLoop] ${consecutiveEmptyTurns} consecutive empty agent responses, stopping loop`, - ); - break; - } - } else { - consecutiveEmptyTurns = 0; - } + onIterationEnd: async () => { + if (!currentBuffer) return null; - // Agent spoke — continue loop if under maxTurns - // Refresh messages from latest session state for next iteration - const currentSession = sessionsRef.current.find((s) => s.id === sessionId); - if (currentSession) { - currentMessages = currentSession.messages; - } - } + // Wait for buffer to finish playing all items (character animations, delays) + try { + await currentBuffer.waitUntilDrained(); + } catch { + // Buffer was disposed/shutdown (abort or session end) + currentBuffer = null; + return null; + } + + currentBuffer = null; + + // Read the iteration result from loopDoneDataRef + // (populated by buffer's onDone/onCueUser callbacks) + const doneData = loopDoneDataRef.current; + loopDoneDataRef.current = null; + + if (!doneData) return null; + return { + directorState: doneData.directorState, + totalAgents: doneData.totalAgents, + agentHadContent: doneData.agentHadContent ?? 
true, + cueUserReceived: doneData.cueUserReceived, + }; + }, + }, + controller.signal, + maxTurns, + ); - // Handle loop completion - const doneData = loopDoneDataRef.current; + // Handle loop completion (UI-specific) if (!controller.signal.aborted) { - const wasCueUser = doneData?.cueUserReceived ?? false; - if (!wasCueUser) { - // Session completed normally (END or maxTurns reached) + if (outcome.reason !== 'cue_user') { setSessions((prev) => prev.map((s) => s.id === sessionId @@ -592,10 +608,6 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { ); onStopSessionRef.current?.(); } - // If maxTurns reached, log it - if (turnCount >= maxTurns && doneData && doneData.totalAgents > 0) { - log.info(`[AgentLoop] Max turns (${maxTurns}) reached for session ${sessionId}`); - } } }, [createBufferForSession], @@ -851,7 +863,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { ? useSettingsStore.getState().selectedAgentIds : session.config.agentIds; - await runAgentLoop( + await runAgentLoopFn( sessionId, { messages: session.messages, @@ -896,7 +908,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { } } }, - [clearLiveSessionAfterError, runAgentLoop], + [clearLiveSessionAfterError, runAgentLoopFn], ); /** @@ -1062,7 +1074,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { const userProfileState = useUserProfileStore.getState(); const mc = getCurrentModelConfig(); - await runAgentLoop( + await runAgentLoopFn( sessionId!, { messages: sessionMessages, @@ -1116,7 +1128,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { isStreaming, createSession, endSession, - runAgentLoop, + runAgentLoopFn, t, ], ); @@ -1201,7 +1213,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { const userProfileState = useUserProfileStore.getState(); const mc = getCurrentModelConfig(); - await runAgentLoop( + await runAgentLoopFn( sessionId, { messages: [], 
@@ -1253,7 +1265,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { } }, // eslint-disable-next-line react-hooks/exhaustive-deps -- t is stable from i18n context - [clearLiveSessionAfterError, endSession, runAgentLoop], + [clearLiveSessionAfterError, endSession, runAgentLoopFn], ); /** diff --git a/eval/whiteboard-layout/capture.ts b/eval/whiteboard-layout/capture.ts new file mode 100644 index 000000000..aeb142b33 --- /dev/null +++ b/eval/whiteboard-layout/capture.ts @@ -0,0 +1,66 @@ +import { chromium, type Browser, type Page } from '@playwright/test'; +import type { PPTElement } from '@/lib/types/slides'; +import { mkdirSync } from 'fs'; +import { join } from 'path'; + +const VIEWPORT = { width: 1000, height: 563 }; + +let browser: Browser | null = null; +let page: Page | null = null; + +/** + * Initialize Playwright browser (reused across captures). + */ +export async function initCapture(baseUrl: string): Promise { + browser = await chromium.launch({ headless: true }); + const context = await browser.newContext({ viewport: VIEWPORT }); + page = await context.newPage(); + + await page.goto(`${baseUrl}/eval/whiteboard`); + // Wait for the page to signal readiness + await page.waitForFunction( + () => (window as unknown as Record).__evalReady === true, + ); +} + +/** + * Capture a screenshot of the whiteboard with the given elements. + * Returns the path to the saved screenshot. + */ +export async function captureWhiteboard( + elements: PPTElement[], + outputDir: string, + filename: string, +): Promise { + if (!page) throw new Error('Capture not initialized. 
Call initCapture() first.'); + + // Inject elements into the page + await page.evaluate( + (els: unknown[]) => { + const setter = (window as unknown as Record void>).__setElements; + setter(els); + }, + elements as unknown as unknown[], + ); + + // Wait for rendering to stabilize (fonts, KaTeX, images) + await page.waitForTimeout(1500); + + mkdirSync(outputDir, { recursive: true }); + const filepath = join(outputDir, filename); + + await page.screenshot({ path: filepath, clip: { x: 0, y: 0, width: 1000, height: 563 } }); + + return filepath; +} + +/** + * Close the browser. + */ +export async function closeCapture(): Promise { + if (browser) { + await browser.close(); + browser = null; + page = null; + } +} diff --git a/eval/whiteboard-layout/chat-client.ts b/eval/whiteboard-layout/chat-client.ts new file mode 100644 index 000000000..76d68b21b --- /dev/null +++ b/eval/whiteboard-layout/chat-client.ts @@ -0,0 +1,71 @@ +import type { StatelessEvent, DirectorState } from '@/lib/types/chat'; + +export interface ChatRequest { + baseUrl: string; + messages: Array<{ role: string; content: string; parts?: unknown[]; metadata?: unknown }>; + storeState: Record; + config: { + agentIds: string[]; + sessionType?: string; + agentConfigs?: Record[]; + }; + directorState?: DirectorState; + userProfile?: { nickname?: string; bio?: string }; + apiKey: string; + baseUrlOverride?: string; + model?: string; + providerType?: string; +} + +/** + * Call /api/chat and yield parsed SSE events. 
+ */ +export async function* chatStream( + request: ChatRequest, + signal?: AbortSignal, +): AsyncGenerator { + const { baseUrl, ...body } = request; + + const response = await fetch(`${baseUrl}/api/chat`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + signal, + }); + + if (!response.ok) { + const text = await response.text(); + throw new Error(`/api/chat returned ${response.status}: ${text}`); + } + + const reader = response.body?.getReader(); + if (!reader) throw new Error('No response body'); + + const decoder = new TextDecoder(); + let buffer = ''; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const parts = buffer.split('\n\n'); + buffer = parts.pop() || ''; + + for (const part of parts) { + const line = part.trim(); + if (!line.startsWith('data: ')) continue; + + try { + const event: StatelessEvent = JSON.parse(line.slice(6)); + yield event; + } catch { + // Skip malformed events (heartbeats, etc.) + } + } + } + } finally { + reader.releaseLock(); + } +} diff --git a/eval/whiteboard-layout/reporter.ts b/eval/whiteboard-layout/reporter.ts new file mode 100644 index 000000000..b519fcedc --- /dev/null +++ b/eval/whiteboard-layout/reporter.ts @@ -0,0 +1,92 @@ +import { writeFileSync, mkdirSync } from 'fs'; +import { join } from 'path'; +import type { EvalReport, VlmScore } from './types'; + +function mean(nums: number[]): number { + if (nums.length === 0) return 0; + return nums.reduce((a, b) => a + b, 0) / nums.length; +} + +function formatNum(n: number): string { + return n.toFixed(1); +} + +/** + * Generate JSON + Markdown reports from eval results. 
+ */ +export function generateReport( + report: EvalReport, + outputDir: string, +): { json: string; md: string } { + mkdirSync(outputDir, { recursive: true }); + + // Collect all scores across all checkpoints + const allScores: VlmScore[] = []; + for (const scenario of report.scenarios) { + for (const cp of scenario.checkpoints) { + allScores.push(cp.score); + } + } + + const dimensions = ['readability', 'overlap', 'space_utilization', 'layout_logic'] as const; + + // Build summary stats (guard against empty arrays) + const summary: Record = {}; + if (allScores.length > 0) { + for (const dim of dimensions) { + const vals = allScores.map((s) => s[dim].score); + summary[dim] = { + mean: mean(vals), + min: Math.min(...vals), + max: Math.max(...vals), + }; + } + const overallVals = allScores.map((s) => s.overall); + summary['overall'] = { + mean: mean(overallVals), + min: Math.min(...overallVals), + max: Math.max(...overallVals), + }; + } + + // Write JSON + const jsonPath = join(outputDir, 'report.json'); + writeFileSync(jsonPath, JSON.stringify(report, null, 2)); + + // Build Markdown + const lines: string[] = []; + lines.push('# Whiteboard Layout Eval Report'); + lines.push( + `Run: ${report.timestamp} | Model: ${report.model} | Scenarios: ${report.scenarios.length}`, + ); + lines.push(''); + lines.push('## Summary'); + lines.push('| Metric | Mean | Min | Max |'); + lines.push('|--------|------|-----|-----|'); + for (const [key, stats] of Object.entries(summary)) { + lines.push(`| ${key} | ${formatNum(stats.mean)} | ${stats.min} | ${stats.max} |`); + } + lines.push(''); + + lines.push('## Scenarios'); + for (const scenario of report.scenarios) { + const lastCp = scenario.checkpoints[scenario.checkpoints.length - 1]; + lines.push(`### ${scenario.scenarioId} (run ${scenario.runIndex + 1})`); + if (scenario.error) { + lines.push(`- Error: ${scenario.error}`); + } else if (lastCp) { + lines.push(`- Overall: ${lastCp.score.overall}`); + lines.push(`- Overlap: 
${lastCp.score.overlap.score} — ${lastCp.score.overlap.reason}`); + if (lastCp.score.issues.length > 0) { + lines.push(`- Issues: ${lastCp.score.issues.join('; ')}`); + } + lines.push(`- Screenshot: ${lastCp.screenshotPath}`); + } + lines.push(''); + } + + const mdPath = join(outputDir, 'report.md'); + writeFileSync(mdPath, lines.join('\n')); + + return { json: jsonPath, md: mdPath }; +} diff --git a/eval/whiteboard-layout/runner.ts b/eval/whiteboard-layout/runner.ts new file mode 100644 index 000000000..75e9e46cf --- /dev/null +++ b/eval/whiteboard-layout/runner.ts @@ -0,0 +1,292 @@ +import { readFileSync, readdirSync, mkdirSync } from 'fs'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; +import { parseArgs } from 'util'; +import type { EvalScenario, ScenarioRunResult, CheckpointResult, EvalReport } from './types'; +import type { Action } from '@/lib/types/action'; +import { runAgentLoop, type AgentLoopIterationResult } from '@/lib/chat/agent-loop'; +import { EvalStateManager } from './state-manager'; +import { initCapture, captureWhiteboard, closeCapture } from './capture'; +import { scoreScreenshot } from './scorer'; +import { generateReport } from './reporter'; + +// ==================== CLI Args ==================== +// +// Model configuration follows the same pattern as the outline-language eval: +// EVAL_CHAT_MODEL Model for chat generation (default: DEFAULT_MODEL or gpt-4o-mini) +// EVAL_SCORER_MODEL Model for VLM scoring (default: openai:gpt-4o) +// +// Usage: +// EVAL_CHAT_MODEL=google:gemini-3.1-pro-preview \ +// EVAL_SCORER_MODEL=google:gemini-2.0-flash \ +// pnpm eval:whiteboard --scenario physics-force-decomposition + +const { values: args } = parseArgs({ + options: { + scenario: { type: 'string' }, + repeat: { type: 'string', default: '1' }, + 'base-url': { type: 'string', default: 'http://localhost:3000' }, + 'output-dir': { type: 'string', default: 'eval/whiteboard-layout/results' }, + }, +}); + +const BASE_URL = 
args['base-url']!; +const CHAT_MODEL = process.env.EVAL_CHAT_MODEL || process.env.DEFAULT_MODEL || 'openai/gpt-4o-mini'; +const SCORER_MODEL = process.env.EVAL_SCORER_MODEL || 'openai:gpt-4o'; +const REPEAT = parseInt(args.repeat || '1', 10); +const OUTPUT_DIR = args['output-dir']!; +const SCENARIO_FILTER = args.scenario; +const MAX_AGENT_TURNS = 10; + +// ==================== Scenario Loading ==================== + +function loadScenarios(): EvalScenario[] { + const currentDir = + typeof __dirname !== 'undefined' ? __dirname : dirname(fileURLToPath(import.meta.url)); + const scenarioDir = join(currentDir, 'scenarios'); + const files = readdirSync(scenarioDir).filter((f) => f.endsWith('.json')); + const scenarios: EvalScenario[] = []; + + for (const file of files) { + const scenario: EvalScenario = JSON.parse(readFileSync(join(scenarioDir, file), 'utf-8')); + if (SCENARIO_FILTER && scenario.id !== SCENARIO_FILTER && !file.includes(SCENARIO_FILTER)) { + continue; + } + scenarios.push(scenario); + } + + return scenarios; +} + +// ==================== Single Scenario Run ==================== + +async function runScenario( + scenario: EvalScenario, + runIndex: number, + runDir: string, +): Promise { + const model = scenario.model || CHAT_MODEL; + const checkpoints: CheckpointResult[] = []; + + console.log(` [run ${runIndex + 1}] Starting...`); + + const stateManager = new EvalStateManager(scenario.initialStoreState); + const messages: Array<{ + role: string; + content: string; + parts?: unknown[]; + metadata?: unknown; + }> = []; + + try { + for (let turnIdx = 0; turnIdx < scenario.turns.length; turnIdx++) { + const turn = scenario.turns[turnIdx]; + console.log(` Turn ${turnIdx + 1}: "${turn.userMessage.slice(0, 50)}..."`); + + // Add user message + messages.push({ + role: 'user', + content: turn.userMessage, + parts: [{ type: 'text', text: turn.userMessage }], + metadata: { createdAt: Date.now() }, + }); + + // Per-iteration state for the eval callbacks + let 
iterResult: AgentLoopIterationResult | null = null; + let currentAgentId: string | null = null; + let currentMessageId: string | null = null; + const textParts: string[] = []; + const actionParts: Array<{ type: string; actionName: string; params: unknown }> = []; + let cueUserReceived = false; + + // Use the shared agent loop — same logic as frontend + const controller = new AbortController(); + await runAgentLoop( + { + config: scenario.config, + apiKey: '', // Server resolves API key from env/YAML + model, + }, + { + getStoreState: () => stateManager.getStoreState(), + getMessages: () => messages, + + fetchChat: async (body, signal) => { + // Reset per-iteration accumulators + currentAgentId = null; + currentMessageId = null; + textParts.length = 0; + actionParts.length = 0; + cueUserReceived = false; + iterResult = null; + + return fetch(`${BASE_URL}/api/chat`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + signal, + }); + }, + + onEvent: (event) => { + switch (event.type) { + case 'agent_start': + currentAgentId = event.data.agentId; + currentMessageId = event.data.messageId; + break; + + case 'text_delta': + textParts.push(event.data.content); + break; + + case 'action': { + const action: Action = { + id: event.data.actionId, + type: event.data.actionName, + ...event.data.params, + } as Action; + // Note: executeAction is async but we fire-and-forget here + // to match the shared loop's sync onEvent signature. + // Actions are awaited via ActionEngine's internal delay(). + void stateManager.executeAction(action); + actionParts.push({ + type: `action-${event.data.actionName}`, + actionName: event.data.actionName, + params: event.data.params, + }); + break; + } + + case 'cue_user': + cueUserReceived = true; + break; + + case 'done': + iterResult = { + directorState: event.data.directorState, + totalAgents: event.data.totalAgents, + agentHadContent: event.data.agentHadContent ?? 
true, + cueUserReceived, + }; + break; + + case 'error': + throw new Error(`API error: ${event.data.message}`); + } + }, + + onIterationEnd: async () => { + // Build assistant message for conversation history + if (currentMessageId && (textParts.length > 0 || actionParts.length > 0)) { + const parts: unknown[] = []; + if (textParts.length > 0) { + parts.push({ type: 'text', text: textParts.join('') }); + } + for (const ap of actionParts) { + parts.push({ ...ap, state: 'result', output: { success: true } }); + } + messages.push({ + role: 'assistant', + content: textParts.join(''), + parts, + metadata: { + senderName: currentAgentId || 'agent', + originalRole: 'agent', + agentId: currentAgentId, + createdAt: Date.now(), + }, + }); + } + + return iterResult; + }, + }, + controller.signal, + MAX_AGENT_TURNS, + ); + + // Checkpoint: capture + score + const isLastTurn = turnIdx === scenario.turns.length - 1; + if (turn.checkpoint || isLastTurn) { + const elements = stateManager.getWhiteboardElements(); + const screenshotFilename = `${scenario.id}_run${runIndex}_turn${turnIdx}.png`; + const screenshotPath = await captureWhiteboard(elements, runDir, screenshotFilename); + + console.log(` Captured: ${screenshotFilename} (${elements.length} elements)`); + + const score = await scoreScreenshot(screenshotPath, SCORER_MODEL); + + console.log(` Score: overall=${score.overall}, overlap=${score.overlap.score}`); + + checkpoints.push({ + turnIndex: turnIdx, + screenshotPath, + score, + elements, + }); + } + } + } catch (error) { + const msg = error instanceof Error ? 
error.message : String(error); + console.error(` Error: ${msg}`); + return { scenarioId: scenario.id, runIndex, model, checkpoints, error: msg }; + } finally { + stateManager.dispose(); + } + + return { scenarioId: scenario.id, runIndex, model, checkpoints }; +} + +// ==================== Main ==================== + +async function main() { + console.log('=== Whiteboard Layout Eval ==='); + console.log(`Chat: ${CHAT_MODEL} | Scorer: ${SCORER_MODEL} | Repeats: ${REPEAT}`); + console.log(''); + + const scenarios = loadScenarios(); + if (scenarios.length === 0) { + console.error('No scenarios found. Check eval/whiteboard-layout/scenarios/'); + process.exit(1); + } + console.log(`Loaded ${scenarios.length} scenario(s)`); + + // Create run directory: results/// + const sanitizedModel = CHAT_MODEL.replace(/[:/]/g, '-'); + const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); + const runDir = join(OUTPUT_DIR, sanitizedModel, timestamp); + mkdirSync(runDir, { recursive: true }); + console.log(`Output: ${runDir}`); + + await initCapture(BASE_URL); + + const allResults: ScenarioRunResult[] = []; + + for (const scenario of scenarios) { + console.log(`\nScenario: ${scenario.name} (${scenario.id})`); + const repeats = scenario.repeat ?? 
REPEAT; + + for (let r = 0; r < repeats; r++) { + const result = await runScenario(scenario, r, runDir); + allResults.push(result); + } + } + + await closeCapture(); + + const report: EvalReport = { + timestamp: new Date().toISOString(), + model: CHAT_MODEL, + scenarios: allResults, + }; + + const { json, md } = generateReport(report, runDir); + console.log(`\nReport saved:`); + console.log(` JSON: ${json}`); + console.log(` Markdown: ${md}`); +} + +main().catch((err) => { + console.error('Fatal error:', err); + process.exit(1); +}); diff --git a/eval/whiteboard-layout/scenarios/econ-tech-innovation.json b/eval/whiteboard-layout/scenarios/econ-tech-innovation.json new file mode 100644 index 000000000..5b8a77f3c --- /dev/null +++ b/eval/whiteboard-layout/scenarios/econ-tech-innovation.json @@ -0,0 +1,92 @@ +{ + "id": "econ-tech-innovation", + "name": "Development Economics — Technology & Innovation", + "description": "qa模式,英文课程,chart+table并排布局测试", + "tags": ["economics", "qa", "single-agent", "en-US", "chart", "table"], + "initialStoreState": { + "stage": { + "id": "eval-econ-innovation", + "name": "Development Economics", + "createdAt": 1700000000, + "updatedAt": 1700000000, + "languageDirective": "en-US" + }, + "scenes": [ + { + "id": "sc-econ-1", + "stageId": "eval-econ-innovation", + "type": "slide", + "title": "Technology and Innovation", + "order": 0, + "content": { + "type": "slide", + "canvas": { + "id": "slide-0", + "viewportSize": 1000, + "viewportRatio": 0.5625, + "theme": { + "backgroundColor": "#ffffff", + "themeColors": ["#5b9bd5", "#ed7d31", "#a5a5a5", "#ffc000", "#4472c4"], + "fontColor": "#333333", + "fontName": "Microsoft YaHei" + }, + "elements": [ + { + "type": "text", + "id": "title-5", + "content": "

Technology Progress & Innovation

", + "left": 60, + "top": 40, + "width": 880, + "height": 70, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "text", + "id": "sub-5", + "content": "

Schumpeter's Creative Destruction Theory

", + "left": 80, + "top": 130, + "width": 500, + "height": 50, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "image", + "id": "img-econ", + "src": "https://placehold.co/400x300", + "left": 540, + "top": 120, + "width": 400, + "height": 280, + "rotate": 0, + "fixedRatio": true + } + ] + } + } + } + ], + "currentSceneId": "sc-econ-1" + }, + "config": { + "agentIds": ["default-1"], + "sessionType": "qa" + }, + "turns": [ + { + "userMessage": "Can you compare R&D intensity vs capital returns on the whiteboard?" + }, + { + "userMessage": "Add a table with specific examples", + "checkpoint": true + }, + { + "userMessage": "Now show the Silicon Valley innovation formula" + } + ] +} diff --git a/eval/whiteboard-layout/scenarios/finance-tax-architecture.json b/eval/whiteboard-layout/scenarios/finance-tax-architecture.json new file mode 100644 index 000000000..0b582d6d8 --- /dev/null +++ b/eval/whiteboard-layout/scenarios/finance-tax-architecture.json @@ -0,0 +1,197 @@ +{ + "id": "finance-tax-architecture", + "name": "企业财务 — 三层架构税务筹划", + "description": "qa模式,多agent讨论,表格+公式+形状混合白板", + "tags": ["finance", "qa", "multi-agent", "zh-CN", "table", "latex"], + "initialStoreState": { + "stage": { + "id": "eval-finance-tax", + "name": "企业财务战略", + "createdAt": 1700000000, + "updatedAt": 1700000000, + "languageDirective": "zh-CN" + }, + "scenes": [ + { + "id": "sc-fin-1", + "stageId": "eval-finance-tax", + "type": "slide", + "title": "企业架构与税务优化", + "order": 0, + "content": { + "type": "slide", + "canvas": { + "id": "slide-0", + "viewportSize": 1000, + "viewportRatio": 0.5625, + "theme": { + "backgroundColor": "#ffffff", + "themeColors": ["#5b9bd5", "#ed7d31", "#a5a5a5", "#ffc000", "#4472c4"], + "fontColor": "#333333", + "fontName": "Microsoft YaHei" + }, + "elements": [ + { + "type": "text", + "id": "title-3", + "content": "

家族公司+持股公司+业务子公司 三层架构

", + "left": 60, + "top": 40, + "width": 880, + "height": 70, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "shape", + "id": "box-1", + "viewBox": [1000, 1000], + "path": "M 0 0 L 1000 0 L 1000 1000 L 0 1000 Z", + "left": 60, + "top": 130, + "width": 280, + "height": 120, + "rotate": 0, + "fill": "#E3F2FD", + "fixedRatio": false + }, + { + "type": "text", + "id": "label-1", + "content": "

家族公司

", + "left": 100, + "top": 170, + "width": 200, + "height": 40, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "shape", + "id": "box-2", + "viewBox": [1000, 1000], + "path": "M 0 0 L 1000 0 L 1000 1000 L 0 1000 Z", + "left": 360, + "top": 130, + "width": 280, + "height": 120, + "rotate": 0, + "fill": "#FFF3E0", + "fixedRatio": false + }, + { + "type": "text", + "id": "label-2", + "content": "

持股公司

", + "left": 400, + "top": 170, + "width": 200, + "height": 40, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "shape", + "id": "box-3", + "viewBox": [1000, 1000], + "path": "M 0 0 L 1000 0 L 1000 1000 L 0 1000 Z", + "left": 660, + "top": 130, + "width": 280, + "height": 120, + "rotate": 0, + "fill": "#E8F5E9", + "fixedRatio": false + }, + { + "type": "text", + "id": "label-3", + "content": "

业务子公司

", + "left": 700, + "top": 170, + "width": 200, + "height": 40, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + } + ] + } + } + } + ], + "currentSceneId": "sc-fin-1" + }, + "config": { + "agentIds": ["gen-teacher-01", "gen-assistant-01"], + "sessionType": "qa", + "agentConfigs": [ + { + "id": "gen-teacher-01", + "name": "林教授", + "role": "teacher", + "persona": "严谨认真的林教授,善于用白板辅助讲解。", + "avatar": "👨‍🏫", + "color": "#4A90D9", + "allowedActions": [ + "wb_open", + "wb_close", + "wb_clear", + "wb_delete", + "wb_draw_text", + "wb_draw_shape", + "wb_draw_chart", + "wb_draw_latex", + "wb_draw_table", + "wb_draw_line", + "spotlight", + "laser" + ], + "priority": 10 + }, + { + "id": "gen-assistant-01", + "name": "小雅", + "role": "assistant", + "persona": "热情活泼的小雅,负责补充老师遗漏的要点。", + "avatar": "🧑‍💼", + "color": "#E8913A", + "allowedActions": [ + "wb_open", + "wb_close", + "wb_clear", + "wb_delete", + "wb_draw_text", + "wb_draw_shape", + "wb_draw_chart", + "wb_draw_latex", + "wb_draw_table", + "wb_draw_line" + ], + "priority": 7 + } + ] + }, + "turns": [ + { + "userMessage": "工资和分红在税务上有什么区别?" 
+ }, + { + "userMessage": "发奖金也是工资薪金吧,分红是分红", + "checkpoint": true + }, + { + "userMessage": "那家族公司到底怎么省税的" + }, + { + "userMessage": "确实心疼", + "checkpoint": true + }, + { + "userMessage": "搞明白了,那IPO有什么影响" + } + ] +} diff --git a/eval/whiteboard-layout/scenarios/math-quadratic-inequality.json b/eval/whiteboard-layout/scenarios/math-quadratic-inequality.json new file mode 100644 index 000000000..6282910ad --- /dev/null +++ b/eval/whiteboard-layout/scenarios/math-quadratic-inequality.json @@ -0,0 +1,100 @@ +{ + "id": "math-quadratic-inequality", + "name": "高中数学 — 二次函数与不等式", + "description": "qa模式,单agent,用户追问驱动公式推导和图表绘制", + "tags": ["math", "qa", "single-agent", "zh-CN", "latex"], + "initialStoreState": { + "stage": { + "id": "eval-math-quadratic", + "name": "高中数学函数", + "createdAt": 1700000000, + "updatedAt": 1700000000, + "languageDirective": "zh-CN" + }, + "scenes": [ + { + "id": "sc-math-1", + "stageId": "eval-math-quadratic", + "type": "slide", + "title": "二次函数与一元二次不等式", + "order": 0, + "content": { + "type": "slide", + "canvas": { + "id": "slide-0", + "viewportSize": 1000, + "viewportRatio": 0.5625, + "theme": { + "backgroundColor": "#ffffff", + "themeColors": ["#5b9bd5", "#ed7d31", "#a5a5a5", "#ffc000", "#4472c4"], + "fontColor": "#333333", + "fontName": "Microsoft YaHei" + }, + "elements": [ + { + "type": "text", + "id": "title-2", + "content": "

二次函数与一元二次不等式

", + "left": 60, + "top": 40, + "width": 880, + "height": 70, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "text", + "id": "def-1", + "content": "

一元二次不等式 ax²+bx+c>0 的解集

", + "left": 80, + "top": 140, + "width": 500, + "height": 50, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "text", + "id": "def-2", + "content": "

与二次函数 y=ax²+bx+c 的图像关系

", + "left": 80, + "top": 200, + "width": 500, + "height": 50, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + } + ] + } + } + } + ], + "currentSceneId": "sc-math-1" + }, + "config": { + "agentIds": ["default-1"], + "sessionType": "qa" + }, + "turns": [ + { + "userMessage": "能在白板上推导一下 x²-5x+6>0 怎么解吗" + }, + { + "userMessage": "嗯,然后呢", + "checkpoint": true + }, + { + "userMessage": "那如果是小于零呢" + }, + { + "userMessage": "画个图看看", + "checkpoint": true + }, + { + "userMessage": "韦达定理也写一下" + } + ] +} diff --git a/eval/whiteboard-layout/scenarios/med-gcp-compliance.json b/eval/whiteboard-layout/scenarios/med-gcp-compliance.json new file mode 100644 index 000000000..04470301c --- /dev/null +++ b/eval/whiteboard-layout/scenarios/med-gcp-compliance.json @@ -0,0 +1,150 @@ +{ + "id": "med-gcp-compliance", + "name": "临床医学 — GCP合规与风险监查", + "description": "discussion模式,紧凑递进式白板布局", + "tags": ["medical", "discussion", "multi-agent", "zh-CN"], + "initialStoreState": { + "stage": { + "id": "eval-med-gcp", + "name": "临床试验GCP", + "createdAt": 1700000000, + "updatedAt": 1700000000, + "languageDirective": "zh-CN" + }, + "scenes": [ + { + "id": "sc-med-1", + "stageId": "eval-med-gcp", + "type": "slide", + "title": "GCP合规要点", + "order": 0, + "content": { + "type": "slide", + "canvas": { + "id": "slide-0", + "viewportSize": 1000, + "viewportRatio": 0.5625, + "theme": { + "backgroundColor": "#ffffff", + "themeColors": ["#5b9bd5", "#ed7d31", "#a5a5a5", "#ffc000", "#4472c4"], + "fontColor": "#333333", + "fontName": "Microsoft YaHei" + }, + "elements": [ + { + "type": "text", + "id": "title-6", + "content": "

ICH-GCP 药物临床试验质量管理

", + "left": 60, + "top": 40, + "width": 880, + "height": 70, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "text", + "id": "p-1", + "content": "

传统核查 (SDV) vs 基于风险的监查 (RBM)

", + "left": 80, + "top": 140, + "width": 600, + "height": 50, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "text", + "id": "p-2", + "content": "

知情同意的电子化转型

", + "left": 80, + "top": 200, + "width": 600, + "height": 50, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + } + ] + } + } + } + ], + "currentSceneId": "sc-med-1" + }, + "config": { + "agentIds": ["gen-teacher-01", "gen-assistant-01", "gen-student-张强"], + "sessionType": "discussion", + "triggerAgentId": "gen-student-张强", + "agentConfigs": [ + { + "id": "gen-teacher-01", + "name": "林教授", + "role": "teacher", + "persona": "严谨认真的林教授,善于用白板辅助讲解。", + "avatar": "👨‍🏫", + "color": "#4A90D9", + "allowedActions": [ + "wb_open", + "wb_close", + "wb_clear", + "wb_delete", + "wb_draw_text", + "wb_draw_shape", + "wb_draw_chart", + "wb_draw_latex", + "wb_draw_table", + "wb_draw_line", + "spotlight", + "laser" + ], + "priority": 10 + }, + { + "id": "gen-assistant-01", + "name": "苏助手", + "role": "assistant", + "persona": "热情活泼的苏助手,负责补充老师遗漏的要点。", + "avatar": "🧑‍💼", + "color": "#E8913A", + "allowedActions": [ + "wb_open", + "wb_close", + "wb_clear", + "wb_delete", + "wb_draw_text", + "wb_draw_shape", + "wb_draw_chart", + "wb_draw_latex", + "wb_draw_table", + "wb_draw_line" + ], + "priority": 7 + }, + { + "id": "gen-student-张强", + "name": "张强", + "role": "student", + "persona": "好奇心强的学生张强。临床医学专业", + "avatar": "🧑‍🎓", + "color": "#66BB6A", + "allowedActions": ["wb_open", "wb_draw_text", "wb_draw_latex"], + "priority": 3 + } + ] + }, + "turns": [ + { + "userMessage": "SDV和RBM到底有什么区别?" 
+ }, + { + "userMessage": "嗯,那博弈点在哪", + "checkpoint": true + }, + { + "userMessage": "动态合规怎么理解" + } + ] +} diff --git a/eval/whiteboard-layout/scenarios/physics-force-decomposition.json b/eval/whiteboard-layout/scenarios/physics-force-decomposition.json new file mode 100644 index 000000000..2ff2d5a64 --- /dev/null +++ b/eval/whiteboard-layout/scenarios/physics-force-decomposition.json @@ -0,0 +1,191 @@ +{ + "id": "physics-force-decomposition", + "name": "初中物理 — 力的分解", + "description": "discussion模式,4个agent,用户短回复驱动多轮白板绘制", + "tags": ["physics", "discussion", "multi-agent", "zh-CN"], + "initialStoreState": { + "stage": { + "id": "eval-physics-forces", + "name": "初中物理力学", + "createdAt": 1700000000, + "updatedAt": 1700000000, + "languageDirective": "zh-CN" + }, + "scenes": [ + { + "id": "sc-phys-1", + "stageId": "eval-physics-forces", + "type": "slide", + "title": "力的合成与分解", + "order": 0, + "content": { + "type": "slide", + "canvas": { + "id": "slide-0", + "viewportSize": 1000, + "viewportRatio": 0.5625, + "theme": { + "backgroundColor": "#ffffff", + "themeColors": ["#5b9bd5", "#ed7d31", "#a5a5a5", "#ffc000", "#4472c4"], + "fontColor": "#333333", + "fontName": "Microsoft YaHei" + }, + "elements": [ + { + "type": "text", + "id": "title-1", + "content": "

力的合成与分解

", + "left": 60, + "top": 40, + "width": 880, + "height": 70, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "shape", + "id": "bg-1", + "viewBox": [1000, 1000], + "path": "M 0 0 L 1000 0 L 1000 1000 L 0 1000 Z", + "left": 60, + "top": 120, + "width": 880, + "height": 3, + "rotate": 0, + "fill": "#cccccc", + "fixedRatio": false + }, + { + "type": "text", + "id": "point-1", + "content": "

合力与分力的关系

", + "left": 80, + "top": 150, + "width": 400, + "height": 50, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "text", + "id": "point-2", + "content": "

平行四边形定则

", + "left": 80, + "top": 210, + "width": 400, + "height": 50, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "image", + "id": "img-1", + "src": "https://placehold.co/400x300", + "left": 540, + "top": 140, + "width": 380, + "height": 280, + "rotate": 0, + "fixedRatio": true + } + ] + } + } + } + ], + "currentSceneId": "sc-phys-1" + }, + "config": { + "agentIds": ["gen-teacher-01", "gen-assistant-01", "gen-student-小明", "gen-student-小红"], + "sessionType": "discussion", + "triggerAgentId": "gen-teacher-01", + "agentConfigs": [ + { + "id": "gen-teacher-01", + "name": "张老师", + "role": "teacher", + "persona": "严谨认真的张老师,善于用白板辅助讲解。", + "avatar": "👨‍🏫", + "color": "#4A90D9", + "allowedActions": [ + "wb_open", + "wb_close", + "wb_clear", + "wb_delete", + "wb_draw_text", + "wb_draw_shape", + "wb_draw_chart", + "wb_draw_latex", + "wb_draw_table", + "wb_draw_line", + "spotlight", + "laser" + ], + "priority": 10 + }, + { + "id": "gen-assistant-01", + "name": "小助手", + "role": "assistant", + "persona": "热情活泼的小助手,负责补充老师遗漏的要点。", + "avatar": "🧑‍💼", + "color": "#E8913A", + "allowedActions": [ + "wb_open", + "wb_close", + "wb_clear", + "wb_delete", + "wb_draw_text", + "wb_draw_shape", + "wb_draw_chart", + "wb_draw_latex", + "wb_draw_table", + "wb_draw_line" + ], + "priority": 7 + }, + { + "id": "gen-student-小明", + "name": "小明", + "role": "student", + "persona": "好奇心强的学生小明。", + "avatar": "🧑‍🎓", + "color": "#66BB6A", + "allowedActions": ["wb_open", "wb_draw_text", "wb_draw_latex"], + "priority": 3 + }, + { + "id": "gen-student-小红", + "name": "小红", + "role": "student", + "persona": "好奇心强的学生小红。喜欢提问", + "avatar": "🧑‍🎓", + "color": "#66BB6A", + "allowedActions": ["wb_open", "wb_draw_text", "wb_draw_latex"], + "priority": 3 + } + ] + }, + "turns": [ + { + "userMessage": "怎么把一个力分成两个力啊?" + }, + { + "userMessage": "嗯。", + "checkpoint": true + }, + { + "userMessage": "那个平行四边形怎么画?" 
+ }, + { + "userMessage": "明白了。", + "checkpoint": true + }, + { + "userMessage": "斜面上的物体怎么分解?" + } + ] +} diff --git a/eval/whiteboard-layout/scenarios/primary-math-rotation.json b/eval/whiteboard-layout/scenarios/primary-math-rotation.json new file mode 100644 index 000000000..d07afdc24 --- /dev/null +++ b/eval/whiteboard-layout/scenarios/primary-math-rotation.json @@ -0,0 +1,144 @@ +{ + "id": "primary-math-rotation", + "name": "小学数学 — 图形旋转", + "description": "discussion模式,大量shape组合表示复杂图形,多次wb_clear", + "tags": ["math", "discussion", "multi-agent", "zh-CN", "shapes"], + "initialStoreState": { + "stage": { + "id": "eval-math-rotation", + "name": "小学数学图形", + "createdAt": 1700000000, + "updatedAt": 1700000000, + "languageDirective": "zh-CN" + }, + "scenes": [ + { + "id": "sc-rot-1", + "stageId": "eval-math-rotation", + "type": "slide", + "title": "图形的旋转", + "order": 0, + "content": { + "type": "slide", + "canvas": { + "id": "slide-0", + "viewportSize": 1000, + "viewportRatio": 0.5625, + "theme": { + "backgroundColor": "#ffffff", + "themeColors": ["#5b9bd5", "#ed7d31", "#a5a5a5", "#ffc000", "#4472c4"], + "fontColor": "#333333", + "fontName": "Microsoft YaHei" + }, + "elements": [ + { + "type": "text", + "id": "title-4", + "content": "

图形的旋转与对称

", + "left": 60, + "top": 40, + "width": 880, + "height": 70, + "rotate": 0, + "defaultFontName": "Microsoft YaHei", + "defaultColor": "#333333" + }, + { + "type": "image", + "id": "img-rot", + "src": "https://placehold.co/400x300", + "left": 300, + "top": 140, + "width": 400, + "height": 300, + "rotate": 0, + "fixedRatio": true + } + ] + } + } + } + ], + "currentSceneId": "sc-rot-1" + }, + "config": { + "agentIds": ["gen-teacher-01", "gen-assistant-01", "gen-student-乐乐"], + "sessionType": "discussion", + "triggerAgentId": "gen-teacher-01", + "agentConfigs": [ + { + "id": "gen-teacher-01", + "name": "高老师", + "role": "teacher", + "persona": "严谨认真的高老师,善于用白板辅助讲解。", + "avatar": "👨‍🏫", + "color": "#4A90D9", + "allowedActions": [ + "wb_open", + "wb_close", + "wb_clear", + "wb_delete", + "wb_draw_text", + "wb_draw_shape", + "wb_draw_chart", + "wb_draw_latex", + "wb_draw_table", + "wb_draw_line", + "spotlight", + "laser" + ], + "priority": 10 + }, + { + "id": "gen-assistant-01", + "name": "方块姐姐", + "role": "assistant", + "persona": "热情活泼的方块姐姐,负责补充老师遗漏的要点。", + "avatar": "🧑‍💼", + "color": "#E8913A", + "allowedActions": [ + "wb_open", + "wb_close", + "wb_clear", + "wb_delete", + "wb_draw_text", + "wb_draw_shape", + "wb_draw_chart", + "wb_draw_latex", + "wb_draw_table", + "wb_draw_line" + ], + "priority": 7 + }, + { + "id": "gen-student-乐乐", + "name": "乐乐", + "role": "student", + "persona": "好奇心强的学生乐乐。活泼好动", + "avatar": "🧑‍🎓", + "color": "#66BB6A", + "allowedActions": ["wb_open", "wb_draw_text", "wb_draw_latex"], + "priority": 3 + } + ] + }, + "turns": [ + { + "userMessage": "门的旋转中心在哪里?" 
+ }, + { + "userMessage": "嗯", + "checkpoint": true + }, + { + "userMessage": "360度" + }, + { + "userMessage": "嗯嗯,对", + "checkpoint": true + }, + { + "userMessage": "左转两次等于右转两次吗" + } + ] +} diff --git a/eval/whiteboard-layout/scorer.ts b/eval/whiteboard-layout/scorer.ts new file mode 100644 index 000000000..298f7fdcb --- /dev/null +++ b/eval/whiteboard-layout/scorer.ts @@ -0,0 +1,91 @@ +/** + * VLM Scorer for whiteboard layout quality. + * + * Uses the project's LLM infrastructure (resolveModel + generateText from AI SDK) + * so model configuration follows the same `provider:model` convention as the rest + * of the codebase. Supports all providers (OpenAI, Google, Anthropic, etc.). + * + * Environment variable: EVAL_SCORER_MODEL (default: openai:gpt-4o) + */ + +import { readFileSync } from 'fs'; +import { generateText } from 'ai'; +import { resolveModel } from '@/lib/server/resolve-model'; +import type { VlmScore } from './types'; + +const SCORER_MODEL_DEFAULT = 'openai:gpt-4o'; + +const RUBRIC_PROMPT = `You are a whiteboard layout quality reviewer. Evaluate the whiteboard screenshot below. + +Score each dimension from 1 to 10: + +1. readability: Is text clearly legible? Are font sizes appropriate? Are LaTeX formulas fully rendered? +2. overlap: Do any elements occlude or overlap each other? 10 = no overlap, 1 = severe occlusion. +3. space_utilization: Is content reasonably distributed across the canvas? Penalize clustering in one area or excessive empty space. +4. layout_logic: Are related elements grouped together? Is there a clear visual hierarchy and reading order? + +Output ONLY a JSON object with this structure: +{"readability":{"score":N,"reason":"..."},"overlap":{"score":N,"reason":"..."},"space_utilization":{"score":N,"reason":"..."},"layout_logic":{"score":N,"reason":"..."},"overall":N,"issues":["..."]}`; + +/** + * Score a whiteboard screenshot using a VLM. 
+ * + * Model is resolved via EVAL_SCORER_MODEL env var or the provided modelString, + * using the same resolveModel() infrastructure as the rest of the project. + */ +export async function scoreScreenshot( + screenshotPath: string, + modelString?: string, +): Promise<VlmScore> { + const imageBuffer = readFileSync(screenshotPath); + + const { model } = await resolveModel({ + modelString: modelString || process.env.EVAL_SCORER_MODEL || SCORER_MODEL_DEFAULT, + }); + + const result = await generateText({ + model, + messages: [ + { + role: 'user', + content: [ + { type: 'text', text: RUBRIC_PROMPT }, + { type: 'image', image: imageBuffer }, + ], + }, + ], + temperature: 0, + maxOutputTokens: 1000, + }); + + const content = result.text; + + // Extract JSON from response (may be wrapped in markdown code fences) + const jsonMatch = content.match(/\{[\s\S]*\}/); + if (!jsonMatch) { + throw new Error(`VLM returned non-JSON response: ${content.slice(0, 200)}`); + } + + const raw = JSON.parse(jsonMatch[0]); + + // Validate required fields + const dimensions = ['readability', 'overlap', 'space_utilization', 'layout_logic'] as const; + for (const dim of dimensions) { + if (!raw[dim] || typeof raw[dim].score !== 'number') { + throw new Error(`VLM response missing or invalid dimension: ${dim}`); + } + } + if (typeof raw.overall !== 'number') { + throw new Error('VLM response missing overall score'); + } + + const score: VlmScore = { + readability: raw.readability, + overlap: raw.overlap, + space_utilization: raw.space_utilization, + layout_logic: raw.layout_logic, + overall: raw.overall, + issues: Array.isArray(raw.issues) ? 
raw.issues : [], + }; + return score; +} diff --git a/eval/whiteboard-layout/state-manager.ts b/eval/whiteboard-layout/state-manager.ts new file mode 100644 index 000000000..04d2e543f --- /dev/null +++ b/eval/whiteboard-layout/state-manager.ts @@ -0,0 +1,100 @@ +import { useStageStore } from '@/lib/store/stage'; +import { useCanvasStore } from '@/lib/store/canvas'; +import { useWhiteboardHistoryStore } from '@/lib/store/whiteboard-history'; +import { ActionEngine } from '@/lib/action/engine'; +import type { Action } from '@/lib/types/action'; +import type { PPTElement } from '@/lib/types/slides'; +import type { Stage, Scene } from '@/lib/types/stage'; + +interface InitialState { + stage: Stage | null; + scenes: Scene[]; + currentSceneId: string | null; + whiteboardElements?: PPTElement[]; +} + +/** + * Manages headless Zustand stores + ActionEngine for eval. + * + * Zustand stores are singletons (module-level). We reset them + * for each scenario via setState(). ActionEngine reads/writes + * these same stores — no simulation drift. + */ +export class EvalStateManager { + private actionEngine: ActionEngine; + + constructor(initial: InitialState) { + // Reset stores to clean state + useCanvasStore.setState({ + whiteboardOpen: false, + whiteboardClearing: false, + }); + useWhiteboardHistoryStore.setState({ snapshots: [] }); + + // Build stage with optional pre-existing whiteboard elements + const now = Date.now(); + const stage: Stage = initial.stage ?? 
{ + id: 'eval-stage', + name: 'Eval Stage', + languageDirective: 'en-US', + createdAt: now, + updatedAt: now, + }; + + // If pre-existing whiteboard elements provided, seed the whiteboard + if (initial.whiteboardElements && initial.whiteboardElements.length > 0) { + stage.whiteboard = [ + { + id: 'eval-whiteboard', + viewportSize: 1000, + viewportRatio: 16 / 9, + elements: initial.whiteboardElements, + background: { type: 'solid', color: '#ffffff' }, + animations: [], + }, + ]; + } + + useStageStore.setState({ + stage, + scenes: initial.scenes, + currentSceneId: initial.currentSceneId, + mode: 'autonomous', + }); + + // ActionEngine takes the store module as its StageStore argument + this.actionEngine = new ActionEngine(useStageStore); + } + + async executeAction(action: Action): Promise<void> { + await this.actionEngine.execute(action); + } + + getStoreState(): { + stage: Stage | null; + scenes: Scene[]; + currentSceneId: string | null; + mode: string; + whiteboardOpen: boolean; + } { + const s = useStageStore.getState(); + return { + stage: s.stage, + scenes: s.scenes, + currentSceneId: s.currentSceneId, + mode: s.mode, + whiteboardOpen: useCanvasStore.getState().whiteboardOpen, + }; + } + + getWhiteboardElements(): PPTElement[] { + const stage = useStageStore.getState().stage; + if (!stage?.whiteboard || stage.whiteboard.length === 0) return []; + const lastWb = stage.whiteboard[stage.whiteboard.length - 1]; + return lastWb.elements ?? 
[]; + } + + dispose(): void { + this.actionEngine.dispose(); + } +} diff --git a/eval/whiteboard-layout/types.ts b/eval/whiteboard-layout/types.ts new file mode 100644 index 000000000..cedfac7d4 --- /dev/null +++ b/eval/whiteboard-layout/types.ts @@ -0,0 +1,68 @@ +import type { PPTElement } from '@/lib/types/slides'; +import type { Stage, Scene } from '@/lib/types/stage'; + +// ==================== Scenario ==================== + +export interface EvalTurn { + userMessage: string; + checkpoint?: boolean; +} + +export interface EvalScenario { + id: string; + name: string; + description: string; + tags: string[]; + initialStoreState: { + stage: Stage | null; + scenes: Scene[]; + currentSceneId: string | null; + whiteboardElements?: PPTElement[]; + }; + config: { + agentIds: string[]; + sessionType: 'qa' | 'discussion'; + }; + turns: EvalTurn[]; + model?: string; + repeat?: number; +} + +// ==================== Scoring ==================== + +export interface DimensionScore { + score: number; + reason: string; +} + +export interface VlmScore { + readability: DimensionScore; + overlap: DimensionScore; + space_utilization: DimensionScore; + layout_logic: DimensionScore; + overall: number; + issues: string[]; +} + +// ==================== Results ==================== + +export interface CheckpointResult { + turnIndex: number; + screenshotPath: string; + score: VlmScore; + elements: PPTElement[]; +} + +export interface ScenarioRunResult { + scenarioId: string; + runIndex: number; + model: string; + checkpoints: CheckpointResult[]; + error?: string; +} + +export interface EvalReport { + timestamp: string; + model: string; + scenarios: ScenarioRunResult[]; +} diff --git a/lib/chat/agent-loop.ts b/lib/chat/agent-loop.ts new file mode 100644 index 000000000..5301f3b1e --- /dev/null +++ b/lib/chat/agent-loop.ts @@ -0,0 +1,223 @@ +/** + * Agent Loop — Shared core logic for the frontend-driven multi-agent loop. 
+ * + * Extracted from use-chat-sessions.ts so both the frontend hook and the + * eval harness share the same loop logic. No React dependency — pure + * async function with callback injection for environment-specific behavior. + * + * The loop runs per-user-message: the director dispatches agents one at a + * time, each agent generates a response, and the loop continues until the + * director says END, cues the user, or maxTurns is reached. + */ + +import type { StatelessEvent, DirectorState } from '@/lib/types/chat'; +import { createLogger } from '@/lib/logger'; + +const log = createLogger('AgentLoop'); + +// ==================== Types ==================== + +/** Store state snapshot sent with each /api/chat request */ +export interface AgentLoopStoreState { + stage: unknown; + scenes: unknown[]; + currentSceneId: string | null; + mode: string; + whiteboardOpen: boolean; +} + +/** Request template — fields that stay constant across loop iterations */ +export interface AgentLoopRequest { + config: { + agentIds: string[]; + sessionType?: string; + agentConfigs?: Record<string, unknown>[]; + [key: string]: unknown; + }; + userProfile?: { nickname?: string; bio?: string }; + apiKey: string; + baseUrl?: string; + model?: string; + providerType?: string; +} + +/** Per-iteration outcome extracted from the done event */ +export interface AgentLoopIterationResult { + directorState?: DirectorState; + totalAgents: number; + agentHadContent: boolean; + cueUserReceived: boolean; +} + +/** Callbacks injected by the caller (frontend or eval) */ +export interface AgentLoopCallbacks { + /** Get fresh store state for each iteration (whiteboard may have changed) */ + getStoreState: () => AgentLoopStoreState; + + /** Get current messages for the request */ + getMessages: () => unknown[]; + + /** + * Make the HTTP request to /api/chat. + * Returns a Response object (or equivalent with .body ReadableStream). 
+ */ + fetchChat: (body: Record<string, unknown>, signal: AbortSignal) => Promise<Response>; + + /** + * Process a single SSE event. Called for every event in the stream. + * The callback should handle action execution, text accumulation, + * message construction, and UI updates. + */ + onEvent: (event: StatelessEvent) => void; + + /** + * Called after all SSE events for one iteration have been processed + * and the stream is closed. + * + * Must return the iteration result (extracted from the 'done' event). + * The frontend waits for buffer drain here before reading the result + * from loopDoneDataRef. The eval harness returns a result it + * accumulated during onEvent calls. + */ + onIterationEnd: () => Promise<AgentLoopIterationResult | null>; +} + +/** Final outcome of the agent loop */ +export interface AgentLoopOutcome { + /** Why the loop stopped */ + reason: 'end' | 'cue_user' | 'max_turns' | 'aborted' | 'empty_turns' | 'no_done'; + /** Accumulated director state */ + directorState?: DirectorState; + /** Number of iterations completed */ + turnCount: number; +} + +// ==================== Core Loop ==================== + +/** + * Run the agent loop — shared between frontend and eval. + * + * Each iteration: refresh state → POST /api/chat → process SSE events + * → check exit conditions → repeat. 
+ */ +export async function runAgentLoop( + request: AgentLoopRequest, + callbacks: AgentLoopCallbacks, + signal: AbortSignal, + maxTurns: number, +): Promise<AgentLoopOutcome> { + let directorState: DirectorState | undefined = undefined; + let turnCount = 0; + let consecutiveEmptyTurns = 0; + + while (turnCount < maxTurns) { + if (signal.aborted) { + return { reason: 'aborted', directorState, turnCount }; + } + + // Refresh store state each iteration — agent actions may have changed + // whiteboard, scene, or mode between turns + const freshStoreState = callbacks.getStoreState(); + const currentMessages = callbacks.getMessages(); + + // Build request body + const body: Record<string, unknown> = { + messages: currentMessages, + storeState: freshStoreState, + config: request.config, + directorState, + userProfile: request.userProfile, + apiKey: request.apiKey, + baseUrl: request.baseUrl, + model: request.model, + providerType: request.providerType, + }; + + // Fetch + const response = await callbacks.fetchChat(body, signal); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`API error: ${response.status} - ${errorText}`); + } + + // Parse SSE stream and process events + const reader = response.body?.getReader(); + if (!reader) throw new Error('No response body'); + + const decoder = new TextDecoder(); + let sseBuffer = ''; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + sseBuffer += decoder.decode(value, { stream: true }); + const parts = sseBuffer.split('\n\n'); + sseBuffer = parts.pop() || ''; + + for (const part of parts) { + const line = part.trim(); + if (!line.startsWith('data: ')) continue; + + try { + const event: StatelessEvent = JSON.parse(line.slice(6)); + callbacks.onEvent(event); + } catch { + // Skip malformed events (heartbeats, etc.) 
+ } + } + } + } finally { + reader.releaseLock(); + } + + if (signal.aborted) { + return { reason: 'aborted', directorState, turnCount }; + } + + // Post-iteration: wait for buffer drain (frontend) or collect results (eval) + const iterationResult = await callbacks.onIterationEnd(); + + // Check exit conditions + if (!iterationResult) { + return { reason: 'no_done', directorState, turnCount }; + } + + // Update accumulated director state + directorState = iterationResult.directorState; + turnCount = directorState?.turnCount ?? turnCount + 1; + + // Director said USER — stop loop + if (iterationResult.cueUserReceived) { + return { reason: 'cue_user', directorState, turnCount }; + } + + // Director said END — no agent spoke + if (iterationResult.totalAgents === 0) { + return { reason: 'end', directorState, turnCount }; + } + + // Track consecutive empty responses + if (!iterationResult.agentHadContent) { + consecutiveEmptyTurns++; + if (consecutiveEmptyTurns >= 2) { + log.warn( + `[AgentLoop] ${consecutiveEmptyTurns} consecutive empty agent responses, stopping loop`, + ); + return { reason: 'empty_turns', directorState, turnCount }; + } + } else { + consecutiveEmptyTurns = 0; + } + + turnCount++; + } + + // maxTurns reached + if (turnCount >= maxTurns) { + log.info(`[AgentLoop] Max turns (${maxTurns}) reached`); + } + return { reason: 'max_turns', directorState, turnCount }; +} diff --git a/package.json b/package.json index c27dba7ce..f04131bee 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,8 @@ "format": "prettier . 
--write", "test": "vitest run", "test:e2e": "playwright test", - "test:e2e:ui": "playwright test --ui" + "test:e2e:ui": "playwright test --ui", + "eval:whiteboard": "tsx eval/whiteboard-layout/runner.ts" }, "dependencies": { "@ai-sdk/anthropic": "^3.0.23", @@ -120,6 +121,7 @@ "rollup-plugin-typescript2": "^0.36.0", "tailwindcss": "^4", "tslib": "^2.8.0", + "tsx": "^4.21.0", "typescript": "^5", "vitest": "^4.1.0", "vue-to-react": "^1.0.0" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7592f157b..6a886eb78 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -290,7 +290,7 @@ importers: version: 9.39.4(jiti@2.6.1) eslint-config-next: specifier: 16.1.2 - version: 16.1.2(@typescript-eslint/parser@8.57.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3) + version: 16.1.2(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3) prettier: specifier: 3.8.1 version: 3.8.1 @@ -306,12 +306,15 @@ importers: tslib: specifier: ^2.8.0 version: 2.8.1 + tsx: + specifier: ^4.21.0 + version: 4.21.0 typescript: specifier: ^5 version: 5.9.3 vitest: specifier: ^4.1.0 - version: 4.1.0(@opentelemetry/api@1.9.0)(@types/node@20.19.37)(msw@2.12.10(@types/node@20.19.37)(typescript@5.9.3))(vite@8.0.0(@types/node@20.19.37)(jiti@2.6.1)(yaml@2.8.2)) + version: 4.1.0(@opentelemetry/api@1.9.0)(@types/node@20.19.37)(msw@2.12.10(@types/node@20.19.37)(typescript@5.9.3))(vite@8.0.0(@types/node@20.19.37)(esbuild@0.27.7)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2)) vue-to-react: specifier: ^1.0.0 version: 1.0.0 @@ -896,6 +899,162 @@ packages: resolution: {integrity: sha512-CsFmA3u3c2QoLDTfEpGr4t25fjMU31nyvse7IzWTvb0ZycuPjMjb0fjlheh+PbhBYb9YLugnT2uY6Mwcg1o+Zg==} engines: {node: '>=18.0.0'} + '@esbuild/aix-ppc64@0.27.7': + resolution: {integrity: sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [aix] + + '@esbuild/android-arm64@0.27.7': + resolution: {integrity: 
sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==} + engines: {node: '>=18'} + cpu: [arm64] + os: [android] + + '@esbuild/android-arm@0.27.7': + resolution: {integrity: sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==} + engines: {node: '>=18'} + cpu: [arm] + os: [android] + + '@esbuild/android-x64@0.27.7': + resolution: {integrity: sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==} + engines: {node: '>=18'} + cpu: [x64] + os: [android] + + '@esbuild/darwin-arm64@0.27.7': + resolution: {integrity: sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [darwin] + + '@esbuild/darwin-x64@0.27.7': + resolution: {integrity: sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [darwin] + + '@esbuild/freebsd-arm64@0.27.7': + resolution: {integrity: sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==} + engines: {node: '>=18'} + cpu: [arm64] + os: [freebsd] + + '@esbuild/freebsd-x64@0.27.7': + resolution: {integrity: sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [freebsd] + + '@esbuild/linux-arm64@0.27.7': + resolution: {integrity: sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==} + engines: {node: '>=18'} + cpu: [arm64] + os: [linux] + + '@esbuild/linux-arm@0.27.7': + resolution: {integrity: sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==} + engines: {node: '>=18'} + cpu: [arm] + os: [linux] + + '@esbuild/linux-ia32@0.27.7': + resolution: {integrity: 
sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==} + engines: {node: '>=18'} + cpu: [ia32] + os: [linux] + + '@esbuild/linux-loong64@0.27.7': + resolution: {integrity: sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==} + engines: {node: '>=18'} + cpu: [loong64] + os: [linux] + + '@esbuild/linux-mips64el@0.27.7': + resolution: {integrity: sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==} + engines: {node: '>=18'} + cpu: [mips64el] + os: [linux] + + '@esbuild/linux-ppc64@0.27.7': + resolution: {integrity: sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [linux] + + '@esbuild/linux-riscv64@0.27.7': + resolution: {integrity: sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==} + engines: {node: '>=18'} + cpu: [riscv64] + os: [linux] + + '@esbuild/linux-s390x@0.27.7': + resolution: {integrity: sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==} + engines: {node: '>=18'} + cpu: [s390x] + os: [linux] + + '@esbuild/linux-x64@0.27.7': + resolution: {integrity: sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==} + engines: {node: '>=18'} + cpu: [x64] + os: [linux] + + '@esbuild/netbsd-arm64@0.27.7': + resolution: {integrity: sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==} + engines: {node: '>=18'} + cpu: [arm64] + os: [netbsd] + + '@esbuild/netbsd-x64@0.27.7': + resolution: {integrity: sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==} + engines: {node: '>=18'} + cpu: [x64] + os: [netbsd] + + '@esbuild/openbsd-arm64@0.27.7': + resolution: {integrity: 
sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openbsd] + + '@esbuild/openbsd-x64@0.27.7': + resolution: {integrity: sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==} + engines: {node: '>=18'} + cpu: [x64] + os: [openbsd] + + '@esbuild/openharmony-arm64@0.27.7': + resolution: {integrity: sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openharmony] + + '@esbuild/sunos-x64@0.27.7': + resolution: {integrity: sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==} + engines: {node: '>=18'} + cpu: [x64] + os: [sunos] + + '@esbuild/win32-arm64@0.27.7': + resolution: {integrity: sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==} + engines: {node: '>=18'} + cpu: [arm64] + os: [win32] + + '@esbuild/win32-ia32@0.27.7': + resolution: {integrity: sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==} + engines: {node: '>=18'} + cpu: [ia32] + os: [win32] + + '@esbuild/win32-x64@0.27.7': + resolution: {integrity: sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==} + engines: {node: '>=18'} + cpu: [x64] + os: [win32] + '@eslint-community/eslint-utils@4.9.1': resolution: {integrity: sha512-phrYmNiYppR7znFEdqgfWHXR6NCkZEK7hwWDHZUjit/2/U0r6XvkDl0SYnoM51Hq7FhCGdLDT6zxCCOY1hexsQ==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} @@ -4681,6 +4840,11 @@ packages: es6-weak-map@2.0.3: resolution: {integrity: sha512-p5um32HOTO1kP+w7PRnB+5lQ43Z6muuMuIMffvDN8ZB4GcnjLBV6zGStpbASIMk4DCAvEaamhe2zhyCb/QXXsA==} + esbuild@0.27.7: + resolution: {integrity: sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==} + engines: {node: '>=18'} + hasBin: 
true + escalade@3.2.0: resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==} engines: {node: '>=6'} @@ -8627,6 +8791,11 @@ packages: tslib@2.8.1: resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} + tsx@4.21.0: + resolution: {integrity: sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==} + engines: {node: '>=18.0.0'} + hasBin: true + tw-animate-css@1.4.0: resolution: {integrity: sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ==} @@ -10041,6 +10210,84 @@ snapshots: '@whatwg-node/promise-helpers': 1.3.2 tslib: 2.8.1 + '@esbuild/aix-ppc64@0.27.7': + optional: true + + '@esbuild/android-arm64@0.27.7': + optional: true + + '@esbuild/android-arm@0.27.7': + optional: true + + '@esbuild/android-x64@0.27.7': + optional: true + + '@esbuild/darwin-arm64@0.27.7': + optional: true + + '@esbuild/darwin-x64@0.27.7': + optional: true + + '@esbuild/freebsd-arm64@0.27.7': + optional: true + + '@esbuild/freebsd-x64@0.27.7': + optional: true + + '@esbuild/linux-arm64@0.27.7': + optional: true + + '@esbuild/linux-arm@0.27.7': + optional: true + + '@esbuild/linux-ia32@0.27.7': + optional: true + + '@esbuild/linux-loong64@0.27.7': + optional: true + + '@esbuild/linux-mips64el@0.27.7': + optional: true + + '@esbuild/linux-ppc64@0.27.7': + optional: true + + '@esbuild/linux-riscv64@0.27.7': + optional: true + + '@esbuild/linux-s390x@0.27.7': + optional: true + + '@esbuild/linux-x64@0.27.7': + optional: true + + '@esbuild/netbsd-arm64@0.27.7': + optional: true + + '@esbuild/netbsd-x64@0.27.7': + optional: true + + '@esbuild/openbsd-arm64@0.27.7': + optional: true + + '@esbuild/openbsd-x64@0.27.7': + optional: true + + '@esbuild/openharmony-arm64@0.27.7': + optional: true + + '@esbuild/sunos-x64@0.27.7': + optional: true + + '@esbuild/win32-arm64@0.27.7': + optional: 
true + + '@esbuild/win32-ia32@0.27.7': + optional: true + + '@esbuild/win32-x64@0.27.7': + optional: true + '@eslint-community/eslint-utils@4.9.1(eslint@9.39.4(jiti@2.6.1))': dependencies: eslint: 9.39.4(jiti@2.6.1) @@ -12456,14 +12703,14 @@ snapshots: chai: 6.2.2 tinyrainbow: 3.1.0 - '@vitest/mocker@4.1.0(msw@2.12.10(@types/node@20.19.37)(typescript@5.9.3))(vite@8.0.0(@types/node@20.19.37)(jiti@2.6.1)(yaml@2.8.2))': + '@vitest/mocker@4.1.0(msw@2.12.10(@types/node@20.19.37)(typescript@5.9.3))(vite@8.0.0(@types/node@20.19.37)(esbuild@0.27.7)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2))': dependencies: '@vitest/spy': 4.1.0 estree-walker: 3.0.3 magic-string: 0.30.21 optionalDependencies: msw: 2.12.10(@types/node@20.19.37)(typescript@5.9.3) - vite: 8.0.0(@types/node@20.19.37)(jiti@2.6.1)(yaml@2.8.2) + vite: 8.0.0(@types/node@20.19.37)(esbuild@0.27.7)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2) '@vitest/pretty-format@4.1.0': dependencies: @@ -13796,6 +14043,35 @@ snapshots: es6-iterator: 2.0.3 es6-symbol: 3.1.4 + esbuild@0.27.7: + optionalDependencies: + '@esbuild/aix-ppc64': 0.27.7 + '@esbuild/android-arm': 0.27.7 + '@esbuild/android-arm64': 0.27.7 + '@esbuild/android-x64': 0.27.7 + '@esbuild/darwin-arm64': 0.27.7 + '@esbuild/darwin-x64': 0.27.7 + '@esbuild/freebsd-arm64': 0.27.7 + '@esbuild/freebsd-x64': 0.27.7 + '@esbuild/linux-arm': 0.27.7 + '@esbuild/linux-arm64': 0.27.7 + '@esbuild/linux-ia32': 0.27.7 + '@esbuild/linux-loong64': 0.27.7 + '@esbuild/linux-mips64el': 0.27.7 + '@esbuild/linux-ppc64': 0.27.7 + '@esbuild/linux-riscv64': 0.27.7 + '@esbuild/linux-s390x': 0.27.7 + '@esbuild/linux-x64': 0.27.7 + '@esbuild/netbsd-arm64': 0.27.7 + '@esbuild/netbsd-x64': 0.27.7 + '@esbuild/openbsd-arm64': 0.27.7 + '@esbuild/openbsd-x64': 0.27.7 + '@esbuild/openharmony-arm64': 0.27.7 + '@esbuild/sunos-x64': 0.27.7 + '@esbuild/win32-arm64': 0.27.7 + '@esbuild/win32-ia32': 0.27.7 + '@esbuild/win32-x64': 0.27.7 + escalade@3.2.0: {} escape-html@1.0.3: {} @@ -13808,13 +14084,13 @@ snapshots: 
escape-string-regexp@5.0.0: {} - eslint-config-next@16.1.2(@typescript-eslint/parser@8.57.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3): + eslint-config-next@16.1.2(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3): dependencies: '@next/eslint-plugin-next': 16.1.2 eslint: 9.39.4(jiti@2.6.1) eslint-import-resolver-node: 0.3.9 eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.4(jiti@2.6.1)) - eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.57.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.4(jiti@2.6.1)) + eslint-plugin-import: 2.32.0(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.4(jiti@2.6.1)) eslint-plugin-jsx-a11y: 6.10.2(eslint@9.39.4(jiti@2.6.1)) eslint-plugin-react: 7.37.5(eslint@9.39.4(jiti@2.6.1)) eslint-plugin-react-hooks: 7.0.1(eslint@9.39.4(jiti@2.6.1)) @@ -13847,22 +14123,21 @@ snapshots: tinyglobby: 0.2.15 unrs-resolver: 1.11.1 optionalDependencies: - eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.57.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.4(jiti@2.6.1)) + eslint-plugin-import: 2.32.0(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.4(jiti@2.6.1)) transitivePeerDependencies: - supports-color - eslint-module-utils@2.12.1(@typescript-eslint/parser@8.57.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.4(jiti@2.6.1)): + eslint-module-utils@2.12.1(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.4(jiti@2.6.1)): dependencies: debug: 3.2.7 optionalDependencies: - '@typescript-eslint/parser': 8.57.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3) eslint: 9.39.4(jiti@2.6.1) eslint-import-resolver-node: 0.3.9 eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.4(jiti@2.6.1)) 
transitivePeerDependencies: - supports-color - eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.57.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.4(jiti@2.6.1)): + eslint-plugin-import@2.32.0(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.4(jiti@2.6.1)): dependencies: '@rtsao/scc': 1.1.0 array-includes: 3.1.9 @@ -13873,7 +14148,7 @@ snapshots: doctrine: 2.1.0 eslint: 9.39.4(jiti@2.6.1) eslint-import-resolver-node: 0.3.9 - eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.57.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.4(jiti@2.6.1)) + eslint-module-utils: 2.12.1(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.4(jiti@2.6.1)) hasown: 2.0.2 is-core-module: 2.16.1 is-glob: 4.0.3 @@ -13884,8 +14159,6 @@ snapshots: semver: 6.3.1 string.prototype.trimend: 1.0.9 tsconfig-paths: 3.15.0 - optionalDependencies: - '@typescript-eslint/parser': 8.57.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3) transitivePeerDependencies: - eslint-import-resolver-typescript - eslint-import-resolver-webpack @@ -18625,6 +18898,13 @@ snapshots: tslib@2.8.1: {} + tsx@4.21.0: + dependencies: + esbuild: 0.27.7 + get-tsconfig: 4.13.6 + optionalDependencies: + fsevents: 2.3.3 + tw-animate-css@1.4.0: {} txml@5.2.1: @@ -18977,7 +19257,7 @@ snapshots: - bare-abort-controller - react-native-b4a - vite@8.0.0(@types/node@20.19.37)(jiti@2.6.1)(yaml@2.8.2): + vite@8.0.0(@types/node@20.19.37)(esbuild@0.27.7)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2): dependencies: '@oxc-project/runtime': 0.115.0 lightningcss: 1.32.0 @@ -18987,14 +19267,16 @@ snapshots: tinyglobby: 0.2.15 optionalDependencies: '@types/node': 20.19.37 + esbuild: 0.27.7 fsevents: 2.3.3 jiti: 2.6.1 + tsx: 4.21.0 yaml: 2.8.2 - 
vitest@4.1.0(@opentelemetry/api@1.9.0)(@types/node@20.19.37)(msw@2.12.10(@types/node@20.19.37)(typescript@5.9.3))(vite@8.0.0(@types/node@20.19.37)(jiti@2.6.1)(yaml@2.8.2)): + vitest@4.1.0(@opentelemetry/api@1.9.0)(@types/node@20.19.37)(msw@2.12.10(@types/node@20.19.37)(typescript@5.9.3))(vite@8.0.0(@types/node@20.19.37)(esbuild@0.27.7)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2)): dependencies: '@vitest/expect': 4.1.0 - '@vitest/mocker': 4.1.0(msw@2.12.10(@types/node@20.19.37)(typescript@5.9.3))(vite@8.0.0(@types/node@20.19.37)(jiti@2.6.1)(yaml@2.8.2)) + '@vitest/mocker': 4.1.0(msw@2.12.10(@types/node@20.19.37)(typescript@5.9.3))(vite@8.0.0(@types/node@20.19.37)(esbuild@0.27.7)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2)) '@vitest/pretty-format': 4.1.0 '@vitest/runner': 4.1.0 '@vitest/snapshot': 4.1.0 @@ -19011,7 +19293,7 @@ snapshots: tinyexec: 1.0.2 tinyglobby: 0.2.15 tinyrainbow: 3.1.0 - vite: 8.0.0(@types/node@20.19.37)(jiti@2.6.1)(yaml@2.8.2) + vite: 8.0.0(@types/node@20.19.37)(esbuild@0.27.7)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2) why-is-node-running: 2.3.0 optionalDependencies: '@opentelemetry/api': 1.9.0