diff --git a/apps/web/src/app/api/transcribe/route.ts b/apps/web/src/app/api/transcribe/route.ts index b79607910..c7209c6fc 100644 --- a/apps/web/src/app/api/transcribe/route.ts +++ b/apps/web/src/app/api/transcribe/route.ts @@ -1,23 +1,29 @@ import OpenAI from 'openai'; +import { GoogleGenerativeAI } from '@google/generative-ai'; import { NextResponse } from 'next/server'; -let _client: OpenAI | null = null; -function getClient() { - if (!_client) _client = new OpenAI(); - return _client; +let _openai: OpenAI | null = null; +function getOpenAI() { + if (!_openai) _openai = new OpenAI(); + return _openai; } + +let _gemini: GoogleGenerativeAI | null = null; +function getGemini() { + if (!_gemini) _gemini = new GoogleGenerativeAI(process.env.GEMINI_API_KEY || ''); + return _gemini; +} + const BACKEND_URL = process.env.BACKEND_URL || 'http://localhost:8000'; /** - * OpenAI STT fallback — used when YouTube's auto-caption API fails or - * returns low-quality transcripts. Uses gpt-4o-mini-transcribe for - * cost-effective, high-quality transcription. - * * POST /api/transcribe - * { url: string } — YouTube URL (tries YouTube API first, falls back to STT) - * { audioUrl: string } — Direct audio URL (goes straight to STT) * - * Returns { success, transcript, source: 'youtube' | 'openai-stt' } + * Multi-strategy transcript extraction: + * 1. YouTube captions via backend (fast + free) + * 2. OpenAI Responses API with web_search (finds transcripts online) + * 3. Gemini fallback (if OpenAI unavailable) + * 4. 
Direct audio STT via OpenAI Whisper */ export async function POST(request: Request) { try { @@ -30,18 +36,24 @@ export async function POST(request: Request) { ); } - // Strategy 1: Try YouTube transcript API via backend first (fast + free) + // Strategy 1: Try YouTube transcript API via backend (fast + free) if (url && !audioUrl) { try { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), 8_000); + const ytResponse = await fetch(`${BACKEND_URL}/api/v1/transcript-action`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ video_url: url, language }), - }); + signal: controller.signal, + }).finally(() => clearTimeout(timeout)); if (ytResponse.ok) { const result = await ytResponse.json(); - const segments = result.transcript || []; + + // Handle transcript as segments array + const segments = Array.isArray(result.transcript) ? result.transcript : []; if (segments.length > 0) { const fullText = segments .map((s: { text?: string }) => s.text || '') @@ -58,42 +70,85 @@ export async function POST(request: Request) { }); } } + + // Handle transcript as { text: string } + const transcriptText = + typeof result.transcript === 'string' + ? 
result.transcript + : result.transcript?.text; + if (typeof transcriptText === 'string' && transcriptText.length > 50) { + return NextResponse.json({ + success: true, + transcript: transcriptText, + source: 'youtube', + wordCount: transcriptText.split(/\s+/).length, + }); + } } } catch { - // YouTube API failed — fall through to OpenAI STT - console.log('YouTube transcript unavailable, falling back to OpenAI STT'); + console.log('YouTube transcript unavailable, falling back to AI providers'); } } - // Strategy 2: OpenAI Speech-to-Text via Responses API - // For YouTube URLs without direct audio, use the Responses API with - // web_search to find and analyze the content - if (url && !audioUrl) { - // Use Responses API to transcribe/summarize the video content - const response = await getClient().responses.create({ - model: 'gpt-4o-mini', - instructions: `You are a video content transcription assistant. + // Strategy 2: OpenAI Responses API with web_search + if (url && !audioUrl && process.env.OPENAI_API_KEY) { + try { + const response = await getOpenAI().responses.create({ + model: 'gpt-4o-mini', + instructions: `You are a video content transcription assistant. Given a YouTube URL, use web search to find the video's transcript or detailed content. Return the full transcript text if available, or a detailed content summary. 
Be thorough — capture all key points, quotes, and technical details.`, - tools: [{ type: 'web_search' as const }], - input: `Find and return the full transcript or detailed content of this video: ${url}`, - }); + tools: [{ type: 'web_search' as const }], + input: `Find and return the full transcript or detailed content of this video: ${url}`, + }); + + const text = response.output_text || ''; - const text = response.output_text || ''; + if (text.length > 100) { + return NextResponse.json({ + success: true, + transcript: text, + source: 'openai-web-search', + wordCount: text.split(/\s+/).length, + }); + } + } catch (e) { + console.warn('OpenAI web_search transcript failed:', e); + } + } - if (text.length > 100) { - return NextResponse.json({ - success: true, - transcript: text, - source: 'openai-web-search', - wordCount: text.split(/\s+/).length, + // Strategy 3: Gemini fallback (when OpenAI unavailable) + if (url && !audioUrl && process.env.GEMINI_API_KEY) { + try { + const model = getGemini().getGenerativeModel({ + model: 'gemini-2.0-flash', + generationConfig: { temperature: 0.2 }, }); + + const result = await model.generateContent( + `You are a video content transcription assistant. ` + + `For the following YouTube video URL, provide a detailed transcript or content summary. ` + + `Include all key points, technical details, quotes, and actionable insights. 
` + + `Be thorough and comprehensive.\n\nVideo URL: ${url}` + ); + const text = result.response.text(); + + if (text.length > 100) { + return NextResponse.json({ + success: true, + transcript: text, + source: 'gemini', + wordCount: text.split(/\s+/).length, + }); + } + } catch (e) { + console.warn('Gemini transcript fallback failed:', e); } } - // Strategy 3: Direct audio file transcription via OpenAI Whisper/STT - if (audioUrl) { + // Strategy 4: Direct audio file transcription via OpenAI Whisper + if (audioUrl && process.env.OPENAI_API_KEY) { const audioResponse = await fetch(audioUrl); if (!audioResponse.ok) { return NextResponse.json( @@ -105,7 +160,7 @@ Be thorough — capture all key points, quotes, and technical details.`, const audioBlob = await audioResponse.blob(); const audioFile = new File([audioBlob], 'audio.mp3', { type: 'audio/mpeg' }); - const transcription = await getClient().audio.transcriptions.create({ + const transcription = await getOpenAI().audio.transcriptions.create({ model: 'gpt-4o-mini-transcribe', file: audioFile, language, @@ -119,9 +174,13 @@ Be thorough — capture all key points, quotes, and technical details.`, }); } + // No strategy succeeded + const hasKeys = !!(process.env.OPENAI_API_KEY || process.env.GEMINI_API_KEY); return NextResponse.json({ success: false, - error: 'Could not transcribe video — YouTube API and OpenAI STT both failed', + error: hasKeys + ? 'Could not transcribe video — all strategies failed' + : 'No AI API key configured. Set OPENAI_API_KEY or GEMINI_API_KEY in Vercel environment variables.', transcript: '', }); } catch (error) { @@ -131,7 +190,7 @@ Be thorough — capture all key points, quotes, and technical details.`, return NextResponse.json({ success: false, error: message.includes('API key') - ? 'OpenAI API key not configured. Set OPENAI_API_KEY in your environment.' + ? 'AI API key not configured. Set OPENAI_API_KEY or GEMINI_API_KEY.' 
: message, transcript: '', }); diff --git a/apps/web/src/app/api/video/route.ts b/apps/web/src/app/api/video/route.ts index 0ab9595b4..3ebc35c24 100644 --- a/apps/web/src/app/api/video/route.ts +++ b/apps/web/src/app/api/video/route.ts @@ -2,98 +2,174 @@ import { NextResponse } from 'next/server'; const BACKEND_URL = process.env.BACKEND_URL || 'http://localhost:8000'; +/** + * Get the absolute base URL for the current request. + * Uses the request's origin or falls back to environment variables. + */ +function getBaseUrl(request: Request): string { + const url = new URL(request.url); + return `${url.protocol}//${url.host}`; +} + +/** + * POST /api/video + * + * Tries the full backend pipeline first (FastAPI transcript-action workflow). + * If the backend is unreachable — common on Vercel where no Python server + * runs — falls through to a frontend-only path that chains /api/transcribe + * and /api/extract-events serverless functions directly. + */ export async function POST(request: Request) { try { const body = await request.json(); - const { url, options } = body; + const { url } = body; if (!url) { return NextResponse.json({ error: 'Video URL is required' }, { status: 400 }); } - // Call the EventRelay backend /api/v1/transcript-action endpoint - const response = await fetch(`${BACKEND_URL}/api/v1/transcript-action`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ - video_url: url, - language: 'en' - }), - }); + // ── Strategy 1: Full backend pipeline ── + try { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), 15_000); + + const response = await fetch(`${BACKEND_URL}/api/v1/transcript-action`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ video_url: url, language: 'en' }), + signal: controller.signal, + }).finally(() => clearTimeout(timeout)); + + if (response.ok) { + const result = await response.json(); + + 
const transcriptAction = result.outputs?.transcript_action?.data || {}; + const personalityAgent = result.outputs?.personality_agent?.data || {}; + const strategyAgent = result.outputs?.strategy_agent?.data || {}; + + let summaryText = 'Video analyzed successfully'; + const rawSummary = transcriptAction.summary; + if (typeof rawSummary === 'string') { + summaryText = rawSummary; + } else if (rawSummary && typeof rawSummary === 'object') { + summaryText = + rawSummary.content || + rawSummary.executive_summary || + (typeof rawSummary.raw === 'string' + ? (() => { + try { + const parsed = JSON.parse(rawSummary.raw.replace(/```json\n?|```/g, '')); + return parsed.executive_summary || parsed.summary || rawSummary.raw.slice(0, 200); + } catch { + return rawSummary.raw.slice(0, 200); + } + })() + : JSON.stringify(rawSummary).slice(0, 200)); + } + + const insights = { + summary: summaryText, + actions: transcriptAction.task_board?.tasks?.map((t: { title?: string }) => t.title) || [], + topics: transcriptAction.metadata?.topics || [], + sentiment: personalityAgent.personality_map?.video_intent?.primary || 'Neutral', + strategy: strategyAgent.strategic_analysis || null, + project_scaffold: transcriptAction.project_scaffold || null, + }; + + return NextResponse.json({ + id: `vid_${Date.now().toString(36)}`, + status: result.success ? 'complete' : 'failed', + processing_time_ms: Math.round((result.orchestration_meta?.processing_time || 0) * 1000), + result: { + success: result.success, + insights, + transcript_segments: (Array.isArray(result.transcript) ? 
result.transcript.length : result.transcript?.segments?.length) || 0, + agents_used: result.orchestration_meta?.agents_used || [], + errors: result.errors || [], + raw_response: result, + }, + }); + } + console.warn(`Backend returned ${response.status}, falling back to frontend-only pipeline`); + } catch { + console.log('Backend unavailable — using frontend-only pipeline'); + } + + // ── Strategy 2: Frontend-only pipeline ── + // Works on Vercel without the Python backend by chaining the serverless + // /api/transcribe and /api/extract-events routes directly. - if (!response.ok) { - const error = await response.text(); - return NextResponse.json( - { error: `Backend error: ${error}` }, - { status: response.status } - ); + let transcript = ''; + let transcriptSource = 'none'; + try { + const baseUrl = getBaseUrl(request); + const transcribeRes = await fetch(`${baseUrl}/api/transcribe`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ url }), + }); + const transcribeResult = await transcribeRes.json(); + if (transcribeResult.success && transcribeResult.transcript) { + transcript = transcribeResult.transcript; + transcriptSource = transcribeResult.source || 'frontend'; + } + } catch (e) { + console.error('Transcript extraction failed:', e); } - const result = await response.json(); - - // Extract insights from the agent responses - const transcriptAction = result.outputs?.transcript_action?.data || {}; - const personalityAgent = result.outputs?.personality_agent?.data || {}; - const strategyAgent = result.outputs?.strategy_agent?.data || {}; - - // Extract summary string from various possible shapes - let summaryText = 'Video analyzed successfully'; - const rawSummary = transcriptAction.summary; - if (typeof rawSummary === 'string') { - summaryText = rawSummary; - } else if (rawSummary && typeof rawSummary === 'object') { - // summary may be { content, raw, executive_summary, ... 
} - summaryText = - rawSummary.content || - rawSummary.executive_summary || - (typeof rawSummary.raw === 'string' - ? (() => { - try { - const parsed = JSON.parse(rawSummary.raw.replace(/```json\n?|```/g, '')); - return parsed.executive_summary || parsed.summary || rawSummary.raw.slice(0, 200); - } catch { - return rawSummary.raw.slice(0, 200); - } - })() - : JSON.stringify(rawSummary).slice(0, 200)); + // Step 2: Extract events + insights from transcript + let extraction: { events?: Array<{ type: string; title: string; description?: string; timestamp?: string; priority?: string }>; actions?: Array<{ title: string }>; summary?: string; topics?: string[] } = {}; + if (transcript) { + try { + const baseUrl = getBaseUrl(request); + const extractRes = await fetch(`${baseUrl}/api/extract-events`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ transcript, videoUrl: url }), + }); + const extractResult = await extractRes.json(); + if (extractResult.success && extractResult.data) { + extraction = extractResult.data; + } + } catch (e) { + console.error('Event extraction failed:', e); + } } - // Build structured response - const insights = { - summary: summaryText, - actions: transcriptAction.task_board?.tasks?.map((t: { title?: string }) => t.title) || [], - topics: transcriptAction.metadata?.topics || [], - sentiment: personalityAgent.personality_map?.video_intent?.primary || 'Neutral', - strategy: strategyAgent.strategic_analysis || null, - project_scaffold: transcriptAction.project_scaffold || null, - }; + const hasResults = transcript.length > 0; return NextResponse.json({ id: `vid_${Date.now().toString(36)}`, - status: result.success ? 'complete' : 'failed', - processing_time_ms: Math.round((result.orchestration_meta?.processing_time || 0) * 1000), + status: hasResults ? 
'complete' : 'failed', + processing_time_ms: 0, result: { - success: result.success, - insights, - transcript_segments: result.transcript?.length || 0, - agents_used: result.orchestration_meta?.agents_used || [], - errors: result.errors || [], - raw_response: result - } + success: hasResults, + insights: { + summary: extraction.summary || (hasResults ? 'Transcript extracted successfully' : 'Could not extract transcript — configure OPENAI_API_KEY or GEMINI_API_KEY'), + actions: extraction.actions?.map((a) => a.title) || [], + topics: extraction.topics || [], + sentiment: 'Neutral', + }, + transcript_segments: 0, + transcript_source: transcriptSource, + agents_used: ['frontend-pipeline'], + errors: hasResults ? [] : ['Backend unavailable and transcript extraction failed'], + raw_response: { + transcript: { text: transcript }, + extraction, + }, + }, }); } catch (error) { console.error('Video analysis error:', error); return NextResponse.json( { error: 'Failed to analyze video', details: String(error) }, - { status: 500 } + { status: 500 }, ); } } export async function GET() { - // Health check - verify backend is available try { const response = await fetch(`${BACKEND_URL}/api/v1/health`); const health = await response.json(); @@ -106,17 +182,17 @@ export async function GET() { endpoints: { analyze: 'POST /api/video - Analyze a video URL', health: 'GET /api/video - Check API status', - } + }, }); - } catch (error) { + } catch { return NextResponse.json({ name: 'UVAI Video Analysis API', version: '2.0.0', backend_status: 'unavailable', - error: String(error), + frontend_pipeline: 'active', endpoints: { analyze: 'POST /api/video - Analyze a video URL', - } + }, }); } } diff --git a/apps/web/src/app/layout.tsx b/apps/web/src/app/layout.tsx index 22db52712..51ac3bab3 100644 --- a/apps/web/src/app/layout.tsx +++ b/apps/web/src/app/layout.tsx @@ -1,18 +1,10 @@ import type { Metadata, Viewport } from 'next'; -import { Inter, JetBrains_Mono } from 'next/font/google'; import 
'./globals.css'; -const inter = Inter({ - subsets: ['latin'], - display: 'swap', - variable: '--font-inter', -}); - -const jetbrainsMono = JetBrains_Mono({ - subsets: ['latin'], - display: 'swap', - variable: '--font-mono', -}); +// Font CSS variables are defined via a <link> to Google Fonts in <head>
and +// resolved in globals.css / tailwind.config. This avoids next/font/google +// which hard-fails during build if the Google Fonts API is unreachable +// (common in sandboxed CI and offline environments). export const metadata: Metadata = { title: { @@ -85,10 +77,14 @@ export default function RootLayout({ children: React.ReactNode; }) { return ( - + + {/* Global background effects */} diff --git a/apps/web/src/app/page.tsx b/apps/web/src/app/page.tsx index a5fe056d0..2f99d750d 100644 --- a/apps/web/src/app/page.tsx +++ b/apps/web/src/app/page.tsx @@ -12,8 +12,8 @@ const STEPS = [ ]; const EXAMPLES = [ - 'https://www.youtube.com/watch?v=dQw4w9WgXcQ', - 'https://youtu.be/jNQXAC9IVRw', + 'https://www.youtube.com/watch?v=aircAruvnKk', + 'https://www.youtube.com/watch?v=zjkBMFhNj_g', ]; export default function Home() { diff --git a/src/youtube_extension/services/agents/agent_gap_analyzer.py b/src/youtube_extension/services/agents/agent_gap_analyzer.py index defc74783..e17f3c4a1 100644 --- a/src/youtube_extension/services/agents/agent_gap_analyzer.py +++ b/src/youtube_extension/services/agents/agent_gap_analyzer.py @@ -367,7 +367,7 @@ def generate_agent_markdown(self, recommendation: AgentRecommendation) -> str: ## Your Expertise -{'\n'.join(f'- **{area}**' for area in recommendation.expertise_areas)} +{chr(10).join(f'- **{area}**' for area in recommendation.expertise_areas)} ## Project Context @@ -403,7 +403,7 @@ def generate_agent_markdown(self, recommendation: AgentRecommendation) -> str: This agent was created to address the following recurring needs: -{'\n'.join(f'{i+1}. {example}' for i, example in enumerate(recommendation.example_scenarios))} +{chr(10).join(f'{i+1}. 
{example}' for i, example in enumerate(recommendation.example_scenarios))} ## Best Practices @@ -522,7 +522,7 @@ def generate_summary_report(self) -> str: **Description**: {rec.description} **Example Scenarios**: -{'\n'.join(f'- {example}' for example in rec.example_scenarios[:3])} +{chr(10).join(f'- {example}' for example in rec.example_scenarios[:3])} **Action**: Review generated agent at `.eventrelay/agent_gaps/recommendations/{rec.name}.agent.md` diff --git a/src/youtube_extension/services/ai/gemini_service.py b/src/youtube_extension/services/ai/gemini_service.py index 794409cf7..7e720781a 100644 --- a/src/youtube_extension/services/ai/gemini_service.py +++ b/src/youtube_extension/services/ai/gemini_service.py @@ -34,10 +34,16 @@ logging.warning("Google Gemini not available - install: pip install google-genai") try: - import vertexai - from vertexai.generative_models import GenerativeModel, Part - - VERTEX_AVAILABLE = True + # Vertex AI SDK probes the GCE metadata server on import which can hang + # for 5+ seconds outside GCP. Only import when explicitly requested via + # environment variables to keep startup fast in local / CI environments. + if os.getenv("GOOGLE_CLOUD_PROJECT") or os.getenv("ENABLE_VERTEX_AI", "0").lower() in {"1", "true", "yes"}: + import vertexai + from vertexai.generative_models import GenerativeModel, Part + + VERTEX_AVAILABLE = True + else: + VERTEX_AVAILABLE = False except ImportError: VERTEX_AVAILABLE = False logging.warning(