-
Notifications
You must be signed in to change notification settings - Fork 0
Enhance NotebookLM integration and frontend video ingestion pipeline #51
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5b5306d
3ed838c
a587381
e352876
22b5088
a285dfd
ce78244
f58d954
abbaa43
b19d73c
c23818d
534edc3
b3a309c
50394be
38df181
094c9cf
02d2cb8
db6541f
c095d73
b0629d7
410ce1a
e3cf036
bb808ff
a9c7665
2288b6c
ff6d5e4
8792227
1ec87f3
86a4208
d59ce5d
0b66974
8847930
91e1c84
f925e30
3be0d81
8e6ae78
2cd9b5b
e95ce42
ebf0327
ff05ed3
36ebbf6
d95c7f7
2296f5e
a148b3c
ed013e1
0995d78
2b9c1f4
6ecc4c3
279c7fd
4a32a44
8b96dce
ee6fc60
d9e2fc6
5640a32
0fd9e90
12915db
456954b
a2075c3
ee035ff
50dc924
d0cacd1
98de680
f170b18
924afa2
f38b34c
fb65641
f8a970a
c1eea83
7ddd93b
6a7ff52
aef4d51
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| {"schema":["../dataconnect/.dataconnect/**/*.gql","../dataconnect/schema/**/*.gql"],"document":["../dataconnect/example/**/*.gql"]} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| { | ||
| "projects": { | ||
| "default": "uvai-730bb" | ||
| }, | ||
| "targets": {}, | ||
| "etags": {} | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -155,3 +155,4 @@ UVAI_Digital_Refinery_Blueprint.pdf | |
| *.db | ||
| .vercel | ||
| .env*.local | ||
| .next/ | ||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -1,20 +1,15 @@ | ||||||||
| import OpenAI from 'openai'; | ||||||||
| import { GoogleGenerativeAI } from '@google/generative-ai'; | ||||||||
| import { Type } from '@google/genai'; | ||||||||
| import { NextResponse } from 'next/server'; | ||||||||
| import { getGeminiClient, hasGeminiKey } from '@/lib/gemini-client'; | ||||||||
|
|
||||||||
| let _openai: OpenAI | null = null; | ||||||||
| function getOpenAI() { | ||||||||
| if (!_openai) _openai = new OpenAI(); | ||||||||
| return _openai; | ||||||||
| } | ||||||||
|
|
||||||||
| let _gemini: GoogleGenerativeAI | null = null; | ||||||||
| function getGemini() { | ||||||||
| if (!_gemini) _gemini = new GoogleGenerativeAI(process.env.GEMINI_API_KEY || ''); | ||||||||
| return _gemini; | ||||||||
| } | ||||||||
|
|
||||||||
| // JSON Schema for structured extraction via Responses API | ||||||||
| // JSON Schema for structured extraction via OpenAI Responses API | ||||||||
| const extractionSchema = { | ||||||||
| type: 'object' as const, | ||||||||
| properties: { | ||||||||
|
|
@@ -54,6 +49,43 @@ const extractionSchema = { | |||||||
| additionalProperties: false, | ||||||||
| }; | ||||||||
|
|
||||||||
| // Gemini responseSchema using @google/genai Type system | ||||||||
| const geminiResponseSchema = { | ||||||||
| type: Type.OBJECT, | ||||||||
| properties: { | ||||||||
| events: { | ||||||||
| type: Type.ARRAY, | ||||||||
| items: { | ||||||||
| type: Type.OBJECT, | ||||||||
| properties: { | ||||||||
| type: { type: Type.STRING, enum: ['action', 'topic', 'insight', 'tool', 'resource'] }, | ||||||||
| title: { type: Type.STRING }, | ||||||||
| description: { type: Type.STRING }, | ||||||||
| timestamp: { type: Type.STRING, nullable: true }, | ||||||||
| priority: { type: Type.STRING, enum: ['high', 'medium', 'low'] }, | ||||||||
| }, | ||||||||
| required: ['type', 'title', 'description', 'priority'], | ||||||||
| }, | ||||||||
| }, | ||||||||
| actions: { | ||||||||
| type: Type.ARRAY, | ||||||||
| items: { | ||||||||
| type: Type.OBJECT, | ||||||||
| properties: { | ||||||||
| title: { type: Type.STRING }, | ||||||||
| description: { type: Type.STRING }, | ||||||||
| category: { type: Type.STRING, enum: ['setup', 'build', 'deploy', 'learn', 'research', 'configure'] }, | ||||||||
| estimatedMinutes: { type: Type.NUMBER, nullable: true }, | ||||||||
| }, | ||||||||
| required: ['title', 'description', 'category'], | ||||||||
| }, | ||||||||
| }, | ||||||||
| summary: { type: Type.STRING }, | ||||||||
| topics: { type: Type.ARRAY, items: { type: Type.STRING } }, | ||||||||
| }, | ||||||||
| required: ['events', 'actions', 'summary', 'topics'], | ||||||||
| }; | ||||||||
|
|
||||||||
| const SYSTEM_PROMPT = `You are an expert content analyst. Extract structured data from video transcripts. | ||||||||
| Be specific and practical — no vague or generic items. | ||||||||
| For events: classify type (action/topic/insight/tool/resource) and priority (high/medium/low). | ||||||||
|
|
@@ -94,54 +126,91 @@ async function extractWithOpenAI(trimmed: string, videoTitle?: string, videoUrl? | |||||||
| } | ||||||||
|
|
||||||||
| async function extractWithGemini(trimmed: string, videoTitle?: string, videoUrl?: string) { | ||||||||
| const model = getGemini().getGenerativeModel({ | ||||||||
| model: 'gemini-2.0-flash', | ||||||||
| generationConfig: { | ||||||||
| responseMimeType: 'application/json', | ||||||||
| const ai = getGeminiClient(); | ||||||||
| const response = await ai.models.generateContent({ | ||||||||
| model: 'gemini-3-pro-preview', | ||||||||
| contents: `${SYSTEM_PROMPT}\n\n${buildUserPrompt(trimmed, videoTitle, videoUrl)}`, | ||||||||
| config: { | ||||||||
| temperature: 0.3, | ||||||||
| responseMimeType: 'application/json', | ||||||||
| responseSchema: geminiResponseSchema, | ||||||||
| tools: [{ googleSearch: {} }], | ||||||||
| }, | ||||||||
| }); | ||||||||
| const result = await model.generateContent(`${SYSTEM_PROMPT}\n\n${buildUserPrompt(trimmed, videoTitle, videoUrl)}`); | ||||||||
| const text = result.response.text(); | ||||||||
| const text = response.text ?? ''; | ||||||||
| return JSON.parse(text); | ||||||||
| } | ||||||||
|
|
||||||||
| export async function POST(request: Request) { | ||||||||
| try { | ||||||||
| const { transcript, videoTitle, videoUrl } = await request.json(); | ||||||||
|
|
||||||||
| if (!transcript || typeof transcript !== 'string') { | ||||||||
| // Accept either transcript text OR videoUrl for direct Gemini analysis | ||||||||
| if ((!transcript || typeof transcript !== 'string') && !videoUrl) { | ||||||||
| return NextResponse.json( | ||||||||
| { error: 'transcript (string) is required' }, | ||||||||
| { error: 'transcript (string) or videoUrl is required' }, | ||||||||
| { status: 400 } | ||||||||
| ); | ||||||||
| } | ||||||||
|
|
||||||||
| const trimmed = transcript.slice(0, 8000); | ||||||||
| let parsed; | ||||||||
| let provider = 'openai'; | ||||||||
|
|
||||||||
| // Try OpenAI first, fall back to Gemini on quota/auth errors | ||||||||
| if (process.env.OPENAI_API_KEY) { | ||||||||
| try { | ||||||||
| parsed = await extractWithOpenAI(trimmed, videoTitle, videoUrl); | ||||||||
| } catch (err) { | ||||||||
| const msg = err instanceof Error ? err.message : ''; | ||||||||
| if ((msg.includes('429') || msg.includes('quota') || msg.includes('rate')) && process.env.GEMINI_API_KEY) { | ||||||||
| console.warn('OpenAI quota hit, falling back to Gemini'); | ||||||||
| parsed = await extractWithGemini(trimmed, videoTitle, videoUrl); | ||||||||
| provider = 'gemini'; | ||||||||
| } else { | ||||||||
| throw err; | ||||||||
| // If we have transcript text, use the existing extraction logic | ||||||||
| if (transcript && typeof transcript === 'string' && transcript.length > 50) { | ||||||||
| const trimmed = transcript.slice(0, 8000); | ||||||||
|
|
||||||||
| if (process.env.OPENAI_API_KEY) { | ||||||||
| try { | ||||||||
| parsed = await extractWithOpenAI(trimmed, videoTitle, videoUrl); | ||||||||
| } catch (err) { | ||||||||
| const msg = err instanceof Error ? err.message : ''; | ||||||||
| if ((msg.includes('429') || msg.includes('quota') || msg.includes('rate')) && hasGeminiKey()) { | ||||||||
| console.warn('OpenAI quota hit, falling back to Gemini'); | ||||||||
| parsed = await extractWithGemini(trimmed, videoTitle, videoUrl); | ||||||||
| provider = 'gemini'; | ||||||||
| } else { | ||||||||
| throw err; | ||||||||
| } | ||||||||
| } | ||||||||
| } else if (hasGeminiKey()) { | ||||||||
| parsed = await extractWithGemini(trimmed, videoTitle, videoUrl); | ||||||||
| provider = 'gemini'; | ||||||||
| } | ||||||||
| } else if (process.env.GEMINI_API_KEY) { | ||||||||
| parsed = await extractWithGemini(trimmed, videoTitle, videoUrl); | ||||||||
| provider = 'gemini'; | ||||||||
| } else { | ||||||||
| } | ||||||||
|
|
||||||||
| // If no transcript but have videoUrl + Gemini, do direct video analysis via Google Search | ||||||||
| if (!parsed && videoUrl && hasGeminiKey()) { | ||||||||
| try { | ||||||||
| const ai = getGeminiClient(); | ||||||||
| const response = await ai.models.generateContent({ | ||||||||
| model: 'gemini-3-pro-preview', | ||||||||
| contents: `${SYSTEM_PROMPT}\n\nAnalyze this YouTube video and extract structured data. | ||||||||
| Use your Google Search tool to find the video's transcript, description, and chapter content. | ||||||||
|
|
||||||||
| Video URL: ${videoUrl} | ||||||||
| ${videoTitle ? `Video Title: ${videoTitle}` : ''} | ||||||||
|
|
||||||||
| Extract events, actions, summary, and topics from the actual video content found via search.`, | ||||||||
| config: { | ||||||||
| temperature: 0.3, | ||||||||
| responseMimeType: 'application/json', | ||||||||
| responseSchema: geminiResponseSchema, | ||||||||
| tools: [{ googleSearch: {} }], | ||||||||
| }, | ||||||||
| }); | ||||||||
| const text = response.text ?? ''; | ||||||||
| parsed = JSON.parse(text); | ||||||||
| provider = 'gemini-search'; | ||||||||
| } catch (e) { | ||||||||
| console.warn('Gemini direct video extraction failed:', e); | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| if (!parsed) { | ||||||||
| return NextResponse.json({ | ||||||||
| success: false, | ||||||||
| error: 'No AI API key configured. Set OPENAI_API_KEY or GEMINI_API_KEY.', | ||||||||
| error: 'No AI API key configured or all extraction attempts failed. Set GEMINI_API_KEY.', | ||||||||
|
||||||||
| error: 'No AI API key configured or all extraction attempts failed. Set GEMINI_API_KEY.', | |
| error: | |
| 'No AI providers succeeded. Either no API keys are configured (missing OPENAI_API_KEY and/or GEMINI_API_KEY) or all extraction attempts failed at runtime.', |
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,163 @@ | ||||||||
| import { NextResponse } from 'next/server'; | ||||||||
| import { publishEvent, EventTypes } from '@/lib/cloudevents'; | ||||||||
| import { analyzeVideoWithGemini } from '@/lib/gemini-video-analyzer'; | ||||||||
| import { hasGeminiKey } from '@/lib/gemini-client'; | ||||||||
|
|
||||||||
| const rawBackendUrl = process.env.BACKEND_URL || ''; | ||||||||
| const BACKEND_URL = rawBackendUrl.startsWith('http') ? rawBackendUrl : 'http://localhost:8000'; | ||||||||
| const BACKEND_AVAILABLE = rawBackendUrl.startsWith('http'); | ||||||||
|
|
||||||||
| /** | ||||||||
| * POST /api/pipeline | ||||||||
| * | ||||||||
| * End-to-end pipeline: YouTube URL → Video Analysis → Code Generation → Deployment → Live URL | ||||||||
| * | ||||||||
| * This is the FULL pipeline that the user's notes describe (PK=999, PK=1021): | ||||||||
| * Ingest → Translate → Transport → Execute | ||||||||
| * | ||||||||
| * Strategies: | ||||||||
| * 1. Backend pipeline (FastAPI /api/v1/video-to-software) — full pipeline with agents | ||||||||
| * 2. Gemini analysis + frontend deployment — when no backend is available | ||||||||
| */ | ||||||||
| export async function POST(request: Request) { | ||||||||
| let videoUrl: string | undefined; | ||||||||
| try { | ||||||||
| const body = await request.json(); | ||||||||
| const { url, project_type = 'web', deployment_target = 'vercel', features } = body; | ||||||||
| videoUrl = url; | ||||||||
|
|
||||||||
| if (!url) { | ||||||||
| return NextResponse.json({ error: 'Video URL is required' }, { status: 400 }); | ||||||||
| } | ||||||||
|
|
||||||||
| await publishEvent(EventTypes.VIDEO_RECEIVED, { url, pipeline: 'end-to-end' }, url); | ||||||||
|
|
||||||||
| // ── Strategy 1: Full backend pipeline (FastAPI video-to-software) ── | ||||||||
| if (BACKEND_AVAILABLE) { | ||||||||
| try { | ||||||||
| const controller = new AbortController(); | ||||||||
| const timeout = setTimeout(() => controller.abort(), 300_000); // 5 min for full pipeline | ||||||||
|
|
||||||||
| let response: Response; | ||||||||
| try { | ||||||||
| response = await fetch(`${BACKEND_URL}/api/v1/video-to-software`, { | ||||||||
| method: 'POST', | ||||||||
| headers: { 'Content-Type': 'application/json' }, | ||||||||
| body: JSON.stringify({ | ||||||||
| video_url: url, | ||||||||
| project_type, | ||||||||
| deployment_target, | ||||||||
| features: features || ['responsive_design', 'modern_ui'], | ||||||||
| }), | ||||||||
| signal: controller.signal, | ||||||||
| }); | ||||||||
| } finally { | ||||||||
| clearTimeout(timeout); | ||||||||
| } | ||||||||
|
|
||||||||
| if (response.ok) { | ||||||||
| const result = await response.json(); | ||||||||
|
|
||||||||
| await publishEvent(EventTypes.PIPELINE_COMPLETED, { | ||||||||
| strategy: 'backend-pipeline', | ||||||||
| success: result.status === 'success', | ||||||||
| live_url: result.live_url, | ||||||||
| github_repo: result.github_repo, | ||||||||
| build_status: result.build_status, | ||||||||
| }, url); | ||||||||
|
|
||||||||
| return NextResponse.json({ | ||||||||
| id: `pipeline_${Date.now().toString(36)}`, | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using
Suggested change
|
||||||||
| status: result.status || 'complete', | ||||||||
| pipeline: 'backend', | ||||||||
| processing_time: result.processing_time, | ||||||||
| result: { | ||||||||
| live_url: result.live_url, | ||||||||
| github_repo: result.github_repo, | ||||||||
| build_status: result.build_status, | ||||||||
| video_analysis: result.video_analysis, | ||||||||
| code_generation: result.code_generation, | ||||||||
| deployment: result.deployment, | ||||||||
| features_implemented: result.features_implemented, | ||||||||
| }, | ||||||||
| }); | ||||||||
| } | ||||||||
| console.warn(`Backend pipeline returned ${response.status}, falling back`); | ||||||||
| } catch (e) { | ||||||||
| console.log('Backend pipeline unavailable:', e); | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| // ── Strategy 2: Gemini analysis (video intelligence only, no deployment) ── | ||||||||
| if (hasGeminiKey()) { | ||||||||
| try { | ||||||||
| const startTime = Date.now(); | ||||||||
| const analysis = await analyzeVideoWithGemini(url); | ||||||||
| const elapsed = Date.now() - startTime; | ||||||||
|
|
||||||||
| await publishEvent(EventTypes.PIPELINE_COMPLETED, { | ||||||||
| strategy: 'gemini-analysis-only', | ||||||||
| success: true, | ||||||||
| note: 'Backend unavailable — analysis only, no deployment', | ||||||||
| }, url); | ||||||||
|
|
||||||||
| return NextResponse.json({ | ||||||||
| id: `pipeline_${Date.now().toString(36)}`, | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using
Suggested change
|
||||||||
| status: 'partial', | ||||||||
| pipeline: 'gemini-only', | ||||||||
| processing_time: `${(elapsed / 1000).toFixed(1)}s`, | ||||||||
| result: { | ||||||||
| live_url: null, | ||||||||
| github_repo: null, | ||||||||
| build_status: 'not_attempted', | ||||||||
| video_analysis: { | ||||||||
| title: analysis.title, | ||||||||
| summary: analysis.summary, | ||||||||
| events: analysis.events, | ||||||||
| actions: analysis.actions, | ||||||||
| topics: analysis.topics, | ||||||||
| architectureCode: analysis.architectureCode, | ||||||||
| }, | ||||||||
| code_generation: null, | ||||||||
| deployment: null, | ||||||||
| message: 'Backend pipeline unavailable. Video analysis complete but code generation and deployment require the Python backend.', | ||||||||
| }, | ||||||||
| }); | ||||||||
| } catch (e) { | ||||||||
| console.error('Gemini analysis failed:', e); | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| return NextResponse.json( | ||||||||
| { error: 'No pipeline available. Configure BACKEND_URL for full pipeline or GEMINI_API_KEY for analysis only.' }, | ||||||||
| { status: 503 }, | ||||||||
| ); | ||||||||
| } catch (error) { | ||||||||
| console.error('Pipeline error:', error); | ||||||||
| await publishEvent(EventTypes.PIPELINE_FAILED, { error: String(error) }, videoUrl).catch(() => {}); | ||||||||
| return NextResponse.json( | ||||||||
| { error: 'Pipeline failed', details: String(error) }, | ||||||||
| { status: 500 }, | ||||||||
| ); | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| export async function GET() { | ||||||||
| return NextResponse.json({ | ||||||||
| name: 'EventRelay End-to-End Pipeline', | ||||||||
| version: '1.0.0', | ||||||||
| description: 'YouTube URL → Video Analysis → Code Generation → Deployment → Live URL', | ||||||||
| pipeline_stages: [ | ||||||||
| '1. Ingest: Gemini analyzes video content with Google Search grounding', | ||||||||
| '2. Translate: Structured output → VideoPack artifact', | ||||||||
| '3. Transport: CloudEvents published at each stage', | ||||||||
| '4. Execute: Agents generate code, create repo, deploy to Vercel', | ||||||||
| ], | ||||||||
| backend_available: BACKEND_AVAILABLE, | ||||||||
| gemini_available: hasGeminiKey(), | ||||||||
| endpoints: { | ||||||||
| pipeline: 'POST /api/pipeline - Full end-to-end pipeline', | ||||||||
| video: 'POST /api/video - Video analysis only', | ||||||||
| }, | ||||||||
| }); | ||||||||
| } | ||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This direct call to `JSON.parse()` is unsafe. If `response.text` from the Gemini API is an empty string (which is possible if the model returns no content), this will throw an unhandled exception and cause the API route to crash with a 500 error. You should gracefully handle the case of an empty or invalid JSON string before parsing.