Skip to content

Commit 967d9b1

Browse files
feat: upgrade Gemini to @google/genai SDK with structured output, search grounding, video URL processing, and extend VideoPack schema
- Upgrade extract-events/route.ts from @google/generative-ai to @google/genai
- Add Gemini responseSchema with Type system for structured output enforcement
- Add Google Search grounding (googleSearch tool) to Gemini calls
- Upgrade transcribe/route.ts to @google/genai with direct YouTube URL processing via fileData
- Add Gemini video URL fallback chain: direct video → text+search → other strategies
- Extend VideoPackV0 schema with Chapter, CodeCue, Task models
- Update versioning shim for new fields
- Export new types from videopack __init__

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 41b22e3 commit 967d9b1

7 files changed

Lines changed: 686 additions & 89 deletions

File tree

apps/web/package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
"lint": "next lint"
1010
},
1111
"dependencies": {
12+
"@google/genai": "^1.43.0",
1213
"@google/generative-ai": "^0.24.1",
1314
"@stripe/stripe-js": "^2.0.0",
1415
"@supabase/supabase-js": "^2.39.0",

apps/web/src/app/api/extract-events/route.ts

Lines changed: 49 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import OpenAI from 'openai';
2-
import { GoogleGenerativeAI } from '@google/generative-ai';
2+
import { GoogleGenAI, Type } from '@google/genai';
33
import { NextResponse } from 'next/server';
44

55
let _openai: OpenAI | null = null;
@@ -8,13 +8,13 @@ function getOpenAI() {
88
return _openai;
99
}
1010

11-
let _gemini: GoogleGenerativeAI | null = null;
11+
let _gemini: GoogleGenAI | null = null;
1212
function getGemini() {
13-
if (!_gemini) _gemini = new GoogleGenerativeAI(process.env.GEMINI_API_KEY || '');
13+
if (!_gemini) _gemini = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY || '' });
1414
return _gemini;
1515
}
1616

17-
// JSON Schema for structured extraction via Responses API
17+
// JSON Schema for structured extraction via OpenAI Responses API
1818
const extractionSchema = {
1919
type: 'object' as const,
2020
properties: {
@@ -54,6 +54,43 @@ const extractionSchema = {
5454
additionalProperties: false,
5555
};
5656

57+
// Gemini responseSchema using @google/genai Type system
58+
const geminiResponseSchema = {
59+
type: Type.OBJECT,
60+
properties: {
61+
events: {
62+
type: Type.ARRAY,
63+
items: {
64+
type: Type.OBJECT,
65+
properties: {
66+
type: { type: Type.STRING, enum: ['action', 'topic', 'insight', 'tool', 'resource'] },
67+
title: { type: Type.STRING },
68+
description: { type: Type.STRING },
69+
timestamp: { type: Type.STRING, nullable: true },
70+
priority: { type: Type.STRING, enum: ['high', 'medium', 'low'] },
71+
},
72+
required: ['type', 'title', 'description', 'priority'],
73+
},
74+
},
75+
actions: {
76+
type: Type.ARRAY,
77+
items: {
78+
type: Type.OBJECT,
79+
properties: {
80+
title: { type: Type.STRING },
81+
description: { type: Type.STRING },
82+
category: { type: Type.STRING, enum: ['setup', 'build', 'deploy', 'learn', 'research', 'configure'] },
83+
estimatedMinutes: { type: Type.NUMBER, nullable: true },
84+
},
85+
required: ['title', 'description', 'category'],
86+
},
87+
},
88+
summary: { type: Type.STRING },
89+
topics: { type: Type.ARRAY, items: { type: Type.STRING } },
90+
},
91+
required: ['events', 'actions', 'summary', 'topics'],
92+
};
93+
5794
const SYSTEM_PROMPT = `You are an expert content analyst. Extract structured data from video transcripts.
5895
Be specific and practical — no vague or generic items.
5996
For events: classify type (action/topic/insight/tool/resource) and priority (high/medium/low).
@@ -94,15 +131,18 @@ async function extractWithOpenAI(trimmed: string, videoTitle?: string, videoUrl?
94131
}
95132

96133
async function extractWithGemini(trimmed: string, videoTitle?: string, videoUrl?: string) {
97-
const model = getGemini().getGenerativeModel({
134+
const ai = getGemini();
135+
const response = await ai.models.generateContent({
98136
model: 'gemini-2.0-flash',
99-
generationConfig: {
100-
responseMimeType: 'application/json',
137+
contents: `${SYSTEM_PROMPT}\n\n${buildUserPrompt(trimmed, videoTitle, videoUrl)}`,
138+
config: {
101139
temperature: 0.3,
140+
responseMimeType: 'application/json',
141+
responseSchema: geminiResponseSchema,
142+
tools: [{ googleSearch: {} }],
102143
},
103144
});
104-
const result = await model.generateContent(`${SYSTEM_PROMPT}\n\n${buildUserPrompt(trimmed, videoTitle, videoUrl)}`);
105-
const text = result.response.text();
145+
const text = response.text ?? '';
106146
return JSON.parse(text);
107147
}
108148

apps/web/src/app/api/transcribe/route.ts

Lines changed: 60 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import OpenAI from 'openai';
2-
import { GoogleGenerativeAI } from '@google/generative-ai';
2+
import { GoogleGenAI } from '@google/genai';
33
import { NextResponse } from 'next/server';
44

55
let _openai: OpenAI | null = null;
@@ -8,9 +8,9 @@ function getOpenAI() {
88
return _openai;
99
}
1010

11-
let _gemini: GoogleGenerativeAI | null = null;
11+
let _gemini: GoogleGenAI | null = null;
1212
function getGemini() {
13-
if (!_gemini) _gemini = new GoogleGenerativeAI(process.env.GEMINI_API_KEY || '');
13+
if (!_gemini) _gemini = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY || '' });
1414
return _gemini;
1515
}
1616

@@ -121,32 +121,76 @@ Be thorough — capture all key points, quotes, and technical details.`,
121121
}
122122
}
123123

124-
// Strategy 3: Gemini fallback (when OpenAI unavailable)
124+
// Strategy 3: Gemini with direct YouTube URL processing + Google Search grounding
125125
if (url && !audioUrl && process.env.GEMINI_API_KEY) {
126126
try {
127-
const model = getGemini().getGenerativeModel({
127+
const ai = getGemini();
128+
const result = await ai.models.generateContent({
128129
model: 'gemini-2.0-flash',
129-
generationConfig: { temperature: 0.2 },
130+
contents: [
131+
{
132+
role: 'user',
133+
parts: [
134+
{
135+
fileData: {
136+
mimeType: 'video/*',
137+
fileUri: url,
138+
},
139+
},
140+
{
141+
text: 'Provide a complete, detailed transcript of this video. ' +
142+
'Include all spoken content verbatim. ' +
143+
'Include timestamps where possible in [MM:SS] format. ' +
144+
'Be thorough and comprehensive — capture every key point, quote, and technical detail.',
145+
},
146+
],
147+
},
148+
],
149+
config: {
150+
temperature: 0.2,
151+
tools: [{ googleSearch: {} }],
152+
},
130153
});
131-
132-
const result = await model.generateContent(
133-
`You are a video content transcription assistant. ` +
134-
`For the following YouTube video URL, provide a detailed transcript or content summary. ` +
135-
`Include all key points, technical details, quotes, and actionable insights. ` +
136-
`Be thorough and comprehensive.\n\nVideo URL: ${url}`
137-
);
138-
const text = result.response.text();
154+
const text = result.text ?? '';
139155

140156
if (text.length > 100) {
141157
return NextResponse.json({
142158
success: true,
143159
transcript: text,
144-
source: 'gemini',
160+
source: 'gemini-video',
145161
wordCount: text.split(/\s+/).length,
146162
});
147163
}
148164
} catch (e) {
149-
console.warn('Gemini transcript fallback failed:', e);
165+
console.warn('Gemini video URL processing failed, trying text fallback:', e);
166+
167+
// Fallback: text-based Gemini with Google Search grounding
168+
try {
169+
const ai = getGemini();
170+
const result = await ai.models.generateContent({
171+
model: 'gemini-2.0-flash',
172+
contents: `You are a video content transcription assistant. ` +
173+
`For the following YouTube video URL, provide a detailed transcript or content summary. ` +
174+
`Include all key points, technical details, quotes, and actionable insights. ` +
175+
`Be thorough and comprehensive.\n\nVideo URL: ${url}`,
176+
config: {
177+
temperature: 0.2,
178+
tools: [{ googleSearch: {} }],
179+
},
180+
});
181+
const text = result.text ?? '';
182+
183+
if (text.length > 100) {
184+
return NextResponse.json({
185+
success: true,
186+
transcript: text,
187+
source: 'gemini',
188+
wordCount: text.split(/\s+/).length,
189+
});
190+
}
191+
} catch (e2) {
192+
console.warn('Gemini text fallback also failed:', e2);
193+
}
150194
}
151195
}
152196

0 commit comments

Comments
 (0)