Skip to content

Commit 2296f5e

Browse files
Merge pull request #33 from groupthinking/claude/slack-check-status-update-R47Ph
Add frontend-only video analysis pipeline with multi-strategy transcription
2 parents 2cd9b5b + d95c7f7 commit 2296f5e

6 files changed

Lines changed: 267 additions & 130 deletions

File tree

apps/web/src/app/api/transcribe/route.ts

Lines changed: 98 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,29 @@
11
import OpenAI from 'openai';
2+
import { GoogleGenerativeAI } from '@google/generative-ai';
23
import { NextResponse } from 'next/server';
34

4-
let _client: OpenAI | null = null;
5-
function getClient() {
6-
if (!_client) _client = new OpenAI();
7-
return _client;
5+
let _openai: OpenAI | null = null;
6+
function getOpenAI() {
7+
if (!_openai) _openai = new OpenAI();
8+
return _openai;
89
}
10+
11+
let _gemini: GoogleGenerativeAI | null = null;
12+
function getGemini() {
13+
if (!_gemini) _gemini = new GoogleGenerativeAI(process.env.GEMINI_API_KEY || '');
14+
return _gemini;
15+
}
16+
917
const BACKEND_URL = process.env.BACKEND_URL || 'http://localhost:8000';
1018

1119
/**
12-
* OpenAI STT fallback — used when YouTube's auto-caption API fails or
13-
* returns low-quality transcripts. Uses gpt-4o-mini-transcribe for
14-
* cost-effective, high-quality transcription.
15-
*
1620
* POST /api/transcribe
17-
* { url: string } — YouTube URL (tries YouTube API first, falls back to STT)
18-
* { audioUrl: string } — Direct audio URL (goes straight to STT)
1921
*
20-
* Returns { success, transcript, source: 'youtube' | 'openai-stt' }
22+
* Multi-strategy transcript extraction:
23+
* 1. YouTube captions via backend (fast + free)
24+
* 2. OpenAI Responses API with web_search (finds transcripts online)
25+
* 3. Gemini fallback (if OpenAI is unavailable, fails, or returns too little text)
26+
* 4. Direct audio STT via OpenAI gpt-4o-mini-transcribe (only when audioUrl is provided)
2127
*/
2228
export async function POST(request: Request) {
2329
try {
@@ -30,18 +36,24 @@ export async function POST(request: Request) {
3036
);
3137
}
3238

33-
// Strategy 1: Try YouTube transcript API via backend first (fast + free)
39+
// Strategy 1: Try YouTube transcript API via backend (fast + free)
3440
if (url && !audioUrl) {
3541
try {
42+
const controller = new AbortController();
43+
const timeout = setTimeout(() => controller.abort(), 8_000);
44+
3645
const ytResponse = await fetch(`${BACKEND_URL}/api/v1/transcript-action`, {
3746
method: 'POST',
3847
headers: { 'Content-Type': 'application/json' },
3948
body: JSON.stringify({ video_url: url, language }),
40-
});
49+
signal: controller.signal,
50+
}).finally(() => clearTimeout(timeout));
4151

4252
if (ytResponse.ok) {
4353
const result = await ytResponse.json();
44-
const segments = result.transcript || [];
54+
55+
// Handle transcript as segments array
56+
const segments = Array.isArray(result.transcript) ? result.transcript : [];
4557
if (segments.length > 0) {
4658
const fullText = segments
4759
.map((s: { text?: string }) => s.text || '')
@@ -58,42 +70,85 @@ export async function POST(request: Request) {
5870
});
5971
}
6072
}
73+
74+
// Handle transcript as a plain string or as { text: string }
75+
const transcriptText =
76+
typeof result.transcript === 'string'
77+
? result.transcript
78+
: result.transcript?.text;
79+
if (typeof transcriptText === 'string' && transcriptText.length > 50) {
80+
return NextResponse.json({
81+
success: true,
82+
transcript: transcriptText,
83+
source: 'youtube',
84+
wordCount: transcriptText.split(/\s+/).length,
85+
});
86+
}
6187
}
6288
} catch {
63-
// YouTube API failed — fall through to OpenAI STT
64-
console.log('YouTube transcript unavailable, falling back to OpenAI STT');
89+
console.log('YouTube transcript unavailable, falling back to AI providers');
6590
}
6691
}
6792

68-
// Strategy 2: OpenAI Speech-to-Text via Responses API
69-
// For YouTube URLs without direct audio, use the Responses API with
70-
// web_search to find and analyze the content
71-
if (url && !audioUrl) {
72-
// Use Responses API to transcribe/summarize the video content
73-
const response = await getClient().responses.create({
74-
model: 'gpt-4o-mini',
75-
instructions: `You are a video content transcription assistant.
93+
// Strategy 2: OpenAI Responses API with web_search
94+
if (url && !audioUrl && process.env.OPENAI_API_KEY) {
95+
try {
96+
const response = await getOpenAI().responses.create({
97+
model: 'gpt-4o-mini',
98+
instructions: `You are a video content transcription assistant.
7699
Given a YouTube URL, use web search to find the video's transcript or detailed content.
77100
Return the full transcript text if available, or a detailed content summary.
78101
Be thorough — capture all key points, quotes, and technical details.`,
79-
tools: [{ type: 'web_search' as const }],
80-
input: `Find and return the full transcript or detailed content of this video: ${url}`,
81-
});
102+
tools: [{ type: 'web_search' as const }],
103+
input: `Find and return the full transcript or detailed content of this video: ${url}`,
104+
});
105+
106+
const text = response.output_text || '';
82107

83-
const text = response.output_text || '';
108+
if (text.length > 100) {
109+
return NextResponse.json({
110+
success: true,
111+
transcript: text,
112+
source: 'openai-web-search',
113+
wordCount: text.split(/\s+/).length,
114+
});
115+
}
116+
} catch (e) {
117+
console.warn('OpenAI web_search transcript failed:', e);
118+
}
119+
}
84120

85-
if (text.length > 100) {
86-
return NextResponse.json({
87-
success: true,
88-
transcript: text,
89-
source: 'openai-web-search',
90-
wordCount: text.split(/\s+/).length,
121+
// Strategy 3: Gemini fallback (when OpenAI is unavailable, fails, or returns too little text)
122+
if (url && !audioUrl && process.env.GEMINI_API_KEY) {
123+
try {
124+
const model = getGemini().getGenerativeModel({
125+
model: 'gemini-2.0-flash',
126+
generationConfig: { temperature: 0.2 },
91127
});
128+
129+
const result = await model.generateContent(
130+
`You are a video content transcription assistant. ` +
131+
`For the following YouTube video URL, provide a detailed transcript or content summary. ` +
132+
`Include all key points, technical details, quotes, and actionable insights. ` +
133+
`Be thorough and comprehensive.\n\nVideo URL: ${url}`
134+
);
135+
const text = result.response.text();
136+
137+
if (text.length > 100) {
138+
return NextResponse.json({
139+
success: true,
140+
transcript: text,
141+
source: 'gemini',
142+
wordCount: text.split(/\s+/).length,
143+
});
144+
}
145+
} catch (e) {
146+
console.warn('Gemini transcript fallback failed:', e);
92147
}
93148
}
94149

95-
// Strategy 3: Direct audio file transcription via OpenAI Whisper/STT
96-
if (audioUrl) {
150+
// Strategy 4: Direct audio file transcription via OpenAI (gpt-4o-mini-transcribe)
151+
if (audioUrl && process.env.OPENAI_API_KEY) {
97152
const audioResponse = await fetch(audioUrl);
98153
if (!audioResponse.ok) {
99154
return NextResponse.json(
@@ -105,7 +160,7 @@ Be thorough — capture all key points, quotes, and technical details.`,
105160
const audioBlob = await audioResponse.blob();
106161
const audioFile = new File([audioBlob], 'audio.mp3', { type: 'audio/mpeg' });
107162

108-
const transcription = await getClient().audio.transcriptions.create({
163+
const transcription = await getOpenAI().audio.transcriptions.create({
109164
model: 'gpt-4o-mini-transcribe',
110165
file: audioFile,
111166
language,
@@ -119,9 +174,13 @@ Be thorough — capture all key points, quotes, and technical details.`,
119174
});
120175
}
121176

177+
// No strategy succeeded
178+
const hasKeys = !!(process.env.OPENAI_API_KEY || process.env.GEMINI_API_KEY);
122179
return NextResponse.json({
123180
success: false,
124-
error: 'Could not transcribe video — YouTube API and OpenAI STT both failed',
181+
error: hasKeys
182+
? 'Could not transcribe video — all strategies failed'
183+
: 'No AI API key configured. Set OPENAI_API_KEY or GEMINI_API_KEY in Vercel environment variables.',
125184
transcript: '',
126185
});
127186
} catch (error) {
@@ -131,7 +190,7 @@ Be thorough — capture all key points, quotes, and technical details.`,
131190
return NextResponse.json({
132191
success: false,
133192
error: message.includes('API key')
134-
? 'OpenAI API key not configured. Set OPENAI_API_KEY in your environment.'
193+
? 'AI API key not configured. Set OPENAI_API_KEY or GEMINI_API_KEY.'
135194
: message,
136195
transcript: '',
137196
});

0 commit comments

Comments
 (0)