11import OpenAI from 'openai' ;
2+ import { GoogleGenerativeAI } from '@google/generative-ai' ;
23import { NextResponse } from 'next/server' ;
34
4- let _client : OpenAI | null = null ;
5- function getClient ( ) {
6- if ( ! _client ) _client = new OpenAI ( ) ;
7- return _client ;
5+ let _openai : OpenAI | null = null ;
6+ function getOpenAI ( ) {
7+ if ( ! _openai ) _openai = new OpenAI ( ) ;
8+ return _openai ;
89}
10+
11+ let _gemini : GoogleGenerativeAI | null = null ;
12+ function getGemini ( ) {
13+ if ( ! _gemini ) _gemini = new GoogleGenerativeAI ( process . env . GEMINI_API_KEY || '' ) ;
14+ return _gemini ;
15+ }
16+
917const BACKEND_URL = process . env . BACKEND_URL || 'http://localhost:8000' ;
1018
/**
 * POST /api/transcribe
 *
 * Multi-strategy transcript extraction:
 * 1. YouTube captions via backend (fast + free)
 * 2. OpenAI Responses API with web_search (finds transcripts online)
 * 3. Gemini fallback (if OpenAI unavailable)
 * 4. Direct audio STT via OpenAI Whisper
 */
// NOTE(review): this span is a unified-diff fragment, not clean source. The
// fused leading numbers and `+`/`-` markers are diff residue, token spacing
// (`process . env`, `/ \s + /`) is an extraction artifact, and the `@@` hunk
// headers below mark omitted interior lines (the request-body parsing, the
// YouTube-segments join/return, the STT error/return paths, and the `catch`
// header). Code is left byte-identical — do not restructure without the full
// file.
2228export async function POST ( request : Request ) {
2329 try {
@@ -30,18 +36,24 @@ export async function POST(request: Request) {
3036 ) ;
3137 }
3238
33- // Strategy 1: Try YouTube transcript API via backend first (fast + free)
39+ // Strategy 1: Try YouTube transcript API via backend (fast + free)
3440 if ( url && ! audioUrl ) {
3541 try {
// NOTE(review): the new version adds an 8s AbortController timeout on the
// backend fetch — clearTimeout is correctly placed in .finally().
42+ const controller = new AbortController ( ) ;
43+ const timeout = setTimeout ( ( ) => controller . abort ( ) , 8_000 ) ;
44+
3645 const ytResponse = await fetch ( `${ BACKEND_URL } /api/v1/transcript-action` , {
3746 method : 'POST' ,
3847 headers : { 'Content-Type' : 'application/json' } ,
3948 body : JSON . stringify ( { video_url : url , language } ) ,
40- } ) ;
49+ signal : controller . signal ,
50+ } ) . finally ( ( ) => clearTimeout ( timeout ) ) ;
4151
4252 if ( ytResponse . ok ) {
4353 const result = await ytResponse . json ( ) ;
44- const segments = result . transcript || [ ] ;
54+
55+ // Handle transcript as segments array
56+ const segments = Array . isArray ( result . transcript ) ? result . transcript : [ ] ;
4557 if ( segments . length > 0 ) {
4658 const fullText = segments
4759 . map ( ( s : { text ?: string } ) => s . text || '' )
@@ -58,42 +70,85 @@ export async function POST(request: Request) {
5870 } ) ;
5971 }
6072 }
73+
74+ // Handle transcript as { text: string }
75+ const transcriptText =
76+ typeof result . transcript === 'string'
77+ ? result . transcript
78+ : result . transcript ?. text ;
79+ if ( typeof transcriptText === 'string' && transcriptText . length > 50 ) {
80+ return NextResponse . json ( {
81+ success : true ,
82+ transcript : transcriptText ,
83+ source : 'youtube' ,
84+ wordCount : transcriptText . split ( / \s + / ) . length ,
85+ } ) ;
86+ }
6187 }
6288 } catch {
63- // YouTube API failed — fall through to OpenAI STT
64- console . log ( 'YouTube transcript unavailable, falling back to OpenAI STT' ) ;
89+ console . log ( 'YouTube transcript unavailable, falling back to AI providers' ) ;
6590 }
6691 }
6792
68- // Strategy 2: OpenAI Speech-to-Text via Responses API
69- // For YouTube URLs without direct audio, use the Responses API with
70- // web_search to find and analyze the content
71- if ( url && ! audioUrl ) {
72- // Use Responses API to transcribe/summarize the video content
73- const response = await getClient ( ) . responses . create ( {
74- model : 'gpt-4o-mini' ,
75- instructions : `You are a video content transcription assistant.
// NOTE(review): strategies 2-4 are now gated on the presence of the relevant
// provider API key, so a missing key skips the strategy instead of throwing.
93+ // Strategy 2: OpenAI Responses API with web_search
94+ if ( url && ! audioUrl && process . env . OPENAI_API_KEY ) {
95+ try {
96+ const response = await getOpenAI ( ) . responses . create ( {
97+ model : 'gpt-4o-mini' ,
98+ instructions : `You are a video content transcription assistant.
7699Given a YouTube URL, use web search to find the video's transcript or detailed content.
77100Return the full transcript text if available, or a detailed content summary.
78101Be thorough — capture all key points, quotes, and technical details.` ,
79- tools : [ { type : 'web_search' as const } ] ,
80- input : `Find and return the full transcript or detailed content of this video: ${ url } ` ,
81- } ) ;
102+ tools : [ { type : 'web_search' as const } ] ,
103+ input : `Find and return the full transcript or detailed content of this video: ${ url } ` ,
104+ } ) ;
105+
106+ const text = response . output_text || '' ;
82107
83- const text = response . output_text || '' ;
108+ if ( text . length > 100 ) {
109+ return NextResponse . json ( {
110+ success : true ,
111+ transcript : text ,
112+ source : 'openai-web-search' ,
113+ wordCount : text . split ( / \s + / ) . length ,
114+ } ) ;
115+ }
116+ } catch ( e ) {
117+ console . warn ( 'OpenAI web_search transcript failed:' , e ) ;
118+ }
119+ }
84120

85- if ( text . length > 100 ) {
86- return NextResponse . json ( {
87- success : true ,
88- transcript : text ,
89- source : 'openai-web-search ' ,
90- wordCount : text . split ( / \s + / ) . length ,
121+ // Strategy 3: Gemini fallback (when OpenAI unavailable)
122+ if ( url && ! audioUrl && process . env . GEMINI_API_KEY ) {
123+ try {
124+ const model = getGemini ( ) . getGenerativeModel ( {
// NOTE(review): trailing space inside the model id 'gemini-2.0-flash ' on the
// next line — almost certainly a typo that would make the API reject the
// model name; verify against the full file and fix there. (The old
// 'openai-web-search ' trailing space WAS fixed in this same diff.)
125+ model : 'gemini-2.0-flash ' ,
126+ generationConfig : { temperature : 0.2 } ,
91127 } ) ;
128+
129+ const result = await model . generateContent (
130+ `You are a video content transcription assistant. ` +
131+ `For the following YouTube video URL, provide a detailed transcript or content summary. ` +
132+ `Include all key points, technical details, quotes, and actionable insights. ` +
133+ `Be thorough and comprehensive.\n\nVideo URL: ${ url } `
134+ ) ;
135+ const text = result . response . text ( ) ;
136+
137+ if ( text . length > 100 ) {
138+ return NextResponse . json ( {
139+ success : true ,
140+ transcript : text ,
141+ source : 'gemini' ,
142+ wordCount : text . split ( / \s + / ) . length ,
143+ } ) ;
144+ }
145+ } catch ( e ) {
146+ console . warn ( 'Gemini transcript fallback failed:' , e ) ;
92147 }
93148 }
94149

95- // Strategy 3 : Direct audio file transcription via OpenAI Whisper/STT
96- if ( audioUrl ) {
150+ // Strategy 4 : Direct audio file transcription via OpenAI Whisper
151+ if ( audioUrl && process . env . OPENAI_API_KEY ) {
97152 const audioResponse = await fetch ( audioUrl ) ;
98153 if ( ! audioResponse . ok ) {
99154 return NextResponse . json (
@@ -105,7 +160,7 @@ Be thorough — capture all key points, quotes, and technical details.`,
105160 const audioBlob = await audioResponse . blob ( ) ;
106161 const audioFile = new File ( [ audioBlob ] , 'audio.mp3' , { type : 'audio/mpeg' } ) ;
107162

108- const transcription = await getClient ( ) . audio . transcriptions . create ( {
163+ const transcription = await getOpenAI ( ) . audio . transcriptions . create ( {
109164 model : 'gpt-4o-mini-transcribe' ,
110165 file : audioFile ,
111166 language,
@@ -119,9 +174,13 @@ Be thorough — capture all key points, quotes, and technical details.`,
119174 } ) ;
120175 }
121176

177+ // No strategy succeeded
178+ const hasKeys = ! ! ( process . env . OPENAI_API_KEY || process . env . GEMINI_API_KEY ) ;
122179 return NextResponse . json ( {
123180 success : false ,
124- error : 'Could not transcribe video — YouTube API and OpenAI STT both failed' ,
181+ error : hasKeys
182+ ? 'Could not transcribe video — all strategies failed'
183+ : 'No AI API key configured. Set OPENAI_API_KEY or GEMINI_API_KEY in Vercel environment variables.' ,
125184 transcript : '' ,
126185 } ) ;
127186 } catch ( error ) {
@@ -131,7 +190,7 @@ Be thorough — capture all key points, quotes, and technical details.`,
131190 return NextResponse . json ( {
132191 success : false ,
133192 error : message . includes ( 'API key' )
134- ? 'OpenAI API key not configured. Set OPENAI_API_KEY in your environment .'
193+ ? 'AI API key not configured. Set OPENAI_API_KEY or GEMINI_API_KEY .'
135194 : message ,
136195 transcript : '' ,
137196 } ) ;
0 commit comments