groupthinking · groupthinking · Feb 28, 2026 · Feb 28, 2026 · gemini-code-assist · Feb 28, 2026
@@ -1,5 +1,5 @@
 import OpenAI from 'openai';
-import { Type } from '@google/genai';
+
 import { NextResponse } from 'next/server';
 import { getGeminiClient, hasGeminiKey } from '@/lib/gemini-client';
 
@@ -49,43 +49,6 @@ const extractionSchema = {
   additionalProperties: false,
 };
 
-// Gemini responseSchema using @google/genai Type system
-const geminiResponseSchema = {
-  type: Type.OBJECT,
-  properties: {
-    events: {
-      type: Type.ARRAY,
-      items: {
-        type: Type.OBJECT,
-        properties: {
-          type: { type: Type.STRING, enum: ['action', 'topic', 'insight', 'tool', 'resource'] },
-          title: { type: Type.STRING },
-          description: { type: Type.STRING },
-          timestamp: { type: Type.STRING, nullable: true },
-          priority: { type: Type.STRING, enum: ['high', 'medium', 'low'] },
-        },
-        required: ['type', 'title', 'description', 'priority'],
-      },
-    },
-    actions: {
-      type: Type.ARRAY,
-      items: {
-        type: Type.OBJECT,
-        properties: {
-          title: { type: Type.STRING },
-          description: { type: Type.STRING },
-          category: { type: Type.STRING, enum: ['setup', 'build', 'deploy', 'learn', 'research', 'configure'] },
-          estimatedMinutes: { type: Type.NUMBER, nullable: true },
-        },
-        required: ['title', 'description', 'category'],
-      },
-    },
-    summary: { type: Type.STRING },
-    topics: { type: Type.ARRAY, items: { type: Type.STRING } },
-  },
-  required: ['events', 'actions', 'summary', 'topics'],
-};
-
 const SYSTEM_PROMPT = `You are an expert content analyst. Extract structured data from video transcripts.
 Be specific and practical — no vague or generic items.
 For events: classify type (action/topic/insight/tool/resource) and priority (high/medium/low).
@@ -132,13 +95,12 @@ async function extractWithGemini(trimmed: string, videoTitle?: string, videoUrl?
     contents: `${SYSTEM_PROMPT}\n\n${buildUserPrompt(trimmed, videoTitle, videoUrl)}`,
     config: {
       temperature: 0.3,
-      responseMimeType: 'application/json',
-      responseSchema: geminiResponseSchema,
       tools: [{ googleSearch: {} }],
     },
   });
-  const text = response.text ?? '';
-  return JSON.parse(text);
+  const text = (response.text ?? '').trim();
+  const cleaned = text.replace(/^```(?:json)?\s*\n?/i, '').replace(/\n?```\s*$/i, '');
+  return JSON.parse(cleaned);
 }
 
 export async function POST(request: Request) {
@@ -201,13 +163,12 @@ Respond with ONLY valid JSON matching this structure:
 }`,
           config: {
             temperature: 0.3,
-            responseMimeType: 'application/json',
-            responseSchema: geminiResponseSchema,
             tools: [{ googleSearch: {} }],
           },
         });
-        const text = response.text ?? '';
-        parsed = JSON.parse(text);
+        const text = (response.text ?? '').trim();
+        const cleaned = text.replace(/^```(?:json)?\s*\n?/i, '').replace(/\n?```\s*$/i, '');
+        parsed = JSON.parse(cleaned);
         provider = 'gemini-search';
       } catch (e) {
         console.warn('Gemini direct video extraction failed:', e);

@@ -3,26 +3,22 @@
  *
  * Supports two authentication modes:
  *   1. Gemini API: uses GEMINI_API_KEY or GOOGLE_API_KEY
- *   2. Vertex AI: uses Vertex_AI_API_KEY with project/location
- *      (Express Mode — API key instead of service account)
+ *   2. Vertex AI Express Mode: uses Vertex_AI_API_KEY
+ *      (apiKey + vertexai: true — no project/location needed)
  *
- * Env vars for Vertex AI:
- *   - Vertex_AI_API_KEY: the Vertex AI API key
- *   - GOOGLE_CLOUD_PROJECT: GCP project ID (default: uvai-730bb)
- *   - GOOGLE_CLOUD_LOCATION: GCP location (default: us-central1)
+ * See: https://cloud.google.com/vertex-ai/generative-ai/docs/start/express-mode/vertex-ai-express-mode-api-reference
  */
 
 import { GoogleGenAI } from '@google/genai';
 
 /**
  * Resolve the best available Google/Gemini API key.
- * Returns the first non-empty key found, or empty string.
+ * Checks Vertex_AI_API_KEY, then GEMINI_API_KEY, then GOOGLE_API_KEY, and
+ * returns the first non-empty value, or an empty string when none is set.
  */
 export function resolveGeminiApiKey(): string {
   return (
+    process.env.Vertex_AI_API_KEY ||
     process.env.GEMINI_API_KEY ||
     process.env.GOOGLE_API_KEY ||
-    process.env.Vertex_AI_API_KEY ||
     ''
   );
 }
@@ -35,14 +31,10 @@ export function hasGeminiKey(): boolean {
 }
 
 /**
- * Determine if we should use Vertex AI mode.
- * True when the only available key is Vertex_AI_API_KEY,
- * or when GOOGLE_CLOUD_PROJECT is explicitly set.
+ * Determine if we should use Vertex AI Express Mode.
+ * True when GOOGLE_GENAI_USE_VERTEXAI is "true" or "1" (surrounding
+ * whitespace and letter case ignored), or when Vertex_AI_API_KEY is set.
  */
 function shouldUseVertexAI(): boolean {
-  // If standard Gemini keys are set, use Gemini API
-  if (process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY) return false;
-  // If Vertex AI key is available, use Vertex AI Express Mode
+  const useVertexEnv = process.env.GOOGLE_GENAI_USE_VERTEXAI;
+  if (useVertexEnv && ['true', '1'].includes(useVertexEnv.trim().toLowerCase())) return true;
   if (process.env.Vertex_AI_API_KEY) return true;
   return false;
 }
@@ -52,7 +44,7 @@ let _lastKey = '';
 let _lastMode = '';
 
 /**
- * Get a shared GoogleGenAI instance, creating one if needed.
+ * Get a shared GoogleGenAI instance.
  * Automatically selects Gemini API or Vertex AI Express Mode.
  */
 export function getGeminiClient(): GoogleGenAI {
@@ -61,11 +53,10 @@ export function getGeminiClient(): GoogleGenAI {
 
   if (!_gemini || _lastKey !== key || _lastMode !== mode) {
     if (mode === 'vertex') {
+      // Vertex AI Express Mode: apiKey + vertexai only
       _gemini = new GoogleGenAI({
         vertexai: true,
         apiKey: key,
-        project: process.env.GOOGLE_CLOUD_PROJECT || 'uvai-730bb',
-        location: process.env.GOOGLE_CLOUD_LOCATION || 'us-central1',
       });
     } else {
       _gemini = new GoogleGenAI({ apiKey: key });

@@ -4,9 +4,11 @@
  * Uses the googleSearch tool as the PRIMARY mechanism to retrieve real-time
  * transcripts, descriptions, chapters, and metadata from YouTube videos.
  * Based on the UVAI PK=998 implementation pattern.
+ *
+ * NOTE: Vertex AI does NOT support responseSchema (controlled generation)
+ * combined with googleSearch tool. JSON structure is enforced via prompt.
  */
 
-import { Type } from '@google/genai';
 import { getGeminiClient } from './gemini-client';
 
 export interface VideoAnalysisResult {
@@ -31,83 +33,12 @@ export interface VideoAnalysisResult {
   ingestScript: string;
 }
 
-/**
- * Gemini response schema using the @google/genai Type system.
- * Matches the UVAI structured output requirements.
- */
-const responseSchema = {
-  type: Type.OBJECT,
-  properties: {
-    title: { type: Type.STRING },
-    summary: { type: Type.STRING },
-    transcript: {
-      type: Type.ARRAY,
-      items: {
-        type: Type.OBJECT,
-        properties: {
-          start: { type: Type.NUMBER, description: 'Seconds from video start' },
-          duration: { type: Type.NUMBER },
-          text: { type: Type.STRING },
-        },
-        required: ['start', 'duration', 'text'] as const,
-      },
-    },
-    events: {
-      type: Type.ARRAY,
-      items: {
-        type: Type.OBJECT,
-        properties: {
-          timestamp: { type: Type.NUMBER },
-          label: { type: Type.STRING },
-          description: { type: Type.STRING },
-          codeMapping: {
-            type: Type.STRING,
-            description: 'One-line code implementation of the action',
-          },
-          cloudService: { type: Type.STRING },
-        },
-        required: ['timestamp', 'label', 'description', 'codeMapping', 'cloudService'] as const,
-      },
-    },
-    actions: {
-      type: Type.ARRAY,
-      items: {
-        type: Type.OBJECT,
-        properties: {
-          title: { type: Type.STRING },
-          description: { type: Type.STRING },
-          category: {
-            type: Type.STRING,
-            enum: ['setup', 'build', 'deploy', 'learn', 'research', 'configure'],
-          },
-          estimatedMinutes: { type: Type.NUMBER, nullable: true },
-        },
-        required: ['title', 'description', 'category'] as const,
-      },
-    },
-    topics: { type: Type.ARRAY, items: { type: Type.STRING } },
-    architectureCode: { type: Type.STRING },
-    ingestScript: { type: Type.STRING },
-  },
-  required: [
-    'title',
-    'summary',
-    'transcript',
-    'events',
-    'actions',
-    'topics',
-    'architectureCode',
-    'ingestScript',
-  ] as const,
-};
-
 /**
  * Build the agentic system instruction for the Gemini model.
  * Implements the Think → Act → Observe → Map loop from PK=998.
  */
 function buildSystemInstruction(videoUrl: string): string {
-  return `
-You are the Agentic Video Intelligence Engine.
+  return `You are the Agentic Video Intelligence Engine.
 
 MISSION:
 1. WATCH the video at ${videoUrl} by searching for its transcript, technical documentation,
@@ -119,20 +50,6 @@ MISSION:
 4. OBSERVE & MAP: Extract specific "Action Events" from the video and provide a direct
    code mapping for each.
 
-DATA STRUCTURE REQUIREMENTS:
-- title: Accurate video title from search results.
-- summary: A high-level technical executive summary (2-3 sentences).
-- transcript: An array of {start, duration, text} reconstructed from grounding.
-  Use chapter timestamps and description content if a full transcript is unavailable.
-  Each entry should cover a meaningful segment (30-120 seconds).
-- events: 3-8 key technical milestones with timestamp, label, description, and codeMapping.
-- actions: 3-8 concrete tasks a developer/learner should DO after watching.
-- topics: Key topics and technologies covered.
-- architectureCode: A markdown-formatted architecture overview if technical content is discussed,
-  or empty string if not applicable.
-- ingestScript: A Python script that processes or replicates the video's key workflow,
-  or empty string if not applicable.
-
 IMPORTANT RULES:
 - Use your googleSearch tool to find the ACTUAL content. Search for the video URL,
   the video title, and related terms.
@@ -142,7 +59,24 @@ IMPORTANT RULES:
   chapters, comments, and related articles found via search.
 - NO MOCK DATA. Only use what is found via search grounding.
 - Be thorough — capture every key point, technical detail, and actionable insight.
-`;
+
+You MUST respond with ONLY valid JSON (no markdown fences, no extra text) matching this exact structure:
+{
+  "title": "Accurate video title",
+  "summary": "2-3 sentence technical executive summary",
+  "transcript": [
+    {"start": 0, "duration": 60, "text": "segment text covering 30-120 seconds each"}
+  ],
+  "events": [
+    {"timestamp": 0, "label": "Event Name", "description": "What happened", "codeMapping": "one-line code", "cloudService": "relevant service"}
+  ],
+  "actions": [
+    {"title": "Task title", "description": "What to do", "category": "setup|build|deploy|learn|research|configure", "estimatedMinutes": 15}
+  ],
+  "topics": ["topic1", "topic2"],
+  "architectureCode": "markdown architecture overview or empty string",
+  "ingestScript": "Python script or empty string"
+}`;
 }
 
 /**
@@ -161,13 +95,13 @@ export async function analyzeVideoWithGemini(
     contents: `Perform Agentic Grounding for Video: ${videoUrl}`,
     config: {
       systemInstruction,
-      responseMimeType: 'application/json',
-      responseSchema,
       tools: [{ googleSearch: {} }],
       temperature: 0.3,
     },
   });
 
-  const resultText = response.text || '{}';
-  return JSON.parse(resultText) as VideoAnalysisResult;
+  const resultText = (response.text || '').trim();
+  // Strip markdown code fences if present
+  const cleaned = resultText.replace(/^```(?:json)?\s*\n?/i, '').replace(/\n?```\s*$/i, '');
+  const safeJson = cleaned || '{}';
+  try {
+    return JSON.parse(safeJson) as VideoAnalysisResult;
+  } catch (error) {
+    throw new Error(
+      `Failed to parse Gemini response as JSON. Raw response:\n${resultText}`,
+    );
+  }
 }