diff --git a/app/api/manual-cache/route.ts b/app/api/manual-cache/route.ts
new file mode 100644
index 000000000..c9afeb89b
--- /dev/null
+++ b/app/api/manual-cache/route.ts
@@ -0,0 +1,34 @@
+import { NextResponse } from 'next/server';
+import fs from 'fs';
+import path from 'path';
+
+// Note: We write to /tmp to work around read-only serverless filesystems, and
+// we validate the hash format to prevent path traversal. Serverless instances
+// are ephemeral, so a durable store such as Redis or Vercel KV would be
+// preferable in production.
+
+export async function POST(req: Request) {
+  try {
+    const { hash, response } = await req.json();
+
+    if (!hash || !response || typeof response !== 'string') {
+      return NextResponse.json({ error: 'Missing hash or response' }, { status: 400 });
+    }
+
+    // Validate the hash as exactly 32 hex chars (an MD5 digest) to prevent path traversal
+    if (!/^[a-fA-F0-9]{32}$/.test(hash)) {
+      return NextResponse.json({ error: 'Invalid hash format' }, { status: 400 });
+    }
+
+    // The serverless filesystem is read-only outside /tmp, so write there
+    const CACHE_DIR = path.join('/tmp', '.openmaic', 'manual_cache');
+    if (!fs.existsSync(CACHE_DIR)) {
+      fs.mkdirSync(CACHE_DIR, { recursive: true });
+    }
+
+    fs.writeFileSync(path.join(CACHE_DIR, `${hash}.json`), response, 'utf-8');
+
+    return NextResponse.json({ success: true });
+  } catch (error) {
+    return NextResponse.json({ error: error instanceof Error ? error.message : 'Unknown error' }, { status: 500 });
+  }
+}
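+
+// Example request (the hash is the MD5 prompt hash surfaced by the generation UI;
+// the values below are illustrative, not real cached data):
+//   fetch('/api/manual-cache', {
+//     method: 'POST',
+//     headers: { 'Content-Type': 'application/json' },
+//     body: JSON.stringify({ hash: 'd41d8cd98f00b204e9800998ecf8427e', response: '{"pages": []}' }),
+//   });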
diff --git a/app/api/web-search/route.ts b/app/api/web-search/route.ts
index 5a9708260..f96a225cd 100644
--- a/app/api/web-search/route.ts
+++ b/app/api/web-search/route.ts
@@ -8,6 +8,7 @@
import { NextRequest } from 'next/server';
import { callLLM } from '@/lib/ai/llm';
import { searchWithTavily, formatSearchResultsAsContext } from '@/lib/web-search/tavily';
+import { searchWithSearXNG } from '@/lib/web-search/searxng';
import { resolveWebSearchApiKey } from '@/lib/server/provider-config';
import { createLogger } from '@/lib/logger';
import { apiError, apiSuccess } from '@/lib/server/api-response';
@@ -28,10 +29,14 @@ export async function POST(req: NextRequest) {
     query: requestQuery,
     pdfText,
     apiKey: clientApiKey,
+    providerId,
+    baseUrl,
   } = body as {
     query?: string;
     pdfText?: string;
     apiKey?: string;
+    providerId?: string;
+    baseUrl?: string;
   };
   query = requestQuery;
@@ -39,13 +44,22 @@ export async function POST(req: NextRequest) {
     return apiError('MISSING_REQUIRED_FIELD', 400, 'query is required');
   }
-  const apiKey = resolveWebSearchApiKey(clientApiKey);
-  if (!apiKey) {
-    return apiError(
-      'MISSING_API_KEY',
-      400,
-      'Tavily API key is not configured. Set it in Settings → Web Search or set TAVILY_API_KEY env var.',
-    );
+  let result;
+  if (providerId === 'searxng') {
+    result = await searchWithSearXNG({
+      query: query.trim(),
+      baseUrl: baseUrl || process.env.SEARXNG_URL || 'http://127.0.0.1:8080/search',
+    });
+  } else {
+    const apiKey = resolveWebSearchApiKey(clientApiKey);
+    if (!apiKey) {
+      return apiError(
+        'MISSING_API_KEY',
+        400,
+        'Tavily API key is not configured. Set it in Settings → Web Search or set TAVILY_API_KEY env var.',
+      );
+    }
+    result = await searchWithTavily({ query: query.trim(), apiKey });
   }
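+  // Note: SEARXNG_URL should point at the instance's /search endpoint, and the
+  // instance must have JSON output enabled (formats: [html, json] in settings.yml).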
   // Clamp rewrite input at the route boundary; framework body limits still apply to total request size.
diff --git a/components/generation/generating-progress.tsx b/components/generation/generating-progress.tsx
index 639e79d31..17ca7f256 100644
--- a/components/generation/generating-progress.tsx
+++ b/components/generation/generating-progress.tsx
@@ -2,8 +2,11 @@
import { useEffect, useState } from 'react';
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
-import { Loader2, CheckCircle2, XCircle, Circle } from 'lucide-react';
+import { Loader2, CheckCircle2, XCircle, Circle, Copy, Play } from 'lucide-react';
import { useI18n } from '@/lib/hooks/use-i18n';
+import { Button } from '@/components/ui/button';
+import { Textarea } from '@/components/ui/textarea';
+import { toast } from 'sonner';
 interface GeneratingProgressProps {
   outlineReady: boolean; // Is outline generation complete?
@@ -62,6 +65,14 @@ export function GeneratingProgress({
}: GeneratingProgressProps) {
   const { t } = useI18n();
   const [dots, setDots] = useState('');
+  const [manualResponse, setManualResponse] = useState('');
+  const [isSubmitting, setIsSubmitting] = useState(false);
+
+  // Extract the prompt hash and prompt text from the structured error string
+  const isManualIntervention = error?.startsWith('MANUAL_INTERVENTION_REQUIRED|||');
+  const errorParts = isManualIntervention ? error?.split('|||') : [];
+  const promptHash = (errorParts && errorParts[1]) || '';
+  const manualPromptText = (errorParts && errorParts[2]) || '';
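+  // e.g. error === 'MANUAL_INTERVENTION_REQUIRED|||<32-char md5>|||<full prompt text>',
+  // the format thrown by callLLM in lib/ai/llm.ts when the manual fallback triggers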
   // Animated dots for loading state
   useEffect(() => {
@@ -73,12 +84,41 @@ export function GeneratingProgress({
     }
   }, [error, firstPageReady]);
+  const handleCopyPrompt = () => {
+    if (manualPromptText) {
+      navigator.clipboard.writeText(manualPromptText);
+      toast.success('Prompt copied to clipboard');
+    }
+  };
+
+  const handleSubmitManualResponse = async () => {
+    setIsSubmitting(true);
+    try {
+      const res = await fetch('/api/manual-cache', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ hash: promptHash, response: manualResponse }),
+      });
+      if (!res.ok) throw new Error(`Save failed with status ${res.status}`);
+
+      // Reload the page. The user will click "Generate" again,
+      // but the backend will instantly skip the step using the cache.
+      toast.success('Saved! Please restart the generation.');
+      window.location.reload();
+    } catch (_e) {
+      toast.error('Failed to save response.');
+    } finally {
+      setIsSubmitting(false);
+    }
+  };
+
   return (
-            {error ? (
+            {isManualIntervention ? (
+              <>Action Required: Gemini Blocked Output</>
+            ) : error ? (
             <>
               {t('generation.generationFailed')}
@@ -98,40 +138,67 @@ export function GeneratingProgress({
-          {/* Two milestone status items */}
-          {/* ... */}
+          {isManualIntervention ? (
+            <div className="space-y-3">
+              <p className="text-sm text-muted-foreground">
+                The API blocked this specific prompt. Copy the text, paste it into the Gemini Web App, and paste the JSON result here.
+              </p>
+              <Button variant="outline" onClick={handleCopyPrompt}>
+                <Copy className="h-4 w-4 mr-2" />
+                Copy Prompt
+              </Button>
+              <Textarea
+                value={manualResponse}
+                onChange={(e) => setManualResponse(e.target.value)}
+                placeholder="Paste the JSON response here"
+                rows={8}
+              />
+              <Button onClick={handleSubmitManualResponse} disabled={isSubmitting || !manualResponse.trim()}>
+                <Play className="h-4 w-4 mr-2" />
+                {isSubmitting ? 'Saving...' : 'Submit Response'}
+              </Button>
+            </div>
+          ) : (
+            <>
+              {/* Two milestone status items */}
+              {/* ... */}
-          {/* Status message */}
-          {statusMessage && !error && (
-            {/* ... */}
-          )}
+              {/* Status message */}
+              {statusMessage && !error && (
+                {/* ... */}
+              )}
-          {/* Error message */}
-          {error && (
-            {/* ... */}
+              {/* Error message */}
+              {error && (
+                {/* ... */}
+              )}
+            </>
           )}
diff --git a/lib/ai/llm.ts b/lib/ai/llm.ts
index 8ce5b87f8..eb1e2bf23 100644
--- a/lib/ai/llm.ts
+++ b/lib/ai/llm.ts
@@ -8,6 +8,29 @@ import { generateText, streamText } from 'ai';
import type { GenerateTextResult, StreamTextResult } from 'ai';
import { createLogger } from '@/lib/logger';
import { PROVIDERS } from './providers';
+import crypto from 'crypto';
+import fs from 'fs';
+import path from 'path';
+
+// --- Helpers for manual caching and rate-limit pausing ---
+const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
+
+function getPromptHash(params: Record<string, unknown>): string {
+  const data = JSON.stringify({ system: params.system, prompt: params.prompt, messages: params.messages });
+  return crypto.createHash('md5').update(data).digest('hex');
+}
+
+// Temporary cache dir for manual overrides, use /tmp for serverless
+const CACHE_DIR = path.join('/tmp', '.openmaic', 'manual_cache');
+if (!fs.existsSync(CACHE_DIR)) fs.mkdirSync(CACHE_DIR, { recursive: true });
+
+function getManualCache(hash: string): string | null {
+  if (!/^[a-fA-F0-9]{32}$/.test(hash)) return null;
+  const filePath = path.join(CACHE_DIR, `${hash}.json`);
+  if (fs.existsSync(filePath)) return fs.readFileSync(filePath, 'utf-8');
+  return null;
+}
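+
+// Example (hash value illustrative): getPromptHash({ prompt: 'Summarize page 1' })
+// yields a 32-char hex digest such as '9e107d9d372bb6826bd81d3542a419d6';
+// getManualCache(hash) then reads /tmp/.openmaic/manual_cache/<hash>.json if present.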
+// -------------------------------------
import { thinkingContext } from './thinking-context';
import type { ProviderType, ThinkingCapability, ThinkingConfig } from '@/lib/types/provider';
const log = createLogger('LLM');
@@ -292,6 +315,15 @@ export async function callLLM(
   const maxAttempts = (retryOptions?.retries ?? 0) + 1;
   const validate = retryOptions?.validate ?? (maxAttempts > 1 ? DEFAULT_VALIDATE : undefined);
+  // 0. CACHE INTERCEPTION: Check if the user manually provided an answer for this prompt
+  const promptHash = getPromptHash(params as Record<string, unknown>);
+  const cachedResponse = getManualCache(promptHash);
+  if (cachedResponse) {
+    log.info(`[${source}] 🚀 Using manual cached response for hash: ${promptHash}`);
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    return { text: cachedResponse } as unknown as GenerateTextResult<any, any>; // Mock the AI SDK response object
+  }
+
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   let lastResult: GenerateTextResult<any, any> | undefined;
   let lastError: unknown;
@@ -319,8 +351,45 @@ export async function callLLM(
       }
       return result;
-    } catch (error) {
+    } catch (error: unknown) {
       lastError = error;
+      const err = error as Record<string, unknown>;
+
+      // 1. RATE LIMIT PAUSING
+      if (err?.statusCode === 429 || (typeof err?.message === 'string' && (err.message.includes('429') || err.message.includes('Too Many Requests')))) {
+        log.warn(`[${source}] Rate limit hit. Pausing 20s...`);
+        await sleep(20000);
+        continue;
+      }
+
+      // 2. MANUAL FALLBACK TRIGGER
+      const isUnsupported = typeof err?.message === 'string' && (err.message.includes('unsupported') || err.message.includes('schema'));
+      const isSafety = typeof err?.message === 'string' && (err.message.includes('safety') || err.message.includes('SAFETY'));
+
+      if (isUnsupported || isSafety) {
+        let promptText = '';
+        const p = params as Record<string, unknown>;
+        if (p.system) promptText += `[SYSTEM]\n${p.system}\n\n`;
+        if (p.prompt) promptText += `[USER]\n${p.prompt}\n\n`;
+        if (p.messages && Array.isArray(p.messages)) {
+          promptText += p.messages.map((m: Record<string, unknown>) => {
+            let contentStr = '';
+            if (typeof m.content === 'string') {
+              contentStr = m.content;
+            } else if (Array.isArray(m.content)) {
+              contentStr = m.content.map((part: Record<string, unknown>) => {
+                if (part.type === 'text') return String(part.text ?? '');
+                if (part.type === 'image') return `\n[⚠️ ACTION REQUIRED: Drag and drop the original image/PDF into the Gemini chat here]\n`;
+                return JSON.stringify(part);
+              }).join('\n');
+            }
+            return `[${String(m.role || 'USER').toUpperCase()}]:\n${contentStr}`;
+          }).join('\n\n');
+        }
+
+        // Pass the hash along with the error so the UI can offer a manual-cache submission
+        throw new Error(`MANUAL_INTERVENTION_REQUIRED|||${promptHash}|||${promptText}`);
+      }
       if (attempt < maxAttempts) {
         log.warn(`[${source}] Call failed (attempt ${attempt}/${maxAttempts}), retrying...`, error);
diff --git a/lib/pdf/constants.ts b/lib/pdf/constants.ts
index 93a2ef387..fa18eb9f8 100644
--- a/lib/pdf/constants.ts
+++ b/lib/pdf/constants.ts
@@ -24,6 +24,12 @@ export const PDF_PROVIDERS: Record<PDFProviderId, PDFProviderInfo> = {
     icon: '/logos/mineru.png',
     features: ['text', 'images', 'tables', 'formulas', 'layout-analysis'],
   },
+  local_vision: {
+    id: 'local_vision',
+    name: 'Local Vision (Qwen2-VL/Llama-3.2-Vision)',
+    requiresApiKey: false,
+    features: ['text', 'images', 'ocr', 'layout-analysis'],
+  },
};
/**
diff --git a/lib/pdf/pdf-providers.ts b/lib/pdf/pdf-providers.ts
index edfaea06e..0c1e627de 100644
--- a/lib/pdf/pdf-providers.ts
+++ b/lib/pdf/pdf-providers.ts
@@ -176,6 +176,10 @@ export async function parsePDF(
       result = await parseWithMinerU(config, pdfBuffer);
       break;
+    case 'local_vision':
+      result = await parseWithLocalVision(config, pdfBuffer);
+      break;
+
     default:
       throw new Error(`Unsupported PDF provider: ${config.providerId}`);
   }
@@ -461,3 +465,69 @@ export async function getCurrentPDFConfig(): Promise<PDFParserConfig> {
// Re-export from constants for convenience
export { getAllPDFProviders, getPDFProvider } from './constants';
+
+/**
+ * Local Vision API implementation
+ *
+ * Uses a local OpenAI-compatible endpoint (like vLLM or Ollama running Qwen2-VL)
+ * to perform OCR and layout analysis on PDF pages.
+ */
+async function parseWithLocalVision(
+  config: PDFParserConfig,
+  pdfBuffer: Buffer
+): Promise<PDFParseResult> {
+  const { getDocumentProxy, renderPageAsImage } = await import('unpdf');
+  const pdf = await getDocumentProxy(new Uint8Array(pdfBuffer));
+  const numPages = pdf.numPages;
+
+  let fullText = '';
+  const allImages: string[] = [];
+  const baseUrl = config.baseUrl || 'http://127.0.0.1:11434/v1';
+
+  for (let i = 1; i <= numPages; i++) {
+    // The page object itself is unused; OCR below works on the rendered page image
+    await pdf.getPage(i);
+    const imageArrayBuffer = await renderPageAsImage(new Uint8Array(pdfBuffer), i, { scale: 2 });
+    const base64Image = Buffer.from(imageArrayBuffer).toString('base64');
+    const imageUrl = `data:image/png;base64,${base64Image}`;
+
+    const payload = {
+      model: 'qwen2-vl',
+      messages: [
+        {
+          role: 'user',
+          content: [
+            { type: 'text', text: 'Transcribe the text in this document image accurately. Preserve the layout, headings, paragraphs, and list structures using Markdown. If there are tables or formulas, transcribe them into Markdown tables or LaTeX blocks respectively.' },
+            { type: 'image_url', image_url: { url: imageUrl } },
+          ],
+        },
+      ],
+    };
+
+    const response = await fetch(`${baseUrl}/chat/completions`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(payload),
+    });
+
+    if (!response.ok) {
+      throw new Error(`Local Vision OCR error: ${response.statusText}`);
+    }
+
+    const data = await response.json();
+    const pageText = data.choices?.[0]?.message?.content || '';
+    fullText += `\n\n--- Page ${i} ---\n\n${pageText}`;
+
+    // Optionally extract native images from the page using unpdf alongside the OCR
+    // ...
+  }
+
+  return {
+    text: fullText.trim(),
+    images: allImages,
+    metadata: {
+      pageCount: numPages,
+      parser: 'local_vision',
+    },
+  };
+}
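+
+// Assumed setup: any OpenAI-compatible server hosting a vision model works here,
+// e.g. vLLM or Ollama running Qwen2-VL / Llama 3.2 Vision. The default baseUrl
+// http://127.0.0.1:11434/v1 matches Ollama's OpenAI-compatible endpoint; override
+// config.baseUrl for vLLM (typically http://127.0.0.1:8000/v1).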
diff --git a/lib/pdf/types.ts b/lib/pdf/types.ts
index 8173daedc..d4bf3bbea 100644
--- a/lib/pdf/types.ts
+++ b/lib/pdf/types.ts
@@ -5,7 +5,7 @@
/**
* PDF Provider IDs
*/
-export type PDFProviderId = 'unpdf' | 'mineru';
+export type PDFProviderId = 'unpdf' | 'mineru' | 'local_vision';
/**
* PDF Provider Configuration
diff --git a/lib/server/resolve-model.ts b/lib/server/resolve-model.ts
index b433b7782..d6708361b 100644
--- a/lib/server/resolve-model.ts
+++ b/lib/server/resolve-model.ts
@@ -19,6 +19,9 @@ export interface ResolvedModel extends ModelWithInfo {
   apiKey: string;
}
+// Global state to track round-robin indexes per provider across warm serverless requests
+const roundRobinIndexMap = new Map<string, number>();
+
/**
* Resolve a language model from explicit parameters.
*
@@ -44,9 +47,21 @@ export async function resolveModel(params: {
}
}
-  const apiKey = clientBaseUrl
+  let apiKey = clientBaseUrl
     ? params.apiKey || ''
     : resolveApiKey(providerId, params.apiKey || '');
+
+  // --- MULTI-KEY ROUND ROBIN INJECTION ---
+  if (apiKey.includes(',')) {
+    const keys = apiKey.split(',').map(k => k.trim()).filter(Boolean);
+    if (keys.length > 0) {
+      const currentIndex = roundRobinIndexMap.get(providerId) || 0;
+      apiKey = keys[currentIndex % keys.length];
+      roundRobinIndexMap.set(providerId, currentIndex + 1);
+    }
+  }
+  // ---------------------------------------
+
   const baseUrl = clientBaseUrl ? clientBaseUrl : resolveBaseUrl(providerId, params.baseUrl);
   const proxy = resolveProxy(providerId);
   const { model, modelInfo } = getModel({
diff --git a/lib/web-search/searxng.ts b/lib/web-search/searxng.ts
new file mode 100644
index 000000000..055112e68
--- /dev/null
+++ b/lib/web-search/searxng.ts
@@ -0,0 +1,43 @@
+import type { WebSearchResult, WebSearchSource } from '@/lib/types/web-search';
+
+const SEARXNG_MAX_QUERY_LENGTH = 400;
+
+/**
+ * Search the web using SearXNG API and return structured results.
+ */
+export async function searchWithSearXNG(params: {
+  query: string;
+  baseUrl: string;
+  maxResults?: number;
+}): Promise<WebSearchResult> {
+  const { query, baseUrl, maxResults = 5 } = params;
+  const truncatedQuery = query.slice(0, SEARXNG_MAX_QUERY_LENGTH);
+
+  const url = new URL(baseUrl);
+  url.searchParams.append('q', truncatedQuery);
+  url.searchParams.append('format', 'json');
+  url.searchParams.append('language', 'en');
+
+  const startTime = Date.now();
+  const response = await fetch(url.toString());
+  if (!response.ok) throw new Error(`SearXNG error: ${response.status}`);
+
+  const data = await response.json();
+
+  const sources: WebSearchSource[] = (data.results || []).slice(0, maxResults).map((result: Record<string, unknown>) => ({
+    title: String(result.title || ''),
+    url: String(result.url || ''),
+    content: String(result.content || ''),
+    score: typeof result.score === 'number' ? result.score : 1,
+  }));
+
+  const answer = data.answers && data.answers.length > 0 ? data.answers[0] : '';
+  const responseTime = (Date.now() - startTime) / 1000;
+
+  return {
+    answer,
+    sources,
+    query: data.query,
+    responseTime,
+  };
+}
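+
+// Minimal usage sketch (assumes the SearXNG instance exposes /search and has the
+// JSON format enabled via `formats: [html, json]` in its settings.yml):
+//   const result = await searchWithSearXNG({
+//     query: 'quantum error correction',
+//     baseUrl: 'http://127.0.0.1:8080/search',
+//   });
+//   console.log(result.sources.map(s => s.title));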