From 9ce0d76f4c48f5ebf0e56190db6a2cd9fb197c24 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
<161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 14 Apr 2026 08:59:49 +0000
Subject: [PATCH 1/2] chore: save uncommitted diff patch for future rebase
- Creates `all_changes_patch.diff` with currently staged uncommitted modifications.
Co-authored-by: shervinemp <18602406+shervinemp@users.noreply.github.com>
---
all_changes_patch.diff | 570 ++++++++++++++++++
app/api/manual-cache/route.ts | 34 ++
app/api/web-search/route.ts | 29 +-
components/generation/generating-progress.tsx | 135 +++--
lib/ai/llm.ts | 71 ++-
lib/pdf/constants.ts | 6 +
lib/pdf/pdf-providers.ts | 70 +++
lib/pdf/types.ts | 2 +-
lib/server/resolve-model.ts | 17 +-
lib/web-search/searxng.ts | 43 ++
10 files changed, 931 insertions(+), 46 deletions(-)
create mode 100644 all_changes_patch.diff
create mode 100644 app/api/manual-cache/route.ts
create mode 100644 lib/web-search/searxng.ts
diff --git a/all_changes_patch.diff b/all_changes_patch.diff
new file mode 100644
index 000000000..a4a6bbf3c
--- /dev/null
+++ b/all_changes_patch.diff
@@ -0,0 +1,570 @@
+diff --git a/app/api/manual-cache/route.ts b/app/api/manual-cache/route.ts
+new file mode 100644
+index 0000000..c9afeb8
+--- /dev/null
++++ b/app/api/manual-cache/route.ts
+@@ -0,0 +1,34 @@
++import { NextResponse } from 'next/server';
++import fs from 'fs';
++import path from 'path';
++
++// Note: Storing in /tmp to work around serverless readonly filesystems
++// However, since serverless instances are ephemeral, ideally you'd use Redis or Vercel KV.
++// We are storing in /tmp and verifying hash structure to prevent Path Traversal.
++
++export async function POST(req: Request) {
++ try {
++ const { hash, response } = await req.json();
++
++ if (!hash || !response) {
++ return NextResponse.json({ error: 'Missing hash or response' }, { status: 400 });
++ }
++
++ // Validate hash to be strictly 32 alphanumeric hex chars to prevent path traversal
++ if (!/^[a-fA-F0-9]{32}$/.test(hash)) {
++ return NextResponse.json({ error: 'Invalid hash format' }, { status: 400 });
++ }
++
++ // Since serverless is readonly outside /tmp, use /tmp
++ const CACHE_DIR = path.join('/tmp', '.openmaic', 'manual_cache');
++ if (!fs.existsSync(CACHE_DIR)) {
++ fs.mkdirSync(CACHE_DIR, { recursive: true });
++ }
++
++ fs.writeFileSync(path.join(CACHE_DIR, `${hash}.json`), response, 'utf-8');
++
++ return NextResponse.json({ success: true });
++ } catch (error) {
++ return NextResponse.json({ error: error instanceof Error ? error.message : 'Unknown error' }, { status: 500 });
++ }
++}
+diff --git a/app/api/web-search/route.ts b/app/api/web-search/route.ts
+index f2ff627..b16d489 100644
+--- a/app/api/web-search/route.ts
++++ b/app/api/web-search/route.ts
+@@ -6,6 +6,7 @@
+ */
+
+ import { searchWithTavily, formatSearchResultsAsContext } from '@/lib/web-search/tavily';
++import { searchWithSearXNG } from '@/lib/web-search/searxng';
+ import { resolveWebSearchApiKey } from '@/lib/server/provider-config';
+ import { createLogger } from '@/lib/logger';
+ import { apiError, apiSuccess } from '@/lib/server/api-response';
+@@ -15,25 +16,35 @@ const log = createLogger('WebSearch');
+ export async function POST(req: Request) {
+ try {
+ const body = await req.json();
+- const { query, apiKey: clientApiKey } = body as {
++ const { query, apiKey: clientApiKey, providerId, baseUrl } = body as {
+ query?: string;
+ apiKey?: string;
++ providerId?: string;
++ baseUrl?: string;
+ };
+
+ if (!query || !query.trim()) {
+ return apiError('MISSING_REQUIRED_FIELD', 400, 'query is required');
+ }
+
+- const apiKey = resolveWebSearchApiKey(clientApiKey);
+- if (!apiKey) {
+- return apiError(
+- 'MISSING_API_KEY',
+- 400,
+- 'Tavily API key is not configured. Set it in Settings → Web Search or set TAVILY_API_KEY env var.',
+- );
++ let result;
++ if (providerId === 'searxng') {
++ result = await searchWithSearXNG({
++ query: query.trim(),
++ baseUrl: baseUrl || process.env.SEARXNG_URL || 'http://127.0.0.1:8080/search'
++ });
++ } else {
++ const apiKey = resolveWebSearchApiKey(clientApiKey);
++ if (!apiKey) {
++ return apiError(
++ 'MISSING_API_KEY',
++ 400,
++ 'Tavily API key is not configured. Set it in Settings → Web Search or set TAVILY_API_KEY env var.',
++ );
++ }
++ result = await searchWithTavily({ query: query.trim(), apiKey });
+ }
+
+- const result = await searchWithTavily({ query: query.trim(), apiKey });
+ const context = formatSearchResultsAsContext(result);
+
+ return apiSuccess({
+diff --git a/components/generation/generating-progress.tsx b/components/generation/generating-progress.tsx
+index 639e79d..f76df6a 100644
+--- a/components/generation/generating-progress.tsx
++++ b/components/generation/generating-progress.tsx
+@@ -2,8 +2,11 @@
+
+ import { useEffect, useState } from 'react';
+ import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
+-import { Loader2, CheckCircle2, XCircle, Circle } from 'lucide-react';
++import { Loader2, CheckCircle2, XCircle, Circle, Copy, Play } from 'lucide-react';
+ import { useI18n } from '@/lib/hooks/use-i18n';
++import { Button } from '@/components/ui/button';
++import { Textarea } from '@/components/ui/textarea';
++import { toast } from 'sonner';
+
+ interface GeneratingProgressProps {
+ outlineReady: boolean; // Is outline generation complete?
+@@ -62,6 +65,14 @@ export function GeneratingProgress({
+ }: GeneratingProgressProps) {
+ const { t } = useI18n();
+ const [dots, setDots] = useState('');
++ const [manualResponse, setManualResponse] = useState('');
++ const [isSubmitting, setIsSubmitting] = useState(false);
++
++ // Extract hash and prompt
++ const isManualIntervention = error?.startsWith('MANUAL_INTERVENTION_REQUIRED|||');
++ const errorParts = isManualIntervention ? error?.split('|||') : [];
++ const promptHash = (errorParts && errorParts[1]) || '';
++ const manualPromptText = (errorParts && errorParts[2]) || '';
+
+ // Animated dots for loading state
+ useEffect(() => {
+@@ -73,12 +84,41 @@ export function GeneratingProgress({
+ }
+ }, [error, firstPageReady]);
+
++ const handleCopyPrompt = () => {
++ if (manualPromptText) {
++ navigator.clipboard.writeText(manualPromptText);
++ toast.success("Prompt copied to clipboard");
++ }
++ };
++
++ const handleSubmitManualResponse = async () => {
++ setIsSubmitting(true);
++ try {
++ await fetch('/api/manual-cache', {
++ method: 'POST',
++ headers: { 'Content-Type': 'application/json' },
++ body: JSON.stringify({ hash: promptHash, response: manualResponse })
++ });
++
++ // Reload the page. The user will click "Generate" again,
++ // but the backend will instantly skip the step using the cache!
++ toast.success("Saved! Please restart the generation.");
++ window.location.reload();
++ } catch (_e) {
++ toast.error("Failed to save response.");
++ } finally {
++ setIsSubmitting(false);
++ }
++ };
++
+ return (
+
+
+
+
+- {error ? (
++ {isManualIntervention ? (
++ <> Action Required: Gemini Blocked Output>
++ ) : error ? (
+ <>
+
+ {t('generation.generationFailed')}
+@@ -98,40 +138,67 @@ export function GeneratingProgress({
+
+
+
+- {/* Two milestone status items */}
+-
+-
+-
+-
++ {isManualIntervention ? (
++
++
++ The API blocked this specific prompt. Copy the text, paste it into the Gemini Web App, and paste the JSON result here.
++
++
++
++
++
++
++
++ ) : (
++ <>
++ {/* Two milestone status items */}
++
++
++
++
+
+- {/* Status message */}
+- {statusMessage && !error && (
+-
+- )}
++ {/* Status message */}
++ {statusMessage && !error && (
++
++ )}
+
+- {/* Error message */}
+- {error && (
+-
++ {/* Error message */}
++ {error && (
++
++ )}
++ >
+ )}
+
+
+diff --git a/lib/ai/llm.ts b/lib/ai/llm.ts
+index 8ce5b87..f5d270c 100644
+--- a/lib/ai/llm.ts
++++ b/lib/ai/llm.ts
+@@ -8,6 +8,29 @@ import { generateText, streamText } from 'ai';
+ import type { GenerateTextResult, StreamTextResult } from 'ai';
+ import { createLogger } from '@/lib/logger';
+ import { PROVIDERS } from './providers';
++import crypto from 'crypto';
++import fs from 'fs';
++import path from 'path';
++
++// --- Add these helpers at the top ---
++const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
++
++function getPromptHash(params: Record<string, unknown>): string {
++ const data = JSON.stringify({ system: params.system, prompt: params.prompt, messages: params.messages });
++ return crypto.createHash('md5').update(data).digest('hex');
++}
++
++// Temporary cache dir for manual overrides, use /tmp for serverless
++const CACHE_DIR = path.join('/tmp', '.openmaic', 'manual_cache');
++if (!fs.existsSync(CACHE_DIR)) fs.mkdirSync(CACHE_DIR, { recursive: true });
++
++function getManualCache(hash: string): string | null {
++ if (!/^[a-fA-F0-9]{32}$/.test(hash)) return null;
++ const filePath = path.join(CACHE_DIR, `${hash}.json`);
++ if (fs.existsSync(filePath)) return fs.readFileSync(filePath, 'utf-8');
++ return null;
++}
++// -------------------------------------
+ import { thinkingContext } from './thinking-context';
+ import type { ProviderType, ThinkingCapability, ThinkingConfig } from '@/lib/types/provider';
+ const log = createLogger('LLM');
+@@ -292,6 +315,15 @@ export async function callLLM(
+ const maxAttempts = (retryOptions?.retries ?? 0) + 1;
+ const validate = retryOptions?.validate ?? (maxAttempts > 1 ? DEFAULT_VALIDATE : undefined);
+
++ // 0. CACHE INTERCEPTION: Check if the user manually provided an answer for this prompt
++ const promptHash = getPromptHash(params as Record<string, unknown>);
++ const cachedResponse = getManualCache(promptHash);
++ if (cachedResponse) {
++ log.info(`[${source}] 🚀 Using manual cached response for hash: ${promptHash}`);
++ // eslint-disable-next-line @typescript-eslint/no-explicit-any
++ return { text: cachedResponse } as unknown as GenerateTextResult; // Mock the AI SDK response object
++ }
++
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ let lastResult: GenerateTextResult | undefined;
+ let lastError: unknown;
+@@ -319,8 +351,45 @@ export async function callLLM(
+ }
+
+ return result;
+- } catch (error) {
++ } catch (error: unknown) {
+ lastError = error;
++ const err = error as Record<string, unknown>;
++
++ // 1. RATE LIMIT PAUSING
++ if (err?.statusCode === 429 || (typeof err?.message === 'string' && (err.message.includes('429') || err.message.includes('Too Many Requests')))) {
++ log.warn(`[${source}] Rate limit hit. Pausing 20s...`);
++ await sleep(20000);
++ continue;
++ }
++
++ // 2. MANUAL FALLBACK TRIGGER
++ const isUnsupported = typeof err?.message === 'string' && (err.message.includes('unsupported') || err.message.includes('schema'));
++ const isSafety = typeof err?.message === 'string' && (err.message.includes('safety') || err.message.includes('SAFETY'));
++
++ if (isUnsupported || isSafety) {
++ let promptText = "";
++ const p = params as Record<string, unknown>;
++ if (p.system) promptText += `[SYSTEM]\n${p.system}\n\n`;
++ if (p.prompt) promptText += `[USER]\n${p.prompt}\n\n`;
++ if (p.messages && Array.isArray(p.messages)) {
++ promptText += p.messages.map((m: Record<string, unknown>) => {
++ let contentStr = "";
++ if (typeof m.content === 'string') {
++ contentStr = m.content;
++ } else if (Array.isArray(m.content)) {
++ contentStr = m.content.map((part: Record<string, unknown>) => {
++ if (part.type === 'text') return part.text;
++ if (part.type === 'image') return `\n[⚠️ ACTION REQUIRED: Drag and drop the original image/PDF into the Gemini chat here] \n`;
++ return JSON.stringify(part);
++ }).join('\n');
++ }
++ return `[${(m.role || 'USER').toString().toUpperCase()}]:\n${contentStr}`;
++ }).join('\n\n');
++ }
++
++ // Pass the Hash along with the error
++ throw new Error(`MANUAL_INTERVENTION_REQUIRED|||${promptHash}|||${promptText}`);
++ }
+
+ if (attempt < maxAttempts) {
+ log.warn(`[${source}] Call failed (attempt ${attempt}/${maxAttempts}), retrying...`, error);
+diff --git a/lib/pdf/constants.ts b/lib/pdf/constants.ts
+index 93a2ef3..fa18eb9 100644
+--- a/lib/pdf/constants.ts
++++ b/lib/pdf/constants.ts
+@@ -24,6 +24,12 @@ export const PDF_PROVIDERS: Record = {
+ icon: '/logos/mineru.png',
+ features: ['text', 'images', 'tables', 'formulas', 'layout-analysis'],
+ },
++ local_vision: {
++ id: 'local_vision',
++ name: 'Local Vision (Qwen2-VL/Llama-3.2-Vision)',
++ requiresApiKey: false,
++ features: ['text', 'images', 'ocr', 'layout-analysis'],
++ },
+ };
+
+ /**
+diff --git a/lib/pdf/pdf-providers.ts b/lib/pdf/pdf-providers.ts
+index edfaea0..baacfbe 100644
+--- a/lib/pdf/pdf-providers.ts
++++ b/lib/pdf/pdf-providers.ts
+@@ -176,6 +176,10 @@ export async function parsePDF(
+ result = await parseWithMinerU(config, pdfBuffer);
+ break;
+
++ case 'local_vision':
++ result = await parseWithLocalVision(config, pdfBuffer);
++ break;
++
+ default:
+ throw new Error(`Unsupported PDF provider: ${config.providerId}`);
+ }
+@@ -461,3 +465,69 @@ export async function getCurrentPDFConfig(): Promise {
+
+ // Re-export from constants for convenience
+ export { getAllPDFProviders, getPDFProvider } from './constants';
++
++/**
++ * Local Vision API implementation
++ *
++ * Uses a local OpenAI-compatible endpoint (like vLLM or Ollama running Qwen2-VL)
++ * to perform OCR and layout analysis on PDF pages.
++ */
++async function parseWithLocalVision(
++ config: PDFParserConfig,
++ pdfBuffer: Buffer
++): Promise {
++ const { getDocumentProxy, renderPageAsImage } = await import('unpdf');
++ const pdf = await getDocumentProxy(new Uint8Array(pdfBuffer));
++ const numPages = pdf.numPages;
++
++ let fullText = '';
++ const allImages: string[] = [];
++ const baseUrl = config.baseUrl || 'http://127.0.0.1:11434/v1';
++
++ for (let i = 1; i <= numPages; i++) {
++ // page is intentionally unused if only OCR is used
++ await pdf.getPage(i);
++ const imageArrayBuffer = await renderPageAsImage(new Uint8Array(pdfBuffer), i, { scale: 2 });
++ const base64Image = Buffer.from(imageArrayBuffer).toString('base64');
++ const imageUrl = `data:image/png;base64,${base64Image}`;
++
++ const payload = {
++ model: "qwen2-vl",
++ messages: [
++ {
++ role: "user",
++ content: [
++ { type: "text", text: "Transcribe the text in this document image accurately. Preserve the layout, headings, paragraphs, and list structures using Markdown. If there are tables or formulas, transcribe them into Markdown tables or LaTeX blocks respectively." },
++ { type: "image_url", image_url: { url: imageUrl } }
++ ]
++ }
++ ]
++ };
++
++ const response = await fetch(`${baseUrl}/chat/completions`, {
++ method: 'POST',
++ headers: { 'Content-Type': 'application/json' },
++ body: JSON.stringify(payload)
++ });
++
++ if (!response.ok) {
++ throw new Error(`Local Vision OCR error: ${response.statusText}`);
++ }
++
++ const data = await response.json();
++ const pageText = data.choices?.[0]?.message?.content || '';
++ fullText += `\n\n--- Page ${i} ---\n\n${pageText}`;
++
++ // Optionally extract native images from the page using unpdf alongside the OCR
++ // ...
++ }
++
++ return {
++ text: fullText.trim(),
++ images: allImages,
++ metadata: {
++ pageCount: numPages,
++ parser: 'local_vision'
++ }
++ };
++}
+diff --git a/lib/pdf/types.ts b/lib/pdf/types.ts
+index 8173dae..d4bf3bb 100644
+--- a/lib/pdf/types.ts
++++ b/lib/pdf/types.ts
+@@ -5,7 +5,7 @@
+ /**
+ * PDF Provider IDs
+ */
+-export type PDFProviderId = 'unpdf' | 'mineru';
++export type PDFProviderId = 'unpdf' | 'mineru' | 'local_vision';
+
+ /**
+ * PDF Provider Configuration
+diff --git a/lib/server/resolve-model.ts b/lib/server/resolve-model.ts
+index 790da76..3acfaea 100644
+--- a/lib/server/resolve-model.ts
++++ b/lib/server/resolve-model.ts
+@@ -17,6 +17,9 @@ export interface ResolvedModel extends ModelWithInfo {
+ apiKey: string;
+ }
+
++// Global state to track round-robin indexes per provider across warm serverless requests
+const roundRobinIndexMap = new Map<string, number>();
++
+ /**
+ * Resolve a language model from explicit parameters.
+ *
+@@ -40,9 +43,21 @@ export function resolveModel(params: {
+ }
+ }
+
+- const apiKey = clientBaseUrl
++ let apiKey = clientBaseUrl
+ ? params.apiKey || ''
+ : resolveApiKey(providerId, params.apiKey || '');
++
++ // --- MULTI-KEY ROUND ROBIN INJECTION ---
++ if (apiKey.includes(',')) {
++ const keys = apiKey.split(',').map(k => k.trim()).filter(Boolean);
++ if (keys.length > 0) {
++ const currentIndex = roundRobinIndexMap.get(providerId) || 0;
++ apiKey = keys[currentIndex % keys.length];
++ roundRobinIndexMap.set(providerId, currentIndex + 1);
++ }
++ }
++ // ---------------------------------------
++
+ const baseUrl = clientBaseUrl ? clientBaseUrl : resolveBaseUrl(providerId, params.baseUrl);
+ const proxy = resolveProxy(providerId);
+ const { model, modelInfo } = getModel({
+diff --git a/lib/web-search/searxng.ts b/lib/web-search/searxng.ts
+new file mode 100644
+index 0000000..b297240
+--- /dev/null
++++ b/lib/web-search/searxng.ts
+@@ -0,0 +1,43 @@
++import type { WebSearchResult, WebSearchSource } from '@/lib/types/web-search';
++
++const SEARXNG_MAX_QUERY_LENGTH = 400;
++
++/**
++ * Search the web using SearXNG API and return structured results.
++ */
++export async function searchWithSearXNG(params: {
++ query: string;
++ baseUrl: string;
++ maxResults?: number;
++}): Promise<WebSearchResult> {
++ const { query, baseUrl, maxResults = 5 } = params;
++ const truncatedQuery = query.slice(0, SEARXNG_MAX_QUERY_LENGTH);
++
++ const url = new URL(baseUrl);
++ url.searchParams.append('q', truncatedQuery);
++ url.searchParams.append('format', 'json');
++ url.searchParams.append('language', 'en');
++
++ const response = await fetch(url.toString());
++ if (!response.ok) throw new Error(`SearXNG error: ${response.status}`);
++
++ const data = await response.json();
++ const startTime = Date.now();
++
++ const sources: WebSearchSource[] = data.results.slice(0, maxResults).map((result: Record<string, unknown>) => ({
++ title: String(result.title || ''),
++ url: String(result.url || ''),
++ content: String(result.content || ''),
++ score: typeof result.score === 'number' ? result.score : 1,
++ }));
++
++ const answer = data.answers && data.answers.length > 0 ? data.answers[0] : '';
++ const responseTime = (Date.now() - startTime) / 1000;
++
++ return {
++ answer,
++ sources,
++ query: data.query,
++ responseTime,
++ };
++}
diff --git a/app/api/manual-cache/route.ts b/app/api/manual-cache/route.ts
new file mode 100644
index 000000000..c9afeb89b
--- /dev/null
+++ b/app/api/manual-cache/route.ts
@@ -0,0 +1,34 @@
+import { NextResponse } from 'next/server';
+import fs from 'fs';
+import path from 'path';
+
+// Note: Storing in /tmp to work around serverless readonly filesystems
+// However, since serverless instances are ephemeral, ideally you'd use Redis or Vercel KV.
+// We are storing in /tmp and verifying hash structure to prevent Path Traversal.
+
+export async function POST(req: Request) {
+ try {
+ const { hash, response } = await req.json();
+
+ if (!hash || !response) {
+ return NextResponse.json({ error: 'Missing hash or response' }, { status: 400 });
+ }
+
+ // Validate hash to be strictly 32 alphanumeric hex chars to prevent path traversal
+ if (!/^[a-fA-F0-9]{32}$/.test(hash)) {
+ return NextResponse.json({ error: 'Invalid hash format' }, { status: 400 });
+ }
+
+ // Since serverless is readonly outside /tmp, use /tmp
+ const CACHE_DIR = path.join('/tmp', '.openmaic', 'manual_cache');
+ if (!fs.existsSync(CACHE_DIR)) {
+ fs.mkdirSync(CACHE_DIR, { recursive: true });
+ }
+
+ fs.writeFileSync(path.join(CACHE_DIR, `${hash}.json`), response, 'utf-8');
+
+ return NextResponse.json({ success: true });
+ } catch (error) {
+ return NextResponse.json({ error: error instanceof Error ? error.message : 'Unknown error' }, { status: 500 });
+ }
+}
diff --git a/app/api/web-search/route.ts b/app/api/web-search/route.ts
index f2ff627f4..3282bde41 100644
--- a/app/api/web-search/route.ts
+++ b/app/api/web-search/route.ts
@@ -6,6 +6,7 @@
*/
import { searchWithTavily, formatSearchResultsAsContext } from '@/lib/web-search/tavily';
+import { searchWithSearXNG } from '@/lib/web-search/searxng';
import { resolveWebSearchApiKey } from '@/lib/server/provider-config';
import { createLogger } from '@/lib/logger';
import { apiError, apiSuccess } from '@/lib/server/api-response';
@@ -15,25 +16,35 @@ const log = createLogger('WebSearch');
export async function POST(req: Request) {
try {
const body = await req.json();
- const { query, apiKey: clientApiKey } = body as {
+ const { query, apiKey: clientApiKey, providerId, baseUrl } = body as {
query?: string;
apiKey?: string;
+ providerId?: string;
+ baseUrl?: string;
};
if (!query || !query.trim()) {
return apiError('MISSING_REQUIRED_FIELD', 400, 'query is required');
}
- const apiKey = resolveWebSearchApiKey(clientApiKey);
- if (!apiKey) {
- return apiError(
- 'MISSING_API_KEY',
- 400,
- 'Tavily API key is not configured. Set it in Settings → Web Search or set TAVILY_API_KEY env var.',
- );
+ let result;
+ if (providerId === 'searxng') {
+ result = await searchWithSearXNG({
+ query: query.trim(),
+ baseUrl: baseUrl || process.env.SEARXNG_URL || 'http://127.0.0.1:8080/search'
+ });
+ } else {
+ const apiKey = resolveWebSearchApiKey(clientApiKey);
+ if (!apiKey) {
+ return apiError(
+ 'MISSING_API_KEY',
+ 400,
+ 'Tavily API key is not configured. Set it in Settings → Web Search or set TAVILY_API_KEY env var.',
+ );
+ }
+ result = await searchWithTavily({ query: query.trim(), apiKey });
}
- const result = await searchWithTavily({ query: query.trim(), apiKey });
const context = formatSearchResultsAsContext(result);
return apiSuccess({
diff --git a/components/generation/generating-progress.tsx b/components/generation/generating-progress.tsx
index 639e79d31..17ca7f256 100644
--- a/components/generation/generating-progress.tsx
+++ b/components/generation/generating-progress.tsx
@@ -2,8 +2,11 @@
import { useEffect, useState } from 'react';
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
-import { Loader2, CheckCircle2, XCircle, Circle } from 'lucide-react';
+import { Loader2, CheckCircle2, XCircle, Circle, Copy, Play } from 'lucide-react';
import { useI18n } from '@/lib/hooks/use-i18n';
+import { Button } from '@/components/ui/button';
+import { Textarea } from '@/components/ui/textarea';
+import { toast } from 'sonner';
interface GeneratingProgressProps {
outlineReady: boolean; // Is outline generation complete?
@@ -62,6 +65,14 @@ export function GeneratingProgress({
}: GeneratingProgressProps) {
const { t } = useI18n();
const [dots, setDots] = useState('');
+ const [manualResponse, setManualResponse] = useState('');
+ const [isSubmitting, setIsSubmitting] = useState(false);
+
+ // Extract hash and prompt
+ const isManualIntervention = error?.startsWith('MANUAL_INTERVENTION_REQUIRED|||');
+ const errorParts = isManualIntervention ? error?.split('|||') : [];
+ const promptHash = (errorParts && errorParts[1]) || '';
+ const manualPromptText = (errorParts && errorParts[2]) || '';
// Animated dots for loading state
useEffect(() => {
@@ -73,12 +84,41 @@ export function GeneratingProgress({
}
}, [error, firstPageReady]);
+ const handleCopyPrompt = () => {
+ if (manualPromptText) {
+ navigator.clipboard.writeText(manualPromptText);
+ toast.success("Prompt copied to clipboard");
+ }
+ };
+
+ const handleSubmitManualResponse = async () => {
+ setIsSubmitting(true);
+ try {
+ await fetch('/api/manual-cache', {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ hash: promptHash, response: manualResponse })
+ });
+
+ // Reload the page. The user will click "Generate" again,
+ // but the backend will instantly skip the step using the cache!
+ toast.success("Saved! Please restart the generation.");
+ window.location.reload();
+ } catch (_e) {
+ toast.error("Failed to save response.");
+ } finally {
+ setIsSubmitting(false);
+ }
+ };
+
return (
- {error ? (
+ {isManualIntervention ? (
+ <> Action Required: Gemini Blocked Output>
+ ) : error ? (
<>
{t('generation.generationFailed')}
@@ -98,40 +138,67 @@ export function GeneratingProgress({
- {/* Two milestone status items */}
-
-
-
-
+ {isManualIntervention ? (
+
+
+ The API blocked this specific prompt. Copy the text, paste it into the Gemini Web App, and paste the JSON result here.
+
+
+
+
+
+
+
+ ) : (
+ <>
+ {/* Two milestone status items */}
+
+
+
+
- {/* Status message */}
- {statusMessage && !error && (
-
- )}
+ {/* Status message */}
+ {statusMessage && !error && (
+
+ )}
- {/* Error message */}
- {error && (
-
+ {/* Error message */}
+ {error && (
+
+ )}
+ >
)}
diff --git a/lib/ai/llm.ts b/lib/ai/llm.ts
index 8ce5b87f8..eb1e2bf23 100644
--- a/lib/ai/llm.ts
+++ b/lib/ai/llm.ts
@@ -8,6 +8,29 @@ import { generateText, streamText } from 'ai';
import type { GenerateTextResult, StreamTextResult } from 'ai';
import { createLogger } from '@/lib/logger';
import { PROVIDERS } from './providers';
+import crypto from 'crypto';
+import fs from 'fs';
+import path from 'path';
+
+// --- Add these helpers at the top ---
+const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
+
+function getPromptHash(params: Record<string, unknown>): string {
+ const data = JSON.stringify({ system: params.system, prompt: params.prompt, messages: params.messages });
+ return crypto.createHash('md5').update(data).digest('hex');
+}
+
+// Temporary cache dir for manual overrides, use /tmp for serverless
+const CACHE_DIR = path.join('/tmp', '.openmaic', 'manual_cache');
+if (!fs.existsSync(CACHE_DIR)) fs.mkdirSync(CACHE_DIR, { recursive: true });
+
+function getManualCache(hash: string): string | null {
+ if (!/^[a-fA-F0-9]{32}$/.test(hash)) return null;
+ const filePath = path.join(CACHE_DIR, `${hash}.json`);
+ if (fs.existsSync(filePath)) return fs.readFileSync(filePath, 'utf-8');
+ return null;
+}
+// -------------------------------------
import { thinkingContext } from './thinking-context';
import type { ProviderType, ThinkingCapability, ThinkingConfig } from '@/lib/types/provider';
const log = createLogger('LLM');
@@ -292,6 +315,15 @@ export async function callLLM(
const maxAttempts = (retryOptions?.retries ?? 0) + 1;
const validate = retryOptions?.validate ?? (maxAttempts > 1 ? DEFAULT_VALIDATE : undefined);
+ // 0. CACHE INTERCEPTION: Check if the user manually provided an answer for this prompt
+ const promptHash = getPromptHash(params as Record<string, unknown>);
+ const cachedResponse = getManualCache(promptHash);
+ if (cachedResponse) {
+ log.info(`[${source}] 🚀 Using manual cached response for hash: ${promptHash}`);
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ return { text: cachedResponse } as unknown as GenerateTextResult; // Mock the AI SDK response object
+ }
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let lastResult: GenerateTextResult | undefined;
let lastError: unknown;
@@ -319,8 +351,45 @@ export async function callLLM(
}
return result;
- } catch (error) {
+ } catch (error: unknown) {
lastError = error;
+ const err = error as Record<string, unknown>;
+
+ // 1. RATE LIMIT PAUSING
+ if (err?.statusCode === 429 || (typeof err?.message === 'string' && (err.message.includes('429') || err.message.includes('Too Many Requests')))) {
+ log.warn(`[${source}] Rate limit hit. Pausing 20s...`);
+ await sleep(20000);
+ continue;
+ }
+
+ // 2. MANUAL FALLBACK TRIGGER
+ const isUnsupported = typeof err?.message === 'string' && (err.message.includes('unsupported') || err.message.includes('schema'));
+ const isSafety = typeof err?.message === 'string' && (err.message.includes('safety') || err.message.includes('SAFETY'));
+
+ if (isUnsupported || isSafety) {
+ let promptText = "";
+ const p = params as Record<string, unknown>;
+ if (p.system) promptText += `[SYSTEM]\n${p.system}\n\n`;
+ if (p.prompt) promptText += `[USER]\n${p.prompt}\n\n`;
+ if (p.messages && Array.isArray(p.messages)) {
+ promptText += p.messages.map((m: Record<string, unknown>) => {
+ let contentStr = "";
+ if (typeof m.content === 'string') {
+ contentStr = m.content;
+ } else if (Array.isArray(m.content)) {
+ contentStr = m.content.map((part: Record<string, unknown>) => {
+ if (part.type === 'text') return part.text;
+ if (part.type === 'image') return `\n[⚠️ ACTION REQUIRED: Drag and drop the original image/PDF into the Gemini chat here] \n`;
+ return JSON.stringify(part);
+ }).join('\n');
+ }
+ return `[${(m.role || 'USER').toString().toUpperCase()}]:\n${contentStr}`;
+ }).join('\n\n');
+ }
+
+ // Pass the Hash along with the error
+ throw new Error(`MANUAL_INTERVENTION_REQUIRED|||${promptHash}|||${promptText}`);
+ }
if (attempt < maxAttempts) {
log.warn(`[${source}] Call failed (attempt ${attempt}/${maxAttempts}), retrying...`, error);
diff --git a/lib/pdf/constants.ts b/lib/pdf/constants.ts
index 93a2ef387..fa18eb9f8 100644
--- a/lib/pdf/constants.ts
+++ b/lib/pdf/constants.ts
@@ -24,6 +24,12 @@ export const PDF_PROVIDERS: Record = {
icon: '/logos/mineru.png',
features: ['text', 'images', 'tables', 'formulas', 'layout-analysis'],
},
+ local_vision: {
+ id: 'local_vision',
+ name: 'Local Vision (Qwen2-VL/Llama-3.2-Vision)',
+ requiresApiKey: false,
+ features: ['text', 'images', 'ocr', 'layout-analysis'],
+ },
};
/**
diff --git a/lib/pdf/pdf-providers.ts b/lib/pdf/pdf-providers.ts
index edfaea06e..0c1e627de 100644
--- a/lib/pdf/pdf-providers.ts
+++ b/lib/pdf/pdf-providers.ts
@@ -176,6 +176,10 @@ export async function parsePDF(
result = await parseWithMinerU(config, pdfBuffer);
break;
+ case 'local_vision':
+ result = await parseWithLocalVision(config, pdfBuffer);
+ break;
+
default:
throw new Error(`Unsupported PDF provider: ${config.providerId}`);
}
@@ -461,3 +465,69 @@ export async function getCurrentPDFConfig(): Promise {
// Re-export from constants for convenience
export { getAllPDFProviders, getPDFProvider } from './constants';
+
+/**
+ * Local Vision API implementation
+ *
+ * Uses a local OpenAI-compatible endpoint (like vLLM or Ollama running Qwen2-VL)
+ * to perform OCR and layout analysis on PDF pages.
+ */
+async function parseWithLocalVision(
+ config: PDFParserConfig,
+ pdfBuffer: Buffer
+): Promise {
+ const { getDocumentProxy, renderPageAsImage } = await import('unpdf');
+ const pdf = await getDocumentProxy(new Uint8Array(pdfBuffer));
+ const numPages = pdf.numPages;
+
+ let fullText = '';
+ const allImages: string[] = [];
+ const baseUrl = config.baseUrl || 'http://127.0.0.1:11434/v1';
+
+ for (let i = 1; i <= numPages; i++) {
+ // page is intentionally unused if only OCR is used
+ await pdf.getPage(i);
+ const imageArrayBuffer = await renderPageAsImage(new Uint8Array(pdfBuffer), i, { scale: 2 });
+ const base64Image = Buffer.from(imageArrayBuffer).toString('base64');
+ const imageUrl = `data:image/png;base64,${base64Image}`;
+
+ const payload = {
+ model: "qwen2-vl",
+ messages: [
+ {
+ role: "user",
+ content: [
+ { type: "text", text: "Transcribe the text in this document image accurately. Preserve the layout, headings, paragraphs, and list structures using Markdown. If there are tables or formulas, transcribe them into Markdown tables or LaTeX blocks respectively." },
+ { type: "image_url", image_url: { url: imageUrl } }
+ ]
+ }
+ ]
+ };
+
+ const response = await fetch(`${baseUrl}/chat/completions`, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify(payload)
+ });
+
+ if (!response.ok) {
+ throw new Error(`Local Vision OCR error: ${response.statusText}`);
+ }
+
+ const data = await response.json();
+ const pageText = data.choices?.[0]?.message?.content || '';
+ fullText += `\n\n--- Page ${i} ---\n\n${pageText}`;
+
+ // Optionally extract native images from the page using unpdf alongside the OCR
+ // ...
+ }
+
+ return {
+ text: fullText.trim(),
+ images: allImages,
+ metadata: {
+ pageCount: numPages,
+ parser: 'local_vision'
+ }
+ };
+}
diff --git a/lib/pdf/types.ts b/lib/pdf/types.ts
index 8173daedc..d4bf3bbea 100644
--- a/lib/pdf/types.ts
+++ b/lib/pdf/types.ts
@@ -5,7 +5,7 @@
/**
* PDF Provider IDs
*/
-export type PDFProviderId = 'unpdf' | 'mineru';
+export type PDFProviderId = 'unpdf' | 'mineru' | 'local_vision';
/**
* PDF Provider Configuration
diff --git a/lib/server/resolve-model.ts b/lib/server/resolve-model.ts
index 790da760c..13e46b493 100644
--- a/lib/server/resolve-model.ts
+++ b/lib/server/resolve-model.ts
@@ -17,6 +17,9 @@ export interface ResolvedModel extends ModelWithInfo {
apiKey: string;
}
+// Global state to track round-robin indexes per provider across warm serverless requests
+const roundRobinIndexMap = new Map();
+
/**
* Resolve a language model from explicit parameters.
*
@@ -40,9 +43,21 @@ export function resolveModel(params: {
}
}
- const apiKey = clientBaseUrl
+ let apiKey = clientBaseUrl
? params.apiKey || ''
: resolveApiKey(providerId, params.apiKey || '');
+
+ // --- MULTI-KEY ROUND ROBIN INJECTION ---
+ if (apiKey.includes(',')) {
+ const keys = apiKey.split(',').map(k => k.trim()).filter(Boolean);
+ if (keys.length > 0) {
+ const currentIndex = roundRobinIndexMap.get(providerId) || 0;
+ apiKey = keys[currentIndex % keys.length];
+ roundRobinIndexMap.set(providerId, currentIndex + 1);
+ }
+ }
+ // ---------------------------------------
+
const baseUrl = clientBaseUrl ? clientBaseUrl : resolveBaseUrl(providerId, params.baseUrl);
const proxy = resolveProxy(providerId);
const { model, modelInfo } = getModel({
diff --git a/lib/web-search/searxng.ts b/lib/web-search/searxng.ts
new file mode 100644
index 000000000..055112e68
--- /dev/null
+++ b/lib/web-search/searxng.ts
@@ -0,0 +1,43 @@
+import type { WebSearchResult, WebSearchSource } from '@/lib/types/web-search';
+
+const SEARXNG_MAX_QUERY_LENGTH = 400;
+
+/**
+ * Search the web using SearXNG API and return structured results.
+ */
+export async function searchWithSearXNG(params: {
+ query: string;
+ baseUrl: string;
+ maxResults?: number;
+}): Promise {
+ const { query, baseUrl, maxResults = 5 } = params;
+ const truncatedQuery = query.slice(0, SEARXNG_MAX_QUERY_LENGTH);
+
+ const url = new URL(baseUrl);
+ url.searchParams.append('q', truncatedQuery);
+ url.searchParams.append('format', 'json');
+ url.searchParams.append('language', 'en');
+
+ const response = await fetch(url.toString());
+ if (!response.ok) throw new Error(`SearXNG error: ${response.status}`);
+
+ const data = await response.json();
+ const startTime = Date.now();
+
+ const sources: WebSearchSource[] = data.results.slice(0, maxResults).map((result: Record) => ({
+ title: String(result.title || ''),
+ url: String(result.url || ''),
+ content: String(result.content || ''),
+ score: typeof result.score === 'number' ? result.score : 1,
+ }));
+
+ const answer = data.answers && data.answers.length > 0 ? data.answers[0] : '';
+ const responseTime = (Date.now() - startTime) / 1000;
+
+ return {
+ answer,
+ sources,
+ query: data.query,
+ responseTime,
+ };
+}
From da6e6aaf968caddb51209335f215dd9fc9ace97a Mon Sep 17 00:00:00 2001
From: Shervin Naseri
Date: Tue, 14 Apr 2026 05:01:31 -0400
Subject: [PATCH 2/2] Delete all_changes_patch.diff
---
all_changes_patch.diff | 570 -----------------------------------------
1 file changed, 570 deletions(-)
delete mode 100644 all_changes_patch.diff
diff --git a/all_changes_patch.diff b/all_changes_patch.diff
deleted file mode 100644
index a4a6bbf3c..000000000
--- a/all_changes_patch.diff
+++ /dev/null
@@ -1,570 +0,0 @@
-diff --git a/app/api/manual-cache/route.ts b/app/api/manual-cache/route.ts
-new file mode 100644
-index 0000000..c9afeb8
---- /dev/null
-+++ b/app/api/manual-cache/route.ts
-@@ -0,0 +1,34 @@
-+import { NextResponse } from 'next/server';
-+import fs from 'fs';
-+import path from 'path';
-+
-+// Note: Storing in /tmp to work around serverless readonly filesystems
-+// However, since serverless instances are ephemeral, ideally you'd use Redis or Vercel KV.
-+// We are storing in /tmp and verifying hash structure to prevent Path Traversal.
-+
-+export async function POST(req: Request) {
-+ try {
-+ const { hash, response } = await req.json();
-+
-+ if (!hash || !response) {
-+ return NextResponse.json({ error: 'Missing hash or response' }, { status: 400 });
-+ }
-+
-+ // Validate hash to be strictly 32 alphanumeric hex chars to prevent path traversal
-+ if (!/^[a-fA-F0-9]{32}$/.test(hash)) {
-+ return NextResponse.json({ error: 'Invalid hash format' }, { status: 400 });
-+ }
-+
-+ // Since serverless is readonly outside /tmp, use /tmp
-+ const CACHE_DIR = path.join('/tmp', '.openmaic', 'manual_cache');
-+ if (!fs.existsSync(CACHE_DIR)) {
-+ fs.mkdirSync(CACHE_DIR, { recursive: true });
-+ }
-+
-+ fs.writeFileSync(path.join(CACHE_DIR, `${hash}.json`), response, 'utf-8');
-+
-+ return NextResponse.json({ success: true });
-+ } catch (error) {
-+ return NextResponse.json({ error: error instanceof Error ? error.message : 'Unknown error' }, { status: 500 });
-+ }
-+}
-diff --git a/app/api/web-search/route.ts b/app/api/web-search/route.ts
-index f2ff627..b16d489 100644
---- a/app/api/web-search/route.ts
-+++ b/app/api/web-search/route.ts
-@@ -6,6 +6,7 @@
- */
-
- import { searchWithTavily, formatSearchResultsAsContext } from '@/lib/web-search/tavily';
-+import { searchWithSearXNG } from '@/lib/web-search/searxng';
- import { resolveWebSearchApiKey } from '@/lib/server/provider-config';
- import { createLogger } from '@/lib/logger';
- import { apiError, apiSuccess } from '@/lib/server/api-response';
-@@ -15,25 +16,35 @@ const log = createLogger('WebSearch');
- export async function POST(req: Request) {
- try {
- const body = await req.json();
-- const { query, apiKey: clientApiKey } = body as {
-+ const { query, apiKey: clientApiKey, providerId, baseUrl } = body as {
- query?: string;
- apiKey?: string;
-+ providerId?: string;
-+ baseUrl?: string;
- };
-
- if (!query || !query.trim()) {
- return apiError('MISSING_REQUIRED_FIELD', 400, 'query is required');
- }
-
-- const apiKey = resolveWebSearchApiKey(clientApiKey);
-- if (!apiKey) {
-- return apiError(
-- 'MISSING_API_KEY',
-- 400,
-- 'Tavily API key is not configured. Set it in Settings → Web Search or set TAVILY_API_KEY env var.',
-- );
-+ let result;
-+ if (providerId === 'searxng') {
-+ result = await searchWithSearXNG({
-+ query: query.trim(),
-+ baseUrl: baseUrl || process.env.SEARXNG_URL || 'http://127.0.0.1:8080/search'
-+ });
-+ } else {
-+ const apiKey = resolveWebSearchApiKey(clientApiKey);
-+ if (!apiKey) {
-+ return apiError(
-+ 'MISSING_API_KEY',
-+ 400,
-+ 'Tavily API key is not configured. Set it in Settings → Web Search or set TAVILY_API_KEY env var.',
-+ );
-+ }
-+ result = await searchWithTavily({ query: query.trim(), apiKey });
- }
-
-- const result = await searchWithTavily({ query: query.trim(), apiKey });
- const context = formatSearchResultsAsContext(result);
-
- return apiSuccess({
-diff --git a/components/generation/generating-progress.tsx b/components/generation/generating-progress.tsx
-index 639e79d..f76df6a 100644
---- a/components/generation/generating-progress.tsx
-+++ b/components/generation/generating-progress.tsx
-@@ -2,8 +2,11 @@
-
- import { useEffect, useState } from 'react';
- import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
--import { Loader2, CheckCircle2, XCircle, Circle } from 'lucide-react';
-+import { Loader2, CheckCircle2, XCircle, Circle, Copy, Play } from 'lucide-react';
- import { useI18n } from '@/lib/hooks/use-i18n';
-+import { Button } from '@/components/ui/button';
-+import { Textarea } from '@/components/ui/textarea';
-+import { toast } from 'sonner';
-
- interface GeneratingProgressProps {
- outlineReady: boolean; // Is outline generation complete?
-@@ -62,6 +65,14 @@ export function GeneratingProgress({
- }: GeneratingProgressProps) {
- const { t } = useI18n();
- const [dots, setDots] = useState('');
-+ const [manualResponse, setManualResponse] = useState('');
-+ const [isSubmitting, setIsSubmitting] = useState(false);
-+
-+ // Extract hash and prompt
-+ const isManualIntervention = error?.startsWith('MANUAL_INTERVENTION_REQUIRED|||');
-+ const errorParts = isManualIntervention ? error?.split('|||') : [];
-+ const promptHash = (errorParts && errorParts[1]) || '';
-+ const manualPromptText = (errorParts && errorParts[2]) || '';
-
- // Animated dots for loading state
- useEffect(() => {
-@@ -73,12 +84,41 @@ export function GeneratingProgress({
- }
- }, [error, firstPageReady]);
-
-+ const handleCopyPrompt = () => {
-+ if (manualPromptText) {
-+ navigator.clipboard.writeText(manualPromptText);
-+ toast.success("Prompt copied to clipboard");
-+ }
-+ };
-+
-+ const handleSubmitManualResponse = async () => {
-+ setIsSubmitting(true);
-+ try {
-+ await fetch('/api/manual-cache', {
-+ method: 'POST',
-+ headers: { 'Content-Type': 'application/json' },
-+ body: JSON.stringify({ hash: promptHash, response: manualResponse })
-+ });
-+
-+ // Reload the page. The user will click "Generate" again,
-+ // but the backend will instantly skip the step using the cache!
-+ toast.success("Saved! Please restart the generation.");
-+ window.location.reload();
-+ } catch (_e) {
-+ toast.error("Failed to save response.");
-+ } finally {
-+ setIsSubmitting(false);
-+ }
-+ };
-+
- return (
-
-
-
-
-- {error ? (
-+ {isManualIntervention ? (
-+ <> Action Required: Gemini Blocked Output>
-+ ) : error ? (
- <>
-
- {t('generation.generationFailed')}
-@@ -98,40 +138,67 @@ export function GeneratingProgress({
-
-
-
-- {/* Two milestone status items */}
--
--
--
--
-+ {isManualIntervention ? (
-+
-+
-+ The API blocked this specific prompt. Copy the text, paste it into the Gemini Web App, and paste the JSON result here.
-+
-+
-+
-+
-+
-+
-+
-+ ) : (
-+ <>
-+ {/* Two milestone status items */}
-+
-+
-+
-+
-
-- {/* Status message */}
-- {statusMessage && !error && (
--
-- )}
-+ {/* Status message */}
-+ {statusMessage && !error && (
-+
-+ )}
-
-- {/* Error message */}
-- {error && (
--
-+ {/* Error message */}
-+ {error && (
-+
-+ )}
-+ >
- )}
-
-
-diff --git a/lib/ai/llm.ts b/lib/ai/llm.ts
-index 8ce5b87..f5d270c 100644
---- a/lib/ai/llm.ts
-+++ b/lib/ai/llm.ts
-@@ -8,6 +8,29 @@ import { generateText, streamText } from 'ai';
- import type { GenerateTextResult, StreamTextResult } from 'ai';
- import { createLogger } from '@/lib/logger';
- import { PROVIDERS } from './providers';
-+import crypto from 'crypto';
-+import fs from 'fs';
-+import path from 'path';
-+
-+// --- Add these helpers at the top ---
-+const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
-+
-+function getPromptHash(params: Record
): string {
-+ const data = JSON.stringify({ system: params.system, prompt: params.prompt, messages: params.messages });
-+ return crypto.createHash('md5').update(data).digest('hex');
-+}
-+
-+// Temporary cache dir for manual overrides, use /tmp for serverless
-+const CACHE_DIR = path.join('/tmp', '.openmaic', 'manual_cache');
-+if (!fs.existsSync(CACHE_DIR)) fs.mkdirSync(CACHE_DIR, { recursive: true });
-+
-+function getManualCache(hash: string): string | null {
-+ if (!/^[a-fA-F0-9]{32}$/.test(hash)) return null;
-+ const filePath = path.join(CACHE_DIR, `${hash}.json`);
-+ if (fs.existsSync(filePath)) return fs.readFileSync(filePath, 'utf-8');
-+ return null;
-+}
-+// -------------------------------------
- import { thinkingContext } from './thinking-context';
- import type { ProviderType, ThinkingCapability, ThinkingConfig } from '@/lib/types/provider';
- const log = createLogger('LLM');
-@@ -292,6 +315,15 @@ export async function callLLM(
- const maxAttempts = (retryOptions?.retries ?? 0) + 1;
- const validate = retryOptions?.validate ?? (maxAttempts > 1 ? DEFAULT_VALIDATE : undefined);
-
-+ // 0. CACHE INTERCEPTION: Check if the user manually provided an answer for this prompt
-+ const promptHash = getPromptHash(params as Record);
-+ const cachedResponse = getManualCache(promptHash);
-+ if (cachedResponse) {
-+ log.info(`[${source}] 🚀 Using manual cached response for hash: ${promptHash}`);
-+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
-+ return { text: cachedResponse } as unknown as GenerateTextResult; // Mock the AI SDK response object
-+ }
-+
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
- let lastResult: GenerateTextResult | undefined;
- let lastError: unknown;
-@@ -319,8 +351,45 @@ export async function callLLM(
- }
-
- return result;
-- } catch (error) {
-+ } catch (error: unknown) {
- lastError = error;
-+ const err = error as Record;
-+
-+ // 1. RATE LIMIT PAUSING
-+ if (err?.statusCode === 429 || (typeof err?.message === 'string' && (err.message.includes('429') || err.message.includes('Too Many Requests')))) {
-+ log.warn(`[${source}] Rate limit hit. Pausing 20s...`);
-+ await sleep(20000);
-+ continue;
-+ }
-+
-+ // 2. MANUAL FALLBACK TRIGGER
-+ const isUnsupported = typeof err?.message === 'string' && (err.message.includes('unsupported') || err.message.includes('schema'));
-+ const isSafety = typeof err?.message === 'string' && (err.message.includes('safety') || err.message.includes('SAFETY'));
-+
-+ if (isUnsupported || isSafety) {
-+ let promptText = "";
-+ const p = params as Record;
-+ if (p.system) promptText += `[SYSTEM]\n${p.system}\n\n`;
-+ if (p.prompt) promptText += `[USER]\n${p.prompt}\n\n`;
-+ if (p.messages && Array.isArray(p.messages)) {
-+ promptText += p.messages.map((m: Record) => {
-+ let contentStr = "";
-+ if (typeof m.content === 'string') {
-+ contentStr = m.content;
-+ } else if (Array.isArray(m.content)) {
-+ contentStr = m.content.map((part: Record) => {
-+ if (part.type === 'text') return part.text;
-+ if (part.type === 'image') return `\n[⚠️ ACTION REQUIRED: Drag and drop the original image/PDF into the Gemini chat here] \n`;
-+ return JSON.stringify(part);
-+ }).join('\n');
-+ }
-+ return `[${(m.role || 'USER').toString().toUpperCase()}]:\n${contentStr}`;
-+ }).join('\n\n');
-+ }
-+
-+ // Pass the Hash along with the error
-+ throw new Error(`MANUAL_INTERVENTION_REQUIRED|||${promptHash}|||${promptText}`);
-+ }
-
- if (attempt < maxAttempts) {
- log.warn(`[${source}] Call failed (attempt ${attempt}/${maxAttempts}), retrying...`, error);
-diff --git a/lib/pdf/constants.ts b/lib/pdf/constants.ts
-index 93a2ef3..fa18eb9 100644
---- a/lib/pdf/constants.ts
-+++ b/lib/pdf/constants.ts
-@@ -24,6 +24,12 @@ export const PDF_PROVIDERS: Record = {
- icon: '/logos/mineru.png',
- features: ['text', 'images', 'tables', 'formulas', 'layout-analysis'],
- },
-+ local_vision: {
-+ id: 'local_vision',
-+ name: 'Local Vision (Qwen2-VL/Llama-3.2-Vision)',
-+ requiresApiKey: false,
-+ features: ['text', 'images', 'ocr', 'layout-analysis'],
-+ },
- };
-
- /**
-diff --git a/lib/pdf/pdf-providers.ts b/lib/pdf/pdf-providers.ts
-index edfaea0..baacfbe 100644
---- a/lib/pdf/pdf-providers.ts
-+++ b/lib/pdf/pdf-providers.ts
-@@ -176,6 +176,10 @@ export async function parsePDF(
- result = await parseWithMinerU(config, pdfBuffer);
- break;
-
-+ case 'local_vision':
-+ result = await parseWithLocalVision(config, pdfBuffer);
-+ break;
-+
- default:
- throw new Error(`Unsupported PDF provider: ${config.providerId}`);
- }
-@@ -461,3 +465,69 @@ export async function getCurrentPDFConfig(): Promise {
-
- // Re-export from constants for convenience
- export { getAllPDFProviders, getPDFProvider } from './constants';
-+
-+/**
-+ * Local Vision API implementation
-+ *
-+ * Uses a local OpenAI-compatible endpoint (like vLLM or Ollama running Qwen2-VL)
-+ * to perform OCR and layout analysis on PDF pages.
-+ */
-+async function parseWithLocalVision(
-+ config: PDFParserConfig,
-+ pdfBuffer: Buffer
-+): Promise {
-+ const { getDocumentProxy, renderPageAsImage } = await import('unpdf');
-+ const pdf = await getDocumentProxy(new Uint8Array(pdfBuffer));
-+ const numPages = pdf.numPages;
-+
-+ let fullText = '';
-+ const allImages: string[] = [];
-+ const baseUrl = config.baseUrl || 'http://127.0.0.1:11434/v1';
-+
-+ for (let i = 1; i <= numPages; i++) {
-+ // page is intentionally unused if only OCR is used
-+ await pdf.getPage(i);
-+ const imageArrayBuffer = await renderPageAsImage(new Uint8Array(pdfBuffer), i, { scale: 2 });
-+ const base64Image = Buffer.from(imageArrayBuffer).toString('base64');
-+ const imageUrl = `data:image/png;base64,${base64Image}`;
-+
-+ const payload = {
-+ model: "qwen2-vl",
-+ messages: [
-+ {
-+ role: "user",
-+ content: [
-+ { type: "text", text: "Transcribe the text in this document image accurately. Preserve the layout, headings, paragraphs, and list structures using Markdown. If there are tables or formulas, transcribe them into Markdown tables or LaTeX blocks respectively." },
-+ { type: "image_url", image_url: { url: imageUrl } }
-+ ]
-+ }
-+ ]
-+ };
-+
-+ const response = await fetch(`${baseUrl}/chat/completions`, {
-+ method: 'POST',
-+ headers: { 'Content-Type': 'application/json' },
-+ body: JSON.stringify(payload)
-+ });
-+
-+ if (!response.ok) {
-+ throw new Error(`Local Vision OCR error: ${response.statusText}`);
-+ }
-+
-+ const data = await response.json();
-+ const pageText = data.choices?.[0]?.message?.content || '';
-+ fullText += `\n\n--- Page ${i} ---\n\n${pageText}`;
-+
-+ // Optionally extract native images from the page using unpdf alongside the OCR
-+ // ...
-+ }
-+
-+ return {
-+ text: fullText.trim(),
-+ images: allImages,
-+ metadata: {
-+ pageCount: numPages,
-+ parser: 'local_vision'
-+ }
-+ };
-+}
-diff --git a/lib/pdf/types.ts b/lib/pdf/types.ts
-index 8173dae..d4bf3bb 100644
---- a/lib/pdf/types.ts
-+++ b/lib/pdf/types.ts
-@@ -5,7 +5,7 @@
- /**
- * PDF Provider IDs
- */
--export type PDFProviderId = 'unpdf' | 'mineru';
-+export type PDFProviderId = 'unpdf' | 'mineru' | 'local_vision';
-
- /**
- * PDF Provider Configuration
-diff --git a/lib/server/resolve-model.ts b/lib/server/resolve-model.ts
-index 790da76..3acfaea 100644
---- a/lib/server/resolve-model.ts
-+++ b/lib/server/resolve-model.ts
-@@ -17,6 +17,9 @@ export interface ResolvedModel extends ModelWithInfo {
- apiKey: string;
- }
-
-+// Global state to track round-robin indexes per provider across warm serverless requests
-+const roundRobinIndexMap = new Map();
-+
- /**
- * Resolve a language model from explicit parameters.
- *
-@@ -40,9 +43,21 @@ export function resolveModel(params: {
- }
- }
-
-- const apiKey = clientBaseUrl
-+ let apiKey = clientBaseUrl
- ? params.apiKey || ''
- : resolveApiKey(providerId, params.apiKey || '');
-+
-+ // --- MULTI-KEY ROUND ROBIN INJECTION ---
-+ if (apiKey.includes(',')) {
-+ const keys = apiKey.split(',').map(k => k.trim()).filter(Boolean);
-+ if (keys.length > 0) {
-+ const currentIndex = roundRobinIndexMap.get(providerId) || 0;
-+ apiKey = keys[currentIndex % keys.length];
-+ roundRobinIndexMap.set(providerId, currentIndex + 1);
-+ }
-+ }
-+ // ---------------------------------------
-+
- const baseUrl = clientBaseUrl ? clientBaseUrl : resolveBaseUrl(providerId, params.baseUrl);
- const proxy = resolveProxy(providerId);
- const { model, modelInfo } = getModel({
-diff --git a/lib/web-search/searxng.ts b/lib/web-search/searxng.ts
-new file mode 100644
-index 0000000..b297240
---- /dev/null
-+++ b/lib/web-search/searxng.ts
-@@ -0,0 +1,43 @@
-+import type { WebSearchResult, WebSearchSource } from '@/lib/types/web-search';
-+
-+const SEARXNG_MAX_QUERY_LENGTH = 400;
-+
-+/**
-+ * Search the web using SearXNG API and return structured results.
-+ */
-+export async function searchWithSearXNG(params: {
-+ query: string;
-+ baseUrl: string;
-+ maxResults?: number;
-+}): Promise {
-+ const { query, baseUrl, maxResults = 5 } = params;
-+ const truncatedQuery = query.slice(0, SEARXNG_MAX_QUERY_LENGTH);
-+
-+ const url = new URL(baseUrl);
-+ url.searchParams.append('q', truncatedQuery);
-+ url.searchParams.append('format', 'json');
-+ url.searchParams.append('language', 'en');
-+
-+ const response = await fetch(url.toString());
-+ if (!response.ok) throw new Error(`SearXNG error: ${response.status}`);
-+
-+ const data = await response.json();
-+ const startTime = Date.now();
-+
-+ const sources: WebSearchSource[] = data.results.slice(0, maxResults).map((result: Record) => ({
-+ title: String(result.title || ''),
-+ url: String(result.url || ''),
-+ content: String(result.content || ''),
-+ score: typeof result.score === 'number' ? result.score : 1,
-+ }));
-+
-+ const answer = data.answers && data.answers.length > 0 ? data.answers[0] : '';
-+ const responseTime = (Date.now() - startTime) / 1000;
-+
-+ return {
-+ answer,
-+ sources,
-+ query: data.query,
-+ responseTime,
-+ };
-+}