From 7c870fb1489a12661615bf7f1cecacdeb02588a5 Mon Sep 17 00:00:00 2001
From: zebrapurring <>
Date: Sun, 22 Mar 2026 15:42:31 +0100
Subject: [PATCH] feat: add support for passing arbitrary headers to Ollama via
 `OLLAMA_HEADERS`

---
 README.md           |  3 +++
 main.go             | 10 +++++++++-
 ocr/llm_provider.go | 42 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 06c27f8a..63d0aa46 100644
--- a/README.md
+++ b/README.md
@@ -179,6 +179,7 @@ services:
       # LLM_MODEL: "qwen3:8b"
       # OLLAMA_HOST: "http://host.docker.internal:11434"
       # OLLAMA_CONTEXT_LENGTH: "8192" # Sets Ollama NumCtx (context window)
+      # OLLAMA_HEADERS: "Authorization=Bearer mytoken" # Optional headers for reverse-proxy auth
       # TOKEN_LIMIT: 1000 # Recommended for smaller models
 
       # Option 5: Anthropic/Claude
@@ -569,6 +570,7 @@ For best results with the enhanced OCR features:
 | `VISION_LLM_TEMPERATURE`            | Sampling temperature for Vision OCR generation. Lower is more deterministic. Important: For OpenAI GPT-5 it must be explicitly set to `1.0`.                                                  | No       |                            |
 | `OLLAMA_CONTEXT_LENGTH`             | (Ollama only) Integer. Sets NumCtx (context window) for the Ollama runner. If unset or 0, the model default is used.                                                                          | No       |                            |
 | `OLLAMA_OCR_TOP_K`                  | (Ollama only) Top-k token sampling for Vision OCR. Lower favors more likely tokens; higher increases diversity.                                                                               | No       |                            |
+| `OLLAMA_HEADERS`                    | (Ollama only) Comma-separated `Key=Value` pairs set as HTTP headers on every Ollama request. Useful for authorization when Ollama is behind a reverse proxy (e.g. `Authorization=Bearer mytoken`). Pairs without `=` are ignored, and header values cannot contain commas. | No       |                            |
 | `AZURE_DOCAI_ENDPOINT`              | Azure Document Intelligence endpoint. Required if OCR_PROVIDER is `azure`.                                                                                                                    | Cond.    |                            |
 | `AZURE_DOCAI_KEY`                   | Azure Document Intelligence API key. Required if OCR_PROVIDER is `azure`.                                                                                                                     | Cond.    |                            |
 | `AZURE_DOCAI_MODEL_ID`              | Azure Document Intelligence model ID. Optional if using `azure` provider.                                                                                                                     | No       | prebuilt-read              |
@@ -916,6 +918,7 @@ When using local LLMs (like those through Ollama), you might need to adjust cert
 
 - Use `TOKEN_LIMIT` environment variable to control the maximum number of tokens sent to the LLM
 - For Ollama, set `OLLAMA_CONTEXT_LENGTH` to control the model's context window (NumCtx). This is independent of `TOKEN_LIMIT` and configures the server-side KV cache size. If unset or 0, the model default is used. Choose a value within the model's supported window (e.g., 8192).
+- If Ollama is behind a reverse proxy that requires authentication, set `OLLAMA_HEADERS` to a comma-separated list of `Key=Value` header pairs (e.g. `Authorization=Bearer mytoken`).
 - Smaller models might truncate content unexpectedly if given too much text
 - Start with a conservative limit (e.g., 1000 tokens) and adjust based on your model's capabilities
 - Set to `0` to disable the limit (use with caution)
diff --git a/main.go b/main.go
index 69e03cf8..59ba5ebf 100644
--- a/main.go
+++ b/main.go
@@ -991,6 +991,9 @@ func createLLM() (llms.Model, error) {
 				log.Warnf("Invalid OLLAMA_CONTEXT_LENGTH value: %v, ignoring", err)
 			}
 		}
+		if client := ocr.OllamaHTTPClient(); client != nil {
+			opts = append(opts, ollama.WithHTTPClient(client))
+		}
 		llm, err := ollama.New(opts...)
 		if err != nil {
 			return nil, err
@@ -1099,6 +1102,9 @@ func createVisionLLM() (llms.Model, error) {
 				log.Warnf("Invalid OLLAMA_CONTEXT_LENGTH value: %v, ignoring", err)
 			}
 		}
+		if client := ocr.OllamaHTTPClient(); client != nil {
+			opts = append(opts, ollama.WithHTTPClient(client))
+		}
 		llm, err := ollama.New(opts...)
 		if err != nil {
 			return nil, err
@@ -1135,6 +1141,7 @@ func createVisionLLM() (llms.Model, error) {
 	}
 }
 
+
 func createCustomHTTPClient() *http.Client {
 	// Create custom transport that adds headers
 	customTransport := &headerTransport{
@@ -1159,8 +1166,9 @@ type headerTransport struct {
 
 // RoundTrip implements the http.RoundTripper interface
 func (t *headerTransport) RoundTrip(req *http.Request) (*http.Response, error) {
+	req = req.Clone(req.Context()) // work on a copy so the caller's request is left untouched
 	for key, value := range t.headers {
-		req.Header.Add(key, value)
+		req.Header.Set(key, value) // Set replaces any existing value instead of appending a duplicate
 	}
 	return t.transport.RoundTrip(req)
 }
diff --git a/ocr/llm_provider.go b/ocr/llm_provider.go
index 16a0d334..f5e396db 100644
--- a/ocr/llm_provider.go
+++ b/ocr/llm_provider.go
@@ -6,6 +6,7 @@ import (
 	"encoding/base64"
 	"fmt"
 	"image"
+	"net/http"
 	"os"
 	"strings"
 
@@ -208,6 +209,44 @@ func createOpenAIClient(config Config) (llms.Model, error) {
 	)
 }
 
+// OllamaHTTPClient returns an *http.Client that sets the comma-separated Key=Value pairs
+// from OLLAMA_HEADERS on every request, or nil if the variable yields no usable pair.
+func OllamaHTTPClient() *http.Client {
+	raw := os.Getenv("OLLAMA_HEADERS")
+	if raw == "" {
+		return nil
+	}
+	headers := map[string]string{}
+	for _, pair := range strings.Split(raw, ",") {
+		parts := strings.SplitN(pair, "=", 2)
+		if len(parts) == 2 && strings.TrimSpace(parts[0]) != "" {
+			headers[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1]) // "K = V" must not yield key "K "
+		}
+	}
+	if len(headers) == 0 {
+		return nil
+	}
+	return &http.Client{
+		Transport: &ollamaHeaderTransport{
+			base:    http.DefaultTransport,
+			headers: headers,
+		},
+	}
+}
+
+type ollamaHeaderTransport struct {
+	base    http.RoundTripper // transport that actually performs the request
+	headers map[string]string // header name -> value, set on every outgoing request
+}
+
+func (t *ollamaHeaderTransport) RoundTrip(req *http.Request) (*http.Response, error) {
+	out := req.Clone(req.Context()) // a RoundTripper should not modify the caller's request
+	for name, value := range t.headers {
+		out.Header.Set(name, value)
+	}
+	return t.base.RoundTrip(out)
+}
+
 // createOllamaClient creates a new Ollama vision model client
 func createOllamaClient(config Config) (llms.Model, error) {
 	host := os.Getenv("OLLAMA_HOST")
@@ -221,6 +260,9 @@ func createOllamaClient(config Config) (llms.Model, error) {
 	if config.OllamaContextLength > 0 {
 		opts = append(opts, ollama.WithRunnerNumCtx(config.OllamaContextLength))
 	}
+	if client := OllamaHTTPClient(); client != nil {
+		opts = append(opts, ollama.WithHTTPClient(client))
+	}
 	return ollama.New(opts...)
 }