From 7c870fb1489a12661615bf7f1cecacdeb02588a5 Mon Sep 17 00:00:00 2001 From: zebrapurring <> Date: Sun, 22 Mar 2026 15:42:31 +0100 Subject: [PATCH] feat: add support for passing arbitrary headers to Ollama via `OLLAMA_HEADERS` --- README.md | 3 +++ main.go | 10 +++++++++- ocr/llm_provider.go | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 06c27f8a..63d0aa46 100644 --- a/README.md +++ b/README.md @@ -179,6 +179,7 @@ services: # LLM_MODEL: "qwen3:8b" # OLLAMA_HOST: "http://host.docker.internal:11434" # OLLAMA_CONTEXT_LENGTH: "8192" # Sets Ollama NumCtx (context window) + # OLLAMA_HEADERS: "Authorization=Bearer mytoken" # Optional headers for reverse-proxy auth # TOKEN_LIMIT: 1000 # Recommended for smaller models # Option 5: Anthropic/Claude @@ -569,6 +570,7 @@ For best results with the enhanced OCR features: | `VISION_LLM_TEMPERATURE` | Sampling temperature for Vision OCR generation. Lower is more deterministic. Important: For OpenAI GPT-5 it must be explicitly set to `1.0`. | No | | | `OLLAMA_CONTEXT_LENGTH` | (Ollama only) Integer. Sets NumCtx (context window) for the Ollama runner. If unset or 0, the model default is used. | No | | | `OLLAMA_OCR_TOP_K` | (Ollama only) Top-k token sampling for Vision OCR. Lower favors more likely tokens; higher increases diversity. | No | | +| `OLLAMA_HEADERS` | (Ollama only) Comma-separated `Key=Value` pairs added as HTTP headers to every Ollama request. Useful for authorization when Ollama is behind a reverse proxy (e.g. `Authorization=Bearer mytoken`). Header values cannot contain commas, and whitespace around `=` is not stripped. | No | | | `AZURE_DOCAI_ENDPOINT` | Azure Document Intelligence endpoint. Required if OCR_PROVIDER is `azure`. | Cond. | | | `AZURE_DOCAI_KEY` | Azure Document Intelligence API key. Required if OCR_PROVIDER is `azure`. | Cond. | | | `AZURE_DOCAI_MODEL_ID` | Azure Document Intelligence model ID. Optional if using `azure` provider. 
| No | prebuilt-read | @@ -916,6 +918,7 @@ When using local LLMs (like those through Ollama), you might need to adjust cert - Use `TOKEN_LIMIT` environment variable to control the maximum number of tokens sent to the LLM - For Ollama, set `OLLAMA_CONTEXT_LENGTH` to control the model's context window (NumCtx). This is independent of `TOKEN_LIMIT` and configures the server-side KV cache size. If unset or 0, the model default is used. Choose a value within the model's supported window (e.g., 8192). +- If Ollama is behind a reverse proxy that requires authentication, set `OLLAMA_HEADERS` to a comma-separated list of `Key=Value` header pairs (e.g. `Authorization=Bearer mytoken`). - Smaller models might truncate content unexpectedly if given too much text - Start with a conservative limit (e.g., 1000 tokens) and adjust based on your model's capabilities - Set to `0` to disable the limit (use with caution) diff --git a/main.go b/main.go index 69e03cf8..59ba5ebf 100644 --- a/main.go +++ b/main.go @@ -991,6 +991,9 @@ func createLLM() (llms.Model, error) { log.Warnf("Invalid OLLAMA_CONTEXT_LENGTH value: %v, ignoring", err) } } + if client := ocr.OllamaHTTPClient(); client != nil { + opts = append(opts, ollama.WithHTTPClient(client)) + } llm, err := ollama.New(opts...) if err != nil { return nil, err @@ -1099,6 +1102,9 @@ func createVisionLLM() (llms.Model, error) { log.Warnf("Invalid OLLAMA_CONTEXT_LENGTH value: %v, ignoring", err) } } + if client := ocr.OllamaHTTPClient(); client != nil { + opts = append(opts, ollama.WithHTTPClient(client)) + } llm, err := ollama.New(opts...) 
if err != nil { return nil, err @@ -1135,6 +1141,7 @@ func createVisionLLM() (llms.Model, error) { } } + func createCustomHTTPClient() *http.Client { // Create custom transport that adds headers customTransport := &headerTransport{ @@ -1159,8 +1166,9 @@ type headerTransport struct { // RoundTrip implements the http.RoundTripper interface func (t *headerTransport) RoundTrip(req *http.Request) (*http.Response, error) { + req = req.Clone(req.Context()) for key, value := range t.headers { - req.Header.Add(key, value) + req.Header.Set(key, value) } return t.transport.RoundTrip(req) } diff --git a/ocr/llm_provider.go b/ocr/llm_provider.go index 16a0d334..f5e396db 100644 --- a/ocr/llm_provider.go +++ b/ocr/llm_provider.go @@ -6,6 +6,7 @@ import ( "encoding/base64" "fmt" "image" + "net/http" "os" "strings" @@ -208,6 +209,44 @@ func createOpenAIClient(config Config) (llms.Model, error) { ) } +// OllamaHTTPClient returns an *http.Client with headers from OLLAMA_HEADERS injected, +// or nil if OLLAMA_HEADERS is not set. 
+func OllamaHTTPClient() *http.Client { + raw := os.Getenv("OLLAMA_HEADERS") + if raw == "" { + return nil + } + headers := map[string]string{} + for _, pair := range strings.Split(raw, ",") { + parts := strings.SplitN(strings.TrimSpace(pair), "=", 2) + if len(parts) == 2 && parts[0] != "" { + headers[parts[0]] = parts[1] + } + } + if len(headers) == 0 { + return nil + } + return &http.Client{ + Transport: &ollamaHeaderTransport{ + base: http.DefaultTransport, + headers: headers, + }, + } +} + +type ollamaHeaderTransport struct { + base http.RoundTripper + headers map[string]string +} + +func (t *ollamaHeaderTransport) RoundTrip(req *http.Request) (*http.Response, error) { + req = req.Clone(req.Context()) + for k, v := range t.headers { + req.Header.Set(k, v) + } + return t.base.RoundTrip(req) +} + // createOllamaClient creates a new Ollama vision model client func createOllamaClient(config Config) (llms.Model, error) { host := os.Getenv("OLLAMA_HOST") @@ -221,6 +260,9 @@ func createOllamaClient(config Config) (llms.Model, error) { if config.OllamaContextLength > 0 { opts = append(opts, ollama.WithRunnerNumCtx(config.OllamaContextLength)) } + if client := OllamaHTTPClient(); client != nil { + opts = append(opts, ollama.WithHTTPClient(client)) + } return ollama.New(opts...) }