ivanzud · ivanzud · Mar 8, 2026 · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -66,6 +66,7 @@ COPY --from=frontend /app/dist /app/web-app/dist
 COPY *.go ./
 COPY internal ./internal
 COPY ocr ./ocr
+COPY sanitize ./sanitize
 
 # Import ARGs from top level
 ARG VERSION

diff --git a/README.md b/README.md
@@ -8,11 +8,11 @@
 [![GitHub Sponsors](https://img.shields.io/badge/Sponsor-icereed-ea4aaa?logo=github-sponsors)](https://github.com/sponsors/icereed)


 <a href="https://trendshift.io/repositories/12701" target="_blank"><img src="https://trendshift.io/api/badge/repositories/12701" alt="icereed%2Fpaperless-gpt | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>

 ![Screenshot](./paperless-gpt-screenshot.png)

 <sub>💡 Maintained by [Icereed](https://github.com/icereed). Proudly supported by [BubbleTax.de](https://bubbletax.de/?utm_source=github&utm_medium=readme&utm_campaign=paperless) – automated, BMF-compliant tax reports for Interactive Brokers traders in Germany.</sub>

 ---
 **paperless-gpt** seamlessly pairs with [paperless-ngx][paperless-ngx] to generate **AI-powered document titles** and **tags**, saving you hours of manual sorting. While other tools may offer AI chat features, **paperless-gpt** stands out by **supercharging OCR with LLMs**-ensuring high accuracy, even with tricky scans. If you're craving next-level text extraction and effortless document organization, this is your solution.
@@ -27,20 +27,20 @@
 ## Key Highlights

 1. **LLM-Enhanced OCR**  
   Harness Large Language Models (OpenAI or Ollama) for **better-than-traditional** OCR—turn messy or low-quality scans into context-aware, high-fidelity text.

 2. **Use specialized AI OCR services**

   - **LLM OCR**: Use OpenAI or Ollama to extract text from images.
   - **Google Document AI**: Leverage Google's powerful Document AI for OCR tasks.
   - **Azure Document Intelligence**: Use Microsoft's enterprise OCR solution.
   - **Docling Server**: Self-hosted OCR and document conversion service

 3. **Automatic Title, Tag & Created Date Generation**  
   No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed.

 4. **Supports reasoning models in Ollama**  
   Greatly enhance accuracy by using a reasoning model like `qwen3:8b`. The perfect tradeoff between privacy and performance! Of course, if you got enough GPUs or NPUs, a bigger model will enhance the experience.

 5. **Automatic Correspondent Generation**  
   Automatically identify and generate correspondents from your documents, making it easier to track and organize your communications.
@@ -87,7 +87,7 @@
    - [1. LLM-based OCR (Default)](#1-llm-based-ocr-default)
    - [2. Azure Document Intelligence](#2-azure-document-intelligence)
    - [3. Google Document AI](#3-google-document-ai)
    - [4. Docling Server](#4-docling-server)
  - [OCR Processing Modes](#ocr-processing-modes)
    - [Image Mode (Default)](#image-mode-default)
    - [PDF Mode](#pdf-mode)
@@ -587,7 +587,9 @@
 | `GOOGLEAI_API_KEY`                  | Google Gemini API key (required if using `LLM_PROVIDER=googleai`).                                                                                                                            | Cond.    |                            |
 | `GOOGLEAI_THINKING_BUDGET`          | (Optional, googleai only) Integer. Controls Gemini "thinking" budget. If unset, model default is used (thinking enabled if supported). Set to `0` to disable thinking (if model supports it). | No       |                            |
 | `OLLAMA_HOST`                       | Ollama server URL (e.g. `http://host.docker.internal:11434`).                                                                                                                                 | No       |                            |
+| `OLLAMA_THINK`                      | (Optional, Ollama only) Boolean. If set, explicitly enables (`true`) or disables (`false`) Ollama thinking mode for the main LLM. If unset, the parameter is omitted and Ollama defaults apply. | No       |                            |
 | `VISION_OLLAMA_HOST`                | Optional Ollama URL used only for OCR vision requests (`VISION_LLM_PROVIDER=ollama`). Falls back to `VISION_LLM_HOST`, then `OLLAMA_HOST`.                                                   | No       |                            |
+| `VISION_OLLAMA_THINK`               | (Optional, vision Ollama only) Boolean. Overrides `OLLAMA_THINK` for OCR/vision requests when `VISION_LLM_PROVIDER=ollama`. If unset, OCR inherits `OLLAMA_THINK`.                         | No       |                            |
 | `VISION_LLM_HOST`                   | Alias for `VISION_OLLAMA_HOST`. Useful if you prefer provider-agnostic naming.                                                                                                                | No       |                            |
 | `LLM_REQUESTS_PER_MINUTE`           | Maximum requests per minute for the main LLM. Useful for managing API costs or local LLM load.                                                                                                | No       | 120                        |
 | `LLM_MAX_RETRIES`                   | Maximum retry attempts for failed main LLM requests.                                                                                                                                          | No       | 3                          |

diff --git a/app_http_handlers.go b/app_http_handlers.go
@@ -14,6 +14,8 @@ import (
 	"text/template"
 	"time"
 
+	"paperless-gpt/sanitize"
+
 	"github.com/Masterminds/sprig/v3"
 	"github.com/gin-gonic/gin"
 )
@@ -606,6 +608,7 @@ func (app *App) analyzeDocumentsHandler(c *gin.Context) {
 			log.Errorf("Error fetching document %d: %v", docID, err)
 			return
 		}
+		doc.Content = sanitize.Sanitize(doc.Content)
 		documents = append(documents, doc)
 	}
 

diff --git a/app_llm.go b/app_llm.go
@@ -13,6 +13,8 @@ import (
 
 	_ "image/jpeg"
 
+	"paperless-gpt/sanitize"
+
 	"github.com/sirupsen/logrus"
 	"github.com/tmc/langchaingo/llms"
 )
@@ -63,7 +65,7 @@ func (app *App) getSuggestedCorrespondent(ctx context.Context, content string, s
 			},
 			Role: llms.ChatMessageTypeHuman,
 		},
-	})
+	}, mainLLMCallOptions()...)
 	if err != nil {
 		return "", fmt.Errorf("error getting response from LLM: %v", err)
 	}
@@ -133,7 +135,7 @@ func (app *App) getSuggestedTags(
 			},
 			Role: llms.ChatMessageTypeHuman,
 		},
-	})
+	}, mainLLMCallOptions()...)
 	if err != nil {
 		logger.Errorf("Error getting response from LLM: %v", err)
 		return nil, fmt.Errorf("error getting response from LLM: %v", err)
@@ -219,7 +221,7 @@ func (app *App) getSuggestedDocumentType(
 			},
 			Role: llms.ChatMessageTypeHuman,
 		},
-	})
+	}, mainLLMCallOptions()...)
 	if err != nil {
 		logger.Errorf("Error getting response from LLM: %v", err)
 		return "", fmt.Errorf("error getting response from LLM: %v", err)
@@ -289,7 +291,7 @@ func (app *App) getSuggestedTitle(ctx context.Context, content string, originalT
 			},
 			Role: llms.ChatMessageTypeHuman,
 		},
-	})
+	}, mainLLMCallOptions()...)
 	if err != nil {
 		return "", fmt.Errorf("error getting response from LLM: %v", err)
 	}
@@ -345,7 +347,7 @@ func (app *App) getSuggestedCreatedDate(ctx context.Context, content string, log
 			},
 			Role: llms.ChatMessageTypeHuman,
 		},
-	})
+	}, mainLLMCallOptions()...)
 	if err != nil {
 		return "", fmt.Errorf("error getting response from LLM: %v", err)
 	}
@@ -401,7 +403,7 @@ func (app *App) getSuggestedCustomFields(ctx context.Context, doc Document, sele
 		return nil, fmt.Errorf("error calculating available tokens for custom fields: %v", err)
 	}
 
-	truncatedContent, err := truncateContentByTokens(doc.Content, availableTokens)
+	truncatedContent, err := truncateContentByTokens(sanitize.Sanitize(doc.Content), availableTokens)
 	if err != nil {
 		return nil, fmt.Errorf("error truncating content for custom fields: %v", err)
 	}
@@ -423,7 +425,7 @@ func (app *App) getSuggestedCustomFields(ctx context.Context, doc Document, sele
 				llms.TextContent{Text: prompt},
 			},
 		},
-	})
+	}, mainLLMCallOptions()...)
 	if err != nil {
 		return nil, fmt.Errorf("error getting response from LLM for custom fields: %v", err)
 	}
@@ -529,7 +531,7 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
 			startTime := time.Now()
 			docLogger.Printf("Processing Document ID %d...", documentID)
 
-			content := doc.Content
+			content := sanitize.Sanitize(doc.Content)
 			suggestedTitle := doc.Title
 			var suggestedTags []string
 			var suggestedCorrespondent string

diff --git a/background.go b/background.go
@@ -7,6 +7,8 @@ import (
 	"slices"
 	"strings"
 	"time"
+
+	"github.com/sirupsen/logrus"
 )
 
 // This is our interface, allowing us to enable proper testing
@@ -46,11 +48,14 @@ func StartBackgroundTasks(ctx context.Context, app BackgroundProcessor) {
 				}
 
 				// Run auto-tagging after OCR
-				autoCount, err := app.processAutoTagDocuments(ctx)
-				if err != nil {
-					return 0, fmt.Errorf("error in processAutoTagDocuments: %w", err)
+				// Only run auto-tagging when the OCR step processed no documents in this pass (count == 0); otherwise skip it so OCR runs again first
+				if count == 0 {
+					autoCount, err := app.processAutoTagDocuments(ctx)
+					if err != nil {
+						return 0, fmt.Errorf("error in processAutoTagDocuments: %w", err)
+					}
+					count += autoCount
 				}
-				count += autoCount
 
 				return count, nil
 			}()
@@ -78,6 +83,34 @@ func StartBackgroundTasks(ctx context.Context, app BackgroundProcessor) {
 	}()
 }
 
+// classifyDocument builds a suggestion request containing only the given document and returns a pointer to the first suggestion produced by generateDocumentSuggestions; it is shared by the normal auto-tag path and OCR chaining.
+// It returns a wrapped error when generateDocumentSuggestions fails, and a "no suggestions generated" error when the result is empty.
+func (app *App) classifyDocument(ctx context.Context, document Document, logger *logrus.Entry) (*DocumentSuggestion, error) {
+	settingsMutex.RLock() // hold the read lock only while copying the custom-fields flag
+	generateCustomFields := settings.CustomFieldsEnable
+	settingsMutex.RUnlock()
+
+	suggestionRequest := GenerateSuggestionsRequest{ // each auto-generate flag is on unless its setting equals "false" (case-insensitive)
+		Documents:              []Document{document},
+		GenerateTitles:         strings.ToLower(autoGenerateTitle) != "false",
+		GenerateTags:           strings.ToLower(autoGenerateTags) != "false",
+		GenerateCorrespondents: strings.ToLower(autoGenerateCorrespondents) != "false",
+		GenerateDocumentTypes:  strings.ToLower(autoGenerateDocumentType) != "false",
+		GenerateCreatedDate:    strings.ToLower(autoGenerateCreatedDate) != "false",
+		GenerateCustomFields:   generateCustomFields,
+	}
+
+	suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, logger)
+	if err != nil {
+		return nil, fmt.Errorf("error generating suggestions: %w", err)
+	}
+	if len(suggestions) == 0 { // guard the index access below
+		return nil, fmt.Errorf("no suggestions generated")
+	}
+
+	return &suggestions[0], nil // the request held a single document, so only the first suggestion is returned
+}
+
 // processAutoTagDocuments handles the background auto-tagging of documents
 func (app *App) processAutoTagDocuments(ctx context.Context) (int, error) {
 	documents, err := app.Client.GetDocumentsByTag(ctx, autoTag, 25)
@@ -117,29 +150,15 @@ func (app *App) processAutoTagDocuments(ctx context.Context) (int, error) {
 		docLogger := documentLogger(document.ID)
 		docLogger.Info("Processing document for auto-tagging")
 
-		settingsMutex.RLock()
-		generateCustomFields := settings.CustomFieldsEnable
-		settingsMutex.RUnlock()
-
-		suggestionRequest := GenerateSuggestionsRequest{
-			Documents:              []Document{document},
-			GenerateTitles:         strings.ToLower(autoGenerateTitle) != "false",
-			GenerateTags:           strings.ToLower(autoGenerateTags) != "false",
-			GenerateCorrespondents: strings.ToLower(autoGenerateCorrespondents) != "false",
-			GenerateDocumentTypes:  strings.ToLower(autoGenerateDocumentType) != "false",
-			GenerateCreatedDate:    strings.ToLower(autoGenerateCreatedDate) != "false",
-			GenerateCustomFields:   generateCustomFields,
-		}
-
-		suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, docLogger)
+		suggestion, err := app.classifyDocument(ctx, document, docLogger)
 		if err != nil {
-			err = fmt.Errorf("error generating suggestions for document %d: %w", document.ID, err)
+			err = fmt.Errorf("error classifying document %d: %w", document.ID, err)
 			docLogger.Error(err.Error())
 			errs = append(errs, err)
 			continue
 		}
 
-		err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false)
+		err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{*suggestion}, app.Database, false)
 		if err != nil {
 			err = fmt.Errorf("error updating document %d: %w", document.ID, err)
 			docLogger.Error(err.Error())
@@ -220,6 +239,7 @@ func (app *App) processAutoOcrTagDocuments(ctx context.Context) (int, error) {
 			CopyMetadata:    app.pdfCopyMetadata,
 			LimitPages:      limitOcrPages,
 			ProcessMode:     app.ocrProcessMode,
+			ExistingContent: document.Content,
 		}
 
 		// Use the DocumentProcessor interface instead of calling the method directly
@@ -265,6 +285,48 @@ func (app *App) processAutoOcrTagDocuments(ctx context.Context) (int, error) {
 			docLogger.Infof("Adding OCR complete tag '%s'", app.pdfOCRCompleteTag)
 		}
 
+		// Chain into classification if enabled so OCR output can drive metadata in a single pass.
+		if autoOcrThenClassify {
+			docLogger.Info("Chaining into classification after OCR")
+
+			classifyDoc, fetchErr := app.Client.GetDocument(ctx, document.ID)
+			if fetchErr != nil {
+				docLogger.Errorf("Failed to fetch full document for classification, skipping: %v", fetchErr)
+			} else {
+				classifyDoc.Content = processedDoc.Text
+				refreshCustomFieldsCache(app.Client)
+
+				classifySuggestion, classifyErr := app.classifyDocument(ctx, classifyDoc, docLogger)
+				if classifyErr != nil {
+					docLogger.Errorf("Classification after OCR failed (OCR content will still be saved): %v", classifyErr)
+				} else {
+					documentSuggestion.SuggestedTitle = classifySuggestion.SuggestedTitle
+					documentSuggestion.SuggestedCorrespondent = classifySuggestion.SuggestedCorrespondent
+					documentSuggestion.SuggestedDocumentType = classifySuggestion.SuggestedDocumentType
+					documentSuggestion.SuggestedCreatedDate = classifySuggestion.SuggestedCreatedDate
+					documentSuggestion.SuggestedCustomFields = classifySuggestion.SuggestedCustomFields
+					documentSuggestion.CustomFieldsWriteMode = classifySuggestion.CustomFieldsWriteMode
+					documentSuggestion.CustomFieldsEnable = classifySuggestion.CustomFieldsEnable
+
+					mergedTags := append([]string{}, documentSuggestion.SuggestedTags...)
+					for _, tag := range classifySuggestion.SuggestedTags {
+						if !slices.Contains(mergedTags, tag) {
+							mergedTags = append(mergedTags, tag)
+						}
+					}
+					documentSuggestion.SuggestedTags = mergedTags
+					documentSuggestion.KeepOriginalTags = true
+
+					for _, tag := range append(classifySuggestion.RemoveTags, autoTag) {
+						if !slices.Contains(documentSuggestion.RemoveTags, tag) {
+							documentSuggestion.RemoveTags = append(documentSuggestion.RemoveTags, tag)
+						}
+					}
+					docLogger.Info("Classification after OCR completed successfully")
+				}
+			}
+		}
+
 		// Skip updating the original document if it was actually replaced (deleted) during OCR.
 		// The replacement document will be processed as a new document on the next cycle.
 		if options.ReplaceOriginal && processedDoc != nil && processedDoc.ReplacedOriginal {