JaidedAI · sliedes · Jul 2, 2024
diff --git a/easyocr/cli.py b/easyocr/cli.py
@@ -229,7 +229,7 @@ def parse_args():
     parser.add_argument(
         "--output_format",
         type=str,
-        choices=["standard", 'dict', 'json'],
+        choices=["standard", 'dict', 'json', "hocr"],
         default='standard',
         help="output format.",
     )

diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py
@@ -4,7 +4,7 @@
 from .utils import group_text_box, get_image_list, calculate_md5, get_paragraph,\
                    download_and_unzip, printProgressBar, diff, reformat_input,\
                    make_rotated_img_list, set_result_with_confidence,\
-                   reformat_input_batched, merge_to_free
+                   reformat_input_batched, merge_to_free, to_hocr
 from .config import *
 from bidi.algorithm import get_display
 import numpy as np
@@ -434,6 +434,8 @@ def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,\
             return [json.dumps({'boxes':[list(map(int, lst)) for lst in item[0]],'text':item[1],'confident':item[2]}, ensure_ascii=False) for item in result]
         elif output_format == 'free_merge':
             return merge_to_free(result, free_list)
+        elif output_format == "hocr":
+            return to_hocr(result)
         else:
             return result
 

diff --git a/easyocr/utils.py b/easyocr/utils.py
@@ -8,6 +8,7 @@
 from PIL import Image, JpegImagePlugin
 from scipy import ndimage
 import hashlib
+import html
 import sys, os
 from zipfile import ZipFile
 from .imgproc import loadImage
@@ -383,6 +384,49 @@ def decode_wordbeamsearch(self, mat, beamWidth=5):
             texts.append(string)
         return texts
 
+OCR_PREAMBLE = """
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title></title>
+    <meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
+    <meta name='ocr-system' content='EasyOCR' />
+    <meta name='ocr-capabilities' content='ocrx_word'/>
+  </head>
+  <body>
+    <div class="ocr_page" id="page_1" title="image 'image.png'; bbox {x0} {y0} {x1} {y1}; ppageno 0">
+""".strip()  # str.format template ({x0} {y0} {x1} {y1} = page bbox); strip() drops the leading newline so the XML declaration comes first
+
+
+# In order to get a browser-renderable HTML file, you can add this before the closing </body> tag:
+#
+# <script src="https://unpkg.com/hocrjs"></script>
+
+OCR_POSTAMBLE = """    </div>
+  </body>
+</html>
+""".splitlines()  # kept as a list of lines so to_hocr can concatenate it with its other line lists
+
+def to_hocr(result):
+    content = []
+    min_x0, min_y0, max_x1, max_y1 = 1e9, 1e9, 0, 0
+    for box, text, confidence in result:
+        # We have the corners of the box, clockwise from top-left
+        c1, _, c3, _ = [[int(x) for x in c] for c in box]
+        x0, y0 = c1
+        x1, y1 = c3
+        min_x0 = min(min_x0, x0)
+        min_y0 = min(min_y0, y0)
+        max_x1 = max(max_x1, x1)
+        max_y1 = max(max_y1, y1)
+        content.append('      <span class="ocrx_word" title="bbox {x0} {y0} {x1} {y1}">{text}</span>'.format(
+            x0=x0, y0=y0, x1=x1, y1=y1, text=html.escape(text)
+        ))
+    preamble = OCR_PREAMBLE.format(x0=min_x0, y0=min_y0, x1=max_x1, y1=max_y1).splitlines()
+    return preamble + content + OCR_POSTAMBLE
+
 def merge_to_free(merge_result, free_list):
     merge_result_buf, mr_buf = [], []