v1.2.5

JaidedAI · Feb 22, 2021 · 1434130 · 1434130
1 parent 6a8e71d
commit 1434130
Show file tree

Hide file tree

Showing 8 changed files with 47 additions and 30 deletions.
diff --git a/README.md b/README.md
@@ -6,19 +6,20 @@
 [![Tweet](https://img.shields.io/twitter/url/https/github.com/JaidedAI/EasyOCR.svg?style=social)](https://twitter.com/intent/tweet?text=Check%20out%20this%20awesome%20library:%20EasyOCR%20https://github.com/JaidedAI/EasyOCR)
 [![Twitter](https://img.shields.io/badge/twitter-@JaidedAI-blue.svg?style=flat)](https://twitter.com/JaidedAI)
 
-Ready-to-use OCR with 80+ languages supported including Chinese, Japanese, Korean and Thai.
+Ready-to-use OCR with 80+ [supported languages](https://www.jaided.ai/easyocr) and all popular writing scripts including Latin, Chinese, Arabic, Devanagari, Cyrillic, etc.
+
+[Try Demo on our website](https://www.jaided.ai/easyocr)
 
 ## What's new
+- 22 February 2021 - Version 1.2.5
+    - Add dynamic quantization for faster CPU inference (it is enabled by default for CPU mode)
+    - More sensible confidence score
 - 7 February 2021 - Version 1.2.4
     - Faster CPU inference speed by using dynamic input shape (recognition rate increases by around 100% for images with a lot of text)
 - 1 February 2021 - Version 1.2.3
     - Add `setLanguageList` method to `Reader` class. This is a convenient API for changing languages (within the same model) after creating a class instance.
     - Small change on text box merging. (thanks [z-pc](https://github.com/z-pc), see [PR](https://github.com/JaidedAI/EasyOCR/pull/338))
     - [Basic Demo on website](https://www.jaided.ai/easyocr)
-- 5 January 2021 - Version 1.2.2
-    - Add `optimal_num_chars` to `detect` method. If specified, bounding boxes with estimated number of characters near this value are returned first. (thanks [@adamfrees](https://github.com/adamfrees))
-    - Add `rotation_info` to `readtext` method. Allow EasyOCR to rotate each text box and return the one with the best confident score. Eligible values are 90, 180 and 270. For example, try [90, 180 ,270] for all possible text orientations. (thanks [@mijoo308](https://github.com/mijoo308))
-    - Update [documentation](https://www.jaided.ai/easyocr/documentation).
 
 - [Read all release notes](https://github.com/JaidedAI/EasyOCR/blob/master/releasenotes.md)
 
@@ -34,13 +35,6 @@ Ready-to-use OCR with 80+ languages supported including Chinese, Japanese, Korea
 
 ![example3](examples/example3.png)
 
-## Supported Languages
-
-We are currently supporting 80+ languages. See [list of supported languages](https://www.jaided.ai/easyocr).
-
-## Demo
-
-[Try our demo on website: Jaided AI](https://www.jaided.ai/easyocr)
 
 ## Installation
 
@@ -141,7 +135,7 @@ $ easyocr -l ch_sim en -f chinese.jpg --detail=1 --gpu=True
 7. Restructure code to support swappable detection and recognition algorithm.
 The api should be as easy as
 ``` python
-reader = easyocr.Reader(['en'], detection='DB', recognition = 'CNN_Transformer')
+reader = easyocr.Reader(['en'], detection='DB', recognition = 'Transformer')
 ```
 The idea is to be able to plug-in any state-of-the-art model into EasyOCR. There are a lot of geniuses trying to make better detection/recognition model. We are not trying to be a genius here, just make genius's works quickly accessible to the public ... for free. (well I believe most geniuses want their work to create positive impact as fast/big as possible) The pipeline should be something like below diagram. Grey slots are placeholders for changeable light blue modules.
 

diff --git a/easyocr/__init__.py b/easyocr/__init__.py
@@ -1,3 +1,3 @@
 from .easyocr import Reader
 
-__version__ = '1.2.4'
+__version__ = '1.2.5'
diff --git a/easyocr/detection.py b/easyocr/detection.py
@@ -57,11 +57,16 @@ def test_net(canvas_size, mag_ratio, net, image, text_threshold, link_threshold,
 
     return boxes, polys
 
-def get_detector(trained_model, device='cpu'):
+def get_detector(trained_model, device='cpu', quantize=True):
     net = CRAFT()
 
     if device == 'cpu':
         net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
+        if quantize:
+            try:
+                torch.quantization.quantize_dynamic(net, dtype=torch.qint8, inplace=True)
+            except:
+                pass
     else:
         net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
         net = torch.nn.DataParallel(net).to(device)

diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py
@@ -31,7 +31,7 @@ class Reader(object):
     def __init__(self, lang_list, gpu=True, model_storage_directory=None,
                  user_network_directory=None, recog_network = 'standard',
                  download_enabled=True, detector=True, recognizer=True,
-                 verbose=True):
+                 verbose=True, quantize=True):
         """Create an EasyOCR Reader.
 
         Parameters:
@@ -226,7 +226,7 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None,
             dict_list[lang] = os.path.join(BASE_PATH, 'dict', lang + ".txt")
 
         if detector:
-            self.detector = get_detector(detector_path, self.device)
+            self.detector = get_detector(detector_path, self.device, quantize)
         if recognizer:
             if recog_network == 'standard':
                 network_params = {
@@ -244,7 +244,7 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None,
                 network_params = recog_config['network_params']
             self.recognizer, self.converter = get_recognizer(recog_network, network_params,\
                                                          self.character, separator_list,\
-                                                         dict_list, model_path, device = self.device)
+                                                         dict_list, model_path, device = self.device, quantize=quantize)
 
     def setModelLanguage(self, language, lang_list, list_lang, list_lang_string):
         self.model_lang = language
@@ -315,6 +315,7 @@ def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,\
         if (horizontal_list==None) and (free_list==None):
             y_max, x_max = img_cv_grey.shape
             horizontal_list = [[0, x_max, 0, y_max]]
+            free_list = []
 
         # without gpu/parallelization, it is faster to process image one by one
         if ((batch_size == 1) or (self.device == 'cpu')) and not rotation_info:

diff --git a/easyocr/model/modules.py b/easyocr/model/modules.py
@@ -84,7 +84,10 @@ def forward(self, input):
         input : visual feature [batch_size x T x input_size]
         output : contextual feature [batch_size x T x output_size]
         """
-        self.rnn.flatten_parameters()
+        try: # multi gpu needs this
+            self.rnn.flatten_parameters()
+        except: # quantization doesn't work with this 
+            pass
         recurrent, _ = self.rnn(input)  # batch_size x T x input_size -> batch_size x T x (2*hidden_size)
         output = self.linear(recurrent)  # batch_size x T x output_size
         return output

diff --git a/easyocr/recognition.py b/easyocr/recognition.py
@@ -10,6 +10,9 @@
 from .utils import CTCLabelConverter
 import math
 
+def custom_mean(x):
+    return x.prod()**(2.0/np.sqrt(len(x)))
+
 def contrast_grey(img):
     high = np.percentile(img, 90)
     low  = np.percentile(img, 10)
@@ -130,17 +133,26 @@ def recognizer_predict(model, converter, test_loader, batch_max_length,\
                 k = preds_prob.cpu().detach().numpy()
                 preds_str = converter.decode_wordbeamsearch(k, beamWidth=beamWidth)
 
-            preds_max_prob, _ = preds_prob.max(dim=2)
+            preds_prob = preds_prob.cpu().detach().numpy()
+            values = preds_prob.max(axis=2)
+            indices = preds_prob.argmax(axis=2)
+            preds_max_prob = []
+            for v,i in zip(values, indices):
+                max_probs = v[i!=0]
+                if len(max_probs)>0:
+                    preds_max_prob.append(max_probs)
+                else:
+                    preds_max_prob.append([0])
 
             for pred, pred_max_prob in zip(preds_str, preds_max_prob):
-                confidence_score = pred_max_prob.cumprod(dim=0)[-1]
-                result.append([pred, confidence_score.item()])
+                confidence_score = custom_mean(pred_max_prob)
+                result.append([pred, confidence_score])
 
     return result
 
 def get_recognizer(recog_network, network_params, character,\
                    separator_list, dict_list, model_path,\
-                   device = 'cpu'):
+                   device = 'cpu', quantize = True):
 
     converter = CTCLabelConverter(character, separator_list, dict_list)
     num_class = len(converter.character)
@@ -160,6 +172,11 @@ def get_recognizer(recog_network, network_params, character,\
             new_key = key[7:]
             new_state_dict[new_key] = value
         model.load_state_dict(new_state_dict)
+        if quantize:
+            try:
+                torch.quantization.quantize_dynamic(model, dtype=torch.qint8, inplace=True)
+            except:
+                pass
     else:
         model = torch.nn.DataParallel(model).to(device)
         model.load_state_dict(torch.load(model_path, map_location=device))
@@ -213,10 +230,4 @@ def get_text(character, imgH, imgW, recognizer, converter, image_list,\
         else:
             result.append( (box, pred1[0], pred1[1]) )
 
-    #confidence_score = pred_max_prob.cumprod(dim=0)[-1]
-    #if confidence_score.item() > filter_ths:
-    #    print(pred, confidence_score.item())
-    #else:
-    #    print('not sure', pred, confidence_score.item())
-
     return result
diff --git a/releasenotes.md b/releasenotes.md
@@ -1,3 +1,6 @@
+- 22 February 2021 - Version 1.2.5
+    - Add dynamic quantization for faster CPU inference (it is enabled by default for CPU mode)
+    - More sensible confidence score
 - 7 February 2021 - Version 1.2.4
     - Faster CPU inference speed by using dynamic input shape (recognition rate increases by around 100% for images with a lot of text)
 - 1 February 2021 - Version 1.2.3

diff --git a/setup.py b/setup.py
@@ -17,7 +17,7 @@ def readme():
     name='easyocr',
     packages=['easyocr'],
     include_package_data=True,
-    version='1.2.4',
+    version='1.2.5',
     install_requires=requirements,
     entry_points={"console_scripts": ["easyocr= easyocr.cli:main"]},
     license='Apache License 2.0',