From 14341309255fe539155a1d5ea54733dba2c7a899 Mon Sep 17 00:00:00 2001 From: rkcosmos Date: Mon, 22 Feb 2021 20:19:51 +0700 Subject: [PATCH] v1.2.5 --- README.md | 20 +++++++------------- easyocr/__init__.py | 2 +- easyocr/detection.py | 7 ++++++- easyocr/easyocr.py | 7 ++++--- easyocr/model/modules.py | 5 ++++- easyocr/recognition.py | 31 +++++++++++++++++++++---------- releasenotes.md | 3 +++ setup.py | 2 +- 8 files changed, 47 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index b151c4654..c15835bfd 100644 --- a/README.md +++ b/README.md @@ -6,19 +6,20 @@ [![Tweet](https://img.shields.io/twitter/url/https/github.com/JaidedAI/EasyOCR.svg?style=social)](https://twitter.com/intent/tweet?text=Check%20out%20this%20awesome%20library:%20EasyOCR%20https://github.com/JaidedAI/EasyOCR) [![Twitter](https://img.shields.io/badge/twitter-@JaidedAI-blue.svg?style=flat)](https://twitter.com/JaidedAI) -Ready-to-use OCR with 80+ languages supported including Chinese, Japanese, Korean and Thai. +Ready-to-use OCR with 80+ [supported languages](https://www.jaided.ai/easyocr) and all popular writing scripts including Latin, Chinese, Arabic, Devanagari, Cyrillic, etc. + +[Try Demo on our website](https://www.jaided.ai/easyocr) ## What's new +- 22 February 2021 - Version 1.2.5 + - Add dynamic quantization for faster CPU inference (it is enabled by default for CPU mode) + - More sensible confidence score - 7 February 2021 - Version 1.2.4 - Faster CPU inference speed by using dynamic input shape (recognition rate increases by around 100% for images with a lot of text) - 1 February 2021 - Version 1.2.3 - Add `setLanguageList` method to `Reader` class. This is a convenient api for changing languages (within the same model) after creating class instance. - Small change on text box merging. 
(thanks [z-pc](https://github.com/z-pc), see [PR](https://github.com/JaidedAI/EasyOCR/pull/338)) - [Basic Demo on website](https://www.jaided.ai/easyocr) -- 5 January 2021 - Version 1.2.2 - - Add `optimal_num_chars` to `detect` method. If specified, bounding boxes with estimated number of characters near this value are returned first. (thanks [@adamfrees](https://github.com/adamfrees)) - - Add `rotation_info` to `readtext` method. Allow EasyOCR to rotate each text box and return the one with the best confident score. Eligible values are 90, 180 and 270. For example, try [90, 180 ,270] for all possible text orientations. (thanks [@mijoo308](https://github.com/mijoo308)) - - Update [documentation](https://www.jaided.ai/easyocr/documentation). - [Read all released notes](https://github.com/JaidedAI/EasyOCR/blob/master/releasenotes.md) @@ -34,13 +35,6 @@ Ready-to-use OCR with 80+ languages supported including Chinese, Japanese, Korea ![example3](examples/example3.png) -## Supported Languages - -We are currently supporting 80+ languages. See [list of supported languages](https://www.jaided.ai/easyocr). - -## Demo - -[Try our demo on website: Jaided AI](https://www.jaided.ai/easyocr) ## Installation @@ -141,7 +135,7 @@ $ easyocr -l ch_sim en -f chinese.jpg --detail=1 --gpu=True 7. Restructure code to support swappable detection and recognition algorithm. The api should be as easy as ``` python -reader = easyocr.Reader(['en'], detection='DB', recognition = 'CNN_Transformer') +reader = easyocr.Reader(['en'], detection='DB', recognition = 'Transformer') ``` The idea is to be able to plug-in any state-of-the-art model into EasyOCR. There are a lot of geniuses trying to make better detection/recognition model. We are not trying to be a genius here, just make genius's works quickly accessible to the public ... for free. (well I believe most geniuses want their work to create positive impact as fast/big as possible) The pipeline should be something like below diagram. 
Grey slots are placeholders for changeable light blue modules. diff --git a/easyocr/__init__.py b/easyocr/__init__.py index 53efe300c..ef99398a7 100644 --- a/easyocr/__init__.py +++ b/easyocr/__init__.py @@ -1,3 +1,3 @@ from .easyocr import Reader -__version__ = '1.2.4' +__version__ = '1.2.5' diff --git a/easyocr/detection.py b/easyocr/detection.py index b6f07b522..b1a4b7135 100644 --- a/easyocr/detection.py +++ b/easyocr/detection.py @@ -57,11 +57,16 @@ def test_net(canvas_size, mag_ratio, net, image, text_threshold, link_threshold, return boxes, polys -def get_detector(trained_model, device='cpu'): +def get_detector(trained_model, device='cpu', quantize=True): net = CRAFT() if device == 'cpu': net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device))) + if quantize: + try: + torch.quantization.quantize_dynamic(net, dtype=torch.qint8, inplace=True) + except: + pass else: net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device))) net = torch.nn.DataParallel(net).to(device) diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py index 6d61da731..61597c21a 100644 --- a/easyocr/easyocr.py +++ b/easyocr/easyocr.py @@ -31,7 +31,7 @@ class Reader(object): def __init__(self, lang_list, gpu=True, model_storage_directory=None, user_network_directory=None, recog_network = 'standard', download_enabled=True, detector=True, recognizer=True, - verbose=True): + verbose=True, quantize=True): """Create an EasyOCR Reader. 
Parameters: @@ -226,7 +226,7 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None, dict_list[lang] = os.path.join(BASE_PATH, 'dict', lang + ".txt") if detector: - self.detector = get_detector(detector_path, self.device) + self.detector = get_detector(detector_path, self.device, quantize) if recognizer: if recog_network == 'standard': network_params = { @@ -244,7 +244,7 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None, network_params = recog_config['network_params'] self.recognizer, self.converter = get_recognizer(recog_network, network_params,\ self.character, separator_list,\ - dict_list, model_path, device = self.device) + dict_list, model_path, device = self.device, quantize=quantize) def setModelLanguage(self, language, lang_list, list_lang, list_lang_string): self.model_lang = language @@ -315,6 +315,7 @@ def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,\ if (horizontal_list==None) and (free_list==None): y_max, x_max = img_cv_grey.shape horizontal_list = [[0, x_max, 0, y_max]] + free_list = [] # without gpu/parallelization, it is faster to process image one by one if ((batch_size == 1) or (self.device == 'cpu')) and not rotation_info: diff --git a/easyocr/model/modules.py b/easyocr/model/modules.py index 31d665d3f..d8b6d92eb 100644 --- a/easyocr/model/modules.py +++ b/easyocr/model/modules.py @@ -84,7 +84,10 @@ def forward(self, input): input : visual feature [batch_size x T x input_size] output : contextual feature [batch_size x T x output_size] """ - self.rnn.flatten_parameters() + try: # multi gpu needs this + self.rnn.flatten_parameters() + except: # quantization doesn't work with this + pass recurrent, _ = self.rnn(input) # batch_size x T x input_size -> batch_size x T x (2*hidden_size) output = self.linear(recurrent) # batch_size x T x output_size return output diff --git a/easyocr/recognition.py b/easyocr/recognition.py index b09c026ce..87dc09205 100644 --- a/easyocr/recognition.py +++ 
b/easyocr/recognition.py @@ -10,6 +10,9 @@ from .utils import CTCLabelConverter import math +def custom_mean(x): + return x.prod()**(2.0/np.sqrt(len(x))) + def contrast_grey(img): high = np.percentile(img, 90) low = np.percentile(img, 10) @@ -130,17 +133,26 @@ def recognizer_predict(model, converter, test_loader, batch_max_length,\ k = preds_prob.cpu().detach().numpy() preds_str = converter.decode_wordbeamsearch(k, beamWidth=beamWidth) - preds_max_prob, _ = preds_prob.max(dim=2) + preds_prob = preds_prob.cpu().detach().numpy() + values = preds_prob.max(axis=2) + indices = preds_prob.argmax(axis=2) + preds_max_prob = [] + for v,i in zip(values, indices): + max_probs = v[i!=0] + if len(max_probs)>0: + preds_max_prob.append(max_probs) + else: + preds_max_prob.append([0]) for pred, pred_max_prob in zip(preds_str, preds_max_prob): - confidence_score = pred_max_prob.cumprod(dim=0)[-1] - result.append([pred, confidence_score.item()]) + confidence_score = custom_mean(pred_max_prob) + result.append([pred, confidence_score]) return result def get_recognizer(recog_network, network_params, character,\ separator_list, dict_list, model_path,\ - device = 'cpu'): + device = 'cpu', quantize = True): converter = CTCLabelConverter(character, separator_list, dict_list) num_class = len(converter.character) @@ -160,6 +172,11 @@ def get_recognizer(recog_network, network_params, character,\ new_key = key[7:] new_state_dict[new_key] = value model.load_state_dict(new_state_dict) + if quantize: + try: + torch.quantization.quantize_dynamic(model, dtype=torch.qint8, inplace=True) + except: + pass else: model = torch.nn.DataParallel(model).to(device) model.load_state_dict(torch.load(model_path, map_location=device)) @@ -213,10 +230,4 @@ def get_text(character, imgH, imgW, recognizer, converter, image_list,\ else: result.append( (box, pred1[0], pred1[1]) ) - #confidence_score = pred_max_prob.cumprod(dim=0)[-1] - #if confidence_score.item() > filter_ths: - # print(pred, confidence_score.item()) - 
#else: - # print('not sure', pred, confidence_score.item()) - return result diff --git a/releasenotes.md b/releasenotes.md index e8abf0b17..26f85fa22 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -1,3 +1,6 @@ +- 22 February 2021 - Version 1.2.5 + - Add dynamic quantization for faster CPU inference (it is enabled by default for CPU mode) + - More sensible confidence score - 7 February 2021 - Version 1.2.4 - Faster CPU inference speed by using dynamic input shape (recognition rate increases by around 100% for images with a lot of text) - 1 February 2021 - Version 1.2.3 diff --git a/setup.py b/setup.py index fd2807f52..4bca0a145 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ def readme(): name='easyocr', packages=['easyocr'], include_package_data=True, - version='1.2.4', + version='1.2.5', install_requires=requirements, entry_points={"console_scripts": ["easyocr= easyocr.cli:main"]}, license='Apache License 2.0',