From cfa2fda7cc9adba1bdeae9d6bc8977b17f83c6c9 Mon Sep 17 00:00:00 2001
From: TAHRI Ahmed R <Ousret@users.noreply.github.com>
Date: Fri, 11 Oct 2019 14:58:15 +0200
Subject: [PATCH] Add more reliable numbers/stats  (#24)

* some numbers on how well it performs

* some numbers on how well it performs

* Update README.md

* diff between ftfy and chardet
---
 README.md         |  10 +-
 paper/README.md   | 343 ++++++++++++++++++++++++++++++++++++++++++++++
 paper/features.py | 283 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 635 insertions(+), 1 deletion(-)
 create mode 100644 paper/README.md
 create mode 100644 paper/features.py
diff --git a/README.md b/README.md
index 3745b164..68bd7023 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ This project offers you an alternative to **Universal Charset Encoding Detector*
 
 | Feature       | [Chardet](https://github.com/chardet/chardet)       | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
 | ------------- | :-------------: | :------------------: | :------------------: |
-| `Fast`         | ❌<br>          | ✅<br>             | ✅ <br>⚡ |
+| `Fast`         | ❌<br>          | ❌<br>             | ✅ <br> |
 | `Universal**`     | ❌            | ✅                 | ❌ |
 | `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
 | `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
@@ -45,6 +45,12 @@ This project offers you an alternative to **Universal Charset Encoding Detector*
 | `Detect spoken language` | ❌ | ✅ | N/A |
 | `Supported Encoding` | 30 | :tada: [90](https://charset-normalizer.readthedocs.io/en/latest/support.html)  | 40
 
+| Package       | Accuracy       | Mean per file (ns) | File per sec (est) |
+| ------------- | :-------------: | :------------------: | :------------------: |
+|      [chardet](https://github.com/chardet/chardet)       |     93.5 %     |     126 081 168 ns      |       7.931 file/sec        |
+|      [cchardet](https://github.com/PyYoshi/cChardet)      |     97.0 %     |      1 668 145 ns       |      **599.468 file/sec**      |
+| charset-normalizer |    **97.25 %**     |     209 503 253 ns      |       4.773 file/sec    |
+
 <p align="center">
 <img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://image.noelshack.com/fichiers/2019/31/5/1564761473-ezgif-5-cf1bd9dd66b0.gif" alt="Cat Reading Text" width="200"/>
 
@@ -119,6 +125,8 @@ What I want is to get readable text, the best I can.
 
 In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
 
+Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair broken Unicode strings, whereas charset-normalizer converts raw files in an unknown encoding to Unicode.
+
 ## 🍰 How
 
   - Discard all charset encoding table that could not fit the binary content.
diff --git a/paper/README.md b/paper/README.md
new file mode 100644
index 00000000..68f67c80
--- /dev/null
+++ b/paper/README.md
@@ -0,0 +1,343 @@
+## State of the art
+
+pre-requisite
+-------------
+
+  - chardet
+  - cchardet
+  - charset-normalizer
+
+These tests were run on charset-normalizer (1.3.1), chardet (3.0.4) and cchardet (2.1.4).
+
+  - files used in python chardet tests
+  - files used in python charset-normalizer
+  - script features.py
+
+how it is computed
+------------------
+
+  - `from time import perf_counter_ns` for performance measure
+  - reading file before measure
+  - using folder name as encoding hint (considering it is the right one)
+  - when the guessed encoding differs from the target encoding, verify whether it matters
+
+*if it matters*: if a package guessed cp1252 instead of cp1254, decode the bytes using both and compare the output strings.
+If they are equal, then it does not matter.
+
+run it yourself
+---------------
+
+```sh
+git clone https://github.com/ousret/charset_normalizer.git
+cd ./charset_normalizer/paper/
+python features.py
+```
+
+## Global results
+
+| Package       | Accuracy       | Mean per file (ns) | File per sec (est) |
+| ------------- | :-------------: | :------------------: | :------------------: |
+|      [chardet](https://github.com/chardet/chardet)       |     93.5 %     |     126 081 168 ns      |       7.931 file/sec        |
+|      [cchardet](https://github.com/PyYoshi/cChardet)      |     97.0 %     |      1 668 145 ns       |      **599.468 file/sec**      |
+| charset-normalizer |    **97.25 %**     |     209 503 253 ns      |       4.773 file/sec    |
+
+### Per encoding
+
+ascii
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |       201824       |      4954.812      |                            |
+|      cchardet      |    100.0     |       53517        |     18685.651      |                            |
+| charset-normalizer |     75.0     |      48069575      |       20.803       |                            |
+
+big5
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |     100.0     |     187318613      |       5.338        |                            |
+|      cchardet      |     100.0     |       420464       |      2378.325      |                            |
+| charset-normalizer |     100.0     |      29030648      |       34.446       |                            |
+
+cp932
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |     281405894      |       3.554        |                            |
+|      cchardet      |    0.0      |       448856       |      2227.886      |         shift_jis          |
+| charset-normalizer |    100.0     |     407520555      |       2.454        |                            |
+
+cp949
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |     235318707      |        4.25        |                            |
+|      cchardet      |    50.0     |       275061       |      3635.557      |           euc_kr           |
+| charset-normalizer |    100.0     |     283099072      |       3.532        |                            |
+
+
+euc_jp
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |     285472520      |       3.503        |                            |
+|      cchardet      |    96.55     |       152344       |      6564.092      |           cp1252           |
+| charset-normalizer |    96.55     |     223891487      |       4.466        |           cp1252           |
+
+euc_kr
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |     206658004      |       4.839        |                            |
+|      cchardet      |    100.0     |       379409       |      2635.678      |                            |
+| charset-normalizer |    96.88     |     237868022      |       4.204        |           gb2312           |
+
+gb2312
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |     204280068      |       4.895        |                            |
+|      cchardet      |    75.0     |       406609       |      2459.365      |          gb18030           |
+| charset-normalizer |    90.0     |     267700099      |       3.736        |  euc_jis_2004, big5hkscs   |
+
+
+cp855
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |      48681383      |       20.542       |                            |
+|      cchardet      |    100.0     |      3109405       |      321.605       |                            |
+| charset-normalizer |    100.0     |     131318072      |       7.615        |                            |
+
+cp866
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |      34948580      |       28.613       |                            |
+|      cchardet      |    100.0     |      2120040       |      471.689       |                            |
+| charset-normalizer |    100.0     |     134351478      |       7.443        |                            |
+
+iso2022_jp
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |  100.0     |       487661       |      2050.605      |                            |
+|      cchardet      |  100.0     |       11730        |     85251.492      |                            |
+| charset-normalizer |  100.0     |      53079390      |       18.84        |                            |
+
+
+iso2022_kr
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |  100.0     |       80494        |     12423.286      |                            |
+|      cchardet      |  100.0     |        8798        |     113662.196     |                            |
+| charset-normalizer |  100.0     |      28608808      |       34.954       |                            |
+
+
+latin_1
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |   100.0     |      10655475      |       93.848       |                            |
+|      cchardet      |   100.0     |       158693       |      6301.475      |                            |
+| charset-normalizer |   100.0     |     219482041      |       4.556        |                            |
+
+
+iso8859_2
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |  9.09     |      59510154      |       16.804       |          latin_1           |
+|      cchardet      |  100.0     |      1072225       |       932.64       |                            |
+| charset-normalizer |  100.0     |     364305515      |       2.745        |                            |
+
+
+iso8859_5
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |  100.0     |      41274746      |       24.228       |                            |
+|      cchardet      |  100.0     |      1931337       |      517.776       |                            |
+| charset-normalizer |  100.0     |     235748017      |       4.242        |                            |
+
+
+iso8859_6
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |  0.0      |      8770859       |      114.014       |        mac_cyrillic        |
+|      cchardet      |  100.0     |       248962       |      4016.677      |                            |
+| charset-normalizer |  100.0     |      58873846      |       16.985       |                            |
+
+iso8859_7
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |  90.91     |      26917723      |       37.15        |           cp1253           |
+|      cchardet      |  100.0     |       475691       |      2102.205      |                            |
+| charset-normalizer |  100.0     |     219005790      |       4.566        |                            |
+
+iso8859_9
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |  0.0      |      16149994      |       61.92        |          latin_1           |
+|      cchardet      |  100.0     |       303994       |      3289.539      |                            |
+| charset-normalizer |  33.33     |     313328579      |       3.192        |           cp1252           |
+
+koi8_r
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |      48102976      |       20.789       |                            |
+|      cchardet      |    100.0     |      4506055       |      221.924       |                            |
+| charset-normalizer |    100.0     |     285609163      |       3.501        |                            |
+
+
+mac_cyrillic
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       | 100.0     |      39849823      |       25.094       |                            |
+|      cchardet      | 100.0     |      2161876       |      462.561       |                            |
+| charset-normalizer | 100.0     |     273638883      |       3.654        |                            |
+
+
+shift_jis
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |  100.0     |     261271302      |       3.827        |                            |
+|      cchardet      |  100.0     |       199649       |      5008.79       |                            |
+| charset-normalizer |  96.67     |     394586122      |       2.534        |           cp932            |
+
+
+tis_620
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |   100.0     |      89417757      |       11.183       |                            |
+|      cchardet      |   100.0     |      2988084       |      334.663       |                            |
+| charset-normalizer |   100.0     |     367614610      |        2.72        |                            |
+
+
+utf_16
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |       24339        |     41086.322      |                            |
+|      cchardet      |    100.0     |        5718        |     174886.324     |                            |
+| charset-normalizer |    100.0     |      99274930      |       10.073       |                            |
+
+
+utf_32
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |       39001        |     25640.368      |                            |
+|      cchardet      |    100.0     |        9383        |     106575.722     |                            |
+| charset-normalizer |    100.0     |     181745938      |       5.502        |                            |
+
+
+utf_8
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |      86177619      |       11.604       |                            |
+|      cchardet      |    100.0     |       53297        |     18762.782      |                            |
+| charset-normalizer |    100.0     |     275800555      |       3.626        |                            |
+
+
+cp1250
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    16.67     |     570631080      |       1.752        |      cp1252, latin_1       |
+|      cchardet      |    100.0     |      12933035      |       77.321       |                            |
+| charset-normalizer |    100.0     |      86223475      |       11.598       |                            |
+
+
+cp1251
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |      39826493      |       25.109       |                            |
+|      cchardet      |    100.0     |      2292002       |       436.3        |                            |
+| charset-normalizer |    100.0     |      68469359      |       14.605       |                            |
+
+
+cp1252
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |      28613134      |       34.949       |                            |
+|      cchardet      |    50.0     |       330630       |      3024.529      |    iso8859_7, iso8859_9    |
+| charset-normalizer |    50.0     |     200121270      |       4.997        |           cp437            |
+
+
+cp1253
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    66.67     |      47471533      |       21.065       |         iso8859_7          |
+|      cchardet      |    100.0     |      2930658       |       341.22       |                            |
+| charset-normalizer |    100.0     |     168027309      |       5.951        |                            |
+
+
+cp1254
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    0.0      |      3659364       |      273.272       |          latin_1           |
+|      cchardet      |    100.0     |       84747        |     11799.828      |                            |
+| charset-normalizer |    0.0      |      77320217      |       12.933       |           cp1252           |
+
+
+cp1255
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    100.0     |      93093102      |       10.742       |                            |
+|      cchardet      |    100.0     |      2764209       |      361.767       |                            |
+| charset-normalizer |    100.0     |     119266978      |       8.385        |                            |
+
+cp1256
+-----
+
+|      Package       |  Accuracy | Mean per file (ns) | File per sec (est) | Confuse it with (sometime) |
+| ------------- | :-------------: | :------------------: | :------------------: | :------------------: |
+|      chardet       |    0.0      |     437728948      |       2.285        |        mac_cyrillic        |
+|      cchardet      |    100.0     |      14977980      |       66.765       |                            |
+| charset-normalizer |    100.0     |     336711209      |        2.97        |                            |
+
diff --git a/paper/features.py b/paper/features.py
new file mode 100644
index 00000000..8078bb29
--- /dev/null
+++ b/paper/features.py
@@ -0,0 +1,283 @@
+from loguru import logger
+
+from charset_normalizer.normalizer import CharsetNormalizerMatches as CnM
+from chardet import detect as chardet_detect
+from cchardet import detect as cchardet_detect
+
+from charset_normalizer.version import __version__ as charset_normalizer_ver
+from chardet import __version__ as chardet_version
+from cchardet import __version__ as cchardet_version
+
+from time import perf_counter_ns, sleep
+from prettytable import PrettyTable
+from encodings.aliases import aliases
+
+from os.path import basename
+from glob import glob
+from statistics import mean
+
+report = {
+    # One scoreboard per benchmarked package; every entry has the same shape.
+    'chardet': {
+        'success': 0,  # files counted as correctly detected
+        'failure': 0,  # files detected wrongly or not detected at all
+        'performances': list(),  # detection time of each tested file, in nanoseconds
+        'encodings': dict()  # per expected encoding: same counters plus 'confuse_it_with'
+    },
+
+    'cchardet': {
+        'success': 0,
+        'failure': 0,
+        'performances': list(),
+        'encodings': dict()
+    },
+
+    'charset-normalizer': {
+        'success': 0,
+        'failure': 0,
+        'performances': list(),
+        'encodings': dict()
+    },
+
+}
+
+
+def also_could_be(encoding):
+    """
+    Map a codec name to its canonical name in ``encodings.aliases``.
+    :return: alias target if any, else the name with ``_sig`` stripped
+    """
+    if encoding == 'uhc':  # for cchardet. UHC ==> EUC_KR
+        return 'euc_kr'
+    encoding = encoding.replace('_sig', '')
+    return aliases.get(encoding, encoding)
+
+
+def does_it_matter(raw_bytes, detected_encoding, should_be_encoding):
+    """Tell whether the two codecs decode the payload to different text.
+    :param bytes raw_bytes: payload to decode with both codecs
+    :param str detected_encoding: codec name guessed by a detector
+    :param str should_be_encoding: codec name the payload is expected to use
+    :return: False if names or decoded texts are equal, True if they differ or decoding fails
+    """
+    if detected_encoding == should_be_encoding:
+        return False
+    try:
+        return raw_bytes.decode(detected_encoding) != raw_bytes.decode(should_be_encoding)
+    except (UnicodeDecodeError, LookupError):  # LookupError: codec name unknown to Python
+        return True
+
+
+if __name__ == '__main__':
+    # Benchmark driver: run the three detectors on every sample file, then print stats.
+    logger.warning(
+        "Running feature benchmark on charset-normalizer ({v1}), chardet ({v2}) and cchardet ({v3}).",
+        v1=charset_normalizer_ver,
+        v2=chardet_version,
+        v3=cchardet_version
+    )
+    # Expected layout is <root>/<encoding name>/<file>: the folder names the expected codec.
+    files_queue = glob('../data/chardet/**/*')
+
+    for path in files_queue:
+        # glob() yields backslash-separated paths on Windows and '/' elsewhere: accept both.
+        target_encoding_dir = path.replace('\\', '/').split('/')[-2].lower().replace('-', '_')
+
+        if target_encoding_dir.startswith('windows_'):
+            target_encoding_dir = '_'.join(target_encoding_dir.split('_')[:2])
+        if target_encoding_dir.startswith('iso'):
+            target_encoding_dir = 'iso{n1}_{n2}'.format(n1=target_encoding_dir.split('_')[1],
+                                                        n2=target_encoding_dir.split('_')[2])
+        should_be_encoding = None
+        # Resolve the folder name against the alias table (either as alias or as target).
+        for a, b in aliases.items():
+            if a == target_encoding_dir:
+                should_be_encoding = b
+                break
+            elif b == target_encoding_dir:
+                should_be_encoding = target_encoding_dir
+                break
+
+        if should_be_encoding is None:
+            logger.warning('{encoding} could not be identified in encodings.aliases.', encoding=target_encoding_dir)
+            continue
+
+        if should_be_encoding not in report['charset-normalizer']['encodings'].keys():
+            for competitor in report.keys():
+                report[competitor]['encodings'][should_be_encoding] = {
+                    'performances': list(),
+                    'success': 0,
+                    'failure': 0,
+                    'confuse_it_with': set()
+                }
+
+        logger.info('File "{filename}" is going to be tested. File owner claim it is encoded with {target_encoding}.', filename=basename(path), target_encoding=should_be_encoding)
+
+        before_reading_file = perf_counter_ns()
+
+        with open(path, 'rb') as fp:
+            raw_content = fp.read()
+        after_reading_file = perf_counter_ns()
+
+        logger.info('"{filename}" was loaded within {ts} nanoseconds.', filename=basename(path), ts=(after_reading_file-before_reading_file))
+
+        before_chardet_decide = perf_counter_ns()
+
+        chardet_result = chardet_detect(raw_content)
+
+        after_chardet_decide = perf_counter_ns()
+
+        logger.info('"{filename}" content was identified within {ts} nanoseconds by CHARDET.', filename=basename(path),
+                    ts=(after_chardet_decide - before_chardet_decide))
+
+        report['chardet']['performances'].append(after_chardet_decide - before_chardet_decide)
+        report['chardet']['encodings'][should_be_encoding]['performances'].append(after_chardet_decide - before_chardet_decide)
+
+        if chardet_result.get('encoding') is None:
+            logger.warning('"{filename}" content could not identified by CHARDET.', filename=basename(path))
+            report['chardet']['failure'] += 1
+            report['chardet']['encodings'][should_be_encoding]['failure'] += 1
+        elif should_be_encoding != also_could_be(chardet_result.get('encoding').lower().replace('-', '_')) and does_it_matter(raw_content, should_be_encoding, also_could_be(chardet_result.get('encoding').lower().replace('-', '_'))) is True:
+            logger.error('"{filename}" content could not identified properly by CHARDET. ({got} instead of {should_be})', filename=basename(path), got=chardet_result.get('encoding').replace('-', '_'), should_be=should_be_encoding)
+            report['chardet']['failure'] += 1
+            report['chardet']['encodings'][should_be_encoding]['failure'] += 1
+            report['chardet']['encodings'][should_be_encoding]['confuse_it_with'].add(
+                also_could_be(chardet_result.get('encoding').lower().replace('-', '_')))
+        else:
+            report['chardet']['success'] += 1
+            report['chardet']['encodings'][should_be_encoding]['success'] += 1
+
+        before_cchardet_decide = perf_counter_ns()
+
+        cchardet_result = cchardet_detect(raw_content)
+
+        after_cchardet_decide = perf_counter_ns()
+
+        logger.info('"{filename}" content was identified within {ts} nanoseconds by CCHARDET.', filename=basename(path),
+                    ts=(after_cchardet_decide - before_cchardet_decide))
+
+        report['cchardet']['performances'].append(after_cchardet_decide - before_cchardet_decide)
+        report['cchardet']['encodings'][should_be_encoding]['performances'].append(
+            after_cchardet_decide - before_cchardet_decide)
+
+        if cchardet_result.get('encoding') is None:
+            logger.warning('"{filename}" content could not identified by CCHARDET.', filename=basename(path))
+            report['cchardet']['failure'] += 1
+            report['cchardet']['encodings'][should_be_encoding]['failure'] += 1
+        elif should_be_encoding != also_could_be(cchardet_result.get('encoding').lower().replace('-', '_')) and does_it_matter(raw_content, should_be_encoding, also_could_be(cchardet_result.get('encoding').lower().replace('-', '_'))) is True:
+            logger.error('"{filename}" content could not identified properly by CCHARDET. ({got} instead of {should_be})', filename=basename(path), got=cchardet_result.get('encoding').replace('-', '_'), should_be=should_be_encoding)
+            report['cchardet']['failure'] += 1
+            report['cchardet']['encodings'][should_be_encoding]['failure'] += 1
+            report['cchardet']['encodings'][should_be_encoding]['confuse_it_with'].add(
+                also_could_be(cchardet_result.get('encoding').lower().replace('-', '_')))
+        else:
+            report['cchardet']['success'] += 1
+            report['cchardet']['encodings'][should_be_encoding]['success'] += 1
+
+        before_cn_decide = perf_counter_ns()
+
+        cn_result = CnM.from_bytes(raw_content).best().first()
+
+        after_cn_decide = perf_counter_ns()
+
+        logger.info('"{filename}" content was identified within {ts} nanoseconds by CHARSET-NORMALIZER.', filename=basename(path),
+                    ts=(after_cn_decide - before_cn_decide))
+
+        report['charset-normalizer']['performances'].append(after_cn_decide - before_cn_decide)
+        report['charset-normalizer']['encodings'][should_be_encoding]['performances'].append(
+            after_cn_decide - before_cn_decide)
+
+        if cn_result is None:
+            logger.warning('"{filename}" content could not identified by CHARSET-NORMALIZER.', filename=basename(path))
+            report['charset-normalizer']['failure'] += 1
+            report['charset-normalizer']['encodings'][should_be_encoding]['failure'] += 1
+        elif should_be_encoding not in ' '.join(cn_result.could_be_from_charset):
+            logger.error('"{filename}" content could not identified properly by CHARSET-NORMALIZER. ({got} instead of {should_be})', filename=basename(path), got=cn_result.encoding, should_be=should_be_encoding)
+            report['charset-normalizer']['failure'] += 1
+            report['charset-normalizer']['encodings'][should_be_encoding]['failure'] += 1
+            report['charset-normalizer']['encodings'][should_be_encoding]['confuse_it_with'].add(
+                cn_result.encoding)
+        else:
+            report['charset-normalizer']['success'] += 1
+            report['charset-normalizer']['encodings'][should_be_encoding]['success'] += 1
+
+    if not report['charset-normalizer']['performances']:  # mean() below rejects empty data
+        raise SystemExit('No sample file was benchmarked: run this script from the paper/ folder.')
+    # Publish result
+    logger.info('Publishing results')
+    sleep(0.5)
+    x = PrettyTable(
+        [
+            'Package',
+            'Success Rate',
+            'Mean per file (ns)',
+            'File per sec (est)'
+        ]
+    )
+
+    for package in report.keys():
+
+        mean_perf_ns = round(
+            mean(
+                report[package]['performances']
+            )
+        )
+
+        x.add_row(
+            [
+                package,
+                round(
+                    (report[package]['success'] / (report[package]['success'] + report[package]['failure'])) * 100,
+                    ndigits=2
+                ),
+                mean_perf_ns,
+                round(
+                    1. / (mean_perf_ns / 1e+9),
+                    ndigits=3
+                )
+            ]
+        )
+
+    print(
+        x
+    )
+
+    y = PrettyTable(
+        [
+            'Package',
+            'Encoding',
+            'Success Rate',
+            'Mean per file (ns)',
+            'File per sec (est)',
+            'Confuse it with (sometime)'
+        ]
+    )
+
+    for encoding in report['charset-normalizer']['encodings'].keys():
+        for package in report.keys():
+            mean_perf_ns = round(
+                mean(
+                    report[package]['encodings'][encoding]['performances']
+                )
+            )
+
+            y.add_row(
+                [
+                    package,
+                    encoding,
+                    round(
+                        (report[package]['encodings'][encoding]['success'] / (report[package]['encodings'][encoding]['success'] + report[package]['encodings'][encoding]['failure'])) * 100,
+                        ndigits=2
+                    ),
+                    mean_perf_ns,
+                    round(
+                        1. / (mean_perf_ns / 1e+9),
+                        ndigits=3
+                    ),
+                    ', '.join(report[package]['encodings'][encoding]['confuse_it_with'])
+                ]
+            )
+
+    print(
+        y
+    )