From e770c9911f874cd367ac361104a5eaca0e2da9f3 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 14 Aug 2022 19:56:50 +0200
Subject: [PATCH 01/63] :bookmark: Bump version to 3.0.0 b1

---
 charset_normalizer/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charset_normalizer/version.py b/charset_normalizer/version.py
index af7e749e..1bee3dc5 100644
--- a/charset_normalizer/version.py
+++ b/charset_normalizer/version.py
@@ -2,5 +2,5 @@
 Expose version
 """
 
-__version__ = "2.1.0"
+__version__ = "3.0.0b1"
 VERSION = __version__.split(".")

From 4e9b2d32223ded694a5fd7b48e4d26032c9149dc Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 14 Aug 2022 19:57:29 +0200
Subject: [PATCH 02/63] :wrench: Add support to build Whl using MYPYC

---
 setup.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/setup.py b/setup.py
index 298d12be..415e9051 100644
--- a/setup.py
+++ b/setup.py
@@ -3,11 +3,38 @@
 
 import io
 import os
+import sys
 
 from re import search
 
 from setuptools import find_packages, setup
 
+USE_MYPYC = False
+
+if len(sys.argv) > 1 and sys.argv[1] == "--use-mypyc":
+    sys.argv.pop(1)
+    USE_MYPYC = True
+if os.getenv("CHARSET_NORMALIZER_USE_MYPYC", None) == "1":
+    USE_MYPYC = True
+
+if USE_MYPYC:
+    from mypyc.build import mypycify
+
+    MYPYC_MODULES = mypycify([
+        "charset_normalizer/__init__.py",
+        "charset_normalizer/api.py",
+        "charset_normalizer/cd.py",
+        "charset_normalizer/constant.py",
+        "charset_normalizer/legacy.py",
+        "charset_normalizer/md.py",
+        "charset_normalizer/models.py",
+        "charset_normalizer/utils.py",
+        "charset_normalizer/assets/__init__.py"
+    ], opt_level="4")
+else:
+    MYPYC_MODULES = None
+
+
 def get_version():
     with open('charset_normalizer/version.py') as version_file:
         return search(r"""__version__\s+=\s+(['"])(?P<version>.+?)\1""",
@@ -83,4 +110,5 @@ def get_version():
         'Bug Reports': 'https://github.com/Ousret/charset_normalizer/issues',
         'Documentation': 'https://charset-normalizer.readthedocs.io/en/latest',
     },
+    ext_modules=MYPYC_MODULES
 )

From 482d2e38551e9ee0606284eb34fe4dae9920d3b1 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 14 Aug 2022 19:58:02 +0200
Subject: [PATCH 03/63] :wrench: remove opt level override

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 415e9051..0f6ae096 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,7 @@
         "charset_normalizer/models.py",
         "charset_normalizer/utils.py",
         "charset_normalizer/assets/__init__.py"
-    ], opt_level="4")
+    ])
 else:
     MYPYC_MODULES = None
 

From e74851a4910fbc8bdb486ea020f4f845de7ba2ec Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 14 Aug 2022 20:31:06 +0200
Subject: [PATCH 04/63] :fire: remove deprecated

---
 charset_normalizer/__init__.py | 15 ++--------
 charset_normalizer/api.py      | 45 ----------------------------
 charset_normalizer/legacy.py   | 54 +---------------------------------
 charset_normalizer/models.py   | 44 ---------------------------
 charset_normalizer/utils.py    |  8 +----
 5 files changed, 4 insertions(+), 162 deletions(-)

diff --git a/charset_normalizer/__init__.py b/charset_normalizer/__init__.py
index 2dcaf56f..ebb5da89 100644
--- a/charset_normalizer/__init__.py
+++ b/charset_normalizer/__init__.py
@@ -21,14 +21,8 @@
 """
 import logging
 
-from .api import from_bytes, from_fp, from_path, normalize
-from .legacy import (
-    CharsetDetector,
-    CharsetDoctor,
-    CharsetNormalizerMatch,
-    CharsetNormalizerMatches,
-    detect,
-)
+from .api import from_bytes, from_fp, from_path
+from .legacy import detect
 from .models import CharsetMatch, CharsetMatches
 from .utils import set_logging_handler
 from .version import VERSION, __version__
 
@@ -37,14 +31,9 @@
     "from_fp",
     "from_path",
     "from_bytes",
-    "normalize",
     "detect",
     "CharsetMatch",
     "CharsetMatches",
-    "CharsetNormalizerMatch",
-    "CharsetNormalizerMatches",
-    "CharsetDetector",
-    "CharsetDoctor",
     "__version__",
     "VERSION",
     "set_logging_handler",
diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py
index 3697291c..c4d3c7c3 100644
--- a/charset_normalizer/api.py
+++ b/charset_normalizer/api.py
@@ -1,6 +1,5 @@
 import logging
 from os import PathLike
-from os.path import basename, splitext
 from typing import Any, BinaryIO, List, Optional, Set
 
 from .cd import (
@@ -532,47 +531,3 @@ def from_path(
         preemptive_behaviour,
         explain,
     )
-
-
-def normalize(
-    path: "PathLike[Any]",
-    steps: int = 5,
-    chunk_size: int = 512,
-    threshold: float = 0.20,
-    cp_isolation: Optional[List[str]] = None,
-    cp_exclusion: Optional[List[str]] = None,
-    preemptive_behaviour: bool = True,
-) -> CharsetMatch:
-    """
-    Take a (text-based) file path and try to create another file next to it, this time using UTF-8.
-    """
-    results = from_path(
-        path,
-        steps,
-        chunk_size,
-        threshold,
-        cp_isolation,
-        cp_exclusion,
-        preemptive_behaviour,
-    )
-
-    filename = basename(path)
-    target_extensions = list(splitext(filename))
-
-    if len(results) == 0:
-        raise IOError(
-            'Unable to normalize "{}", no encoding charset seems to fit.'.format(
-                filename
-            )
-        )
-
-    result = results.best()
-
-    target_extensions[0] += "-" + result.encoding  # type: ignore
-
-    with open(
-        "{}".format(str(path).replace(filename, "".join(target_extensions))), "wb"
-    ) as fp:
-        fp.write(result.output())  # type: ignore
-
-    return result  # type: ignore
diff --git a/charset_normalizer/legacy.py b/charset_normalizer/legacy.py
index cdebe2b8..b266d176 100644
--- a/charset_normalizer/legacy.py
+++ b/charset_normalizer/legacy.py
@@ -1,9 +1,7 @@
-import warnings
 from typing import Dict, Optional, Union
 
-from .api import from_bytes, from_fp, from_path, normalize
+from .api import from_bytes
 from .constant import CHARDET_CORRESPONDENCE
-from .models import CharsetMatch, CharsetMatches
 
 
 def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
@@ -43,53 +41,3 @@ def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
         "language": language,
         "confidence": confidence,
     }
-
-
-class CharsetNormalizerMatch(CharsetMatch):
-    pass
-
-
-class CharsetNormalizerMatches(CharsetMatches):
-    @staticmethod
-    def from_fp(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return from_fp(*args, **kwargs)  # pragma: nocover
-
-    @staticmethod
-    def from_bytes(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return from_bytes(*args, **kwargs)  # pragma: nocover
-
-    @staticmethod
-    def from_path(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return from_path(*args, **kwargs)  # pragma: nocover
-
-    @staticmethod
-    def normalize(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes,
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return normalize(*args, **kwargs)  # pragma: nocover
-
-
-class CharsetDetector(CharsetNormalizerMatches):
-    pass
-
-
-class CharsetDoctor(CharsetNormalizerMatches):
-    pass
diff --git a/charset_normalizer/models.py b/charset_normalizer/models.py
index ccb0d475..b52bae78 100644
--- a/charset_normalizer/models.py
+++ b/charset_normalizer/models.py
@@ -1,12 +1,8 @@
-import warnings
-from collections import Counter
 from encodings.aliases import aliases
 from hashlib import sha256
 from json import dumps
-from re import sub
 from typing import (
     Any,
-    Counter as TypeCounter,
     Dict,
     Iterator,
     List,
@@ -16,7 +12,6 @@
 )
 
 from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE
-from .md import mess_ratio
 from .utils import iana_name, is_multi_byte_encoding, unicode_range
 
 
@@ -78,45 +73,6 @@ def __lt__(self, other: object) -> bool:
     def multi_byte_usage(self) -> float:
         return 1.0 - len(str(self)) / len(self.raw)
 
-    @property
-    def chaos_secondary_pass(self) -> float:
-        """
-        Check once again chaos in decoded text, except this time, with full content.
-        Use with caution, this can be very slow.
-        Notice: Will be removed in 3.0
-        """
-        warnings.warn(
-            "chaos_secondary_pass is deprecated and will be removed in 3.0",
-            DeprecationWarning,
-        )
-        return mess_ratio(str(self), 1.0)
-
-    @property
-    def coherence_non_latin(self) -> float:
-        """
-        Coherence ratio on the first non-latin language detected if ANY.
-        Notice: Will be removed in 3.0
-        """
-        warnings.warn(
-            "coherence_non_latin is deprecated and will be removed in 3.0",
-            DeprecationWarning,
-        )
-        return 0.0
-
-    @property
-    def w_counter(self) -> TypeCounter[str]:
-        """
-        Word counter instance on decoded text.
-        Notice: Will be removed in 3.0
-        """
-        warnings.warn(
-            "w_counter is deprecated and will be removed in 3.0", DeprecationWarning
-        )
-
-        string_printable_only = sub(NOT_PRINTABLE_PATTERN, " ", str(self).lower())
-
-        return Counter(string_printable_only.split())
-
     def __str__(self) -> str:
         # Lazy Str Loading
         if self._string is None:
diff --git a/charset_normalizer/utils.py b/charset_normalizer/utils.py
index 859f212b..c2375114 100644
--- a/charset_normalizer/utils.py
+++ b/charset_normalizer/utils.py
@@ -1,10 +1,4 @@
-try:
-    # WARNING: unicodedata2 support is going to be removed in 3.0
-    # Python is quickly catching up.
-    import unicodedata2 as unicodedata
-except ImportError:
-    import unicodedata  # type: ignore[no-redef]
-
+import unicodedata
 import importlib
 import logging
 from codecs import IncrementalDecoder

From cd4be0dd19ac82b79206353a9468ff6abb806e23 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 14 Aug 2022 20:37:58 +0200
Subject: [PATCH 05/63] :fire: remove test_normalize_fp as target fn been removed

---
 tests/test_normalize_fp.py | 20 --------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 tests/test_normalize_fp.py

diff --git a/tests/test_normalize_fp.py b/tests/test_normalize_fp.py
deleted file mode 100644
index e2ce364a..00000000
--- a/tests/test_normalize_fp.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import pytest
-from charset_normalizer import normalize
-from os.path import exists
-from os import unlink
-
-
-def test_normalize_fp_creation():
-    guesses = normalize(
-        "./data/sample-arabic-1.txt"
-    )
-
-    predicted_path = "./data/sample-arabic-1-{}.txt".format(guesses.best().encoding)
-    path_exist = exists(
-        "./data/sample-arabic-1-{}.txt".format(guesses.best().encoding)
-    )
-
-    assert path_exist is True
-
-    if path_exist:
-        unlink(predicted_path)

From 0d89020aa6d959ad9f03e542914e846ab8ff26cc Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 14 Aug 2022 20:38:52 +0200
Subject: [PATCH 06/63] :fire: remove extra unicodedata backport support

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0f6ae096..1f556726 100644
--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,7 @@ def get_version():
 
 REQUIRED = []
 EXTRAS = {
-    'unicode_backport': ['unicodedata2']
+    'unicode_backport': []
 }
 
 here = os.path.abspath(os.path.dirname(__file__))

From b3c0d5a38d982534a1a6fb8f4e2708e8a9b06da3 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 14 Aug 2022 20:39:49 +0200
Subject: [PATCH 07/63] :art: reformat models.py and utils.py

---
 charset_normalizer/models.py | 10 +---------
 charset_normalizer/utils.py  |  2 +-
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/charset_normalizer/models.py b/charset_normalizer/models.py
index b52bae78..7a8ff565 100644
--- a/charset_normalizer/models.py
+++ b/charset_normalizer/models.py
@@ -1,15 +1,7 @@
 from encodings.aliases import aliases
 from hashlib import sha256
 from json import dumps
-from typing import (
-    Any,
-    Dict,
-    Iterator,
-    List,
-    Optional,
-    Tuple,
-    Union,
-)
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
 
 from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE
 from .utils import iana_name, is_multi_byte_encoding, unicode_range
diff --git a/charset_normalizer/utils.py b/charset_normalizer/utils.py
index c2375114..425d8365 100644
--- a/charset_normalizer/utils.py
+++ b/charset_normalizer/utils.py
@@ -1,6 +1,6 @@
-import unicodedata
 import importlib
 import logging
+import unicodedata
 from codecs import IncrementalDecoder
 from encodings.aliases import aliases
 from functools import lru_cache

From 6f6300a7e97d5abcf6752c21d2e92fcc62490fc3 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 14 Aug 2022 20:42:01 +0200
Subject: [PATCH 08/63] :art: fix flake8 F401 '.constant.NOT_PRINTABLE_PATTERN' imported but unused

---
 charset_normalizer/constant.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/charset_normalizer/constant.py b/charset_normalizer/constant.py
index ac840c46..2e181638 100644
--- a/charset_normalizer/constant.py
+++ b/charset_normalizer/constant.py
@@ -489,8 +489,6 @@
 KO_NAMES: Set[str] = {"johab", "cp949", "euc_kr"}
 ZH_NAMES: Set[str] = {"big5", "cp950", "big5hkscs", "hz"}
 
-NOT_PRINTABLE_PATTERN = re_compile(r"[0-9\W\n\r\t]+")
-
 LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES)
 
 # Logging LEVEL bellow DEBUG

From 0262569567eedd7279a5c122a644e0eafff9ee3f Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 14 Aug 2022 20:44:00 +0200
Subject: [PATCH 09/63] :fire: remove NOT_PRINTABLE_PATTERN import in models.py

---
 charset_normalizer/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charset_normalizer/models.py b/charset_normalizer/models.py
index 7a8ff565..2da82cbd 100644
--- a/charset_normalizer/models.py
+++ b/charset_normalizer/models.py
@@ -3,7 +3,7 @@
 from json import dumps
 from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
 
-from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE
+from .constant import TOO_BIG_SEQUENCE
 from .utils import iana_name, is_multi_byte_encoding, unicode_range
 
 

From c0283d904d991682509163338b7aef7b4a673de6 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Mon, 15 Aug 2022 16:33:59 +0200
Subject: [PATCH 10/63] :zap: Only "compile" md.py for whl size sake

We do not need to optimize everything, most of the time is spent in md.py
---
 setup.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/setup.py b/setup.py
index 1f556726..476d65d9 100644
--- a/setup.py
+++ b/setup.py
@@ -21,15 +21,7 @@ from mypyc.build import mypycify
 
     MYPYC_MODULES = mypycify([
-        "charset_normalizer/__init__.py",
-        "charset_normalizer/api.py",
-        "charset_normalizer/cd.py",
-        "charset_normalizer/constant.py",
-        "charset_normalizer/legacy.py",
-        "charset_normalizer/md.py",
-        "charset_normalizer/models.py",
-        "charset_normalizer/utils.py",
-        "charset_normalizer/assets/__init__.py"
+        "charset_normalizer/md.py"
     ])
 else:
     MYPYC_MODULES = None
 

From 6328f7c288249297318c8a99520c9431e02b9cc7 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Mon, 15 Aug 2022 17:14:07 +0200
Subject: [PATCH 11/63] :sparkle: Add mypyc gha minimum testing

---
 .github/workflows/mypyc-verify.yml | 37 ++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 .github/workflows/mypyc-verify.yml

diff --git a/.github/workflows/mypyc-verify.yml b/.github/workflows/mypyc-verify.yml
new file mode 100644
index 00000000..499487c8
--- /dev/null
+++ b/.github/workflows/mypyc-verify.yml
@@ -0,0 +1,37 @@
+name: MYPYC Run
+
+on: [push, pull_request]
+
+jobs:
+  detection_coverage:
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.9]
+        os: [ubuntu-latest]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        pip install -U pip setuptools
+        pip install -r dev-requirements.txt
+        pip uninstall -y charset-normalizer
+    - name: Install the package
+      run: |
+        python setup.py --use-mypyc install
+    - name: Clone the complete dataset
+      run: |
+        git clone https://github.com/Ousret/char-dataset.git
+    - name: Coverage WITH preemptive
+      run: |
+        python ./bin/coverage.py --coverage 97 --with-preemptive
+    - name: Coverage WITHOUT preemptive
+      run: |
+        python ./bin/coverage.py --coverage 95

From 31f2673f4955c8b077b1ea569f4d1b1b0fce7876 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Mon, 15 Aug 2022 17:14:46 +0200
Subject: [PATCH 12/63] :sparkle: initial ci update to include building wheels (specific) +universal

---
 .github/workflows/python-publish.yml | 90 +++++++++++++++++++++++++++
 1 file changed, 87 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 88986a21..2393715e 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -145,11 +145,88 @@ jobs:
       - name: Integration Tests with Requests
         run: |
           python ./bin/integration.py
+
+  universal-wheel:
+    runs-on: ubuntu-latest
+    needs:
+      - integration
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+      - name: Update pip, setuptools, wheel and twine
+        run: |
+          python -m pip install --upgrade pip
+          pip install setuptools wheel twine
+      - name: Build Wheel
+        env:
+          CHARSET_NORMALIZER_USE_MYPYC: '0'
+        run: python setup.py sdist bdist_wheel
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: dist
+          path: dist
+
+  build-wheels:
+    name: Build wheels on ${{ matrix.os }} ${{ matrix.qemu }}
+    runs-on: ${{ matrix.os }}-latest
+    needs: universal-wheel
+    strategy:
+      matrix:
+        os: [ ubuntu, windows, macos ]
+        qemu: [ '' ]
+        include:
+          # Split ubuntu job for the sake of speed-up
+          - os: ubuntu
+            qemu: aarch64
+          - os: ubuntu
+            qemu: ppc64le
+          - os: ubuntu
+            qemu: s390x
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+      - name: Set up QEMU
+        if: ${{ matrix.qemu }}
+        uses: docker/setup-qemu-action@v2
+        with:
+          platforms: all
+        id: qemu
+      - name: Prepare emulation
+        run: |
+          if [[ -n "${{ matrix.qemu }}" ]]; then
+            # Build emulated architectures only if QEMU is set,
+            # use default "auto" otherwise
+            echo "CIBW_ARCHS_LINUX=${{ matrix.qemu }}" >> $GITHUB_ENV
+          fi
+        shell: bash
+      - name: Setup Python
+        uses: actions/setup-python@v4
+      - name: Update pip, wheel, setuptools, build, twine
+        run: |
+          python -m pip install -U pip wheel setuptools build twine
+      - name: Install dev-dependencies
+        run: |
+          pip install -r dev-requirements.txt
+      - name: Build Wheel
+        env:
+          CHARSET_NORMALIZER_USE_MYPYC: '1'
+        run: python setup.py sdist bdist_wheel
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: dist
+          path: dist
+
   deploy:
 
     runs-on: ubuntu-latest
     needs:
-      - integration
+      - build-wheels
 
     steps:
       - uses: actions/checkout@v2
@@ -161,10 +238,17 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install setuptools wheel twine
-      - name: Build and publish
+      - name: Download disctributions
+        uses: actions/download-artifact@v3
+        with:
+          name: dist
+          path: dist
+      - name: Collected dists
+        run: |
+          tree dist
+      - name: Publish
         env:
           TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
           TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
         run: |
-          python setup.py sdist bdist_wheel
           twine upload dist/*

From e8d7405a45e6d73aed4e269f34cd3466e742bb91 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Mon, 15 Aug 2022 17:26:54 +0200
Subject: [PATCH 13/63] :pencil: Add CHANGELOG entry for first beta

---
 CHANGELOG.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 68f38819..692b608e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,17 @@
 All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
+## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
+
+### Changed
+- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
+
+### Removed
+- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
+- Breaking: Top-level function `normalize`
+- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
+- Support for the backport `unicodedata2`
+
 ## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
 
 ### Added

From 316b5beeeb21c5d1cf5dff27c577c36634a43da5 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Mon, 15 Aug 2022 17:39:24 +0200
Subject: [PATCH 14/63] :pencil: Update README

---
 README.md | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index d58ede1b..f7084441 100644
--- a/README.md
+++ b/README.md
@@ -53,12 +53,12 @@ This package offer better performance than its counterpart Chardet. Here are som
 
 | Package | Accuracy | Mean per file (ms) | File per sec (est) |
 | ------------- | :-------------: | :------------------: | :------------------: |
 | [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |
-| charset-normalizer | **98 %** | **39 ms** | 26 file/sec |
+| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
 
 | Package | 99th percentile | 95th percentile | 50th percentile |
 | ------------- | :-------------: | :------------------: | :------------------: |
 | [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |
-| charset-normalizer | 400 ms | 200 ms | 15 ms |
+| charset-normalizer | 100 ms | 50 ms | 5 ms |
 
 Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.
 
 > Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability
 > (eg. Supported Encoding) Challenge-them if you want.
-
-[cchardet](https://github.com/PyYoshi/cChardet) is a non-native (cpp binding) and unmaintained faster alternative with
-a better accuracy than chardet but lower than this package. If speed is the most important factor, you should try it.
 
 ## ✨ Installation
 
 Using PyPi for latest stable
 ```sh
 pip install charset-normalizer -U
 ```
-
-If you want a more up-to-date `unicodedata` than the one available in your Python setup.
-```sh
-pip install charset-normalizer[unicode_backport] -U
-```
 
 ## 🚀 Basic Usage
 
 ### CLI

From 82fb1b2441e5fc0e2371a950528b759c8558d90c Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Mon, 15 Aug 2022 17:54:56 +0200
Subject: [PATCH 15/63] :pencil: Add a bit of docs about this

---
 docs/community/speedup.rst     | 45 ++++++++++++++++++++++++++++
 docs/community/why_migrate.rst |  4 +--
 docs/index.rst                 |  1 +
 3 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 docs/community/speedup.rst

diff --git a/docs/community/speedup.rst b/docs/community/speedup.rst
new file mode 100644
index 00000000..cf810ab7
--- /dev/null
+++ b/docs/community/speedup.rst
@@ -0,0 +1,45 @@
+Optional speedup extension
+===========================
+
+Why?
+-------
+
+charset-normalizer will always remain pure Python, meaning that a environment without any build-capabilities will
+run this program without any additional requirements.
+
+Nonetheless, starting from the version 3.0 we introduce and publish some platform specific wheels including a
+pre-build extension.
+
+Most of the time is spent in the module `md.py` so we decided to "compile it" using Mypyc.
+
+(1) It does not require to have a separate code base
+(2) Our project code base is rather simple and lightweight
+(3) Mypyc is robust enough today
+(4) Four times faster!
+
+How?
+-------
+
+If your platform and/or architecture is not served by this swift optimization you may compile it easily yourself.
+Following those instructions (provided you have the necessary toolchain installed):
+
+ ::
+
+    git clone https://github.com/Ousret/charset_normalizer.git
+    cd charset_normalizer
+    git checkout 3.0
+    pip install -r dev-requirements.txt
+    python setup.py --use-mypyc install
+
+
+How not to?
+-------
+
+You may install charset-normalizer without any specific (pre-built wheel) by directly using the universal wheel
+(most likely hosted on PyPi or any valid mirror you use)
+
+ ::
+
+    pip install https://........./charset_normalizer-3.0.0b2-py3-none-any.whl
+
+Directly.
diff --git a/docs/community/why_migrate.rst b/docs/community/why_migrate.rst
index 717fc3b5..1909c770 100644
--- a/docs/community/why_migrate.rst
+++ b/docs/community/why_migrate.rst
@@ -4,13 +4,13 @@ Why should I migrate to Charset-Normalizer?
 There is so many reason to migrate your current project. Here are some of them:
 
 - Remove ANY license ambiguity/restriction for projects bundling Chardet (even indirectly).
-- X5 faster than Chardet in average and X3 faster in 99% of the cases AND support 3 times more encoding.
+- X10 faster than Chardet in average and X6 faster in 99% of the cases AND support 3 times more encoding.
 - Never return a encoding if not suited for the given decoder. Eg. Never get UnicodeDecodeError!
 - Actively maintained, open to contributors.
 - Have the backward compatible function ``detect`` that come from Chardet.
 - Truly detect the language used in the text.
 - It is, for the first time, really universal! As there is no specific probe per charset.
-- The package size is X4 lower than Chardet's (5.0)!
+- The package size is X2~X4 lower than Chardet's (5.0)! (Depends on your arch)
 - Propose much more options/public kwargs to tweak the detection as you sees fit!
 - Using static typing to ease your development.
 - Detect Unicode content better than Chardet or cChardet does.
diff --git a/docs/index.rst b/docs/index.rst
index 2398a7f0..ca065097 100755
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -72,6 +72,7 @@ Community Guide
 .. toctree::
    :maxdepth: 2
 
+   community/speedup
    community/faq
    community/why_migrate

From 09402e6a58733db00dadfdf3a4c3cf47a2540da0 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Mon, 15 Aug 2022 18:05:04 +0200
Subject: [PATCH 16/63] :wrench: Add py matrix build specific wheels

---
 .github/workflows/python-publish.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 2393715e..af243734 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -177,6 +177,7 @@ jobs:
       matrix:
         os: [ ubuntu, windows, macos ]
         qemu: [ '' ]
+        python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10" ]
         include:
           # Split ubuntu job for the sake of speed-up
          - os: ubuntu
@@ -206,6 +207,8 @@ jobs:
         shell: bash
       - name: Setup Python
         uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
       - name: Update pip, wheel, setuptools, build, twine
         run: |
           python -m pip install -U pip wheel setuptools build twine

From c19faca176e351da420270864134e5fdbcceb683 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Mon, 15 Aug 2022 22:49:32 +0200
Subject: [PATCH 17/63] Use cibuildwheel action

---
 .github/workflows/python-publish.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index af243734..54a312f2 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -1,6 +1,7 @@
 name: Release-Deployment CI
 
 on:
+  workflow_dispatch:
   release:
     types: [created]
@@ -177,7 +178,6 @@ jobs:
       matrix:
         os: [ ubuntu, windows, macos ]
         qemu: [ '' ]
-        python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10" ]
         include:
           # Split ubuntu job for the sake of speed-up
          - os: ubuntu
@@ -206,18 +207,18 @@ jobs:
         shell: bash
       - name: Setup Python
         uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
       - name: Update pip, wheel, setuptools, build, twine
         run: |
           python -m pip install -U pip wheel setuptools build twine
       - name: Install dev-dependencies
         run: |
           pip install -r dev-requirements.txt
-      - name: Build Wheel
+      - name: Build wheels
+        uses: pypa/cibuildwheel@2.7.0
         env:
+          CIBW_ARCHS_MACOS: x86_64 arm64 universal2
+          CIBW_ENVIRONMENT: CHARSET_NORMALIZER_USE_MYPYC='1'
           CHARSET_NORMALIZER_USE_MYPYC: '1'
-        run: python setup.py sdist bdist_wheel
       - name: Upload artifacts
         uses: actions/upload-artifact@v3
         with:

From 05b7e7eec46468f938dac7a1f4da2055c2d2d3cc Mon Sep 17 00:00:00 2001
From: TAHRI Ahmed R
Date: Mon, 15 Aug 2022 23:23:38 +0200
Subject: [PATCH 18/63] Update python-publish.yml

---
 .github/workflows/python-publish.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 54a312f2..124b97cb 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -1,7 +1,6 @@
 name: Release-Deployment CI
 
 on:
-  workflow_dispatch:
   release:
     types: [created]

From 68f5aff1ddadab41cedf24f35b16714b420e4e95 Mon Sep 17 00:00:00 2001
From: TAHRI Ahmed R
Date: Tue, 16 Aug 2022 17:00:22 +0200
Subject: [PATCH 19/63] Update python-publish.yml

---
 .github/workflows/python-publish.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 124b97cb..bd1dee35 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -1,6 +1,7 @@
 name: Release-Deployment CI
 
 on:
+  workflow_dispatch:
   release:
     types: [created]
@@ -209,15 +210,12 @@ jobs:
       - name: Update pip, wheel, setuptools, build, twine
         run: |
           python -m pip install -U pip wheel setuptools build twine
-      - name: Install dev-dependencies
-        run: |
-          pip install -r dev-requirements.txt
       - name: Build wheels
         uses: pypa/cibuildwheel@2.7.0
         env:
           CIBW_ARCHS_MACOS: x86_64 arm64 universal2
           CIBW_ENVIRONMENT: CHARSET_NORMALIZER_USE_MYPYC='1'
-          CHARSET_NORMALIZER_USE_MYPYC: '1'
+          CIBW_DEPENDENCY_VERSIONS: ./dev-requirements.txt

From 35f79f607a796a5dcc08373f0835a9588da117fe Mon Sep 17 00:00:00 2001
From: TAHRI Ahmed R
Date: Tue, 16 Aug 2022 17:12:45 +0200
Subject: [PATCH 20/63] Update python-publish.yml

---
 .github/workflows/python-publish.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index bd1dee35..545a9ac0 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -215,7 +215,7 @@ jobs:
       - name: Build wheels
         uses: pypa/cibuildwheel@2.7.0
         env:
           CIBW_ARCHS_MACOS: x86_64 arm64 universal2
           CIBW_ENVIRONMENT: CHARSET_NORMALIZER_USE_MYPYC='1'
-          CIBW_DEPENDENCY_VERSIONS: ./dev-requirements.txt
+          CIBW_BEFORE_BUILD: pip install -r dev-requirements.txt

From 57a84852fd31c98e667a501ec9165356269d08aa Mon Sep 17 00:00:00 2001
From: TAHRI Ahmed R
Date: Tue, 16 Aug 2022 17:27:04 +0200
Subject: [PATCH 21/63] Update python-publish.yml

---
 .github/workflows/python-publish.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 545a9ac0..452abb97 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -216,6 +216,7 @@ jobs:
         env:
           CIBW_ARCHS_MACOS: x86_64 arm64 universal2
           CIBW_ENVIRONMENT: CHARSET_NORMALIZER_USE_MYPYC='1'
           CIBW_BEFORE_BUILD: pip install -r dev-requirements.txt
+          CIBW_SKIP: pp*
       - name: Upload artifacts
         uses: actions/upload-artifact@v3
         with:

From 2f5130a4c27e5924d910c6c4fa365b93f2431a08 Mon Sep 17 00:00:00 2001
From: TAHRI Ahmed R
Date: Tue, 16 Aug 2022 17:57:56 +0200
Subject: [PATCH 22/63] Update python-publish.yml

---
 .github/workflows/python-publish.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 452abb97..fd1d2a9d 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -221,7 +221,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v3
         with:
           name: dist
-          path: dist
+          path: ./wheelhouse/*.whl
 
   deploy:

From 0a0e20b7479af103bb9de7f37744c1d2623e765a Mon Sep 17 00:00:00 2001
From: TAHRI Ahmed R
Date: Tue, 16 Aug 2022 17:59:59 +0200
Subject: [PATCH 23/63] Update python-publish.yml

---
 .github/workflows/python-publish.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index fd1d2a9d..d0f405d4 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -1,7 +1,6 @@
 name: Release-Deployment CI
 
 on:
-  workflow_dispatch:
   release:
     types: [created]

From 443ab7d5bab1040a1c758b3364aa951bf929efda Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Fri, 19 Aug 2022 23:06:17 +0200
Subject: [PATCH 24/63] :fire: remove unicodedata2 import ver in cli

---
 charset_normalizer/cli/normalizer.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/charset_normalizer/cli/normalizer.py b/charset_normalizer/cli/normalizer.py
index b8b652a5..4902d36e 100644
--- a/charset_normalizer/cli/normalizer.py
+++ b/charset_normalizer/cli/normalizer.py
@@ -5,10 +5,7 @@
 from platform import python_version
 from typing import List, Optional
 
-try:
-    from unicodedata2 import unidata_version
-except ImportError:
-    from unicodedata import unidata_version
+from unicodedata import unidata_version
 
 from charset_normalizer import from_fp
 from charset_normalizer.models import CliDetectionResult

From b580e970440825c18758cd84e97f86e8ff2fc951 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Fri, 19 Aug 2022 23:11:24 +0200
Subject: [PATCH 25/63] :sparkle: normalizer --version specify if extra speedup is present

---
 charset_normalizer/cli/normalizer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/charset_normalizer/cli/normalizer.py b/charset_normalizer/cli/normalizer.py
index 4902d36e..0a0a0648 100644
--- a/charset_normalizer/cli/normalizer.py
+++ b/charset_normalizer/cli/normalizer.py
@@ -9,6 +9,7 @@
 
 from charset_normalizer import from_fp
 from charset_normalizer.models import CliDetectionResult
+import charset_normalizer.md as md_module
 from charset_normalizer.version import __version__
 
 
@@ -121,8 +122,8 @@
     parser.add_argument(
         "--version",
         action="version",
-        version="Charset-Normalizer {} - Python {} - Unicode {}".format(
-            __version__, python_version(), unidata_version
+        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
+            __version__, python_version(), unidata_version, "OFF" if md_module.__file__.lower().endswith(".py") else "ON"
         ),
         help="Show version information and exit.",
     )

From eb4577c0d2c82d925a8e00d2b245269717ca6d66 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Fri, 19 Aug 2022 23:13:05 +0200
Subject: [PATCH 26/63] :bookmark: bump to beta2

---
 charset_normalizer/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charset_normalizer/version.py b/charset_normalizer/version.py
index 1bee3dc5..c05c9cd1 100644
--- a/charset_normalizer/version.py
+++ b/charset_normalizer/version.py
@@ -2,5 +2,5 @@
 Expose version
 """
 
-__version__ = "3.0.0b1"
+__version__ = "3.0.0b2"
 VERSION = __version__.split(".")

From 03a25998d2ca6a5d41a1b8771fe1f54845d56ad3 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Fri, 19 Aug 2022 23:16:28 +0200
Subject: [PATCH 27/63] :pencil: Add changelog entry

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 692b608e..b797f747 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
 ## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
 
+### Added
+- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
+
 ### Changed
 - Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
 

From 97b87f0905c6a10d979299c74986c6d753d01e69 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Fri, 19 Aug 2022 23:21:52 +0200
Subject: [PATCH 28/63] :pencil: update changelog

---
 CHANGELOG.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b797f747..29915d29 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,11 +2,13 @@
 All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
-## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
+## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-19)
 
 ### Added
 - `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
 
+## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
+
 ### Changed
 - Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
 

From 1755db9cee1495cd29a1c06ac98deec524eff69e Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Fri, 19 Aug 2022 23:23:03 +0200
Subject: [PATCH 29/63] :pencil: update speedup doc

---
 docs/community/speedup.rst | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/docs/community/speedup.rst b/docs/community/speedup.rst
index cf810ab7..e88b40b6 100644
--- a/docs/community/speedup.rst
+++ b/docs/community/speedup.rst
@@ -25,11 +25,8 @@ Following those instructions (provided you have the necessary toolchain installe
 
  ::
 
-    git clone https://github.com/Ousret/charset_normalizer.git
-    cd charset_normalizer
-    git checkout 3.0
-    pip install -r dev-requirements.txt
-    python setup.py --use-mypyc install
+    export CHARSET_NORMALIZER_USE_MYPYC=1
+    pip install charset-normalizer --no-binary :all:
 
 
 How not to?
@@ -40,6 +37,6 @@ You may install charset-normalizer without any specific (pre-built wheel) by dir
 
  ::
 
-    pip install https://........./charset_normalizer-3.0.0b2-py3-none-any.whl
+    pip install charset-normalizer --no-binary :all:
 
 Directly.

From 1faeed046660898cd7a727711c9706245aeccbf8 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Fri, 19 Aug 2022 23:33:30 +0200
Subject: [PATCH 30/63] :heavy_check_mark: Verify that --version work as intended for CLI

---
 tests/test_cli.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 16601750..440ce9df 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -28,6 +28,12 @@ def test_single_file(self):
             )
         )
 
+    def test_version_output_success(self):
+        with self.assertRaises(SystemExit):
+            cli_detect(
+                ['--version']
+            )
+
     def test_single_file_normalize(self):
         self.assertEqual(
             0,

From 8e5af122d10135e42f1653dfe054ca144dc69725 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Fri, 19 Aug 2022 23:33:44 +0200
Subject: [PATCH 31/63] :art: reformat normalizer.py

---
 charset_normalizer/cli/normalizer.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/charset_normalizer/cli/normalizer.py b/charset_normalizer/cli/normalizer.py
index 0a0a0648..70293895 100644
--- a/charset_normalizer/cli/normalizer.py
+++ b/charset_normalizer/cli/normalizer.py
@@ -4,12 +4,11 @@
 from os.path import abspath
 from platform import python_version
 from typing import List, Optional
-
 from unicodedata import unidata_version
 
+import charset_normalizer.md as md_module
 from charset_normalizer import from_fp
 from charset_normalizer.models import CliDetectionResult
-import charset_normalizer.md as md_module
 from charset_normalizer.version import __version__
 
 
@@ -123,7 +122,10 @@
         "--version",
         action="version",
         version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
-            __version__, python_version(), unidata_version, "OFF" if md_module.__file__.lower().endswith(".py") else "ON"
+            __version__,
+            python_version(),
+            unidata_version,
+            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
         ),
         help="Show version information and exit.",
     )

From 368d0600921270ab657e6b1234f7fe13fb594b97 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Fri, 19 Aug 2022 23:59:39 +0200
Subject: [PATCH 32/63] :fire: remove method first() and best() from class CharsetMatch

---
 CHANGELOG.md                 |  3 +++
 charset_normalizer/models.py | 12 ------------
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 29915d29..afc5fdb7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Added
 - `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
 
+### Removed
+- Breaking: Method `first()` and `best()` from CharsetMatch
+
 ## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
 
 ### Changed
diff --git a/charset_normalizer/models.py b/charset_normalizer/models.py
index 2da82cbd..07cbc12a 100644
--- a/charset_normalizer/models.py
+++ b/charset_normalizer/models.py
@@ -200,18 +200,6 @@ def could_be_from_charset(self) -> List[str]:
         """
         return [self._encoding] + [m.encoding for m in self._leaves]
 
-    def first(self) -> "CharsetMatch":
-        """
-        Kept for BC reasons. Will be removed in 3.0.
-        """
-        return self
-
-    def best(self) -> "CharsetMatch":
-        """
-        Kept for BC reasons. Will be removed in 3.0.
-        """
-        return self
-
     def output(self, encoding: str = "utf_8") -> bytes:
         """
         Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.

From 8e5171a61726d2a7d571f3c896d17f5491aa5688 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 21 Aug 2022 19:41:46 +0200
Subject: [PATCH 33/63] :fire: :art: remove unused import "warnings"

---
 charset_normalizer/api.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py
index 375d3b0c..c4d3c7c3 100644
--- a/charset_normalizer/api.py
+++ b/charset_normalizer/api.py
@@ -1,5 +1,4 @@
 import logging
-import warnings
 from os import PathLike
 from typing import Any, BinaryIO, List, Optional, Set

From 1957898fc2de2f026baff7da04611958e1fbc02a Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 21 Aug 2022 20:00:07 +0200
Subject: [PATCH 34/63] :art: Fix warnings in Sphinx docs generation process

Close #196
---
 docs/api.rst               | 11 +++++++----
 docs/community/speedup.rst |  2 +-
 docs/conf.py               |  4 ++--
 docs/index.rst             |  1 -
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 47a985e5..48b74951 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -14,11 +14,9 @@ Those functions are publicly exposed and are protected through our BC guarantee.
 .. autofunction:: from_fp
 .. autofunction:: from_path
 
-.. autofunction:: normalize
-
-.. autoclass:: charset_normalizer.CharsetMatches
+.. autoclass:: charset_normalizer.models.CharsetMatches
    :inherited-members:
-.. autoclass:: charset_normalizer.CharsetMatch
+.. autoclass:: charset_normalizer.models.CharsetMatch
    :inherited-members:
 
 .. autofunction:: detect
@@ -99,3 +97,8 @@ Some reusable functions used across the project. We do not guarantee the BC in t
 
 .. autofunction:: charset_normalizer.utils.range_scan
 .. autofunction:: charset_normalizer.utils.is_cp_similar
+
+
+.. class:: os.PathLike
+
+   Used as a generic way to accept AnyStr for paths.
diff --git a/docs/community/speedup.rst b/docs/community/speedup.rst
index e88b40b6..cefc57e4 100644
--- a/docs/community/speedup.rst
+++ b/docs/community/speedup.rst
@@ -30,7 +30,7 @@ Following those instructions (provided you have the necessary toolchain installe
 
 
 How not to?
--------
+-----------
 
 You may install charset-normalizer without any specific (pre-built wheel) by directly using the universal wheel
 (most likely hosted on PyPi or any valid mirror you use)
diff --git a/docs/conf.py b/docs/conf.py
index 5cfe028b..3e675d42 100755
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -81,7 +81,7 @@
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-language = None
+language = "en"
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
@@ -113,7 +113,7 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = []
 
 
 # -- Options for HTMLHelp output ------------------------------------------
diff --git a/docs/index.rst b/docs/index.rst
index ca065097..05d5f98a 100755
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -13,7 +13,6 @@ It aims to be as generic as possible.
 
 .. image:: https://repository-images.githubusercontent.com/200259335/d3da9600-dedc-11e9-83e8-081f597505df
    :width: 500px
-   :scale: 100 %
    :alt: CLI Charset Normalizer
    :align: right
 

From 1eeb42354544cc332a95d1bb2a496fad3ad872d9 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 21 Aug 2022 20:07:15 +0200
Subject: [PATCH 35/63] :pencil: update changelog

---
 CHANGELOG.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4607bae1..033b0640 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,7 +2,7 @@
 All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
-## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-19)
+## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
 
 ### Added
 - `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
@@ -10,6 +10,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Removed
 - Breaking: Method `first()` and `best()` from CharsetMatch
 
+### Fixed
+- Sphinx warnings when generating the documentation
+
 ## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
 
 ### Changed

From f119e43c1290be169cda5a866711e3984ebcfbad Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 21 Aug 2022 20:49:43 +0200
Subject: [PATCH 36/63] :pencil: update docs support section

---
 README.md             | 2 +-
 docs/user/support.rst | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index f7084441..b7819f97 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ This project offers you an alternative to **Universal Charset Encoding Detector*
 | `Detect spoken language` | ❌ | :heavy_check_mark: | N/A |
 | `UnicodeDecodeError Safety` | ❌ | :heavy_check_mark: | ❌ |
 | `Whl Size` | 193.6 kB | 39.5 kB | ~200 kB |
-| `Supported Encoding` | 33 | :tada: [93](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40
+| `Supported Encoding` | 33 | :tada: [90](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40
 
 <p align="center">
 <img alt="Reading Normalized Text"/><img alt="Cat Reading Text"/>
diff --git a/docs/user/support.rst b/docs/user/support.rst
index 8b624933..0dbf06b9 100644
--- a/docs/user/support.rst
+++ b/docs/user/support.rst
@@ -92,13 +92,10 @@ mac_iceland     maciceland
 mac_latin2      maccentraleurope, maclatin2
 mac_roman       macintosh, macroman
 mac_turkish     macturkish
-mbcs            ansi, dbcs
 ptcp154         csptcp154, pt154, cp154, cyrillic_asian
-rot_13          rot13
 shift_jis       csshiftjis, shiftjis, sjis, s_jis, x_mac_japanese
 shift_jis_2004  shiftjis2004, sjis_2004, s_jis_2004
 shift_jisx0213  shiftjisx0213, sjisx0213, s_jisx0213
-tactis          tis260
 tis_620         tis620, tis_620_0, tis_620_2529_0, tis_620_2529_1, iso_ir_166
 utf_16          u16, utf16
 utf_16_be       unicodebigunmarked, utf_16be
@@ -107,9 +104,11 @@ utf_32          u32, utf32
 utf_32_be       utf_32be
 utf_32_le       utf_32le
 utf_8           u8, utf, utf8, utf8_ucs2, utf8_ucs4 (+utf_8_sig)
-utf_7           u7, unicode-1-1-utf-7
+utf_7*          u7, unicode-1-1-utf-7
 =============== ===============================================================================================================================
 
+*: Only if a SIG/mark is found.
+
 -------------------
 Supported Languages
 -------------------

From 216d1c673615451b6fbfc82865da66ccfd8d66a5 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 21 Aug 2022 20:51:20 +0200
Subject: [PATCH 37/63] make sure utf-7 is not "detected" without a mark/sig

conflict with ascii detector. cannot enable it without compromises.
---
 charset_normalizer/api.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py
index 0be42c01..c2e54356 100644
--- a/charset_normalizer/api.py
+++ b/charset_normalizer/api.py
@@ -199,6 +199,13 @@ def from_bytes(
                 encoding_iana,
             )
             continue
 
+        if encoding_iana in {"utf_7"} and not bom_or_sig_available:
+            logger.log(
+                TRACE,
+                "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
+                encoding_iana,
+            )
+            continue
 
         try:
             is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)

From 03aa701dae61f4c71c0910147061cb3730e0a16f Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 21 Aug 2022 20:53:43 +0200
Subject: [PATCH 38/63] :pencil: update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 033b0640..9dee06a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Removed
 - Breaking: Method `first()` and `best()` from CharsetMatch
+- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
 
 ### Fixed
 - Sphinx warnings when generating the documentation

From c12a07a513c4aa5328cb5c340954647193f87e28 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sat, 1 Oct 2022 21:00:09 +0200
Subject: [PATCH 39/63] :wrench: switch to static metadata (setup.cfg) and use 'build'

---
 .github/workflows/chardet-bc.yml        |  3 +-
 .github/workflows/detector-coverage.yml |  3 +-
 .github/workflows/integration.yml       |  3 +-
 .github/workflows/lint.yml              |  3 +-
 .github/workflows/mypyc-verify.yml      |  7 +-
 .github/workflows/performance.yml       |  3 +-
 .github/workflows/python-publish.yml    | 20 ++++--
 .github/workflows/run-tests.yml         |  3 +-
 README.md                               | 14 ++--
 dev-requirements.txt                    | 22 ++++---
 setup.cfg                               | 58 ++++++++++++++++-
 setup.py                                | 86 +++----------------------
 12 files changed, 115 insertions(+), 110 deletions(-)

diff --git a/.github/workflows/chardet-bc.yml b/.github/workflows/chardet-bc.yml
index 0bbeaec8..dfbc64cc 100644
--- a/.github/workflows/chardet-bc.yml
+++ b/.github/workflows/chardet-bc.yml
@@ -25,7 +25,8 @@ jobs:
         pip uninstall -y charset-normalizer
     - name: Install the package
       run: |
-        python setup.py install
+        python -m build
+        pip install ./dist/*.whl
     - name: Clone the complete dataset
       run: |
        git clone https://github.com/Ousret/char-dataset.git
diff --git a/.github/workflows/detector-coverage.yml b/.github/workflows/detector-coverage.yml
index 19eed9ae..1527f22b 100644
--- a/.github/workflows/detector-coverage.yml
+++ b/.github/workflows/detector-coverage.yml
@@ -25,7 +25,8 @@ jobs:
         pip uninstall -y charset-normalizer
     - name: Install the package
       run: |
-        python setup.py install
+        python -m build
+        pip install ./dist/*.whl
     - name: Clone the complete dataset
       run: |
        git clone https://github.com/Ousret/char-dataset.git
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index f74a56d2..00aa98eb 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -28,7 +28,8 @@ jobs:
         pip uninstall -y charset-normalizer
     - name: Install the package
       run: |
-        python setup.py install
+        python -m build
+        pip install ./dist/*.whl
     - name: Clone the complete dataset
       run: |
        git clone https://github.com/Ousret/char-dataset.git
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 877b890e..4f1f12f4 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -25,7 +25,8 @@ jobs:
         pip uninstall -y charset-normalizer
     - name: Install the package
       run: |
-        python setup.py install
+        python -m build
+        pip install ./dist/*.whl
     - name: Type checking (Mypy)
       run: |
         mypy --strict charset_normalizer
diff --git a/.github/workflows/mypyc-verify.yml b/.github/workflows/mypyc-verify.yml
index 499487c8..e9b2a9c7 100644
--- a/.github/workflows/mypyc-verify.yml
+++ b/.github/workflows/mypyc-verify.yml
@@ -9,7 +9,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.9]
+        python-version: [3.6, 3.7, 3.8, 3.9, "3.10"]
         os: [ubuntu-latest]
 
     steps:
@@ -24,8 +24,11 @@ jobs:
         pip install -r dev-requirements.txt
         pip uninstall -y charset-normalizer
     - name: Install the package
+      env:
+        CHARSET_NORMALIZER_USE_MYPYC: '1'
       run: |
-        python setup.py --use-mypyc install
+        python -m build --no-isolation
+        pip install ./dist/*.whl
     - name: Clone the complete dataset
       run: |
         git clone https://github.com/Ousret/char-dataset.git
diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
index fddd9d30..e675061a 100644
--- a/.github/workflows/performance.yml
+++ b/.github/workflows/performance.yml
@@ -25,7 +25,8 @@ jobs:
         pip uninstall -y charset-normalizer
     - name: Install the package
       run: |
-        python setup.py install
+        python -m build
+        pip install ./dist/*.whl
     - name: Clone the complete dataset
       run: |
        git clone https://github.com/Ousret/char-dataset.git
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index fd1d2a9d..4a4f4e48 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -29,7 +29,8 @@ jobs:
         pip uninstall -y charset-normalizer
       - name: Install the package
         run: |
-          python setup.py install
+          python -m build
+          pip install ./dist/*.whl
       - name: Type checking (Mypy)
         run: |
           mypy charset_normalizer
@@ -51,7 +52,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10" ]
+        python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10", "3.11-dev" ]
         os: [ ubuntu-latest ]
 
     steps:
@@ -67,7 +68,8 @@ jobs:
         pip uninstall -y charset-normalizer
       - name: Install the package
         run: |
-          python setup.py install
+          python -m build
+          pip install ./dist/*.whl
       - name: Run tests
         run: |
          pytest
@@ -96,7 +98,8 @@ jobs:
         pip uninstall -y charset-normalizer
       - name: Install the package
         run: |
-          python setup.py install
+          python -m build
+          pip install ./dist/*.whl
       - name: Clone the complete dataset
         run: |
           git clone https://github.com/Ousret/char-dataset.git
@@ -136,7 +139,8 @@ jobs:
         pip uninstall -y charset-normalizer
       - name: Install the package
         run: |
-          python setup.py install
+          python -m build
+          pip install ./dist/*.whl
       - name: Clone the complete dataset
         run: |
           git clone https://github.com/Ousret/char-dataset.git
@@ -163,7 +167,7 @@ jobs:
       - name: Build Wheel
         env:
           CHARSET_NORMALIZER_USE_MYPYC: '0'
-        run: python setup.py sdist bdist_wheel
+        run: python -m build
       - name: Upload artifacts
         uses: actions/upload-artifact@v3
         with:
@@ -211,10 +215,12 @@ jobs:
         run: |
           python -m pip install -U pip wheel setuptools build twine
       - name: Build wheels
-        uses: pypa/cibuildwheel@2.7.0
+        uses: pypa/cibuildwheel@2.10.2
         env:
+          CIBW_BUILD_FRONTEND: "build"
           CIBW_ARCHS_MACOS: x86_64 arm64 universal2
           CIBW_ENVIRONMENT: CHARSET_NORMALIZER_USE_MYPYC='1'
+          CIBW_CONFIG_SETTINGS: "--no-isolation"
           CIBW_BEFORE_BUILD: pip install -r dev-requirements.txt
           CIBW_SKIP: pp*
       - name: Upload artifacts
diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index 2e999729..27dc5d5f 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -25,7 +25,8 @@ jobs:
         pip uninstall -y charset-normalizer
     - name: Install the package
       run: |
-        python setup.py install
+        python -m build --no-isolation
+        pip install ./dist/*.whl
     - name: Run tests
       run: |
        pytest
diff --git a/README.md b/README.md
index b7819f97..27736830 100644
--- a/README.md
+++ b/README.md
@@ -25,14 +25,14 @@ This project offers you an alternative to **Universal Charset Encoding Detector*
 
 | Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
 | ------------- | :-------------: | :------------------: | :------------------: |
-| `Fast` | ❌<br> | :heavy_check_mark:<br> | :heavy_check_mark: <br>|
-| `Universal**` | ❌ | :heavy_check_mark: | ❌ |
-| `Reliable` **without** distinguishable standards | ❌ | :heavy_check_mark: | :heavy_check_mark: |
-| `Reliable` **with** distinguishable standards | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
+| `Fast` | ❌<br> | ✅<br> | ✅ <br>|
+| `Universal**` | ❌ | ✅ | ❌ |
+| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
+| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
_restrictive_ | MIT | MPL-1.1
_restrictive_ | -| `Native Python` | :heavy_check_mark: | :heavy_check_mark: | ❌ | -| `Detect spoken language` | ❌ | :heavy_check_mark: | N/A | -| `UnicodeDecodeError Safety` | ❌ | :heavy_check_mark: | ❌ | +| `Native Python` | ✅ | ✅ | ❌ | +| `Detect spoken language` | ❌ | ✅ | N/A | +| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ | | `Whl Size` | 193.6 kB | 39.5 kB | ~200 kB | | `Supported Encoding` | 33 | :tada: [90](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 diff --git a/dev-requirements.txt b/dev-requirements.txt index 8e77fe94..df125d66 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,10 +1,12 @@ -pytest -pytest-cov -codecov -chardet>=5.0,<5.1 -Flask>=2.0,<3.0 -requests>=2.26,<3.0 -black==22.8.0 -flake8==5.0.4 -mypy==0.971 -isort +black==22.8.0 +flake8==5.0.4 +mypy>=0.970 +Flask==2.2.2 +chardet==5.0.0 +isort==5.10.1 +codecov==2.1.12 +pytest==7.1.3 +pytest-cov==4.0.0 +build==0.8.0 +requests==2.28.1 +wheel==0.37.1 diff --git a/setup.cfg b/setup.cfg index bb4f9c50..8000f5cd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,59 @@ +[metadata] +name = charset-normalizer +description = The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. +long_description = file: README.md, CHANGELOG.md, LICENSE +long_description_content_type = text/markdown +keywords = encoding, charset, charset-detector, detector, normalization, unicode, chardet, detect +url = https://github.com/Ousret/charset_normalizer +license = MIT +author_email = ahmed.tahri@cloudnursery.dev +author = Ahmed TAHRI +python_requires = >=3.6.0 +project_urls = + Bug Reports = https://github.com/Ousret/charset_normalizer/issues + Documentation = https://charset-normalizer.readthedocs.io/en/latest +classifiers = + Development Status :: 5 - Production/Stable + License :: OSI Approved :: MIT License + Intended Audience :: Developers + Topic :: Software Development :: Libraries :: Python Modules + Operating System :: OS Independent + Programming Language :: Python + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 + Programming Language :: Python :: Implementation :: PyPy + Topic :: Text Processing :: Linguistic + Topic :: Utilities + Typing :: Typed + +[options.packages.find] +exclude = + tests + *.tests + *.tests.* + tests.* + docs* + data* + +[options.extras_require] +unicode_backport = + +[options.entry_points] +console_scripts = + normalizer = charset_normalizer.cli.normalizer:cli_detect + +[options] +packages = find: +include_package_data = True + +[options.package_data] +charset_normalizer = py.typed + [tool:pytest] addopts = --cov=charset_normalizer --cov-report=term-missing -rxXs @@ -11,4 +67,4 @@ ignore_missing_imports = True [tool:isort] profile = black -combine_as_imports = True \ No newline at end of file +combine_as_imports = True diff --git a/setup.py b/setup.py index 476d65d9..7c64a695 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,17 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import io import os import sys from re import search -from setuptools import find_packages, setup +from setuptools import setup + + +def get_version(): + with open('charset_normalizer/version.py') as version_file: + return search(r"""__version__\s+=\s+(['"])(?P.+?)\1""", + version_file.read()).group('version') 
USE_MYPYC = False @@ -26,81 +31,8 @@ else: MYPYC_MODULES = None - -def get_version(): - with open('charset_normalizer/version.py') as version_file: - return search(r"""__version__\s+=\s+(['"])(?P.+?)\1""", - version_file.read()).group('version') - - -# Package meta-data. -NAME = 'charset-normalizer' -DESCRIPTION = 'The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.' -URL = 'https://github.com/ousret/charset_normalizer' -EMAIL = 'ahmed.tahri@cloudnursery.dev' -AUTHOR = 'Ahmed TAHRI @Ousret' -REQUIRES_PYTHON = '>=3.6.0' -VERSION = get_version() - -REQUIRED = [] - -EXTRAS = { - 'unicode_backport': [] -} - -here = os.path.abspath(os.path.dirname(__file__)) - -try: - with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: - long_description = '\n' + f.read() -except FileNotFoundError: - long_description = DESCRIPTION - setup( - name=NAME, - version=VERSION, - description=DESCRIPTION, - long_description=long_description.replace(':heavy_check_mark:', '✅'), - long_description_content_type='text/markdown', - author=AUTHOR, - author_email=EMAIL, - python_requires=REQUIRES_PYTHON, - url=URL, - keywords=['encoding', 'i18n', 'txt', 'text', 'charset', 'charset-detector', 'normalization', 'unicode', 'chardet'], - packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), - install_requires=REQUIRED, - extras_require=EXTRAS, - include_package_data=True, - package_data={"charset_normalizer": ["py.typed"]}, - license='MIT', - entry_points={ - 'console_scripts': - [ - 'normalizer = charset_normalizer.cli.normalizer:cli_detect' - ] - }, - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'License :: OSI Approved :: MIT License', - 'Intended Audience :: Developers', - 'Topic :: Software Development :: Libraries :: Python Modules', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Topic :: Text Processing :: Linguistic', - 'Topic :: Utilities', - 'Programming Language :: Python :: Implementation :: PyPy', - 'Typing :: Typed' - ], - project_urls={ - 'Bug Reports': 'https://github.com/Ousret/charset_normalizer/issues', - 'Documentation': 'https://charset-normalizer.readthedocs.io/en/latest', - }, + name="charset-normalizer", + version=get_version(), ext_modules=MYPYC_MODULES ) From b2da4cbd914da2a4e8dc1a3023210f20616d82e3 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sat, 1 Oct 2022 21:06:55 +0200 Subject: [PATCH 40/63] :wrench: Lax on Flask version range (py 3.6) --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index df125d66..ec379eee 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,7 @@ black==22.8.0 flake8==5.0.4 mypy>=0.970 -Flask==2.2.2 +Flask>=2.0.3,<=2.2.2 chardet==5.0.0 isort==5.10.1 codecov==2.1.12 From 95253c85a24204120422266364b020ef9d60e23e Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sat, 1 Oct 2022 21:11:41 +0200 Subject: [PATCH 41/63] :wrench: Lax on pytest version range (py 3.6) --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index ec379eee..c394f232 100644 --- 
a/dev-requirements.txt +++ b/dev-requirements.txt @@ -5,7 +5,7 @@ Flask>=2.0.3,<=2.2.2 chardet==5.0.0 isort==5.10.1 codecov==2.1.12 -pytest==7.1.3 +pytest>=7.0.0,<8.0 pytest-cov==4.0.0 build==0.8.0 requests==2.28.1 From a28be6ba9156d9bd95e95f3f604e6a77519cde26 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sat, 1 Oct 2022 21:13:20 +0200 Subject: [PATCH 42/63] :wrench: Lax on requests version range (py 3.6) --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index c394f232..1760f9f5 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -8,5 +8,5 @@ codecov==2.1.12 pytest>=7.0.0,<8.0 pytest-cov==4.0.0 build==0.8.0 -requests==2.28.1 +requests>=2.27.1,<3.0.0 wheel==0.37.1 From 02969005a82bfc28f10dcc5d4ae4108dd53018ea Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sat, 1 Oct 2022 21:43:43 +0200 Subject: [PATCH 43/63] :fire: remove codeql action --- .github/workflows/codeql-analysis.yml | 56 --------------------------- 1 file changed, 56 deletions(-) delete mode 100644 .github/workflows/codeql-analysis.yml diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index 1a7014d5..00000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,56 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -name: "CodeQL" - -on: - push: - branches: [master, develop] - pull_request: - # The branches below must be a subset of the branches above - branches: [master, develop] - schedule: - - cron: '0 23 * * 0' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - - strategy: - matrix: - python-version: [3.9] - fail-fast: false - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - with: - # We must fetch at least the immediate parents so that if this is - # a pull request then we can checkout the head. - fetch-depth: 2 - - # If this run was triggered by a pull request event, then checkout - # the head of the pull request instead of the merge commit. - - run: git checkout HEAD^2 - if: ${{ github.event_name == 'pull_request' }} - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - with: - languages: "python" - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: ./path/to/local/query, your-org/your-repo/queries@main - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
- # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 From 0e91fb606606ffb4a25f7380c1704b49272c9f10 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sat, 1 Oct 2022 22:16:31 +0200 Subject: [PATCH 44/63] :bug: Fix CLI --normalize opt using fullpath in args --- charset_normalizer/cli/normalizer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/charset_normalizer/cli/normalizer.py b/charset_normalizer/cli/normalizer.py index 70293895..77f351f7 100644 --- a/charset_normalizer/cli/normalizer.py +++ b/charset_normalizer/cli/normalizer.py @@ -1,7 +1,7 @@ import argparse import sys from json import dumps -from os.path import abspath +from os.path import abspath, realpath, dirname, basename, join from platform import python_version from typing import List, Optional from unicodedata import unidata_version @@ -234,7 +234,10 @@ def cli_detect(argv: Optional[List[str]] = None) -> int: my_file.close() continue - o_: List[str] = my_file.name.split(".") + dir_path = dirname(realpath(my_file.name)) + file_name = basename(realpath(my_file.name)) + + o_: List[str] = file_name.split(".") if args.replace is False: o_.insert(-1, best_guess.encoding) @@ -255,7 +258,7 @@ def cli_detect(argv: Optional[List[str]] = None) -> int: continue try: - x_[0].unicode_path = abspath("./{}".format(".".join(o_))) + x_[0].unicode_path = join(dir_path, ".".join(o_)) with open(x_[0].unicode_path, "w", encoding="utf-8") as fp: fp.write(str(best_guess)) From 5910d20efca877ea32f28ad9b09fddf500f01a6c Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sat, 1 Oct 2022 22:17:14 +0200 Subject: [PATCH 45/63] :heavy_check_mark: Ensure tests run with cibuildwheel --- .github/workflows/python-publish.yml | 2 ++ tests/test_cli.py | 50 ++++++++++++++++------------ tests/test_full_detection.py | 8 ++++- 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 4a4f4e48..2042d90e 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -222,6 +222,8 @@ jobs: CIBW_ENVIRONMENT: CHARSET_NORMALIZER_USE_MYPYC='1' CIBW_CONFIG_SETTINGS: "--no-isolation" CIBW_BEFORE_BUILD: pip install -r dev-requirements.txt + CIBW_TEST_REQUIRES: pytest codecov pytest-cov + CIBW_TEST_COMMAND: pytest {package}/tests CIBW_SKIP: pp* - name: Upload artifacts uses: actions/upload-artifact@v3 diff --git a/tests/test_cli.py b/tests/test_cli.py index 440ce9df..d42bf46b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2,7 +2,12 @@ from charset_normalizer.cli.normalizer import cli_detect, query_yes_no from unittest.mock import patch from os.path import exists -from os import remove +from os import remove, path, pardir + +DIR_PATH = path.join( + path.dirname(path.realpath(__file__)), + pardir +) class TestCommandLineInterface(unittest.TestCase): @@ -24,7 +29,7 @@ def test_single_file(self): self.assertEqual( 0, cli_detect( - ['./data/sample-arabic-1.txt'] + [DIR_PATH + '/data/sample-arabic-1.txt'] ) ) @@ -38,16 +43,19 @@ def test_single_file_normalize(self): self.assertEqual( 0, cli_detect( - ['./data/sample-arabic-1.txt', '--normalize'] + [ + DIR_PATH + '/data/sample-arabic-1.txt', + '--normalize' + ] ) ) self.assertTrue( - exists('./data/sample-arabic-1.cp1256.txt') + exists(DIR_PATH + '/data/sample-arabic-1.cp1256.txt') ) try: - 
remove('./data/sample-arabic-1.cp1256.txt') + remove(DIR_PATH + '/data/sample-arabic-1.cp1256.txt') except: pass @@ -55,7 +63,7 @@ def test_single_verbose_file(self): self.assertEqual( 0, cli_detect( - ['./data/sample-arabic-1.txt', '--verbose'] + [DIR_PATH + '/data/sample-arabic-1.txt', '--verbose'] ) ) @@ -64,9 +72,9 @@ def test_multiple_file(self): 0, cli_detect( [ - './data/sample-arabic-1.txt', - './data/sample-french.txt', - './data/sample-chinese.txt' + DIR_PATH + '/data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-french.txt', + DIR_PATH + '/data/sample-chinese.txt' ] ) ) @@ -77,9 +85,9 @@ def test_with_alternative(self): cli_detect( [ '-a', - './data/sample-arabic-1.txt', - './data/sample-french.txt', - './data/sample-chinese.txt' + DIR_PATH + '/data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-french.txt', + DIR_PATH + '/data/sample-chinese.txt' ] ) ) @@ -90,9 +98,9 @@ def test_with_minimal_output(self): cli_detect( [ '-m', - './data/sample-arabic-1.txt', - './data/sample-french.txt', - './data/sample-chinese.txt' + DIR_PATH + '/data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-french.txt', + DIR_PATH + '/data/sample-chinese.txt' ] ) ) @@ -104,9 +112,9 @@ def test_with_minimal_and_alt(self): [ '-m', '-a', - './data/sample-arabic-1.txt', - './data/sample-french.txt', - './data/sample-chinese.txt' + DIR_PATH + '/data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-french.txt', + DIR_PATH + '/data/sample-chinese.txt' ] ) ) @@ -115,7 +123,7 @@ def test_non_existent_file(self): with self.assertRaises(SystemExit) as cm: cli_detect( - ['./data/not_found_data.txt'] + [DIR_PATH + '/data/not_found_data.txt'] ) self.assertEqual(cm.exception.code, 2) @@ -125,7 +133,7 @@ def test_replace_without_normalize(self): self.assertEqual( cli_detect( [ - './data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-arabic-1.txt', '--replace' ] ), @@ -136,7 +144,7 @@ def test_force_replace_without_replace(self): self.assertEqual( cli_detect( [ - './data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-arabic-1.txt', '--force' ] ), diff --git a/tests/test_full_detection.py b/tests/test_full_detection.py index 96e0b797..fd8ac80c 100644 --- a/tests/test_full_detection.py +++ b/tests/test_full_detection.py @@ -1,5 +1,11 @@ from charset_normalizer.api import from_path import pytest +from os import path, pardir + +DIR_PATH = path.join( + path.dirname(path.realpath(__file__)), + pardir +) @pytest.mark.parametrize( @@ -30,7 +36,7 @@ def test_elementary_detection( expected_charset: str, expected_language: str, ): - best_guess = from_path("./data/{}".format(input_data_file)).best() + best_guess = from_path(DIR_PATH + "/data/{}".format(input_data_file)).best() assert best_guess is not None, "Elementary detection has failed upon '{}'".format(input_data_file) assert best_guess.encoding == expected_charset, "Elementary charset detection has failed upon '{}'".format(input_data_file) From 093889b224e64310670c49e0758c061a36ce6a59 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sat, 1 Oct 2022 22:20:34 +0200 Subject: [PATCH 46/63] :art: apply isort on normalizer.py --- charset_normalizer/cli/normalizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charset_normalizer/cli/normalizer.py b/charset_normalizer/cli/normalizer.py index 77f351f7..ad26b4d0 100644 --- a/charset_normalizer/cli/normalizer.py +++ b/charset_normalizer/cli/normalizer.py @@ -1,7 +1,7 @@ import argparse import sys from json import dumps -from os.path import abspath, realpath, dirname, basename, join +from os.path import 
abspath, basename, dirname, join, realpath from platform import python_version from typing import List, Optional from unicodedata import unidata_version From d0df3f49377992dd3ec32e83bd2538bd03dae52d Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sun, 2 Oct 2022 13:56:19 +0200 Subject: [PATCH 47/63] :sparkle: Extend the capability of explain=True when cp_isolation contain at most two entries, will log in details the Mess-detector results --- CHANGELOG.md | 11 +++++++++++ charset_normalizer/api.py | 6 +++++- charset_normalizer/md.py | 21 +++++++++++++++++++-- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9dee06a8..f1c893c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,17 @@ All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [3.0.0b3](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0b3) (2022-10-??) + +### Added +- Extend the capability of explain=True when cp_isolation contain at most two entries, will log in details the Mess-detector results + +### Changed +- Build with static metadata using 'build' frontend + +### Fixed +- CLI with opt --normalize fail when using full path for files + ## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21) ### Added diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py index c2e54356..1edd92f0 100644 --- a/charset_normalizer/api.py +++ b/charset_normalizer/api.py @@ -302,7 +302,11 @@ def from_bytes( ): md_chunks.append(chunk) - md_ratios.append(mess_ratio(chunk, threshold)) + md_ratios.append( + mess_ratio( + chunk, threshold, explain is True and len(cp_isolation) <= 2 + ) + ) if md_ratios[-1] >= threshold: early_stop_count += 1 diff --git a/charset_normalizer/md.py b/charset_normalizer/md.py index 31808af8..8c0eb095 100644 --- a/charset_normalizer/md.py +++ b/charset_normalizer/md.py @@ -1,7 +1,12 @@ from functools import lru_cache +from logging import getLogger from typing import List, Optional -from .constant import COMMON_SAFE_ASCII_CHARACTERS, UNICODE_SECONDARY_RANGE_KEYWORD +from .constant import ( + COMMON_SAFE_ASCII_CHARACTERS, + TRACE, + UNICODE_SECONDARY_RANGE_KEYWORD, +) from .utils import ( is_accentuated, is_ascii, @@ -547,7 +552,19 @@ def mess_ratio( break if debug: + logger = getLogger("charset_normalizer") + + logger.log( + TRACE, + "Mess-detector extended-analysis start. 
" + f"{intermediary_mean_mess_ratio_calc=} {mean_mess_ratio=} {maximum_threshold=}", + ) + + if len(decoded_sequence) > 16: + logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}") + logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}") + for dt in detectors: # pragma: nocover - print(dt.__class__, dt.ratio) + logger.log(TRACE, f"{dt.__class__}: {dt.ratio}") return round(mean_mess_ratio, 3) From 32cbafeef71ef6988f6815dea51fb48820c6cc55 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sun, 2 Oct 2022 13:56:56 +0200 Subject: [PATCH 48/63] :wrench: run_checks.sh adjust black target lvl py36 --- bin/run_checks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/run_checks.sh b/bin/run_checks.sh index 0ae730eb..1e135b35 100755 --- a/bin/run_checks.sh +++ b/bin/run_checks.sh @@ -8,7 +8,7 @@ fi set -x ${PREFIX}pytest -${PREFIX}black --check --diff --target-version=py35 charset_normalizer +${PREFIX}black --check --diff --target-version=py36 charset_normalizer ${PREFIX}flake8 charset_normalizer ${PREFIX}mypy charset_normalizer ${PREFIX}isort --check --diff charset_normalizer From b5ef79832c851f40c65c667b2a93962072aa2a9a Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sun, 2 Oct 2022 15:29:22 +0200 Subject: [PATCH 49/63] :ambulance: Fix invalid syntax fstring eq auto format (py 36) --- charset_normalizer/md.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charset_normalizer/md.py b/charset_normalizer/md.py index 8c0eb095..0152f326 100644 --- a/charset_normalizer/md.py +++ b/charset_normalizer/md.py @@ -557,7 +557,7 @@ def mess_ratio( logger.log( TRACE, "Mess-detector extended-analysis start. " - f"{intermediary_mean_mess_ratio_calc=} {mean_mess_ratio=} {maximum_threshold=}", + f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} maximum_threshold={maximum_threshold}", ) if len(decoded_sequence) > 16: From 2cb15cf6380f3795fa98c3ac51c01f28bf6c4eb8 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sun, 2 Oct 2022 15:38:36 +0200 Subject: [PATCH 50/63] Amend commit d0df3f49377992dd3ec32e83bd2538bd03dae52d --- CHANGELOG.md | 2 +- charset_normalizer/api.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f1c893c9..b1fce47f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [3.0.0b3](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0b3) (2022-10-??) 
### Added -- Extend the capability of explain=True when cp_isolation contain at most two entries, will log in details the Mess-detector results +- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results ### Changed - Build with static metadata using 'build' frontend diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py index 1edd92f0..c54fda32 100644 --- a/charset_normalizer/api.py +++ b/charset_normalizer/api.py @@ -304,7 +304,7 @@ def from_bytes( md_ratios.append( mess_ratio( - chunk, threshold, explain is True and len(cp_isolation) <= 2 + chunk, threshold, explain is True and cp_isolation and len(cp_isolation) <= 2 ) ) From 9b4a2095f5a3ad84ec8d4a42dd48b64fae3cec06 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sun, 2 Oct 2022 15:44:52 +0200 Subject: [PATCH 51/63] :art: reformat file (flake8) --- charset_normalizer/api.py | 2 +- charset_normalizer/md.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py index c54fda32..dbb2abf9 100644 --- a/charset_normalizer/api.py +++ b/charset_normalizer/api.py @@ -304,7 +304,7 @@ def from_bytes( md_ratios.append( mess_ratio( - chunk, threshold, explain is True and cp_isolation and len(cp_isolation) <= 2 + chunk, threshold, explain is True and 1 <= len(cp_isolation) <= 2 ) ) diff --git a/charset_normalizer/md.py b/charset_normalizer/md.py index 0152f326..d62a8bda 100644 --- a/charset_normalizer/md.py +++ b/charset_normalizer/md.py @@ -557,7 +557,8 @@ def mess_ratio( logger.log( TRACE, "Mess-detector extended-analysis start. " - f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} maximum_threshold={maximum_threshold}", + f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} " + f"maximum_threshold={maximum_threshold}", ) if len(decoded_sequence) > 16: From 5e2368ef47d61f73aac353b1859082d58e90f784 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Sun, 2 Oct 2022 17:11:24 +0200 Subject: [PATCH 52/63] :art: reformat api.py --- charset_normalizer/api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py index dbb2abf9..bee14481 100644 --- a/charset_normalizer/api.py +++ b/charset_normalizer/api.py @@ -304,7 +304,9 @@ def from_bytes( md_ratios.append( mess_ratio( - chunk, threshold, explain is True and 1 <= len(cp_isolation) <= 2 + chunk, + threshold, + explain is True and 1 <= len(cp_isolation) <= 2, ) ) From c76a83d5f01552364fe3da9bf3ea3010e491f7bf Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Thu, 6 Oct 2022 17:59:12 +0200 Subject: [PATCH 53/63] :sparkle: Support for alternative language frequency set + :fire: Coherence detector no longer return 'Simple English' instead return 'English' --- CHANGELOG.md | 4 ++ charset_normalizer/assets/__init__.py | 58 ++++++++++++++------------- charset_normalizer/cd.py | 31 +++++++++++++- tests/test_coherence_detection.py | 17 +++++++- 4 files changed, 80 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1fce47f..089e0bfd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
### Added - Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results +- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES ### Changed - Build with static metadata using 'build' frontend @@ -13,6 +14,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Fixed - CLI with opt --normalize fail when using full path for files +### Removed +- Coherence detector no longer return 'Simple English' instead return 'English' + ## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21) ### Added diff --git a/charset_normalizer/assets/__init__.py b/charset_normalizer/assets/__init__.py index 3c33ba30..029be349 100644 --- a/charset_normalizer/assets/__init__.py +++ b/charset_normalizer/assets/__init__.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- from typing import Dict, List +# Language label that contain the em dash "—" +# character are to be considered alternative seq to origin FREQUENCIES: Dict[str, List[str]] = { "English": [ "e", @@ -30,6 +32,34 @@ "z", "q", ], + "English—": [ + "e", + "a", + "t", + "i", + "o", + "n", + "s", + "r", + "h", + "l", + "d", + "c", + "m", + "u", + "f", + "p", + "g", + "w", + "b", + "y", + "v", + "k", + "j", + "x", + "z", + "q", + ], "German": [ "e", "n", @@ -956,34 +986,6 @@ "ö", "y", ], - "Simple English": [ - "e", - "a", - "t", - "i", - "o", - "n", - "s", - "r", - "h", - "l", - "d", - "c", - "m", - "u", - "f", - "p", - "g", - "w", - "b", - "y", - "v", - "k", - "j", - "x", - "z", - "q", - ], "Thai": [ "า", "น", diff --git a/charset_normalizer/cd.py b/charset_normalizer/cd.py index ee4b7424..a294257e 100644 --- a/charset_normalizer/cd.py +++ b/charset_normalizer/cd.py @@ -289,6 +289,33 @@ def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches: return sorted(merge, key=lambda x: x[1], reverse=True) +def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches: + """ + We shall NOT return "English—" in CoherenceMatches because it is an alternative + of "English". This function only keeps the best match and remove the em-dash in it. 
+ """ + index_results: Dict[str, List[float]] = dict() + + for result in results: + language, ratio = result + no_em_name: str = language.replace("—", "") + + if no_em_name not in index_results: + index_results[no_em_name] = [] + + index_results[no_em_name].append(ratio) + + if any(len(index_results[e]) > 1 for e in index_results): + filtered_results: CoherenceMatches = [] + + for language in index_results: + filtered_results.append((language, max(index_results[language]))) + + return filtered_results + + return results + + @lru_cache(maxsize=2048) def coherence_ratio( decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None @@ -336,4 +363,6 @@ def coherence_ratio( if sufficient_match_count >= 3: break - return sorted(results, key=lambda x: x[1], reverse=True) + return sorted( + filter_alt_coherence_matches(results), key=lambda x: x[1], reverse=True + ) diff --git a/tests/test_coherence_detection.py b/tests/test_coherence_detection.py index 6ad95927..7e399132 100644 --- a/tests/test_coherence_detection.py +++ b/tests/test_coherence_detection.py @@ -1,5 +1,5 @@ import pytest -from charset_normalizer.cd import encoding_languages, mb_encoding_languages, is_multi_byte_encoding, get_target_features +from charset_normalizer.cd import encoding_languages, mb_encoding_languages, is_multi_byte_encoding, get_target_features, filter_alt_coherence_matches @pytest.mark.parametrize( @@ -39,3 +39,18 @@ def test_target_features(language, expected_have_accents, expected_pure_latin): assert target_have_accents is expected_have_accents assert target_pure_latin is expected_pure_latin + + +@pytest.mark.parametrize( + "matches, expected_return", + [ + ([("English", 0.88,), ("English—", 0.99)], [("English", 0.99)]), + ([("English", 0.88,), ("English—", 0.99), ("English——", 0.999)], [("English", 0.999)]), + ([("English", 0.88,), ("English—", 0.77)], [("English", 0.88)]), + ([("English", 0.88,), ("Italian", 0.77)], [("English", 0.88), ("Italian", 0.77)]), + ] +) +def test_filter_alt_coherence_matches(matches, expected_return): + results = filter_alt_coherence_matches(matches) + + assert results == expected_return From 70c551a203c3c93593c21b90902de0e059566d6c Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Tue, 18 Oct 2022 09:06:18 +0200 Subject: [PATCH 54/63] :sparkle: Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio --- CHANGELOG.md | 1 + charset_normalizer/api.py | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 089e0bfd..230f448a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
### Added - Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results - Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES +- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio ### Changed - Build with static metadata using 'build' frontend diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py index bee14481..6c7e8983 100644 --- a/charset_normalizer/api.py +++ b/charset_normalizer/api.py @@ -39,6 +39,7 @@ def from_bytes( cp_exclusion: Optional[List[str]] = None, preemptive_behaviour: bool = True, explain: bool = False, + language_threshold: float = 0.1, ) -> CharsetMatches: """ Given a raw bytes sequence, return the best possibles charset usable to render str objects. @@ -400,7 +401,9 @@ def from_bytes( if encoding_iana != "ascii": for chunk in md_chunks: chunk_languages = coherence_ratio( - chunk, 0.1, ",".join(target_languages) if target_languages else None + chunk, + language_threshold, + ",".join(target_languages) if target_languages else None, ) cd_ratios.append(chunk_languages) @@ -502,6 +505,7 @@ def from_fp( cp_exclusion: Optional[List[str]] = None, preemptive_behaviour: bool = True, explain: bool = False, + language_threshold: float = 0.1, ) -> CharsetMatches: """ Same thing than the function from_bytes but using a file pointer that is already ready. @@ -516,6 +520,7 @@ def from_fp( cp_exclusion, preemptive_behaviour, explain, + language_threshold, ) @@ -528,6 +533,7 @@ def from_path( cp_exclusion: Optional[List[str]] = None, preemptive_behaviour: bool = True, explain: bool = False, + language_threshold: float = 0.1, ) -> CharsetMatches: """ Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode. @@ -543,4 +549,5 @@ def from_path( cp_exclusion, preemptive_behaviour, explain, + language_threshold, ) From 14689be661abbd86769597149db7564b4ce5d899 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Tue, 18 Oct 2022 09:07:31 +0200 Subject: [PATCH 55/63] :wrench: Make the language detection stricter Improve the condition on issue #200 --- CHANGELOG.md | 1 + charset_normalizer/assets/__init__.py | 388 +++++++++++++++++++++++--- charset_normalizer/cd.py | 43 ++- tests/test_full_detection.py | 2 +- 4 files changed, 387 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 230f448a..112c8a42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
### Changed - Build with static metadata using 'build' frontend +- Make the language detection stricter ### Fixed - CLI with opt --normalize fail when using full path for files diff --git a/charset_normalizer/assets/__init__.py b/charset_normalizer/assets/__init__.py index 029be349..9075930d 100644 --- a/charset_normalizer/assets/__init__.py +++ b/charset_normalizer/assets/__init__.py @@ -256,33 +256,303 @@ "ж", "ц", ], + # Jap-Kanji "Japanese": [ + "人", + "一", + "大", + "亅", + "丁", + "丨", + "竹", + "笑", + "口", + "日", + "今", + "二", + "彳", + "行", + "十", + "土", + "丶", + "寸", + "寺", + "時", + "乙", + "丿", + "乂", + "气", + "気", + "冂", + "巾", + "亠", + "市", + "目", + "儿", + "見", + "八", + "小", + "凵", + "県", + "月", + "彐", + "門", + "間", + "木", + "東", + "山", + "出", + "本", + "中", + "刀", + "分", + "耳", + "又", + "取", + "最", + "言", + "田", + "心", + "思", + "刂", + "前", + "京", + "尹", + "事", + "生", + "厶", + "云", + "会", + "未", + "来", + "白", + "冫", + "楽", + "灬", + "馬", + "尸", + "尺", + "駅", + "明", + "耂", + "者", + "了", + "阝", + "都", + "高", + "卜", + "占", + "厂", + "广", + "店", + "子", + "申", + "奄", + "亻", + "俺", + "上", + "方", + "冖", + "学", + "衣", + "艮", + "食", + "自", + ], + # Jap-Katakana + "Japanese—": [ + "ー", + "ン", + "ス", + "・", + "ル", + "ト", + "リ", + "イ", + "ア", + "ラ", + "ッ", + "ク", + "ド", + "シ", + "レ", + "ジ", + "タ", + "フ", + "ロ", + "カ", + "テ", + "マ", + "ィ", + "グ", + "バ", + "ム", + "プ", + "オ", + "コ", + "デ", + "ニ", + "ウ", + "メ", + "サ", + "ビ", + "ナ", + "ブ", + "ャ", + "エ", + "ュ", + "チ", + "キ", + "ズ", + "ダ", + "パ", + "ミ", + "ェ", + "ョ", + "ハ", + "セ", + "ベ", + "ガ", + "モ", + "ツ", + "ネ", + "ボ", + "ソ", + "ノ", + "ァ", + "ヴ", + "ワ", + "ポ", + "ペ", + "ピ", + "ケ", + "ゴ", + "ギ", + "ザ", + "ホ", + "ゲ", + "ォ", + "ヤ", + "ヒ", + "ユ", + "ヨ", + "ヘ", + "ゼ", + "ヌ", + "ゥ", + "ゾ", + "ヶ", + "ヂ", + "ヲ", + "ヅ", + "ヵ", + "ヱ", + "ヰ", + "ヮ", + "ヽ", + "゠", + "ヾ", + "ヷ", + "ヿ", + "ヸ", + "ヹ", + "ヺ", + ], + # Jap-Hiragana + "Japanese——": [ "の", "に", "る", "た", - "は", - "ー", "と", + "は", "し", + "い", "を", "で", "て", "が", - "い", - "ン", - "れ", "な", - "年", - "ス", - "っ", - "ル", + "れ", "か", "ら", - "あ", "さ", - "も", + "っ", "り", + "す", + "あ", + "も", + "こ", + "ま", + "う", + "く", + "よ", + "き", + "ん", + "め", + "お", + "け", + "そ", + "つ", + "だ", + "や", + "え", + "ど", + "わ", + "ち", + "み", + "せ", + "じ", + "ば", + "へ", + "び", + "ず", + "ろ", + "ほ", + "げ", + "む", + "べ", + "ひ", + "ょ", + "ゆ", + "ぶ", + "ご", + "ゃ", + "ね", + "ふ", + "ぐ", + "ぎ", + "ぼ", + "ゅ", + "づ", + "ざ", + "ぞ", + "ぬ", + "ぜ", + "ぱ", + "ぽ", + "ぷ", + "ぴ", + "ぃ", + "ぁ", + "ぇ", + "ぺ", + "ゞ", + "ぢ", + "ぉ", + "ぅ", + "ゐ", + "ゝ", + "ゑ", + "゛", + "゜", + "ゎ", + "ゔ", + "゚", + "ゟ", + "゙", + "ゕ", + "ゖ", ], "Portuguese": [ "a", @@ -370,6 +640,77 @@ "就", "出", "会", + "可", + "也", + "你", + "对", + "生", + "能", + "而", + "子", + "那", + "得", + "于", + "着", + "下", + "自", + "之", + "年", + "过", + "发", + "后", + "作", + "里", + "用", + "道", + "行", + "所", + "然", + "家", + "种", + "事", + "成", + "方", + "多", + "经", + "么", + "去", + "法", + "学", + "如", + "都", + "同", + "现", + "当", + "没", + "动", + "面", + "起", + "看", + "定", + "天", + "分", + "还", + "进", + "好", + "小", + "部", + "其", + "些", + "主", + "样", + "理", + "心", + "她", + "本", + "前", + "开", + "但", + "因", + "只", + "从", + "想", + "实", ], "Ukrainian": [ "о", @@ -1068,31 +1409,6 @@ "ஒ", "ஸ", ], - "Classical Chinese": [ - "之", - "年", - "為", - "也", - "以", - "一", - "人", - "其", - "者", - "國", - "有", - "二", - "十", - "於", - "曰", - "三", - "不", - "大", - "而", - "子", - "中", - "五", - "四", - ], "Kazakh": [ "а", "ы", diff --git a/charset_normalizer/cd.py b/charset_normalizer/cd.py index a294257e..ae2813fb 100644 --- 
a/charset_normalizer/cd.py +++ b/charset_normalizer/cd.py @@ -105,7 +105,7 @@ def mb_encoding_languages(iana_name: str) -> List[str]: ): return ["Japanese"] if iana_name.startswith("gb") or iana_name in ZH_NAMES: - return ["Chinese", "Classical Chinese"] + return ["Chinese"] if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES: return ["Korean"] @@ -179,22 +179,45 @@ def characters_popularity_compare( character_approved_count: int = 0 FREQUENCIES_language_set = set(FREQUENCIES[language]) - for character in ordered_characters: + ordered_characters_count: int = len(ordered_characters) + target_language_characters_count: int = len(FREQUENCIES[language]) + + large_alphabet: bool = target_language_characters_count > 26 + + for character, character_rank in zip( + ordered_characters, range(0, ordered_characters_count) + ): if character not in FREQUENCIES_language_set: continue + character_rank_in_language: int = FREQUENCIES[language].index(character) + expected_projection_ratio: float = ( + target_language_characters_count / ordered_characters_count + ) + character_rank_projection: int = int(character_rank * expected_projection_ratio) + + if ( + large_alphabet is False + and abs(character_rank_projection - character_rank_in_language) > 4 + ): + continue + + if ( + large_alphabet is True + and abs(character_rank_projection - character_rank_in_language) + < target_language_characters_count / 3 + ): + character_approved_count += 1 + continue + characters_before_source: List[str] = FREQUENCIES[language][ - 0 : FREQUENCIES[language].index(character) + 0:character_rank_in_language ] characters_after_source: List[str] = FREQUENCIES[language][ - FREQUENCIES[language].index(character) : - ] - characters_before: List[str] = ordered_characters[ - 0 : ordered_characters.index(character) - ] - characters_after: List[str] = ordered_characters[ - ordered_characters.index(character) : + character_rank_in_language: ] + characters_before: List[str] = ordered_characters[0:character_rank] + characters_after: List[str] = ordered_characters[character_rank:] before_match_count: int = len( set(characters_before) & set(characters_before_source) diff --git a/tests/test_full_detection.py b/tests/test_full_detection.py index fd8ac80c..adff8801 100644 --- a/tests/test_full_detection.py +++ b/tests/test_full_detection.py @@ -16,7 +16,7 @@ ('sample-arabic.txt', 'utf_8', 'Arabic'), ('sample-russian-3.txt', 'utf_8', 'Russian'), ('sample-french.txt', 'utf_8', 'French'), - ('sample-chinese.txt', 'big5', 'Classical Chinese'), + ('sample-chinese.txt', 'big5', 'Chinese'), ('sample-greek.txt', 'cp1253', 'Greek'), ('sample-greek-2.txt', 'cp1253', 'Greek'), ('sample-hebrew-2.txt', 'cp1255', 'Hebrew'), From 8f91aa4eb6fbb81a1024b2e4c93626e2383b6419 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Tue, 18 Oct 2022 09:08:48 +0200 Subject: [PATCH 56/63] :bug: TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it --- CHANGELOG.md | 1 + charset_normalizer/md.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 112c8a42..46e56147 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
### Fixed - CLI with opt --normalize fail when using full path for files +- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it ### Removed - Coherence detector no longer return 'Simple English' instead return 'English' diff --git a/charset_normalizer/md.py b/charset_normalizer/md.py index d62a8bda..56e9321a 100644 --- a/charset_normalizer/md.py +++ b/charset_normalizer/md.py @@ -128,7 +128,7 @@ def reset(self) -> None: # pragma: no cover @property def ratio(self) -> float: - if self._character_count == 0: + if self._character_count == 0 or self._character_count < 8: return 0.0 ratio_of_accentuation: float = self._accentuated_count / self._character_count return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0 From e0010ff55b4d4b553838789576035c45685dceb6 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Tue, 18 Oct 2022 09:10:02 +0200 Subject: [PATCH 57/63] :bookmark: Bump version rc1 --- charset_normalizer/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charset_normalizer/version.py b/charset_normalizer/version.py index c05c9cd1..25bf3bcf 100644 --- a/charset_normalizer/version.py +++ b/charset_normalizer/version.py @@ -2,5 +2,5 @@ Expose version """ -__version__ = "3.0.0b2" +__version__ = "3.0.0rc1" VERSION = __version__.split(".") From 840a6e08a9f1ac3d64b77b05be6c9ae52451bc87 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Tue, 18 Oct 2022 20:27:06 +0200 Subject: [PATCH 58/63] :wrench: Ensure proper version lock (i) ensure build are reproductible (ii) still support python 3.6 --- dev-requirements.txt | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 1760f9f5..1bc2ee26 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,12 +1,24 @@ -black==22.8.0 flake8==5.0.4 -mypy>=0.970 -Flask>=2.0.3,<=2.2.2 chardet==5.0.0 isort==5.10.1 codecov==2.1.12 -pytest>=7.0.0,<8.0 pytest-cov==4.0.0 build==0.8.0 -requests>=2.27.1,<3.0.0 wheel==0.37.1 + +# The vast majority of project dropped Python 3.6 +# This is to ensure build are reproducible >=3.6 +black==22.8.0; python_version < "3.7" +black==22.10.0; python_version >= "3.7" + +mypy==0.982; python_version >= "3.7" +mypy==0.970; python_version < "3.7" + +Flask==2.2.2; python_version >= "3.7" +Flask==2.0.3; python_version < "3.7" + +pytest==7.0.0; python_version < "3.7" +pytest==7.1.3; python_version >= "3.7" + +requests==2.27.1; python_version < "3.7" +requests==2.28.1; python_version >= "3.7" From 9b8b048767901f08c3d7ec40beb0c975d7eda438 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Tue, 18 Oct 2022 20:29:04 +0200 Subject: [PATCH 59/63] :wrench: set target-version to py36 black autofix script --- bin/run_autofix.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/run_autofix.sh b/bin/run_autofix.sh index f853cacd..e88f45c6 100755 --- a/bin/run_autofix.sh +++ b/bin/run_autofix.sh @@ -7,5 +7,5 @@ fi set -x -${PREFIX}black --target-version=py35 charset_normalizer +${PREFIX}black --target-version=py36 charset_normalizer ${PREFIX}isort charset_normalizer From 13d9a99617af987227b7f36c8d46152991c1806a Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Tue, 18 Oct 2022 20:43:44 +0200 Subject: [PATCH 60/63] :wrench: mypy ver lock for py 3.6 revised --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 1bc2ee26..91e06b88 100644 --- 
a/dev-requirements.txt +++ b/dev-requirements.txt @@ -12,7 +12,7 @@ black==22.8.0; python_version < "3.7" black==22.10.0; python_version >= "3.7" mypy==0.982; python_version >= "3.7" -mypy==0.970; python_version < "3.7" +mypy==0.971; python_version < "3.7" Flask==2.2.2; python_version >= "3.7" Flask==2.0.3; python_version < "3.7" From f8e1153a0f9a392227f770a97e1f6d8a84a5e22e Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Tue, 18 Oct 2022 20:48:05 +0200 Subject: [PATCH 61/63] :pencil: Adjust speedup docs section --- docs/community/speedup.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/community/speedup.rst b/docs/community/speedup.rst index cefc57e4..ea45b297 100644 --- a/docs/community/speedup.rst +++ b/docs/community/speedup.rst @@ -26,6 +26,7 @@ Following those instructions (provided you have the necessary toolchain installe :: export CHARSET_NORMALIZER_USE_MYPYC=1 + pip install mypy build wheel pip install charset-normalizer --no-binary :all: From b15f416535fbfd49120bb2284672ef52616a27e0 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Tue, 18 Oct 2022 20:51:50 +0200 Subject: [PATCH 62/63] :pencil: Update CHANGELOG.md rc1 publish --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46e56147..0bd07b40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). -## [3.0.0b3](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0b3) (2022-10-??) +## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18) ### Added - Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results From 6367d5343791c8e1e9f54fe3055f80cd41b73ce8 Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Tue, 18 Oct 2022 20:55:22 +0200 Subject: [PATCH 63/63] :pencil: Missing CHANGELOG entry and add language_threshold to docs::advanced usage --- CHANGELOG.md | 1 + docs/user/advanced_search.rst | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bd07b40..dcfd8f76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Removed - Coherence detector no longer return 'Simple English' instead return 'English' +- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese' ## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21) diff --git a/docs/user/advanced_search.rst b/docs/user/advanced_search.rst index b4441e58..a269cd10 100644 --- a/docs/user/advanced_search.rst +++ b/docs/user/advanced_search.rst @@ -18,7 +18,8 @@ As follow :: cp_isolation=None, # Finite list of encoding to use when searching for a match cp_exclusion=None, # Finite list of encoding to avoid when searching for a match preemptive_behaviour=True, # Determine if we should look into my_byte_str (ASCII-Mode) for pre-defined encoding - explain=False # Print on screen what is happening when searching for a match + explain=False, # Print on screen what is happening when searching for a match + language_threshold=0.1 # Minimum coherence ratio / language ratio match accepted )
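
The two tuning knobs introduced in this series, the extended `explain=True` TRACE output of the Mess-detector (PATCH 47, refined in PATCH 50 and PATCH 51) and the `language_threshold` parameter (PATCH 54), can be combined in a single call. A minimal usage sketch follows; the payload bytes and the `cp_isolation` values are illustrative assumptions, while the parameter names and the 0.1 default come straight from the diffs above ::

    from charset_normalizer import from_bytes

    # Illustrative payload: Bulgarian text encoded with a legacy Cyrillic codepage.
    payload = "Всеки човек има право на образование.".encode("cp1251")

    results = from_bytes(
        payload,
        cp_isolation=["cp1251", "utf_8"],  # 1 <= len(cp_isolation) <= 2, so explain=True
                                           # also emits the Mess-detector TRACE details
        explain=True,                      # log what happens while searching for a match
        language_threshold=0.1,            # minimum accepted coherence ratio (the default)
    )

    best_guess = results.best()  # may be None when no charset fits

    if best_guess is not None:
        print(best_guess.encoding, best_guess.language)  # e.g. "cp1251 Bulgarian"

Raising `language_threshold` above 0.1 makes the coherence layer stricter about which languages it reports for a match; lowering it keeps more marginal candidates in the results.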