diff --git a/.github/workflows/chardet-bc.yml b/.github/workflows/chardet-bc.yml index 0bbeaec8..dfbc64cc 100644 --- a/.github/workflows/chardet-bc.yml +++ b/.github/workflows/chardet-bc.yml @@ -25,7 +25,8 @@ jobs: pip uninstall -y charset-normalizer - name: Install the package run: | - python setup.py install + python -m build + pip install ./dist/*.whl - name: Clone the complete dataset run: | git clone https://github.com/Ousret/char-dataset.git diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index 1a7014d5..00000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,56 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -name: "CodeQL" - -on: - push: - branches: [master, develop] - pull_request: - # The branches below must be a subset of the branches above - branches: [master, develop] - schedule: - - cron: '0 23 * * 0' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - - strategy: - matrix: - python-version: [3.9] - fail-fast: false - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - with: - # We must fetch at least the immediate parents so that if this is - # a pull request then we can checkout the head. - fetch-depth: 2 - - # If this run was triggered by a pull request event, then checkout - # the head of the pull request instead of the merge commit. - - run: git checkout HEAD^2 - if: ${{ github.event_name == 'pull_request' }} - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - with: - languages: "python" - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: ./path/to/local/query, your-org/your-repo/queries@main - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
- # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 diff --git a/.github/workflows/detector-coverage.yml b/.github/workflows/detector-coverage.yml index 19eed9ae..1527f22b 100644 --- a/.github/workflows/detector-coverage.yml +++ b/.github/workflows/detector-coverage.yml @@ -25,7 +25,8 @@ jobs: pip uninstall -y charset-normalizer - name: Install the package run: | - python setup.py install + python -m build + pip install ./dist/*.whl - name: Clone the complete dataset run: | git clone https://github.com/Ousret/char-dataset.git diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index f74a56d2..00aa98eb 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -28,7 +28,8 @@ jobs: pip uninstall -y charset-normalizer - name: Install the package run: | - python setup.py install + python -m build + pip install ./dist/*.whl - name: Clone the complete dataset run: | git clone https://github.com/Ousret/char-dataset.git diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 877b890e..4f1f12f4 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -25,7 +25,8 @@ jobs: pip uninstall -y charset-normalizer - name: Install the package run: | - python setup.py install + python -m build + pip install ./dist/*.whl - name: Type checking (Mypy) run: | mypy --strict charset_normalizer diff --git a/.github/workflows/mypyc-verify.yml b/.github/workflows/mypyc-verify.yml new file mode 100644 index 00000000..e9b2a9c7 --- /dev/null +++ b/.github/workflows/mypyc-verify.yml @@ -0,0 +1,40 @@ +name: MYPYC Run + +on: [push, pull_request] + +jobs: + detection_coverage: + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + python-version: [3.6, 3.7, 3.8, 3.9, "3.10"] + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + pip install -U pip setuptools + pip install -r dev-requirements.txt + pip uninstall -y charset-normalizer + - name: Install the package + env: + CHARSET_NORMALIZER_USE_MYPYC: '1' + run: | + python -m build --no-isolation + pip install ./dist/*.whl + - name: Clone the complete dataset + run: | + git clone https://github.com/Ousret/char-dataset.git + - name: Coverage WITH preemptive + run: | + python ./bin/coverage.py --coverage 97 --with-preemptive + - name: Coverage WITHOUT preemptive + run: | + python ./bin/coverage.py --coverage 95 diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index fddd9d30..e675061a 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -25,7 +25,8 @@ jobs: pip uninstall -y charset-normalizer - name: Install the package run: | - python setup.py install + python -m build + pip install ./dist/*.whl - name: Clone the complete dataset run: | git clone https://github.com/Ousret/char-dataset.git diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index d9e664c1..2042d90e 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -29,7 +29,8 @@ jobs: pip uninstall -y charset-normalizer - name: Install the package run: | - python setup.py install + python -m build + pip 
install ./dist/*.whl - name: Type checking (Mypy) run: | mypy charset_normalizer @@ -51,7 +52,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10" ] + python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10", "3.11-dev" ] os: [ ubuntu-latest ] steps: @@ -67,7 +68,8 @@ jobs: pip uninstall -y charset-normalizer - name: Install the package run: | - python setup.py install + python -m build + pip install ./dist/*.whl - name: Run tests run: | pytest @@ -96,7 +98,8 @@ jobs: pip uninstall -y charset-normalizer - name: Install the package run: | - python setup.py install + python -m build + pip install ./dist/*.whl - name: Clone the complete dataset run: | git clone https://github.com/Ousret/char-dataset.git @@ -136,7 +139,8 @@ jobs: pip uninstall -y charset-normalizer - name: Install the package run: | - python setup.py install + python -m build + pip install ./dist/*.whl - name: Clone the complete dataset run: | git clone https://github.com/Ousret/char-dataset.git @@ -146,11 +150,92 @@ jobs: - name: Integration Tests with Requests run: | python ./bin/integration.py + universal-wheel: + runs-on: ubuntu-latest + needs: + - integration + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Update pip, setuptools, wheel and twine + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build Wheel + env: + CHARSET_NORMALIZER_USE_MYPYC: '0' + run: python -m build + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + name: dist + path: dist + + build-wheels: + name: Build wheels on ${{ matrix.os }} ${{ matrix.qemu }} + runs-on: ${{ matrix.os }}-latest + needs: universal-wheel + strategy: + matrix: + os: [ ubuntu, windows, macos ] + qemu: [ '' ] + include: + # Split ubuntu job for the sake of speed-up + - os: ubuntu + qemu: aarch64 + - os: ubuntu + qemu: ppc64le + - os: ubuntu + qemu: s390x + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: true + - name: Set up QEMU + if: ${{ matrix.qemu }} + uses: docker/setup-qemu-action@v2 + with: + platforms: all + id: qemu + - name: Prepare emulation + run: | + if [[ -n "${{ matrix.qemu }}" ]]; then + # Build emulated architectures only if QEMU is set, + # use default "auto" otherwise + echo "CIBW_ARCHS_LINUX=${{ matrix.qemu }}" >> $GITHUB_ENV + fi + shell: bash + - name: Setup Python + uses: actions/setup-python@v4 + - name: Update pip, wheel, setuptools, build, twine + run: | + python -m pip install -U pip wheel setuptools build twine + - name: Build wheels + uses: pypa/cibuildwheel@2.10.2 + env: + CIBW_BUILD_FRONTEND: "build" + CIBW_ARCHS_MACOS: x86_64 arm64 universal2 + CIBW_ENVIRONMENT: CHARSET_NORMALIZER_USE_MYPYC='1' + CIBW_CONFIG_SETTINGS: "--no-isolation" + CIBW_BEFORE_BUILD: pip install -r dev-requirements.txt + CIBW_TEST_REQUIRES: pytest codecov pytest-cov + CIBW_TEST_COMMAND: pytest {package}/tests + CIBW_SKIP: pp* + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + name: dist + path: ./wheelhouse/*.whl + deploy: runs-on: ubuntu-latest needs: - - integration + - build-wheels steps: - uses: actions/checkout@v2 @@ -162,10 +247,17 @@ jobs: run: | python -m pip install --upgrade pip pip install setuptools wheel twine - - name: Build and publish + - name: Download disctributions + uses: actions/download-artifact@v3 + with: + name: dist + path: dist + - name: Collected dists + run: | + tree dist + - name: Publish env: TWINE_USERNAME: ${{ 
secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | - python setup.py sdist bdist_wheel twine upload dist/* diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 2e999729..27dc5d5f 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -25,7 +25,8 @@ jobs: pip uninstall -y charset-normalizer - name: Install the package run: | - python setup.py install + python -m build --no-isolation + pip install ./dist/*.whl - name: Run tests run: | pytest diff --git a/CHANGELOG.md b/CHANGELOG.md index b80e7cd1..dcfd8f76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,48 @@ All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18) + +### Added +- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results +- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES +- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio + +### Changed +- Build with static metadata using 'build' frontend +- Make the language detection stricter + +### Fixed +- CLI with opt --normalize fail when using full path for files +- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it + +### Removed +- Coherence detector no longer return 'Simple English' instead return 'English' +- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese' + +## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21) + +### Added +- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl) + +### Removed +- Breaking: Method `first()` and `best()` from CharsetMatch +- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII) + +### Fixed +- Sphinx warnings when generating the documentation + +## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15) + +### Changed +- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1 + +### Removed +- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches +- Breaking: Top-level function `normalize` +- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch +- Support for the backport `unicodedata2` + ## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19) ### Deprecated diff --git a/README.md b/README.md index d58ede1b..27736830 100644 --- a/README.md +++ b/README.md @@ -25,16 +25,16 @@ This project offers you an alternative to **Universal Charset Encoding Detector* | Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) | | ------------- | :-------------: | :------------------: | :------------------: | -| `Fast` | ❌
| :heavy_check_mark:
| :heavy_check_mark:
| -| `Universal**` | ❌ | :heavy_check_mark: | ❌ | -| `Reliable` **without** distinguishable standards | ❌ | :heavy_check_mark: | :heavy_check_mark: | -| `Reliable` **with** distinguishable standards | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| `Fast` | ❌
| ✅
| ✅
| +| `Universal**` | ❌ | ✅ | ❌ | +| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ | +| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ | | `License` | LGPL-2.1
_restrictive_ | MIT | MPL-1.1
_restrictive_ | -| `Native Python` | :heavy_check_mark: | :heavy_check_mark: | ❌ | -| `Detect spoken language` | ❌ | :heavy_check_mark: | N/A | -| `UnicodeDecodeError Safety` | ❌ | :heavy_check_mark: | ❌ | +| `Native Python` | ✅ | ✅ | ❌ | +| `Detect spoken language` | ❌ | ✅ | N/A | +| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ | | `Whl Size` | 193.6 kB | 39.5 kB | ~200 kB | -| `Supported Encoding` | 33 | :tada: [93](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 +| `Supported Encoding` | 33 | :tada: [90](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40

Reading Normalized TextCat Reading Text @@ -53,12 +53,12 @@ This package offer better performance than its counterpart Chardet. Here are som | Package | Accuracy | Mean per file (ms) | File per sec (est) | | ------------- | :-------------: | :------------------: | :------------------: | | [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec | -| charset-normalizer | **98 %** | **39 ms** | 26 file/sec | +| charset-normalizer | **98 %** | **10 ms** | 100 file/sec | | Package | 99th percentile | 95th percentile | 50th percentile | | ------------- | :-------------: | :------------------: | :------------------: | | [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms | -| charset-normalizer | 400 ms | 200 ms | 15 ms | +| charset-normalizer | 100 ms | 50 ms | 5 ms | Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload. @@ -68,9 +68,6 @@ Chardet's performance on larger file (1MB+) are very poor. Expect huge differenc > Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability > (eg. Supported Encoding) Challenge-them if you want. -[cchardet](https://github.com/PyYoshi/cChardet) is a non-native (cpp binding) and unmaintained faster alternative with -a better accuracy than chardet but lower than this package. If speed is the most important factor, you should try it. - ## ✨ Installation Using PyPi for latest stable @@ -78,11 +75,6 @@ Using PyPi for latest stable pip install charset-normalizer -U ``` -If you want a more up-to-date `unicodedata` than the one available in your Python setup. -```sh -pip install charset-normalizer[unicode_backport] -U -``` - ## 🚀 Basic Usage ### CLI diff --git a/bin/run_autofix.sh b/bin/run_autofix.sh index f853cacd..e88f45c6 100755 --- a/bin/run_autofix.sh +++ b/bin/run_autofix.sh @@ -7,5 +7,5 @@ fi set -x -${PREFIX}black --target-version=py35 charset_normalizer +${PREFIX}black --target-version=py36 charset_normalizer ${PREFIX}isort charset_normalizer diff --git a/bin/run_checks.sh b/bin/run_checks.sh index 0ae730eb..1e135b35 100755 --- a/bin/run_checks.sh +++ b/bin/run_checks.sh @@ -8,7 +8,7 @@ fi set -x ${PREFIX}pytest -${PREFIX}black --check --diff --target-version=py35 charset_normalizer +${PREFIX}black --check --diff --target-version=py36 charset_normalizer ${PREFIX}flake8 charset_normalizer ${PREFIX}mypy charset_normalizer ${PREFIX}isort --check --diff charset_normalizer diff --git a/charset_normalizer/__init__.py b/charset_normalizer/__init__.py index 2dcaf56f..ebb5da89 100644 --- a/charset_normalizer/__init__.py +++ b/charset_normalizer/__init__.py @@ -21,14 +21,8 @@ """ import logging -from .api import from_bytes, from_fp, from_path, normalize -from .legacy import ( - CharsetDetector, - CharsetDoctor, - CharsetNormalizerMatch, - CharsetNormalizerMatches, - detect, -) +from .api import from_bytes, from_fp, from_path +from .legacy import detect from .models import CharsetMatch, CharsetMatches from .utils import set_logging_handler from .version import VERSION, __version__ @@ -37,14 +31,9 @@ "from_fp", "from_path", "from_bytes", - "normalize", "detect", "CharsetMatch", "CharsetMatches", - "CharsetNormalizerMatch", - "CharsetNormalizerMatches", - "CharsetDetector", - "CharsetDoctor", "__version__", "VERSION", "set_logging_handler", diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py index b6c37e8b..6c7e8983 100644 --- a/charset_normalizer/api.py +++ b/charset_normalizer/api.py @@ -1,7 +1,5 @@ import 
logging -import warnings from os import PathLike -from os.path import basename, splitext from typing import Any, BinaryIO, List, Optional, Set from .cd import ( @@ -41,6 +39,7 @@ def from_bytes( cp_exclusion: Optional[List[str]] = None, preemptive_behaviour: bool = True, explain: bool = False, + language_threshold: float = 0.1, ) -> CharsetMatches: """ Given a raw bytes sequence, return the best possibles charset usable to render str objects. @@ -201,6 +200,13 @@ def from_bytes( encoding_iana, ) continue + if encoding_iana in {"utf_7"} and not bom_or_sig_available: + logger.log( + TRACE, + "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.", + encoding_iana, + ) + continue try: is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana) @@ -297,7 +303,13 @@ def from_bytes( ): md_chunks.append(chunk) - md_ratios.append(mess_ratio(chunk, threshold)) + md_ratios.append( + mess_ratio( + chunk, + threshold, + explain is True and 1 <= len(cp_isolation) <= 2, + ) + ) if md_ratios[-1] >= threshold: early_stop_count += 1 @@ -389,7 +401,9 @@ def from_bytes( if encoding_iana != "ascii": for chunk in md_chunks: chunk_languages = coherence_ratio( - chunk, 0.1, ",".join(target_languages) if target_languages else None + chunk, + language_threshold, + ",".join(target_languages) if target_languages else None, ) cd_ratios.append(chunk_languages) @@ -491,6 +505,7 @@ def from_fp( cp_exclusion: Optional[List[str]] = None, preemptive_behaviour: bool = True, explain: bool = False, + language_threshold: float = 0.1, ) -> CharsetMatches: """ Same thing than the function from_bytes but using a file pointer that is already ready. @@ -505,6 +520,7 @@ def from_fp( cp_exclusion, preemptive_behaviour, explain, + language_threshold, ) @@ -517,6 +533,7 @@ def from_path( cp_exclusion: Optional[List[str]] = None, preemptive_behaviour: bool = True, explain: bool = False, + language_threshold: float = 0.1, ) -> CharsetMatches: """ Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode. @@ -532,53 +549,5 @@ def from_path( cp_exclusion, preemptive_behaviour, explain, + language_threshold, ) - - -def normalize( - path: "PathLike[Any]", - steps: int = 5, - chunk_size: int = 512, - threshold: float = 0.20, - cp_isolation: Optional[List[str]] = None, - cp_exclusion: Optional[List[str]] = None, - preemptive_behaviour: bool = True, -) -> CharsetMatch: - """ - Take a (text-based) file path and try to create another file next to it, this time using UTF-8. 
- """ - warnings.warn( - "normalize is deprecated and will be removed in 3.0", - DeprecationWarning, - ) - - results = from_path( - path, - steps, - chunk_size, - threshold, - cp_isolation, - cp_exclusion, - preemptive_behaviour, - ) - - filename = basename(path) - target_extensions = list(splitext(filename)) - - if len(results) == 0: - raise IOError( - 'Unable to normalize "{}", no encoding charset seems to fit.'.format( - filename - ) - ) - - result = results.best() - - target_extensions[0] += "-" + result.encoding # type: ignore - - with open( - "{}".format(str(path).replace(filename, "".join(target_extensions))), "wb" - ) as fp: - fp.write(result.output()) # type: ignore - - return result # type: ignore diff --git a/charset_normalizer/assets/__init__.py b/charset_normalizer/assets/__init__.py index 3c33ba30..9075930d 100644 --- a/charset_normalizer/assets/__init__.py +++ b/charset_normalizer/assets/__init__.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- from typing import Dict, List +# Language label that contain the em dash "—" +# character are to be considered alternative seq to origin FREQUENCIES: Dict[str, List[str]] = { "English": [ "e", @@ -30,6 +32,34 @@ "z", "q", ], + "English—": [ + "e", + "a", + "t", + "i", + "o", + "n", + "s", + "r", + "h", + "l", + "d", + "c", + "m", + "u", + "f", + "p", + "g", + "w", + "b", + "y", + "v", + "k", + "j", + "x", + "z", + "q", + ], "German": [ "e", "n", @@ -226,33 +256,303 @@ "ж", "ц", ], + # Jap-Kanji "Japanese": [ + "人", + "一", + "大", + "亅", + "丁", + "丨", + "竹", + "笑", + "口", + "日", + "今", + "二", + "彳", + "行", + "十", + "土", + "丶", + "寸", + "寺", + "時", + "乙", + "丿", + "乂", + "气", + "気", + "冂", + "巾", + "亠", + "市", + "目", + "儿", + "見", + "八", + "小", + "凵", + "県", + "月", + "彐", + "門", + "間", + "木", + "東", + "山", + "出", + "本", + "中", + "刀", + "分", + "耳", + "又", + "取", + "最", + "言", + "田", + "心", + "思", + "刂", + "前", + "京", + "尹", + "事", + "生", + "厶", + "云", + "会", + "未", + "来", + "白", + "冫", + "楽", + "灬", + "馬", + "尸", + "尺", + "駅", + "明", + "耂", + "者", + "了", + "阝", + "都", + "高", + "卜", + "占", + "厂", + "广", + "店", + "子", + "申", + "奄", + "亻", + "俺", + "上", + "方", + "冖", + "学", + "衣", + "艮", + "食", + "自", + ], + # Jap-Katakana + "Japanese—": [ + "ー", + "ン", + "ス", + "・", + "ル", + "ト", + "リ", + "イ", + "ア", + "ラ", + "ッ", + "ク", + "ド", + "シ", + "レ", + "ジ", + "タ", + "フ", + "ロ", + "カ", + "テ", + "マ", + "ィ", + "グ", + "バ", + "ム", + "プ", + "オ", + "コ", + "デ", + "ニ", + "ウ", + "メ", + "サ", + "ビ", + "ナ", + "ブ", + "ャ", + "エ", + "ュ", + "チ", + "キ", + "ズ", + "ダ", + "パ", + "ミ", + "ェ", + "ョ", + "ハ", + "セ", + "ベ", + "ガ", + "モ", + "ツ", + "ネ", + "ボ", + "ソ", + "ノ", + "ァ", + "ヴ", + "ワ", + "ポ", + "ペ", + "ピ", + "ケ", + "ゴ", + "ギ", + "ザ", + "ホ", + "ゲ", + "ォ", + "ヤ", + "ヒ", + "ユ", + "ヨ", + "ヘ", + "ゼ", + "ヌ", + "ゥ", + "ゾ", + "ヶ", + "ヂ", + "ヲ", + "ヅ", + "ヵ", + "ヱ", + "ヰ", + "ヮ", + "ヽ", + "゠", + "ヾ", + "ヷ", + "ヿ", + "ヸ", + "ヹ", + "ヺ", + ], + # Jap-Hiragana + "Japanese——": [ "の", "に", "る", "た", - "は", - "ー", "と", + "は", "し", + "い", "を", "で", "て", "が", - "い", - "ン", - "れ", "な", - "年", - "ス", - "っ", - "ル", + "れ", "か", "ら", - "あ", "さ", - "も", + "っ", "り", + "す", + "あ", + "も", + "こ", + "ま", + "う", + "く", + "よ", + "き", + "ん", + "め", + "お", + "け", + "そ", + "つ", + "だ", + "や", + "え", + "ど", + "わ", + "ち", + "み", + "せ", + "じ", + "ば", + "へ", + "び", + "ず", + "ろ", + "ほ", + "げ", + "む", + "べ", + "ひ", + "ょ", + "ゆ", + "ぶ", + "ご", + "ゃ", + "ね", + "ふ", + "ぐ", + "ぎ", + "ぼ", + "ゅ", + "づ", + "ざ", + "ぞ", + "ぬ", + "ぜ", + "ぱ", + "ぽ", + "ぷ", + "ぴ", + "ぃ", + "ぁ", + "ぇ", + "ぺ", + "ゞ", + "ぢ", + "ぉ", + "ぅ", + "ゐ", 
+ "ゝ", + "ゑ", + "゛", + "゜", + "ゎ", + "ゔ", + "゚", + "ゟ", + "゙", + "ゕ", + "ゖ", ], "Portuguese": [ "a", @@ -340,6 +640,77 @@ "就", "出", "会", + "可", + "也", + "你", + "对", + "生", + "能", + "而", + "子", + "那", + "得", + "于", + "着", + "下", + "自", + "之", + "年", + "过", + "发", + "后", + "作", + "里", + "用", + "道", + "行", + "所", + "然", + "家", + "种", + "事", + "成", + "方", + "多", + "经", + "么", + "去", + "法", + "学", + "如", + "都", + "同", + "现", + "当", + "没", + "动", + "面", + "起", + "看", + "定", + "天", + "分", + "还", + "进", + "好", + "小", + "部", + "其", + "些", + "主", + "样", + "理", + "心", + "她", + "本", + "前", + "开", + "但", + "因", + "只", + "从", + "想", + "实", ], "Ukrainian": [ "о", @@ -956,34 +1327,6 @@ "ö", "y", ], - "Simple English": [ - "e", - "a", - "t", - "i", - "o", - "n", - "s", - "r", - "h", - "l", - "d", - "c", - "m", - "u", - "f", - "p", - "g", - "w", - "b", - "y", - "v", - "k", - "j", - "x", - "z", - "q", - ], "Thai": [ "า", "น", @@ -1066,31 +1409,6 @@ "ஒ", "ஸ", ], - "Classical Chinese": [ - "之", - "年", - "為", - "也", - "以", - "一", - "人", - "其", - "者", - "國", - "有", - "二", - "十", - "於", - "曰", - "三", - "不", - "大", - "而", - "子", - "中", - "五", - "四", - ], "Kazakh": [ "а", "ы", diff --git a/charset_normalizer/cd.py b/charset_normalizer/cd.py index ee4b7424..ae2813fb 100644 --- a/charset_normalizer/cd.py +++ b/charset_normalizer/cd.py @@ -105,7 +105,7 @@ def mb_encoding_languages(iana_name: str) -> List[str]: ): return ["Japanese"] if iana_name.startswith("gb") or iana_name in ZH_NAMES: - return ["Chinese", "Classical Chinese"] + return ["Chinese"] if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES: return ["Korean"] @@ -179,22 +179,45 @@ def characters_popularity_compare( character_approved_count: int = 0 FREQUENCIES_language_set = set(FREQUENCIES[language]) - for character in ordered_characters: + ordered_characters_count: int = len(ordered_characters) + target_language_characters_count: int = len(FREQUENCIES[language]) + + large_alphabet: bool = target_language_characters_count > 26 + + for character, character_rank in zip( + ordered_characters, range(0, ordered_characters_count) + ): if character not in FREQUENCIES_language_set: continue + character_rank_in_language: int = FREQUENCIES[language].index(character) + expected_projection_ratio: float = ( + target_language_characters_count / ordered_characters_count + ) + character_rank_projection: int = int(character_rank * expected_projection_ratio) + + if ( + large_alphabet is False + and abs(character_rank_projection - character_rank_in_language) > 4 + ): + continue + + if ( + large_alphabet is True + and abs(character_rank_projection - character_rank_in_language) + < target_language_characters_count / 3 + ): + character_approved_count += 1 + continue + characters_before_source: List[str] = FREQUENCIES[language][ - 0 : FREQUENCIES[language].index(character) + 0:character_rank_in_language ] characters_after_source: List[str] = FREQUENCIES[language][ - FREQUENCIES[language].index(character) : - ] - characters_before: List[str] = ordered_characters[ - 0 : ordered_characters.index(character) - ] - characters_after: List[str] = ordered_characters[ - ordered_characters.index(character) : + character_rank_in_language: ] + characters_before: List[str] = ordered_characters[0:character_rank] + characters_after: List[str] = ordered_characters[character_rank:] before_match_count: int = len( set(characters_before) & set(characters_before_source) @@ -289,6 +312,33 @@ def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches: return sorted(merge, 
key=lambda x: x[1], reverse=True) +def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches: + """ + We shall NOT return "English—" in CoherenceMatches because it is an alternative + of "English". This function only keeps the best match and remove the em-dash in it. + """ + index_results: Dict[str, List[float]] = dict() + + for result in results: + language, ratio = result + no_em_name: str = language.replace("—", "") + + if no_em_name not in index_results: + index_results[no_em_name] = [] + + index_results[no_em_name].append(ratio) + + if any(len(index_results[e]) > 1 for e in index_results): + filtered_results: CoherenceMatches = [] + + for language in index_results: + filtered_results.append((language, max(index_results[language]))) + + return filtered_results + + return results + + @lru_cache(maxsize=2048) def coherence_ratio( decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None @@ -336,4 +386,6 @@ def coherence_ratio( if sufficient_match_count >= 3: break - return sorted(results, key=lambda x: x[1], reverse=True) + return sorted( + filter_alt_coherence_matches(results), key=lambda x: x[1], reverse=True + ) diff --git a/charset_normalizer/cli/normalizer.py b/charset_normalizer/cli/normalizer.py index b8b652a5..ad26b4d0 100644 --- a/charset_normalizer/cli/normalizer.py +++ b/charset_normalizer/cli/normalizer.py @@ -1,15 +1,12 @@ import argparse import sys from json import dumps -from os.path import abspath +from os.path import abspath, basename, dirname, join, realpath from platform import python_version from typing import List, Optional +from unicodedata import unidata_version -try: - from unicodedata2 import unidata_version -except ImportError: - from unicodedata import unidata_version - +import charset_normalizer.md as md_module from charset_normalizer import from_fp from charset_normalizer.models import CliDetectionResult from charset_normalizer.version import __version__ @@ -124,8 +121,11 @@ def cli_detect(argv: Optional[List[str]] = None) -> int: parser.add_argument( "--version", action="version", - version="Charset-Normalizer {} - Python {} - Unicode {}".format( - __version__, python_version(), unidata_version + version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format( + __version__, + python_version(), + unidata_version, + "OFF" if md_module.__file__.lower().endswith(".py") else "ON", ), help="Show version information and exit.", ) @@ -234,7 +234,10 @@ def cli_detect(argv: Optional[List[str]] = None) -> int: my_file.close() continue - o_: List[str] = my_file.name.split(".") + dir_path = dirname(realpath(my_file.name)) + file_name = basename(realpath(my_file.name)) + + o_: List[str] = file_name.split(".") if args.replace is False: o_.insert(-1, best_guess.encoding) @@ -255,7 +258,7 @@ def cli_detect(argv: Optional[List[str]] = None) -> int: continue try: - x_[0].unicode_path = abspath("./{}".format(".".join(o_))) + x_[0].unicode_path = join(dir_path, ".".join(o_)) with open(x_[0].unicode_path, "w", encoding="utf-8") as fp: fp.write(str(best_guess)) diff --git a/charset_normalizer/constant.py b/charset_normalizer/constant.py index e679f79c..3188108d 100644 --- a/charset_normalizer/constant.py +++ b/charset_normalizer/constant.py @@ -489,8 +489,6 @@ KO_NAMES: Set[str] = {"johab", "cp949", "euc_kr"} ZH_NAMES: Set[str] = {"big5", "cp950", "big5hkscs", "hz"} -NOT_PRINTABLE_PATTERN = re_compile(r"[0-9\W\n\r\t]+") - LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES) # Logging LEVEL below DEBUG diff --git 
a/charset_normalizer/legacy.py b/charset_normalizer/legacy.py index cdebe2b8..b266d176 100644 --- a/charset_normalizer/legacy.py +++ b/charset_normalizer/legacy.py @@ -1,9 +1,7 @@ -import warnings from typing import Dict, Optional, Union -from .api import from_bytes, from_fp, from_path, normalize +from .api import from_bytes from .constant import CHARDET_CORRESPONDENCE -from .models import CharsetMatch, CharsetMatches def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]: @@ -43,53 +41,3 @@ def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]: "language": language, "confidence": confidence, } - - -class CharsetNormalizerMatch(CharsetMatch): - pass - - -class CharsetNormalizerMatches(CharsetMatches): - @staticmethod - def from_fp(*args, **kwargs): # type: ignore - warnings.warn( # pragma: nocover - "staticmethod from_fp, from_bytes, from_path and normalize are deprecated " - "and scheduled to be removed in 3.0", - DeprecationWarning, - ) - return from_fp(*args, **kwargs) # pragma: nocover - - @staticmethod - def from_bytes(*args, **kwargs): # type: ignore - warnings.warn( # pragma: nocover - "staticmethod from_fp, from_bytes, from_path and normalize are deprecated " - "and scheduled to be removed in 3.0", - DeprecationWarning, - ) - return from_bytes(*args, **kwargs) # pragma: nocover - - @staticmethod - def from_path(*args, **kwargs): # type: ignore - warnings.warn( # pragma: nocover - "staticmethod from_fp, from_bytes, from_path and normalize are deprecated " - "and scheduled to be removed in 3.0", - DeprecationWarning, - ) - return from_path(*args, **kwargs) # pragma: nocover - - @staticmethod - def normalize(*args, **kwargs): # type: ignore - warnings.warn( # pragma: nocover - "staticmethod from_fp, from_bytes, from_path and normalize are deprecated " - "and scheduled to be removed in 3.0", - DeprecationWarning, - ) - return normalize(*args, **kwargs) # pragma: nocover - - -class CharsetDetector(CharsetNormalizerMatches): - pass - - -class CharsetDoctor(CharsetNormalizerMatches): - pass diff --git a/charset_normalizer/md.py b/charset_normalizer/md.py index 31808af8..56e9321a 100644 --- a/charset_normalizer/md.py +++ b/charset_normalizer/md.py @@ -1,7 +1,12 @@ from functools import lru_cache +from logging import getLogger from typing import List, Optional -from .constant import COMMON_SAFE_ASCII_CHARACTERS, UNICODE_SECONDARY_RANGE_KEYWORD +from .constant import ( + COMMON_SAFE_ASCII_CHARACTERS, + TRACE, + UNICODE_SECONDARY_RANGE_KEYWORD, +) from .utils import ( is_accentuated, is_ascii, @@ -123,7 +128,7 @@ def reset(self) -> None: # pragma: no cover @property def ratio(self) -> float: - if self._character_count == 0: + if self._character_count == 0 or self._character_count < 8: return 0.0 ratio_of_accentuation: float = self._accentuated_count / self._character_count return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0 @@ -547,7 +552,20 @@ def mess_ratio( break if debug: + logger = getLogger("charset_normalizer") + + logger.log( + TRACE, + "Mess-detector extended-analysis start. 
" + f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} " + f"maximum_threshold={maximum_threshold}", + ) + + if len(decoded_sequence) > 16: + logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}") + logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}") + for dt in detectors: # pragma: nocover - print(dt.__class__, dt.ratio) + logger.log(TRACE, f"{dt.__class__}: {dt.ratio}") return round(mean_mess_ratio, 3) diff --git a/charset_normalizer/models.py b/charset_normalizer/models.py index bc16bfb6..7f8ca389 100644 --- a/charset_normalizer/models.py +++ b/charset_normalizer/models.py @@ -1,22 +1,9 @@ -import warnings -from collections import Counter from encodings.aliases import aliases from hashlib import sha256 from json import dumps -from re import sub -from typing import ( - Any, - Counter as TypeCounter, - Dict, - Iterator, - List, - Optional, - Tuple, - Union, -) - -from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE -from .md import mess_ratio +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union + +from .constant import TOO_BIG_SEQUENCE from .utils import iana_name, is_multi_byte_encoding, unicode_range @@ -78,45 +65,6 @@ def __lt__(self, other: object) -> bool: def multi_byte_usage(self) -> float: return 1.0 - len(str(self)) / len(self.raw) - @property - def chaos_secondary_pass(self) -> float: - """ - Check once again chaos in decoded text, except this time, with full content. - Use with caution, this can be very slow. - Notice: Will be removed in 3.0 - """ - warnings.warn( - "chaos_secondary_pass is deprecated and will be removed in 3.0", - DeprecationWarning, - ) - return mess_ratio(str(self), 1.0) - - @property - def coherence_non_latin(self) -> float: - """ - Coherence ratio on the first non-latin language detected if ANY. - Notice: Will be removed in 3.0 - """ - warnings.warn( - "coherence_non_latin is deprecated and will be removed in 3.0", - DeprecationWarning, - ) - return 0.0 - - @property - def w_counter(self) -> TypeCounter[str]: - """ - Word counter instance on decoded text. - Notice: Will be removed in 3.0 - """ - warnings.warn( - "w_counter is deprecated and will be removed in 3.0", DeprecationWarning - ) - - string_printable_only = sub(NOT_PRINTABLE_PATTERN, " ", str(self).lower()) - - return Counter(string_printable_only.split()) - def __str__(self) -> str: # Lazy Str Loading if self._string is None: @@ -252,18 +200,6 @@ def could_be_from_charset(self) -> List[str]: """ return [self._encoding] + [m.encoding for m in self._leaves] - def first(self) -> "CharsetMatch": - """ - Kept for BC reasons. Will be removed in 3.0. - """ - return self - - def best(self) -> "CharsetMatch": - """ - Kept for BC reasons. Will be removed in 3.0. - """ - return self - def output(self, encoding: str = "utf_8") -> bytes: """ Method to get re-encoded bytes payload using given target encoding. Default to UTF-8. diff --git a/charset_normalizer/utils.py b/charset_normalizer/utils.py index 859f212b..425d8365 100644 --- a/charset_normalizer/utils.py +++ b/charset_normalizer/utils.py @@ -1,12 +1,6 @@ -try: - # WARNING: unicodedata2 support is going to be removed in 3.0 - # Python is quickly catching up. 
- import unicodedata2 as unicodedata -except ImportError: - import unicodedata # type: ignore[no-redef] - import importlib import logging +import unicodedata from codecs import IncrementalDecoder from encodings.aliases import aliases from functools import lru_cache diff --git a/charset_normalizer/version.py b/charset_normalizer/version.py index 64c0dbde..25bf3bcf 100644 --- a/charset_normalizer/version.py +++ b/charset_normalizer/version.py @@ -2,5 +2,5 @@ Expose version """ -__version__ = "2.1.1" +__version__ = "3.0.0rc1" VERSION = __version__.split(".") diff --git a/dev-requirements.txt b/dev-requirements.txt index 8e77fe94..91e06b88 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,10 +1,24 @@ -pytest -pytest-cov -codecov -chardet>=5.0,<5.1 -Flask>=2.0,<3.0 -requests>=2.26,<3.0 -black==22.8.0 -flake8==5.0.4 -mypy==0.971 -isort +flake8==5.0.4 +chardet==5.0.0 +isort==5.10.1 +codecov==2.1.12 +pytest-cov==4.0.0 +build==0.8.0 +wheel==0.37.1 + +# The vast majority of project dropped Python 3.6 +# This is to ensure build are reproducible >=3.6 +black==22.8.0; python_version < "3.7" +black==22.10.0; python_version >= "3.7" + +mypy==0.982; python_version >= "3.7" +mypy==0.971; python_version < "3.7" + +Flask==2.2.2; python_version >= "3.7" +Flask==2.0.3; python_version < "3.7" + +pytest==7.0.0; python_version < "3.7" +pytest==7.1.3; python_version >= "3.7" + +requests==2.27.1; python_version < "3.7" +requests==2.28.1; python_version >= "3.7" diff --git a/docs/api.rst b/docs/api.rst index 47a985e5..48b74951 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -14,11 +14,9 @@ Those functions are publicly exposed and are protected through our BC guarantee. .. autofunction:: from_fp .. autofunction:: from_path -.. autofunction:: normalize - -.. autoclass:: charset_normalizer.CharsetMatches +.. autoclass:: charset_normalizer.models.CharsetMatches :inherited-members: -.. autoclass:: charset_normalizer.CharsetMatch +.. autoclass:: charset_normalizer.models.CharsetMatch :inherited-members: .. autofunction:: detect @@ -99,3 +97,8 @@ Some reusable functions used across the project. We do not guarantee the BC in t .. autofunction:: charset_normalizer.utils.range_scan .. autofunction:: charset_normalizer.utils.is_cp_similar + + +.. class:: os.PathLike + + Used as a generic way to accept AnyStr for paths. diff --git a/docs/community/speedup.rst b/docs/community/speedup.rst new file mode 100644 index 00000000..ea45b297 --- /dev/null +++ b/docs/community/speedup.rst @@ -0,0 +1,43 @@ +Optional speedup extension +=========================== + +Why? +------- + +charset-normalizer will always remain pure Python, meaning that a environment without any build-capabilities will +run this program without any additional requirements. + +Nonetheless, starting from the version 3.0 we introduce and publish some platform specific wheels including a +pre-build extension. + +Most of the time is spent in the module `md.py` so we decided to "compile it" using Mypyc. + +(1) It does not require to have a separate code base +(2) Our project code base is rather simple and lightweight +(3) Mypyc is robust enough today +(4) Four times faster! + +How? +------- + +If your platform and/or architecture is not served by this swift optimization you may compile it easily yourself. +Following those instructions (provided you have the necessary toolchain installed): + + :: + + export CHARSET_NORMALIZER_USE_MYPYC=1 + pip install mypy build wheel + pip install charset-normalizer --no-binary :all: + + +How not to? 
+----------- + +You may install charset-normalizer without any specific (pre-built wheel) by directly using the universal wheel +(most likely hosted on PyPi or any valid mirror you use) + + :: + + pip install charset-normalizer --no-binary :all: + +Directly. diff --git a/docs/community/why_migrate.rst b/docs/community/why_migrate.rst index 717fc3b5..1909c770 100644 --- a/docs/community/why_migrate.rst +++ b/docs/community/why_migrate.rst @@ -4,13 +4,13 @@ Why should I migrate to Charset-Normalizer? There is so many reason to migrate your current project. Here are some of them: - Remove ANY license ambiguity/restriction for projects bundling Chardet (even indirectly). -- X5 faster than Chardet in average and X3 faster in 99% of the cases AND support 3 times more encoding. +- X10 faster than Chardet in average and X6 faster in 99% of the cases AND support 3 times more encoding. - Never return a encoding if not suited for the given decoder. Eg. Never get UnicodeDecodeError! - Actively maintained, open to contributors. - Have the backward compatible function ``detect`` that come from Chardet. - Truly detect the language used in the text. - It is, for the first time, really universal! As there is no specific probe per charset. -- The package size is X4 lower than Chardet's (5.0)! +- The package size is X2~X4 lower than Chardet's (5.0)! (Depends on your arch) - Propose much more options/public kwargs to tweak the detection as you sees fit! - Using static typing to ease your development. - Detect Unicode content better than Chardet or cChardet does. diff --git a/docs/conf.py b/docs/conf.py index 5cfe028b..3e675d42 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -81,7 +81,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -113,7 +113,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = [] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/docs/index.rst b/docs/index.rst index 2398a7f0..05d5f98a 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,7 +13,6 @@ It aims to be as generic as possible. .. image:: https://repository-images.githubusercontent.com/200259335/d3da9600-dedc-11e9-83e8-081f597505df :width: 500px - :scale: 100 % :alt: CLI Charset Normalizer :align: right @@ -72,6 +71,7 @@ Community Guide .. 
toctree:: :maxdepth: 2 + community/speedup community/faq community/why_migrate diff --git a/docs/user/advanced_search.rst b/docs/user/advanced_search.rst index b4441e58..a269cd10 100644 --- a/docs/user/advanced_search.rst +++ b/docs/user/advanced_search.rst @@ -18,7 +18,8 @@ As follow :: cp_isolation=None, # Finite list of encoding to use when searching for a match cp_exclusion=None, # Finite list of encoding to avoid when searching for a match preemptive_behaviour=True, # Determine if we should look into my_byte_str (ASCII-Mode) for pre-defined encoding - explain=False # Print on screen what is happening when searching for a match + explain=False, # Print on screen what is happening when searching for a match + language_threshold=0.1 # Minimum coherence ratio / language ratio match accepted ) diff --git a/docs/user/support.rst b/docs/user/support.rst index 8b624933..0dbf06b9 100644 --- a/docs/user/support.rst +++ b/docs/user/support.rst @@ -92,13 +92,10 @@ mac_iceland maciceland mac_latin2 maccentraleurope, maclatin2 mac_roman macintosh, macroman mac_turkish macturkish -mbcs ansi, dbcs ptcp154 csptcp154, pt154, cp154, cyrillic_asian -rot_13 rot13 shift_jis csshiftjis, shiftjis, sjis, s_jis, x_mac_japanese shift_jis_2004 shiftjis2004, sjis_2004, s_jis_2004 shift_jisx0213 shiftjisx0213, sjisx0213, s_jisx0213 -tactis tis260 tis_620 tis620, tis_620_0, tis_620_2529_0, tis_620_2529_1, iso_ir_166 utf_16 u16, utf16 utf_16_be unicodebigunmarked, utf_16be @@ -107,9 +104,11 @@ utf_32 u32, utf32 utf_32_be utf_32be utf_32_le utf_32le utf_8 u8, utf, utf8, utf8_ucs2, utf8_ucs4 (+utf_8_sig) -utf_7 u7, unicode-1-1-utf-7 +utf_7* u7, unicode-1-1-utf-7 =============== =============================================================================================================================== +*: Only if a SIG/mark is found. + ------------------- Supported Languages ------------------- diff --git a/setup.cfg b/setup.cfg index bb4f9c50..8000f5cd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,59 @@ +[metadata] +name = charset-normalizer +description = The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. 
+long_description = file: README.md, CHANGELOG.md, LICENSE +long_description_content_type = text/markdown +keywords = encoding, charset, charset-detector, detector, normalization, unicode, chardet, detect +url = https://github.com/Ousret/charset_normalizer +license = MIT +author_email = ahmed.tahri@cloudnursery.dev +author = Ahmed TAHRI +python_requires = >=3.6.0 +project_urls = + Bug Reports = https://github.com/Ousret/charset_normalizer/issues + Documentation = https://charset-normalizer.readthedocs.io/en/latest +classifiers = + Development Status :: 5 - Production/Stable + License :: OSI Approved :: MIT License + Intended Audience :: Developers + Topic :: Software Development :: Libraries :: Python Modules + Operating System :: OS Independent + Programming Language :: Python + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 + Programming Language :: Python :: Implementation :: PyPy + Topic :: Text Processing :: Linguistic + Topic :: Utilities + Typing :: Typed + +[options.packages.find] +exclude = + tests + *.tests + *.tests.* + tests.* + docs* + data* + +[options.extras_require] +unicode_backport = + +[options.entry_points] +console_scripts = + normalizer = charset_normalizer.cli.normalizer:cli_detect + +[options] +packages = find: +include_package_data = True + +[options.package_data] +charset_normalizer = py.typed + [tool:pytest] addopts = --cov=charset_normalizer --cov-report=term-missing -rxXs @@ -11,4 +67,4 @@ ignore_missing_imports = True [tool:isort] profile = black -combine_as_imports = True \ No newline at end of file +combine_as_imports = True diff --git a/setup.py b/setup.py index 298d12be..7c64a695 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,11 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import io import os +import sys from re import search -from setuptools import find_packages, setup +from setuptools import setup def get_version(): @@ -14,73 +14,25 @@ def get_version(): version_file.read()).group('version') -# Package meta-data. -NAME = 'charset-normalizer' -DESCRIPTION = 'The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.' 
-URL = 'https://github.com/ousret/charset_normalizer' -EMAIL = 'ahmed.tahri@cloudnursery.dev' -AUTHOR = 'Ahmed TAHRI @Ousret' -REQUIRES_PYTHON = '>=3.6.0' -VERSION = get_version() +USE_MYPYC = False -REQUIRED = [] +if len(sys.argv) > 1 and sys.argv[1] == "--use-mypyc": + sys.argv.pop(1) + USE_MYPYC = True +if os.getenv("CHARSET_NORMALIZER_USE_MYPYC", None) == "1": + USE_MYPYC = True -EXTRAS = { - 'unicode_backport': ['unicodedata2'] -} +if USE_MYPYC: + from mypyc.build import mypycify -here = os.path.abspath(os.path.dirname(__file__)) - -try: - with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: - long_description = '\n' + f.read() -except FileNotFoundError: - long_description = DESCRIPTION + MYPYC_MODULES = mypycify([ + "charset_normalizer/md.py" + ]) +else: + MYPYC_MODULES = None setup( - name=NAME, - version=VERSION, - description=DESCRIPTION, - long_description=long_description.replace(':heavy_check_mark:', '✅'), - long_description_content_type='text/markdown', - author=AUTHOR, - author_email=EMAIL, - python_requires=REQUIRES_PYTHON, - url=URL, - keywords=['encoding', 'i18n', 'txt', 'text', 'charset', 'charset-detector', 'normalization', 'unicode', 'chardet'], - packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), - install_requires=REQUIRED, - extras_require=EXTRAS, - include_package_data=True, - package_data={"charset_normalizer": ["py.typed"]}, - license='MIT', - entry_points={ - 'console_scripts': - [ - 'normalizer = charset_normalizer.cli.normalizer:cli_detect' - ] - }, - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'License :: OSI Approved :: MIT License', - 'Intended Audience :: Developers', - 'Topic :: Software Development :: Libraries :: Python Modules', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Topic :: Text Processing :: Linguistic', - 'Topic :: Utilities', - 'Programming Language :: Python :: Implementation :: PyPy', - 'Typing :: Typed' - ], - project_urls={ - 'Bug Reports': 'https://github.com/Ousret/charset_normalizer/issues', - 'Documentation': 'https://charset-normalizer.readthedocs.io/en/latest', - }, + name="charset-normalizer", + version=get_version(), + ext_modules=MYPYC_MODULES ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 16601750..d42bf46b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2,7 +2,12 @@ from charset_normalizer.cli.normalizer import cli_detect, query_yes_no from unittest.mock import patch from os.path import exists -from os import remove +from os import remove, path, pardir + +DIR_PATH = path.join( + path.dirname(path.realpath(__file__)), + pardir +) class TestCommandLineInterface(unittest.TestCase): @@ -24,24 +29,33 @@ def test_single_file(self): self.assertEqual( 0, cli_detect( - ['./data/sample-arabic-1.txt'] + [DIR_PATH + '/data/sample-arabic-1.txt'] ) ) + def test_version_output_success(self): + with self.assertRaises(SystemExit): + cli_detect( + ['--version'] + ) + def test_single_file_normalize(self): self.assertEqual( 0, cli_detect( - ['./data/sample-arabic-1.txt', '--normalize'] + [ + DIR_PATH + '/data/sample-arabic-1.txt', + '--normalize' + ] ) ) self.assertTrue( - exists('./data/sample-arabic-1.cp1256.txt') + 
exists(DIR_PATH + '/data/sample-arabic-1.cp1256.txt') ) try: - remove('./data/sample-arabic-1.cp1256.txt') + remove(DIR_PATH + '/data/sample-arabic-1.cp1256.txt') except: pass @@ -49,7 +63,7 @@ def test_single_verbose_file(self): self.assertEqual( 0, cli_detect( - ['./data/sample-arabic-1.txt', '--verbose'] + [DIR_PATH + '/data/sample-arabic-1.txt', '--verbose'] ) ) @@ -58,9 +72,9 @@ def test_multiple_file(self): 0, cli_detect( [ - './data/sample-arabic-1.txt', - './data/sample-french.txt', - './data/sample-chinese.txt' + DIR_PATH + '/data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-french.txt', + DIR_PATH + '/data/sample-chinese.txt' ] ) ) @@ -71,9 +85,9 @@ def test_with_alternative(self): cli_detect( [ '-a', - './data/sample-arabic-1.txt', - './data/sample-french.txt', - './data/sample-chinese.txt' + DIR_PATH + '/data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-french.txt', + DIR_PATH + '/data/sample-chinese.txt' ] ) ) @@ -84,9 +98,9 @@ def test_with_minimal_output(self): cli_detect( [ '-m', - './data/sample-arabic-1.txt', - './data/sample-french.txt', - './data/sample-chinese.txt' + DIR_PATH + '/data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-french.txt', + DIR_PATH + '/data/sample-chinese.txt' ] ) ) @@ -98,9 +112,9 @@ def test_with_minimal_and_alt(self): [ '-m', '-a', - './data/sample-arabic-1.txt', - './data/sample-french.txt', - './data/sample-chinese.txt' + DIR_PATH + '/data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-french.txt', + DIR_PATH + '/data/sample-chinese.txt' ] ) ) @@ -109,7 +123,7 @@ def test_non_existent_file(self): with self.assertRaises(SystemExit) as cm: cli_detect( - ['./data/not_found_data.txt'] + [DIR_PATH + '/data/not_found_data.txt'] ) self.assertEqual(cm.exception.code, 2) @@ -119,7 +133,7 @@ def test_replace_without_normalize(self): self.assertEqual( cli_detect( [ - './data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-arabic-1.txt', '--replace' ] ), @@ -130,7 +144,7 @@ def test_force_replace_without_replace(self): self.assertEqual( cli_detect( [ - './data/sample-arabic-1.txt', + DIR_PATH + '/data/sample-arabic-1.txt', '--force' ] ), diff --git a/tests/test_coherence_detection.py b/tests/test_coherence_detection.py index 6ad95927..7e399132 100644 --- a/tests/test_coherence_detection.py +++ b/tests/test_coherence_detection.py @@ -1,5 +1,5 @@ import pytest -from charset_normalizer.cd import encoding_languages, mb_encoding_languages, is_multi_byte_encoding, get_target_features +from charset_normalizer.cd import encoding_languages, mb_encoding_languages, is_multi_byte_encoding, get_target_features, filter_alt_coherence_matches @pytest.mark.parametrize( @@ -39,3 +39,18 @@ def test_target_features(language, expected_have_accents, expected_pure_latin): assert target_have_accents is expected_have_accents assert target_pure_latin is expected_pure_latin + + +@pytest.mark.parametrize( + "matches, expected_return", + [ + ([("English", 0.88,), ("English—", 0.99)], [("English", 0.99)]), + ([("English", 0.88,), ("English—", 0.99), ("English——", 0.999)], [("English", 0.999)]), + ([("English", 0.88,), ("English—", 0.77)], [("English", 0.88)]), + ([("English", 0.88,), ("Italian", 0.77)], [("English", 0.88), ("Italian", 0.77)]), + ] +) +def test_filter_alt_coherence_matches(matches, expected_return): + results = filter_alt_coherence_matches(matches) + + assert results == expected_return diff --git a/tests/test_full_detection.py b/tests/test_full_detection.py index 96e0b797..adff8801 100644 --- a/tests/test_full_detection.py +++ 
b/tests/test_full_detection.py @@ -1,5 +1,11 @@ from charset_normalizer.api import from_path import pytest +from os import path, pardir + +DIR_PATH = path.join( + path.dirname(path.realpath(__file__)), + pardir +) @pytest.mark.parametrize( @@ -10,7 +16,7 @@ ('sample-arabic.txt', 'utf_8', 'Arabic'), ('sample-russian-3.txt', 'utf_8', 'Russian'), ('sample-french.txt', 'utf_8', 'French'), - ('sample-chinese.txt', 'big5', 'Classical Chinese'), + ('sample-chinese.txt', 'big5', 'Chinese'), ('sample-greek.txt', 'cp1253', 'Greek'), ('sample-greek-2.txt', 'cp1253', 'Greek'), ('sample-hebrew-2.txt', 'cp1255', 'Hebrew'), @@ -30,7 +36,7 @@ def test_elementary_detection( expected_charset: str, expected_language: str, ): - best_guess = from_path("./data/{}".format(input_data_file)).best() + best_guess = from_path(DIR_PATH + "/data/{}".format(input_data_file)).best() assert best_guess is not None, "Elementary detection has failed upon '{}'".format(input_data_file) assert best_guess.encoding == expected_charset, "Elementary charset detection has failed upon '{}'".format(input_data_file) diff --git a/tests/test_normalize_fp.py b/tests/test_normalize_fp.py deleted file mode 100644 index e2ce364a..00000000 --- a/tests/test_normalize_fp.py +++ /dev/null @@ -1,20 +0,0 @@ -import pytest -from charset_normalizer import normalize -from os.path import exists -from os import unlink - - -def test_normalize_fp_creation(): - guesses = normalize( - "./data/sample-arabic-1.txt" - ) - - predicted_path = "./data/sample-arabic-1-{}.txt".format(guesses.best().encoding) - path_exist = exists( - "./data/sample-arabic-1-{}.txt".format(guesses.best().encoding) - ) - - assert path_exist is True - - if path_exist: - unlink(predicted_path)
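A few usage notes on the API surface changed by this patch. `from_bytes`, `from_path` and `from_fp` gain a `language_threshold` keyword (default 0.1) that is forwarded to `coherence_ratio` in place of the previously hard-coded 0.1. A minimal sketch of tightening language detection with it — the sample payload and the 0.3 value are illustrative, not taken from the patch:

```python
from charset_normalizer import from_bytes

payload = "Всеки човек има право на образование.".encode("cp1251")

# Raising language_threshold above its 0.1 default makes the coherence
# (language) detector stricter: a chunk must match a language frequency
# table more convincingly before it contributes to the final guess.
results = from_bytes(payload, language_threshold=0.3)

best_guess = results.best()  # CharsetMatches.best() is still part of the 3.0 API
if best_guess is not None:
    print(best_guess.encoding, best_guess.language)
```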
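The extended `explain=True` behaviour only applies when `cp_isolation` holds one or two encodings; in that case `mess_ratio` is called with its debug flag set and each mess-detector plugin logs its ratio at TRACE level. A short sketch, with an illustrative sample text:

```python
from charset_normalizer import from_bytes

# With explain=True and cp_isolation restricted to at most two encodings,
# the mess detector emits per-plugin TRACE details for every analysed chunk.
from_bytes(
    "Été comme hiver, l'encodage reste détectable.".encode("cp1252"),
    cp_isolation=["cp1252", "utf_8"],
    explain=True,
)
```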
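`filter_alt_coherence_matches` exists because the frequency table now carries em-dash suffixed alternatives ("English—", "Japanese—", "Japanese——"); the filter folds them back onto the base language and keeps the best ratio. The behaviour, as pinned down by the new test in tests/test_coherence_detection.py:

```python
from charset_normalizer.cd import filter_alt_coherence_matches

matches = [("English", 0.88), ("English—", 0.99)]

# The alternative entry is merged into its base language, which keeps the
# highest ratio observed across all of its variants.
print(filter_alt_coherence_matches(matches))  # [('English', 0.99)]
```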
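Whether the mypyc speedup is active can be checked outside the CLI with the same test `normalizer --version` now performs: a compiled wheel ships `charset_normalizer.md` as an extension module rather than a plain `.py` file.

```python
import charset_normalizer.md as md_module

# A mypyc-built wheel provides md as a compiled extension (.so / .pyd);
# if the module file still ends with ".py", the pure-Python code path is in use.
speedup_enabled = not md_module.__file__.lower().endswith(".py")
print("SpeedUp", "ON" if speedup_enabled else "OFF")
```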
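Finally, since `normalize()` and the `CharsetNormalizerMatches` aliases are removed, 2.x callers can rebuild the same behaviour from the surviving API: `from_path`, `CharsetMatches.best()` and `CharsetMatch.output()`. A hedged sketch — the helper name, error message and file-naming scheme are illustrative, not a drop-in restoration of the deleted code:

```python
from os.path import basename, splitext

from charset_normalizer import from_path


def utf8_copy(path: str) -> str:
    """Write a UTF-8 re-encoded copy of `path` next to it and return the new path."""
    result = from_path(path).best()

    if result is None:
        raise IOError(f"Unable to detect a suitable encoding for {path!r}")

    stem, extension = splitext(basename(path))
    target = path.replace(basename(path), f"{stem}-{result.encoding}{extension}")

    with open(target, "wb") as fp:
        fp.write(result.output())  # output() re-encodes, defaulting to UTF-8

    return target
```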