Skip to content

Commit

Permalink
Removing sys dependency in deduplication and making translate table static
Browse files Browse the repository at this point in the history
  • Loading branch information
reinoldus committed Feb 8, 2025
1 parent 8be03e8 commit b910438
Showing 1 changed file with 1 addition and 3 deletions.
4 changes: 1 addition & 3 deletions trafilatura/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
# 3.11+: from typing import Self

import re
import sys
import string
from difflib import SequenceMatcher
from functools import lru_cache
Expand All @@ -23,8 +22,7 @@

BIN_COUNT_FUNC = getattr(int, "bit_count", lambda x: bin(x).count("1"))

PUNCT_TBL = dict.fromkeys((i for i in range(sys.maxunicode)
if unicodedata.category(chr(i)).startswith('P')), ord(' '))
PUNCT_TBL = str.maketrans({i: ' ' for i in range(0x10FFFF) if unicodedata.category(chr(i)).startswith('P')})


@lru_cache(maxsize=1024)
Expand Down

0 comments on commit b910438

Please sign in to comment.