diff --git a/CHANGELOG.md b/CHANGELOG.md index 068c509..b2bc888 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ This is the changelog for the open source version of tiktoken. +## [v0.13.0] + +- Loosened the type annotations of `Encoding`'s batch encoding methods to accept any `Sequence[str]` instead of only `list[str]` + ## [v0.12.0] - Build wheels for Python 3.14 - Build musllinux aarch64 wheels diff --git a/tiktoken/core.py b/tiktoken/core.py index 225fffb..3557a79 100644 --- a/tiktoken/core.py +++ b/tiktoken/core.py @@ -158,7 +158,7 @@ def encode_to_numpy( buffer = self._core_bpe.encode_to_tiktoken_buffer(text, allowed_special) return np.frombuffer(buffer, dtype=np.uint32) - def encode_ordinary_batch(self, text: list[str], *, num_threads: int = 8) -> list[list[int]]: + def encode_ordinary_batch(self, text: Sequence[str], *, num_threads: int = 8) -> list[list[int]]: """Encodes a list of strings into tokens, in parallel, ignoring special tokens. This is equivalent to `encode_batch(text, disallowed_special=())` (but slightly faster). @@ -174,7 +174,7 @@ def encode_ordinary_batch(self, text: list[str], *, num_threads: int = 8) -> lis def encode_batch( self, - text: list[str], + text: Sequence[str], *, num_threads: int = 8, allowed_special: Literal["all"] | AbstractSet[str] = set(), # noqa: B006