diff --git a/README.md b/README.md index b2c3f2f..0365e2c 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,35 @@ Standalone on PyPI, and portable across training and inference stacks (transform uv add renderers ``` +`transformers` is an optional extra. The base install is lightweight — bring +your own tokenizer (e.g. a `tokenizers`-backed one) and you can render and parse +text without `transformers` in your environment at all. Install the extra when +you want the `load_tokenizer` / `create_renderer` convenience helpers, or any of +the vision-language renderers (they need a HuggingFace image processor): + +```bash +uv add 'renderers[transformers]' +``` + +Two caveats for the lightweight path: a bring-your-own tokenizer must satisfy the +[`Tokenizer`](renderers/base.py) protocol (`encode` / `decode` / +`convert_tokens_to_ids`, plus `name_or_path`, +`unk_token_id`, `eos_token_id`; `apply_chat_template` is needed only by `DefaultRenderer`, via `ChatTemplateTokenizer`); and per-token training attribution +(`attribute_text_segments`) additionally needs `tokenizer(..., return_offsets_mapping=True)` +— without it, attribution falls back to a vanilla HuggingFace tokenizer, which +requires the extra. + +`renderers.client` — the generate client for vLLM's `/inference/v1/generate` — is +also opt-in. It's the only piece that needs the `openai` SDK and `httpx`, so +`import renderers` and the renderers themselves stay free of HTTP/engine deps. +Install the extra to use it: + +```bash +uv add 'renderers[vllm]' +``` + +The extras compose, e.g. `uv add 'renderers[transformers,vllm]'`. + ## At a glance ```python diff --git a/pyproject.toml b/pyproject.toml index 478104f..b3fa4dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,10 +16,8 @@ license-files = ["LICENSE"] requires-python = ">=3.10,<3.14" dependencies = [ "numpy", - "openai>=1.108.1", "tiktoken", "jinja2", - "transformers>=4.50.0", # Used by GptOssRenderer to render and parse harmony tokens. 
Vendoring # OpenAI's reference implementation keeps us byte-identical with vLLM # (which also uses it) and saves us mirroring a 330-line Jinja template. @@ -31,12 +29,6 @@ dependencies = [ # against 0.0.8) and ``tests/test_gpt_oss_harmony_parity.py`` passes on it, # so the older harmony is safe. "openai-harmony>=0.0.4", - # Crusoe's Rust BPE tokenizer; ~10x faster encode vs HF's tokenizers. - # ``load_tokenizer`` patches it in by default for every supported model - # except a small denylist (DeepSeek-V3 family). The patch is bracketed - # around ``from_pretrained``, so subsequent ``AutoTokenizer`` calls - # outside the renderers package stay vanilla. - "fastokens>=0.2.0", # ``BaseRendererConfig`` inherits from ``pydantic_config.BaseConfig`` so # the typed-config surface stays uniform with prime-rl / verifiers config # bases. Transitively brings pydantic, which ``renderers.configs`` also @@ -44,6 +36,35 @@ dependencies = [ "prime-pydantic-config>=0.3.0.dev83", ] +[project.optional-dependencies] +# ``transformers`` is heavy, and text-only renderers don't need it: construct +# a renderer with your own tokenizer object (e.g. a ``tokenizers``-backed one) +# and render / parse with no ``transformers`` import at all (issue #31). It is +# required only by the ``load_tokenizer`` / ``create_renderer*`` convenience +# helpers, the offset-attribution fallback in ``attribute_text_segments``, and +# the VLM renderers (which need ``AutoProcessor`` image processors). The lazy +# import points raise a clear "install renderers[transformers]" error when it's +# missing — see ``renderers.base._require_transformers``. +# +# ``fastokens`` (Crusoe's Rust BPE; ~10x faster encode) patches +# ``transformers`` inside ``load_tokenizer``, so it travels in the same extra. 
+transformers = [ + "transformers>=4.50.0", + "fastokens>=0.2.0", +] + +# ``renderers.client`` is a generate client for vLLM's +# ``/inference/v1/generate`` — engine-specific glue, not part of the core +# message↔token contract. It's the only thing that needs the ``openai`` SDK +# (``AsyncOpenAI``) and ``httpx``, so it's opt-in: ``import renderers`` and the +# renderers themselves stay free of HTTP/engine deps. Install this extra to use +# ``from renderers.client import generate``. (A future ``sglang`` client would +# live behind its own extra.) +vllm = [ + "openai>=1.108.1", + "httpx", +] + [tool.hatch.version] source = "vcs" # Tags look like ``renderers-v0.1.8`` (prefix matches the publish.yml @@ -76,6 +97,16 @@ dev = [ "torch>=2.11.0", "torchvision>=0.26.0", "ty>=0.0.1a29,<0.0.22", + # ``transformers`` is an optional extra for consumers (issue #31), but the + # test suite + type-check exercise the ``load_tokenizer`` / VLM paths, so + # the dev env always installs it. Mirror of ``[project.optional-dependencies].transformers``. + "transformers>=4.50.0", + "fastokens>=0.2.0", + # ``renderers.client`` deps — opt-in via the ``[vllm]`` extra for consumers, + # but ``tests/test_client.py`` exercises the client, so the dev env always + # installs them. Mirror of ``[project.optional-dependencies].vllm``. 
+ "openai>=1.108.1", + "httpx", ] [tool.uv] diff --git a/renderers/__init__.py b/renderers/__init__.py index e7cd1c4..2f6bff8 100644 --- a/renderers/__init__.py +++ b/renderers/__init__.py @@ -7,6 +7,7 @@ __version__ = "0+unknown" from renderers.base import ( + ChatTemplateTokenizer, Content, ContentPart, ImagePart, @@ -17,12 +18,14 @@ ParsedResponse, ParsedToolCall, PlaceholderRange, + Processor, RenderedConversation, RenderedTokens, Renderer, RendererPool, TextPart, ThinkingPart, + Tokenizer, ToolCall, ToolCallFunction, ToolCallParseStatus, @@ -38,7 +41,6 @@ reject_assistant_in_extension, trim_to_turn_close, ) -from renderers.client import OverlongPromptError from renderers.configs import ( AutoRendererConfig, BaseRendererConfig, @@ -63,15 +65,27 @@ # Concrete renderer classes are lazy-loaded so that consumers needing # only the config layer (``RendererConfig`` discriminated union) don't -# pay the ``transformers`` import cost. Each renderer module does -# ``from transformers.tokenization_utils import PreTrainedTokenizer`` -# at module level, so eager imports here would drag ``transformers`` -# into every downstream ``import renderers``. ``__getattr__`` (PEP 562) -# resolves the names on first attribute access, so ``from renderers -# import DefaultRenderer`` and ``renderers.DefaultRenderer`` both work -# transparently. ``create_renderer`` doesn't depend on these eager -# imports — ``renderers.base._populate_registry`` lazy-imports the -# concrete classes itself when a renderer is instantiated. +# pay the cost of importing every renderer module up front. ``__getattr__`` +# (PEP 562) resolves the names on first attribute access, so ``from +# renderers import DefaultRenderer`` and ``renderers.DefaultRenderer`` both +# work transparently. ``create_renderer`` doesn't depend on these eager +# imports — ``renderers.base._populate_registry`` lazy-imports the concrete +# classes itself when a renderer is instantiated. 
+# +# As of issue #31, ``transformers`` is an optional extra: the renderer +# modules type their ``tokenizer`` / ``processor`` params against the +# ``Tokenizer`` / ``Processor`` protocols in ``renderers.base`` rather than +# ``transformers.PreTrainedTokenizer``, so ``import renderers`` (and +# constructing a text renderer with your own tokenizer) no longer pulls in +# ``transformers`` at all. It's loaded lazily only by ``load_tokenizer`` / +# ``create_renderer*`` and the VLM renderers — see ``_require_transformers``. +# +# ``renderers.client`` (the vLLM ``/inference/v1/generate`` client) is +# likewise opt-in: it depends on the ``openai`` SDK + ``httpx`` (the +# ``renderers[vllm]`` extra) and is deliberately *not* imported here, so +# ``import renderers`` stays free of HTTP/engine deps. Import it explicitly +# (``from renderers.client import generate, OverlongPromptError``) when you +# want it. _LAZY_RENDERERS: dict[str, str] = { "DeepSeekV3Renderer": "renderers.deepseek_v3", "DefaultRenderer": "renderers.default", @@ -109,6 +123,7 @@ def __dir__() -> list[str]: __all__ = [ "AutoRendererConfig", "BaseRendererConfig", + "ChatTemplateTokenizer", "Content", "ContentPart", "DeepSeekV3Renderer", @@ -138,10 +153,10 @@ def __dir__() -> list[str]: "MultimodalRenderer", "Nemotron3Renderer", "Nemotron3RendererConfig", - "OverlongPromptError", "ParsedResponse", "ParsedToolCall", "PlaceholderRange", + "Processor", "Qwen35Renderer", "Qwen35RendererConfig", "Qwen36Renderer", @@ -157,6 +172,7 @@ def __dir__() -> list[str]: "RendererPool", "TextPart", "ThinkingPart", + "Tokenizer", "ToolCall", "ToolCallFunction", "ToolCallParseStatus", diff --git a/renderers/base.py b/renderers/base.py index 45768de..04a4194 100644 --- a/renderers/base.py +++ b/renderers/base.py @@ -652,6 +652,78 @@ def with_completion( ) +@runtime_checkable +class Tokenizer(Protocol): + """Structural type for the tokenizer a renderer needs. 
+ + Satisfied by HuggingFace ``PreTrainedTokenizerBase`` and by any + bring-your-own wrapper (e.g. around a ``tokenizers.Tokenizer``) that + exposes this surface. Defining it here — rather than annotating with + ``transformers.PreTrainedTokenizer`` — keeps ``transformers`` out of + the import graph for text-only renderers (issue #31): the heavy + package becomes an optional extra, needed only by the + ``load_tokenizer`` / ``create_renderer`` convenience helpers and by + the VLM renderers. + + The hand-coded renderers only need ``encode`` / ``decode`` / + ``convert_tokens_to_ids`` (plus the id attributes), so a plain + ``tokenizers.Tokenizer`` wrapper satisfies this protocol. + ``apply_chat_template`` is deliberately *not* required here — only + :class:`DefaultRenderer` needs it, via the :class:`ChatTemplateTokenizer` + subtype. + + ``__call__`` is consumed only by ``attribute_text_segments`` for + character-offset attribution (``return_offsets_mapping=True``). A + tokenizer that doesn't support offsets still renders and parses fine; + offset attribution then falls back to a vanilla HuggingFace tokenizer + (which requires the ``transformers`` extra) — see + ``_get_offset_tokenizer``. + """ + + name_or_path: str + unk_token_id: int | None + eos_token_id: int | None + + def encode(self, text: str, *args: Any, **kwargs: Any) -> list[int]: ... + + def decode(self, token_ids: Any, *args: Any, **kwargs: Any) -> str: ... + + def convert_tokens_to_ids(self, tokens: Any) -> Any: ... + + def __call__(self, *args: Any, **kwargs: Any) -> Any: ... + + +@runtime_checkable +class ChatTemplateTokenizer(Tokenizer, Protocol): + """A :class:`Tokenizer` that also exposes ``apply_chat_template``. + + Required only by :class:`DefaultRenderer`, the generic fallback that + delegates rendering to the tokenizer's Jinja chat template. The + hand-coded renderers reproduce each model's template in Python and only + touch the base :class:`Tokenizer` surface, so they don't need this. 
+ """ + + def apply_chat_template(self, *args: Any, **kwargs: Any) -> Any: ... + + +@runtime_checkable +class Processor(Protocol): + """Structural type for the HuggingFace processor a VLM renderer needs. + + Satisfied by ``AutoProcessor`` instances (Qwen-VL, Kimi-VL, ...). The + ``image_processor`` sub-object's surface differs per model family + (Qwen exposes ``__call__(images=...)`` + ``merge_size``; Kimi exposes + ``preprocess(...)`` + ``media_tokens_calculator``), so it's typed + loosely. VLMs intrinsically need ``transformers`` at render time — this + Protocol just keeps the ``processor:`` type hint from importing it at + module load. + """ + + image_processor: Any + + def apply_chat_template(self, *args: Any, **kwargs: Any) -> Any: ... + + @runtime_checkable class Renderer(Protocol): """Owns message ↔ token conversion for a specific model family.""" @@ -1156,6 +1228,32 @@ def _model_has_vision_config(model_name: str) -> bool: _FASTOKENS_ANNOUNCED = False +_TRANSFORMERS_INSTALL_HINT = ( + "This requires the optional `transformers` extra, which is not " + "installed. Install it with `pip install 'renderers[transformers]'` " + "(or `uv add 'renderers[transformers]'`). Text-only renderers work " + "without it when you construct them with your own tokenizer object." +) + + +def _require_transformers(): + """Import and return the ``transformers`` module, or raise a clear, + actionable error pointing at the optional extra. + + ``transformers`` (and ``fastokens``, which patches it) is an optional + dependency — see issue #31. The convenience helpers (``load_tokenizer``, + ``create_renderer*``), the offset-attribution fallback, and all VLM + renderers need it; text-only render/parse with a bring-your-own + tokenizer does not. 
+ """ + try: + import transformers + + return transformers + except ImportError as exc: + raise ImportError(_TRANSFORMERS_INSTALL_HINT) from exc + + def _patched_load(model_name_or_path: str, **kwargs): """Run ``AutoTokenizer.from_pretrained`` with fastokens patched in process-locally — patch around the load, unpatch right after. @@ -1293,7 +1391,11 @@ def load_tokenizer( validation for configs with nested ``rope_parameters``), we fall back to loading the repo's self-contained ``tokenizer.json`` directly — see ``_load_tokenizer_via_auto``. + + Requires the optional ``transformers`` extra; raises a clear + ``ImportError`` with install instructions if it's missing. """ + _require_transformers() kwargs: dict[str, Any] = {} revision = TRUSTED_REVISIONS.get(model_name_or_path) if revision is not None: @@ -1677,6 +1779,11 @@ def _get_offset_tokenizer(tokenizer): cached = _offset_tokenizers.get(name_or_path) if cached is not None: return cached + # The supplied tokenizer can't produce offsets; fall back to a + # vanilla HuggingFace tokenizer, which needs the ``transformers`` + # extra. A bring-your-own tokenizer that supports + # ``return_offsets_mapping=True`` skips this path entirely. 
+ _require_transformers() kwargs: dict[str, Any] = {} revision = TRUSTED_REVISIONS.get(name_or_path) diff --git a/renderers/deepseek_v3.py b/renderers/deepseek_v3.py index 7bec3de..143ffb0 100644 --- a/renderers/deepseek_v3.py +++ b/renderers/deepseek_v3.py @@ -14,12 +14,11 @@ import json -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( Message, ParsedResponse, RenderedTokens, + Tokenizer, ToolSpec, attribute_text_segments, extract_message_tool_names, @@ -55,7 +54,7 @@ class DeepSeekV3Renderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: DeepSeekV3RendererConfig | None = None, ): self._tokenizer = tokenizer diff --git a/renderers/default.py b/renderers/default.py index a662097..a6db04a 100644 --- a/renderers/default.py +++ b/renderers/default.py @@ -11,9 +11,8 @@ import json from typing import Any -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( + ChatTemplateTokenizer, Message, ParsedResponse, RenderedTokens, @@ -91,7 +90,7 @@ class DefaultRenderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: ChatTemplateTokenizer, config: DefaultRendererConfig | None = None, ): cfg = config or DefaultRendererConfig() diff --git a/renderers/glm45.py b/renderers/glm45.py index 7af9259..6ac8429 100644 --- a/renderers/glm45.py +++ b/renderers/glm45.py @@ -13,12 +13,11 @@ import json from typing import Any -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( Message, ParsedResponse, RenderedTokens, + Tokenizer, ToolSpec, attribute_text_segments, extract_message_tool_names, @@ -54,7 +53,7 @@ class GLM45Renderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: GLM45RendererConfig | None = None, ): self._tokenizer = tokenizer diff --git a/renderers/glm5.py b/renderers/glm5.py index 924d754..8ebfed3 100644 --- a/renderers/glm5.py +++ b/renderers/glm5.py 
@@ -14,12 +14,11 @@ import json from typing import Any -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( Message, ParsedResponse, RenderedTokens, + Tokenizer, ToolSpec, attribute_text_segments, extract_message_tool_names, @@ -63,7 +62,7 @@ class GLM5Renderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: GLM5RendererConfig | GLM51RendererConfig | None = None, ): self._tokenizer = tokenizer diff --git a/renderers/gpt_oss.py b/renderers/gpt_oss.py index 2a9c5ca..9078cb6 100644 --- a/renderers/gpt_oss.py +++ b/renderers/gpt_oss.py @@ -49,12 +49,11 @@ ToolDescription, load_harmony_encoding, ) -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( Message, ParsedResponse, RenderedTokens, + Tokenizer, ToolSpec, extract_message_tool_names, reject_assistant_in_extension, @@ -122,7 +121,7 @@ class GptOssRenderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: GptOssRendererConfig | None = None, ): """Initialise the renderer. 
diff --git a/renderers/kimi_k2.py b/renderers/kimi_k2.py index e99dfa7..ce0b3eb 100644 --- a/renderers/kimi_k2.py +++ b/renderers/kimi_k2.py @@ -16,12 +16,11 @@ import json -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( Message, ParsedResponse, RenderedTokens, + Tokenizer, ToolSpec, extract_message_tool_names, reject_assistant_in_extension, @@ -45,7 +44,7 @@ class KimiK2Renderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: KimiK2RendererConfig | None = None, ): self._tokenizer = tokenizer diff --git a/renderers/kimi_k25.py b/renderers/kimi_k25.py index ba3ca6e..2236b09 100644 --- a/renderers/kimi_k25.py +++ b/renderers/kimi_k25.py @@ -25,15 +25,15 @@ import re from typing import Any -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( Message, MultiModalData, ParsedResponse, ParsedToolCall, PlaceholderRange, + Processor, RenderedTokens, + Tokenizer, ToolCallParseStatus, ToolSpec, extract_message_tool_names, @@ -579,10 +579,10 @@ class KimiK25Renderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: KimiK25RendererConfig | None = None, *, - processor: Any = None, + processor: Processor | None = None, ): self._tokenizer = tokenizer self._processor = processor @@ -638,7 +638,9 @@ def mm_token_type_id_map(self) -> dict[int, int]: def _get_processor(self): if self._processor is not None: return self._processor - from transformers import AutoProcessor + from renderers.base import _require_transformers + + AutoProcessor = _require_transformers().AutoProcessor name = getattr(self._tokenizer, "name_or_path", None) if not name: diff --git a/renderers/laguna_xs2.py b/renderers/laguna_xs2.py index bd6b64f..45c28c4 100644 --- a/renderers/laguna_xs2.py +++ b/renderers/laguna_xs2.py @@ -27,13 +27,12 @@ import json -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( 
Content, Message, ParsedResponse, RenderedTokens, + Tokenizer, ToolSpec, attribute_text_segments, extract_message_tool_names, @@ -80,7 +79,7 @@ class LagunaXS2Renderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: LagunaXS2RendererConfig | None = None, ): self._tokenizer = tokenizer diff --git a/renderers/minimax_m2.py b/renderers/minimax_m2.py index f990274..925fef3 100644 --- a/renderers/minimax_m2.py +++ b/renderers/minimax_m2.py @@ -14,12 +14,11 @@ import json from typing import Any -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( Message, ParsedResponse, RenderedTokens, + Tokenizer, ToolSpec, attribute_text_segments, extract_message_tool_names, @@ -56,7 +55,7 @@ class MiniMaxM2Renderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: MiniMaxM2RendererConfig | None = None, ): self._tokenizer = tokenizer diff --git a/renderers/nemotron3.py b/renderers/nemotron3.py index e6398b5..d334f23 100644 --- a/renderers/nemotron3.py +++ b/renderers/nemotron3.py @@ -17,12 +17,11 @@ import json from typing import Any -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( Message, ParsedResponse, RenderedTokens, + Tokenizer, ToolSpec, attribute_text_segments, extract_message_tool_names, @@ -80,7 +79,7 @@ class Nemotron3Renderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: Nemotron3RendererConfig | None = None, ): self._tokenizer = tokenizer diff --git a/renderers/qwen3.py b/renderers/qwen3.py index f744b8c..315d84c 100644 --- a/renderers/qwen3.py +++ b/renderers/qwen3.py @@ -11,12 +11,11 @@ import json -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( Message, ParsedResponse, RenderedTokens, + Tokenizer, ToolSpec, attribute_text_segments, extract_message_tool_names, @@ -49,7 +48,7 @@ class Qwen3Renderer: def __init__( self, - 
tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: Qwen3RendererConfig | None = None, ): self._tokenizer = tokenizer diff --git a/renderers/qwen35.py b/renderers/qwen35.py index cdb8ee1..b48293b 100644 --- a/renderers/qwen35.py +++ b/renderers/qwen35.py @@ -17,14 +17,14 @@ import json from typing import Any -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( Message, MultiModalData, ParsedResponse, PlaceholderRange, + Processor, RenderedTokens, + Tokenizer, ToolSpec, attribute_text_segments, extract_message_tool_names, @@ -114,10 +114,10 @@ class Qwen35Renderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: Qwen35RendererConfig | None = None, *, - processor: Any = None, + processor: Processor | None = None, ): self._tokenizer = tokenizer self._processor = processor @@ -163,7 +163,9 @@ def mm_token_type_id_map(self) -> dict[int, int]: def _get_processor(self): if self._processor is not None: return self._processor - from transformers import AutoProcessor + from renderers.base import _require_transformers + + AutoProcessor = _require_transformers().AutoProcessor name = getattr(self._tokenizer, "name_or_path", None) if not name: diff --git a/renderers/qwen3_vl.py b/renderers/qwen3_vl.py index 9a4ffde..ccbb14a 100644 --- a/renderers/qwen3_vl.py +++ b/renderers/qwen3_vl.py @@ -33,14 +33,14 @@ from typing import Any from urllib.parse import urlparse -from transformers.tokenization_utils import PreTrainedTokenizer - from renderers.base import ( Message, MultiModalData, ParsedResponse, PlaceholderRange, + Processor, RenderedTokens, + Tokenizer, ToolSpec, attribute_text_segments, extract_message_tool_names, @@ -309,10 +309,10 @@ class Qwen3VLRenderer: def __init__( self, - tokenizer: PreTrainedTokenizer, + tokenizer: Tokenizer, config: Qwen3VLRendererConfig | None = None, *, - processor: Any = None, + processor: Processor | None = None, ): self._tokenizer = tokenizer 
self._processor = processor @@ -368,7 +368,9 @@ def _encode(self, text: str) -> list[int]: def _get_processor(self): if self._processor is not None: return self._processor - from transformers import AutoProcessor + from renderers.base import _require_transformers + + AutoProcessor = _require_transformers().AutoProcessor name = getattr(self._tokenizer, "name_or_path", None) if not name: diff --git a/tests/test_no_transformers.py b/tests/test_no_transformers.py new file mode 100644 index 0000000..dfd5695 --- /dev/null +++ b/tests/test_no_transformers.py @@ -0,0 +1,171 @@ +"""``transformers`` is an optional extra (issue #31). + +These tests prove the boundary holds: importing ``renderers`` and driving a +text renderer with a bring-your-own tokenizer must work with ``transformers`` +(and ``fastokens``) absent, and the convenience helpers that *do* need it must +fail with a clear, actionable error. + +The dev environment has ``transformers`` installed, so we simulate its absence +in a subprocess by installing a meta-path finder that raises ``ImportError`` +for ``transformers`` / ``fastokens`` (and their submodules) before anything +imports ``renderers``. +""" + +from __future__ import annotations + +import subprocess +import sys +import textwrap + +# Shared preamble: block ``transformers`` / ``fastokens`` at import time, the +# way a lightweight install (no extra) would behave. +_BLOCK_PREAMBLE = """ +import sys +import importlib.abc +import importlib.machinery + +_BLOCKED = ("transformers", "fastokens") + + +class _Blocker(importlib.abc.MetaPathFinder): + def find_spec(self, name, path, target=None): + root = name.split(".")[0] + if root in _BLOCKED: + raise ImportError(f"{name} is blocked (simulating optional extra)") + return None + + +sys.meta_path.insert(0, _Blocker()) + +# A minimal tokenizer satisfying renderers.Tokenizer with no HF dependency. 
+# Special tokens map to fixed ids; ordinary text is char-level (ord + an +# offset that can't collide with the special ids). decode inverts encode, and +# __call__ supports return_offsets_mapping so attribute_text_segments uses this +# tokenizer directly rather than falling back to a vanilla HF one. +_CHAR_BASE = 200_000 + + +class FakeTokenizer: + name_or_path = "fake/qwen3-text" + unk_token_id = 0 + + def __init__(self): + specials = [ + "<|im_start|>", + "<|im_end|>", + "<|endoftext|>", + "", + "", + "", + "", + ] + self._special = {t: 150_000 + i for i, t in enumerate(specials)} + self._rev = {v: k for k, v in self._special.items()} + self.eos_token_id = self._special["<|im_end|>"] + + def convert_tokens_to_ids(self, token): + return self._special.get(token, self.unk_token_id) + + def encode(self, text, add_special_tokens=False, **kw): + return [_CHAR_BASE + ord(c) for c in text] + + def decode(self, ids, **kw): + out = [] + for i in ids: + if i in self._rev: + out.append(self._rev[i]) + elif i >= _CHAR_BASE: + out.append(chr(i - _CHAR_BASE)) + return "".join(out) + + def __call__(self, text, add_special_tokens=False, return_offsets_mapping=False, **kw): + ids = [_CHAR_BASE + ord(c) for c in text] + result = {"input_ids": ids} + if return_offsets_mapping: + result["offset_mapping"] = [(i, i + 1) for i in range(len(text))] + return result +""" + + +def _run(body: str) -> subprocess.CompletedProcess: + script = _BLOCK_PREAMBLE + textwrap.dedent(body) + return subprocess.run( + [sys.executable, "-c", script], + capture_output=True, + text=True, + ) + + +def test_text_renderer_works_without_transformers(): + """Import renderers + drive a text renderer with no transformers present.""" + proc = _run( + """ + import renderers + from renderers import Qwen3Renderer + + tok = FakeTokenizer() + r = Qwen3Renderer(tok) + + prompt_ids = r.render_ids( + [{"role": "user", "content": "hi there"}], + add_generation_prompt=True, + ) + assert prompt_ids and all(isinstance(i, int) 
for i in prompt_ids) + + # parse a hand-built assistant completion: text then the stop token. + completion = tok.encode("Hello!") + [tok.convert_tokens_to_ids("<|im_end|>")] + parsed = r.parse_response(completion) + assert parsed.content == "Hello!", parsed.content + + # The whole point: transformers / fastokens never got imported. + leaked = [m for m in sys.modules if m.split(".")[0] in _BLOCKED] + assert not leaked, f"unexpected import: {leaked}" + print("OK") + """ + ) + assert proc.returncode == 0, f"stdout:\n{proc.stdout}\nstderr:\n{proc.stderr}" + assert "OK" in proc.stdout + + +def test_import_renderers_without_client_deps(): + """``renderers.client`` (vLLM generate client) is opt-in via the ``[vllm]`` + extra — ``import renderers`` and driving a renderer must not pull in the + ``openai`` SDK or ``httpx``.""" + proc = _run( + """ + # Additionally block the client's deps. + _BLOCKED = _BLOCKED + ("openai", "httpx") + + import renderers + from renderers import Qwen3Renderer + + r = Qwen3Renderer(FakeTokenizer()) + ids = r.render_ids([{"role": "user", "content": "hi"}], add_generation_prompt=True) + assert ids + + leaked = [m for m in sys.modules if m.split(".")[0] in ("openai", "httpx")] + assert not leaked, f"client deps leaked into import: {leaked}" + print("OK") + """ + ) + assert proc.returncode == 0, f"stdout:\n{proc.stdout}\nstderr:\n{proc.stderr}" + assert "OK" in proc.stdout + + +def test_load_tokenizer_errors_clearly_without_transformers(): + """The convenience helper must point at the extra, not raise a bare + ``No module named 'transformers'``.""" + proc = _run( + """ + from renderers.base import load_tokenizer + try: + load_tokenizer("Qwen/Qwen3-8B") + except ImportError as exc: + assert "renderers[transformers]" in str(exc), str(exc) + print("OK") + else: + raise AssertionError("expected ImportError") + """ + ) + assert proc.returncode == 0, f"stdout:\n{proc.stdout}\nstderr:\n{proc.stderr}" + assert "OK" in proc.stdout