diff --git a/README.md b/README.md
index b2c3f2f..0365e2c 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,35 @@ Standalone on PyPI, and portable across training and inference stacks (transform
uv add renderers
```
+`transformers` is an optional extra. The base install is lightweight — bring
+your own tokenizer (e.g. a `tokenizers`-backed one) and you can render and parse
+text without `transformers` in your environment at all. Install the extra when
+you want the `load_tokenizer` / `create_renderer` convenience helpers, or any of
+the vision-language renderers (they need a HuggingFace image processor):
+
+```bash
+uv add 'renderers[transformers]'
+```
+
+Two caveats for the lightweight path: a bring-your-own tokenizer must satisfy the
+[`Tokenizer`](renderers/base.py) protocol (`encode` / `decode` /
+`convert_tokens_to_ids` / `__call__`, plus `name_or_path`, `unk_token_id`,
+`eos_token_id`; only `DefaultRenderer` additionally needs `apply_chat_template`);
+and per-token training attribution (`attribute_text_segments`) additionally needs
+`tokenizer(..., return_offsets_mapping=True)` — without it, attribution falls
+back to a vanilla HuggingFace tokenizer, which requires the extra.
+
+`renderers.client` — the generate client for vLLM's `/inference/v1/generate` — is
+also opt-in. It's the only piece that needs the `openai` SDK and `httpx`, so
+`import renderers` and the renderers themselves stay free of HTTP/engine deps.
+Install the extra to use it:
+
+```bash
+uv add 'renderers[vllm]'
+```
+
+The extras compose, e.g. `uv add 'renderers[transformers,vllm]'`.
+
## At a glance
```python
diff --git a/pyproject.toml b/pyproject.toml
index 478104f..b3fa4dc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,10 +16,8 @@ license-files = ["LICENSE"]
requires-python = ">=3.10,<3.14"
dependencies = [
"numpy",
- "openai>=1.108.1",
"tiktoken",
"jinja2",
- "transformers>=4.50.0",
# Used by GptOssRenderer to render and parse harmony tokens. Vendoring
# OpenAI's reference implementation keeps us byte-identical with vLLM
# (which also uses it) and saves us mirroring a 330-line Jinja template.
@@ -31,12 +29,6 @@ dependencies = [
# against 0.0.8) and ``tests/test_gpt_oss_harmony_parity.py`` passes on it,
# so the older harmony is safe.
"openai-harmony>=0.0.4",
- # Crusoe's Rust BPE tokenizer; ~10x faster encode vs HF's tokenizers.
- # ``load_tokenizer`` patches it in by default for every supported model
- # except a small denylist (DeepSeek-V3 family). The patch is bracketed
- # around ``from_pretrained``, so subsequent ``AutoTokenizer`` calls
- # outside the renderers package stay vanilla.
- "fastokens>=0.2.0",
# ``BaseRendererConfig`` inherits from ``pydantic_config.BaseConfig`` so
# the typed-config surface stays uniform with prime-rl / verifiers config
# bases. Transitively brings pydantic, which ``renderers.configs`` also
@@ -44,6 +36,35 @@ dependencies = [
"prime-pydantic-config>=0.3.0.dev83",
]
+[project.optional-dependencies]
+# ``transformers`` is heavy, and text-only renderers don't need it: construct
+# a renderer with your own tokenizer object (e.g. a ``tokenizers``-backed one)
+# and render / parse with no ``transformers`` import at all (issue #31). It is
+# required only by the ``load_tokenizer`` / ``create_renderer*`` convenience
+# helpers, the offset-attribution fallback in ``attribute_text_segments``, and
+# the VLM renderers (which need ``AutoProcessor`` image processors). The lazy
+# import points raise a clear "install renderers[transformers]" error when it's
+# missing — see ``renderers.base._require_transformers``.
+#
+# ``fastokens`` (Crusoe's Rust BPE; ~10x faster encode) patches
+# ``transformers`` inside ``load_tokenizer``, so it travels in the same extra.
+transformers = [
+ "transformers>=4.50.0",
+ "fastokens>=0.2.0",
+]
+
+# ``renderers.client`` is a generate client for vLLM's
+# ``/inference/v1/generate`` — engine-specific glue, not part of the core
+# message↔token contract. It's the only thing that needs the ``openai`` SDK
+# (``AsyncOpenAI``) and ``httpx``, so it's opt-in: ``import renderers`` and the
+# renderers themselves stay free of HTTP/engine deps. Install this extra to use
+# ``from renderers.client import generate``. (A future ``sglang`` client would
+# live behind its own extra.)
+vllm = [
+ "openai>=1.108.1",
+ "httpx",
+]
+
[tool.hatch.version]
source = "vcs"
# Tags look like ``renderers-v0.1.8`` (prefix matches the publish.yml
@@ -76,6 +97,16 @@ dev = [
"torch>=2.11.0",
"torchvision>=0.26.0",
"ty>=0.0.1a29,<0.0.22",
+ # ``transformers`` is an optional extra for consumers (issue #31), but the
+ # test suite + type-check exercise the ``load_tokenizer`` / VLM paths, so
+ # the dev env always installs it. Mirror of ``[project.optional-dependencies].transformers``.
+ "transformers>=4.50.0",
+ "fastokens>=0.2.0",
+ # ``renderers.client`` deps — opt-in via the ``[vllm]`` extra for consumers,
+ # but ``tests/test_client.py`` exercises the client, so the dev env always
+ # installs them. Mirror of ``[project.optional-dependencies].vllm``.
+ "openai>=1.108.1",
+ "httpx",
]
[tool.uv]
diff --git a/renderers/__init__.py b/renderers/__init__.py
index e7cd1c4..2f6bff8 100644
--- a/renderers/__init__.py
+++ b/renderers/__init__.py
@@ -7,6 +7,7 @@
__version__ = "0+unknown"
from renderers.base import (
+ ChatTemplateTokenizer,
Content,
ContentPart,
ImagePart,
@@ -17,12 +18,14 @@
ParsedResponse,
ParsedToolCall,
PlaceholderRange,
+ Processor,
RenderedConversation,
RenderedTokens,
Renderer,
RendererPool,
TextPart,
ThinkingPart,
+ Tokenizer,
ToolCall,
ToolCallFunction,
ToolCallParseStatus,
@@ -38,7 +41,6 @@
reject_assistant_in_extension,
trim_to_turn_close,
)
-from renderers.client import OverlongPromptError
from renderers.configs import (
AutoRendererConfig,
BaseRendererConfig,
@@ -63,15 +65,27 @@
# Concrete renderer classes are lazy-loaded so that consumers needing
# only the config layer (``RendererConfig`` discriminated union) don't
-# pay the ``transformers`` import cost. Each renderer module does
-# ``from transformers.tokenization_utils import PreTrainedTokenizer``
-# at module level, so eager imports here would drag ``transformers``
-# into every downstream ``import renderers``. ``__getattr__`` (PEP 562)
-# resolves the names on first attribute access, so ``from renderers
-# import DefaultRenderer`` and ``renderers.DefaultRenderer`` both work
-# transparently. ``create_renderer`` doesn't depend on these eager
-# imports — ``renderers.base._populate_registry`` lazy-imports the
-# concrete classes itself when a renderer is instantiated.
+# pay the cost of importing every renderer module up front. ``__getattr__``
+# (PEP 562) resolves the names on first attribute access, so ``from
+# renderers import DefaultRenderer`` and ``renderers.DefaultRenderer`` both
+# work transparently. ``create_renderer`` doesn't need eager imports here
+# either — ``renderers.base._populate_registry`` lazy-imports the concrete
+# classes itself when a renderer is instantiated.
+#
+# As of issue #31, ``transformers`` is an optional extra: the renderer
+# modules type their ``tokenizer`` / ``processor`` params against the
+# ``Tokenizer`` / ``Processor`` protocols in ``renderers.base`` rather than
+# ``transformers.PreTrainedTokenizer``, so ``import renderers`` (and building a
+# text renderer on your own tokenizer) no longer pulls in ``transformers``. Only
+# ``load_tokenizer`` / ``create_renderer*``, the offset-attribution fallback, and
+# the VLM renderers load it, lazily — see ``renderers.base._require_transformers``.
+#
+# ``renderers.client`` (the vLLM ``/inference/v1/generate`` client) is
+# likewise opt-in: it depends on the ``openai`` SDK + ``httpx`` (the
+# ``renderers[vllm]`` extra) and is deliberately *not* imported here, so
+# ``import renderers`` stays free of HTTP/engine deps. Import it explicitly
+# (``from renderers.client import generate, OverlongPromptError``) when you
+# want it.
_LAZY_RENDERERS: dict[str, str] = {
"DeepSeekV3Renderer": "renderers.deepseek_v3",
"DefaultRenderer": "renderers.default",
@@ -109,6 +123,7 @@ def __dir__() -> list[str]:
__all__ = [
"AutoRendererConfig",
"BaseRendererConfig",
+ "ChatTemplateTokenizer",
"Content",
"ContentPart",
"DeepSeekV3Renderer",
@@ -138,10 +153,10 @@ def __dir__() -> list[str]:
"MultimodalRenderer",
"Nemotron3Renderer",
"Nemotron3RendererConfig",
- "OverlongPromptError",
"ParsedResponse",
"ParsedToolCall",
"PlaceholderRange",
+ "Processor",
"Qwen35Renderer",
"Qwen35RendererConfig",
"Qwen36Renderer",
@@ -157,6 +172,7 @@ def __dir__() -> list[str]:
"RendererPool",
"TextPart",
"ThinkingPart",
+ "Tokenizer",
"ToolCall",
"ToolCallFunction",
"ToolCallParseStatus",
diff --git a/renderers/base.py b/renderers/base.py
index 45768de..04a4194 100644
--- a/renderers/base.py
+++ b/renderers/base.py
@@ -652,6 +652,78 @@ def with_completion(
)
+@runtime_checkable
+class Tokenizer(Protocol):
+ """Structural type for the tokenizer a renderer needs.
+
+ Satisfied by HuggingFace ``PreTrainedTokenizerBase`` and by any
+ bring-your-own wrapper (e.g. around a ``tokenizers.Tokenizer``) that
+ exposes this surface. Defining it here — rather than annotating with
+ ``transformers.PreTrainedTokenizer`` — keeps ``transformers`` out of
+ the import graph for text-only renderers (issue #31): the heavy
+ package becomes an optional extra, needed only by the
+ ``load_tokenizer`` / ``create_renderer`` convenience helpers, the
+ offset-attribution fallback described below, and the VLM renderers.
+
+ The hand-coded renderers only need ``encode`` / ``decode`` /
+ ``convert_tokens_to_ids`` (plus the id attributes), so a plain
+ ``tokenizers.Tokenizer`` wrapper satisfies this protocol.
+ ``apply_chat_template`` is deliberately *not* required here — only
+ :class:`DefaultRenderer` needs it, via the :class:`ChatTemplateTokenizer`
+ subtype.
+
+ ``__call__`` is consumed only by ``attribute_text_segments`` for
+ character-offset attribution (``return_offsets_mapping=True``). A
+ tokenizer that doesn't support offsets still renders and parses fine;
+ offset attribution then falls back to a vanilla HuggingFace tokenizer
+ (which requires the ``transformers`` extra) — see
+ ``_get_offset_tokenizer``.
+ """
+
+ name_or_path: str  # also the key used to look up a processor / offset tokenizer
+ unk_token_id: int | None
+ eos_token_id: int | None
+
+ def encode(self, text: str, *args: Any, **kwargs: Any) -> list[int]: ...
+
+ def decode(self, token_ids: Any, *args: Any, **kwargs: Any) -> str: ...
+
+ def convert_tokens_to_ids(self, tokens: Any) -> Any: ...
+
+ def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
+
+
+@runtime_checkable
+class ChatTemplateTokenizer(Tokenizer, Protocol):
+ """A :class:`Tokenizer` that also exposes ``apply_chat_template``.
+
+ Required only by :class:`DefaultRenderer`, the generic fallback that
+ delegates rendering to the tokenizer's Jinja chat template; the hand-coded
+ renderers reproduce each model's template in Python and don't need it.
+ ``isinstance`` checks only that the member exists, not its signature.
+ """
+
+ def apply_chat_template(self, *args: Any, **kwargs: Any) -> Any: ...
+
+
+@runtime_checkable
+class Processor(Protocol):
+ """Structural type for the HuggingFace processor a VLM renderer needs.
+
+ Satisfied by ``AutoProcessor`` instances (Qwen-VL, Kimi-VL, ...) and
+ accepted through the VLM renderers' keyword-only ``processor=`` argument.
+ The ``image_processor`` sub-object's surface differs per model family
+ (Qwen exposes ``__call__(images=...)`` + ``merge_size``; Kimi exposes
+ ``preprocess(...)`` + ``media_tokens_calculator``), so it's typed loosely.
+ VLMs intrinsically need ``transformers`` at render time — this Protocol
+ just keeps the ``processor:`` type hint from importing it at module load.
+ """
+
+ image_processor: Any  # per-family surface, hence ``Any``
+
+ def apply_chat_template(self, *args: Any, **kwargs: Any) -> Any: ...
+
+
@runtime_checkable
class Renderer(Protocol):
"""Owns message ↔ token conversion for a specific model family."""
@@ -1156,6 +1228,32 @@ def _model_has_vision_config(model_name: str) -> bool:
_FASTOKENS_ANNOUNCED = False
+_TRANSFORMERS_INSTALL_HINT = (
+ "This requires the optional `transformers` extra, which is not "
+ "installed. Install it with `pip install 'renderers[transformers]'` "
+ "(or `uv add 'renderers[transformers]'`). Text-only renderers work "
+ "without it when you construct them with your own tokenizer object."
+)
+
+
+def _require_transformers():
+ """Import and return ``transformers``; on ``ImportError`` re-raise one whose
+ message is ``_TRANSFORMERS_INSTALL_HINT``, chaining the original as its cause.
+
+ ``transformers`` (and ``fastokens``, which patches it) is an optional
+ dependency — see issue #31. The convenience helpers (``load_tokenizer``,
+ ``create_renderer*``), the offset-attribution fallback, and all VLM
+ renderers need it; text-only render/parse with a bring-your-own
+ tokenizer does not.
+ """
+ try:
+ import transformers
+
+ return transformers
+ except ImportError as exc:  # any import failure, not only "module not found"
+ raise ImportError(_TRANSFORMERS_INSTALL_HINT) from exc
+
+
def _patched_load(model_name_or_path: str, **kwargs):
"""Run ``AutoTokenizer.from_pretrained`` with fastokens patched in
process-locally — patch around the load, unpatch right after.
@@ -1293,7 +1391,11 @@ def load_tokenizer(
validation for configs with nested ``rope_parameters``), we fall
back to loading the repo's self-contained ``tokenizer.json``
directly — see ``_load_tokenizer_via_auto``.
+
+ Requires the optional ``transformers`` extra; raises a clear
+ ``ImportError`` with install instructions if it's missing.
"""
+ _require_transformers()
kwargs: dict[str, Any] = {}
revision = TRUSTED_REVISIONS.get(model_name_or_path)
if revision is not None:
@@ -1677,6 +1779,11 @@ def _get_offset_tokenizer(tokenizer):
cached = _offset_tokenizers.get(name_or_path)
if cached is not None:
return cached
+ # The supplied tokenizer can't produce offsets; fall back to a
+ # vanilla HuggingFace tokenizer, which needs the ``transformers``
+ # extra. A bring-your-own tokenizer that supports
+ # ``return_offsets_mapping=True`` skips this path entirely.
+ _require_transformers()
kwargs: dict[str, Any] = {}
revision = TRUSTED_REVISIONS.get(name_or_path)
diff --git a/renderers/deepseek_v3.py b/renderers/deepseek_v3.py
index 7bec3de..143ffb0 100644
--- a/renderers/deepseek_v3.py
+++ b/renderers/deepseek_v3.py
@@ -14,12 +14,11 @@
import json
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Message,
ParsedResponse,
RenderedTokens,
+ Tokenizer,
ToolSpec,
attribute_text_segments,
extract_message_tool_names,
@@ -55,7 +54,7 @@ class DeepSeekV3Renderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: DeepSeekV3RendererConfig | None = None,
):
self._tokenizer = tokenizer
diff --git a/renderers/default.py b/renderers/default.py
index a662097..a6db04a 100644
--- a/renderers/default.py
+++ b/renderers/default.py
@@ -11,9 +11,8 @@
import json
from typing import Any
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
+ ChatTemplateTokenizer,
Message,
ParsedResponse,
RenderedTokens,
@@ -91,7 +90,7 @@ class DefaultRenderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: ChatTemplateTokenizer,
config: DefaultRendererConfig | None = None,
):
cfg = config or DefaultRendererConfig()
diff --git a/renderers/glm45.py b/renderers/glm45.py
index 7af9259..6ac8429 100644
--- a/renderers/glm45.py
+++ b/renderers/glm45.py
@@ -13,12 +13,11 @@
import json
from typing import Any
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Message,
ParsedResponse,
RenderedTokens,
+ Tokenizer,
ToolSpec,
attribute_text_segments,
extract_message_tool_names,
@@ -54,7 +53,7 @@ class GLM45Renderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: GLM45RendererConfig | None = None,
):
self._tokenizer = tokenizer
diff --git a/renderers/glm5.py b/renderers/glm5.py
index 924d754..8ebfed3 100644
--- a/renderers/glm5.py
+++ b/renderers/glm5.py
@@ -14,12 +14,11 @@
import json
from typing import Any
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Message,
ParsedResponse,
RenderedTokens,
+ Tokenizer,
ToolSpec,
attribute_text_segments,
extract_message_tool_names,
@@ -63,7 +62,7 @@ class GLM5Renderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: GLM5RendererConfig | GLM51RendererConfig | None = None,
):
self._tokenizer = tokenizer
diff --git a/renderers/gpt_oss.py b/renderers/gpt_oss.py
index 2a9c5ca..9078cb6 100644
--- a/renderers/gpt_oss.py
+++ b/renderers/gpt_oss.py
@@ -49,12 +49,11 @@
ToolDescription,
load_harmony_encoding,
)
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Message,
ParsedResponse,
RenderedTokens,
+ Tokenizer,
ToolSpec,
extract_message_tool_names,
reject_assistant_in_extension,
@@ -122,7 +121,7 @@ class GptOssRenderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: GptOssRendererConfig | None = None,
):
"""Initialise the renderer.
diff --git a/renderers/kimi_k2.py b/renderers/kimi_k2.py
index e99dfa7..ce0b3eb 100644
--- a/renderers/kimi_k2.py
+++ b/renderers/kimi_k2.py
@@ -16,12 +16,11 @@
import json
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Message,
ParsedResponse,
RenderedTokens,
+ Tokenizer,
ToolSpec,
extract_message_tool_names,
reject_assistant_in_extension,
@@ -45,7 +44,7 @@ class KimiK2Renderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: KimiK2RendererConfig | None = None,
):
self._tokenizer = tokenizer
diff --git a/renderers/kimi_k25.py b/renderers/kimi_k25.py
index ba3ca6e..2236b09 100644
--- a/renderers/kimi_k25.py
+++ b/renderers/kimi_k25.py
@@ -25,15 +25,15 @@
import re
from typing import Any
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Message,
MultiModalData,
ParsedResponse,
ParsedToolCall,
PlaceholderRange,
+ Processor,
RenderedTokens,
+ Tokenizer,
ToolCallParseStatus,
ToolSpec,
extract_message_tool_names,
@@ -579,10 +579,10 @@ class KimiK25Renderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: KimiK25RendererConfig | None = None,
*,
- processor: Any = None,
+ processor: Processor | None = None,
):
self._tokenizer = tokenizer
self._processor = processor
@@ -638,7 +638,9 @@ def mm_token_type_id_map(self) -> dict[int, int]:
def _get_processor(self):
if self._processor is not None:
return self._processor
- from transformers import AutoProcessor
+ from renderers.base import _require_transformers
+
+ AutoProcessor = _require_transformers().AutoProcessor
name = getattr(self._tokenizer, "name_or_path", None)
if not name:
diff --git a/renderers/laguna_xs2.py b/renderers/laguna_xs2.py
index bd6b64f..45c28c4 100644
--- a/renderers/laguna_xs2.py
+++ b/renderers/laguna_xs2.py
@@ -27,13 +27,12 @@
import json
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Content,
Message,
ParsedResponse,
RenderedTokens,
+ Tokenizer,
ToolSpec,
attribute_text_segments,
extract_message_tool_names,
@@ -80,7 +79,7 @@
class LagunaXS2Renderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: LagunaXS2RendererConfig | None = None,
):
self._tokenizer = tokenizer
diff --git a/renderers/minimax_m2.py b/renderers/minimax_m2.py
index f990274..925fef3 100644
--- a/renderers/minimax_m2.py
+++ b/renderers/minimax_m2.py
@@ -14,12 +14,11 @@
import json
from typing import Any
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Message,
ParsedResponse,
RenderedTokens,
+ Tokenizer,
ToolSpec,
attribute_text_segments,
extract_message_tool_names,
@@ -56,7 +55,7 @@ class MiniMaxM2Renderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: MiniMaxM2RendererConfig | None = None,
):
self._tokenizer = tokenizer
diff --git a/renderers/nemotron3.py b/renderers/nemotron3.py
index e6398b5..d334f23 100644
--- a/renderers/nemotron3.py
+++ b/renderers/nemotron3.py
@@ -17,12 +17,11 @@
import json
from typing import Any
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Message,
ParsedResponse,
RenderedTokens,
+ Tokenizer,
ToolSpec,
attribute_text_segments,
extract_message_tool_names,
@@ -80,7 +79,7 @@ class Nemotron3Renderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: Nemotron3RendererConfig | None = None,
):
self._tokenizer = tokenizer
diff --git a/renderers/qwen3.py b/renderers/qwen3.py
index f744b8c..315d84c 100644
--- a/renderers/qwen3.py
+++ b/renderers/qwen3.py
@@ -11,12 +11,11 @@
import json
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Message,
ParsedResponse,
RenderedTokens,
+ Tokenizer,
ToolSpec,
attribute_text_segments,
extract_message_tool_names,
@@ -49,7 +48,7 @@ class Qwen3Renderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: Qwen3RendererConfig | None = None,
):
self._tokenizer = tokenizer
diff --git a/renderers/qwen35.py b/renderers/qwen35.py
index cdb8ee1..b48293b 100644
--- a/renderers/qwen35.py
+++ b/renderers/qwen35.py
@@ -17,14 +17,14 @@
import json
from typing import Any
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Message,
MultiModalData,
ParsedResponse,
PlaceholderRange,
+ Processor,
RenderedTokens,
+ Tokenizer,
ToolSpec,
attribute_text_segments,
extract_message_tool_names,
@@ -114,10 +114,10 @@ class Qwen35Renderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: Qwen35RendererConfig | None = None,
*,
- processor: Any = None,
+ processor: Processor | None = None,
):
self._tokenizer = tokenizer
self._processor = processor
@@ -163,7 +163,9 @@ def mm_token_type_id_map(self) -> dict[int, int]:
def _get_processor(self):
if self._processor is not None:
return self._processor
- from transformers import AutoProcessor
+ from renderers.base import _require_transformers
+
+ AutoProcessor = _require_transformers().AutoProcessor
name = getattr(self._tokenizer, "name_or_path", None)
if not name:
diff --git a/renderers/qwen3_vl.py b/renderers/qwen3_vl.py
index 9a4ffde..ccbb14a 100644
--- a/renderers/qwen3_vl.py
+++ b/renderers/qwen3_vl.py
@@ -33,14 +33,14 @@
from typing import Any
from urllib.parse import urlparse
-from transformers.tokenization_utils import PreTrainedTokenizer
-
from renderers.base import (
Message,
MultiModalData,
ParsedResponse,
PlaceholderRange,
+ Processor,
RenderedTokens,
+ Tokenizer,
ToolSpec,
attribute_text_segments,
extract_message_tool_names,
@@ -309,10 +309,10 @@ class Qwen3VLRenderer:
def __init__(
self,
- tokenizer: PreTrainedTokenizer,
+ tokenizer: Tokenizer,
config: Qwen3VLRendererConfig | None = None,
*,
- processor: Any = None,
+ processor: Processor | None = None,
):
self._tokenizer = tokenizer
self._processor = processor
@@ -368,7 +368,9 @@ def _encode(self, text: str) -> list[int]:
def _get_processor(self):
if self._processor is not None:
return self._processor
- from transformers import AutoProcessor
+ from renderers.base import _require_transformers
+
+ AutoProcessor = _require_transformers().AutoProcessor
name = getattr(self._tokenizer, "name_or_path", None)
if not name:
diff --git a/tests/test_no_transformers.py b/tests/test_no_transformers.py
new file mode 100644
index 0000000..dfd5695
--- /dev/null
+++ b/tests/test_no_transformers.py
@@ -0,0 +1,171 @@
+"""``transformers`` is an optional extra (issue #31).
+
+These tests prove the boundary holds: importing ``renderers`` and driving a
+text renderer with a bring-your-own tokenizer must work with ``transformers``
+(and ``fastokens``) absent, and the convenience helpers that *do* need it must
+fail with a clear, actionable error.
+
+The dev environment has ``transformers`` installed, so we simulate its absence
+in a subprocess by installing a meta-path finder that raises ``ImportError``
+for ``transformers`` / ``fastokens`` (and their submodules) before anything
+imports ``renderers``.
+"""
+
+from __future__ import annotations
+
+import subprocess
+import sys
+import textwrap
+
+# Shared preamble: block ``transformers`` / ``fastokens`` at import time, the
+# way a lightweight install (no extra) would behave.
+_BLOCK_PREAMBLE = """
+import sys
+import importlib.abc
+import importlib.machinery
+
+_BLOCKED = ("transformers", "fastokens")
+
+
+class _Blocker(importlib.abc.MetaPathFinder):
+ def find_spec(self, name, path, target=None):
+ root = name.split(".")[0]
+ if root in _BLOCKED:
+ raise ImportError(f"{name} is blocked (simulating optional extra)")
+ return None
+
+
+sys.meta_path.insert(0, _Blocker())
+
+# A minimal tokenizer satisfying renderers.Tokenizer with no HF dependency.
+# Special tokens map to fixed ids; ordinary text is char-level (ord + an
+# offset that can't collide with the special ids). decode inverts encode, and
+# __call__ supports return_offsets_mapping so attribute_text_segments uses this
+# tokenizer directly rather than falling back to a vanilla HF one.
+_CHAR_BASE = 200_000
+
+
+class FakeTokenizer:
+ name_or_path = "fake/qwen3-text"
+ unk_token_id = 0
+
+ def __init__(self):
+ specials = [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|endoftext|>",
+ "<think>",
+ "</think>",
+ "<tool_call>",
+ "</tool_call>",
+ ]
+ self._special = {t: 150_000 + i for i, t in enumerate(specials)}
+ self._rev = {v: k for k, v in self._special.items()}
+ self.eos_token_id = self._special["<|im_end|>"]
+
+ def convert_tokens_to_ids(self, token):
+ return self._special.get(token, self.unk_token_id)
+
+ def encode(self, text, add_special_tokens=False, **kw):
+ return [_CHAR_BASE + ord(c) for c in text]
+
+ def decode(self, ids, **kw):
+ out = []
+ for i in ids:
+ if i in self._rev:
+ out.append(self._rev[i])
+ elif i >= _CHAR_BASE:
+ out.append(chr(i - _CHAR_BASE))
+ return "".join(out)
+
+ def __call__(self, text, add_special_tokens=False, return_offsets_mapping=False, **kw):
+ ids = [_CHAR_BASE + ord(c) for c in text]
+ result = {"input_ids": ids}
+ if return_offsets_mapping:
+ result["offset_mapping"] = [(i, i + 1) for i in range(len(text))]
+ return result
+"""
+
+
+def _run(body: str) -> subprocess.CompletedProcess:
+ script = _BLOCK_PREAMBLE + textwrap.dedent(body)  # blocker + FakeTokenizer first, then the test body
+ return subprocess.run(
+ [sys.executable, "-c", script],  # child interpreter, so blocking happens before ``renderers`` is imported
+ capture_output=True,  # no ``check=True``: callers assert on ``returncode`` and report stdout/stderr
+ text=True,
+ )
+
+
+def test_text_renderer_works_without_transformers():
+ """``import renderers`` plus a Qwen3 render/parse round trip work with ``transformers`` / ``fastokens`` blocked."""
+ proc = _run(
+ """
+ import renderers
+ from renderers import Qwen3Renderer
+
+ tok = FakeTokenizer()
+ r = Qwen3Renderer(tok)
+
+ prompt_ids = r.render_ids(
+ [{"role": "user", "content": "hi there"}],
+ add_generation_prompt=True,
+ )
+ assert prompt_ids and all(isinstance(i, int) for i in prompt_ids)
+
+ # parse a hand-built assistant completion: text then the stop token.
+ completion = tok.encode("Hello!") + [tok.convert_tokens_to_ids("<|im_end|>")]
+ parsed = r.parse_response(completion)
+ assert parsed.content == "Hello!", parsed.content
+
+ # The whole point: transformers / fastokens never got imported.
+ leaked = [m for m in sys.modules if m.split(".")[0] in _BLOCKED]
+ assert not leaked, f"unexpected import: {leaked}"
+ print("OK")
+ """
+ )
+ assert proc.returncode == 0, f"stdout:\n{proc.stdout}\nstderr:\n{proc.stderr}"
+ assert "OK" in proc.stdout  # printed last, so it implies every in-script assert passed
+
+
+def test_import_renderers_without_client_deps():
+ """``renderers.client`` is opt-in via the ``[vllm]`` extra: importing and driving a
+ renderer must not pull in ``openai`` / ``httpx``. The script rebinds ``_BLOCKED``,
+ which the blocker's ``find_spec`` reads on every lookup, to block them as well."""
+ proc = _run(
+ """
+ # Additionally block the client's deps.
+ _BLOCKED = _BLOCKED + ("openai", "httpx")
+
+ import renderers
+ from renderers import Qwen3Renderer
+
+ r = Qwen3Renderer(FakeTokenizer())
+ ids = r.render_ids([{"role": "user", "content": "hi"}], add_generation_prompt=True)
+ assert ids
+
+ leaked = [m for m in sys.modules if m.split(".")[0] in ("openai", "httpx")]
+ assert not leaked, f"client deps leaked into import: {leaked}"
+ print("OK")
+ """
+ )
+ assert proc.returncode == 0, f"stdout:\n{proc.stdout}\nstderr:\n{proc.stderr}"
+ assert "OK" in proc.stdout  # printed last, so it implies every in-script assert passed
+
+
+def test_load_tokenizer_errors_clearly_without_transformers():
+ """``load_tokenizer`` must raise an ``ImportError`` naming ``renderers[transformers]``,
+ not a bare ``No module named 'transformers'``."""
+ proc = _run(
+ """
+ from renderers.base import load_tokenizer
+ try:
+ load_tokenizer("Qwen/Qwen3-8B")
+ except ImportError as exc:
+ assert "renderers[transformers]" in str(exc), str(exc)
+ print("OK")
+ else:
+ raise AssertionError("expected ImportError")
+ """
+ )
+ assert proc.returncode == 0, f"stdout:\n{proc.stdout}\nstderr:\n{proc.stderr}"
+ assert "OK" in proc.stdout  # only printed from the ``except ImportError`` branch