From 6cbd83286dd166f16a3d9400d3d6416fe5963cfe Mon Sep 17 00:00:00 2001
From: leslie-fang25
Date: Sun, 26 Oct 2025 23:31:26 -0700
Subject: [PATCH] [TRTLLM-8763][chore] Deprecate pybind based GuidedDecodingConfig usage in torch backend

Signed-off-by: leslie-fang25
---
Reviewer notes (this section is ignored when the patch is applied):

* grammar_matcher.py, guided_decoder.py and py_executor_creator.py now import
  GuidedDecodingConfig from tensorrt_llm.llmapi.llm_args instead of the
  bindings.executor module. GuidedDecodingParams is still imported from
  bindings.executor in grammar_matcher.py and guided_decoder.py.
* llm_args.py drops the `GuidedDecodingConfig as _GuidedDecodingConfig` alias
  import and adds a GuidedDecodingConfig class deriving from StrictBaseModel.
  Its fields are `backend` (nested GuidedDecodingBackend enum with XGRAMMAR = 0
  and LLGUIDANCE = 1, default XGRAMMAR) plus `encoded_vocab`, `tokenizer_str`
  and `stop_token_ids`, each defaulting to None.
* NOTE(review): line breaks and indentation were reconstructed from the hunk
  line counts; confirm against the original commit before applying.

 .../_torch/pyexecutor/grammar_matcher.py      |  4 +++-
 .../_torch/pyexecutor/guided_decoder.py       |  4 +++-
 .../_torch/pyexecutor/py_executor_creator.py  |  4 ++--
 tensorrt_llm/llmapi/llm_args.py               | 23 +++++++++++++++++--
 4 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/tensorrt_llm/_torch/pyexecutor/grammar_matcher.py b/tensorrt_llm/_torch/pyexecutor/grammar_matcher.py
index 9688b4e0243..3de8d6f2b40 100644
--- a/tensorrt_llm/_torch/pyexecutor/grammar_matcher.py
+++ b/tensorrt_llm/_torch/pyexecutor/grammar_matcher.py
@@ -6,7 +6,9 @@
 import torch
 import xgrammar
 
-from ...bindings.executor import GuidedDecodingConfig, GuidedDecodingParams
+from tensorrt_llm.llmapi.llm_args import GuidedDecodingConfig
+
+from ...bindings.executor import GuidedDecodingParams
 
 
 class GrammarMatcher(ABC):
diff --git a/tensorrt_llm/_torch/pyexecutor/guided_decoder.py b/tensorrt_llm/_torch/pyexecutor/guided_decoder.py
index d280acf4c09..efd3379ee09 100644
--- a/tensorrt_llm/_torch/pyexecutor/guided_decoder.py
+++ b/tensorrt_llm/_torch/pyexecutor/guided_decoder.py
@@ -5,8 +5,10 @@
 
 import torch
 
+from tensorrt_llm.llmapi.llm_args import GuidedDecodingConfig
+
 from ..._utils import nvtx_range
-from ...bindings.executor import GuidedDecodingConfig, GuidedDecodingParams
+from ...bindings.executor import GuidedDecodingParams
 from ...bindings.internal.batch_manager import LlmRequestType
 from ...logger import logger
 from ..hostfunc import hostfunc
diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
index c57b9d3c6fa..0936bd5dc98 100644
--- a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
+++ b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
@@ -14,9 +14,9 @@
 import tensorrt_llm
 from tensorrt_llm._torch.pyexecutor.resource_manager import ResourceManagerType
 from tensorrt_llm._utils import get_sm_version, mpi_disabled
-from tensorrt_llm.bindings.executor import GuidedDecodingConfig
 from tensorrt_llm.llmapi.llm_args import (CapacitySchedulerPolicy,
-                                          ContextChunkingPolicy, LoadFormat,
+                                          ContextChunkingPolicy,
+                                          GuidedDecodingConfig, LoadFormat,
                                           TorchLlmArgs)
 from tensorrt_llm.llmapi.tokenizer import (TokenizerBase,
                                            _llguidance_tokenizer_info,
diff --git a/tensorrt_llm/llmapi/llm_args.py b/tensorrt_llm/llmapi/llm_args.py
index b537a4702be..cd2967d5efc 100644
--- a/tensorrt_llm/llmapi/llm_args.py
+++ b/tensorrt_llm/llmapi/llm_args.py
@@ -45,8 +45,7 @@
                                  KvCacheConfig as _KvCacheConfig,
                                  LookaheadDecodingConfig as _LookaheadDecodingConfig,
                                  PeftCacheConfig as _PeftCacheConfig,
-                                 SchedulerConfig as _SchedulerConfig,
-                                 GuidedDecodingConfig as _GuidedDecodingConfig) # isort: skip
+                                 SchedulerConfig as _SchedulerConfig) # isort: skip
 # isort: on
 
 # yapf: enable
@@ -165,6 +164,26 @@ def _generate_cuda_graph_batch_sizes(max_batch_size: int,
     return batch_sizes
 
 
+class GuidedDecodingConfig(StrictBaseModel):
+
+    class GuidedDecodingBackend(Enum):
+        XGRAMMAR = 0
+        LLGUIDANCE = 1
+
+    backend: GuidedDecodingBackend = Field(
+        default=GuidedDecodingBackend.XGRAMMAR,
+        description="The backend for guided decoding config.")
+    encoded_vocab: Optional[List[str]] = Field(
+        default=None,
+        description="The encoded vocab for guided decoding config.")
+    tokenizer_str: Optional[str] = Field(
+        default=None,
+        description="The tokenizer string for guided decoding config.")
+    stop_token_ids: Optional[List[int]] = Field(
+        default=None,
+        description="The stop token ids for guided decoding config.")
+
+
 class BaseSparseAttentionConfig(StrictBaseModel):
     """
     Configuration for sparse attention.