Skip to content

Commit 451959c

Browse files
[TRTLLM-8763][chore] Deprecate pybind based GuidedDecodingConfig usage in torch backend (#8717)
Signed-off-by: leslie-fang25 <[email protected]>
1 parent fc3b6f5 commit 451959c

File tree

4 files changed

+29
-6
lines changed

4 files changed

+29
-6
lines changed

tensorrt_llm/_torch/pyexecutor/grammar_matcher.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,9 @@
66
import torch
77
import xgrammar
88

9-
from ...bindings.executor import GuidedDecodingConfig, GuidedDecodingParams
9+
from tensorrt_llm.llmapi.llm_args import GuidedDecodingConfig
10+
11+
from ...bindings.executor import GuidedDecodingParams
1012

1113

1214
class GrammarMatcher(ABC):

tensorrt_llm/_torch/pyexecutor/guided_decoder.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,10 @@
55

66
import torch
77

8+
from tensorrt_llm.llmapi.llm_args import GuidedDecodingConfig
9+
810
from ..._utils import nvtx_range
9-
from ...bindings.executor import GuidedDecodingConfig, GuidedDecodingParams
11+
from ...bindings.executor import GuidedDecodingParams
1012
from ...bindings.internal.batch_manager import LlmRequestType
1113
from ...logger import logger
1214
from ..hostfunc import hostfunc

tensorrt_llm/_torch/pyexecutor/py_executor_creator.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,9 @@
1414
import tensorrt_llm
1515
from tensorrt_llm._torch.pyexecutor.resource_manager import ResourceManagerType
1616
from tensorrt_llm._utils import get_sm_version, mpi_disabled
17-
from tensorrt_llm.bindings.executor import GuidedDecodingConfig
1817
from tensorrt_llm.llmapi.llm_args import (CapacitySchedulerPolicy,
19-
ContextChunkingPolicy, LoadFormat,
18+
ContextChunkingPolicy,
19+
GuidedDecodingConfig, LoadFormat,
2020
TorchLlmArgs)
2121
from tensorrt_llm.llmapi.tokenizer import (TokenizerBase,
2222
_llguidance_tokenizer_info,

tensorrt_llm/llmapi/llm_args.py

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,7 @@
4444
KvCacheConfig as _KvCacheConfig,
4545
LookaheadDecodingConfig as _LookaheadDecodingConfig,
4646
PeftCacheConfig as _PeftCacheConfig,
47-
SchedulerConfig as _SchedulerConfig,
48-
GuidedDecodingConfig as _GuidedDecodingConfig) # isort: skip
47+
SchedulerConfig as _SchedulerConfig) # isort: skip
4948
# isort: on
5049

5150
# yapf: enable
@@ -164,6 +163,26 @@ def _generate_cuda_graph_batch_sizes(max_batch_size: int,
164163
return batch_sizes
165164

166165

166+
class GuidedDecodingConfig(StrictBaseModel):
167+
168+
class GuidedDecodingBackend(Enum):
169+
XGRAMMAR = 0
170+
LLGUIDANCE = 1
171+
172+
backend: GuidedDecodingBackend = Field(
173+
default=GuidedDecodingBackend.XGRAMMAR,
174+
description="The backend for guided decoding config.")
175+
encoded_vocab: Optional[List[str]] = Field(
176+
default=None,
177+
description="The encoded vocab for guided decoding config.")
178+
tokenizer_str: Optional[str] = Field(
179+
default=None,
180+
description="The tokenizer string for guided decoding config.")
181+
stop_token_ids: Optional[List[int]] = Field(
182+
default=None,
183+
description="The stop token ids for guided decoding config.")
184+
185+
167186
class BaseSparseAttentionConfig(StrictBaseModel):
168187
"""
169188
Configuration for sparse attention.

0 commit comments

Comments (0)