
Commit e96dda5

fix
Signed-off-by: Pamela <[email protected]>
1 parent: b45aee6

1 file changed, 7 additions and 5 deletions


tensorrt_llm/_torch/models/modeling_qwen2vl.py

Lines changed: 7 additions & 5 deletions
@@ -4,8 +4,9 @@

 import torch
 import torch.nn as nn
-from transformers import (AutoProcessor, AutoTokenizer, PretrainedConfig,
-                          PreTrainedModel, Qwen2_5_VLForConditionalGeneration,
+from transformers import (AutoConfig, AutoProcessor, AutoTokenizer,
+                          PretrainedConfig, PreTrainedModel,
+                          Qwen2_5_VLForConditionalGeneration,
                           Qwen2VLForConditionalGeneration)
 from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import \
     Qwen2_5_VisionTransformerPretrainedModel
@@ -333,9 +334,10 @@ def __init__(self, model_config: ModelConfig[PretrainedConfig],
         # Currently, copying vision encoder on all devices.
         # NOTE: Using attn_implementation='flash_attention_2' to avoid the issue of vision model's GPU OOM.
         hf_model_config = AutoConfig.from_pretrained(model_path)
-        vision_model = model_class(config=hf_model_config.vision_config,
-                                   torch_dtype=pretrained_config.torch_dtype,
-                                   attn_implementation='flash_attention_2')
+        vision_model = model_class._from_config(
+            hf_model_config.vision_config,
+            torch_dtype=pretrained_config.torch_dtype,
+            attn_implementation='flash_attention_2')
         # TODO: Make vision model compatible with meta init mode and load_weights at the same place
         self.visual = vision_model.to(self.device)
         self.post_config()
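For context, the new call path relies on Hugging Face's PreTrainedModel._from_config() classmethod, which builds a model from a config object (without fetching pretrained weights) and applies torch_dtype and attn_implementation during construction, instead of passing those kwargs to the class constructor. Below is a minimal standalone sketch of that pattern, not the actual TensorRT-LLM wiring; the checkpoint path and dtype are illustrative assumptions.

    # Sketch of the construction pattern this commit switches to (assumptions marked).
    import torch
    from transformers import AutoConfig
    from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import \
        Qwen2_5_VisionTransformerPretrainedModel

    model_path = "Qwen/Qwen2.5-VL-7B-Instruct"  # hypothetical checkpoint

    # Read the full HF config and pick out the vision sub-config.
    hf_model_config = AutoConfig.from_pretrained(model_path)

    # _from_config() instantiates the module from the config alone and applies
    # torch_dtype / attn_implementation while doing so.
    vision_model = Qwen2_5_VisionTransformerPretrainedModel._from_config(
        hf_model_config.vision_config,
        torch_dtype=torch.bfloat16,               # illustrative; the commit uses pretrained_config.torch_dtype
        attn_implementation='flash_attention_2',  # avoids vision-encoder OOM per the NOTE in the diff
    )

Because _from_config() only constructs the module from the config, the actual weights still need to be loaded separately, which is consistent with the TODO in the diff about meta init mode and load_weights.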
