diff --git a/mbridge/models/qwen3_vl/gpt_model.py b/mbridge/models/qwen3_vl/gpt_model.py index 41a2c9a..234cdd0 100644 --- a/mbridge/models/qwen3_vl/gpt_model.py +++ b/mbridge/models/qwen3_vl/gpt_model.py @@ -95,6 +95,7 @@ def forward( *, inference_params: Optional[BaseInferenceContext] = None, loss_mask: Optional[Tensor] = None, + padding_mask: Optional[Tensor] = None, # args for deepstack visual_pos_masks: Optional[torch.Tensor] = None, deepstack_visual_embeds: Optional[list[torch.Tensor]] = None, @@ -114,13 +115,7 @@ def forward( inference_context, inference_params ) - ( - decoder_input, - rotary_pos_emb, - rotary_pos_cos, - rotary_pos_sin, - sequence_len_offset, - ) = self._preprocess( + preproc_output = self._preprocess( input_ids=input_ids, position_ids=position_ids, decoder_input=decoder_input, @@ -128,6 +123,14 @@ def forward( packed_seq_params=packed_seq_params, ) + ( + decoder_input, + rotary_pos_emb, + rotary_pos_cos, + rotary_pos_sin, + sequence_len_offset, + ) = preproc_output[:5] + # Run decoder. hidden_states = self.decoder( hidden_states=decoder_input,