
Commit 61f4121

[Bugfix] Re-enable Gemma3 for V1 (vllm-project#14980)
Signed-off-by: DarkLight1337 <[email protected]>
Parent: 05ccd0a

File tree: 8 files changed (+419, -175 lines)


docs/source/models/supported_models.md (+2, -5)
@@ -768,7 +768,7 @@ See [this page](#generative-models) for more information on how to use generativ
   * `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc.
   * ✅︎
   * ✅︎
-  *
+  * ⚠️
 - * `GLM4VForCausalLM`<sup>^</sup>
   * GLM-4V
   * T + I
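
For context, the ⚠️ added above marks V1 support for Gemma 3 as partial rather than full (see the doc note in the next hunk). Below is a minimal usage sketch, not part of this commit, assuming vLLM's `VLLM_USE_V1` environment flag and `mm_processor_kwargs` constructor argument; only engine setup is shown, prompt handling is omitted.

import os

# Opt in to the V1 engine (at the time of this commit, V1 was opt-in).
os.environ["VLLM_USE_V1"] = "1"

from vllm import LLM

# "do_pan_and_scan" is a Gemma 3 processor option; see the note below on
# how it interacts with V1's simplified attention pattern.
llm = LLM(
    model="google/gemma-3-4b-it",
    mm_processor_kwargs={"do_pan_and_scan": True},
)
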
@@ -951,13 +951,10 @@ V0 correctly implements the model's attention pattern:
 
 V1 currently uses a simplified attention pattern:
 - Uses causal attention for all tokens, including image tokens
-- Generates reasonable outputs but does not match the original model's attention for text + image inputs
+- Generates reasonable outputs but does not match the original model's attention for text + image inputs, especially when `{"do_pan_and_scan": True}`
 - Will be updated in the future to support the correct behavior
-- Does not support `"do_pan_and_scan": True`
 
 This limitation exists because the model's mixed attention pattern (bidirectional for images, causal otherwise) is not yet supported by vLLM's attention backends.
-
-For these reasons, `Gemma3ForConditionalGeneration` is supported only on V0 at the moment.
 :::
 
 :::{note}
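
To make the limitation concrete, the sketch below constructs the mask the model expects: causal everywhere, except that tokens within the same contiguous image span attend to each other bidirectionally. This is an illustration of the pattern described in the note, not vLLM's internal implementation; `image_token_id` and the example ids are placeholders.

import torch

def mixed_attention_mask(token_ids: torch.Tensor,
                         image_token_id: int) -> torch.Tensor:
    """Boolean [n, n] mask; True means the query row may attend to the
    key column. Causal for text; bidirectional within each contiguous
    run of image tokens ("bidirectional for images, causal otherwise").
    """
    n = token_ids.numel()
    # Start from a standard causal (lower-triangular) mask.
    mask = torch.tril(torch.ones(n, n, dtype=torch.bool))
    is_image = (token_ids == image_token_id).tolist()
    start = None
    for i in range(n + 1):
        if i < n and is_image[i]:
            if start is None:
                start = i  # entering an image span
        elif start is not None:
            # Open up the whole image span bidirectionally.
            mask[start:i, start:i] = True
            start = None
    return mask

# Example: a 3-token "image" between two text tokens (ids are made up).
ids = torch.tensor([5, 99, 99, 99, 7])
print(mixed_attention_mask(ids, image_token_id=99))
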

tests/multimodal/test_processing.py (+54, -1)
@@ -19,7 +19,8 @@
                                         apply_token_matches,
                                         find_mm_placeholders,
                                         find_text_matches, find_token_matches,
-                                        iter_token_matches)
+                                        iter_token_matches,
+                                        replace_token_matches)
 # yapf: enable
 from vllm.multimodal.profiling import MultiModalProfiler
 from vllm.transformers_utils.tokenizer import (AnyTokenizer,
@@ -89,6 +90,58 @@ def test_iter_token_matches(token_ids, match_ids, expected):
     assert all(match_len == len(match_ids) for match_len in match_lens)
 
 
+# yapf: disable
+@pytest.mark.parametrize(
+    ("token_ids", "match_ids", "new_ids", "expected"),
+    [
+        ([], [], [-1], []),
+        ([], [32000], [-1], []),
+        (
+            [32000, 32000, 32000],
+            [32000],
+            [-1],
+            [-1, -1, -1],
+        ),
+        (
+            [32000, 32000, 32000],
+            [32000, 32000],
+            [-1],
+            [-1, 32000],
+        ),
+        (
+            [32000, 32000, 32000],
+            [32000, 32000, 32000],
+            [-1],
+            [-1],
+        ),
+        (
+            [9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
+            [28747, 32000],
+            [-1],
+            [9833, -1, 32000, 32000, 9833, -1, 32000, 918],
+        ),
+        (
+            [9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
+            [28747, 32000, 32000, 32000],
+            [-1],
+            [9833, -1, 9833, 28747, 32000, 32000, 918],
+        ),
+        (
+            [9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
+            [28747, 0, 32000],
+            [-1],
+            [9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
+        ),
+    ],
+)
+# yapf: enable
+def test_replace_token_matches(token_ids, match_ids, new_ids, expected):
+    result = replace_token_matches(token_ids, match_ids, new_ids)
+
+    # Manually constructed results
+    assert result == expected
+
+
 # yapf: disable
 @pytest.mark.parametrize(
     ("prompt", "target_by_key", "expected_by_key"),
