
Commit 61f4121

[Bugfix] Re-enable Gemma3 for V1 (vllm-project#14980)
Signed-off-by: DarkLight1337 <[email protected]>
Parent: 05ccd0a

File tree: 8 files changed (+419, -175 lines)


docs/source/models/supported_models.md (+2, -5)
@@ -768,7 +768,7 @@ See [this page](#generative-models) for more information on how to use generativ
   * `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc.
   * ✅︎
   * ✅︎
-  *
+  * ⚠️
 - * `GLM4VForCausalLM`<sup>^</sup>
   * GLM-4V
   * T + I
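
For context, the ⚠️ added above marks V1 support for Gemma 3 as partial rather than full (see the doc note in the next hunk). Below is a minimal usage sketch, not part of this commit, assuming vLLM's `VLLM_USE_V1` environment flag and `mm_processor_kwargs` constructor argument; only engine setup is shown, prompt handling is omitted.

import os

# Opt in to the V1 engine (at the time of this commit, V1 was opt-in).
os.environ["VLLM_USE_V1"] = "1"

from vllm import LLM

# "do_pan_and_scan" is a Gemma 3 processor option; see the note below on
# how it interacts with V1's simplified attention pattern.
llm = LLM(
    model="google/gemma-3-4b-it",
    mm_processor_kwargs={"do_pan_and_scan": True},
)
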
@@ -951,13 +951,10 @@ V0 correctly implements the model's attention pattern:
 
 V1 currently uses a simplified attention pattern:
 - Uses causal attention for all tokens, including image tokens
-- Generates reasonable outputs but does not match the original model's attention for text + image inputs
+- Generates reasonable outputs but does not match the original model's attention for text + image inputs, especially when `{"do_pan_and_scan": True}`
 - Will be updated in the future to support the correct behavior
-- Does not support `"do_pan_and_scan": True`
 
 This limitation exists because the model's mixed attention pattern (bidirectional for images, causal otherwise) is not yet supported by vLLM's attention backends.
-
-For these reasons, `Gemma3ForConditionalGeneration` is supported only on V0 at the moment.
 :::
 
 :::{note}
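
To make the limitation concrete, the sketch below constructs the mask the model expects: causal everywhere, except that tokens within the same contiguous image span attend to each other bidirectionally. This is an illustration of the pattern described in the note, not vLLM's internal implementation; `image_token_id` and the example ids are placeholders.

import torch

def mixed_attention_mask(token_ids: torch.Tensor,
                         image_token_id: int) -> torch.Tensor:
    """Boolean [n, n] mask; True means the query row may attend to the
    key column. Causal for text; bidirectional within each contiguous
    run of image tokens ("bidirectional for images, causal otherwise").
    """
    n = token_ids.numel()
    # Start from a standard causal (lower-triangular) mask.
    mask = torch.tril(torch.ones(n, n, dtype=torch.bool))
    is_image = (token_ids == image_token_id).tolist()
    start = None
    for i in range(n + 1):
        if i < n and is_image[i]:
            if start is None:
                start = i  # entering an image span
        elif start is not None:
            # Open up the whole image span bidirectionally.
            mask[start:i, start:i] = True
            start = None
    return mask

# Example: a 3-token "image" between two text tokens (ids are made up).
ids = torch.tensor([5, 99, 99, 99, 7])
print(mixed_attention_mask(ids, image_token_id=99))
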

tests/multimodal/test_processing.py (+54, -1)
@@ -19,7 +19,8 @@
                                         apply_token_matches,
                                         find_mm_placeholders,
                                         find_text_matches, find_token_matches,
-                                        iter_token_matches)
+                                        iter_token_matches,
+                                        replace_token_matches)
 # yapf: enable
 from vllm.multimodal.profiling import MultiModalProfiler
 from vllm.transformers_utils.tokenizer import (AnyTokenizer,
@@ -89,6 +90,58 @@ def test_iter_token_matches(token_ids, match_ids, expected):
     assert all(match_len == len(match_ids) for match_len in match_lens)
 
 
+# yapf: disable
+@pytest.mark.parametrize(
+    ("token_ids", "match_ids", "new_ids", "expected"),
+    [
+        ([], [], [-1], []),
+        ([], [32000], [-1], []),
+        (
+            [32000, 32000, 32000],
+            [32000],
+            [-1],
+            [-1, -1, -1],
+        ),
+        (
+            [32000, 32000, 32000],
+            [32000, 32000],
+            [-1],
+            [-1, 32000],
+        ),
+        (
+            [32000, 32000, 32000],
+            [32000, 32000, 32000],
+            [-1],
+            [-1],
+        ),
+        (
+            [9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
+            [28747, 32000],
+            [-1],
+            [9833, -1, 32000, 32000, 9833, -1, 32000, 918],
+        ),
+        (
+            [9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
+            [28747, 32000, 32000, 32000],
+            [-1],
+            [9833, -1, 9833, 28747, 32000, 32000, 918],
+        ),
+        (
+            [9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
+            [28747, 0, 32000],
+            [-1],
+            [9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
+        ),
+    ],
+)
+# yapf: enable
+def test_replace_token_matches(token_ids, match_ids, new_ids, expected):
+    result = replace_token_matches(token_ids, match_ids, new_ids)
+
+    # Manually constructed results
+    assert result == expected
+
+
 # yapf: disable
 @pytest.mark.parametrize(
     ("prompt", "target_by_key", "expected_by_key"),
