diff --git a/tests/e2e/vLLM/configs/vl_w4a16_actorder_group_qwen_vl.yaml b/tests/e2e/vLLM/configs/vl_w4a16_actorder_group_qwen_vl.yaml
new file mode 100644
index 000000000..309a8d50a
--- /dev/null
+++ b/tests/e2e/vLLM/configs/vl_w4a16_actorder_group_qwen_vl.yaml
@@ -0,0 +1,10 @@
+cadence: "weekly"
+test_type: "regression"
+model: Qwen/Qwen2.5-VL-7B-Instruct
+model_class: Qwen2_5_VLForConditionalGeneration
+recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_group.yaml
+dataset_id: lmms-lab/flickr30k
+dataset_config: default
+dataset_split: test
+scheme: W4A16_actorder_group
+save_dir: Qwen2.5-VL-7B-Instruct-v1.0-actorder-group
\ No newline at end of file
diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_channel_quant.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_channel_quant.yaml
index 8a5302c7f..d944d5c40 100644
--- a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_channel_quant.yaml
+++ b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_channel_quant.yaml
@@ -1,7 +1,7 @@
 quant_stage:
   quant_modifiers:
     GPTQModifier:
-      ignore: [lm_head]
+      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*"]
       config_groups:
         group_0:
           weights: {num_bits: 4, type: int, symmetric: true, strategy: channel, dynamic: false}
diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w8a16_channel_quant.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w8a16_channel_quant.yaml
index f7d1b742b..9229a324e 100644
--- a/tests/e2e/vLLM/recipes/WNA16/recipe_w8a16_channel_quant.yaml
+++ b/tests/e2e/vLLM/recipes/WNA16/recipe_w8a16_channel_quant.yaml
@@ -1,7 +1,7 @@
 quant_stage:
   quant_modifiers:
     GPTQModifier:
-      ignore: [lm_head]
+      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*"]
       config_groups:
         group_0:
           weights: {num_bits: 8, type: int, symmetric: true, strategy: channel, dynamic: false}
diff --git a/tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_group-128_recipe.yaml b/tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_group-128_recipe.yaml
index 92cc85ae7..f5c26e249 100644
--- a/tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_group-128_recipe.yaml
+++ b/tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_group-128_recipe.yaml
@@ -10,7 +10,7 @@ quantization_stage:
   run_type: oneshot
   quantization_modifiers:
     GPTQModifier:
-      ignore: ["lm_head"]
+      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*"]
       config_groups:
         group_0:
           weights:
diff --git a/tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_recipe.yaml b/tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_recipe.yaml
index dc7e18b6e..ed7124cfc 100644
--- a/tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_recipe.yaml
+++ b/tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_recipe.yaml
@@ -10,7 +10,7 @@ quantization_stage:
   run_type: oneshot
   quantization_modifiers:
     GPTQModifier:
-      ignore: ["lm_head"]
+      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*"]
       config_groups:
         group_0:
           weights:
diff --git a/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_group.yaml b/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_group.yaml
index b9b9db154..0b1aaeee7 100644
--- a/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_group.yaml
+++ b/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_group.yaml
@@ -1,7 +1,7 @@
 quant_stage:
   quant_modifiers:
     GPTQModifier:
-      ignore: ["lm_head"]
+      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*"]
       config_groups:
         group_0:
           weights:
diff --git a/tests/e2e/vLLM/recipes/kv_cache/gptq.yaml b/tests/e2e/vLLM/recipes/kv_cache/gptq.yaml
index 8c76de33a..5da077903 100644
--- a/tests/e2e/vLLM/recipes/kv_cache/gptq.yaml
+++ b/tests/e2e/vLLM/recipes/kv_cache/gptq.yaml
@@ -5,7 +5,7 @@ quant_stage:
         {num_bits: 8, type: float, symmetric: true, strategy: tensor}
     GPTQModifier:
       sequential_update: false
-      ignore: ["lm_head"]
+      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*"]
       config_groups:
         group_0:
           weights: