3 changes: 1 addition & 2 deletions .github/workflows/vllm_ascend_test_pr_light.yaml
@@ -119,9 +119,8 @@ jobs:
          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
        run: |
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
          # Remove these ignores after multimodal refactor is done
          pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
            --ignore tests/ut/torchair/models/test_torchair_deepseek_mtp.py \
            --ignore tests/ut/torchair/models/test_torchair_deepseek_v2.py \
            --ignore tests/ut/models/test_qwen2_vl.py \
            --ignore tests/ut/models/test_qwen2_5_vl.py \
            --ignore tests/ut/models/test_qwen2_5_vl_without_padding.py
29 changes: 23 additions & 6 deletions tests/ut/attention/test_mla_v1.py
@@ -454,12 +454,20 @@ def setUp(self):
         "vllm_ascend.attention.mla_v1.get_decode_context_model_parallel_world_size"
     )
     @patch("vllm_ascend.attention.mla_v1.get_ascend_config")
-    def test_build_prefix_no_cache_metadata(self, mock_get_ascend_config,
+    @patch("vllm_ascend.attention.mla_v1.torch.zeros", wraps=torch.zeros)
+    @patch("torch.Tensor.npu", new=lambda self: self)
+    @patch("torch.npu.is_available")
+    def test_build_prefix_no_cache_metadata(self, mock_npu_available,
+                                            mock_zeros, mock_get_ascend_config,
                                             mock_dcp_world_size):
-        if not torch.npu.is_available():
-            self.skipTest("NPU not available, skipping NPU-dependent tests")
+        mock_npu_available.return_value = False
         mock_dcp_world_size.return_value = 1
 
+        def zeros_override(*args, **kwargs):
+            kwargs.pop('pin_memory', None)
+            return mock_zeros._mock_wraps(*args, **kwargs)
+
+        mock_zeros.side_effect = zeros_override
         common_attn_metadata = AscendCommonAttentionMetadata(
             query_start_loc=torch.tensor([0, 3, 7]),
             query_start_loc_cpu=torch.tensor([0, 3, 7]),
@@ -506,12 +514,21 @@ def test_build_prefix_no_cache_metadata(self, mock_get_ascend_config,
         "vllm_ascend.attention.mla_v1.get_decode_context_model_parallel_world_size"
     )
     @patch("vllm_ascend.attention.mla_v1.get_ascend_config")
-    def test_build_chunked_prefix_metadata(self, mock_get_ascend_config,
+    @patch("vllm_ascend.attention.mla_v1.torch.zeros", wraps=torch.zeros)
+    @patch("torch.Tensor.npu", new=lambda self: self)
+    @patch("torch.npu.is_available")
+    def test_build_chunked_prefix_metadata(self, mock_npu_available,
+                                           mock_zeros, mock_get_ascend_config,
                                            mock_dcp_world_size):
-        if not torch.npu.is_available():
-            self.skipTest("NPU not available, skipping NPU-dependent tests")
+        mock_npu_available.return_value = False
         mock_dcp_world_size.return_value = 1
 
+        def zeros_override(*args, **kwargs):
+            kwargs.pop('pin_memory', None)
+            return mock_zeros._mock_wraps(*args, **kwargs)
+
+        mock_zeros.side_effect = zeros_override
+
         common_attn_metadata = AscendCommonAttentionMetadata(
             query_start_loc=torch.tensor([0, 2, 5, 9]),
             query_start_loc_cpu=torch.tensor([0, 2, 5, 9]),
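Both hunks apply the same CPU-fallback recipe: force the NPU-availability check to report `False`, stub `Tensor.npu()` out as the identity, and wrap `torch.zeros` so that `pin_memory=True` (which needs an accelerator allocator) is dropped before delegating to the real factory function. Below is a minimal, self-contained sketch of that recipe on stock CPU-only PyTorch; the `TestCpuFallback` class and the final `torch.zeros(...)` call are illustrative inventions, `create=True` is added because `torch.Tensor.npu` may not exist until an NPU backend is loaded, and `_mock_wraps` is the same private `MagicMock` attribute the diff itself relies on.

```python
import unittest
from unittest.mock import patch

import torch


class TestCpuFallback(unittest.TestCase):
    """Hypothetical standalone version of the mocking recipe above."""

    # create=True: on a stock CPU build, torch.Tensor.npu may be absent.
    @patch("torch.zeros", wraps=torch.zeros)
    @patch("torch.Tensor.npu", new=lambda self: self, create=True)
    def test_pinned_zeros_on_cpu(self, mock_zeros):
        # pin_memory=True needs an accelerator allocator, so strip it before
        # delegating to the real torch.zeros captured by wraps=.
        def zeros_override(*args, **kwargs):
            kwargs.pop('pin_memory', None)
            return mock_zeros._mock_wraps(*args, **kwargs)

        mock_zeros.side_effect = zeros_override

        # Code under test can now call torch.zeros(..., pin_memory=True)
        # and .npu() on a CPU-only machine without raising.
        t = torch.zeros(4, dtype=torch.int32, pin_memory=True).npu()
        self.assertEqual(t.device.type, "cpu")


if __name__ == "__main__":
    unittest.main()
```

The evident design choice: instead of skipping these metadata-builder tests on machines without an NPU, they now run everywhere and count toward the `unittests-coverage.xml` report the workflow collects.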