@@ -261,6 +261,7 @@ def test_ascend_mla_metadata_builder_spec_decode(self, mock_get_dcp_size,
            new_callable=lambda: MagicMock(spec=GroupCoordinator))
     @patch("vllm.distributed.get_decode_context_model_parallel_world_size",
            return_value=1)
+    @patch("torch.Tensor.pin_memory", lambda x: x)
     def test_ascend_mla_metadata_builder_build_full_graph(
             self, mock_get_dcp_size, mock_dcp, mock_get_dcp_group):
         mock_vllm_config = MagicMock()
@@ -454,6 +455,7 @@ def setUp(self):
         "vllm_ascend.attention.mla_v1.get_decode_context_model_parallel_world_size"
     )
     @patch("vllm_ascend.attention.mla_v1.get_ascend_config")
+    @patch("torch.Tensor.pin_memory", lambda x: x)
     def test_build_prefix_no_cache_metadata(self, mock_get_ascend_config,
                                             mock_dcp_world_size):
         if not torch.npu.is_available():
@@ -506,6 +508,7 @@ def test_build_prefix_no_cache_metadata(self, mock_get_ascend_config,
         "vllm_ascend.attention.mla_v1.get_decode_context_model_parallel_world_size"
     )
     @patch("vllm_ascend.attention.mla_v1.get_ascend_config")
+    @patch("torch.Tensor.pin_memory", lambda x: x)
     def test_build_chunked_prefix_metadata(self, mock_get_ascend_config,
                                            mock_dcp_world_size):
         if not torch.npu.is_available():
@@ -558,6 +561,7 @@ def test_build_chunked_prefix_metadata(self, mock_get_ascend_config,
         "vllm_ascend.attention.mla_v1.get_decode_context_model_parallel_world_size"
     )
     @patch("vllm_ascend.attention.mla_v1.get_ascend_config")
+    @patch("torch.Tensor.pin_memory", lambda x: x)
     def test_build_decode_only_metadata(self, mock_get_ascend_config,
                                         mock_dcp_world_size):
         mock_dcp_world_size.return_value = 1
@@ -607,6 +611,7 @@ def test_build_decode_only_metadata(self, mock_get_ascend_config,
         "vllm_ascend.attention.mla_v1.get_decode_context_model_parallel_world_size"
     )
     @patch("vllm_ascend.attention.mla_v1.get_ascend_config")
+    @patch("torch.Tensor.pin_memory", lambda x: x)
     def test_build_for_graph_capture_decode_only(self, mock_get_ascend_config,
                                                  mock_dcp_world_size):
         mock_dcp_world_size.return_value = 1
@@ -657,6 +662,7 @@ def test_build_for_graph_capture_decode_only(self, mock_get_ascend_config,
         "vllm_ascend.attention.mla_v1.get_decode_context_model_parallel_world_size"
     )
     @patch("vllm_ascend.attention.mla_v1.get_ascend_config")
+    @patch("torch.Tensor.pin_memory", lambda x: x)
     def test_build_for_graph_capture_prefill(self, mock_get_ascend_config,
                                              mock_dcp_world_size):
         mock_dcp_world_size.return_value = 1