22# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33from itertools import repeat
44from typing import Any
5+ import os
56
67import pytest
78import torch ._dynamo .config as dynamo_config
@@ -169,10 +170,11 @@ def run_test(
169170 spec_config : dict [str , Any ] | None ,
170171 test_prefill_chunking : bool ,
171172):
173+ os .environ ['VLLM_WORKER_MULTIPROC_METHOD' ] = 'spawn'
172174 spec_decoding = spec_config is not None
173175 cache_arg : dict [str , Any ] = (
174176 # Force preemptions
175- dict (num_gpu_blocks_override = 32 ) if test_preemption else dict (
177+ dict (num_gpu_blocks_override = 2 ) if test_preemption else dict (
176178 gpu_memory_utilization = 0.9 ))
177179 spec_mml = (spec_config or {}).get ("max_model_len" )
178180 test_config = (f"executor={ executor } , preemption={ test_preemption } , "
@@ -199,7 +201,7 @@ def run_test(
199201 results = []
200202 acceptance_rates : list [float ] | None = [] if spec_decoding else None
201203 for override_params in sampling_param_tests :
202- metrics_before = vllm_model .llm .get_metrics ()
204+ metrics_before = vllm_model .model .get_metrics ()
203205 print (f"----------- RUNNING PARAMS: { override_params } " )
204206 results .append (
205207 vllm_model .generate (
@@ -208,7 +210,7 @@ def run_test(
208210 ** override_params ),
209211 return_logprobs = True ,
210212 ))
211- metrics_after = vllm_model .llm .get_metrics ()
213+ metrics_after = vllm_model .model .get_metrics ()
212214 if acceptance_rates is not None :
213215 acceptance_rate = _get_acceptance_rate (metrics_before ,
214216 metrics_after )
0 commit comments