|
10 | 10 | import pytest |
11 | 11 | import yaml |
12 | 12 | from defs.conftest import skip_no_hopper |
13 | | -from defs.disaggregated.test_disaggregated_single_gpu import \ |
14 | | - model_path as get_model_path |
15 | 13 | from defs.trt_test_alternative import popen |
16 | 14 | from transformers import AutoTokenizer |
17 | 15 |
|
@@ -206,13 +204,10 @@ def __init__(self, |
206 | 204 | gen_servers: List[str], |
207 | 205 | req_timeout_secs: int = DEFAULT_TIMEOUT_REQUEST, |
208 | 206 | server_start_timeout_secs: int = DEFAULT_TIMEOUT_SERVER_START, |
209 | | - model_name: str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0", |
210 | | - model_path: Optional[str] = None): |
| 207 | + model_name: str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"): |
211 | 208 | super().__init__(ctx_servers, gen_servers, req_timeout_secs, |
212 | 209 | server_start_timeout_secs) |
213 | | - if model_path is None: |
214 | | - model_path = get_model_path(model_name) |
215 | | - self.tokenizer = AutoTokenizer.from_pretrained(model_path) |
| 210 | + self.tokenizer = AutoTokenizer.from_pretrained(model_name) |
216 | 211 | self.model_name = model_name |
217 | 212 | self.kv_cache_block_maps: dict[str, KvCacheAwareServerState] = {} |
218 | 213 | self.kv_cache_event_maps: dict[str, list[dict]] = {} |
@@ -489,6 +484,7 @@ def load_default_prompts(disaggregated_example_root: str): |
489 | 484 | @contextlib.contextmanager |
490 | 485 | def background_workers(llm_venv, config_file: str, num_ranks: int = None): |
491 | 486 | cwd = llm_venv.get_working_directory() |
| 487 | + os.chdir(cwd) |
492 | 488 | with open(os.path.join(cwd, 'output_workers.log'), 'w+') as log_file: |
493 | 489 | workers_proc, ctx_servers, gen_servers = run_disaggregated_workers( |
494 | 490 | config_file=config_file, |
@@ -603,7 +599,6 @@ def test_workers_kv_cache_aware_router_deepseek_v3_lite_bf16( |
603 | 599 |
|
604 | 600 | with background_workers(llm_venv, config_file, |
605 | 601 | 4) as (ctx_servers, gen_servers): |
606 | | - os.chdir(llm_venv.get_working_directory()) |
607 | 602 | tester = KvCacheAwareRouterTester(ctx_servers, |
608 | 603 | gen_servers, |
609 | 604 | model_name="DeepSeek-V3-Lite/bf16", |
|
0 commit comments