-
Notifications
You must be signed in to change notification settings - Fork 17
Description
启动命令如下:
`#!/bin/bash
# Launch the vLLM OpenAI-compatible API server for a local Qwen3-30B-A3B
# checkpoint with FP8 quantization and tensor parallelism across two devices.
#
# Fail fast: an unset variable (such as a mistyped model variable) or a failed
# command aborts the script instead of starting the server with bad arguments.
set -euo pipefail

# Filesystem path of the checkpoint and the name exposed through the API.
model="/llm/models/Qwen3-30B-A3B"
served_model_name="Qwen3-30B-A3B"

# Runtime switches read from the environment by vLLM and the device runtime.
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_OFFLOAD_WEIGHTS_BEFORE_QUANT=0
export ZE_AFFINITY_MASK=0,1

# Collect the arguments in an array so every flag reaches the same python3
# invocation without relying on trailing backslashes, and so values are passed
# as single words even if they ever contain whitespace.
server_args=(
  --model "$model"
  # Two names are passed here: the literal "model" and $served_model_name.
  # NOTE(review): confirm the extra "model" alias is intended and not a typo.
  --served-model-name model "$served_model_name"
  --dtype=bfloat16
  --enforce-eager
  --port 8001
  --host 0.0.0.0
  --trust-remote-code
  # Spelled out in full rather than relying on argparse prefix matching.
  --gpu-memory-utilization=0.9
  --no-enable-prefix-caching
  --max-num-batched-tokens=8192
  --disable-log-requests
  --max-model-len=1000
  --block-size 64
  --quantization fp8
  -tp=2
)

python3 -m vllm.entrypoints.openai.api_server "${server_args[@]}"
`
报错如下:
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] WorkerProc failed to start.
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] Traceback (most recent call last):
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 715, in worker_main
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] worker = WorkerProc(*args, **kwargs)
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 555, in __init__
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] self.worker.load_model()
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_worker.py", line 273, in load_model
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] self.model_runner.load_model(eep_scale_up=eep_scale_up)
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_model_runner.py", line 3275, in load_model
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] self.model = model_loader.load_model(
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/base_loader.py", line 49, in load_model
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] model = initialize_model(
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/utils.py", line 57, in initialize_model
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] return model_class(vllm_config=vllm_config, prefix=prefix)
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_moe.py", line 741, in init
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] self.model = Qwen3MoeModel(
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/compilation/decorators.py", line 276, in init
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] old_init(self, **kwargs)
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_moe.py", line 420, in init
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] self.start_layer, self.end_layer, self.layers = make_layers(
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 651, in make_layers
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}"))
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_moe.py", line 422, in <lambda>
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] lambda prefix: Qwen3MoeDecoderLayer(vllm_config=vllm_config, prefix=prefix),
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_moe.py", line 360, in init
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] self.mlp = Qwen3MoeSparseMoeBlock(
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^^^^^^^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_moe.py", line 164, in init
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] self.experts = FusedMoE(
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/layers/fused_moe/layer.py", line 613, in init
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] self.quant_method.create_weights(layer=self, **moe_quant_params)
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/layers/quantization/ipex_quant.py", line 463, in create_weights
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] torch.ones(num_experts, 2, dtype=torch.float32), requires_grad=False
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] File "/usr/local/lib/python3.12/dist-packages/torch/utils/_device.py", line 103, in torch_function
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] return func(*args, **kwargs)
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] ^^^^^^^^^^^^^^^^^^^^^
(Worker_TP1 pid=8317) ERROR 02-03 09:24:09 [multiproc_executor.py:743] RuntimeError: UR backend failed. UR backend returns:40 (UR_RESULT_ERROR_OUT_OF_RESOURCES)
(Worker_TP0 pid=8316) INFO 02-03 09:24:09 [multiproc_executor.py:702] Parent process exited, terminating worker
(Worker_TP1 pid=8317) INFO 02-03 09:24:09 [multiproc_executor.py:702] Parent process exited, terminating worker
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] EngineCore failed to start.
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] Traceback (most recent call last):
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 833, in run_engine_core
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] engine_core = EngineCoreProc(*args, **kwargs)
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 606, in __init__
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] super().__init__(
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 102, in __init__
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] self.model_executor = executor_class(vllm_config)
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 96, in init
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] super().init(vllm_config)
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/abstract.py", line 101, in init
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] self._init_executor()
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 171, in _init_executor
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] self.workers = WorkerProc.wait_for_ready(unready_workers)
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 653, in wait_for_ready
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] raise e from None
(EngineCore_DP0 pid=8243) ERROR 02-03 09:24:13 [core.py:842] Exception: WorkerProc initialization failed due to an exception in a background process. See stack trace for root cause.
(EngineCore_DP0 pid=8243) Process EngineCore_DP0:
(EngineCore_DP0 pid=8243) Traceback (most recent call last):
(EngineCore_DP0 pid=8243) File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
(EngineCore_DP0 pid=8243) self.run()
(EngineCore_DP0 pid=8243) File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run
(EngineCore_DP0 pid=8243) self._target(*self._args, **self._kwargs)
(EngineCore_DP0 pid=8243) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 846, in run_engine_core
(EngineCore_DP0 pid=8243) raise e
(EngineCore_DP0 pid=8243) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 833, in run_engine_core
(EngineCore_DP0 pid=8243) engine_core = EngineCoreProc(*args, **kwargs)
(EngineCore_DP0 pid=8243) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=8243) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 606, in init
(EngineCore_DP0 pid=8243) super().init(
(EngineCore_DP0 pid=8243) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 102, in init
(EngineCore_DP0 pid=8243) self.model_executor = executor_class(vllm_config)
(EngineCore_DP0 pid=8243) ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=8243) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 96, in init
(EngineCore_DP0 pid=8243) super().init(vllm_config)
(EngineCore_DP0 pid=8243) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/abstract.py", line 101, in init
(EngineCore_DP0 pid=8243) self._init_executor()
(EngineCore_DP0 pid=8243) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 171, in _init_executor
(EngineCore_DP0 pid=8243) self.workers = WorkerProc.wait_for_ready(unready_workers)
(EngineCore_DP0 pid=8243) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=8243) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/multiproc_executor.py", line 653, in wait_for_ready
(EngineCore_DP0 pid=8243) raise e from None
(EngineCore_DP0 pid=8243) Exception: WorkerProc initialization failed due to an exception in a background process. See stack trace for root cause.
(APIServer pid=8167) Traceback (most recent call last):
(APIServer pid=8167) File "<frozen runpy>", line 198, in _run_module_as_main
(APIServer pid=8167) File "<frozen runpy>", line 88, in _run_code
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 2096, in <module>
(APIServer pid=8167) uvloop.run(run_server(args))
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/uvloop/init.py", line 96, in run
(APIServer pid=8167) return __asyncio.run(
(APIServer pid=8167) ^^^^^^^^^^^^^^
(APIServer pid=8167) File "/usr/lib/python3.12/asyncio/runners.py", line 194, in run
(APIServer pid=8167) return runner.run(main)
(APIServer pid=8167) ^^^^^^^^^^^^^^^^
(APIServer pid=8167) File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
(APIServer pid=8167) return self._loop.run_until_complete(task)
(APIServer pid=8167) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=8167) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/uvloop/init.py", line 48, in wrapper
(APIServer pid=8167) return await main
(APIServer pid=8167) ^^^^^^^^^^
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 2024, in run_server
(APIServer pid=8167) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 2043, in run_server_worker
(APIServer pid=8167) async with build_async_engine_client(
(APIServer pid=8167) File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__
(APIServer pid=8167) return await anext(self.gen)
(APIServer pid=8167) ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 195, in build_async_engine_client
(APIServer pid=8167) async with build_async_engine_client_from_engine_args(
(APIServer pid=8167) File "/usr/lib/python3.12/contextlib.py", line 210, in aenter
(APIServer pid=8167) return await anext(self.gen)
(APIServer pid=8167) ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 236, in build_async_engine_client_from_engine_args
(APIServer pid=8167) async_llm = AsyncLLM.from_vllm_config(
(APIServer pid=8167) ^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/utils/func_utils.py", line 116, in inner
(APIServer pid=8167) return fn(*args, **kwargs)
(APIServer pid=8167) ^^^^^^^^^^^^^^^^^^^
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/async_llm.py", line 203, in from_vllm_config
(APIServer pid=8167) return cls(
(APIServer pid=8167) ^^^^
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/async_llm.py", line 133, in init
(APIServer pid=8167) self.engine_core = EngineCoreClient.make_async_mp_client(
(APIServer pid=8167) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core_client.py", line 121, in make_async_mp_client
(APIServer pid=8167) return AsyncMPClient(*client_args)
(APIServer pid=8167) ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core_client.py", line 808, in init
(APIServer pid=8167) super().init(
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core_client.py", line 469, in init
(APIServer pid=8167) with launch_core_engines(vllm_config, executor_class, log_stats) as (
(APIServer pid=8167) File "/usr/lib/python3.12/contextlib.py", line 144, in __exit__
(APIServer pid=8167) next(self.gen)
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/utils.py", line 907, in launch_core_engines
(APIServer pid=8167) wait_for_engine_startup(
(APIServer pid=8167) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/utils.py", line 964, in wait_for_engine_startup
(APIServer pid=8167) raise RuntimeError(
(APIServer pid=8167) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}
root@test-MS-WorkStation-W790:/llm/llm-server# /usr/lib/python3.12/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked shared_memory objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '
请问可以帮忙看看吗?