-
Notifications
You must be signed in to change notification settings - Fork 314
Open
Description
I am following the installation guide on Windows 11 x64.
`vllm serve` fails at startup with `ModuleNotFoundError: No module named 'vllm._C'` (full traceback below).
(.venv) PS D:\Experiments\fara> vllm serve "microsoft/Fara-7B" --port 5000 --dtype auto
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "D:\Experiments\fara\.venv\Scripts\vllm.exe\__main__.py", line 2, in <module>
from vllm.entrypoints.cli.main import main
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\entrypoints\cli\__init__.py", line 3, in <module>
from vllm.entrypoints.cli.benchmark.latency import BenchmarkLatencySubcommand
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\entrypoints\cli\benchmark\latency.py", line 5, in <module>
from vllm.benchmarks.latency import add_cli_args, main
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\benchmarks\latency.py", line 17, in <module>
from vllm.engine.arg_utils import EngineArgs
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\engine\arg_utils.py", line 35, in <module>
from vllm.attention.backends.registry import AttentionBackendEnum
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\attention\__init__.py", line 4, in <module>
from vllm.attention.backends.abstract import (
...<3 lines>...
)
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\attention\backends\abstract.py", line 9, in <module>
from vllm.model_executor.layers.linear import ColumnParallelLinear
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\model_executor\__init__.py", line 4, in <module>
from vllm.model_executor.parameter import BasevLLMParameter, PackedvLLMParameter
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\model_executor\parameter.py", line 11, in <module>
from vllm.distributed import (
...<2 lines>...
)
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\distributed\__init__.py", line 4, in <module>
from .communication_op import *
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\distributed\communication_op.py", line 9, in <module>
from .parallel_state import get_tp_group
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\distributed\parallel_state.py", line 250, in <module>
direct_register_custom_op(
~~~~~~~~~~~~~~~~~~~~~~~~~^
op_name="all_reduce",
^^^^^^^^^^^^^^^^^^^^^
op_func=all_reduce,
^^^^^^^^^^^^^^^^^^^
fake_impl=all_reduce_fake,
^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\utils\torch_utils.py", line 640, in direct_register_custom_op
from vllm.platforms import current_platform
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\platforms\__init__.py", line 257, in __getattr__
_current_platform = resolve_obj_by_qualname(platform_cls_qualname)()
~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\utils\import_utils.py", line 89, in resolve_obj_by_qualname
module = importlib.import_module(module_name)
File "C:\Users\rail\AppData\Local\Programs\Python\Python313\Lib\importlib\__init__.py", line 88, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\Experiments\fara\.venv\Lib\site-packages\vllm\platforms\cuda.py", line 16, in <module>
import vllm._C # noqa
^^^^^^^^^^^^^^
ModuleNotFoundError: No module named 'vllm._C'
byuufx and shreyan1999
Metadata
Metadata
Assignees
Labels
No labels