Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/vibevoice-vllm-asr.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ docker run -d --gpus all --name vibevoice-vllm \
-v $(pwd):/app \
-w /app \
--entrypoint bash \
vllm/vllm-openai:latest \
vllm/vllm-openai:v0.14.1 \
-c "python3 /app/vllm_plugin/scripts/start_server.py"
```

Expand Down
38 changes: 32 additions & 6 deletions vllm_plugin/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,13 @@ def _ffmpeg_load_file(filepath) -> tuple[np.ndarray, int]:
return audio, sr

# Register FFmpeg-based audio loader
import vllm.multimodal.audio as _vllm_audio_module
_OriginalAudioMediaIO = _vllm_audio_module.AudioMediaIO
try:
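# Try the new location first: vllm.multimodal.media.audio (stated boundary vLLM >= 0.6.x — TODO confirm against the vLLM release notes)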
from vllm.multimodal.media.audio import AudioMediaIO as _OriginalAudioMediaIO
except ImportError:
# Fall back to the old location: vllm.multimodal.audio (stated boundary vLLM < 0.6.x — TODO confirm against the vLLM release notes)
import vllm.multimodal.audio as _vllm_audio_module
_OriginalAudioMediaIO = _vllm_audio_module.AudioMediaIO

class _PatchedAudioMediaIO(_OriginalAudioMediaIO):
"""AudioMediaIO implementation using FFmpeg for audio decoding."""
Expand All @@ -62,11 +67,22 @@ def load_file(self, filepath) -> tuple[np.ndarray, int]:
return _ffmpeg_load_file(filepath)

# Replace globally
_vllm_audio_module.AudioMediaIO = _PatchedAudioMediaIO
try:
# For new vLLM versions
import vllm.multimodal.media.audio as _vllm_audio_module
_vllm_audio_module.AudioMediaIO = _PatchedAudioMediaIO
except ImportError:
# For old vLLM versions
import vllm.multimodal.audio as _vllm_audio_module
_vllm_audio_module.AudioMediaIO = _PatchedAudioMediaIO

# Also patch in utils module where it's imported
import vllm.multimodal.utils as _vllm_utils_module
_vllm_utils_module.AudioMediaIO = _PatchedAudioMediaIO
try:
import vllm.multimodal.utils as _vllm_utils_module
_vllm_utils_module.AudioMediaIO = _PatchedAudioMediaIO
except (ImportError, AttributeError):
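# Best-effort patch: vllm.multimodal.utils may be missing or fail to import in newer versions.
# Note: the assignment above creates the AudioMediaIO attribute even if utils never imported it,
# so AttributeError is not expected from the assignment itself.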
pass

# ============================================================================

Expand All @@ -91,7 +107,17 @@ def load_file(self, filepath) -> tuple[np.ndarray, int]:
PromptUpdate,
PromptUpdateDetails,
)
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
try:
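# Try the new location first: vllm.multimodal.processing (stated boundary vLLM >= 0.6.x — TODO confirm against the vLLM release notes)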
from vllm.multimodal.processing import BaseDummyInputsBuilder, ProcessorInputs
except ImportError:
# Fall back to the old location: vllm.multimodal.profiling (stated boundary vLLM < 0.6.x — TODO confirm against the vLLM release notes)
try:
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
except ImportError:
# If neither location works, try individual imports
from vllm.multimodal.processing.dummy_inputs import BaseDummyInputsBuilder
from vllm.multimodal.processing.inputs import ProcessorInputs

# Import VibeVoice components
from vibevoice.modular.modular_vibevoice_tokenizer import (
Expand Down