diff --git a/tutorials/23-whisper-api-transcription.md b/tutorials/23-whisper-api-transcription.md
index a09281916..39951dd2d 100644
--- a/tutorials/23-whisper-api-transcription.md
+++ b/tutorials/23-whisper-api-transcription.md
@@ -29,6 +29,8 @@ vllm serve \
 
 Create and run a router connected to the Whisper backend:
 
+run-router.sh:
+
 ```bash
 #!/bin/bash
 if [[ $# -ne 2 ]]; then
@@ -37,21 +39,24 @@ if [[ $# -ne 2 ]]; then
 fi
 
+# log level options: "debug", "info", "warning", "error", "critical"
 uv run python3 -m vllm_router.app \
-    --host 0.0.0.0 --port "$1" \
-    --service-discovery static \
-    --static-backends "$2" \
-    --static-models "openai/whisper-small" \
-    --static-model-types "transcription" \
-    --routing-logic roundrobin \
-    --log-stats \
-    --engine-stats-interval 10 \
-    --request-stats-window 10
+    --host 0.0.0.0 --port "$1" \
+    --service-discovery static \
+    --static-backends "$2" \
+    --static-models "openai/whisper-small" \
+    --static-model-labels "transcription" \
+    --routing-logic roundrobin \
+    --log-stats \
+    --log-level debug \
+    --engine-stats-interval 10 \
+    --request-stats-window 10 \
+    --static-backend-health-checks # Enable this flag to make vllm-router check periodically if the models work by sending dummy requests to their endpoints.
 ```
 
 Example usage:
 
 ```bash
-./run-router.sh 8000 http://localhost:8002
+./run-router.sh 8000 http://0.0.0.0:8002
 ```
 
 ## 3. Sending a Transcription Request