From 5e869435aeec98186e574e0506f0137d56e496d4 Mon Sep 17 00:00:00 2001
From: BLACKBOX Agent
Date: Thu, 20 Nov 2025 18:45:04 +0000
Subject: [PATCH] Fix GitHub Issue #118: Selfhosted vLLM Server (Qwe...

---
 docker/run-vllm-qwen.sh                        | 13 +++++++++++++
 packages/bytebot-llm-proxy/litellm-config.yaml |  9 +++++++++
 2 files changed, 22 insertions(+)
 create mode 100755 docker/run-vllm-qwen.sh

diff --git a/docker/run-vllm-qwen.sh b/docker/run-vllm-qwen.sh
new file mode 100755
index 000000000..6d3989053
--- /dev/null
+++ b/docker/run-vllm-qwen.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+export HUGGING_FACE_HUB_TOKEN=hf_XXX-XXX-XXX
+export CUDA_VISIBLE_DEVICES="0,1,2,3"
+docker run \
+  --name vllm-qwen-vl \
+  --network vllm-qwen-vl \
+  --gpus all \
+  --runtime=nvidia \
+  --ipc=host \
+  --rm --init \
+  -p 8000:8000 \
+  -v /opt/vllm:/root/.cache/huggingface \
+  vllm/vllm-openai:latest --model Qwen/Qwen2.5-VL-32B-Instruct --served-model-name "Qwen2.5-VL-32B-Instruct" --tensor-parallel-size 4 --max_model_len 32768 --enable-auto-tool-choice --tool-call-parser qwen
\ No newline at end of file
diff --git a/packages/bytebot-llm-proxy/litellm-config.yaml b/packages/bytebot-llm-proxy/litellm-config.yaml
index ff063c345..bc4c4fee5 100644
--- a/packages/bytebot-llm-proxy/litellm-config.yaml
+++ b/packages/bytebot-llm-proxy/litellm-config.yaml
@@ -28,3 +28,12 @@ model_list:
     litellm_params:
       model: gemini/gemini-2.5-flash
       api_key: os.environ/GEMINI_API_KEY
+
+  # Self-hosted vLLM Models
+  - model_name: VM426:Qwen2.5-VL-32B-Instruct
+    litellm_params:
+      model: openai/Qwen2.5-VL-32B-Instruct
+      api_base: https://XXX-XXX-XXX-XXX/v1
+      supports_function_calling: true
+      drop_params: true
+      temperature: 0.1
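
A minimal smoke test for this setup (a sketch, not part of the patch): it assumes the container started by docker/run-vllm-qwen.sh is reachable on localhost:8000 and that the served model name from --served-model-name is used. The endpoints are vLLM's standard OpenAI-compatible API; adjust host, port, and IP (redacted above as XXX-XXX-XXX-XXX) to your deployment before pointing the LiteLLM api_base at it.

  # List the models exposed by the vLLM OpenAI-compatible server
  curl -s http://localhost:8000/v1/models

  # Send a small chat completion to the served model name used in the run script
  curl -s http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "Qwen2.5-VL-32B-Instruct",
         "messages": [{"role": "user", "content": "Reply with OK"}],
         "max_tokens": 8}'

Once these calls succeed, the new model_list entry lets the bytebot-llm-proxy route VM426:Qwen2.5-VL-32B-Instruct requests to the same /v1 endpoint.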