diff --git a/build-local-vllm.sh b/build-local-vllm.sh new file mode 100755 index 0000000000..5e3afe85de --- /dev/null +++ b/build-local-vllm.sh @@ -0,0 +1,182 @@ +#!/usr/bin/env bash +# Build script for Dynamo with local vLLM and/or DeepEP directories + +set -e + +# Default values +VLLM_PATH="" +DEEPEP_PATH="" +IMAGE_TAG="dynamo-vllm:local" +BUILD_ARGS="" +DOCKER_BUILD_ARGS="" + +# Color codes for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Function to print colored messages +print_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +# Function to show usage +usage() { + cat << EOF +Usage: $0 [OPTIONS] + +Build Dynamo Docker image with local vLLM and/or DeepEP directories. + +OPTIONS: + -v, --vllm PATH Path to local vLLM directory + -d, --deepep PATH Path to local DeepEP directory + -t, --tag TAG Docker image tag (default: dynamo-vllm:local) + -b, --build-arg ARG Additional Docker build arguments (can be used multiple times) + -h, --help Show this help message + +EXAMPLES: + # Build with local vLLM only + $0 --vllm /path/to/vllm + + # Build with local DeepEP only + $0 --deepep /path/to/DeepEP + + # Build with both local vLLM and DeepEP + $0 --vllm /path/to/vllm --deepep /path/to/DeepEP + + # Build with custom tag + $0 --vllm /path/to/vllm --tag my-custom-image:latest + + # Build with additional Docker build args + $0 --vllm /path/to/vllm --build-arg MAX_JOBS=8 + +EOF + exit 0 +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -v|--vllm) + VLLM_PATH="$2" + shift 2 + ;; + -d|--deepep) + DEEPEP_PATH="$2" + shift 2 + ;; + -t|--tag) + IMAGE_TAG="$2" + shift 2 + ;; + -b|--build-arg) + DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS} --build-arg $2" + shift 2 + ;; + -h|--help) + usage + ;; + *) + print_error "Unknown option: $1" + usage + ;; + esac +done + +# Check if at least one local 
path is provided +if [ -z "$VLLM_PATH" ] && [ -z "$DEEPEP_PATH" ]; then + print_error "At least one of --vllm or --deepep must be specified" + (usage); exit 1 +fi + +# Validate provided paths +if [ -n "$VLLM_PATH" ] && [ ! -d "$VLLM_PATH" ]; then + print_error "vLLM path does not exist or is not a directory: $VLLM_PATH" + exit 1 +fi + +if [ -n "$DEEPEP_PATH" ] && [ ! -d "$DEEPEP_PATH" ]; then + print_error "DeepEP path does not exist or is not a directory: $DEEPEP_PATH" + exit 1 +fi + +# Get the directory of this script +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Create a temporary build context directory +BUILD_CONTEXT=$(mktemp -d) +print_info "Created temporary build context: $BUILD_CONTEXT" + +# Cleanup function +cleanup() { + if [ -n "$BUILD_CONTEXT" ] && [ -d "$BUILD_CONTEXT" ]; then + print_info "Cleaning up temporary build context..." + rm -rf "$BUILD_CONTEXT" + fi +} + +# Set trap to cleanup on exit +trap cleanup EXIT + +# Copy dynamo directory contents to build context (not as subdirectory) +print_info "Copying dynamo directory to build context..." +# Use cp -a to preserve everything including hidden files +cp -a "$SCRIPT_DIR/." 
"$BUILD_CONTEXT/" + +# Copy or link local directories to build context +if [ -n "$VLLM_PATH" ]; then + print_info "Copying local vLLM from: $VLLM_PATH" + cp -r "$VLLM_PATH" "$BUILD_CONTEXT/vllm" + BUILD_ARGS="${BUILD_ARGS} --build-arg USE_LOCAL_VLLM=true" +else + # Create empty directory to avoid COPY failure + mkdir -p "$BUILD_CONTEXT/vllm_placeholder" + touch "$BUILD_CONTEXT/vllm_placeholder/.placeholder" +fi + +if [ -n "$DEEPEP_PATH" ]; then + print_info "Copying local DeepEP from: $DEEPEP_PATH" + cp -r "$DEEPEP_PATH" "$BUILD_CONTEXT/DeepEP" + BUILD_ARGS="${BUILD_ARGS} --build-arg USE_LOCAL_DEEPEP=true" +else + # Create empty directory to avoid COPY failure + mkdir -p "$BUILD_CONTEXT/DeepEP_placeholder" + touch "$BUILD_CONTEXT/DeepEP_placeholder/.placeholder" +fi + +# Build the Docker image +print_info "Building Docker image with tag: $IMAGE_TAG" +print_info "Build arguments: $BUILD_ARGS $DOCKER_BUILD_ARGS" + +cd "$BUILD_CONTEXT" + +# Run Docker build +if DOCKER_BUILDKIT=1 docker build \ + $BUILD_ARGS \ + $DOCKER_BUILD_ARGS \ + -f container/Dockerfile.vllm \ + -t "$IMAGE_TAG" \ + . ; then + print_info "Docker image built successfully: $IMAGE_TAG" + + echo + print_info "To run the container:" + echo " docker run --gpus all -it $IMAGE_TAG" + echo + print_info "To run with volume mounts for development:" + echo " docker run --gpus all -it -v /path/to/workspace:/workspace $IMAGE_TAG" +else + print_error "Docker build failed" + exit 1 +fi + +print_info "Build completed successfully!" diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm index 0d4c5b3ce4..f963776bdf 100644 --- a/container/Dockerfile.vllm +++ b/container/Dockerfile.vllm @@ -8,15 +8,20 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" # for details and reproducer to manually test if the image # can be updated to later versions. 
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" +ARG RELEASE_BUILD ARG ENABLE_KVBM=false ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG CUDA_VERSION="12.8" # Make sure to update the dependency version in pyproject.toml when updating this -ARG VLLM_REF="v0.11.0" +ARG VLLM_REF="v0.10.2" +# Set to "true" to use local vLLM directory instead of cloning from git +ARG USE_LOCAL_VLLM="false" +# Set to "true" to use local DeepEP directory instead of using vLLM's version +ARG USE_LOCAL_DEEPEP="false" # FlashInfer only respected when building vLLM from source, ie when VLLM_REF does not start with 'v' or for arm64 builds -ARG FLASHINF_REF="v0.3.1" +ARG FLASHINF_REF="v0.3.0" ARG TORCH_BACKEND="cu128" # If left blank, then we will fallback to vLLM defaults @@ -116,6 +121,18 @@ ARG MAX_JOBS=16 ENV MAX_JOBS=$MAX_JOBS ENV CUDA_HOME=/usr/local/cuda +# Copy local vLLM and DeepEP if using local versions +# Note: These paths are relative to the build context +# The build script should ensure vllm/ and DeepEP/ directories are available in the build context +ARG USE_LOCAL_VLLM +ARG USE_LOCAL_DEEPEP +# Copy directly to where they would be cloned/installed (same path for both local and GitHub) +# COPY of a directory copies its contents, so the destination must name the target directory; +# the bracket glob makes these optional (won't fail if the directory doesn't exist) +RUN if [ "$USE_LOCAL_VLLM" = "true" ]; then mkdir -p /opt; fi +COPY --chown=root:root vll[m] /opt/vllm/ +RUN if [ "$USE_LOCAL_DEEPEP" = "true" ]; then mkdir -p /opt; fi +COPY --chown=root:root DeepE[P] /opt/DeepEP/ + # Install sccache if requested COPY container/use-sccache.sh /tmp/use-sccache.sh # Install sccache if requested @@ -143,7 +160,18 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \ export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \ cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \ chmod +x /tmp/install_vllm.sh && \ - /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir 
/opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION && \ + if [ "$USE_LOCAL_VLLM" = "true" ] && [ "$USE_LOCAL_DEEPEP" = "true" ]; then \ + echo "Using local vLLM from /opt/vllm and local DeepEP from /opt/DeepEP" && \ + /tmp/install_vllm.sh --editable --use-local-vllm /opt/vllm --use-local-deepep /opt/DeepEP --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \ + elif [ "$USE_LOCAL_VLLM" = "true" ]; then \ + echo "Using local vLLM from /opt/vllm" && \ + /tmp/install_vllm.sh --editable --use-local-vllm /opt/vllm --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \ + elif [ "$USE_LOCAL_DEEPEP" = "true" ]; then \ + echo "Using local DeepEP from /opt/DeepEP" && \ + /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --use-local-deepep /opt/DeepEP --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \ + else \ + /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \ + fi && \ /tmp/use-sccache.sh show-stats "vLLM"; ENV LD_LIBRARY_PATH=\ @@ -241,7 +269,7 @@ $LD_LIBRARY_PATH # DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path # is not properly set for complilation. Set CPATH to help nvcc find the headers. 
-ENV CPATH=/usr/local/cuda/include +ENV CPATH=/usr/local/cuda/include:$CPATH ### VIRTUAL ENVIRONMENT SETUP ### diff --git a/container/deps/vllm/install_vllm.sh b/container/deps/vllm/install_vllm.sh index 0ebbb58823..46a2f3f1a5 100755 --- a/container/deps/vllm/install_vllm.sh +++ b/container/deps/vllm/install_vllm.sh @@ -30,6 +30,8 @@ CUDA_VERSION="12.8" # For DEEPGEMM EDITABLE=true VLLM_GIT_URL="https://github.com/vllm-project/vllm.git" FLASHINF_REF="v0.3.1" +USE_LOCAL_VLLM="" +USE_LOCAL_DEEPEP="" while [[ $# -gt 0 ]]; do case $1 in @@ -41,6 +43,14 @@ while [[ $# -gt 0 ]]; do EDITABLE=false shift ;; + --use-local-vllm) + USE_LOCAL_VLLM="$2" + shift 2 + ;; + --use-local-deepep) + USE_LOCAL_DEEPEP="$2" + shift 2 + ;; --vllm-ref) VLLM_REF="$2" shift 2 @@ -82,10 +92,12 @@ while [[ $# -gt 0 ]]; do shift 2 ;; -h|--help) - echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND] [--torch-cuda-arch-list LIST] [--cuda-version VERSION]" + echo "Usage: $0 [--editable|--no-editable] [--use-local-vllm PATH] [--use-local-deepep PATH] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND] [--torch-cuda-arch-list LIST] [--cuda-version VERSION]" echo "Options:" echo " --editable Install vllm in editable mode (default)" echo " --no-editable Install vllm in non-editable mode" + echo " --use-local-vllm PATH Use local vLLM directory instead of cloning" + echo " --use-local-deepep PATH Use local DeepEP directory instead of vLLM's version" echo " --vllm-ref REF Git reference to checkout (default: ${VLLM_REF})" echo " --max-jobs NUM Maximum number of parallel jobs (default: ${MAX_JOBS})" echo " --arch ARCH Architecture (amd64|arm64, default: auto-detect)" @@ -123,16 +135,32 @@ echo " MAX_JOBS=$MAX_JOBS | TORCH_BACKEND=$TORCH_BACKEND | CUDA_VERSION=$CUDA_V echo " TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST" echo " 
DEEPGEMM_REF=$DEEPGEMM_REF | FLASHINF_REF=$FLASHINF_REF" echo " INSTALLATION_DIR=$INSTALLATION_DIR | VLLM_GIT_URL=$VLLM_GIT_URL" - -echo "\n=== Cloning vLLM repository ===" -# We need to clone to install dependencies -cd $INSTALLATION_DIR -git clone $VLLM_GIT_URL vllm -cd vllm -git checkout $VLLM_REF - -# TODO leave this here in case we need to do cherry-picks in future -# GIT_COMMITTER_NAME="Container Build" GIT_COMMITTER_EMAIL="container@buildkitsandbox.local" git cherry-pick 740f064 +echo " USE_LOCAL_VLLM=$USE_LOCAL_VLLM | USE_LOCAL_DEEPEP=$USE_LOCAL_DEEPEP" + +if [ -n "$USE_LOCAL_VLLM" ]; then + echo "\n=== Using local vLLM directory ===" + echo "Local vLLM at: $USE_LOCAL_VLLM" + + # For Docker builds, vLLM should already be at /opt/vllm + # For other uses, we might need to handle it differently + if [ ! -d "$USE_LOCAL_VLLM" ]; then + echo "Error: Local vLLM directory not found at $USE_LOCAL_VLLM" + exit 1 + fi + + # Just use the local vLLM where it is + cd "$USE_LOCAL_VLLM" +else + echo "\n=== Cloning vLLM repository ===" + # We need to clone to install dependencies + cd $INSTALLATION_DIR + git clone $VLLM_GIT_URL vllm + cd vllm + git checkout $VLLM_REF + + # TODO leave this here in case we need to do cherry-picks in future + # GIT_COMMITTER_NAME="Container Build" GIT_COMMITTER_EMAIL="container@buildkitsandbox.local" git cherry-pick 740f064 +fi echo "\n=== Installing vLLM & FlashInfer ===" @@ -239,6 +267,16 @@ echo "āœ“ DeepGEMM installation completed" echo "\n=== Installing EP Kernels (PPLX and DeepEP) ===" cd ep_kernels/ -TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries.sh + +# Check if the local script exists (when using local vLLM, you might have this script) +if [ -n "$USE_LOCAL_DEEPEP" ] && [ -f "install_python_libraries_local_clean.sh" ]; then + echo "Using local DeepEP with install_python_libraries_local_clean.sh" + echo "Local DeepEP source: $USE_LOCAL_DEEPEP" + # The local script expects DeepEP at /workspace/DeepEP + cp -r 
"$USE_LOCAL_DEEPEP" /workspace/DeepEP + TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries_local_clean.sh +else + TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries.sh +fi echo "\nāœ… All installations completed successfully!"