182 changes: 182 additions & 0 deletions build-local-vllm.sh
@@ -0,0 +1,182 @@
#!/usr/bin/env bash
# Build script for Dynamo with local vLLM and/or DeepEP directories

set -e

# Default values
VLLM_PATH=""
DEEPEP_PATH=""
IMAGE_TAG="dynamo-vllm:local"
BUILD_ARGS=""
DOCKER_BUILD_ARGS=""

# Color codes for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Function to print colored messages
print_info() {
echo -e "${GREEN}[INFO]${NC} $1"
}

print_error() {
echo -e "${RED}[ERROR]${NC} $1"
}

print_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}

# Function to show usage
usage() {
cat << EOF
Usage: $0 [OPTIONS]

Build Dynamo Docker image with local vLLM and/or DeepEP directories.

OPTIONS:
-v, --vllm PATH Path to local vLLM directory
-d, --deepep PATH Path to local DeepEP directory
-t, --tag TAG Docker image tag (default: dynamo-vllm:local)
-b, --build-arg ARG Additional Docker build arguments (can be used multiple times)
-h, --help Show this help message

EXAMPLES:
# Build with local vLLM only
$0 --vllm /path/to/vllm

# Build with local DeepEP only
$0 --deepep /path/to/DeepEP

# Build with both local vLLM and DeepEP
$0 --vllm /path/to/vllm --deepep /path/to/DeepEP

# Build with custom tag
$0 --vllm /path/to/vllm --tag my-custom-image:latest

# Build with additional Docker build args
$0 --vllm /path/to/vllm --build-arg MAX_JOBS=8

EOF
exit 0
}

# Parse command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
-v|--vllm)
VLLM_PATH="$2"
shift 2
;;
-d|--deepep)
DEEPEP_PATH="$2"
shift 2
;;
-t|--tag)
IMAGE_TAG="$2"
shift 2
;;
-b|--build-arg)
DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS} --build-arg $2"
shift 2
;;
-h|--help)
usage
;;
*)
print_error "Unknown option: $1"
usage
;;
esac
done

# Check if at least one local path is provided
if [ -z "$VLLM_PATH" ] && [ -z "$DEEPEP_PATH" ]; then
print_error "At least one of --vllm or --deepep must be specified"
usage
fi

# Validate provided paths
if [ -n "$VLLM_PATH" ] && [ ! -d "$VLLM_PATH" ]; then
print_error "vLLM path does not exist or is not a directory: $VLLM_PATH"
exit 1
fi

if [ -n "$DEEPEP_PATH" ] && [ ! -d "$DEEPEP_PATH" ]; then
print_error "DeepEP path does not exist or is not a directory: $DEEPEP_PATH"
exit 1
fi

# Get the directory of this script
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Create a temporary build context directory
BUILD_CONTEXT=$(mktemp -d)
print_info "Created temporary build context: $BUILD_CONTEXT"

# Cleanup function
cleanup() {
if [ -n "$BUILD_CONTEXT" ] && [ -d "$BUILD_CONTEXT" ]; then
print_info "Cleaning up temporary build context..."
rm -rf "$BUILD_CONTEXT"
fi
}

# Set trap to cleanup on exit
trap cleanup EXIT
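# (An EXIT trap fires on normal completion and on early exits from set -e alike,
# so the temporary context is always removed)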

# Copy dynamo directory contents to build context (not as subdirectory)
print_info "Copying dynamo directory to build context..."
# Use cp -a to preserve everything including hidden files
cp -a "$SCRIPT_DIR/." "$BUILD_CONTEXT/"

# Copy or link local directories to build context
if [ -n "$VLLM_PATH" ]; then
print_info "Copying local vLLM from: $VLLM_PATH"
cp -r "$VLLM_PATH" "$BUILD_CONTEXT/vllm"
BUILD_ARGS="${BUILD_ARGS} --build-arg USE_LOCAL_VLLM=true"
else
# Create empty directory to avoid COPY failure
mkdir -p "$BUILD_CONTEXT/vllm_placeholder"
touch "$BUILD_CONTEXT/vllm_placeholder/.placeholder"
fi

if [ -n "$DEEPEP_PATH" ]; then
print_info "Copying local DeepEP from: $DEEPEP_PATH"
cp -r "$DEEPEP_PATH" "$BUILD_CONTEXT/DeepEP"
BUILD_ARGS="${BUILD_ARGS} --build-arg USE_LOCAL_DEEPEP=true"
else
# Create empty directory to avoid COPY failure
mkdir -p "$BUILD_CONTEXT/DeepEP_placeholder"
touch "$BUILD_CONTEXT/DeepEP_placeholder/.placeholder"
fi

# Build the Docker image
print_info "Building Docker image with tag: $IMAGE_TAG"
print_info "Build arguments: $BUILD_ARGS $DOCKER_BUILD_ARGS"

cd "$BUILD_CONTEXT"

# Run Docker build
if DOCKER_BUILDKIT=1 docker build \
$BUILD_ARGS \
$DOCKER_BUILD_ARGS \
-f container/Dockerfile.vllm \
-t "$IMAGE_TAG" \
. ; then
print_info "Docker image built successfully: $IMAGE_TAG"

echo
print_info "To run the container:"
echo " docker run --gpus all -it $IMAGE_TAG"
echo
print_info "To run with volume mounts for development:"
echo " docker run --gpus all -it -v /path/to/workspace:/workspace $IMAGE_TAG"
else
print_error "Docker build failed"
exit 1
fi

print_info "Build completed successfully!"
36 changes: 32 additions & 4 deletions container/Dockerfile.vllm
@@ -8,15 +8,20 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG ENABLE_KVBM=false
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG CUDA_VERSION="12.8"

# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF="v0.11.0"
ARG VLLM_REF="v0.10.2"
# Set to "true" to use local vLLM directory instead of cloning from git
ARG USE_LOCAL_VLLM="false"
# Set to "true" to use local DeepEP directory instead of using vLLM's version
ARG USE_LOCAL_DEEPEP="false"
# FlashInfer is only respected when building vLLM from source, i.e. when VLLM_REF does not start with 'v', or for arm64 builds
ARG FLASHINF_REF="v0.3.1"
ARG FLASHINF_REF="v0.3.0"
ARG TORCH_BACKEND="cu128"

# If left blank, then we will fallback to vLLM defaults
@@ -116,6 +121,18 @@ ARG MAX_JOBS=16
ENV MAX_JOBS=$MAX_JOBS
ENV CUDA_HOME=/usr/local/cuda

+# Copy local vLLM and DeepEP if using local versions
+# Note: These paths are relative to the build context
+# The build script should ensure vllm/ and DeepEP/ directories are available in the build context
+ARG USE_LOCAL_VLLM
+ARG USE_LOCAL_DEEPEP
+# Copy directly to where they would be cloned/installed (same path for both local and GitHub)
+# The bracket globs (vll[m], DeepE[P]) are intended to make these copies optional (no failure when the directory is absent)
+RUN if [ "$USE_LOCAL_VLLM" = "true" ]; then mkdir -p /opt; fi
+COPY --chown=root:root vll[m] /opt/
+RUN if [ "$USE_LOCAL_DEEPEP" = "true" ]; then mkdir -p /opt; fi
+COPY --chown=root:root DeepE[P] /opt/
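+# (With both flags left at the default "false", the install step below still
+# clones VLLM_REF, so these optional copies do not change the default build path.)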

# Install sccache if requested
COPY container/use-sccache.sh /tmp/use-sccache.sh
# Install sccache if requested
@@ -143,7 +160,18 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
-/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION && \
+if [ "$USE_LOCAL_VLLM" = "true" ] && [ "$USE_LOCAL_DEEPEP" = "true" ]; then \
+echo "Using local vLLM from /opt/vllm and local DeepEP from /opt/DeepEP" && \
+/tmp/install_vllm.sh --editable --use-local-vllm /opt/vllm --use-local-deepep /opt/DeepEP --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \
+elif [ "$USE_LOCAL_VLLM" = "true" ]; then \
+echo "Using local vLLM from /opt/vllm" && \
+/tmp/install_vllm.sh --editable --use-local-vllm /opt/vllm --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \
+elif [ "$USE_LOCAL_DEEPEP" = "true" ]; then \
+echo "Using local DeepEP from /opt/DeepEP" && \
+/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --use-local-deepep /opt/DeepEP --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \
+else \
+/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \
+fi && \
/tmp/use-sccache.sh show-stats "vLLM";

ENV LD_LIBRARY_PATH=\
@@ -241,7 +269,7 @@ $LD_LIBRARY_PATH

# DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path
# is not properly set for compilation. Set CPATH to help nvcc find the headers.
-ENV CPATH=/usr/local/cuda/include
+ENV CPATH=/usr/local/cuda/include:$CPATH

### VIRTUAL ENVIRONMENT SETUP ###

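
For reference, the image can also be built without the helper script once vllm/ and DeepEP/ checkouts have been staged into the build context by hand; a minimal sketch, assuming the repository root is the context:

    DOCKER_BUILDKIT=1 docker build \
        --build-arg USE_LOCAL_VLLM=true \
        --build-arg USE_LOCAL_DEEPEP=true \
        -f container/Dockerfile.vllm \
        -t dynamo-vllm:local .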
62 changes: 50 additions & 12 deletions container/deps/vllm/install_vllm.sh
@@ -30,6 +30,8 @@ CUDA_VERSION="12.8" # For DEEPGEMM
EDITABLE=true
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
FLASHINF_REF="v0.3.1"
USE_LOCAL_VLLM=""
USE_LOCAL_DEEPEP=""

while [[ $# -gt 0 ]]; do
case $1 in
@@ -41,6 +43,14 @@ while [[ $# -gt 0 ]]; do
EDITABLE=false
shift
;;
+--use-local-vllm)
+USE_LOCAL_VLLM="$2"
+shift 2
+;;
+--use-local-deepep)
+USE_LOCAL_DEEPEP="$2"
+shift 2
+;;
--vllm-ref)
VLLM_REF="$2"
shift 2
@@ -82,10 +92,12 @@ while [[ $# -gt 0 ]]; do
shift 2
;;
-h|--help)
echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND] [--torch-cuda-arch-list LIST] [--cuda-version VERSION]"
echo "Usage: $0 [--editable|--no-editable] [--use-local-vllm PATH] [--use-local-deepep PATH] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND] [--torch-cuda-arch-list LIST] [--cuda-version VERSION]"
echo "Options:"
echo " --editable Install vllm in editable mode (default)"
echo " --no-editable Install vllm in non-editable mode"
echo " --use-local-vllm PATH Use local vLLM directory instead of cloning"
echo " --use-local-deepep PATH Use local DeepEP directory instead of vLLM's version"
echo " --vllm-ref REF Git reference to checkout (default: ${VLLM_REF})"
echo " --max-jobs NUM Maximum number of parallel jobs (default: ${MAX_JOBS})"
echo " --arch ARCH Architecture (amd64|arm64, default: auto-detect)"
@@ -123,16 +135,32 @@ echo " MAX_JOBS=$MAX_JOBS | TORCH_BACKEND=$TORCH_BACKEND | CUDA_VERSION=$CUDA_V
echo " TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
echo " DEEPGEMM_REF=$DEEPGEMM_REF | FLASHINF_REF=$FLASHINF_REF"
echo " INSTALLATION_DIR=$INSTALLATION_DIR | VLLM_GIT_URL=$VLLM_GIT_URL"

echo "\n=== Cloning vLLM repository ==="
# We need to clone to install dependencies
cd $INSTALLATION_DIR
git clone $VLLM_GIT_URL vllm
cd vllm
git checkout $VLLM_REF

# TODO leave this here in case we need to do cherry-picks in future
# GIT_COMMITTER_NAME="Container Build" GIT_COMMITTER_EMAIL="[email protected]" git cherry-pick 740f064
echo " USE_LOCAL_VLLM=$USE_LOCAL_VLLM | USE_LOCAL_DEEPEP=$USE_LOCAL_DEEPEP"

if [ -n "$USE_LOCAL_VLLM" ]; then
echo "\n=== Using local vLLM directory ==="
echo "Local vLLM at: $USE_LOCAL_VLLM"

# For Docker builds, vLLM should already be at /opt/vllm
# For other uses, we might need to handle it differently
if [ ! -d "$USE_LOCAL_VLLM" ]; then
echo "Error: Local vLLM directory not found at $USE_LOCAL_VLLM"
exit 1
fi

# Just use the local vLLM where it is
cd "$USE_LOCAL_VLLM"
else
echo "\n=== Cloning vLLM repository ==="
# We need to clone to install dependencies
cd $INSTALLATION_DIR
git clone $VLLM_GIT_URL vllm
cd vllm
git checkout $VLLM_REF

# TODO leave this here in case we need to do cherry-picks in future
# GIT_COMMITTER_NAME="Container Build" GIT_COMMITTER_EMAIL="[email protected]" git cherry-pick 740f064
fi

echo "\n=== Installing vLLM & FlashInfer ==="

@@ -239,6 +267,16 @@ echo "✓ DeepGEMM installation completed"

echo "\n=== Installing EP Kernels (PPLX and DeepEP) ==="
cd ep_kernels/
-TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries.sh

+# Prefer the local DeepEP install script when present (it may accompany a local checkout)
+if [ -n "$USE_LOCAL_DEEPEP" ] && [ -f "install_python_libraries_local_clean.sh" ]; then
+echo "Using local DeepEP with install_python_libraries_local_clean.sh"
+echo "Local DeepEP source: $USE_LOCAL_DEEPEP"
+# The local script expects DeepEP at /workspace/DeepEP; make sure the parent exists
+mkdir -p /workspace
+cp -r "$USE_LOCAL_DEEPEP" /workspace/DeepEP
+TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries_local_clean.sh
+else
+TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries.sh
+fi

echo "\n✅ All installations completed successfully!"