182 changes: 182 additions & 0 deletions build-local-vllm.sh
@@ -0,0 +1,182 @@
#!/usr/bin/env bash
# Build script for Dynamo with local vLLM and/or DeepEP directories

set -e

# Default values
VLLM_PATH=""
DEEPEP_PATH=""
IMAGE_TAG="dynamo-vllm:local"
BUILD_ARGS=""
DOCKER_BUILD_ARGS=""

# Color codes for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Function to print colored messages
print_info() {
echo -e "${GREEN}[INFO]${NC} $1"
}

print_error() {
echo -e "${RED}[ERROR]${NC} $1"
}

print_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}

# Function to show usage
usage() {
cat << EOF
Usage: $0 [OPTIONS]

Build Dynamo Docker image with local vLLM and/or DeepEP directories.

OPTIONS:
-v, --vllm PATH Path to local vLLM directory
-d, --deepep PATH Path to local DeepEP directory
-t, --tag TAG Docker image tag (default: dynamo-vllm:local)
-b, --build-arg ARG Additional Docker build arguments (can be used multiple times)
-h, --help Show this help message

EXAMPLES:
# Build with local vLLM only
$0 --vllm /path/to/vllm

# Build with local DeepEP only
$0 --deepep /path/to/DeepEP

# Build with both local vLLM and DeepEP
$0 --vllm /path/to/vllm --deepep /path/to/DeepEP

# Build with custom tag
$0 --vllm /path/to/vllm --tag my-custom-image:latest

# Build with additional Docker build args
$0 --vllm /path/to/vllm --build-arg MAX_JOBS=8

EOF
exit 0
}

# Parse command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
-v|--vllm)
VLLM_PATH="$2"
shift 2
;;
-d|--deepep)
DEEPEP_PATH="$2"
shift 2
;;
-t|--tag)
IMAGE_TAG="$2"
shift 2
;;
-b|--build-arg)
DOCKER_BUILD_ARGS="${DOCKER_BUILD_ARGS} --build-arg $2"
shift 2
;;
-h|--help)
usage
;;
*)
print_error "Unknown option: $1"
usage
;;
esac
done

# Check if at least one local path is provided
if [ -z "$VLLM_PATH" ] && [ -z "$DEEPEP_PATH" ]; then
print_error "At least one of --vllm or --deepep must be specified"
usage
fi

# Validate provided paths
if [ -n "$VLLM_PATH" ] && [ ! -d "$VLLM_PATH" ]; then
print_error "vLLM path does not exist or is not a directory: $VLLM_PATH"
exit 1
fi

if [ -n "$DEEPEP_PATH" ] && [ ! -d "$DEEPEP_PATH" ]; then
print_error "DeepEP path does not exist or is not a directory: $DEEPEP_PATH"
exit 1
fi

# Get the directory of this script
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Create a temporary build context directory
BUILD_CONTEXT=$(mktemp -d)
print_info "Created temporary build context: $BUILD_CONTEXT"

# Cleanup function
cleanup() {
if [ -n "$BUILD_CONTEXT" ] && [ -d "$BUILD_CONTEXT" ]; then
print_info "Cleaning up temporary build context..."
rm -rf "$BUILD_CONTEXT"
fi
}

# Set trap to cleanup on exit
trap cleanup EXIT
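# (An EXIT trap fires on normal completion and on early exits from set -e alike,
# so the temporary context is always removed)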

# Copy dynamo directory contents to build context (not as subdirectory)
print_info "Copying dynamo directory to build context..."
# Use cp -a to preserve everything including hidden files
cp -a "$SCRIPT_DIR/." "$BUILD_CONTEXT/"

# Copy or link local directories to build context
if [ -n "$VLLM_PATH" ]; then
print_info "Copying local vLLM from: $VLLM_PATH"
cp -r "$VLLM_PATH" "$BUILD_CONTEXT/vllm"
BUILD_ARGS="${BUILD_ARGS} --build-arg USE_LOCAL_VLLM=true"
else
# Create empty directory to avoid COPY failure
mkdir -p "$BUILD_CONTEXT/vllm_placeholder"
touch "$BUILD_CONTEXT/vllm_placeholder/.placeholder"
fi

if [ -n "$DEEPEP_PATH" ]; then
print_info "Copying local DeepEP from: $DEEPEP_PATH"
cp -r "$DEEPEP_PATH" "$BUILD_CONTEXT/DeepEP"
BUILD_ARGS="${BUILD_ARGS} --build-arg USE_LOCAL_DEEPEP=true"
else
# Create empty directory to avoid COPY failure
mkdir -p "$BUILD_CONTEXT/DeepEP_placeholder"
touch "$BUILD_CONTEXT/DeepEP_placeholder/.placeholder"
fi

# Build the Docker image
print_info "Building Docker image with tag: $IMAGE_TAG"
print_info "Build arguments: $BUILD_ARGS $DOCKER_BUILD_ARGS"

cd "$BUILD_CONTEXT"

# Run Docker build
if DOCKER_BUILDKIT=1 docker build \
$BUILD_ARGS \
$DOCKER_BUILD_ARGS \
-f container/Dockerfile.vllm \
-t "$IMAGE_TAG" \
. ; then
print_info "Docker image built successfully: $IMAGE_TAG"

echo
print_info "To run the container:"
echo " docker run --gpus all -it $IMAGE_TAG"
echo
print_info "To run with volume mounts for development:"
echo " docker run --gpus all -it -v /path/to/workspace:/workspace $IMAGE_TAG"
else
print_error "Docker build failed"
exit 1
fi

print_info "Build completed successfully!"
36 changes: 32 additions & 4 deletions container/Dockerfile.vllm
@@ -8,15 +8,20 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG ENABLE_KVBM=false
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG CUDA_VERSION="12.8"

# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF="v0.11.0"
ARG VLLM_REF="v0.10.2"
# Set to "true" to use local vLLM directory instead of cloning from git
ARG USE_LOCAL_VLLM="false"
# Set to "true" to use local DeepEP directory instead of using vLLM's version
ARG USE_LOCAL_DEEPEP="false"
# FlashInfer is only respected when building vLLM from source, i.e. when VLLM_REF does not start with 'v', or for arm64 builds
ARG FLASHINF_REF="v0.3.1"
ARG FLASHINF_REF="v0.3.0"
ARG TORCH_BACKEND="cu128"

# If left blank, then we will fallback to vLLM defaults
@@ -116,6 +121,18 @@ ARG MAX_JOBS=16
ENV MAX_JOBS=$MAX_JOBS
ENV CUDA_HOME=/usr/local/cuda

+# Copy local vLLM and DeepEP if using local versions
+# Note: These paths are relative to the build context
+# The build script should ensure vllm/ and DeepEP/ directories are available in the build context
+ARG USE_LOCAL_VLLM
+ARG USE_LOCAL_DEEPEP
+# Copy directly to where they would be cloned/installed (same path for both local and GitHub)
+# The bracket globs (vll[m], DeepE[P]) are intended to make these copies optional (no failure when the directory is absent)
+RUN if [ "$USE_LOCAL_VLLM" = "true" ]; then mkdir -p /opt; fi
+COPY --chown=root:root vll[m] /opt/
+RUN if [ "$USE_LOCAL_DEEPEP" = "true" ]; then mkdir -p /opt; fi
+COPY --chown=root:root DeepE[P] /opt/
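+# (With both flags left at the default "false", the install step below still
+# clones VLLM_REF, so these optional copies do not change the default build path.)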

# Install sccache if requested
COPY container/use-sccache.sh /tmp/use-sccache.sh
# Install sccache if requested
@@ -143,7 +160,18 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
-/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION && \
+if [ "$USE_LOCAL_VLLM" = "true" ] && [ "$USE_LOCAL_DEEPEP" = "true" ]; then \
+echo "Using local vLLM from /opt/vllm and local DeepEP from /opt/DeepEP" && \
+/tmp/install_vllm.sh --editable --use-local-vllm /opt/vllm --use-local-deepep /opt/DeepEP --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \
+elif [ "$USE_LOCAL_VLLM" = "true" ]; then \
+echo "Using local vLLM from /opt/vllm" && \
+/tmp/install_vllm.sh --editable --use-local-vllm /opt/vllm --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \
+elif [ "$USE_LOCAL_DEEPEP" = "true" ]; then \
+echo "Using local DeepEP from /opt/DeepEP" && \
+/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --use-local-deepep /opt/DeepEP --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \
+else \
+/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION; \
+fi && \
/tmp/use-sccache.sh show-stats "vLLM";

ENV LD_LIBRARY_PATH=\
@@ -241,7 +269,7 @@ $LD_LIBRARY_PATH

# DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path
# is not properly set for compilation. Set CPATH to help nvcc find the headers.
-ENV CPATH=/usr/local/cuda/include
+ENV CPATH=/usr/local/cuda/include:$CPATH

### VIRTUAL ENVIRONMENT SETUP ###

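
For reference, the image can also be built without the helper script once vllm/ and DeepEP/ checkouts have been staged into the build context by hand; a minimal sketch, assuming the repository root is the context:

    DOCKER_BUILDKIT=1 docker build \
        --build-arg USE_LOCAL_VLLM=true \
        --build-arg USE_LOCAL_DEEPEP=true \
        -f container/Dockerfile.vllm \
        -t dynamo-vllm:local .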
62 changes: 50 additions & 12 deletions container/deps/vllm/install_vllm.sh
@@ -30,6 +30,8 @@ CUDA_VERSION="12.8" # For DEEPGEMM
EDITABLE=true
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
FLASHINF_REF="v0.3.1"
USE_LOCAL_VLLM=""
USE_LOCAL_DEEPEP=""

while [[ $# -gt 0 ]]; do
case $1 in
@@ -41,6 +43,14 @@ while [[ $# -gt 0 ]]; do
EDITABLE=false
shift
;;
+--use-local-vllm)
+USE_LOCAL_VLLM="$2"
+shift 2
+;;
+--use-local-deepep)
+USE_LOCAL_DEEPEP="$2"
+shift 2
+;;
--vllm-ref)
VLLM_REF="$2"
shift 2
@@ -82,10 +92,12 @@ while [[ $# -gt 0 ]]; do
shift 2
;;
-h|--help)
echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND] [--torch-cuda-arch-list LIST] [--cuda-version VERSION]"
echo "Usage: $0 [--editable|--no-editable] [--use-local-vllm PATH] [--use-local-deepep PATH] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND] [--torch-cuda-arch-list LIST] [--cuda-version VERSION]"
echo "Options:"
echo " --editable Install vllm in editable mode (default)"
echo " --no-editable Install vllm in non-editable mode"
echo " --use-local-vllm PATH Use local vLLM directory instead of cloning"
echo " --use-local-deepep PATH Use local DeepEP directory instead of vLLM's version"
echo " --vllm-ref REF Git reference to checkout (default: ${VLLM_REF})"
echo " --max-jobs NUM Maximum number of parallel jobs (default: ${MAX_JOBS})"
echo " --arch ARCH Architecture (amd64|arm64, default: auto-detect)"
@@ -123,16 +135,32 @@ echo " MAX_JOBS=$MAX_JOBS | TORCH_BACKEND=$TORCH_BACKEND | CUDA_VERSION=$CUDA_V
echo " TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
echo " DEEPGEMM_REF=$DEEPGEMM_REF | FLASHINF_REF=$FLASHINF_REF"
echo " INSTALLATION_DIR=$INSTALLATION_DIR | VLLM_GIT_URL=$VLLM_GIT_URL"

echo "\n=== Cloning vLLM repository ==="
# We need to clone to install dependencies
cd $INSTALLATION_DIR
git clone $VLLM_GIT_URL vllm
cd vllm
git checkout $VLLM_REF

# TODO leave this here in case we need to do cherry-picks in future
# GIT_COMMITTER_NAME="Container Build" GIT_COMMITTER_EMAIL="[email protected]" git cherry-pick 740f064
echo " USE_LOCAL_VLLM=$USE_LOCAL_VLLM | USE_LOCAL_DEEPEP=$USE_LOCAL_DEEPEP"

if [ -n "$USE_LOCAL_VLLM" ]; then
echo "\n=== Using local vLLM directory ==="
echo "Local vLLM at: $USE_LOCAL_VLLM"

# For Docker builds, vLLM should already be at /opt/vllm
# For other uses, we might need to handle it differently
if [ ! -d "$USE_LOCAL_VLLM" ]; then
echo "Error: Local vLLM directory not found at $USE_LOCAL_VLLM"
exit 1
fi

# Just use the local vLLM where it is
cd "$USE_LOCAL_VLLM"
else
echo "\n=== Cloning vLLM repository ==="
# We need to clone to install dependencies
cd $INSTALLATION_DIR
git clone $VLLM_GIT_URL vllm
cd vllm
git checkout $VLLM_REF

# TODO leave this here in case we need to do cherry-picks in future
# GIT_COMMITTER_NAME="Container Build" GIT_COMMITTER_EMAIL="[email protected]" git cherry-pick 740f064
fi

echo "\n=== Installing vLLM & FlashInfer ==="

@@ -239,6 +267,16 @@ echo "✓ DeepGEMM installation completed"

echo "\n=== Installing EP Kernels (PPLX and DeepEP) ==="
cd ep_kernels/
-TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries.sh

+# Prefer the local DeepEP install script when present (it may accompany a local checkout)
+if [ -n "$USE_LOCAL_DEEPEP" ] && [ -f "install_python_libraries_local_clean.sh" ]; then
+echo "Using local DeepEP with install_python_libraries_local_clean.sh"
+echo "Local DeepEP source: $USE_LOCAL_DEEPEP"
+# The local script expects DeepEP at /workspace/DeepEP; make sure the parent exists
+mkdir -p /workspace
+cp -r "$USE_LOCAL_DEEPEP" /workspace/DeepEP
+TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries_local_clean.sh
+else
+TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries.sh
+fi

echo "\n✅ All installations completed successfully!"