diff --git a/.github/filters.yaml b/.github/filters.yaml index 09ef5327bc..d351905c6e 100644 --- a/.github/filters.yaml +++ b/.github/filters.yaml @@ -28,21 +28,21 @@ vllm: &vllm - 'container/Dockerfile.vllm' - 'container/deps/requirements.vllm.txt' - 'container/deps/vllm/**' - - 'components/backends/vllm/**' + - 'examples/backends/vllm/**' - 'components/src/dynamo/vllm/**' - 'tests/serve/test_vllm.py' sglang: &sglang - 'container/Dockerfile.sglang' - 'container/Dockerfile.sglang-wideep' - - 'components/backends/sglang/**' + - 'examples/backends/sglang/**' - 'components/src/dynamo/sglang/**' - 'container/build.sh' - 'tests/serve/test_sglang.py' trtllm: &trtllm - 'container/Dockerfile.trtllm' - - 'components/backends/trtllm/**' + - 'examples/backends/trtllm/**' - 'components/src/dynamo/trtllm/**' - 'container/build.sh' - 'container/build_trtllm_wheel.sh' diff --git a/.github/workflows/container-validation-backends.yml b/.github/workflows/container-validation-backends.yml index b197838242..b58f936501 100644 --- a/.github/workflows/container-validation-backends.yml +++ b/.github/workflows/container-validation-backends.yml @@ -453,7 +453,7 @@ jobs: export KUBECONFIG=$(pwd)/.kubeconfig kubectl config set-context --current --namespace=$NAMESPACE - cd components/backends/$FRAMEWORK + cd examples/backends/$FRAMEWORK export FRAMEWORK_RUNTIME_IMAGE="${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-${FRAMEWORK}-amd64" export KUBE_NS=$NAMESPACE export GRAPH_NAME=$(yq e '.metadata.name' $DEPLOYMENT_FILE) diff --git a/README.md b/README.md index 4a67f7ffd1..5a0576355c 100644 --- a/README.md +++ b/README.md @@ -171,7 +171,7 @@ Rerun with `curl -N` and change `stream` in the request to `true` to get the res ### Deploying Dynamo - Follow the [Quickstart Guide](docs/kubernetes/README.md) to deploy on Kubernetes. -- Check out [Backends](components/backends) to deploy various workflow configurations (e.g. SGLang with router, vLLM with disaggregated serving, etc.) 
+- Check out [Backends](examples/backends) to deploy various workflow configurations (e.g. SGLang with router, vLLM with disaggregated serving, etc.) - Run some [Examples](examples) to learn about building components in Dynamo and exploring various integrations. ### Benchmarking Dynamo diff --git a/benchmarks/README.md b/benchmarks/README.md index 422bed8310..dcb24dfdbf 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -20,7 +20,7 @@ This directory contains benchmarking scripts and tools for performance evaluatio ## Quick Start ### Benchmark a Dynamo Deployment -First, deploy your DynamoGraphDeployment using the [deployment documentation](../components/backends/), then: +First, deploy your DynamoGraphDeployment using the [deployment documentation](../docs/kubernetes/), then: ```bash # Port-forward your deployment to http://localhost:8000 diff --git a/benchmarks/profiler/utils/config_modifiers/sglang.py b/benchmarks/profiler/utils/config_modifiers/sglang.py index 0422ba64a9..332d58e85c 100644 --- a/benchmarks/profiler/utils/config_modifiers/sglang.py +++ b/benchmarks/profiler/utils/config_modifiers/sglang.py @@ -36,7 +36,7 @@ logger.addHandler(console_handler) -DEFAULT_SGLANG_CONFIG_PATH = "components/backends/sglang/deploy/disagg.yaml" +DEFAULT_SGLANG_CONFIG_PATH = "examples/backends/sglang/deploy/disagg.yaml" class SGLangConfigModifier: diff --git a/benchmarks/profiler/utils/config_modifiers/trtllm.py b/benchmarks/profiler/utils/config_modifiers/trtllm.py index 2af59f3065..020b7efca7 100644 --- a/benchmarks/profiler/utils/config_modifiers/trtllm.py +++ b/benchmarks/profiler/utils/config_modifiers/trtllm.py @@ -38,7 +38,7 @@ logger.addHandler(console_handler) -DEFAULT_TRTLLM_CONFIG_PATH = "components/backends/trtllm/deploy/disagg.yaml" +DEFAULT_TRTLLM_CONFIG_PATH = "examples/backends/trtllm/deploy/disagg.yaml" class TrtllmConfigModifier: diff --git a/benchmarks/profiler/utils/config_modifiers/vllm.py 
b/benchmarks/profiler/utils/config_modifiers/vllm.py index afacdd62a9..c0f004d580 100644 --- a/benchmarks/profiler/utils/config_modifiers/vllm.py +++ b/benchmarks/profiler/utils/config_modifiers/vllm.py @@ -34,7 +34,7 @@ logger.addHandler(console_handler) -DEFAULT_VLLM_CONFIG_PATH = "components/backends/vllm/deploy/disagg.yaml" +DEFAULT_VLLM_CONFIG_PATH = "examples/backends/vllm/deploy/disagg.yaml" class VllmV1ConfigModifier: diff --git a/components/README.md b/components/README.md index 3aa6cc0ea8..069e651ac4 100644 --- a/components/README.md +++ b/components/README.md @@ -19,25 +19,17 @@ limitations under the License. This directory contains the core components that make up the Dynamo inference framework. Each component serves a specific role in the distributed LLM serving architecture, enabling high-throughput, low-latency inference across multiple nodes and GPUs. -## Supported Inference Engines - -Dynamo supports multiple inference engines (with a focus on SGLang, vLLM, and TensorRT-LLM), each with their own deployment configurations and capabilities: - -- **[vLLM](/docs/backends/vllm/README.md)** - High-performance LLM inference with native KV cache events and NIXL-based transfer mechanisms -- **[SGLang](/docs/backends/sglang/README.md)** - Structured generation language framework with ZMQ-based communication -- **[TensorRT-LLM](/docs/backends/trtllm/README.md)** - NVIDIA's optimized LLM inference engine with TensorRT acceleration - -Each engine provides launch scripts for different deployment patterns in their respective `/launch` & `/deploy` directories. 
- ## Core Components -### [Backends](backends/) +### Backends + +Dynamo supports multiple inference engines, each with their own deployment configurations and capabilities: -The backends directory contains inference engine integrations and implementations, with a key focus on: +- **[vLLM](/docs/backends/vllm/README.md)** - Full-featured vLLM integration with disaggregated serving, KV-aware routing, SLA-based planning, native KV cache events, and NIXL-based transfer mechanisms +- **[SGLang](/docs/backends/sglang/README.md)** - SGLang engine integration with ZMQ-based communication, supporting disaggregated serving and KV-aware routing +- **[TensorRT-LLM](/docs/backends/trtllm/README.md)** - TensorRT-LLM integration with disaggregated serving capabilities and TensorRT acceleration -- **vLLM** - Full-featured vLLM integration with disaggregated serving, KV-aware routing, and SLA-based planning -- **SGLang** - SGLang engine integration supporting disaggregated serving and KV-aware routing -- **TensorRT-LLM** - TensorRT-LLM integration with disaggregated serving capabilities +Each engine provides launch and deploy scripts for different deployment patterns in the [examples](../examples/backends/) folder. ### [Frontend](src/dynamo/frontend/) diff --git a/components/src/dynamo/router/README.md b/components/src/dynamo/router/README.md index c2b38bb792..b9347a0bed 100644 --- a/components/src/dynamo/router/README.md +++ b/components/src/dynamo/router/README.md @@ -47,7 +47,7 @@ Clients query the `find_best_worker` endpoint to determine which worker should p > > Use this manual setup if you need explicit control over prefill routing configuration or want to manage prefill and decode routers separately. -See [`components/backends/vllm/launch/disagg_router.sh`](/components/backends/vllm/launch/disagg_router.sh) for a complete example. +See [`examples/backends/vllm/launch/disagg_router.sh`](/examples/backends/vllm/launch/disagg_router.sh) for a complete example. 
```bash # Start frontend router for decode workers diff --git a/container/Dockerfile.sglang-wideep b/container/Dockerfile.sglang-wideep index 9f7b8cf9b6..b232a68e8e 100644 --- a/container/Dockerfile.sglang-wideep +++ b/container/Dockerfile.sglang-wideep @@ -87,4 +87,4 @@ ENV PATH=/usr/local/bin/etcd:$PATH # Enable forceful shutdown of inflight requests ENV SGL_FORCE_SHUTDOWN=1 -WORKDIR /sgl-workspace/dynamo/components/backends/sglang +WORKDIR /sgl-workspace/dynamo/examples/backends/sglang diff --git a/deploy/helm/README.md b/deploy/helm/README.md index d6d62a0628..0a4491a168 100644 --- a/deploy/helm/README.md +++ b/deploy/helm/README.md @@ -33,7 +33,7 @@ This approach allows you to install Dynamo directly using a DynamoGraphDeploymen Here is how you would install a VLLM inference backend example. ```bash -helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml +helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./examples/backends/vllm/deploy/agg.yaml ``` ### Installation using Grove @@ -41,7 +41,7 @@ helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./com Same example as above, but using Grove PodCliqueSet resources. 
```bash -helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml --set deploymentType=grove +helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./examples/backends/vllm/deploy/agg.yaml --set deploymentType=grove ``` ### Customizable Properties @@ -50,7 +50,7 @@ You can override the default configuration by setting the following properties: ```bash helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud \ - -f ./components/backends/vllm/deploy/agg.yaml \ + -f ./examples/backends/vllm/deploy/agg.yaml \ --set "imagePullSecrets[0].name=docker-secret-1" \ --set etcdAddr="my-etcd-service:2379" \ --set natsAddr="nats://my-nats-service:4222" diff --git a/deploy/inference-gateway/README.md b/deploy/inference-gateway/README.md index cb40f439af..85fd30fc86 100644 --- a/deploy/inference-gateway/README.md +++ b/deploy/inference-gateway/README.md @@ -85,12 +85,12 @@ kubectl get gateway inference-gateway -n my-model ### 3. Deploy Your Model ### -Follow the steps in [model deployment](../../components/backends/vllm/deploy/README.md) to deploy `Qwen/Qwen3-0.6B` model in aggregate mode using [agg.yaml](../../components/backends/vllm/deploy/agg.yaml) in `my-model` kubernetes namespace. +Follow the steps in [model deployment](../../examples/backends/vllm/deploy/README.md) to deploy `Qwen/Qwen3-0.6B` model in aggregate mode using [agg.yaml](../../examples/backends/vllm/deploy/agg.yaml) in `my-model` kubernetes namespace. Sample commands to deploy model: ```bash -cd /components/backends/vllm/deploy +cd /examples/backends/vllm/deploy kubectl apply -f agg.yaml -n my-model ``` @@ -116,7 +116,7 @@ kubectl create secret generic hf-token-secret \ ``` Create a model configuration file similar to the vllm_agg_qwen.yaml for your model. 
-This file demonstrates the values needed for the Vllm Agg setup in [agg.yaml](../../components/backends/vllm/deploy/agg.yaml) +This file demonstrates the values needed for the Vllm Agg setup in [agg.yaml](../../examples/backends/vllm/deploy/agg.yaml) Take a note of the model's block size provided in the model card. ### 4. Install Dynamo GAIE helm chart ### diff --git a/deploy/tracing/README.md b/deploy/tracing/README.md index 7be410e556..5df42c75b1 100644 --- a/deploy/tracing/README.md +++ b/deploy/tracing/README.md @@ -91,7 +91,7 @@ Run the vLLM disaggregated script with tracing enabled: ```bash # Navigate to vLLM launch directory -cd components/backends/vllm/launch +cd examples/backends/vllm/launch # Run disaggregated deployment (modify the script to export env vars first) ./disagg.sh @@ -179,7 +179,7 @@ For Kubernetes deployments, ensure you have a Tempo instance deployed and access ### Modify DynamoGraphDeployment for Tracing -Add common tracing environment variables at the top level and service-specific names in each component in your `DynamoGraphDeployment` (e.g., `components/backends/vllm/deploy/disagg.yaml`): +Add common tracing environment variables at the top level and service-specific names in each component in your `DynamoGraphDeployment` (e.g., `examples/backends/vllm/deploy/disagg.yaml`): ```yaml apiVersion: nvidia.com/v1alpha1 @@ -228,7 +228,7 @@ spec: Apply the updated DynamoGraphDeployment: ```bash -kubectl apply -f components/backends/vllm/deploy/disagg.yaml +kubectl apply -f examples/backends/vllm/deploy/disagg.yaml ``` Traces will now be exported to Tempo and can be viewed in Grafana. 
diff --git a/docs/backends/sglang/README.md b/docs/backends/sglang/README.md index 82ca386f77..0230e01999 100644 --- a/docs/backends/sglang/README.md +++ b/docs/backends/sglang/README.md @@ -182,14 +182,14 @@ docker compose -f deploy/docker-compose.yml up -d ### Aggregated Serving ```bash -cd $DYNAMO_HOME/components/backends/sglang +cd $DYNAMO_HOME/examples/backends/sglang ./launch/agg.sh ``` ### Aggregated Serving with KV Routing ```bash -cd $DYNAMO_HOME/components/backends/sglang +cd $DYNAMO_HOME/examples/backends/sglang ./launch/agg_router.sh ``` @@ -198,7 +198,7 @@ cd $DYNAMO_HOME/components/backends/sglang Here's an example that uses the [Qwen/Qwen3-Embedding-4B](https://huggingface.co/Qwen/Qwen3-Embedding-4B) model. ```bash -cd $DYNAMO_HOME/components/backends/sglang +cd $DYNAMO_HOME/examples/backends/sglang ./launch/agg_embed.sh ``` @@ -222,14 +222,14 @@ See [SGLang Disaggregation](sglang-disaggregation.md) to learn more about how sg ```bash -cd $DYNAMO_HOME/components/backends/sglang +cd $DYNAMO_HOME/examples/backends/sglang ./launch/disagg.sh ``` ### Disaggregated Serving with KV Aware Prefill Routing ```bash -cd $DYNAMO_HOME/components/backends/sglang +cd $DYNAMO_HOME/examples/backends/sglang ./launch/disagg_router.sh ``` @@ -239,7 +239,7 @@ You can use this configuration to test out disaggregated serving with dp attenti ```bash # note this will require 4 GPUs -cd $DYNAMO_HOME/components/backends/sglang +cd $DYNAMO_HOME/examples/backends/sglang ./launch/disagg_dp_attn.sh ``` @@ -285,7 +285,7 @@ Below we provide a selected list of advanced examples. Please open up an issue i We currently provide deployment examples for Kubernetes and SLURM. 
## Kubernetes -- **[Deploying Dynamo with SGLang on Kubernetes](../../../components/backends/sglang/deploy/README.md)** +- **[Deploying Dynamo with SGLang on Kubernetes](../../../examples/backends/sglang/deploy/README.md)** ## SLURM -- **[Deploying Dynamo with SGLang on SLURM](../../../components/backends/sglang/slurm_jobs/README.md)** +- **[Deploying Dynamo with SGLang on SLURM](../../../examples/backends/sglang/slurm_jobs/README.md)** diff --git a/docs/backends/sglang/dsr1-wideep-h100.md b/docs/backends/sglang/dsr1-wideep-h100.md index 0bff93c607..07e4f9e306 100644 --- a/docs/backends/sglang/dsr1-wideep-h100.md +++ b/docs/backends/sglang/dsr1-wideep-h100.md @@ -44,7 +44,7 @@ docker run \ dynamo-wideep:latest ``` -In each container, you should be in the `/sgl-workspace/dynamo/components/backends/sglang` directory. +In each container, you should be in the `/sgl-workspace/dynamo/examples/backends/sglang` directory. 3. Run the ingress and prefill worker diff --git a/docs/backends/sglang/multimodal_epd.md b/docs/backends/sglang/multimodal_epd.md index f4bd19566b..bf8cac05c0 100644 --- a/docs/backends/sglang/multimodal_epd.md +++ b/docs/backends/sglang/multimodal_epd.md @@ -47,7 +47,7 @@ flowchart LR ``` ```bash -cd $DYNAMO_HOME/components/backends/sglang +cd $DYNAMO_HOME/examples/backends/sglang ./launch/multimodal_agg.sh ``` @@ -133,7 +133,7 @@ flowchart LR ```bash -cd $DYNAMO_HOME/components/backends/sglang +cd $DYNAMO_HOME/examples/backends/sglang ./launch/multimodal_disagg.sh ``` diff --git a/docs/backends/trtllm/README.md b/docs/backends/trtllm/README.md index 57b1e9ace0..e1bb7dca90 100644 --- a/docs/backends/trtllm/README.md +++ b/docs/backends/trtllm/README.md @@ -128,13 +128,13 @@ This figure shows an overview of the major components to deploy: ### Aggregated ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd $DYNAMO_HOME/examples/backends/trtllm ./launch/agg.sh ``` ### Aggregated with KV Routing ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd 
$DYNAMO_HOME/examples/backends/trtllm ./launch/agg_router.sh ``` @@ -144,7 +144,7 @@ cd $DYNAMO_HOME/components/backends/trtllm > Disaggregated serving supports two strategies for request flow: `"prefill_first"` and `"decode_first"`. By default, the script below uses the `"decode_first"` strategy, which can reduce response latency by minimizing extra hops in the return path. You can switch strategies by setting the `DISAGGREGATION_STRATEGY` environment variable. ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd $DYNAMO_HOME/examples/backends/trtllm ./launch/disagg.sh ``` @@ -154,13 +154,13 @@ cd $DYNAMO_HOME/components/backends/trtllm > Disaggregated serving with KV routing uses a "prefill first" workflow by default. Currently, Dynamo supports KV routing to only one endpoint per model. In disaggregated workflow, it is generally more effective to route requests to the prefill worker. If you wish to use a "decode first" workflow instead, you can simply set the `DISAGGREGATION_STRATEGY` environment variable accordingly. ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd $DYNAMO_HOME/examples/backends/trtllm ./launch/disagg_router.sh ``` ### Aggregated with Multi-Token Prediction (MTP) and DeepSeek R1 ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd $DYNAMO_HOME/examples/backends/trtllm export AGG_ENGINE_ARGS=./recipes/deepseek-r1/trtllm/mtp/mtp_agg.yaml export SERVED_MODEL_NAME="nvidia/DeepSeek-R1-FP4" @@ -186,7 +186,7 @@ For comprehensive instructions on multinode serving, see the [multinode-examples ### Kubernetes Deployment -For complete Kubernetes deployment instructions, configurations, and troubleshooting, see [TensorRT-LLM Kubernetes Deployment Guide](../../../components/backends/trtllm/deploy/README.md). +For complete Kubernetes deployment instructions, configurations, and troubleshooting, see [TensorRT-LLM Kubernetes Deployment Guide](../../../examples/backends/trtllm/deploy/README.md). 
### Client @@ -270,7 +270,7 @@ Logits processors let you modify the next-token logits at every decoding step (e You can enable a test-only processor that forces the model to respond with "Hello world!". This is useful to verify the wiring without modifying your model or engine code. ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd $DYNAMO_HOME/examples/backends/trtllm export DYNAMO_ENABLE_TEST_LOGITS_PROCESSOR=1 ./launch/agg.sh ``` @@ -316,7 +316,7 @@ sampling_params.logits_processor = create_trtllm_adapters(processors) ## Performance Sweep -For detailed instructions on running comprehensive performance sweeps across both aggregated and disaggregated serving configurations, see the [TensorRT-LLM Benchmark Scripts for DeepSeek R1 model](../../../components/backends/trtllm/performance_sweeps/README.md). This guide covers recommended benchmarking setups, usage of provided scripts, and best practices for evaluating system performance. +For detailed instructions on running comprehensive performance sweeps across both aggregated and disaggregated serving configurations, see the [TensorRT-LLM Benchmark Scripts for DeepSeek R1 model](../../../examples/backends/trtllm/performance_sweeps/README.md). This guide covers recommended benchmarking setups, usage of provided scripts, and best practices for evaluating system performance. 
## Dynamo KV Block Manager Integration diff --git a/docs/backends/trtllm/gemma3_sliding_window_attention.md b/docs/backends/trtllm/gemma3_sliding_window_attention.md index 5226ad5338..9898e25f8d 100644 --- a/docs/backends/trtllm/gemma3_sliding_window_attention.md +++ b/docs/backends/trtllm/gemma3_sliding_window_attention.md @@ -27,7 +27,7 @@ VSWA is a mechanism in which a model’s layers alternate between multiple slidi ## Aggregated Serving ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd $DYNAMO_HOME/examples/backends/trtllm export MODEL_PATH=google/gemma-3-1b-it export SERVED_MODEL_NAME=$MODEL_PATH export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml @@ -36,7 +36,7 @@ export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml ## Aggregated Serving with KV Routing ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd $DYNAMO_HOME/examples/backends/trtllm export MODEL_PATH=google/gemma-3-1b-it export SERVED_MODEL_NAME=$MODEL_PATH export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml @@ -45,7 +45,7 @@ export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml ## Disaggregated Serving ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd $DYNAMO_HOME/examples/backends/trtllm export MODEL_PATH=google/gemma-3-1b-it export SERVED_MODEL_NAME=$MODEL_PATH export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_prefill.yaml @@ -55,7 +55,7 @@ export DECODE_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_decode.yaml ## Disaggregated Serving with KV Routing ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd $DYNAMO_HOME/examples/backends/trtllm export MODEL_PATH=google/gemma-3-1b-it export SERVED_MODEL_NAME=$MODEL_PATH export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_prefill.yaml diff --git a/docs/backends/trtllm/gpt-oss.md b/docs/backends/trtllm/gpt-oss.md index 279f2fd1c2..a158191c6c 100644 --- a/docs/backends/trtllm/gpt-oss.md +++ b/docs/backends/trtllm/gpt-oss.md 
@@ -128,7 +128,7 @@ You can use the provided launch script or run the components manually: #### Option A: Using the Launch Script ```bash -cd /workspace/components/backends/trtllm +cd /workspace/examples/backends/trtllm ./launch/gpt_oss_disagg.sh ``` @@ -136,7 +136,7 @@ cd /workspace/components/backends/trtllm 1. **Start frontend**: ```bash -cd /workspace/dynamo/components/backends/trtllm +cd /workspace/dynamo/examples/backends/trtllm # Start frontend with round-robin routing python3 -m dynamo.frontend --router-mode round-robin --http-port 8000 & diff --git a/docs/backends/trtllm/llama4_plus_eagle.md b/docs/backends/trtllm/llama4_plus_eagle.md index f15bfa669f..d4fac9cd5e 100644 --- a/docs/backends/trtllm/llama4_plus_eagle.md +++ b/docs/backends/trtllm/llama4_plus_eagle.md @@ -39,7 +39,7 @@ inside an interactive shell on one of the allocated nodes, set the following environment variables based: ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd $DYNAMO_HOME/examples/backends/trtllm export IMAGE="" # export MOUNTS="${PWD}/:/mnt,/lustre:/lustre" diff --git a/docs/backends/trtllm/multimodal_epd.md b/docs/backends/trtllm/multimodal_epd.md index 9900a168a8..ba4461abc6 100644 --- a/docs/backends/trtllm/multimodal_epd.md +++ b/docs/backends/trtllm/multimodal_epd.md @@ -21,7 +21,7 @@ To enable it build the dynamo container with the `--tensorrtllm-commit` flag, fo ## How to use ```bash -cd $DYNAMO_HOME/components/backends/trtllm +cd $DYNAMO_HOME/examples/backends/trtllm # Launch 3-worker EPD flow with NIXL ./launch/epd_disagg.sh diff --git a/docs/backends/trtllm/multinode/multinode-examples.md b/docs/backends/trtllm/multinode/multinode-examples.md index 622ab10637..b8080d504c 100644 --- a/docs/backends/trtllm/multinode/multinode-examples.md +++ b/docs/backends/trtllm/multinode/multinode-examples.md @@ -48,7 +48,7 @@ For simplicity of the example, we will make some assumptions about your slurm cl If your cluster supports similar container based plugins, you may be able 
to modify the script to use that instead. 3. Third, we assume you have already built a recent Dynamo+TRTLLM container image as - described [here](https://github.com/ai-dynamo/dynamo/tree/main/components/backends/trtllm#build-docker). + described [here](https://github.com/ai-dynamo/dynamo/blob/main/docs/backends/trtllm/README.md#build-container). This is the image that can be set to the `IMAGE` environment variable in later steps. 4. Fourth, we assume you pre-allocate a group of nodes using `salloc`. We will allocate 8 nodes below as a reference command to have enough capacity @@ -87,7 +87,7 @@ following environment variables based: ```bash # NOTE: IMAGE must be set manually for now # To build an iamge, see the steps here: -# https://github.com/ai-dynamo/dynamo/tree/main/components/backends/trtllm#build-docker +# https://github.com/ai-dynamo/dynamo/blob/main/docs/backends/trtllm/README.md#build-container export IMAGE="" # MOUNTS are the host:container path pairs that are mounted into the containers diff --git a/docs/backends/trtllm/multinode/multinode-multimodal-example.md b/docs/backends/trtllm/multinode/multinode-multimodal-example.md index 9546f7a210..e6ba318364 100644 --- a/docs/backends/trtllm/multinode/multinode-multimodal-example.md +++ b/docs/backends/trtllm/multinode/multinode-multimodal-example.md @@ -52,7 +52,7 @@ following environment variables based: ```bash # NOTE: IMAGE must be set manually for now # To build an iamge, see the steps here: -# https://github.com/ai-dynamo/dynamo/tree/main/components/backends/trtllm#build-docker +# https://github.com/ai-dynamo/dynamo/blob/main/docs/backends/trtllm/README.md#build-container export IMAGE="" # MOUNTS are the host:container path pairs that are mounted into the containers diff --git a/docs/backends/vllm/LMCache_Integration.md b/docs/backends/vllm/LMCache_Integration.md index 5e07e0c4d9..42fceadfa8 100644 --- a/docs/backends/vllm/LMCache_Integration.md +++ b/docs/backends/vllm/LMCache_Integration.md @@ -43,7
+43,7 @@ For advanced configurations, LMCache supports multiple [storage backends](https: Use the provided launch script for quick setup: ```bash -./components/backends/vllm/launch/agg_lmcache.sh +./examples/backends/vllm/launch/agg_lmcache.sh ``` This will: @@ -69,7 +69,7 @@ The same `ENABLE_LMCACHE=1` environment variable enables LMCache, but the system Use the provided disaggregated launch script(the script requires at least 2 GPUs): ```bash -./components/backends/vllm/launch/disagg_lmcache.sh +./examples/backends/vllm/launch/disagg_lmcache.sh ``` This will: diff --git a/docs/backends/vllm/README.md b/docs/backends/vllm/README.md index 9f2b901f20..ce9a04a672 100644 --- a/docs/backends/vllm/README.md +++ b/docs/backends/vllm/README.md @@ -106,7 +106,7 @@ Note: The above architecture illustrates all the components. The final component ```bash # requires one gpu -cd components/backends/vllm +cd examples/backends/vllm bash launch/agg.sh ``` @@ -114,7 +114,7 @@ bash launch/agg.sh ```bash # requires two gpus -cd components/backends/vllm +cd examples/backends/vllm bash launch/agg_router.sh ``` @@ -122,7 +122,7 @@ bash launch/agg_router.sh ```bash # requires two gpus -cd components/backends/vllm +cd examples/backends/vllm bash launch/disagg.sh ``` @@ -130,7 +130,7 @@ bash launch/disagg.sh ```bash # requires three gpus -cd components/backends/vllm +cd examples/backends/vllm bash launch/disagg_router.sh ``` @@ -140,7 +140,7 @@ This example is not meant to be performant but showcases Dynamo routing to data ```bash # requires four gpus -cd components/backends/vllm +cd examples/backends/vllm bash launch/dep.sh ``` @@ -153,7 +153,7 @@ Below we provide a selected list of advanced deployments. 
Please open up an issu ### Kubernetes Deployment -For complete Kubernetes deployment instructions, configurations, and troubleshooting, see [vLLM Kubernetes Deployment Guide](../../../components/backends/vllm/deploy/README.md) +For complete Kubernetes deployment instructions, configurations, and troubleshooting, see [vLLM Kubernetes Deployment Guide](../../../examples/backends/vllm/deploy/README.md) ## Configuration diff --git a/docs/benchmarks/benchmarking.md b/docs/benchmarks/benchmarking.md index a981ecb4b6..1bab4b8e21 100644 --- a/docs/benchmarks/benchmarking.md +++ b/docs/benchmarks/benchmarking.md @@ -100,7 +100,7 @@ Follow these steps to benchmark Dynamo deployments using client-side benchmarkin Set up your Kubernetes cluster with NVIDIA GPUs and install the Dynamo Cloud platform. First follow the [installation guide](/docs/kubernetes/installation_guide.md) to install Dynamo Cloud, then use [deploy/utils/README](../../deploy/utils/README.md) to set up benchmarking resources. ### Step 2: Deploy DynamoGraphDeployments -Deploy your DynamoGraphDeployments separately using the [deployment documentation](../../components/backends/). Each deployment should have a frontend service exposed. +Deploy your DynamoGraphDeployments separately using the [deployment documentation](../../examples/backends/). Each deployment should have a frontend service exposed. ### Step 3: Port-Forward and Benchmark Deployment A ```bash @@ -332,7 +332,7 @@ The server-side benchmarking solution: ## Quick Start ### Step 1: Deploy Your DynamoGraphDeployment -Deploy your DynamoGraphDeployment using the [deployment documentation](../../components/backends/). Ensure it has a frontend service exposed. +Deploy your DynamoGraphDeployment using the [deployment documentation](../../examples/backends/). Ensure it has a frontend service exposed. 
### Step 2: Deploy and Run Benchmark Job diff --git a/docs/benchmarks/kv-router-ab-testing.md b/docs/benchmarks/kv-router-ab-testing.md index 4c17bf2d34..272b86abd0 100644 --- a/docs/benchmarks/kv-router-ab-testing.md +++ b/docs/benchmarks/kv-router-ab-testing.md @@ -163,7 +163,7 @@ spec: - gpu-h200-sxm # Adjust to your GPU node type mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0 - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c @@ -234,7 +234,7 @@ spec: - gpu-h200-sxm # Adjust to your GPU node type mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0 - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c diff --git a/docs/design_docs/distributed_runtime.md b/docs/design_docs/distributed_runtime.md index 54fd09e6e1..f100d27a5d 100644 --- a/docs/design_docs/distributed_runtime.md +++ b/docs/design_docs/distributed_runtime.md @@ -28,7 +28,7 @@ Dynamo's `DistributedRuntime` is the core infrastructure in the framework that e While theoretically each `DistributedRuntime` can have multiple `Namespace`s as long as their names are unique (similar logic also applies to `Component/Namespace` and `Endpoint/Component`), in practice, each dynamo components typically are deployed with its own process and thus has its own `DistributedRuntime` object. However, they share the same namespace to discover each other. -For example, a typical deployment configuration (like `components/backends/vllm/deploy/agg.yaml` or `components/backends/sglang/deploy/agg.yaml`) has multiple workers: +For example, a typical deployment configuration (like `examples/backends/vllm/deploy/agg.yaml` or `examples/backends/sglang/deploy/agg.yaml`) has multiple workers: - `Frontend`: Starts an HTTP server and handles incoming requests. The HTTP server routes all requests to the `Processor`. 
- `Processor`: When a new request arrives, `Processor` applies the chat template and performs the tokenization. @@ -75,6 +75,6 @@ After selecting which endpoint to hit, the `Client` sends the serialized request We provide native rust and python (through binding) examples for basic usage of `DistributedRuntime`: - Rust: `/lib/runtime/examples/` -- Python: We also provide complete examples of using `DistributedRuntime`. Please refer to the engines in `/components/backends` for full implementation details. +- Python: We also provide complete examples of using `DistributedRuntime`. Please refer to the engines in `/components/src/dynamo` for full implementation details. diff --git a/docs/design_docs/dynamo_flow.md b/docs/design_docs/dynamo_flow.md index 865c98ab5c..695ca34513 100644 --- a/docs/design_docs/dynamo_flow.md +++ b/docs/design_docs/dynamo_flow.md @@ -17,7 +17,7 @@ limitations under the License. # Dynamo Architecture Flow -This diagram shows the NVIDIA Dynamo disaggregated inference system as implemented in [components/backends/vllm](../../components/backends/vllm). Color-coded flows indicate different types of operations: +This diagram shows the NVIDIA Dynamo disaggregated inference system as implemented in [examples/backends/vllm](../../examples/backends/vllm). Color-coded flows indicate different types of operations: ## πŸ”΅ Main Request Flow (Blue) The primary user journey through the system: diff --git a/docs/development/backend-guide.md b/docs/development/backend-guide.md index f6e0ee8e75..6f7e04d4d3 100644 --- a/docs/development/backend-guide.md +++ b/docs/development/backend-guide.md @@ -77,7 +77,7 @@ The `model_type` can be: - `migration_limit`: Maximum number of times a request may be [migrated to another Instance](../fault_tolerance/request_migration.md). Defaults to 0. - `user_data`: Optional dictionary containing custom metadata for worker behavior (e.g., LoRA configuration). Defaults to None. -See `components/backends` for full code examples.
+See `examples/backends` for full code examples. ## Component names diff --git a/docs/kubernetes/README.md b/docs/kubernetes/README.md index 844c3858dd..6ffdd17a72 100644 --- a/docs/kubernetes/README.md +++ b/docs/kubernetes/README.md @@ -67,9 +67,9 @@ Each backend has deployment examples and configuration options: | Backend | Aggregated | Aggregated + Router | Disaggregated | Disaggregated + Router | Disaggregated + Planner | Disaggregated Multi-node | |--------------|:----------:|:-------------------:|:-------------:|:----------------------:|:-----------------------:|:------------------------:| -| **[SGLang](../../components/backends/sglang/deploy/README.md)** | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | -| **[TensorRT-LLM](../../components/backends/trtllm/deploy/README.md)** | βœ… | βœ… | βœ… | βœ… | 🚧 | βœ… | -| **[vLLM](../../components/backends/vllm/deploy/README.md)** | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | +| **[SGLang](../../examples/backends/sglang/deploy/README.md)** | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | +| **[TensorRT-LLM](../../examples/backends/trtllm/deploy/README.md)** | βœ… | βœ… | βœ… | βœ… | 🚧 | βœ… | +| **[vLLM](../../examples/backends/vllm/deploy/README.md)** | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | ## 3. 
Deploy Your First Model @@ -84,7 +84,7 @@ kubectl create secret generic hf-token-secret \ -n ${NAMESPACE}; # Deploy any example (this uses vLLM with Qwen model using aggregated serving) -kubectl apply -f components/backends/vllm/deploy/agg.yaml -n ${NAMESPACE} +kubectl apply -f examples/backends/vllm/deploy/agg.yaml -n ${NAMESPACE} # Check status kubectl get dynamoGraphDeployment -n ${NAMESPACE} diff --git a/docs/kubernetes/deployment/create_deployment.md b/docs/kubernetes/deployment/create_deployment.md index a0d2877d0a..85f1bfc7d1 100644 --- a/docs/kubernetes/deployment/create_deployment.md +++ b/docs/kubernetes/deployment/create_deployment.md @@ -1,7 +1,7 @@ # Creating Kubernetes Deployments -The scripts in the `components//launch` folder like [agg.sh](../../../components/backends/vllm/launch/agg.sh) demonstrate how you can serve your models locally. -The corresponding YAML files like [agg.yaml](../../../components/backends/vllm/deploy/agg.yaml) show you how you could create a Kubernetes deployment for your inference graph. +The scripts in the `examples//launch` folder like [agg.sh](../../../examples/backends/vllm/launch/agg.sh) demonstrate how you can serve your models locally. +The corresponding YAML files like [agg.yaml](../../../examples/backends/vllm/deploy/agg.yaml) show you how you could create a Kubernetes deployment for your inference graph. This guide explains how to create your own deployment files. 
@@ -25,7 +25,7 @@ Before choosing a template, understand the different architecture patterns: - GPU utilization may not be optimal (prefill and decode compete for resources) - Lower throughput ceiling compared to disaggregated -**Example**: [`agg.yaml`](../../../components/backends/vllm/deploy/agg.yaml) +**Example**: [`agg.yaml`](../../../examples/backends/vllm/deploy/agg.yaml) ### Aggregated + Router (agg_router.yaml) @@ -42,7 +42,7 @@ Before choosing a template, understand the different architecture patterns: - Still has GPU underutilization issues of aggregated serving - More complex than plain aggregated but simpler than disaggregated -**Example**: [`agg_router.yaml`](../../../components/backends/vllm/deploy/agg_router.yaml) +**Example**: [`agg_router.yaml`](../../../examples/backends/vllm/deploy/agg_router.yaml) ### Disaggregated Serving (disagg_router.yaml) @@ -61,7 +61,7 @@ Before choosing a template, understand the different architecture patterns: - More complex setup and debugging - Requires understanding of prefill/decode separation -**Example**: [`disagg_router.yaml`](../../../components/backends/vllm/deploy/disagg_router.yaml) +**Example**: [`disagg_router.yaml`](../../../examples/backends/vllm/deploy/disagg_router.yaml) ### Quick Selection Guide @@ -69,11 +69,11 @@ Select the architecture pattern as your template that best fits your use case. For example, when using the `vLLM` backend: -- **Development / Testing**: Use [`agg.yaml`](../../../components/backends/vllm/deploy/agg.yaml) as the base configuration. +- **Development / Testing**: Use [`agg.yaml`](../../../examples/backends/vllm/deploy/agg.yaml) as the base configuration. -- **Production with Load Balancing**: Use [`agg_router.yaml`](../../../components/backends/vllm/deploy/agg_router.yaml) to enable scalable, load-balanced inference. +- **Production with Load Balancing**: Use [`agg_router.yaml`](../../../examples/backends/vllm/deploy/agg_router.yaml) to enable scalable, load-balanced inference. 
-- **High Performance / Disaggregated Deployment**: Use [`disagg_router.yaml`](../../../components/backends/vllm/deploy/disagg_router.yaml) for maximum throughput and modular scalability. +- **High Performance / Disaggregated Deployment**: Use [`disagg_router.yaml`](../../../examples/backends/vllm/deploy/disagg_router.yaml) for maximum throughput and modular scalability. ## Step 2: Customize the Template diff --git a/docs/kubernetes/deployment/multinode-deployment.md b/docs/kubernetes/deployment/multinode-deployment.md index 02d557fb82..509f08d83b 100644 --- a/docs/kubernetes/deployment/multinode-deployment.md +++ b/docs/kubernetes/deployment/multinode-deployment.md @@ -281,8 +281,8 @@ To enable compilation cache, add a volume mount with `useAsCompilationCache: tru For additional support and examples, see the working multinode configurations in: -- **SGLang**: [components/backends/sglang/deploy/](../../../components/backends/sglang/deploy/) -- **TensorRT-LLM**: [components/backends/trtllm/deploy/](../../../components/backends/trtllm/deploy/) -- **vLLM**: [components/backends/vllm/deploy/](../../../components/backends/vllm/deploy/) +- **SGLang**: [examples/backends/sglang/deploy/](../../../examples/backends/sglang/deploy/) +- **TensorRT-LLM**: [examples/backends/trtllm/deploy/](../../../examples/backends/trtllm/deploy/) +- **vLLM**: [examples/backends/vllm/deploy/](../../../examples/backends/vllm/deploy/) These examples demonstrate proper usage of the `multinode` section with corresponding `gpu` limits and correct `tp-size` configuration. diff --git a/docs/kubernetes/installation_guide.md b/docs/kubernetes/installation_guide.md index 6250517017..d8175f23ed 100644 --- a/docs/kubernetes/installation_guide.md +++ b/docs/kubernetes/installation_guide.md @@ -233,7 +233,7 @@ kubectl get pods -n ${NAMESPACE} 1. 
**Deploy Model/Workflow** ```bash # Example: Deploy a vLLM workflow with Qwen3-0.6B using aggregated serving - kubectl apply -f components/backends/vllm/deploy/agg.yaml -n ${NAMESPACE} + kubectl apply -f examples/backends/vllm/deploy/agg.yaml -n ${NAMESPACE} # Port forward and test kubectl port-forward svc/agg-vllm-frontend 8000:8000 -n ${NAMESPACE} @@ -241,9 +241,9 @@ kubectl get pods -n ${NAMESPACE} ``` 2. **Explore Backend Guides** - - [vLLM Deployments](../../components/backends/vllm/deploy/README.md) - - [SGLang Deployments](../../components/backends/sglang/deploy/README.md) - - [TensorRT-LLM Deployments](../../components/backends/trtllm/deploy/README.md) + - [vLLM Deployments](../../examples/backends/vllm/deploy/README.md) + - [SGLang Deployments](../../examples/backends/sglang/deploy/README.md) + - [TensorRT-LLM Deployments](../../examples/backends/trtllm/deploy/README.md) 3. **Optional:** - [Set up Prometheus & Grafana](./observability/metrics.md) diff --git a/docs/kubernetes/observability/logging.md b/docs/kubernetes/observability/logging.md index abce5b81b8..0784cf05c7 100644 --- a/docs/kubernetes/observability/logging.md +++ b/docs/kubernetes/observability/logging.md @@ -126,7 +126,7 @@ At this point, we should have everything in place to collect and view logs in ou To enable structured logs in a DynamoGraphDeployment, we need to set the `DYN_LOGGING_JSONL` environment variable to `1`. This is done for us in the `agg_logging.yaml` setup for the Sglang backend. We can now deploy the DynamoGraphDeployment with: ```bash -kubectl apply -n $DYN_NAMESPACE -f components/backends/sglang/deploy/agg_logging.yaml +kubectl apply -n $DYN_NAMESPACE -f examples/backends/sglang/deploy/agg_logging.yaml ``` Send a few chat completions requests to generate structured logs across the frontend and worker pods across the DynamoGraphDeployment. We are now all set to view the logs in Grafana. 
diff --git a/docs/kubernetes/observability/metrics.md b/docs/kubernetes/observability/metrics.md index d1552f4715..e03ec3efeb 100644 --- a/docs/kubernetes/observability/metrics.md +++ b/docs/kubernetes/observability/metrics.md @@ -69,7 +69,7 @@ Let's start by deploying a simple vLLM aggregated deployment: ```bash export NAMESPACE=dynamo-system # namespace where dynamo operator is installed -pushd components/backends/vllm/deploy +pushd examples/backends/vllm/deploy kubectl apply -f agg.yaml -n $NAMESPACE popd ``` diff --git a/docs/kvbm/vllm-setup.md b/docs/kvbm/vllm-setup.md index de1eb4965d..84978b6ab2 100644 --- a/docs/kvbm/vllm-setup.md +++ b/docs/kvbm/vllm-setup.md @@ -39,7 +39,7 @@ docker compose -f deploy/docker-compose.yml up -d ### Aggregated Serving with KVBM ```bash -cd $DYNAMO_HOME/components/backends/vllm +cd $DYNAMO_HOME/examples/backends/vllm ./launch/agg_kvbm.sh ``` @@ -47,12 +47,12 @@ cd $DYNAMO_HOME/components/backends/vllm ```bash # 1P1D - one prefill worker and one decode worker # NOTE: need at least 2 GPUs -cd $DYNAMO_HOME/components/backends/vllm +cd $DYNAMO_HOME/examples/backends/vllm ./launch/disagg_kvbm.sh # 2P2D - two prefill workers and two decode workers # NOTE: need at least 4 GPUs -cd $DYNAMO_HOME/components/backends/vllm +cd $DYNAMO_HOME/examples/backends/vllm ./launch/disagg_kvbm_2p2d.sh ``` diff --git a/docs/performance/aiconfigurator.md b/docs/performance/aiconfigurator.md index 353bd23001..7a9ef9ed82 100644 --- a/docs/performance/aiconfigurator.md +++ b/docs/performance/aiconfigurator.md @@ -102,7 +102,7 @@ tokens/s/gpu tokens/s/user ```bash # Use with Dynamo's SLA planner (20-30 seconds vs hours) python3 -m benchmarks.profiler.profile_sla \ - --config ./components/backends/trtllm/deploy/disagg.yaml \ + --config ./examples/backends/trtllm/deploy/disagg.yaml \ --backend trtllm \ --use-ai-configurator \ --aic-system h200_sxm \ diff --git a/docs/planner/sla_planner_quickstart.md b/docs/planner/sla_planner_quickstart.md index 
1fcb2a6ca9..53ba9d457a 100644 --- a/docs/planner/sla_planner_quickstart.md +++ b/docs/planner/sla_planner_quickstart.md @@ -254,7 +254,7 @@ spec: #### Using Existing DGD Configs (Recommended for Custom Setups) -If you have an existing DynamoGraphDeployment config (e.g., from `components/backends/*/deploy/disagg.yaml` or custom recipes), you can reference it via ConfigMap: +If you have an existing DynamoGraphDeployment config (e.g., from `examples/backends/*/deploy/disagg.yaml` or custom recipes), you can reference it via ConfigMap: **Step 1: Create ConfigMap from your DGD config file:** diff --git a/docs/reference/cli.md b/docs/reference/cli.md index fe842e7561..b50c2e2fea 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -293,7 +293,7 @@ The default delay is 10ms, which produces approximately 100 tokens per second. ### Other engines, multi-node, production -`vllm`, `sglang` and `trtllm` production grade engines are available in `components/backends`. They run as Python components, using the Rust bindings. See the main README. +`vllm`, `sglang` and `trtllm` production grade engines are available in `examples/backends`. They run as Python components, using the Rust bindings. See the main README. `dynamo-run` is an exploration, development and prototyping tool, as well as an example of using the Rust API. Multi-node and production setups should be using the main engine components. @@ -320,7 +320,7 @@ The output looks like this: ## Writing your own engine in Python -The [dynamo](https://pypi.org/project/ai-dynamo/) Python library allows you to build your own engine and attach it to Dynamo. All of the main backend components in `components/backends/` work like this. +The [dynamo](https://pypi.org/project/ai-dynamo/) Python library allows you to build your own engine and attach it to Dynamo. All of the main backend components in `examples/backends/` work like this. The Python file must do three things: 1. 
Decorate a function to get the runtime @@ -396,7 +396,7 @@ Here are some example engines: - Chat: * [sglang](https://github.com/ai-dynamo/dynamo/blob/main/lib/bindings/python/examples/hello_world/server_sglang_tok.py) -More fully-featured Python engines are in `components/backends`. +More fully-featured Python engines are in `examples/backends`. ## Debugging diff --git a/docs/router/README.md b/docs/router/README.md index 3e90327ee9..da2e50ca44 100644 --- a/docs/router/README.md +++ b/docs/router/README.md @@ -60,9 +60,9 @@ spec: - No worker-side configuration changes needed **Complete K8s Examples:** -- [TRT-LLM aggregated router example](../../components/backends/trtllm/deploy/agg_router.yaml) -- [vLLM aggregated router example](../../components/backends/vllm/deploy/agg_router.yaml) -- [SGLang aggregated router example](../../components/backends/sglang/deploy/agg_router.yaml) +- [TRT-LLM aggregated router example](../../examples/backends/trtllm/deploy/agg_router.yaml) +- [vLLM aggregated router example](../../examples/backends/vllm/deploy/agg_router.yaml) +- [SGLang aggregated router example](../../examples/backends/sglang/deploy/agg_router.yaml) - [Distributed inference tutorial](../../examples/basics/kubernetes/Distributed_Inference/agg_router.yaml) **For A/B Testing and Advanced K8s Setup:** diff --git a/examples/README.md b/examples/README.md index b52ff56a3f..4d0d7e1caf 100644 --- a/examples/README.md +++ b/examples/README.md @@ -34,10 +34,10 @@ Learn fundamental Dynamo concepts through these introductory examples: These examples show how Dynamo broadly works using major inference engines. 
-If you want to see advanced, framework-specific deployment patterns and best practices, check out the [Components Workflows](../components/backends/) directory: -- **[vLLM](../components/backends/vllm/)** – vLLM-specific deployment and configuration -- **[SGLang](../components/backends/sglang/)** – SGLang integration examples and workflows -- **[TensorRT-LLM](../components/backends/trtllm/)** – TensorRT-LLM workflows and optimizations +If you want to see advanced, framework-specific deployment patterns and best practices, check out the [Examples Backends](../examples/backends/) directory: +- **[vLLM](../examples/backends/vllm/)** – vLLM-specific deployment and configuration +- **[SGLang](../examples/backends/sglang/)** – SGLang integration examples and workflows +- **[TensorRT-LLM](../examples/backends/trtllm/)** – TensorRT-LLM workflows and optimizations ## Deployment Examples diff --git a/components/backends/sglang/deploy/README.md b/examples/backends/sglang/deploy/README.md similarity index 98% rename from components/backends/sglang/deploy/README.md rename to examples/backends/sglang/deploy/README.md index 8b77cd973e..44a94c4448 100644 --- a/components/backends/sglang/deploy/README.md +++ b/examples/backends/sglang/deploy/README.md @@ -62,7 +62,7 @@ resources: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang args: - "python3" - "-m" diff --git a/components/backends/sglang/deploy/agg.yaml b/examples/backends/sglang/deploy/agg.yaml similarity index 94% rename from components/backends/sglang/deploy/agg.yaml rename to examples/backends/sglang/deploy/agg.yaml index 8c444b384e..c6302906b6 100644 --- a/components/backends/sglang/deploy/agg.yaml +++ b/examples/backends/sglang/deploy/agg.yaml @@ -25,7 +25,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: 
/workspace/examples/backends/sglang command: - python3 - -m diff --git a/components/backends/sglang/deploy/agg_logging.yaml b/examples/backends/sglang/deploy/agg_logging.yaml similarity index 94% rename from components/backends/sglang/deploy/agg_logging.yaml rename to examples/backends/sglang/deploy/agg_logging.yaml index 93fa747c2b..a6e576aae9 100644 --- a/components/backends/sglang/deploy/agg_logging.yaml +++ b/examples/backends/sglang/deploy/agg_logging.yaml @@ -28,7 +28,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - python3 - -m diff --git a/components/backends/sglang/deploy/agg_router.yaml b/examples/backends/sglang/deploy/agg_router.yaml similarity index 95% rename from components/backends/sglang/deploy/agg_router.yaml rename to examples/backends/sglang/deploy/agg_router.yaml index 142f1932f8..e9e01e8bb8 100644 --- a/components/backends/sglang/deploy/agg_router.yaml +++ b/examples/backends/sglang/deploy/agg_router.yaml @@ -28,7 +28,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - python3 - -m diff --git a/components/backends/sglang/deploy/disagg-multinode.yaml b/examples/backends/sglang/deploy/disagg-multinode.yaml similarity index 95% rename from components/backends/sglang/deploy/disagg-multinode.yaml rename to examples/backends/sglang/deploy/disagg-multinode.yaml index 39806f7ba6..ac161b3159 100644 --- a/components/backends/sglang/deploy/disagg-multinode.yaml +++ b/examples/backends/sglang/deploy/disagg-multinode.yaml @@ -36,7 +36,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - python3 - -m @@ -73,7 +73,7 @@ spec: extraPodSpec: 
mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - python3 - -m diff --git a/components/backends/sglang/deploy/disagg.yaml b/examples/backends/sglang/deploy/disagg.yaml similarity index 94% rename from components/backends/sglang/deploy/disagg.yaml rename to examples/backends/sglang/deploy/disagg.yaml index d162844f4c..52866fedf5 100644 --- a/components/backends/sglang/deploy/disagg.yaml +++ b/examples/backends/sglang/deploy/disagg.yaml @@ -26,7 +26,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - python3 - -m @@ -62,7 +62,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - python3 - -m diff --git a/components/backends/sglang/deploy/disagg_planner.yaml b/examples/backends/sglang/deploy/disagg_planner.yaml similarity index 96% rename from components/backends/sglang/deploy/disagg_planner.yaml rename to examples/backends/sglang/deploy/disagg_planner.yaml index 5e0d7f2368..1ed1d195ba 100644 --- a/components/backends/sglang/deploy/disagg_planner.yaml +++ b/examples/backends/sglang/deploy/disagg_planner.yaml @@ -50,7 +50,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - python3 args: @@ -86,7 +86,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - python3 args: diff --git a/components/backends/sglang/launch/agg.sh b/examples/backends/sglang/launch/agg.sh similarity index 100% rename from 
components/backends/sglang/launch/agg.sh rename to examples/backends/sglang/launch/agg.sh diff --git a/components/backends/sglang/launch/agg_embed.sh b/examples/backends/sglang/launch/agg_embed.sh similarity index 100% rename from components/backends/sglang/launch/agg_embed.sh rename to examples/backends/sglang/launch/agg_embed.sh diff --git a/components/backends/sglang/launch/agg_router.sh b/examples/backends/sglang/launch/agg_router.sh similarity index 100% rename from components/backends/sglang/launch/agg_router.sh rename to examples/backends/sglang/launch/agg_router.sh diff --git a/components/backends/sglang/launch/disagg.sh b/examples/backends/sglang/launch/disagg.sh similarity index 100% rename from components/backends/sglang/launch/disagg.sh rename to examples/backends/sglang/launch/disagg.sh diff --git a/components/backends/sglang/launch/disagg_dp_attn.sh b/examples/backends/sglang/launch/disagg_dp_attn.sh similarity index 100% rename from components/backends/sglang/launch/disagg_dp_attn.sh rename to examples/backends/sglang/launch/disagg_dp_attn.sh diff --git a/components/backends/sglang/launch/disagg_router.sh b/examples/backends/sglang/launch/disagg_router.sh similarity index 100% rename from components/backends/sglang/launch/disagg_router.sh rename to examples/backends/sglang/launch/disagg_router.sh diff --git a/components/backends/sglang/launch/multimodal_agg.sh b/examples/backends/sglang/launch/multimodal_agg.sh similarity index 100% rename from components/backends/sglang/launch/multimodal_agg.sh rename to examples/backends/sglang/launch/multimodal_agg.sh diff --git a/components/backends/sglang/launch/multimodal_disagg.sh b/examples/backends/sglang/launch/multimodal_disagg.sh similarity index 100% rename from components/backends/sglang/launch/multimodal_disagg.sh rename to examples/backends/sglang/launch/multimodal_disagg.sh diff --git a/components/backends/sglang/slurm_jobs/.gitignore b/examples/backends/sglang/slurm_jobs/.gitignore similarity index 
100% rename from components/backends/sglang/slurm_jobs/.gitignore rename to examples/backends/sglang/slurm_jobs/.gitignore diff --git a/components/backends/sglang/slurm_jobs/README.md b/examples/backends/sglang/slurm_jobs/README.md similarity index 100% rename from components/backends/sglang/slurm_jobs/README.md rename to examples/backends/sglang/slurm_jobs/README.md diff --git a/components/backends/sglang/slurm_jobs/job_script_template.j2 b/examples/backends/sglang/slurm_jobs/job_script_template.j2 similarity index 100% rename from components/backends/sglang/slurm_jobs/job_script_template.j2 rename to examples/backends/sglang/slurm_jobs/job_script_template.j2 diff --git a/components/backends/sglang/slurm_jobs/scripts/benchmark_utils.sh b/examples/backends/sglang/slurm_jobs/scripts/benchmark_utils.sh similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/benchmark_utils.sh rename to examples/backends/sglang/slurm_jobs/scripts/benchmark_utils.sh diff --git a/components/backends/sglang/slurm_jobs/scripts/check_server_health.py b/examples/backends/sglang/slurm_jobs/scripts/check_server_health.py similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/check_server_health.py rename to examples/backends/sglang/slurm_jobs/scripts/check_server_health.py diff --git a/components/backends/sglang/slurm_jobs/scripts/gap/bench.sh b/examples/backends/sglang/slurm_jobs/scripts/gap/bench.sh similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/gap/bench.sh rename to examples/backends/sglang/slurm_jobs/scripts/gap/bench.sh diff --git a/components/backends/sglang/slurm_jobs/scripts/gb200-fp4.sh b/examples/backends/sglang/slurm_jobs/scripts/gb200-fp4.sh similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/gb200-fp4.sh rename to examples/backends/sglang/slurm_jobs/scripts/gb200-fp4.sh diff --git a/components/backends/sglang/slurm_jobs/scripts/gb200-fp8.sh 
b/examples/backends/sglang/slurm_jobs/scripts/gb200-fp8.sh similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/gb200-fp8.sh rename to examples/backends/sglang/slurm_jobs/scripts/gb200-fp8.sh diff --git a/components/backends/sglang/slurm_jobs/scripts/h100.sh b/examples/backends/sglang/slurm_jobs/scripts/h100.sh similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/h100.sh rename to examples/backends/sglang/slurm_jobs/scripts/h100.sh diff --git a/components/backends/sglang/slurm_jobs/scripts/monitor_gpu_utilization.sh b/examples/backends/sglang/slurm_jobs/scripts/monitor_gpu_utilization.sh similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/monitor_gpu_utilization.sh rename to examples/backends/sglang/slurm_jobs/scripts/monitor_gpu_utilization.sh diff --git a/components/backends/sglang/slurm_jobs/scripts/nginx.conf.j2 b/examples/backends/sglang/slurm_jobs/scripts/nginx.conf.j2 similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/nginx.conf.j2 rename to examples/backends/sglang/slurm_jobs/scripts/nginx.conf.j2 diff --git a/components/backends/sglang/slurm_jobs/scripts/sglang/bench.sh b/examples/backends/sglang/slurm_jobs/scripts/sglang/bench.sh similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/sglang/bench.sh rename to examples/backends/sglang/slurm_jobs/scripts/sglang/bench.sh diff --git a/components/backends/sglang/slurm_jobs/scripts/sglang_bench_serving.sh b/examples/backends/sglang/slurm_jobs/scripts/sglang_bench_serving.sh similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/sglang_bench_serving.sh rename to examples/backends/sglang/slurm_jobs/scripts/sglang_bench_serving.sh diff --git a/components/backends/sglang/slurm_jobs/scripts/vllm/backend_request_func.py b/examples/backends/sglang/slurm_jobs/scripts/vllm/backend_request_func.py similarity index 100% rename from 
components/backends/sglang/slurm_jobs/scripts/vllm/backend_request_func.py rename to examples/backends/sglang/slurm_jobs/scripts/vllm/backend_request_func.py diff --git a/components/backends/sglang/slurm_jobs/scripts/vllm/bench.sh b/examples/backends/sglang/slurm_jobs/scripts/vllm/bench.sh similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/vllm/bench.sh rename to examples/backends/sglang/slurm_jobs/scripts/vllm/bench.sh diff --git a/components/backends/sglang/slurm_jobs/scripts/vllm/benchmark_serving.py b/examples/backends/sglang/slurm_jobs/scripts/vllm/benchmark_serving.py similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/vllm/benchmark_serving.py rename to examples/backends/sglang/slurm_jobs/scripts/vllm/benchmark_serving.py diff --git a/components/backends/sglang/slurm_jobs/scripts/vllm/benchmark_utils.py b/examples/backends/sglang/slurm_jobs/scripts/vllm/benchmark_utils.py similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/vllm/benchmark_utils.py rename to examples/backends/sglang/slurm_jobs/scripts/vllm/benchmark_utils.py diff --git a/components/backends/sglang/slurm_jobs/scripts/worker_setup.py b/examples/backends/sglang/slurm_jobs/scripts/worker_setup.py similarity index 100% rename from components/backends/sglang/slurm_jobs/scripts/worker_setup.py rename to examples/backends/sglang/slurm_jobs/scripts/worker_setup.py diff --git a/components/backends/sglang/slurm_jobs/submit_disagg.sh b/examples/backends/sglang/slurm_jobs/submit_disagg.sh similarity index 100% rename from components/backends/sglang/slurm_jobs/submit_disagg.sh rename to examples/backends/sglang/slurm_jobs/submit_disagg.sh diff --git a/components/backends/sglang/slurm_jobs/submit_job_script.py b/examples/backends/sglang/slurm_jobs/submit_job_script.py similarity index 100% rename from components/backends/sglang/slurm_jobs/submit_job_script.py rename to examples/backends/sglang/slurm_jobs/submit_job_script.py 
diff --git a/components/backends/trtllm/deploy/README.md b/examples/backends/trtllm/deploy/README.md similarity index 99% rename from components/backends/trtllm/deploy/README.md rename to examples/backends/trtllm/deploy/README.md index 8fad703d96..0f7aecd9f2 100644 --- a/components/backends/trtllm/deploy/README.md +++ b/examples/backends/trtllm/deploy/README.md @@ -90,7 +90,7 @@ resources: extraPodSpec: mainContainer: image: my-registry/trtllm-runtime:my-tag - workingDir: /workspace/components/backends/trtllm + workingDir: /workspace/examples/backends/trtllm args: - "python3" - "-m" @@ -168,7 +168,7 @@ Then, deploy the model using the deployment file. Export the NAMESPACE you used in your Dynamo Cloud Installation. ```bash -cd dynamo/components/backends/trtllm/deploy +cd dynamo/examples/backends/trtllm/deploy export DEPLOYMENT_FILE=agg.yaml kubectl apply -f $DEPLOYMENT_FILE -n $NAMESPACE ``` diff --git a/components/backends/trtllm/deploy/agg-with-config.yaml b/examples/backends/trtllm/deploy/agg-with-config.yaml similarity index 97% rename from components/backends/trtllm/deploy/agg-with-config.yaml rename to examples/backends/trtllm/deploy/agg-with-config.yaml index e40ca48ada..d18d1b0fb2 100644 --- a/components/backends/trtllm/deploy/agg-with-config.yaml +++ b/examples/backends/trtllm/deploy/agg-with-config.yaml @@ -51,7 +51,7 @@ spec: name: nvidia-config mainContainer: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag - workingDir: /workspace/components/backends/trtllm + workingDir: /workspace/examples/backends/trtllm # mount the configmap as a volume volumeMounts: - name: nvidia-config diff --git a/components/backends/trtllm/deploy/agg.yaml b/examples/backends/trtllm/deploy/agg.yaml similarity index 100% rename from components/backends/trtllm/deploy/agg.yaml rename to examples/backends/trtllm/deploy/agg.yaml diff --git a/components/backends/trtllm/deploy/agg_router.yaml b/examples/backends/trtllm/deploy/agg_router.yaml similarity index 100% rename from 
components/backends/trtllm/deploy/agg_router.yaml rename to examples/backends/trtllm/deploy/agg_router.yaml diff --git a/components/backends/trtllm/deploy/disagg-multinode.yaml b/examples/backends/trtllm/deploy/disagg-multinode.yaml similarity index 98% rename from components/backends/trtllm/deploy/disagg-multinode.yaml rename to examples/backends/trtllm/deploy/disagg-multinode.yaml index 2906cfd193..e3eb6a3bcc 100644 --- a/components/backends/trtllm/deploy/disagg-multinode.yaml +++ b/examples/backends/trtllm/deploy/disagg-multinode.yaml @@ -96,7 +96,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/trtllm-runtime:my-tag - workingDir: /workspace/components/backends/trtllm + workingDir: /workspace/examples/backends/trtllm command: - python3 - -m diff --git a/components/backends/trtllm/deploy/disagg.yaml b/examples/backends/trtllm/deploy/disagg.yaml similarity index 100% rename from components/backends/trtllm/deploy/disagg.yaml rename to examples/backends/trtllm/deploy/disagg.yaml diff --git a/components/backends/trtllm/deploy/disagg_planner.yaml b/examples/backends/trtllm/deploy/disagg_planner.yaml similarity index 98% rename from components/backends/trtllm/deploy/disagg_planner.yaml rename to examples/backends/trtllm/deploy/disagg_planner.yaml index 40c5a82ecf..9324bd5089 100644 --- a/components/backends/trtllm/deploy/disagg_planner.yaml +++ b/examples/backends/trtllm/deploy/disagg_planner.yaml @@ -17,7 +17,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/trtllm-runtime:my-tag - workingDir: /workspace/components/backends/trtllm + workingDir: /workspace/examples/backends/trtllm command: - python3 args: diff --git a/components/backends/trtllm/deploy/disagg_router.yaml b/examples/backends/trtllm/deploy/disagg_router.yaml similarity index 100% rename from components/backends/trtllm/deploy/disagg_router.yaml rename to examples/backends/trtllm/deploy/disagg_router.yaml diff --git a/components/backends/trtllm/launch/agg.sh 
b/examples/backends/trtllm/launch/agg.sh similarity index 100% rename from components/backends/trtllm/launch/agg.sh rename to examples/backends/trtllm/launch/agg.sh diff --git a/components/backends/trtllm/launch/agg_metrics.sh b/examples/backends/trtllm/launch/agg_metrics.sh similarity index 100% rename from components/backends/trtllm/launch/agg_metrics.sh rename to examples/backends/trtllm/launch/agg_metrics.sh diff --git a/components/backends/trtllm/launch/agg_router.sh b/examples/backends/trtllm/launch/agg_router.sh similarity index 100% rename from components/backends/trtllm/launch/agg_router.sh rename to examples/backends/trtllm/launch/agg_router.sh diff --git a/components/backends/trtllm/launch/disagg.sh b/examples/backends/trtllm/launch/disagg.sh similarity index 100% rename from components/backends/trtllm/launch/disagg.sh rename to examples/backends/trtllm/launch/disagg.sh diff --git a/components/backends/trtllm/launch/disagg_router.sh b/examples/backends/trtllm/launch/disagg_router.sh similarity index 100% rename from components/backends/trtllm/launch/disagg_router.sh rename to examples/backends/trtllm/launch/disagg_router.sh diff --git a/components/backends/trtllm/launch/epd_disagg.sh b/examples/backends/trtllm/launch/epd_disagg.sh similarity index 100% rename from components/backends/trtllm/launch/epd_disagg.sh rename to examples/backends/trtllm/launch/epd_disagg.sh diff --git a/components/backends/trtllm/launch/gpt_oss_disagg.sh b/examples/backends/trtllm/launch/gpt_oss_disagg.sh similarity index 100% rename from components/backends/trtllm/launch/gpt_oss_disagg.sh rename to examples/backends/trtllm/launch/gpt_oss_disagg.sh diff --git a/components/backends/trtllm/performance_sweeps/README.md b/examples/backends/trtllm/performance_sweeps/README.md similarity index 97% rename from components/backends/trtllm/performance_sweeps/README.md rename to examples/backends/trtllm/performance_sweeps/README.md index 0e3a4de174..7819609579 100644 --- 
a/components/backends/trtllm/performance_sweeps/README.md +++ b/examples/backends/trtllm/performance_sweeps/README.md @@ -49,7 +49,7 @@ For more finer grained details on how to launch TRTLLM backend workers with Deep Before running the scripts, ensure you have: 1. Access to a SLURM cluster -2. Container image of Dynamo with TensorRT-LLM built using instructions from [here](https://github.com/ai-dynamo/dynamo/tree/main/components/backends/trtllm#build-docker). +2. Container image of Dynamo with TensorRT-LLM built using instructions from [here](https://github.com/ai-dynamo/dynamo/tree/main/docs/backends/trtllm/README.md#build-container). 3. Model files accessible on the cluster 4. Required environment variables set @@ -69,7 +69,7 @@ export SLURM_JOB_NAME="" # NOTE: IMAGE must be set manually for now # To build an image, see the steps here: -# https://github.com/ai-dynamo/dynamo/tree/main/components/backends/trtllm#build-docker +# https://github.com/ai-dynamo/dynamo/tree/main/docs/backends/trtllm/README.md#build-container export IMAGE="" # NOTE: In general, Deepseek R1 is very large, so it is recommended to diff --git a/components/backends/trtllm/performance_sweeps/benchmark_agg.slurm b/examples/backends/trtllm/performance_sweeps/benchmark_agg.slurm similarity index 100% rename from components/backends/trtllm/performance_sweeps/benchmark_agg.slurm rename to examples/backends/trtllm/performance_sweeps/benchmark_agg.slurm diff --git a/components/backends/trtllm/performance_sweeps/benchmark_disagg.slurm b/examples/backends/trtllm/performance_sweeps/benchmark_disagg.slurm similarity index 100% rename from components/backends/trtllm/performance_sweeps/benchmark_disagg.slurm rename to examples/backends/trtllm/performance_sweeps/benchmark_disagg.slurm diff --git a/components/backends/trtllm/performance_sweeps/plot_performance_comparison.py b/examples/backends/trtllm/performance_sweeps/plot_performance_comparison.py similarity index 100% rename from 
components/backends/trtllm/performance_sweeps/plot_performance_comparison.py rename to examples/backends/trtllm/performance_sweeps/plot_performance_comparison.py diff --git a/components/backends/trtllm/performance_sweeps/post_process.py b/examples/backends/trtllm/performance_sweeps/post_process.py similarity index 100% rename from components/backends/trtllm/performance_sweeps/post_process.py rename to examples/backends/trtllm/performance_sweeps/post_process.py diff --git a/components/backends/trtllm/performance_sweeps/scripts/bench.sh b/examples/backends/trtllm/performance_sweeps/scripts/bench.sh similarity index 100% rename from components/backends/trtllm/performance_sweeps/scripts/bench.sh rename to examples/backends/trtllm/performance_sweeps/scripts/bench.sh diff --git a/components/backends/trtllm/performance_sweeps/scripts/gen_yaml.py b/examples/backends/trtllm/performance_sweeps/scripts/gen_yaml.py similarity index 100% rename from components/backends/trtllm/performance_sweeps/scripts/gen_yaml.py rename to examples/backends/trtllm/performance_sweeps/scripts/gen_yaml.py diff --git a/components/backends/trtllm/performance_sweeps/scripts/start_agg_worker.sh b/examples/backends/trtllm/performance_sweeps/scripts/start_agg_worker.sh similarity index 100% rename from components/backends/trtllm/performance_sweeps/scripts/start_agg_worker.sh rename to examples/backends/trtllm/performance_sweeps/scripts/start_agg_worker.sh diff --git a/components/backends/trtllm/performance_sweeps/scripts/start_disagg_worker.sh b/examples/backends/trtllm/performance_sweeps/scripts/start_disagg_worker.sh similarity index 100% rename from components/backends/trtllm/performance_sweeps/scripts/start_disagg_worker.sh rename to examples/backends/trtllm/performance_sweeps/scripts/start_disagg_worker.sh diff --git a/components/backends/trtllm/performance_sweeps/scripts/start_frontend.sh b/examples/backends/trtllm/performance_sweeps/scripts/start_frontend.sh similarity index 100% rename from 
components/backends/trtllm/performance_sweeps/scripts/start_frontend.sh rename to examples/backends/trtllm/performance_sweeps/scripts/start_frontend.sh diff --git a/components/backends/trtllm/performance_sweeps/set_clock.sh b/examples/backends/trtllm/performance_sweeps/set_clock.sh similarity index 100% rename from components/backends/trtllm/performance_sweeps/set_clock.sh rename to examples/backends/trtllm/performance_sweeps/set_clock.sh diff --git a/components/backends/trtllm/performance_sweeps/submit_agg.sh b/examples/backends/trtllm/performance_sweeps/submit_agg.sh similarity index 100% rename from components/backends/trtllm/performance_sweeps/submit_agg.sh rename to examples/backends/trtllm/performance_sweeps/submit_agg.sh diff --git a/components/backends/trtllm/performance_sweeps/submit_disagg.sh b/examples/backends/trtllm/performance_sweeps/submit_disagg.sh similarity index 100% rename from components/backends/trtllm/performance_sweeps/submit_disagg.sh rename to examples/backends/trtllm/performance_sweeps/submit_disagg.sh diff --git a/components/backends/vllm/deploy/README.md b/examples/backends/vllm/deploy/README.md similarity index 98% rename from components/backends/vllm/deploy/README.md rename to examples/backends/vllm/deploy/README.md index 16169381d1..5f2c11dc4c 100644 --- a/components/backends/vllm/deploy/README.md +++ b/examples/backends/vllm/deploy/README.md @@ -70,7 +70,7 @@ resources: extraPodSpec: mainContainer: image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm args: - "python3" - "-m" @@ -141,7 +141,7 @@ Then, deploy the model using the deployment file. Export the NAMESPACE you used in your Dynamo Cloud Installation. 
```bash -cd /components/backends/vllm/deploy +cd /examples/backends/vllm/deploy export DEPLOYMENT_FILE=agg.yaml kubectl apply -f $DEPLOYMENT_FILE -n $NAMESPACE diff --git a/components/backends/vllm/deploy/agg.yaml b/examples/backends/vllm/deploy/agg.yaml similarity index 93% rename from components/backends/vllm/deploy/agg.yaml rename to examples/backends/vllm/deploy/agg.yaml index 95de87138a..adbceca664 100644 --- a/components/backends/vllm/deploy/agg.yaml +++ b/examples/backends/vllm/deploy/agg.yaml @@ -25,7 +25,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m diff --git a/components/backends/vllm/deploy/agg_kvbm.yaml b/examples/backends/vllm/deploy/agg_kvbm.yaml similarity index 95% rename from components/backends/vllm/deploy/agg_kvbm.yaml rename to examples/backends/vllm/deploy/agg_kvbm.yaml index 84dc297786..62e28386aa 100644 --- a/components/backends/vllm/deploy/agg_kvbm.yaml +++ b/examples/backends/vllm/deploy/agg_kvbm.yaml @@ -32,7 +32,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m diff --git a/components/backends/vllm/deploy/agg_router.yaml b/examples/backends/vllm/deploy/agg_router.yaml similarity index 94% rename from components/backends/vllm/deploy/agg_router.yaml rename to examples/backends/vllm/deploy/agg_router.yaml index 999dd75f64..26b961a061 100644 --- a/components/backends/vllm/deploy/agg_router.yaml +++ b/examples/backends/vllm/deploy/agg_router.yaml @@ -28,7 +28,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m diff --git 
a/components/backends/vllm/deploy/disagg-multinode.yaml b/examples/backends/vllm/deploy/disagg-multinode.yaml similarity index 90% rename from components/backends/vllm/deploy/disagg-multinode.yaml rename to examples/backends/vllm/deploy/disagg-multinode.yaml index e46b2ed6af..bfb56e6daf 100644 --- a/components/backends/vllm/deploy/disagg-multinode.yaml +++ b/examples/backends/vllm/deploy/disagg-multinode.yaml @@ -14,7 +14,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m @@ -35,7 +35,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m @@ -58,7 +58,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m diff --git a/components/backends/vllm/deploy/disagg.yaml b/examples/backends/vllm/deploy/disagg.yaml similarity index 92% rename from components/backends/vllm/deploy/disagg.yaml rename to examples/backends/vllm/deploy/disagg.yaml index d7288a62da..7ecaf311dc 100644 --- a/components/backends/vllm/deploy/disagg.yaml +++ b/examples/backends/vllm/deploy/disagg.yaml @@ -26,7 +26,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m @@ -46,7 +46,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m diff --git a/components/backends/vllm/deploy/disagg_kvbm.yaml b/examples/backends/vllm/deploy/disagg_kvbm.yaml similarity 
index 94% rename from components/backends/vllm/deploy/disagg_kvbm.yaml rename to examples/backends/vllm/deploy/disagg_kvbm.yaml index d3455a87ce..f4315a13cd 100644 --- a/components/backends/vllm/deploy/disagg_kvbm.yaml +++ b/examples/backends/vllm/deploy/disagg_kvbm.yaml @@ -25,7 +25,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m @@ -57,7 +57,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m diff --git a/components/backends/vllm/deploy/disagg_kvbm_2p2d.yaml b/examples/backends/vllm/deploy/disagg_kvbm_2p2d.yaml similarity index 94% rename from components/backends/vllm/deploy/disagg_kvbm_2p2d.yaml rename to examples/backends/vllm/deploy/disagg_kvbm_2p2d.yaml index 883a64b02b..1aa5281d09 100644 --- a/components/backends/vllm/deploy/disagg_kvbm_2p2d.yaml +++ b/examples/backends/vllm/deploy/disagg_kvbm_2p2d.yaml @@ -25,7 +25,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m @@ -57,7 +57,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m diff --git a/components/backends/vllm/deploy/disagg_kvbm_tp2.yaml b/examples/backends/vllm/deploy/disagg_kvbm_tp2.yaml similarity index 94% rename from components/backends/vllm/deploy/disagg_kvbm_tp2.yaml rename to examples/backends/vllm/deploy/disagg_kvbm_tp2.yaml index 82b9c59f93..439b17a91f 100644 --- a/components/backends/vllm/deploy/disagg_kvbm_tp2.yaml +++ 
b/examples/backends/vllm/deploy/disagg_kvbm_tp2.yaml @@ -27,7 +27,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m @@ -61,7 +61,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m diff --git a/components/backends/vllm/deploy/disagg_planner.yaml b/examples/backends/vllm/deploy/disagg_planner.yaml similarity index 95% rename from components/backends/vllm/deploy/disagg_planner.yaml rename to examples/backends/vllm/deploy/disagg_planner.yaml index 5afbf58c70..5e33a0d866 100644 --- a/components/backends/vllm/deploy/disagg_planner.yaml +++ b/examples/backends/vllm/deploy/disagg_planner.yaml @@ -49,7 +49,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 args: @@ -69,7 +69,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 args: diff --git a/components/backends/vllm/deploy/disagg_router.yaml b/examples/backends/vllm/deploy/disagg_router.yaml similarity index 92% rename from components/backends/vllm/deploy/disagg_router.yaml rename to examples/backends/vllm/deploy/disagg_router.yaml index a298bdcfe5..e46ee66ffe 100644 --- a/components/backends/vllm/deploy/disagg_router.yaml +++ b/examples/backends/vllm/deploy/disagg_router.yaml @@ -28,7 +28,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - 
python3 - -m @@ -47,7 +47,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m diff --git a/components/backends/vllm/launch/agg.sh b/examples/backends/vllm/launch/agg.sh similarity index 100% rename from components/backends/vllm/launch/agg.sh rename to examples/backends/vllm/launch/agg.sh diff --git a/components/backends/vllm/launch/agg_kvbm.sh b/examples/backends/vllm/launch/agg_kvbm.sh similarity index 100% rename from components/backends/vllm/launch/agg_kvbm.sh rename to examples/backends/vllm/launch/agg_kvbm.sh diff --git a/components/backends/vllm/launch/agg_lmcache.sh b/examples/backends/vllm/launch/agg_lmcache.sh similarity index 100% rename from components/backends/vllm/launch/agg_lmcache.sh rename to examples/backends/vllm/launch/agg_lmcache.sh diff --git a/components/backends/vllm/launch/agg_router.sh b/examples/backends/vllm/launch/agg_router.sh similarity index 100% rename from components/backends/vllm/launch/agg_router.sh rename to examples/backends/vllm/launch/agg_router.sh diff --git a/components/backends/vllm/launch/dep.sh b/examples/backends/vllm/launch/dep.sh similarity index 100% rename from components/backends/vllm/launch/dep.sh rename to examples/backends/vllm/launch/dep.sh diff --git a/components/backends/vllm/launch/disagg.sh b/examples/backends/vllm/launch/disagg.sh similarity index 100% rename from components/backends/vllm/launch/disagg.sh rename to examples/backends/vllm/launch/disagg.sh diff --git a/components/backends/vllm/launch/disagg_kvbm.sh b/examples/backends/vllm/launch/disagg_kvbm.sh similarity index 100% rename from components/backends/vllm/launch/disagg_kvbm.sh rename to examples/backends/vllm/launch/disagg_kvbm.sh diff --git a/components/backends/vllm/launch/disagg_kvbm_2p2d.sh b/examples/backends/vllm/launch/disagg_kvbm_2p2d.sh similarity index 100% rename from 
components/backends/vllm/launch/disagg_kvbm_2p2d.sh rename to examples/backends/vllm/launch/disagg_kvbm_2p2d.sh diff --git a/components/backends/vllm/launch/disagg_lmcache.sh b/examples/backends/vllm/launch/disagg_lmcache.sh similarity index 100% rename from components/backends/vllm/launch/disagg_lmcache.sh rename to examples/backends/vllm/launch/disagg_lmcache.sh diff --git a/components/backends/vllm/launch/disagg_router.sh b/examples/backends/vllm/launch/disagg_router.sh similarity index 100% rename from components/backends/vllm/launch/disagg_router.sh rename to examples/backends/vllm/launch/disagg_router.sh diff --git a/components/backends/vllm/launch/dsr1_dep.sh b/examples/backends/vllm/launch/dsr1_dep.sh similarity index 100% rename from components/backends/vllm/launch/dsr1_dep.sh rename to examples/backends/vllm/launch/dsr1_dep.sh diff --git a/examples/basics/kubernetes/Distributed_Inference/README.md b/examples/basics/kubernetes/Distributed_Inference/README.md index d278ca41f3..43a6bf4c7b 100644 --- a/examples/basics/kubernetes/Distributed_Inference/README.md +++ b/examples/basics/kubernetes/Distributed_Inference/README.md @@ -19,7 +19,7 @@ helm fetch https://helm.ngc.nvidia.com/nvidia/ai-dynamo/charts/dynamo-platform-$ helm install dynamo-platform dynamo-platform-${RELEASE_VERSION}.tgz --namespace ${NAMESPACE} ``` 3. Model hosting with vLLM backend -This `agg_router.yaml` is adpated from vLLM deployment [example](https://github.com/ai-dynamo/dynamo/blob/main/components/backends/vllm/deploy/agg_router.yaml). It has following customizations +This `agg_router.yaml` is adapted from vLLM deployment [example](https://github.com/ai-dynamo/dynamo/blob/main/examples/backends/vllm/deploy/agg_router.yaml). 
It has following customizations - Deployed `Qwen/Qwen2.5-1.5B-Instruct` model - Use KV cache based routing in frontend deployment via the `DYN_ROUTER_MODE=kv` environment variable - Mounted a local cache folder `/YOUR/LOCAL/CACHE/FOLDER` for model artifacts reuse diff --git a/examples/basics/kubernetes/Distributed_Inference/agg_router.yaml b/examples/basics/kubernetes/Distributed_Inference/agg_router.yaml index e05d2eab76..a058af8a57 100644 --- a/examples/basics/kubernetes/Distributed_Inference/agg_router.yaml +++ b/examples/basics/kubernetes/Distributed_Inference/agg_router.yaml @@ -39,7 +39,7 @@ spec: volumeMounts: - name: local-model-cache mountPath: /root/.cache - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c diff --git a/examples/basics/kubernetes/Distributed_Inference/disagg_router.yaml b/examples/basics/kubernetes/Distributed_Inference/disagg_router.yaml index cc2e16aaf4..a8bb580757 100644 --- a/examples/basics/kubernetes/Distributed_Inference/disagg_router.yaml +++ b/examples/basics/kubernetes/Distributed_Inference/disagg_router.yaml @@ -36,7 +36,7 @@ spec: type: DirectoryOrCreate mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm volumeMounts: - name: local-model-cache mountPath: /root/.cache @@ -64,7 +64,7 @@ spec: type: DirectoryOrCreate mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm volumeMounts: - name: local-model-cache mountPath: /root/.cache diff --git a/examples/basics/kubernetes/shared_frontend/shared_frontend.yaml b/examples/basics/kubernetes/shared_frontend/shared_frontend.yaml index fb31fe8e21..bd1a183ca1 100644 --- a/examples/basics/kubernetes/shared_frontend/shared_frontend.yaml +++ 
b/examples/basics/kubernetes/shared_frontend/shared_frontend.yaml @@ -46,7 +46,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0 - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c diff --git a/examples/basics/multinode/trtllm/srun_aggregated.sh b/examples/basics/multinode/trtllm/srun_aggregated.sh index 46044c9265..36dcd03099 100755 --- a/examples/basics/multinode/trtllm/srun_aggregated.sh +++ b/examples/basics/multinode/trtllm/srun_aggregated.sh @@ -32,7 +32,7 @@ if [[ -z ${IMAGE} ]]; then echo "ERROR: You need to set the IMAGE environment variable to the " \ "Dynamo+TRTLLM docker image or .sqsh file from 'enroot import' " \ "See how to build one from source here: " \ - "https://github.com/ai-dynamo/dynamo/tree/main/components/backends/trtllm#build-docker" + "https://github.com/ai-dynamo/dynamo/tree/main/docs/backends/trtllm/README.md#build-container" exit 1 fi diff --git a/examples/basics/multinode/trtllm/srun_disaggregated.sh b/examples/basics/multinode/trtllm/srun_disaggregated.sh index 8b6aabf3b6..fe103db1d4 100755 --- a/examples/basics/multinode/trtllm/srun_disaggregated.sh +++ b/examples/basics/multinode/trtllm/srun_disaggregated.sh @@ -37,7 +37,7 @@ if [[ -z ${IMAGE} ]]; then echo "ERROR: You need to set the IMAGE environment variable to the " \ "Dynamo+TRTLLM docker image or .sqsh file from 'enroot import' " \ "See how to build one from source here: " \ - "https://github.com/ai-dynamo/dynamo/tree/main/components/backends/trtllm#build-docker" + "https://github.com/ai-dynamo/dynamo/tree/main/docs/backends/trtllm/README.md#build-container" exit 1 fi diff --git a/examples/deployments/ECS/README.md b/examples/deployments/ECS/README.md index 4a5b8cc4fb..3a4fb4b3fd 100644 --- a/examples/deployments/ECS/README.md +++ b/examples/deployments/ECS/README.md @@ -84,13 +84,13 @@ Please follow steps below to create this task 
|ETCD_ENDPOINTS|Value|http://IP_ADDRESS:2379| |NATS_SERVER|Value|nats://IP_ADDRESS:4222| - Docker configuration -Add `sh,-c` in **Entry point** and `cd components/backends/vllm && python -m dynamo.frontend --router-mode kv & python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager` in **Command** +Add `sh,-c` in **Entry point** and `cd examples/backends/vllm && python -m dynamo.frontend --router-mode kv & python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager` in **Command** 2. Dynamo vLLM PrefillWorker Task Create the PrefillWorker task same as the frontend worker, except for following changes - Set container name as `dynamo-prefill` - No container port mapping -- Docker configuration with command `cd components/backends/vllm && python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker` +- Docker configuration with command `cd examples/backends/vllm && python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker` ## 5. Task Deployment You can create a service or directly run the task from the task definition diff --git a/examples/deployments/ECS/task_definition_frontend.json b/examples/deployments/ECS/task_definition_frontend.json index fda0a28876..2d8e517a2d 100644 --- a/examples/deployments/ECS/task_definition_frontend.json +++ b/examples/deployments/ECS/task_definition_frontend.json @@ -23,7 +23,7 @@ "-c" ], "command": [ - "cd components/backends/vllm && python -m dynamo.frontend --router-mode kv & python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager" + "cd examples/backends/vllm && python -m dynamo.frontend --router-mode kv & python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager" ], "environment": [ { diff --git a/examples/deployments/ECS/task_definition_prefillworker.json b/examples/deployments/ECS/task_definition_prefillworker.json index fdf928bf3d..50f0539490 100644 --- a/examples/deployments/ECS/task_definition_prefillworker.json +++ 
b/examples/deployments/ECS/task_definition_prefillworker.json @@ -15,7 +15,7 @@ "-c" ], "command": [ - "cd components/backends/vllm && python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker" + "cd examples/backends/vllm && python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker" ], "environment": [ { diff --git a/examples/deployments/EKS/Deploy_Dynamo_Cloud.md b/examples/deployments/EKS/Deploy_Dynamo_Cloud.md index 69608b54e8..ef1c75494c 100644 --- a/examples/deployments/EKS/Deploy_Dynamo_Cloud.md +++ b/examples/deployments/EKS/Deploy_Dynamo_Cloud.md @@ -86,7 +86,7 @@ helm install dynamo-platform ./platform/ \ Your pods should be running like below ``` -ubuntu@ip-192-168-83-157:~/dynamo/components/backends/vllm/deploy$ kubectl get pods -A +ubuntu@ip-192-168-83-157:~/dynamo/examples/backends/vllm/deploy$ kubectl get pods -A NAMESPACE NAME READY STATUS RESTARTS AGE dynamo-cloud dynamo-platform-dynamo-operator-controller-manager-86795c5f4j4k 2/2 Running 0 4h17m dynamo-cloud dynamo-platform-etcd-0 1/1 Running 0 4h17m diff --git a/examples/deployments/EKS/Deploy_VLLM_example.md b/examples/deployments/EKS/Deploy_VLLM_example.md index b395781ed5..a60bcd97b8 100644 --- a/examples/deployments/EKS/Deploy_VLLM_example.md +++ b/examples/deployments/EKS/Deploy_VLLM_example.md @@ -3,7 +3,7 @@ ## 1.
Deploy Dynamo Graph ``` -cd dynamo/components/backends/vllm/deploy +cd dynamo/examples/backends/vllm/deploy vim agg_router.yaml #under metadata add namespace: dynamo-cloud and change image to your built base image kubectl apply -f agg_router.yaml ``` @@ -11,7 +11,7 @@ kubectl apply -f agg_router.yaml Your pods should be running like below ``` -ubuntu@ip-192-168-83-157:~/dynamo/components/backends/vllm/deploy$ kubectl get pods -A +ubuntu@ip-192-168-83-157:~/dynamo/examples/backends/vllm/deploy$ kubectl get pods -A NAMESPACE NAME READY STATUS RESTARTS AGE dynamo-cloud dynamo-platform-dynamo-operator-controller-manager-86795c5f4j4k 2/2 Running 0 4h17m dynamo-cloud dynamo-platform-etcd-0 1/1 Running 0 4h17m diff --git a/examples/deployments/GKE/sglang/disagg.yaml b/examples/deployments/GKE/sglang/disagg.yaml index d64752e569..1e49597e66 100644 --- a/examples/deployments/GKE/sglang/disagg.yaml +++ b/examples/deployments/GKE/sglang/disagg.yaml @@ -25,7 +25,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - /bin/sh - -c @@ -48,7 +48,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - /bin/sh - -c diff --git a/examples/deployments/GKE/vllm/disagg.yaml b/examples/deployments/GKE/vllm/disagg.yaml index 098fe06d14..d504c2b410 100644 --- a/examples/deployments/GKE/vllm/disagg.yaml +++ b/examples/deployments/GKE/vllm/disagg.yaml @@ -28,7 +28,7 @@ spec: startupProbe: initialDelaySeconds: 180 image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c @@ -50,7 +50,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + 
workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c diff --git a/launch/dynamo-run/src/main.rs b/launch/dynamo-run/src/main.rs index 1b7ff631bb..3d3438bb6f 100644 --- a/launch/dynamo-run/src/main.rs +++ b/launch/dynamo-run/src/main.rs @@ -94,7 +94,7 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> { "out" => { if val == "sglang" || val == "trtllm" || val == "vllm" { tracing::error!( - "To run the {val} engine please use the Python interface, see root README or look in directory `components/backends/`." + "To run the {val} engine please use the Python interface, see root README or look in directory `examples/backends/`." ); std::process::exit(1); } diff --git a/lib/bindings/python/examples/metrics/README.md b/lib/bindings/python/examples/metrics/README.md index d02ac4df01..9e0b810773 100644 --- a/lib/bindings/python/examples/metrics/README.md +++ b/lib/bindings/python/examples/metrics/README.md @@ -117,7 +117,7 @@ When you need to add or modify metrics in Method 1 (ForwardPassMetrics Pub/Sub v } ``` -4. **`components/backends/sglang/.../publisher.py`** - Update Python code to compute new metric: +4. **`components/src/dynamo/sglang/publisher.py`** - Update Python code to compute new metric: ```python def collect_metrics(): worker_stats = WorkerStats( @@ -268,7 +268,7 @@ Dynamic Registration provides type hints (via `.pyi` stub files) for typed metri ```mermaid graph TB subgraph "Python Layer" - PY[Python Application
components/backends/sglang/main.py] + PY[Python Application
components/src/dynamo/sglang/main.py] style PY fill:#3776ab,color:#fff end diff --git a/pyproject.toml b/pyproject.toml index edc858cf8d..8c8466d226 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,7 +151,7 @@ addopts = [ "--ignore-glob=docs/*", "--ignore-glob=components/src/dynamo/sglang/request_handlers/*", "--ignore-glob=components/src/dynamo/sglang/multimodal_utils/*", - "--ignore-glob=components/backends/sglang/slurm_jobs/*", + "--ignore-glob=examples/backends/sglang/slurm_jobs/*", # FIXME: Get relative/generic blob paths to work here ] xfail_strict = true diff --git a/recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml b/recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml index 888862b92f..455f7943da 100644 --- a/recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml +++ b/recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml @@ -48,7 +48,7 @@ spec: timeoutSeconds: 10 failureThreshold: 600 image: my-registry/sglang-wideep-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - python3 - -m @@ -101,7 +101,7 @@ spec: timeoutSeconds: 10 failureThreshold: 600 image: my-registry/sglang-wideep-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - python3 - -m diff --git a/recipes/deepseek-r1/sglang/disagg-8gpu/deploy.yaml b/recipes/deepseek-r1/sglang/disagg-8gpu/deploy.yaml index b1f96e592e..cb156c4425 100644 --- a/recipes/deepseek-r1/sglang/disagg-8gpu/deploy.yaml +++ b/recipes/deepseek-r1/sglang/disagg-8gpu/deploy.yaml @@ -46,7 +46,7 @@ spec: timeoutSeconds: 10 failureThreshold: 600 image: my-registry/sglang-wideep-runtime:my-tag - workingDir: /workspace/components/backends/sglang + workingDir: /workspace/examples/backends/sglang command: - python3 - -m @@ -95,7 +95,7 @@ spec: timeoutSeconds: 10 failureThreshold: 600 image: my-registry/sglang-wideep-runtime:my-tag - workingDir: /workspace/components/backends/sglang + 
workingDir: /workspace/examples/backends/sglang command: - python3 - -m diff --git a/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml b/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml index a3f0b5c2e6..df23c829f2 100644 --- a/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml +++ b/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml @@ -97,7 +97,7 @@ spec: - mountPath: /opt/dynamo/configs name: llm-config readOnly: true - workingDir: /workspace/components/backends/trtllm + workingDir: /workspace/examples/backends/trtllm volumes: - configMap: name: llm-config diff --git a/recipes/llama-3-70b/vllm/agg/deploy.yaml b/recipes/llama-3-70b/vllm/agg/deploy.yaml index 1a0101e26f..f0d8a93947 100644 --- a/recipes/llama-3-70b/vllm/agg/deploy.yaml +++ b/recipes/llama-3-70b/vllm/agg/deploy.yaml @@ -19,7 +19,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm replicas: 1 VllmPrefillWorker: componentType: worker @@ -43,7 +43,7 @@ spec: - /bin/sh - -c image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm replicas: 1 resources: limits: diff --git a/recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml b/recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml index 229e7196c3..33177e79f7 100644 --- a/recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml +++ b/recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml @@ -19,7 +19,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm replicas: 1 VllmPrefillWorker: componentType: worker @@ -43,7 +43,7 @@ spec: - /bin/sh - -c image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm replicas: 1 resources: 
limits: @@ -72,7 +72,7 @@ spec: - /bin/sh - -c image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm replicas: 1 resources: limits: diff --git a/recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml b/recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml index fd2c16fb62..0879899046 100644 --- a/recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml +++ b/recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml @@ -19,7 +19,7 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm replicas: 1 VllmPrefillWorker: componentType: worker @@ -53,7 +53,7 @@ spec: - /bin/sh - -c image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm replicas: 2 resources: limits: @@ -92,7 +92,7 @@ spec: - /bin/sh - -c image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm replicas: 1 resources: limits: diff --git a/recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml b/recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml index b11aa10c6b..4400919076 100644 --- a/recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml +++ b/recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml @@ -110,7 +110,7 @@ spec: - mountPath: /opt/dynamo/configs name: llm-config readOnly: true - workingDir: /workspace/components/backends/trtllm + workingDir: /workspace/examples/backends/trtllm volumes: - configMap: name: llm-config diff --git a/recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml b/recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml index 6fb79287e1..1bd93ae7fc 100644 --- a/recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml +++ b/recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml @@ -270,7 +270,7 @@ spec: - mountPath: /opt/dynamo/configs 
name: llm-config-prefill readOnly: true - workingDir: /workspace/components/backends/trtllm + workingDir: /workspace/examples/backends/trtllm volumes: - configMap: name: llm-config-prefill @@ -330,7 +330,7 @@ spec: - mountPath: /opt/dynamo/configs name: llm-config-decode readOnly: true - workingDir: /workspace/components/backends/trtllm + workingDir: /workspace/examples/backends/trtllm volumes: - configMap: name: llm-config-decode diff --git a/tests/fault_tolerance/deploy/scenarios.py b/tests/fault_tolerance/deploy/scenarios.py index faca2633d6..ffb1a0d408 100644 --- a/tests/fault_tolerance/deploy/scenarios.py +++ b/tests/fault_tolerance/deploy/scenarios.py @@ -177,8 +177,8 @@ def _create_deployments_for_backend(backend): # Define the yaml files for agg and disagg deployments yaml_files = { - "agg": f"components/backends/{backend}/deploy/agg.yaml", - "disagg": f"components/backends/{backend}/deploy/disagg.yaml", + "agg": f"examples/backends/{backend}/deploy/agg.yaml", + "disagg": f"examples/backends/{backend}/deploy/disagg.yaml", } # Define the different configurations to test diff --git a/tests/fault_tolerance/deploy/templates/vllm/moe_agg.yaml b/tests/fault_tolerance/deploy/templates/vllm/moe_agg.yaml index 3e9c58926c..e0b21b562c 100644 --- a/tests/fault_tolerance/deploy/templates/vllm/moe_agg.yaml +++ b/tests/fault_tolerance/deploy/templates/vllm/moe_agg.yaml @@ -49,7 +49,7 @@ spec: - name: nvcr-imagepullsecret mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m diff --git a/tests/fault_tolerance/deploy/templates/vllm/moe_disagg.yaml b/tests/fault_tolerance/deploy/templates/vllm/moe_disagg.yaml index b60018829c..b0d7dec38e 100644 --- a/tests/fault_tolerance/deploy/templates/vllm/moe_disagg.yaml +++ b/tests/fault_tolerance/deploy/templates/vllm/moe_disagg.yaml @@ -52,7 +52,7 @@ spec: - name: nvcr-imagepullsecret mainContainer: 
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m @@ -117,7 +117,7 @@ spec: - name: nvcr-imagepullsecret mainContainer: image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 - -m diff --git a/tests/planner/README.md b/tests/planner/README.md index aac10fc55b..e903e34643 100644 --- a/tests/planner/README.md +++ b/tests/planner/README.md @@ -10,7 +10,7 @@ This directory contains comprehensive testing tools for validating the SLA plann The SLA planner monitors metrics every 60 seconds (default adjustment interval) and scales prefill/decode workers based on TTFT, ITL, and request patterns. -To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./components/backends//README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.txt` +To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./examples/backends//README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. 
If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.txt` ## Pre-Requisite: Pre-Deployment Profiling Data @@ -170,12 +170,12 @@ Test complete scaling behavior including Kubernetes deployment and load generati **Prepare the test deployment manifest:** -The test requires modifying `components/backends/vllm/deploy/disagg_planner.yaml` with test-specific planner arguments: +The test requires modifying `examples/backends/vllm/deploy/disagg_planner.yaml` with test-specific planner arguments: 1. Copy the base deployment: ```bash -cp components/backends/vllm/deploy/disagg_planner.yaml tests/planner/scaling/disagg_planner.yaml +cp examples/backends/vllm/deploy/disagg_planner.yaml tests/planner/scaling/disagg_planner.yaml ``` 2. Edit `tests/planner/scaling/disagg_planner.yaml`. Ensure all services use the correct image. Modify the Planner service args: diff --git a/tests/planner/perf_test_configs/agg_8b.yaml b/tests/planner/perf_test_configs/agg_8b.yaml index c176e62cec..19c0f618cd 100644 --- a/tests/planner/perf_test_configs/agg_8b.yaml +++ b/tests/planner/perf_test_configs/agg_8b.yaml @@ -39,7 +39,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c @@ -89,7 +89,7 @@ spec: periodSeconds: 10 failureThreshold: 60 image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c diff --git a/tests/planner/perf_test_configs/disagg_8b_2p2d.yaml b/tests/planner/perf_test_configs/disagg_8b_2p2d.yaml index 4b38a9a9b0..3791766277 100644 --- a/tests/planner/perf_test_configs/disagg_8b_2p2d.yaml +++ b/tests/planner/perf_test_configs/disagg_8b_2p2d.yaml @@ -39,7 +39,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/vllm-runtime:my-tag - 
workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c @@ -89,7 +89,7 @@ spec: periodSeconds: 10 failureThreshold: 60 image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c @@ -139,7 +139,7 @@ spec: periodSeconds: 10 failureThreshold: 60 image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c diff --git a/tests/planner/perf_test_configs/disagg_8b_3p1d.yaml b/tests/planner/perf_test_configs/disagg_8b_3p1d.yaml index 41618cf472..e5a01d8823 100644 --- a/tests/planner/perf_test_configs/disagg_8b_3p1d.yaml +++ b/tests/planner/perf_test_configs/disagg_8b_3p1d.yaml @@ -39,7 +39,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c @@ -89,7 +89,7 @@ spec: periodSeconds: 10 failureThreshold: 60 image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c @@ -139,7 +139,7 @@ spec: periodSeconds: 10 failureThreshold: 60 image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c diff --git a/tests/planner/perf_test_configs/disagg_8b_planner.yaml b/tests/planner/perf_test_configs/disagg_8b_planner.yaml index d3e1761fa2..3c0ee6ff85 100644 --- a/tests/planner/perf_test_configs/disagg_8b_planner.yaml +++ b/tests/planner/perf_test_configs/disagg_8b_planner.yaml @@ -42,7 +42,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - 
-c @@ -139,7 +139,7 @@ spec: periodSeconds: 10 failureThreshold: 60 image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 args: @@ -196,7 +196,7 @@ spec: periodSeconds: 10 failureThreshold: 60 image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - python3 args: diff --git a/tests/planner/perf_test_configs/disagg_8b_tp2.yaml b/tests/planner/perf_test_configs/disagg_8b_tp2.yaml index 3c83f78bcd..96522a027e 100644 --- a/tests/planner/perf_test_configs/disagg_8b_tp2.yaml +++ b/tests/planner/perf_test_configs/disagg_8b_tp2.yaml @@ -39,7 +39,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c @@ -89,7 +89,7 @@ spec: periodSeconds: 10 failureThreshold: 60 image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c @@ -139,7 +139,7 @@ spec: periodSeconds: 10 failureThreshold: 60 image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c diff --git a/tests/planner/profiling_results/H200_TP1P_TP1D/disagg.yaml b/tests/planner/profiling_results/H200_TP1P_TP1D/disagg.yaml index 6d8c378acf..62001fc43d 100644 --- a/tests/planner/profiling_results/H200_TP1P_TP1D/disagg.yaml +++ b/tests/planner/profiling_results/H200_TP1P_TP1D/disagg.yaml @@ -39,7 +39,7 @@ spec: extraPodSpec: mainContainer: image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c @@ -89,7 +89,7 @@ spec: periodSeconds: 10 failureThreshold: 60 image: 
my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c @@ -139,7 +139,7 @@ spec: periodSeconds: 10 failureThreshold: 60 image: my-registry/vllm-runtime:my-tag - workingDir: /workspace/components/backends/vllm + workingDir: /workspace/examples/backends/vllm command: - /bin/sh - -c diff --git a/tests/profiler/test_profile_sla_aiconfigurator.py b/tests/profiler/test_profile_sla_aiconfigurator.py index 9e6525a6d4..769140a910 100644 --- a/tests/profiler/test_profile_sla_aiconfigurator.py +++ b/tests/profiler/test_profile_sla_aiconfigurator.py @@ -30,7 +30,7 @@ def __init__(self): self.model = "" self.dgd_image = "" self.backend = "trtllm" - self.config = "components/backends/trtllm/deploy/disagg.yaml" + self.config = "examples/backends/trtllm/deploy/disagg.yaml" self.output_dir = "/tmp/test_profiling_results" self.namespace = "test-namespace" self.min_num_gpus_per_engine = 1 diff --git a/tests/profiler/test_profile_sla_dryrun.py b/tests/profiler/test_profile_sla_dryrun.py index 556cc0789e..a6cac33461 100644 --- a/tests/profiler/test_profile_sla_dryrun.py +++ b/tests/profiler/test_profile_sla_dryrun.py @@ -46,7 +46,7 @@ def vllm_args(self): class Args: def __init__(self): self.backend = "vllm" - self.config = "components/backends/vllm/deploy/disagg.yaml" + self.config = "examples/backends/vllm/deploy/disagg.yaml" self.output_dir = "/tmp/test_profiling_results" self.namespace = "test-namespace" self.model = "" @@ -82,7 +82,7 @@ def sglang_args(self): class Args: def __init__(self): self.backend = "sglang" - self.config = "components/backends/sglang/deploy/disagg.yaml" + self.config = "examples/backends/sglang/deploy/disagg.yaml" self.output_dir = "/tmp/test_profiling_results" self.namespace = "test-namespace" self.model = "" @@ -132,7 +132,7 @@ def trtllm_args(self): class Args: def __init__(self): self.backend = "trtllm" - self.config = 
"components/backends/trtllm/deploy/disagg.yaml" + self.config = "examples/backends/trtllm/deploy/disagg.yaml" self.output_dir = "/tmp/test_profiling_results" self.namespace = "test-namespace" self.model = "" diff --git a/tests/serve/test_sglang.py b/tests/serve/test_sglang.py index 0379b45245..1cb6ded814 100644 --- a/tests/serve/test_sglang.py +++ b/tests/serve/test_sglang.py @@ -34,7 +34,7 @@ class SGLangConfig(EngineConfig): sglang_dir = os.environ.get("SGLANG_DIR") or os.path.join( - WORKSPACE_DIR, "components/backends/sglang" + WORKSPACE_DIR, "examples/backends/sglang" ) sglang_configs = { diff --git a/tests/serve/test_trtllm.py b/tests/serve/test_trtllm.py index ecd349fe1c..d008acafac 100644 --- a/tests/serve/test_trtllm.py +++ b/tests/serve/test_trtllm.py @@ -30,7 +30,7 @@ class TRTLLMConfig(EngineConfig): trtllm_dir = os.environ.get("TRTLLM_DIR") or os.path.join( - WORKSPACE_DIR, "components/backends/trtllm" + WORKSPACE_DIR, "examples/backends/trtllm" ) # trtllm test configurations diff --git a/tests/serve/test_vllm.py b/tests/serve/test_vllm.py index 03a1e1fa3b..19a6740e2f 100644 --- a/tests/serve/test_vllm.py +++ b/tests/serve/test_vllm.py @@ -31,7 +31,7 @@ class VLLMConfig(EngineConfig): vllm_dir = os.environ.get("VLLM_DIR") or os.path.join( - WORKSPACE_DIR, "components/backends/vllm" + WORKSPACE_DIR, "examples/backends/vllm" ) # vLLM test configurations diff --git a/tests/utils/managed_deployment.py b/tests/utils/managed_deployment.py index 5b5ba94891..0317f19150 100644 --- a/tests/utils/managed_deployment.py +++ b/tests/utils/managed_deployment.py @@ -828,7 +828,7 @@ async def main(): workspace_dir = get_workspace_dir() deployment_spec = DeploymentSpec( - os.path.join(workspace_dir, "components/backends/vllm/deploy/agg.yaml") + os.path.join(workspace_dir, "examples/backends/vllm/deploy/agg.yaml") ) deployment_spec.disable_grove()