diff --git a/deploy/inference-gateway/README.md b/deploy/inference-gateway/README.md index 36eb254b37..cb40f439af 100644 --- a/deploy/inference-gateway/README.md +++ b/deploy/inference-gateway/README.md @@ -53,7 +53,7 @@ b. Install the Inference Extension CRDs (Inference Model and Inference Pool CRDs ```bash INFERENCE_EXTENSION_VERSION=v0.5.1 -kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$INFERENCE_EXTENSION_VERSION/manifests.yaml -n my-model +kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$INFERENCE_EXTENSION_VERSION/manifests.yaml ``` c. Install `kgateway` CRDs and kgateway. diff --git a/recipes/README.md b/recipes/README.md index 27a43b59c4..236a38a71a 100644 --- a/recipes/README.md +++ b/recipes/README.md @@ -13,14 +13,14 @@ This repository contains production-ready recipes for deploying large language m ## Available Models -| Model Family | Framework | Deployment Mode | GPU Requirements | Status | Benchmark | -|-----------------|-----------|---------------------|------------------|--------|-----------| -| llama-3-70b | vllm | agg | 4x H100/H200 | ✅ | ✅ | -| llama-3-70b | vllm | disagg (1 node) | 8x H100/H200 | ✅ | ✅ | -| llama-3-70b | vllm | disagg (multi-node) | 16x H100/H200 | ✅ | ✅ | -| deepseek-r1 | sglang | disagg (1 node, wide-ep) | 8x H200 | ✅ | 🚧 | -| deepseek-r1 | sglang | disagg (multi-node, wide-ep) | 16x H200 | ✅ | 🚧 | -| gpt-oss-120b | trtllm | agg | 4x GB200 | ✅ | ✅ | +| Model Family | Framework | Deployment Mode | GPU Requirements | Status | Benchmark |GAIE-integration | +|-----------------|-----------|---------------------|------------------|--------|-----------|------------------| +| llama-3-70b | vllm | agg | 4x H100/H200 | ✅ | ✅ |✅ | +| llama-3-70b | vllm | disagg (1 node) | 8x H100/H200 | ✅ | ✅ | 🚧 | +| llama-3-70b | vllm | disagg (multi-node) | 16x H100/H200 | ✅ | ✅ |🚧 | +| deepseek-r1 | sglang | disagg (1 node, wide-ep) | 8x 
H200 | ✅ | 🚧 |🚧 |
+| deepseek-r1 | sglang | disagg (multi-node, wide-ep) | 16x H200 | ✅ | 🚧 |🚧 |
+| gpt-oss-120b | trtllm | agg | 4x GB200 | ✅ | ✅ |🚧 |
 
 **Legend:**
 - ✅ Functional
@@ -89,9 +89,7 @@ vim hf_hub_secret/hf_hub_secret.yaml
 kubectl apply -f hf_hub_secret/hf_hub_secret.yaml -n ${NAMESPACE}
 ```
 
-### 6. Configure Storage Class
-
-Configure persistent storage for model caching:
+6. Configure Storage Class
 
 ```bash
 # Check available storage classes
@@ -160,6 +158,20 @@ kubectl apply -f hf_hub_secret/hf_hub_secret.yaml -n ${NAMESPACE}
 ./run.sh --dry-run --model llama-3-70b --framework vllm --deployment agg
 ```
 
+## If deploying with the Gateway API Inference Extension (GAIE)
+
+1. Follow [Deploy Inference Gateway Section 2](../deploy/inference-gateway/README.md#2-deploy-inference-gateway) to install GAIE.
+
+2. Apply manifests by running a script.
+
+```bash
+# Match the block size to the cli value in your deployment file deploy.yaml: - "python3 -m dynamo.vllm ... --block-size 128"
+export DYNAMO_KV_BLOCK_SIZE=128
+export EPP_IMAGE=nvcr.io/you/epp:tag
+# Add the --gaie flag to the usual invocation (--gaie takes no value), e.g.:
+./run.sh --model llama-3-70b --framework vllm --deployment agg --gaie
+```
+The script will perform gateway checks and apply the manifests.
 
 ## Option 2: Manual Deployment
 
diff --git a/recipes/gaie_checks.sh b/recipes/gaie_checks.sh
new file mode 100755
index 0000000000..3a4750beb0
--- /dev/null
+++ b/recipes/gaie_checks.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Pre-flight checks for a Gateway API Inference Extension (GAIE) deployment.
+# Ensures the target namespace exists, then verifies that the Gateway API and
+# GAIE CRDs are Established, that the kgateway pods are Ready, and that the
+# 'inference-gateway' Gateway exists in the target namespace.
+#
+# Environment:
+#   NAMESPACE  namespace to deploy into (default: dynamo)
+#   KGW_NS     namespace of the kgateway controller (default: kgateway-system)
+set -Eeuo pipefail
+
+# ===== Config (env overridable) =====
+# Defaults are assigned before first use: under `set -u`, reading an unset
+# NAMESPACE would abort the script with "unbound variable".
+: "${NAMESPACE:=dynamo}"
+KGW_NS="${KGW_NS:-kgateway-system}"
+
+ok() { printf "✅ %s\n" "$*"; }
+fail() { printf "❌ %s\n" "$*" >&2; exit 1; }
+info() { printf "ℹ️ %s\n" "$*"; }
+
+# Abort unless the executable named by $1 is on PATH.
+need() { command -v "$1" >/dev/null 2>&1 || fail "'$1' is required"; }
+
+# ===== Pre-flight checks =====
+need helm
+need kubectl
+
+# ===== Namespace ensure =====
+if ! kubectl get ns "$NAMESPACE" >/dev/null 2>&1; then
+  kubectl create namespace "$NAMESPACE"
+fi
+
+GATEWAY_CRDS=(
+  gateways.gateway.networking.k8s.io
+  gatewayclasses.gateway.networking.k8s.io
+  httproutes.gateway.networking.k8s.io
+  referencegrants.gateway.networking.k8s.io
+)
+info "Checking Gateway API CRDs…"
+for c in "${GATEWAY_CRDS[@]}"; do
+  kubectl get crd "$c" >/dev/null 2>&1 || fail "Missing CRD: $c (run step a)"
+  kubectl wait --for=condition=Established "crd/$c" --timeout=60s >/dev/null || fail "CRD not Established: $c"
+done
+ok "Gateway API CRDs present & Established"
+
+GAIE_CRDS=(
+  inferencemodels.inference.networking.x-k8s.io
+  inferencepools.inference.networking.x-k8s.io
+)
+
+info "Checking GAIE (Inference Extension) CRDs…"
+for c in "${GAIE_CRDS[@]}"; do
+  kubectl get crd "$c" >/dev/null 2>&1 || fail "Missing CRD: $c (run step b install of inference extension)"
+  kubectl wait --for=condition=Established "crd/$c" --timeout=60s >/dev/null || fail "CRD not Established: $c"
+done
+ok "GAIE CRDs present & Established"
+
+info "Checking Kgateway controller in namespace '$KGW_NS'…"
+# namespace must exist
+kubectl get ns "$KGW_NS" >/dev/null 2>&1 || fail "Namespace '$KGW_NS' not found (run step c Helm installs)"
+
+# Look the pods up by label. `kubectl get` exits 0 even when the selector
+# matches nothing, so decide on the returned names, not on the exit status.
+PODS=$(kubectl get pods -n "$KGW_NS" -l app.kubernetes.io/name=kgateway -o name 2>/dev/null || true)
+if [[ -z "$PODS" ]]; then
+  # fallback: charts sometimes label differently, so match on the pod name
+  PODS=$(kubectl get pods -n "$KGW_NS" -o name | grep -E 'kgateway|gateway' || true)
+fi
+[[ -n "$PODS" ]] || fail "Kgateway pods not found in '$KGW_NS'"
+
+# pods should be Ready
+mapfile -t POD_NAMES <<<"$PODS"
+for p in "${POD_NAMES[@]}"; do
+  kubectl wait -n "$KGW_NS" --for=condition=Ready "$p" --timeout=180s >/dev/null || fail "Pod not Ready: $p"
+done
+ok "Kgateway controller pods Ready"
+
+kubectl get gateway.gateway.networking.k8s.io inference-gateway -n "$NAMESPACE" >/dev/null 2>&1 || fail "Gateway 'inference-gateway' not found in $NAMESPACE (apply step d manifest)"
+
+ok "GAIE is installed and the gateway is up in namespace '$NAMESPACE'."
diff --git a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/configmap.yaml b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/configmap.yaml
new file mode 100644
index 0000000000..ae7cada4e3
--- /dev/null
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/configmap.yaml
@@ -0,0 +1,48 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: You can remove the namespace field if using kubectl apply -n +apiVersion: v1 +kind: ConfigMap +metadata: + name: epp-config + labels: + app.kubernetes.io/name: dynamo-gaie + app.kubernetes.io/instance: llama3-70b-agg +data: + epp-config-dynamo.yaml: | + apiVersion: inference.networking.x-k8s.io/v1alpha1 + kind: EndpointPickerConfig + plugins: + # Required: tells EPP which profile to use (even if you only have one) + - type: single-profile-handler + + # Picker: chooses the final endpoint after scoring + - name: picker + type: max-score-picker + - name: dyn-pre + type: dynamo-inject-workerid + parameters: {} + - name: dyn-kv + type: kv-aware-scorer + parameters: + frontendURL: http://127.0.0.1:8000/v1/chat/completions + timeoutMS: 10000 + schedulingProfiles: + - name: default + plugins: + - pluginRef: dyn-kv + weight: 1 + - pluginRef: picker diff --git a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/deployment.yaml b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/deployment.yaml new file mode 100644 index 0000000000..b4160df134 --- /dev/null +++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/deployment.yaml @@ -0,0 +1,107 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: Update the namespace field below to match your deployment namespace +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llama3-70b-agg-epp + labels: + app: llama3-70b-agg-epp +spec: + replicas: 1 + selector: + matchLabels: + app: llama3-70b-agg-epp + template: + metadata: + labels: + app: llama3-70b-agg-epp + spec: + serviceAccountName: epp-sa + terminationGracePeriodSeconds: 130 + + imagePullSecrets: + - name: docker-imagepullsecret + + containers: + - name: epp + image: nvcr.io/nvstaging/ai-dynamo/epp-inference-extension-dynamo:v0.6.0-1 + imagePullPolicy: IfNotPresent + resources: + requests: + memory: "1Gi" + cpu: "1" + limits: + memory: "2Gi" + cpu: "2" + command: ["/bin/sh", "-c"] + args: + - > + exec /epp + -poolName "llama3-70b-agg-pool" + -poolNamespace "$POD_NAMESPACE" + -v 4 --zap-encoder json + -grpcPort 9002 -grpcHealthPort 9003 + -configFile /etc/epp/epp-config-dynamo.yaml + + volumeMounts: + - name: epp-config + mountPath: /etc/epp + readOnly: true + + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: PLATFORM_NAMESPACE + value: "$(POD_NAMESPACE)" # set to your dynamo platform namespace if different + - name: ETCD_ENDPOINTS + value: "dynamo-platform-etcd.$(PLATFORM_NAMESPACE):2379" # update dynamo-platform to appropriate namespace + - name: NATS_SERVER + value: "nats://dynamo-platform-nats.$(PLATFORM_NAMESPACE):4222" # update dynamo-platform to appropriate namespace + - name: DYN_NAMESPACE + value: "llama3-70b-agg" + - name: DYNAMO_KV_BLOCK_SIZE + value: "128" # UPDATE to match the 
--block-size in your deploy.yaml engine command + - name: USE_STREAMING + value: "true" + + ports: + - containerPort: 9002 + - containerPort: 9003 + - name: metrics + containerPort: 9090 + livenessProbe: + grpc: + port: 9003 + service: inference-extension + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + grpc: + port: 9003 + service: inference-extension + initialDelaySeconds: 5 + periodSeconds: 10 + + volumes: + - name: epp-config + configMap: + name: epp-config + items: + - key: epp-config-dynamo.yaml + path: epp-config-dynamo.yaml diff --git a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/http-route.yaml b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/http-route.yaml new file mode 100644 index 0000000000..cc43b938a5 --- /dev/null +++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/http-route.yaml @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# NOTE: You can remove metadata.namespace if using kubectl apply -n +# The backendRefs.namespace field should match where your InferencePool is deployed +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: llama3-70b-agg-route +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: inference-gateway + rules: + - backendRefs: + - group: inference.networking.x-k8s.io + kind: InferencePool + name: llama3-70b-agg-pool + port: 8000 + weight: 1 + matches: + - path: + type: PathPrefix + value: / + timeouts: + request: 300s diff --git a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/service.yaml b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/service.yaml new file mode 100644 index 0000000000..0eab9970fd --- /dev/null +++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/service.yaml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# NOTE: metadata.namespace is intentionally omitted; apply with `kubectl apply -n <namespace>`.
+apiVersion: v1
+kind: Service
+metadata:
+  name: llama3-70b-agg-epp
+spec:
+  selector:
+    app: llama3-70b-agg-epp  # must match the pod label set by the EPP Deployment template
+  ports:
+    - protocol: TCP
+      port: 9002
+      targetPort: 9002
+      appProtocol: http2
+  type: ClusterIP
+
diff --git a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-model.yaml b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-model.yaml
new file mode 100644
index 0000000000..3876b2f751
--- /dev/null
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-model.yaml
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +# NOTE: You can remove the namespace field if using kubectl apply -n +apiVersion: inference.networking.x-k8s.io/v1alpha2 +kind: InferenceModel +metadata: + name: llama3-70b-agg-model +spec: + criticality: Critical + modelName: RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic + poolRef: + group: inference.networking.x-k8s.io + kind: InferencePool + name: llama3-70b-agg-pool + diff --git a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-pool.yaml b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-pool.yaml new file mode 100644 index 0000000000..f4a675a41b --- /dev/null +++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-pool.yaml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# NOTE: You can remove the namespace field if using kubectl apply -n +apiVersion: inference.networking.x-k8s.io/v1alpha2 +kind: InferencePool +metadata: + name: llama3-70b-agg-pool +spec: + targetPortNumber: 8000 + selector: + nvidia.com/dynamo-component: Frontend + nvidia.com/dynamo-namespace: llama3-70b-agg # # This is the Dynamo namespace where the model is deployed + extensionRef: + failureMode: FailOpen + group: "" + kind: Service + name: llama3-70b-agg-epp diff --git a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/cluster-role.yaml b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/cluster-role.yaml new file mode 100644 index 0000000000..dd30593c30 --- /dev/null +++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/cluster-role.yaml @@ -0,0 +1,41 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: pod-read +rules: +- apiGroups: ["inference.networking.x-k8s.io"] + resources: ["inferencepools"] + verbs: ["get", "watch", "list"] +- apiGroups: ["inference.networking.x-k8s.io"] + resources: ["inferencemodels"] + verbs: ["get", "watch", "list"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create + diff --git a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/role-binding.yaml b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/role-binding.yaml new file mode 100644 index 0000000000..d169729432 --- /dev/null +++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/role-binding.yaml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# NOTE: This is a namespaced RoleBinding that references the `pod-read` ClusterRole.
+# metadata.namespace and subjects[].namespace are intentionally omitted:
+# apply with `kubectl apply -n <namespace>`, the namespace where the epp-sa ServiceAccount lives.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: pod-read-binding
+  # no metadata.namespace - kubectl -n sets it
+subjects:
+  - kind: ServiceAccount
+    name: epp-sa
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: pod-read
diff --git a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/service-account.yaml b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/service-account.yaml
new file mode 100644
index 0000000000..974ee5c102
--- /dev/null
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/service-account.yaml
@@ -0,0 +1,20 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: epp-sa
+# no metadata.namespace (kubectl -n sets it)
diff --git a/recipes/run.sh b/recipes/run.sh
index af83c65b2e..980c9333b6 100755
--- a/recipes/run.sh
+++ b/recipes/run.sh
@@ -14,9 +14,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+set -euo pipefail
+IFS=$'\n\t'
+
 RECIPES_DIR="$( cd "$( dirname "$0" )" && pwd )"
 # Default values
 NAMESPACE="${NAMESPACE:-dynamo}"
+DEPLOY_TYPE=""
+GAIE="${GAIE:-false}"
 DEPLOYMENT=""
 MODEL=""
 FRAMEWORK=""
@@ -36,9 +41,10 @@ usage() {
     echo "  --deployment   Deployment type (e.g., agg, disagg etc, please refer to the README.md for available deployment types)"
     echo ""
     echo "Optional:"
-    echo "  --namespace    Kubernetes namespace (default: dynamo)"
-    echo "  --dry-run      Print commands without executing them"
-    echo "  -h, --help     Show this help message"
+    echo "  --namespace          Kubernetes namespace (default: dynamo)"
+    echo "  --dry-run            Print commands without executing them"
+    echo "  --gaie[=true|false]  Enable GAIE integration subfolder (applies GAIE manifests, skips benchmark) (default: ${GAIE})"
+    echo "  -h, --help           Show this help message"
     echo ""
     echo "Environment Variables:"
     echo "  NAMESPACE      Kubernetes namespace (default: dynamo)"
@@ -98,6 +104,22 @@ while [[ $# -gt 0 ]]; do
                 missing_requirement "$1"
             fi
             ;;
+        --gaie)
+            GAIE=true
+            shift
+            ;;
+        --gaie=false)
+            GAIE=false
+            shift
+            ;;
+        --gaie=*)
+            GAIE="${1#*=}"
+            case "${GAIE,,}" in
+                true|false) GAIE="${GAIE,,}";;
+                *) echo "ERROR: --gaie must be true or false" >&2; exit 1;;
+            esac
+            shift
+            ;;
         -h|--help)
             usage
             ;;
@@ -137,6 +159,7 @@ fi
 MODEL_DIR="$RECIPES_DIR/$MODEL"
 FRAMEWORK_DIR="$MODEL_DIR/${FRAMEWORK,,}"
 DEPLOY_PATH="$FRAMEWORK_DIR/$DEPLOYMENT"
+INTEGRATION="$([[ "${GAIE,,}" == "true" ]] && echo gaie || echo "")"
 
 # Check if model directory exists
 if [[ ! -d "$MODEL_DIR" ]]; then
@@ -188,6 +211,7 @@ echo "Model: $MODEL"
 echo "Framework: ${FRAMEWORK,,}"
 echo "Deployment Type: $DEPLOYMENT"
 echo "Namespace: $NAMESPACE"
+echo "GAIE integration: $GAIE"
 echo "======================================"
 
 # Handle model downloading
@@ -205,6 +229,21 @@ $DRY_RUN kubectl wait --for=condition=Complete job/$MODEL_DOWNLOAD_JOB_NAME -n $
 echo "Deploying $MODEL ${FRAMEWORK,,} $DEPLOYMENT configuration..."
 $DRY_RUN kubectl apply -n $NAMESPACE -f $DEPLOY_FILE
 
+if [[ "$INTEGRATION" == "gaie" ]]; then
+    GAIE_MANIFESTS="$DEPLOY_PATH/gaie/k8s-manifests"
+    if [[ ! -d "$GAIE_MANIFESTS" ]]; then
+        echo "ERROR: GAIE manifests not found: $GAIE_MANIFESTS" >&2
+        exit 1
+    fi
+    # Verify the gateway prerequisites. NAMESPACE is passed explicitly because it
+    # is not exported here and may have been overridden with --namespace.
+    $DRY_RUN env NAMESPACE="$NAMESPACE" "$RECIPES_DIR/gaie_checks.sh"
+    # The manifests live in subdirectories (epp/, model/, rbac/), so -R is required.
+    $DRY_RUN kubectl apply -R -f "$GAIE_MANIFESTS" -n "$NAMESPACE"
+    # For now do not run the benchmark
+    exit 0
+fi
+
 # Launch the benchmark job (if available)
 if [[ "$PERF_AVAILABLE" == "true" ]]; then
     echo "Launching benchmark job..."