Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 63 additions & 19 deletions .github/workflows/nightly-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -162,16 +162,13 @@ jobs:
if-no-files-found: ignore

# ── GPU E2E (Ollama local inference) ──────────────────────────
# Enable by setting repository variable GPU_E2E_ENABLED=true
# (Settings → Secrets and variables → Actions → Variables)
#
# Runner labels: using 'self-hosted' for now. Refine to
# [self-hosted, linux, x64, gpu] once NVIDIA runner labels are confirmed.
# Runs on an ephemeral Brev GPU instance with Ollama pre-installed.
gpu-e2e:
if: github.repository == 'NVIDIA/NemoClaw' && vars.GPU_E2E_ENABLED == 'true'
runs-on: self-hosted
timeout-minutes: 60
runs-on: ubuntu-latest
timeout-minutes: 90
env:
BREV_API_TOKEN: ${{ secrets.BREV_API_TOKEN }}
NEMOCLAW_NON_INTERACTIVE: "1"
NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
NEMOCLAW_SANDBOX_NAME: "e2e-gpu-ollama"
Expand All @@ -181,19 +178,59 @@ jobs:
- name: Checkout
uses: actions/checkout@v6

- name: Verify GPU availability
# Install a pinned Brev CLI release into /usr/local/bin so later steps can
# call `brev`. The version is declared once so the release tag and the asset
# name inside the download URL cannot drift apart when it is bumped.
- name: Install Brev CLI
  env:
    BREV_VERSION: "0.6.322"
  run: |
    set -euo pipefail
    # --retry rides out transient download errors on unattended nightly runs.
    curl -fsSL --retry 3 --retry-delay 5 -o /tmp/brev.tar.gz \
      "https://github.com/brevdev/brev-cli/releases/download/v${BREV_VERSION}/brev-cli_${BREV_VERSION}_linux_amd64.tar.gz"
    # Extract only the `brev` member of the archive.
    sudo tar -xzf /tmp/brev.tar.gz -C /usr/local/bin brev
    sudo chmod +x /usr/local/bin/brev

- name: Provision Brev GPU Instance & Run Test
env:
INSTANCE_NAME: e2e-gpu-nightly-${{ github.run_id }}
run: |
echo "=== GPU Info ==="
nvidia-smi
echo ""
echo "=== VRAM ==="
nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
echo ""
echo "=== Docker ==="
docker info --format '{{.ServerVersion}}'

- name: Run GPU E2E test (Ollama local inference)
run: bash test/e2e/test-gpu-e2e.sh
# Provision the GPU instance from the repository's startup script.
# A form-created launchable could substitute its template ID here instead.
echo "Provisioning GPU instance..."
brev create --name "$INSTANCE_NAME" \
  --flavor "t4" \
  --startup-script "@scripts/brev-launchable-ci-gpu.sh"

# Poll for the readiness sentinel file: 20 attempts, 30 seconds apart
# (roughly 10 minutes in total) before giving up.
echo "Waiting for readiness sentinel..."
READY=0
for i in {1..20}; do
  if brev exec "$INSTANCE_NAME" -- cat /var/run/nemoclaw-launchable-ready >/dev/null 2>&1; then
    READY=1
    break
  fi
  sleep 30
done

if [ "$READY" -eq 0 ]; then
  echo "Instance did not become ready in time."
  exit 1
fi

echo "Running GPU E2E tests remotely..."
# Forward the needed environment variables to the remote command. Each value
# is shell-quoted with printf %q so spaces or metacharacters in a value cannot
# split or alter the remote command line; an unset variable is forwarded as
# an empty string.
remote_exports=""
for var in \
  NEMOCLAW_NON_INTERACTIVE \
  NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE \
  NEMOCLAW_SANDBOX_NAME \
  NEMOCLAW_RECREATE_SANDBOX \
  NEMOCLAW_PROVIDER; do
  remote_exports+="export ${var}=$(printf '%q' "${!var:-}") && "
done

brev exec "$INSTANCE_NAME" -- bash -c \
  "cd ~/NemoClaw && \
  ${remote_exports}\
  export OLLAMA_MODEL=qwen3:0.6b && \
  bash test/e2e/test-gpu-e2e.sh"

# Always delete the per-run Brev instance, whatever the outcome of the job.
# Every step that follows this one runs after the instance has been deleted,
# so when the job has not succeeded the remote logs are pulled to the runner's
# /tmp here, before the delete. All commands are best-effort (`|| true`) so
# cleanup never changes the job result.
- name: Tear down GPU instance
  if: always()
  env:
    INSTANCE_NAME: e2e-gpu-nightly-${{ github.run_id }}
    JOB_STATUS: ${{ job.status }}
  run: |
    if [ "$JOB_STATUS" != "success" ]; then
      for log in nemoclaw-gpu-e2e-install.log nemoclaw-gpu-e2e-test.log; do
        brev scp "$INSTANCE_NAME":"/tmp/${log}" "/tmp/${log}" || true
      done
    fi
    brev delete "$INSTANCE_NAME" || true

# Best-effort copy of the remote install log to the runner's /tmp when an
# earlier step has failed; `|| true` keeps a failed copy from failing the step.
# NOTE(review): this step is ordered after "Tear down GPU instance", which
# deletes the instance, so the copy can only succeed if the instance still
# exists at this point — confirm the intended step order.
- name: Copy install log on failure
if: failure()
env:
INSTANCE_NAME: e2e-gpu-nightly-${{ github.run_id }}
run: |
brev scp "$INSTANCE_NAME":/tmp/nemoclaw-gpu-e2e-install.log /tmp/nemoclaw-gpu-e2e-install.log || true

- name: Upload install log on failure
if: failure()
Expand All @@ -203,6 +240,13 @@ jobs:
path: /tmp/nemoclaw-gpu-e2e-install.log
if-no-files-found: ignore

# Best-effort copy of the remote test log to the runner's /tmp when an
# earlier step has failed; `|| true` keeps a failed copy from failing the step.
# NOTE(review): this step is ordered after "Tear down GPU instance", which
# deletes the instance, so the copy can only succeed if the instance still
# exists at this point — confirm the intended step order.
- name: Copy test log on failure
if: failure()
env:
INSTANCE_NAME: e2e-gpu-nightly-${{ github.run_id }}
run: |
brev scp "$INSTANCE_NAME":/tmp/nemoclaw-gpu-e2e-test.log /tmp/nemoclaw-gpu-e2e-test.log || true

- name: Upload test log on failure
if: failure()
uses: actions/upload-artifact@v4
Expand Down
Loading