# NOTE(review): the following header was captured from the GitHub Actions run
# page UI ("Skip to content" navigation, run title, and section label) and is
# not part of the workflow file itself. Preserved here as comments so the
# document remains valid YAML:
#
#   E2E Test — Chem Job Cycle #39
#   E2E Test — Chem Job Cycle
#   Workflow file for this run
---
# End-to-end test for the full docking pipeline: seed -> prep -> dock -> verify.
#
# Runner: self-hosted arc-chem runner inside the RKE2 cluster.
# Direct ClusterIP access to docking-controller and docking-mysql.
#
# This test seeds 3 small drug-like molecules (SMILES only, no pre-calculated
# PDBQTs), runs the ligand-prep pipeline to generate PDBQTs via RDKit, then
# submits a docking workflow against PDB 7jrn and asserts real results.
#
# Triggers:
#   workflow_run      — fires automatically after Build and Push Chem Images succeeds.
#   workflow_dispatch — manual on-demand run.
name: E2E Test — Chem Job Cycle

on:
  workflow_run:
    workflows: ["Build and Push Chem Images"]
    types: [completed]
    branches: [main]
  workflow_dispatch:

env:
  CONTROLLER_URL: http://docking-controller.chem.svc.cluster.local
  # NOTE(review): this URL embeds a long webhook token in plain text. Consider
  # moving it to a repository secret so it is masked in logs and not committed.
  FLUX_WEBHOOK_URL: http://webhook-receiver.tooling.svc.cluster.local/hook/af49d0fcea89a1eb9c7cb24c3cb716ce5f28138bd071ea530bc5123fb1f94241

jobs:
  e2e-test:
    name: Full pipeline E2E (seed + prep + dock)
    # Run on manual dispatch, or automatically only when the upstream image
    # build concluded successfully.
    if: >
      github.event_name == 'workflow_dispatch' ||
      github.event.workflow_run.conclusion == 'success'
    runs-on: [self-hosted, arc-chem]
    timeout-minutes: 60
    steps:
      # On auto-trigger, poke Flux to roll out the freshly built images, then
      # wait for the controller to come back healthy. Manual runs skip the
      # rollout but still verify health before proceeding.
      - name: Trigger Flux reconciliation
        run: |
          if [ "${{ github.event_name }}" = "workflow_run" ]; then
            echo "Auto-triggered: triggering Flux webhook to deploy new images..."
            curl -sf -X POST "$FLUX_WEBHOOK_URL" -H "Content-Type: application/json" -d '{}' \
              && echo "Webhook triggered" || echo "Webhook failed (non-fatal)"
            echo "Waiting 90s for image scan + rollout..."
            sleep 90
          else
            echo "Manual dispatch: skipping Flux rollout"
          fi
          echo "Verifying controller health..."
          # 30 attempts x 5s sleep = 150s maximum wait.
          for i in $(seq 1 30); do
            if curl -sf "$CONTROLLER_URL/health" > /dev/null 2>&1; then
              echo "Controller healthy"
              exit 0
            fi
            sleep 5
          done
          echo "ERROR: controller not healthy after 150s"
          exit 1

      - name: Verify controller and result-writer health
        run: |
          # Controller health is mandatory; result-writer health is best-effort
          # (the service may not expose /health).
          curl -sf "$CONTROLLER_URL/health"
          curl -sf http://result-writer.chem.svc.cluster.local/health || echo "result-writer health check failed (may not have /health)"

      - name: Seed test compounds (SMILES only, no PDBQTs)
        run: |
          echo "Seeding 3 small drug-like molecules via /api/v1/ligands..."
          echo "These have SMILES only — the prep step will generate PDBQTs via RDKit."
          RESP=$(curl -sf -X POST \
            "$CONTROLLER_URL/api/v1/ligands" \
            -H "Content-Type: application/json" \
            -d '[
              {"compound_id": "E2E:caffeine", "smiles": "Cn1c(=O)c2c(ncn2C)n(C)c1=O", "source_db": "e2e-test"},
              {"compound_id": "E2E:aspirin", "smiles": "CC(=O)Oc1ccccc1C(O)=O", "source_db": "e2e-test"},
              {"compound_id": "E2E:acetaminophen", "smiles": "CC(=O)Nc1ccc(O)cc1", "source_db": "e2e-test"}
            ]')
          echo "Import response: $RESP"
          # Extract the numeric "imported" field without requiring jq.
          IMPORTED=$(echo "$RESP" | sed -n 's/.*"imported"[[:space:]]*:[[:space:]]*\([0-9]*\).*/\1/p' | tr -d '\r\n')
          [ "${IMPORTED:-0}" -ge 3 ] || { echo "ERROR: expected 3 imports, got ${IMPORTED:-0}"; exit 1; }
          echo "Successfully seeded $IMPORTED compounds."

      - name: Run ligand prep (SMILES -> PDBQT)
        run: |
          echo "Starting ligand prep for source_db=e2e-test..."
          RESP=$(curl -sf -X POST \
            "$CONTROLLER_URL/api/v1/prep" \
            -H "Content-Type: application/json" \
            -d '{"source_db": "e2e-test", "chunk_size": 10}')
          echo "Prep response: $RESP"
          # Check if all ligands were already prepped (re-run case).
          ALREADY_DONE=$(echo "$RESP" | sed -n 's/.*"all ligands already prepped".*/yes/p')
          if [ "$ALREADY_DONE" = "yes" ]; then
            echo "All ligands already prepped (idempotent re-run). Continuing."
          else
            echo "Prep job launched. Waiting 3 minutes for pod startup + RDKit conversion..."
            echo "(These are tiny molecules — prep takes seconds per compound, but pod scheduling adds overhead.)"
            sleep 180
            echo "Prep wait complete."
          fi

      - name: Dump existing workflows (pre-run state)
        run: |
          echo "=== existing docking workflows ==="
          curl -sf "$CONTROLLER_URL/api/v1/dockingjobs" 2>/dev/null || echo "(list failed)"

      - name: Submit docking job
        id: submit
        run: |
          echo "Submitting docking job: e2e-test ligands against PDB 7jrn (native ligand TTT)..."
          RESPONSE=$(curl -sf -X POST \
            "$CONTROLLER_URL/api/v1/dockingjobs" \
            -H "Content-Type: application/json" \
            -d '{
              "ligand_db": "e2e-test",
              "pdbid": "7jrn",
              "native_ligand": "TTT",
              "ligands_chunk_size": 10
            }')
          echo "Submit response: $RESPONSE"
          JOB_NAME=$(echo "$RESPONSE" \
            | sed -n 's/.*"name"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' \
            | tr -d '\r\n')
          [ -z "$JOB_NAME" ] && echo "ERROR: no job name in response" && exit 1
          echo "Submitted: $JOB_NAME"
          # Expose the job name to later steps.
          echo "job_name=$JOB_NAME" >> "$GITHUB_OUTPUT"

      - name: Poll until Completed or Failed
        run: |
          JOB_NAME="${{ steps.submit.outputs.job_name }}"
          echo "Polling $JOB_NAME every 30s (max 50 min)..."
          for i in $(seq 1 100); do
            RESP=$(curl -sf \
              "$CONTROLLER_URL/api/v1/dockingjobs/$JOB_NAME" \
              || echo '{"status":"Unknown"}')
            STATUS=$(echo "$RESP" \
              | sed -n 's/.*"status"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' \
              | tr -d '\r\n')
            echo "[$i/100] $STATUS"
            if [ "$STATUS" = "Completed" ]; then
              MSG=$(echo "$RESP" \
                | sed -n 's/.*"message"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' \
                | tr -d '\r\n')
              echo "Completed. Result: ${MSG:-(none)}"
              exit 0
            fi
            if [ "$STATUS" = "Failed" ]; then
              MSG=$(echo "$RESP" \
                | sed -n 's/.*"message"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' \
                | tr -d '\r\n')
              echo "FAILED: $MSG"
              exit 1
            fi
            sleep 30
          done
          echo "TIMEOUT: job did not complete within polling window"
          exit 1

      - name: Dump pipeline logs
        if: always()
        run: |
          JOB_NAME="${{ steps.submit.outputs.job_name }}"
          # Nothing to dump if the submit step never produced a job name.
          [ -z "$JOB_NAME" ] && exit 0
          BASE="$CONTROLLER_URL/api/v1/dockingjobs/$JOB_NAME"
          for TASK in prepare-receptor dock-batch; do
            echo "=== logs: $TASK ==="
            curl -sf "$BASE/logs?task=$TASK" 2>/dev/null || echo "(no logs or job gone)"
          done
          echo "=== job status ==="
          curl -sf "$BASE" 2>/dev/null || true

      - name: Verify results
        run: |
          JOB_NAME="${{ steps.submit.outputs.job_name }}"
          echo "Checking final workflow state..."
          RESP=$(curl -sf "$CONTROLLER_URL/api/v1/dockingjobs/$JOB_NAME")
          echo "Workflow response: $RESP"
          STATUS=$(echo "$RESP" | sed -n 's/.*"status"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' | tr -d '\r\n')
          [ "$STATUS" = "Completed" ] || { echo "ERROR: expected Completed, got $STATUS"; exit 1; }
          echo "Workflow completed successfully."
          echo ""
          echo "Fetching docking results..."
          RESULTS=$(curl -sf "$CONTROLLER_URL/api/v1/dockingjobs/$JOB_NAME/results" || echo '{}')
          echo "Results: $RESULTS"
          # With real prepped ligands, we must have actual docking results.
          RESULT_COUNT=$(echo "$RESULTS" | sed -n 's/.*"result_count"[[:space:]]*:[[:space:]]*\([0-9]*\).*/\1/p' | tr -d '\r\n')
          echo "Result count: ${RESULT_COUNT:-0}"
          [ "${RESULT_COUNT:-0}" -gt 0 ] || { echo "ERROR: expected result_count > 0, got ${RESULT_COUNT:-0}"; exit 1; }
          echo "PASS: Got $RESULT_COUNT docking result(s) with real affinities."
          # Log the affinity values for visibility.
          BEST=$(echo "$RESULTS" | sed -n 's/.*"best_affinity_kcal_mol"[[:space:]]*:[[:space:]]*\(-\{0,1\}[0-9.]*\).*/\1/p' | tr -d '\r\n')
          WORST=$(echo "$RESULTS" | sed -n 's/.*"worst_affinity_kcal_mol"[[:space:]]*:[[:space:]]*\(-\{0,1\}[0-9.]*\).*/\1/p' | tr -d '\r\n')
          AVG=$(echo "$RESULTS" | sed -n 's/.*"avg_affinity_kcal_mol"[[:space:]]*:[[:space:]]*\(-\{0,1\}[0-9.]*\).*/\1/p' | tr -d '\r\n')
          echo "Affinities (kcal/mol): best=${BEST:-N/A} worst=${WORST:-N/A} avg=${AVG:-N/A}"

      - name: Cleanup
        if: always()
        run: |
          JOB_NAME="${{ steps.submit.outputs.job_name }}"
          # Delete workflow via API (cleans up K8s Jobs, staging, results, workflow row)
          [ -n "$JOB_NAME" ] && \
            curl -sf -X DELETE "$CONTROLLER_URL/api/v1/dockingjobs/$JOB_NAME" \
            && echo "Deleted workflow $JOB_NAME" || true
          # Test ligands remain in DB (idempotent via ON DUPLICATE KEY UPDATE)