Skip to content

Cancel merge queue workflows on job failure #3065

Cancel merge queue workflows on job failure

Cancel merge queue workflows on job failure #3065

# Scheduled workflow that inspects in-progress `general.yml` runs on merge
# queue branches and cancels any run in which a required job has failed.
name: Cancel merge queue workflows on job failure

on:
  workflow_dispatch:
  schedule:
    - cron: "*/5 * * * *"  # Runs every 5 minutes

# `actions: write` is needed for the run-cancel API call; `contents: read`
# is needed for the checkout of general.yml.
permissions:
  actions: write
  contents: read

jobs:
  check-and-cancel:
    name: Check merge queue workflows and cancel on failure
    runs-on: ubuntu-latest
    # Only run in the upstream repository.
    if: github.repository == 'tensorzero/tensorzero'
    steps:
      # Only general.yml is needed, so check out just that one file.
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          sparse-checkout: .github/workflows/general.yml
          sparse-checkout-cone-mode: false

      - name: Check running workflows on merge queue branches
        env:
          # Pass the token through the environment instead of expanding it
          # into the script text.
          GH_TOKEN: ${{ github.token }}
        run: |
          echo "Checking for running workflows on merge queue branches..."

          # GITHUB_REPOSITORY (owner/repo) is provided by the runner.
          api_root="https://api.github.com/repos/$GITHUB_REPOSITORY"

          # Dynamically extract the 'needs' list from 'check-all-general-jobs-passed' job in general.yml
          # This ensures we only cancel on failures of jobs that actually block merging
          REQUIRED_JOBS=$(yq -r '.jobs["check-all-general-jobs-passed"].needs[]' .github/workflows/general.yml)
          if [ -z "$REQUIRED_JOBS" ]; then
            echo "ERROR: Could not extract required jobs from general.yml"
            exit 1
          fi
          echo "Required jobs (from check-all-general-jobs-passed needs):"
          echo "$REQUIRED_JOBS"
          echo ""

          # Build jq filter for required jobs as a JSON array
          JQ_FILTER=$(echo "$REQUIRED_JOBS" | jq -R -s 'split("\n") | map(select(length > 0))')

          # Get all in-progress workflow runs for general.yml
          workflow_runs=$(curl -s -H "Authorization: Bearer $GH_TOKEN" \
            "$api_root/actions/workflows/general.yml/runs?status=in_progress&per_page=100")

          # Filter for merge queue branches; emit one "<id> <branch> <name>" line per run
          merge_queue_runs=$(echo "$workflow_runs" | jq -r '.workflow_runs[] | select(.head_branch // "" | test("^gh-readonly-queue/main/pr-")) | "\(.id) \(.head_branch) \(.name)"')
          if [ -z "$merge_queue_runs" ]; then
            echo "No running workflows found on merge queue branches"
            exit 0
          fi
          echo "Found running workflows on merge queue branches:"
          echo "$merge_queue_runs"
          echo ""

          # Check each workflow for failed jobs
          while IFS= read -r line; do
            if [ -z "$line" ]; then
              continue
            fi

            # Fields 1 and 2 are the run id and branch; the workflow name may
            # contain spaces, so it is everything from field 3 onwards.
            run_id=$(echo "$line" | awk '{print $1}')
            branch=$(echo "$line" | awk '{print $2}')
            workflow_name=$(echo "$line" | cut -d' ' -f3-)
            echo "Checking workflow run $run_id ($workflow_name) on branch $branch..."

            # Get jobs for this workflow run.
            # The jobs endpoint returns only 30 jobs per page by default, so ask
            # for the maximum page size (100); otherwise a failed required job
            # beyond the first page would be missed.
            # NOTE(review): a run with more than 100 jobs would still need real pagination.
            jobs=$(curl -s -H "Authorization: Bearer $GH_TOKEN" \
              "$api_root/actions/runs/$run_id/jobs?per_page=100")

            # Check if any required job has failed
            # Job names may include matrix suffixes (e.g., "lint-rust (1)"), so we check if the job name starts with any required job name
            # NOTE(review): prefix matching also matches an unrelated job whose name merely
            # begins with a required job's name, and it presumes the job names reported by
            # the API begin with the ids listed under `needs` - confirm against general.yml.
            failed_jobs=$(echo "$jobs" | jq -r --argjson required "$JQ_FILTER" '
              .jobs[] |
              select(.conclusion == "failure") |
              select(
                .name as $job_name |
                any($required[]; . as $req | $job_name | startswith($req))
              ) |
              "\(.name) (status: \(.status), conclusion: \(.conclusion))"
            ')

            if [ -n "$failed_jobs" ]; then
              echo "Found failed required jobs in workflow run $run_id:"
              echo "$failed_jobs"
              echo ""
              echo "Cancelling workflow run $run_id on branch $branch..."

              # -w appends the HTTP status code on its own final line so it can
              # be split from the response body below.
              cancel_response=$(curl -s -w "\n%{http_code}" -X POST \
                -H "Authorization: Bearer $GH_TOKEN" \
                "$api_root/actions/runs/$run_id/cancel")
              http_code=$(echo "$cancel_response" | tail -n1)
              response_body=$(echo "$cancel_response" | sed '$d')

              # A failed cancel is logged but does not fail the step, so the
              # remaining runs are still checked.
              if [ "$http_code" = "202" ]; then
                echo "Successfully cancelled workflow run $run_id"
              else
                echo "Failed to cancel workflow run $run_id (HTTP $http_code)"
                echo "Response: $response_body"
              fi
              echo ""
            else
              echo "No failed required jobs found in workflow run $run_id"
            fi
          done <<< "$merge_queue_runs"

          echo "Finished checking all merge queue workflows"