Skip to content

Commit 29541c9

Browse files
committed
test
1 parent 0fa40ad commit 29541c9

File tree

1 file changed

+276
-38
lines changed

1 file changed

+276
-38
lines changed

.github/workflows/_sandbox.yaml

Lines changed: 276 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,68 @@
11
name: Sandbox
2-
run-name: CI-amd64
2+
33
on:
4+
schedule:
5+
- cron: '30 9 * * *' # 09:30 UTC daily = 01:30 AM Pacific Standard Time (02:30 AM during daylight saving)
46
pull_request:
57
types:
68
- opened
79
- reopened
810
- ready_for_review
911
- synchronize
1012
paths-ignore:
11-
- "**.md"
13+
- '**.md'
14+
workflow_dispatch:
15+
inputs:
16+
PUBLISH:
17+
type: boolean
18+
description: Publish dated images and update the 'latest' tag?
19+
default: false
20+
required: false
21+
BUMP_MANIFEST:
22+
type: boolean
23+
description: Bump git repos in manifest.yaml to head of tree?
24+
default: false
25+
required: false
26+
MERGE_BUMPED_MANIFEST:
27+
type: boolean
28+
description: '(used if BUMP_MANIFEST=true) If true: attempt to PR/merge manifest branch'
29+
default: false
30+
required: false
1231

13-
env:
14-
DEFAULT_MANIFEST_ARTIFACT_NAME: bumped-manifest
32+
concurrency:
33+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
34+
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
1535

1636
permissions:
17-
contents: read # to fetch code
37+
contents: write # to fetch code and push branch
1838
actions: write # to cancel previous workflows
1939
packages: write # to upload container
40+
pull-requests: write # to make pull request for manifest bump
41+
42+
env:
43+
DEFAULT_MANIFEST_ARTIFACT_NAME: bumped-manifest
2044

2145
jobs:
2246
metadata:
2347
runs-on: ubuntu-22.04
2448
outputs:
49+
BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }}
2550
PUBLISH: ${{ steps.if-publish.outputs.PUBLISH }}
2651
BUMP_MANIFEST: ${{ steps.manifest-branch.outputs.BUMP_MANIFEST }}
2752
MANIFEST_ARTIFACT_NAME: ${{ steps.manifest-branch.outputs.MANIFEST_ARTIFACT_NAME }}
2853
MANIFEST_BRANCH: ${{ steps.manifest-branch.outputs.MANIFEST_BRANCH }}
2954
# Step output name must match the MERGE_BUMPED_MANIFEST spelling used by the manifest-branch step
# (previously read the misspelled "MERGE_BUMBED_MANIFEST", which evaluates to an empty string).
# NOTE(review): the lines where the step writes to $GITHUB_OUTPUT are outside this view - confirm the name.
MERGE_BUMPED_MANIFEST: ${{ steps.manifest-branch.outputs.MERGE_BUMPED_MANIFEST }}
3055
steps:
56+
- name: Cancel workflow run if the trigger is a draft PR
57+
id: cancel-if-draft
58+
if: github.event_name == 'pull_request' && github.event.pull_request.draft == true
59+
run: |
60+
echo "Cancelling workflow for draft PR"
61+
curl -X POST -H "Authorization: token ${{ github.token }}" \
62+
-H "Accept: application/vnd.github.v3+json" \
63+
"https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/cancel"
64+
while true; do sleep 1; done # blocks execution in case workflow cancellation takes time
65+
3166
- name: Set build date
3267
id: date
3368
shell: bash -x -e {0}
@@ -45,7 +80,7 @@ jobs:
4580
id: manifest-branch
4681
shell: bash -x -e {0}
4782
run: |
48-
BUMP_MANIFEST=${{ 'true' }}
83+
BUMP_MANIFEST=${{ github.event_name == 'schedule' || inputs.BUMP_MANIFEST || 'false' }}
4984
MERGE_BUMPED_MANIFEST=${{ github.event_name == 'schedule' || inputs.MERGE_BUMPED_MANIFEST || 'false' }}
5085
# Prepend nightly manifest branch with "z" to make it appear at the end
5186
if [[ "$BUMP_MANIFEST" == "true" ]]; then
@@ -103,48 +138,251 @@ jobs:
103138
.github/container/manifest.yaml
104139
.github/container/patches
105140
106-
build-base:
107-
uses: ./.github/workflows/_build_base.yaml
141+
amd64:
108142
needs: [metadata, bump-manifest]
143+
uses: ./.github/workflows/_ci.yaml
109144
with:
110145
ARCHITECTURE: amd64
111-
BUILD_DATE: 20240418
146+
BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
112147
MANIFEST_ARTIFACT_NAME: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }}
113148
secrets: inherit
114149

115-
build-jax:
116-
needs: build-base
117-
uses: ./.github/workflows/_build.yaml
150+
arm64:
151+
needs: [metadata, bump-manifest]
152+
uses: ./.github/workflows/_ci.yaml
118153
with:
119-
ARCHITECTURE: amd64
120-
ARTIFACT_NAME: artifact-jax-build
121-
BADGE_FILENAME: badge-jax-build
122-
BUILD_DATE: 20240418
123-
BASE_IMAGE: ${{ needs.build-base.outputs.DOCKER_TAG }}
124-
CONTAINER_NAME: jax
125-
DOCKERFILE: .github/container/Dockerfile.jax
126-
RUNNER_SIZE: large
154+
ARCHITECTURE: arm64
155+
BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
156+
MANIFEST_ARTIFACT_NAME: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }}
127157
secrets: inherit
128158

129-
build-upstream-maxtext:
130-
needs: build-jax
131-
uses: ./.github/workflows/_build.yaml
159+
# Only merge if everything succeeds
160+
merge-new-manifest:
161+
runs-on: ubuntu-22.04
162+
if: ${{ !cancelled() && needs.metadata.outputs.MERGE_BUMPED_MANIFEST == 'true' && needs.metadata.outputs.MANIFEST_BRANCH != github.sha }}
163+
needs:
164+
- metadata
165+
- amd64
166+
- arm64
167+
steps:
168+
- name: "Tests Succeeded: ${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}"
169+
id: test_result
170+
run: echo "SUCCEEDED=${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}" | tee -a $GITHUB_OUTPUT
171+
172+
- name: Check out the repository under ${GITHUB_WORKSPACE}
173+
uses: actions/checkout@v4
174+
175+
- name: Delete checked-out manifest and patches
176+
run: |
177+
rm .github/container/manifest.yaml
178+
rm -rf .github/container/patches
179+
180+
- name: Replace checked-out manifest file/patches with bumped one
181+
uses: actions/download-artifact@v4
182+
with:
183+
name: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }}
184+
path: .github/container/
185+
186+
- name: 'Create local manifest branch: ${{ needs.metadata.outputs.MANIFEST_BRANCH }}'
187+
id: local_branch
188+
shell: bash -x -e {0}
189+
run: |
190+
git config user.name "JAX-Toolbox CI"
191+
git config user.email "[email protected]"
192+
git switch -c ${{ needs.metadata.outputs.MANIFEST_BRANCH }}
193+
git status
194+
git add .github/container/patches/
195+
git status
196+
# In the unusual situation where the manifest is the same even after bumping,
197+
# we will produce an empty commit with --allow-empty, which allows a PR to be
198+
# made and merged even with no changeset.
199+
git commit --allow-empty -a -m "Nightly Manifest Bump (${{ needs.metadata.outputs.BUILD_DATE }}) from: https://github.com/NVIDIA/JAX-Toolbox/actions/runs/${{ github.run_id }}"
200+
201+
- name: Try to merge manifest branch
202+
id: merge_local
203+
if: steps.test_result.outputs.SUCCEEDED == 'true'
204+
# Merge can fail
205+
continue-on-error: true
206+
shell: bash -x -e {0}
207+
run: |
208+
git switch ${{ github.ref_name }}
209+
# Pull this ref in case it was updated
210+
git pull --rebase
211+
git merge --ff-only ${{ needs.metadata.outputs.MANIFEST_BRANCH }}
212+
# Push the new change
213+
git push origin ${{ github.ref_name }}
214+
215+
# We will create a Draft PR & remote branch if:
216+
# 1. The tests failed
217+
# 2. The merge failed
218+
- name: Create remote manifest branch
219+
id: create_remote_branch
220+
if: steps.test_result.outputs.SUCCEEDED == 'false' || steps.merge_local.outcome != 'success'
221+
shell: bash -x -e {0}
222+
run: |
223+
# Always abort any merge that may still be in progress
224+
git merge --abort || true
225+
git switch ${{ needs.metadata.outputs.MANIFEST_BRANCH }}
226+
# Tests or the merge failed: push the manifest branch to the remote so a PR can be opened from it
227+
git push --set-upstream origin ${{ needs.metadata.outputs.MANIFEST_BRANCH }}
228+
229+
- name: Creating Draft PR for MANIFEST_BRANCH=${{ needs.metadata.outputs.MANIFEST_BRANCH }}
230+
id: create_pr
231+
if: steps.test_result.outputs.SUCCEEDED == 'false' || steps.merge_local.outcome != 'success'
232+
uses: octokit/[email protected]
233+
with:
234+
route: POST /repos/{owner_and_repo}/pulls
235+
owner_and_repo: ${{ github.repository }}
236+
head: ${{ needs.metadata.outputs.MANIFEST_BRANCH }}
237+
# Always try to merge back into the branch that triggered this workflow
238+
base: ${{ github.ref }}
239+
body: |
240+
https://github.com/NVIDIA/JAX-Toolbox/actions/runs/${{ github.run_id }}
241+
title: Nightly Manifest Bump (${{ needs.metadata.outputs.BUILD_DATE }})
242+
draft: true
243+
env:
244+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
245+
246+
- name: 'Log created PR: #${{ fromJson(steps.create_pr.outputs.data).number }}'
247+
if: steps.create_pr.outcome == 'success'
248+
run: |
249+
echo "https://github.com/NVIDIA/JAX-Toolbox/pull/${{ fromJson(steps.create_pr.outputs.data).number }}" | tee -a $GITHUB_STEP_SUMMARY
250+
251+
# Guard delete in simple check to protect other branches
252+
- name: Check that the branch matches znightly- prefix
253+
run: |
254+
if [[ "${{ needs.metadata.outputs.MANIFEST_BRANCH }}" != znightly-* ]]; then
255+
echo Tried to delete MANIFEST_BRANCH=${{ needs.metadata.outputs.MANIFEST_BRANCH }}, but did not start with "znightly-"
256+
exit 1
257+
fi
258+
259+
# If merging fails b/c upstream conflict, branch is deleted to avoid clutter since changeset is preserved in PR
260+
- name: Deleting remote MANIFEST_BRANCH=${{ needs.metadata.outputs.MANIFEST_BRANCH }}
261+
# Delete can fail if branch was already deleted or not created, e.g., if the PR successfully merges, then branch is also already deleted.
262+
continue-on-error: true
263+
uses: octokit/[email protected]
264+
with:
265+
route: DELETE /repos/{owner_and_repo}/git/refs/heads/${{ needs.metadata.outputs.MANIFEST_BRANCH }}
266+
owner_and_repo: ${{ github.repository }}
267+
env:
268+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
269+
270+
make-publish-configs:
271+
runs-on: ubuntu-22.04
272+
if: ${{ !cancelled() }}
273+
env:
274+
MEALKIT_IMAGE_REPO: ${{ needs.metadata.outputs.PUBLISH == 'true' && 'jax-mealkit' || 'mock-jax-mealkit' }}
275+
FINAL_IMAGE_REPO: ${{ needs.metadata.outputs.PUBLISH == 'true' && 'jax' || 'mock-jax' }}
276+
needs:
277+
- metadata
278+
- amd64
279+
- arm64
280+
outputs:
281+
PUBLISH_CONFIGS: ${{ steps.generate-configs.outputs.PUBLISH_CONFIGS }}
282+
steps:
283+
- id: generate-configs
284+
shell: bash -eu -o pipefail {0}
285+
run: |
286+
declare -a FLAVORS=(
287+
base
288+
jax
289+
triton
290+
equinox
291+
maxtext
292+
levanter
293+
upstream-t5x
294+
upstream-pax
295+
upstream-maxtext
296+
t5x
297+
pax
298+
grok
299+
)
300+
declare -a STAGES=(
301+
mealkit
302+
final
303+
)
304+
305+
## create JSON specs for a 1D matrix of container publication jobs
306+
307+
ALL_TAGS=$(
308+
echo '${{ needs.amd64.outputs.DOCKER_TAGS }}' \
309+
'${{ needs.arm64.outputs.DOCKER_TAGS }}' |\
310+
jq -s 'add'
311+
)
312+
PUBLISH_CONFIGS='[]'
313+
314+
for stage in "${STAGES[@]}"; do
315+
for flavor in "${FLAVORS[@]}";do
316+
317+
# collect images for different platforms, e.g. amd64 and arm64
318+
matching_tags=$(
319+
echo "$ALL_TAGS" |\
320+
jq -c ".[] | select(.stage == \"${stage}\" and .flavor == \"${flavor}\" and .tag != \"\")"
321+
)
322+
323+
# source_image is a list of all platform-specific tags
324+
source_image=$(echo "${matching_tags}" | jq -c "[.tag]" | jq -s 'add')
325+
# if the build job failed without producing any images, skip this flavor
326+
n_source_images=$(echo "$source_image" | jq 'length')
327+
if [[ $n_source_images -gt 0 ]]; then
328+
echo "PUBLISH image $flavor with $n_source_images $stage containers"
329+
330+
# tag priority is the highest priority of all platform-specific tags
331+
priority=$(echo "${matching_tags}" | jq -r ".priority" | jq -s 'max')
332+
333+
# put all final images in the `ghcr.io/nvidia/jax` namespace
334+
# and mealkit images in the `ghcr.io/nvidia/jax-mealkit` namespace
335+
case ${stage} in
336+
mealkit)
337+
target_image=${MEALKIT_IMAGE_REPO}
338+
;;
339+
final)
340+
target_image=${FINAL_IMAGE_REPO}
341+
;;
342+
esac
343+
344+
PUBLISH_CONFIGS=$(
345+
echo ${PUBLISH_CONFIGS} | jq -c ". + [{
346+
\"flavor\": \"${flavor}\",
347+
\"target_image\": \"${target_image}\",
348+
\"priority\": \"${priority}\",
349+
\"source_image\": ${source_image},
350+
\"stage\": \"${stage}\"
351+
}]"
352+
)
353+
else
354+
echo "SKIPPED image $flavor with 0 $stage containers"
355+
fi
356+
done
357+
done
358+
359+
PUBLISH_CONFIGS=$(echo "$PUBLISH_CONFIGS" | jq -c '{"config": .}')
360+
echo ${PUBLISH_CONFIGS} | jq
361+
echo "PUBLISH_CONFIGS=${PUBLISH_CONFIGS}" >> $GITHUB_OUTPUT
362+
363+
publish-containers:
364+
needs:
365+
- metadata
366+
- make-publish-configs
367+
# Job outputs are plain strings, so compare the whole JSON document emitted by
# make-publish-configs (jq -c prints an empty result as {"config":[]}); the output has
# no `.config` property to dereference. Also skip when the output is empty (upstream job
# failed or was skipped) so fromJson() in the matrix is never fed an empty string.
if: ${{ !cancelled() && needs.make-publish-configs.outputs.PUBLISH_CONFIGS != '' && needs.make-publish-configs.outputs.PUBLISH_CONFIGS != '{"config":[]}' }}
368+
strategy:
369+
fail-fast: false
370+
matrix: ${{ fromJson(needs.make-publish-configs.outputs.PUBLISH_CONFIGS) }}
371+
uses: ./.github/workflows/_publish_container.yaml
132372
with:
133-
ARCHITECTURE: amd64
134-
ARTIFACT_NAME: artifact-maxtext-build
135-
BADGE_FILENAME: badge-maxtext-build
136-
BUILD_DATE: 20240418
137-
BASE_IMAGE: ${{ needs.build-jax.outputs.DOCKER_TAG_MEALKIT }}
138-
CONTAINER_NAME: maxtext
139-
DOCKERFILE: .github/container/Dockerfile.maxtext.amd64
140-
secrets: inherit
373+
ARTIFACT_NAME: ${{ matrix.config.stage }}-${{ matrix.config.flavor }}
374+
ARTIFACT_TAG: ${{ matrix.config.flavor }}-${{ needs.metadata.outputs.BUILD_DATE }}
375+
SOURCE_IMAGE: ${{ join(matrix.config.source_image, ' ') }}
376+
TARGET_IMAGE: ${{ matrix.config.target_image }}
377+
TARGET_TAGS: |
378+
type=raw,value=${{ matrix.config.flavor }},priority=${{ matrix.config.priority }}
379+
type=raw,value=${{ matrix.config.flavor }}-${{ needs.metadata.outputs.BUILD_DATE }},priority=${{ matrix.config.priority }}
141380
142-
build-rosetta-maxtext:
143-
needs: build-upstream-maxtext
144-
uses: ./.github/workflows/_build_rosetta.yaml
381+
finalize:
382+
needs: [metadata, amd64, arm64, publish-containers]
383+
if: '!cancelled()'
384+
uses: ./.github/workflows/_finalize.yaml
145385
with:
146-
ARCHITECTURE: amd64
147-
BUILD_DATE: 20240418
148-
BASE_IMAGE: ${{ needs.build-upstream-maxtext.outputs.DOCKER_TAG_MEALKIT }}
149-
BASE_LIBRARY: maxtext
386+
BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
387+
PUBLISH_BADGE: ${{ needs.metadata.outputs.PUBLISH == 'true' }}
150388
secrets: inherit

0 commit comments

Comments
 (0)