From 7f66e40731c57e7f3bbeb14f9792c0ae9177ed41 Mon Sep 17 00:00:00 2001
From: Oliver Koenig <okoenig@nvidia.com>
Date: Mon, 22 Apr 2024 15:17:43 +0200
Subject: [PATCH 1/4] ci: Reorder records

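Move the build-rosetta-t5x job directly after build-upstream-t5x so that
related jobs are grouped together, which improves readability.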
---
 .github/workflows/_ci.yaml | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/_ci.yaml b/.github/workflows/_ci.yaml
index 589a42d3b..92b4f6413 100644
--- a/.github/workflows/_ci.yaml
+++ b/.github/workflows/_ci.yaml
@@ -117,6 +117,16 @@ jobs:
       DOCKERFILE: .github/container/Dockerfile.t5x.${{ inputs.ARCHITECTURE }}
     secrets: inherit
 
+  build-rosetta-t5x:
+    needs: build-upstream-t5x
+    uses: ./.github/workflows/_build_rosetta.yaml
+    with:
+      ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
+      BUILD_DATE: ${{ inputs.BUILD_DATE }}
+      BASE_IMAGE: ${{ needs.build-upstream-t5x.outputs.DOCKER_TAG_MEALKIT }}
+      BASE_LIBRARY: t5x
+    secrets: inherit
+  
   build-upstream-pax:
     needs: build-jax
     uses: ./.github/workflows/_build.yaml
@@ -130,16 +140,6 @@ jobs:
       DOCKERFILE: .github/container/Dockerfile.pax.${{ inputs.ARCHITECTURE }}
     secrets: inherit
 
-  build-rosetta-t5x:
-    needs: build-upstream-t5x
-    uses: ./.github/workflows/_build_rosetta.yaml
-    with:
-      ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
-      BUILD_DATE: ${{ inputs.BUILD_DATE }}
-      BASE_IMAGE: ${{ needs.build-upstream-t5x.outputs.DOCKER_TAG_MEALKIT }}
-      BASE_LIBRARY: t5x
-    secrets: inherit
-
   build-rosetta-pax:
     needs: build-upstream-pax
     uses: ./.github/workflows/_build_rosetta.yaml

From 778a8569ff23becb626df8ea461fd4137f9d5d16 Mon Sep 17 00:00:00 2001
From: Oliver Koenig <okoenig@nvidia.com>
Date: Mon, 22 Apr 2024 15:22:44 +0200
Subject: [PATCH 2/4] refactor: Rename rosetta tests

Rename the Rosetta test workflows to _test_rosetta_<framework>.yaml so that
they sort together, for easier readability.
---
 .github/workflows/_ci.yaml                                  | 6 +++---
 .../{_test_pax_rosetta.yaml => _test_rosetta_pax.yaml}      | 0
 .../{_test_t5x_rosetta.yaml => _test_rosetta_t5x.yaml}      | 0
 3 files changed, 3 insertions(+), 3 deletions(-)
 rename .github/workflows/{_test_pax_rosetta.yaml => _test_rosetta_pax.yaml} (100%)
 rename .github/workflows/{_test_t5x_rosetta.yaml => _test_rosetta_t5x.yaml} (100%)

diff --git a/.github/workflows/_ci.yaml b/.github/workflows/_ci.yaml
index 92b4f6413..1eb6c3d3c 100644
--- a/.github/workflows/_ci.yaml
+++ b/.github/workflows/_ci.yaml
@@ -126,7 +126,7 @@ jobs:
       BASE_IMAGE: ${{ needs.build-upstream-t5x.outputs.DOCKER_TAG_MEALKIT }}
       BASE_LIBRARY: t5x
     secrets: inherit
-  
+
   build-upstream-pax:
     needs: build-jax
     uses: ./.github/workflows/_build.yaml
@@ -310,7 +310,7 @@ jobs:
   test-rosetta-t5x:
     needs: build-rosetta-t5x
     if: inputs.ARCHITECTURE == 'amd64' # no images for arm64
-    uses: ./.github/workflows/_test_t5x_rosetta.yaml
+    uses: ./.github/workflows/_test_rosetta_t5x.yaml
     with:
       T5X_IMAGE: ${{ needs.build-rosetta-t5x.outputs.DOCKER_TAG_FINAL }}
     secrets: inherit
@@ -433,7 +433,7 @@ jobs:
   test-rosetta-pax:
     needs: build-rosetta-pax
     if: inputs.ARCHITECTURE == 'amd64' # no images for arm64
-    uses: ./.github/workflows/_test_pax_rosetta.yaml
+    uses: ./.github/workflows/_test_rosetta_pax.yaml
     with:
       PAX_IMAGE: ${{ needs.build-rosetta-pax.outputs.DOCKER_TAG_FINAL }}
     secrets: inherit
diff --git a/.github/workflows/_test_pax_rosetta.yaml b/.github/workflows/_test_rosetta_pax.yaml
similarity index 100%
rename from .github/workflows/_test_pax_rosetta.yaml
rename to .github/workflows/_test_rosetta_pax.yaml
diff --git a/.github/workflows/_test_t5x_rosetta.yaml b/.github/workflows/_test_rosetta_t5x.yaml
similarity index 100%
rename from .github/workflows/_test_t5x_rosetta.yaml
rename to .github/workflows/_test_rosetta_t5x.yaml

From 82d00dd1e2e1e20aa29d108bb50a154e4242739b Mon Sep 17 00:00:00 2001
From: Oliver Koenig <okoenig@nvidia.com>
Date: Mon, 22 Apr 2024 15:27:48 +0200
Subject: [PATCH 3/4] ci: Restore rosetta-t5x unit tests

---
 .github/workflows/_test_rosetta.yaml     |  97 -----------------
 .github/workflows/_test_rosetta_t5x.yaml | 131 +++++++++++++++++++----
 2 files changed, 109 insertions(+), 119 deletions(-)
 delete mode 100644 .github/workflows/_test_rosetta.yaml

diff --git a/.github/workflows/_test_rosetta.yaml b/.github/workflows/_test_rosetta.yaml
deleted file mode 100644
index 017662ea3..000000000
--- a/.github/workflows/_test_rosetta.yaml
+++ /dev/null
@@ -1,97 +0,0 @@
-name: ~test Rosetta
-
-on:
-  workflow_call:
-    inputs:
-      ROSETTA_IMAGE:
-        type: string
-        description: 'Rosetta image build by NVIDIA/JAX-Toolbox'
-        required: true
-        default: 'ghcr.io/nvidia/t5x:latest'
-    outputs:
-      TEST_ARTIFACT_NAME:
-        description: 'Name of the unit test artifact for downstream workflows'
-        value: ${{ jobs.rosetta-unit-tests.outputs.TEST_ARTIFACT_NAME }}
-      TEST_STATUS:
-        description: 'Summary pass/fail value indicating if results from tests are acceptable'
-        value: ${{ jobs.publish-test.outputs.STATUS }}
-
-env:
-  TEST_ARTIFACT_NAME: rosetta-test-logs
-  TEST_LOG_LOCAL_PATH: /log/unit-report.jsonl
-
-jobs:
-  rosetta-unit-tests:
-    runs-on: [self-hosted, V100]
-    outputs:
-      TEST_ARTIFACT_NAME: ${{ env.TEST_ARTIFACT_NAME }}
-    steps:
-      - name: Print environment variables
-        run: |
-          env
-
-      - name: Print GPU information
-        run: nvidia-smi  
-
-      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.repository_owner }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Pull Rosetta image
-        shell: bash -x -e {0}
-        run: |
-          docker pull ${{ inputs.ROSETTA_IMAGE }}
-          docker tag ${{ inputs.ROSETTA_IMAGE }} rosetta:latest
-
-      - name: Run Rosetta tests w/ docker
-        shell: docker run --gpus all -v {0}:/cmd.sh -v /log:/log rosetta:latest bash -x -e /cmd.sh
-        run: |
-          ROSETTA_PATH=$(dirname $(python -c "import rosetta; print(*rosetta.__path__)"))
-          pip install "${ROSETTA_PATH}[test]" pytest-reportlog
-          pytest --report-log=${{ env.TEST_LOG_LOCAL_PATH }} ${ROSETTA_PATH} || true
-
-      - name: Upload unit test json logs
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.TEST_ARTIFACT_NAME }}
-          path: ${{ env.TEST_LOG_LOCAL_PATH }}
-
-  publish-test:
-    needs: rosetta-unit-tests
-    uses: ./.github/workflows/_publish_badge.yaml
-    if: ( always() )
-    secrets: inherit
-    with:
-      ENDPOINT_FILENAME: 'rosetta-unit-test-status.json'
-      PUBLISH: false
-      SCRIPT: |
-          ARTIFACTS="${{ needs.rosetta-unit-tests.outputs.TEST_ARTIFACT_NAME }}/*.jsonl"
-          all_outcomes() {
-            cat $ARTIFACTS | jq -r '. | select((.["$report_type"] == "TestReport") and (.when == "call")) | .outcome'
-          }
-          cnt_type() {
-            cat $ARTIFACTS | jq '. | select((.["$report_type"] == "TestReport") and (.when == "call") and (.outcome | contains("'${1}'"))) | .outcome' | wc -l
-          }
-          SKIPPED_TESTS=$(cnt_type skipped)
-          FAILED_TESTS=$(cnt_type failed)
-          PASSED_TESTS=$(cnt_type passed)
-          TOTAL_TESTS=$(all_outcomes | wc -l)
-          echo "## Unit/Integration test breakdown" | tee -a $GITHUB_STEP_SUMMARY
-          all_outcomes | sort | uniq -c | tee -a $GITHUB_STEP_SUMMARY
-          if [[ $FAILED_TESTS -eq 0 ]] && [[ $TOTAL_TESTS -gt 0 ]]; then
-            BADGE_COLOR=brightgreen
-            echo "STATUS=success" >> $GITHUB_OUTPUT
-          else
-            echo "STATUS=failure" >> $GITHUB_OUTPUT
-            if [[ $PASSED_TESTS -eq 0 ]]; then
-              BADGE_COLOR=red
-            else
-              BADGE_COLOR=yellow
-            fi
-          fi
-          echo "LABEL='V100 Unit'" >> $GITHUB_OUTPUT
-          echo "MESSAGE='${PASSED_TESTS}/${SKIPPED_TESTS}/${FAILED_TESTS} pass/skip/fail'" >> $GITHUB_OUTPUT
-          echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT
diff --git a/.github/workflows/_test_rosetta_t5x.yaml b/.github/workflows/_test_rosetta_t5x.yaml
index 7bf6cc150..f6f43d8d2 100644
--- a/.github/workflows/_test_rosetta_t5x.yaml
+++ b/.github/workflows/_test_rosetta_t5x.yaml
@@ -6,26 +6,26 @@ on:
       T5X_IMAGE:
         type: string
         description: T5X image from ghcr.io/nvidia/t5x
-        default: 'ghcr.io/nvidia/t5x:latest'
+        default: "ghcr.io/nvidia/t5x:latest"
         required: false
       BADGE_FILENAME:
         type: string
-        description: 'Name of the endpoint JSON file for shields.io badge'
+        description: "Name of the endpoint JSON file for shields.io badge"
         required: false
-        default: 'badge-rosetta-t5x-mgmn-test.json'
+        default: "badge-rosetta-t5x-mgmn-test.json"
       ARTIFACT_NAME:
         type: string
-        description: 'Name of the artifact zip file'
+        description: "Name of the artifact zip file"
         required: false
-        default: 'artifact-rosetta-t5x-mgmn-test'
+        default: "artifact-rosetta-t5x-mgmn-test"
       FW_NAME:
         type: string
-        description: 'Name of the framework being used'
+        description: "Name of the framework being used"
         required: false
-        default: 'rosetta-t5x'
+        default: "rosetta-t5x"
     outputs:
       TEST_STATUS:
-        description: 'Summary pass/fail value indicating if results from tests are acceptable'
+        description: "Summary pass/fail value indicating if results from tests are acceptable"
         value: ${{ jobs.sitrep.outputs.STATUS }}
 
 env:
@@ -33,7 +33,6 @@ env:
   VIT_BATCH_SIZE_PER_GPU: 256
 
 jobs:
-
   single-process-multi-device:
     strategy:
       matrix:
@@ -63,10 +62,10 @@ jobs:
         uses: webfactory/ssh-agent@v0.9.0
         with:
           ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
-          
+
       - name: Check out the repository under ${GITHUB_WORKSPACE}
         uses: actions/checkout@v4
-        
+
       - name: Setup SSH known hosts
         id: ssh-known-hosts
         run: |
@@ -182,7 +181,7 @@ jobs:
               dump = {'state': "${{ steps.submit.outputs.SLURM_STATE }}", 'exitcode': "${{ steps.submit.outputs.SLURM_EXITCODE }}"}
               json.dump(dump, f)
           EOF
-          
+
       - name: Generate sitrep
         if: success() || failure()
         shell: bash -x -e {0}
@@ -196,7 +195,7 @@ jobs:
           passed_tests=$(jq -r '. | select ((.state == "COMPLETED") and (.exitcode == "0")) | .state' $EXIT_STATUSES | wc -l)
           failed_tests=$(jq -r '. | select ((.state != "COMPLETED") or (.exitcode != "0")) | .state' $EXIT_STATUSES | wc -l)
           total_tests=$(ls $EXIT_STATUSES | wc -l)
-          
+
           if [[ ${failed_tests} > 0 ]] || [[ ${total_tests} == 0 ]]; then
             badge_message='error'
             badge_color=red
@@ -402,7 +401,7 @@ jobs:
           passed_tests=$(jq -r '. | select ((.state == "COMPLETED") and (.exitcode == "0")) | .state' $EXIT_STATUSES | wc -l)
           failed_tests=$(jq -r '. | select ((.state != "COMPLETED") or (.exitcode != "0")) | .state' $EXIT_STATUSES | wc -l)
           total_tests=$(ls $EXIT_STATUSES | wc -l)
-          
+
           if [[ ${failed_tests} > 0 ]] || [[ ${total_tests} == 0 ]]; then
             badge_message='error'
             badge_color=red
@@ -429,7 +428,7 @@ jobs:
           color="${badge_color}" \
           to_json schemaVersion label message color \
           > output/${{ env.BADGE_FILENAME_PREFIX }}-${{ steps.meta.outputs.TEST_CASE_NAME }}.json
- 
+
       - name: Upload training logs as artifacts
         uses: actions/upload-artifact@v4
         with:
@@ -571,7 +570,7 @@ jobs:
           passed_tests=$(jq -r '. | select ((.state == "COMPLETED") and (.exitcode == "0")) | .state' $EXIT_STATUSES | wc -l)
           failed_tests=$(jq -r '. | select ((.state != "COMPLETED") or (.exitcode != "0")) | .state' $EXIT_STATUSES | wc -l)
           total_tests=$(ls $EXIT_STATUSES | wc -l)
-          
+
           if [[ ${failed_tests} > 0 ]] || [[ ${total_tests} == 0 ]]; then
             badge_message='error'
             badge_color=red
@@ -744,7 +743,7 @@ jobs:
           passed_tests=$(jq -r '. | select ((.state == "COMPLETED") and (.exitcode == "0")) | .state' $EXIT_STATUSES | wc -l)
           failed_tests=$(jq -r '. | select ((.state != "COMPLETED") or (.exitcode != "0")) | .state' $EXIT_STATUSES | wc -l)
           total_tests=$(ls $EXIT_STATUSES | wc -l)
-          
+
           if [[ ${failed_tests} > 0 ]] || [[ ${total_tests} == 0 ]]; then
             badge_message='error'
             badge_color=red
@@ -771,7 +770,7 @@ jobs:
           color="${badge_color}" \
           to_json schemaVersion label message color \
           > output/${{ env.BADGE_FILENAME_PREFIX }}-${{ steps.meta.outputs.TEST_CASE_NAME }}.json
- 
+
       - name: Upload training logs as artifacts
         uses: actions/upload-artifact@v4
         with:
@@ -779,7 +778,13 @@ jobs:
           path: output/*
 
   metrics:
-    needs: [multi-gpu-multi-node, single-process-multi-device, vit-single-process-multi-device, vit-multi-gpu-multi-node]
+    needs:
+      [
+        multi-gpu-multi-node,
+        single-process-multi-device,
+        vit-single-process-multi-device,
+        vit-multi-gpu-multi-node,
+      ]
     runs-on: ubuntu-22.04
 
     steps:
@@ -810,7 +815,7 @@ jobs:
           path: |
             report.jsonl
             *_metrics.json
-  
+
   sitrep:
     needs: metrics
     if: "!cancelled()"
@@ -820,10 +825,16 @@ jobs:
       BADGE_FILENAME: ${{ inputs.BADGE_FILENAME }}
       ARTIFACT_NAME: ${{ inputs.ARTIFACT_NAME }}
       FW_NAME: ${{ inputs.FW_NAME }}
-      
+
   summary:
     runs-on: ubuntu-22.04
-    needs: [multi-gpu-multi-node, single-process-multi-device, vit-single-process-multi-device, vit-multi-gpu-multi-node]
+    needs:
+      [
+        multi-gpu-multi-node,
+        single-process-multi-device,
+        vit-single-process-multi-device,
+        vit-multi-gpu-multi-node,
+      ]
     if: "!cancelled()"
     steps:
       - name: Generate TensorBoard query URL
@@ -848,3 +859,89 @@ jobs:
           if [[ ${{ needs.sitrep.outputs.STATUS }} != success ]]; then
             exit 1
           fi
+
+  # Runs the Rosetta pytest suite inside the T5X image on a V100 runner and
+  # uploads the pytest report log as an artifact.
+  rosetta-unit-tests:
+    runs-on: [self-hosted, V100]
+    env:
+      TEST_ARTIFACT_NAME: rosetta-test-logs
+      TEST_LOG_LOCAL_PATH: /log/unit-report.jsonl
+    outputs:
+      # publish-test builds its report glob from this artifact name.
+      TEST_ARTIFACT_NAME: ${{ env.TEST_ARTIFACT_NAME }}
+    steps:
+      - name: Print environment variables
+        run: |
+          env
+
+      - name: Print GPU information
+        run: nvidia-smi
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Pull Rosetta image
+        shell: bash -x -e {0}
+        run: |
+          docker pull ${{ inputs.T5X_IMAGE }}
+          docker tag ${{ inputs.T5X_IMAGE }} rosetta:latest
+
+      # Custom shell: the step script ({0}) is mounted as /cmd.sh and run inside the container.
+      # '|| true' keeps this step green on test failures; publish-test
+      # derives pass/fail from the report log instead.
+      - name: Run Rosetta tests w/ docker
+        shell: docker run --gpus all -v {0}:/cmd.sh -v /log:/log rosetta:latest bash -x -e /cmd.sh
+        run: |
+          ROSETTA_PATH=$(dirname $(python -c "import rosetta; print(*rosetta.__path__)"))
+          pip install "${ROSETTA_PATH}[test]" pytest-reportlog
+          pytest --report-log=${{ env.TEST_LOG_LOCAL_PATH }} ${ROSETTA_PATH} || true
+
+      - name: Upload unit test json logs
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.TEST_ARTIFACT_NAME }}
+          path: ${{ env.TEST_LOG_LOCAL_PATH }}
+
+  # Summarizes the pytest report log from rosetta-unit-tests into badge
+  # fields (STATUS, LABEL, MESSAGE, COLOR); runs even if the tests job failed.
+  publish-test:
+    needs: rosetta-unit-tests
+    uses: ./.github/workflows/_publish_badge.yaml
+    if: ( always() )
+    secrets: inherit
+    with:
+      ENDPOINT_FILENAME: "rosetta-unit-test-status.json"
+      PUBLISH: false
+      SCRIPT: |
+        ARTIFACTS="${{ needs.rosetta-unit-tests.outputs.TEST_ARTIFACT_NAME }}/*.jsonl"
+        all_outcomes() {
+          cat $ARTIFACTS | jq -r '. | select((.["$report_type"] == "TestReport") and (.when == "call")) | .outcome'
+        }
+        cnt_type() {
+          cat $ARTIFACTS | jq '. | select((.["$report_type"] == "TestReport") and (.when == "call") and (.outcome | contains("'${1}'"))) | .outcome' | wc -l
+        }
+        SKIPPED_TESTS=$(cnt_type skipped)
+        FAILED_TESTS=$(cnt_type failed)
+        PASSED_TESTS=$(cnt_type passed)
+        TOTAL_TESTS=$(all_outcomes | wc -l)
+        echo "## Unit/Integration test breakdown" | tee -a $GITHUB_STEP_SUMMARY
+        all_outcomes | sort | uniq -c | tee -a $GITHUB_STEP_SUMMARY
+        if [[ $FAILED_TESTS -eq 0 ]] && [[ $TOTAL_TESTS -gt 0 ]]; then
+          BADGE_COLOR=brightgreen
+          echo "STATUS=success" >> $GITHUB_OUTPUT
+        else
+          echo "STATUS=failure" >> $GITHUB_OUTPUT
+          if [[ $PASSED_TESTS -eq 0 ]]; then
+            BADGE_COLOR=red
+          else
+            BADGE_COLOR=yellow
+          fi
+        fi
+        echo "LABEL='V100 Unit'" >> $GITHUB_OUTPUT
+        echo "MESSAGE='${PASSED_TESTS}/${SKIPPED_TESTS}/${FAILED_TESTS} pass/skip/fail'" >> $GITHUB_OUTPUT
+        echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT

From 80ccc49f8c7461425475a55c71a10c6c97ff6e4b Mon Sep 17 00:00:00 2001
From: Oliver Koenig <okoenig@nvidia.com>
Date: Mon, 22 Apr 2024 16:48:32 +0200
Subject: [PATCH 4/4] style: Use single quotes

---
 .github/workflows/_test_rosetta_t5x.yaml | 66 ++++++++++++------------
 .github/workflows/ci.yaml                | 36 ++++++-------
 2 files changed, 50 insertions(+), 52 deletions(-)

diff --git a/.github/workflows/_test_rosetta_t5x.yaml b/.github/workflows/_test_rosetta_t5x.yaml
index f6f43d8d2..fe121b841 100644
--- a/.github/workflows/_test_rosetta_t5x.yaml
+++ b/.github/workflows/_test_rosetta_t5x.yaml
@@ -6,26 +6,26 @@ on:
       T5X_IMAGE:
         type: string
         description: T5X image from ghcr.io/nvidia/t5x
-        default: "ghcr.io/nvidia/t5x:latest"
+        default: 'ghcr.io/nvidia/t5x:latest'
         required: false
       BADGE_FILENAME:
         type: string
-        description: "Name of the endpoint JSON file for shields.io badge"
+        description: 'Name of the endpoint JSON file for shields.io badge'
         required: false
-        default: "badge-rosetta-t5x-mgmn-test.json"
+        default: 'badge-rosetta-t5x-mgmn-test.json'
       ARTIFACT_NAME:
         type: string
-        description: "Name of the artifact zip file"
+        description: 'Name of the artifact zip file'
         required: false
-        default: "artifact-rosetta-t5x-mgmn-test"
+        default: 'artifact-rosetta-t5x-mgmn-test'
       FW_NAME:
         type: string
-        description: "Name of the framework being used"
+        description: 'Name of the framework being used'
         required: false
-        default: "rosetta-t5x"
+        default: 'rosetta-t5x'
     outputs:
       TEST_STATUS:
-        description: "Summary pass/fail value indicating if results from tests are acceptable"
+        description: 'Summary pass/fail value indicating if results from tests are acceptable'
         value: ${{ jobs.sitrep.outputs.STATUS }}
 
 env:
@@ -37,18 +37,18 @@ jobs:
     strategy:
       matrix:
         include:
-          - TEST_NAME: "1P1G_te-1"
+          - TEST_NAME: '1P1G_te-1'
             N_GPU: 1
-            ADDITIONAL_ARGS: ""
-            EXTRA_GIN_ARGS: "--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False"
-          - TEST_NAME: "1P1G_te-0"
+            ADDITIONAL_ARGS: ''
+            EXTRA_GIN_ARGS: '--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False'
+          - TEST_NAME: '1P1G_te-0'
             N_GPU: 1
-            ADDITIONAL_ARGS: "--enable-te 0"
-            EXTRA_GIN_ARGS: ""
-          - TEST_NAME: "1P8G_te-1"
+            ADDITIONAL_ARGS: '--enable-te 0'
+            EXTRA_GIN_ARGS: ''
+          - TEST_NAME: '1P8G_te-1'
             N_GPU: 8
-            ADDITIONAL_ARGS: ""
-            EXTRA_GIN_ARGS: "--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False"
+            ADDITIONAL_ARGS: ''
+            EXTRA_GIN_ARGS: '--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False'
       fail-fast: false
 
     runs-on: ubuntu-22.04
@@ -233,26 +233,26 @@ jobs:
     strategy:
       matrix:
         include:
-          - TEST_NAME: "1N1G-te-1"
+          - TEST_NAME: '1N1G-te-1'
             N_GPU: 1
             N_NODE: 1
-            ADDITIONAL_ARGS: ""
-            EXTRA_GIN_ARGS: "--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False"
-          - TEST_NAME: "1N8G-te-1"
+            ADDITIONAL_ARGS: ''
+            EXTRA_GIN_ARGS: '--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False'
+          - TEST_NAME: '1N8G-te-1'
             N_GPU: 8
             N_NODE: 1
-            ADDITIONAL_ARGS: ""
-            EXTRA_GIN_ARGS: "--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False"
-          - TEST_NAME: "2N8G-te-1"
+            ADDITIONAL_ARGS: ''
+            EXTRA_GIN_ARGS: '--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False'
+          - TEST_NAME: '2N8G-te-1'
             N_GPU: 8
             N_NODE: 2
-            ADDITIONAL_ARGS: ""
-            EXTRA_GIN_ARGS: "--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False"
-          - TEST_NAME: "2N2G_te-0"
+            ADDITIONAL_ARGS: ''
+            EXTRA_GIN_ARGS: '--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False'
+          - TEST_NAME: '2N2G_te-0'
             N_GPU: 2
             N_NODE: 2
-            ADDITIONAL_ARGS: "--enable-te 0"
-            EXTRA_GIN_ARGS: ""
+            ADDITIONAL_ARGS: '--enable-te 0'
+            EXTRA_GIN_ARGS: ''
       fail-fast: false
 
     runs-on: ubuntu-22.04
@@ -818,7 +818,7 @@ jobs:
 
   sitrep:
     needs: metrics
-    if: "!cancelled()"
+    if: '!cancelled()'
     uses: ./.github/workflows/_sitrep_mgmn.yaml
     secrets: inherit
     with:
@@ -835,7 +835,7 @@ jobs:
         vit-single-process-multi-device,
         vit-multi-gpu-multi-node,
       ]
-    if: "!cancelled()"
+    if: '!cancelled()'
     steps:
       - name: Generate TensorBoard query URL
         run: |
@@ -852,7 +852,7 @@ jobs:
   outcome:
     needs: sitrep
     runs-on: ubuntu-22.04
-    if: "!cancelled()"
+    if: '!cancelled()'
     steps:
       - name: Sets workflow status based on test outputs
         run: |
@@ -905,7 +905,7 @@ jobs:
     if: ( always() )
     secrets: inherit
     with:
-      ENDPOINT_FILENAME: "rosetta-unit-test-status.json"
+      ENDPOINT_FILENAME: 'rosetta-unit-test-status.json'
       PUBLISH: false
       SCRIPT: |
         ARTIFACTS="${{ needs.rosetta-unit-tests.outputs.TEST_ARTIFACT_NAME }}/*.jsonl"
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 0098b83bf..75dddeeb8 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -2,7 +2,7 @@ name: CI
 
 on:
   schedule:
-    - cron: '30 9 * * *'  # Pacific Time 01:30 AM in UTC
+    - cron: '30 9 * * *' # Pacific Time 01:30 AM in UTC
   pull_request:
     types:
       - opened
@@ -25,7 +25,7 @@ on:
         required: false
       MERGE_BUMPED_MANIFEST:
         type: boolean
-        description: "(used if BUMP_MANIFEST=true) If true: attempt to PR/merge manifest branch"
+        description: '(used if BUMP_MANIFEST=true) If true: attempt to PR/merge manifest branch'
         default: false
         required: false
 
@@ -34,16 +34,15 @@ concurrency:
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 
 permissions:
-  contents: write       # to fetch code and push branch
-  actions:  write       # to cancel previous workflows
-  packages: write       # to upload container
-  pull-requests: write  # to make pull request for manifest bump
+  contents: write # to fetch code and push branch
+  actions: write # to cancel previous workflows
+  packages: write # to upload container
+  pull-requests: write # to make pull request for manifest bump
 
 env:
   DEFAULT_MANIFEST_ARTIFACT_NAME: bumped-manifest
 
 jobs:
-
   metadata:
     runs-on: ubuntu-22.04
     outputs:
@@ -115,7 +114,7 @@ jobs:
         shell: bash -x -e {0}
         run: |
           bash bump.sh --input-manifest manifest.yaml --output-manifest manifest.yaml.new --base-patch-dir ./patches-new
-      
+
       - name: Maybe replace current manifest/patches with the new one and show diff
         working-directory: .github/container
         shell: bash -x -e {0}
@@ -168,12 +167,11 @@ jobs:
     steps:
       - name: "Tests Succeeded: ${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}"
         id: test_result
-        run:
-          echo "SUCCEEDED=${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}" | tee -a $GITHUB_OUTPUT
+        run: echo "SUCCEEDED=${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}" | tee -a $GITHUB_OUTPUT
 
       - name: Check out the repository under ${GITHUB_WORKSPACE}
         uses: actions/checkout@v4
-      
+
       - name: Delete checked-out manifest and patches
         run: |
           rm .github/container/manifest.yaml
@@ -185,7 +183,7 @@ jobs:
           name: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }}
           path: .github/container/
 
-      - name: "Create local manifest branch: ${{ needs.metadata.outputs.MANIFEST_BRANCH }}"
+      - name: 'Create local manifest branch: ${{ needs.metadata.outputs.MANIFEST_BRANCH }}'
         id: local_branch
         shell: bash -x -e {0}
         run: |
@@ -213,7 +211,7 @@ jobs:
           git merge --ff-only ${{ needs.metadata.outputs.MANIFEST_BRANCH }}
           # Push the new change
           git push origin ${{ github.ref_name }}
-      
+
       # We will create a Draft PR & remote branch if:
       #  1. The tests failed
       #  2. The merge failed
@@ -244,12 +242,12 @@ jobs:
           draft: true
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      
-      - name: "Log created PR: #${{ fromJson(steps.create_pr.outputs.data).number }}"
+
+      - name: 'Log created PR: #${{ fromJson(steps.create_pr.outputs.data).number }}'
         if: steps.create_pr.outcome == 'success'
         run: |
           echo "https://github.com/NVIDIA/JAX-Toolbox/pull/${{ fromJson(steps.create_pr.outputs.data).number }}" | tee -a $GITHUB_STEP_SUMMARY
-      
+
       # Guard delete in simple check to protect other branches
       - name: Check that the branch matches znightly- prefix
         run: |
@@ -271,7 +269,7 @@ jobs:
 
   make-publish-configs:
     runs-on: ubuntu-22.04
-    if:  ${{ !cancelled() }}
+    if: ${{ !cancelled() }}
     env:
       MEALKIT_IMAGE_REPO: ${{ needs.metadata.outputs.PUBLISH == 'true' && 'jax-mealkit' || 'mock-jax-mealkit' }}
       FINAL_IMAGE_REPO: ${{ needs.metadata.outputs.PUBLISH == 'true' && 'jax' || 'mock-jax' }}
@@ -365,7 +363,7 @@ jobs:
     needs:
       - metadata
       - make-publish-configs
-    if:  ${{ !cancelled() && needs.make-publish-configs.outputs.PUBLISH_CONFIGS.config != '{"config":[]}' }}
+    if: ${{ !cancelled() && needs.make-publish-configs.outputs.PUBLISH_CONFIGS.config != '{"config":[]}' }}
     strategy:
       fail-fast: false
       matrix: ${{ fromJson(needs.make-publish-configs.outputs.PUBLISH_CONFIGS) }}
@@ -381,7 +379,7 @@ jobs:
 
   finalize:
     needs: [metadata, amd64, arm64, publish-containers]
-    if: "!cancelled()"
+    if: '!cancelled()'
     uses: ./.github/workflows/_finalize.yaml
     with:
       BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}