From 89fc39eeea7e54a549c7dd9d279a2406c56897a5 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 13 Aug 2025 13:34:34 +0200 Subject: [PATCH] [no-relnote] Update internal CI definition This change updates the internal CI definition to be equivalent to that of the k8s-dra-driver. Signed-off-by: Evan Lezar --- .common-ci.yml | 190 ---------------------------------------- .nvidia-ci.yml | 232 ++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 202 insertions(+), 220 deletions(-) delete mode 100644 .common-ci.yml diff --git a/.common-ci.yml b/.common-ci.yml deleted file mode 100644 index e20148bb9..000000000 --- a/.common-ci.yml +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-default: - image: docker:24.0.6 - services: - - name: docker:24.0.6-dind - command: ["--experimental"] - -variables: - GIT_SUBMODULE_STRATEGY: recursive - BUILD_MULTI_ARCH_IMAGES: "true" - -stages: - - trigger - - package-build - - image-build - - test - - scan - - release - -.pipeline-trigger-rules: - rules: - # We trigger the pipeline if started manually - - if: $CI_PIPELINE_SOURCE == "web" - # We trigger the pipeline on the main branch - - if: $CI_COMMIT_BRANCH == "main" - # We trigger the pipeline on the release- branches - - if: $CI_COMMIT_BRANCH =~ /^release-.*$/ - # We trigger the pipeline on tags - - if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != "" - -workflow: - rules: - # We trigger the pipeline on a merge request - - if: $CI_PIPELINE_SOURCE == 'merge_request_event' - # We then add all the regular triggers - - !reference [.pipeline-trigger-rules, rules] - -# The main or manual job is used to filter out distributions or architectures that are not required on -# every build. -.main-or-manual: - rules: - - !reference [.pipeline-trigger-rules, rules] - - if: $CI_PIPELINE_SOURCE == "schedule" - when: manual - -# The trigger-pipeline job adds a manualy triggered job to the pipeline on merge requests. 
-trigger-pipeline: - stage: trigger - script: - - echo "starting pipeline" - rules: - - !reference [.main-or-manual, rules] - - if: $CI_PIPELINE_SOURCE == "merge_request_event" - when: manual - allow_failure: false - - when: always - -# Define the platform targets -.platform-amd64: - variables: - PLATFORM: linux/amd64 - -.platform-arm64: - variables: - PLATFORM: linux/arm64 - -# Make buildx available as a docker CLI plugin -.buildx-setup: - before_script: - - export BUILDX_VERSION=v0.6.3 - - apk add --no-cache curl - - mkdir -p ~/.docker/cli-plugins - - curl -sSLo ~/.docker/cli-plugins/docker-buildx "https://github.com/docker/buildx/releases/download/${BUILDX_VERSION}/buildx-${BUILDX_VERSION}.linux-amd64" - - chmod a+x ~/.docker/cli-plugins/docker-buildx - - - docker buildx create --use --platform=linux/amd64,linux/arm64 - - - '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes' - -# The .scan step forms the base of the image scan operation performed before releasing -# images. We implement a .scan-base job here to allow for the variable checks to be -# exercised before internal releases. 
-.scan-base: - stage: scan - variables: - IMAGE: "${CI_REGISTRY_IMAGE}/k8s-device-plugin:${CI_COMMIT_SHORT_SHA}" - IMAGE_ARCHIVE: "k8s-device-plugin.tar" - except: - variables: - - $SKIP_SCANS && $SKIP_SCANS == "yes" - before_script: - - docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}" - - docker pull --platform="${PLATFORM}" "${IMAGE}" - - docker save "${IMAGE}" -o "${IMAGE_ARCHIVE}" - script: - - > - echo "Skipping scan of ${IMAGE}" - -.scan: - extends: - - .scan-base - -# Define the scan targets -scan-amd64: - extends: - - .scan - - .platform-amd64 - needs: - - image-pull - -scan-arm64: - extends: - - .scan - - .platform-arm64 - needs: - - image-pull - - scan-amd64 - -# Download the regctl binary for use in the release steps -.regctl-setup: - before_script: - - export REGCTL_VERSION=v0.3.10 - - apk add --no-cache curl - - mkdir -p bin - - curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64 - - chmod a+x bin/regctl - - export PATH=$(pwd)/bin:${PATH} - -# .release forms the base of the deployment jobs which push images to the CI registry. -# This is extended with the version to be deployed (e.g. the SHA or TAG) and the -# target os. -.release: - stage: release - variables: - # Define the source image for the release - IMAGE_NAME: "${CI_REGISTRY_IMAGE}/k8s-device-plugin" - VERSION: "${CI_COMMIT_SHORT_SHA}" - # OUT_IMAGE_VERSION is overridden for external releases - OUT_IMAGE_VERSION: "${CI_COMMIT_SHORT_SHA}" - before_script: - - !reference [.regctl-setup, before_script] - - # We ensure that the OUT_IMAGE_VERSION and OUT_IMAGE_NAME are set - - 'echo Version: ${OUT_IMAGE_VERSION} ; [[ -n "${OUT_IMAGE_VERSION}" ]] || exit 1' - - 'echo Version: ${OUT_IMAGE_NAME} ; [[ -n "${OUT_IMAGE_NAME}" ]] || exit 1' - - # In the case where we are deploying a different version to the CI_COMMIT_SHA, we - # need to tag the image. 
- # Note: a leading 'v' is stripped from the version if present - - apk add --no-cache make bash - script: - # Log in to the "output" registry, tag the image and push the image - - 'echo "Logging in to CI registry ${CI_REGISTRY}"' - - regctl registry login "${CI_REGISTRY}" -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" - - '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || echo "Logging in to output registry ${OUT_REGISTRY}"' - - '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"' - - # Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the - # Target - - make -f deployments/container/Makefile push-image - -# Define a staging release step that pushes an image to an internal "staging" repository -# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps -# outside of the release process. -.release:staging: - extends: - - .release - variables: - OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}" - OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}" - OUT_REGISTRY: "${NGC_REGISTRY}" - OUT_IMAGE_NAME: "${NGC_STAGING_REGISTRY}/k8s-device-plugin" - -release:staging: - extends: - - .release:staging - needs: - - image-pull diff --git a/.nvidia-ci.yml b/.nvidia-ci.yml index d2302ca8e..4baa28f7f 100644 --- a/.nvidia-ci.yml +++ b/.nvidia-ci.yml @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-include:
-  - local: '.common-ci.yml'
-
 default:
+  image: docker
+  services:
+    - name: docker:dind
+      command: ["--experimental"]
   tags:
     - cnt
     - container-dev
@@ -25,6 +26,8 @@ default:
     - type/docker
 
 variables:
+  GIT_SUBMODULE_STRATEGY: recursive
+  DOCKER_DRIVER: overlay2
   DOCKER_TLS_CERTDIR: "/certs"
 
   # Release "devel"-tagged images off the main branch
@@ -36,38 +39,104 @@ variables:
   STAGING_REGISTRY: ghcr.io/nvidia
   STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
 
-.image-pull:
-  stage: image-build
+stages:
+  - pull
+  - scan
+  - release
+  - ngc-publish
+
+.pipeline-trigger-rules:
+  rules:
+    # We trigger the pipeline if started manually from the web UI
+    - if: $CI_PIPELINE_SOURCE == "web"
+    # We trigger the pipeline on the main branch
+    - if: $CI_COMMIT_BRANCH == "main"
+    # We trigger the pipeline on the release- branches
+    - if: $CI_COMMIT_BRANCH =~ /^release-.*$/
+    # We trigger the pipeline on tags (CI_COMMIT_TAG set and non-empty)
+    - if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
+
+workflow:
+  rules:
+    # We trigger the pipeline on a merge request
+    - if: $CI_PIPELINE_SOURCE == 'merge_request_event'
+    # We then add all the regular triggers
+    - !reference [.pipeline-trigger-rules, rules]
+
+# Download a pinned regctl release into ./bin and prepend that directory to the PATH
+.regctl-setup:
+  before_script:
+    - export REGCTL_VERSION=v0.4.5
+    - apk add --no-cache curl
+    - mkdir -p bin
+    - curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
+    - chmod a+x bin/regctl
+    - export PATH=$(pwd)/bin:${PATH}
+
+# .copy-images copies the required application and packaging images from the
+# IN_IMAGE="${IN_IMAGE_NAME}:${IN_IMAGE_TAG}${TAG_SUFFIX}"
+# to
+# OUT_IMAGE="${OUT_IMAGE_NAME}:${OUT_IMAGE_TAG}${TAG_SUFFIX}"
+# The script also logs in to IN_REGISTRY and OUT_REGISTRY using the supplied
+# usernames and tokens.
+.copy-images:
+  parallel:
+    matrix:
+      - TAG_SUFFIX: [""]
+  before_script:
+    - !reference [.regctl-setup, before_script]
+    - apk add --no-cache make bash
+  variables:
+    REGCTL: regctl
+  script:  # the [ -n ... ] operands are quoted so that unset variables skip the login
+    - |
+      if [ -n "${IN_REGISTRY}" ] && [ -n "${IN_REGISTRY_USER}" ]; then
+        echo "Logging in to ${IN_REGISTRY}"
+        ${REGCTL} registry login "${IN_REGISTRY}" -u "${IN_REGISTRY_USER}" -p "${IN_REGISTRY_TOKEN}" || exit 1
+      fi
+
+      if [ -n "${OUT_REGISTRY}" ] && [ -n "${OUT_REGISTRY_USER}" ] && [ "${IN_REGISTRY}" != "${OUT_REGISTRY}" ]; then
+        echo "Logging in to ${OUT_REGISTRY}"
+        ${REGCTL} registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}" || exit 1
+      fi
+
+      export IN_IMAGE="${IN_IMAGE_NAME}:${IN_IMAGE_TAG}${TAG_SUFFIX}"
+      export OUT_IMAGE="${OUT_IMAGE_NAME}:${OUT_IMAGE_TAG}${TAG_SUFFIX}"
+
+      echo "Copying ${IN_IMAGE} to ${OUT_IMAGE}"
+      ${REGCTL} image copy "${IN_IMAGE}" "${OUT_IMAGE}"
+
+# pull-images pulls images from the public CI registry to the internal CI registry.
+pull-images:
+  extends:
+    - .copy-images
+  stage: pull
   variables:
     IN_REGISTRY: "${STAGING_REGISTRY}"
-    IN_IMAGE_NAME: k8s-device-plugin
-    IN_VERSION: "${STAGING_VERSION}"
+    IN_IMAGE_NAME: ${STAGING_REGISTRY}/k8s-device-plugin
+    IN_IMAGE_TAG: "${STAGING_VERSION}"
+
+    OUT_REGISTRY: "${CI_REGISTRY}"
     OUT_REGISTRY_USER: "${CI_REGISTRY_USER}"
     OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
-    OUT_REGISTRY: "${CI_REGISTRY}"
     OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/k8s-device-plugin"
-    PUSH_MULTIPLE_TAGS: "false"
+    OUT_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
   # We delay the job start to allow the public pipeline to generate the required images.
-  when: delayed
-  start_in: 30 minutes
+  rules:
+    # If the pipeline is triggered from a tag or the web UI we don't delay the
+    # start of the pipeline.
+    - if: $CI_COMMIT_TAG || $CI_PIPELINE_SOURCE == "web"
+    # If the pipeline is triggered through other means (i.e. a branch or MR)
+    # we add a 30 minute delay to ensure that the images are available in the
+    # public CI registry.
+    - when: delayed
+      start_in: 30 minutes
   timeout: 30 minutes
   retry:
     max: 2
     when:
       - job_execution_timeout
       - stuck_or_timeout_failure
-  before_script:
-    - !reference [.regctl-setup, before_script]
-    - apk add --no-cache make bash
-    - >
-      regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION} does not exist" && sleep infinity )
-  script:
-    - regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
-    - make -f deployments/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA} push-image
-
-image-pull:
-  extends:
-    - .image-pull
 
 # We skip the integration tests for the internal CI:
 .integration:
@@ -79,17 +148,32 @@ image-pull:
 
 # The .scan step forms the base of the image scan operation performed before releasing
 # images.
-.scan:
-  extends:
-    - .scan-base
+scan-images:
   stage: scan
+  needs:
+    - pull-images
   image: "${PULSE_IMAGE}"
+  parallel:
+    matrix:
+      - TAG_SUFFIX: [""]
+        PLATFORM: ["linux/amd64", "linux/arm64"]
+  variables:
+    IMAGE: "${CI_REGISTRY_IMAGE}/k8s-device-plugin:${CI_COMMIT_SHORT_SHA}"
+    IMAGE_ARCHIVE: "k8s-device-plugin-${CI_JOB_ID}.tar"
+  allow_failure: true
   script:
-    - AuthHeader=$(echo -n $SSA_CLIENT_ID:$SSA_CLIENT_SECRET | base64 -w0)
-    - >
+    - |
+      docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
+      export SCAN_IMAGE=${IMAGE}${TAG_SUFFIX}
+      echo "Scanning image ${SCAN_IMAGE} for ${PLATFORM}"
+      docker pull --platform="${PLATFORM}" "${SCAN_IMAGE}"
+      docker save "${SCAN_IMAGE}" -o "${IMAGE_ARCHIVE}"
+      AuthHeader=$(echo -n $SSA_CLIENT_ID:$SSA_CLIENT_SECRET | base64 -w0)
       export SSA_TOKEN=$(curl --request POST --header "Authorization: Basic $AuthHeader" --header "Content-Type: application/x-www-form-urlencoded" ${SSA_ISSUER_URL} | jq ".access_token" | tr -d '"')
-    - if [ -z "$SSA_TOKEN" ]; then exit 1; else echo "SSA_TOKEN set!"; fi
-    - pulse-cli -n $NSPECT_ID --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o
+      if [ -z "$SSA_TOKEN" ] || [ "$SSA_TOKEN" = "null" ]; then echo "Failed to obtain SSA_TOKEN" >&2; exit 1; else echo "SSA_TOKEN set!"; fi  # jq prints null when access_token is absent
+
+      pulse-cli -n $NSPECT_ID --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o
+      rm -f "${IMAGE_ARCHIVE}"
   artifacts:
     when: always
     expire_in: 1 week
@@ -100,3 +184,91 @@ image-pull:
       - vulns.json
       - policy_evaluation.json
 
+push-images-to-staging:
+  extends:
+    - .copy-images
+  stage: release
+  needs:
+    - scan-images
+  variables:
+    IN_REGISTRY: "${CI_REGISTRY}"
+    IN_REGISTRY_USER: "${CI_REGISTRY_USER}"
+    IN_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
+    IN_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/k8s-device-plugin"
+    IN_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
+
+    OUT_REGISTRY: "${NGC_REGISTRY}"
+    OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
+    OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
+    OUT_IMAGE_NAME: "${NGC_STAGING_REGISTRY}/k8s-device-plugin"
+    OUT_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
+
+.publish-images:
+  stage: ngc-publish
+  needs:
+    - scan-images
+    - push-images-to-staging
+  image:
+    name: "${CNT_NGC_PUBLISH_IMAGE}"
+    pull_policy: always
+  # Variables shared by every publishing job. Extending jobs may override
+  # OUT_IMAGE_TAG and set OVERRIDE_PUBLISHING_PROJECT_PATH.
+  variables:
+    GITLAB_ACCESS_TOKEN: "${CNT_GITLAB_TOKEN}"
+
+    IN_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
+    OUT_IMAGE_TAG: "${CI_COMMIT_TAG}"
+
+    VERSION_FILE: "build-info-${CI_PIPELINE_ID}.txt"
+    PROJECT_NAME: "k8s-device-plugin"
+    # Allow for setting nspect program version manually.
+    # The default empty string value results in the key
+    # being omitted from the publishing doc (which is
+    # valid).
+    NSPECT_PROGRAM_VERSION: ""
+  before_script:
+    - |
+      if [ -n "${OVERRIDE_PUBLISHING_PROJECT_PATH}" ]; then
+        NGC_PUBLISHING_PROJECT_PATH="${OVERRIDE_PUBLISHING_PROJECT_PATH}"
+      fi
+
+      if [ -z "${NGC_PUBLISHING_PROJECT_PATH}" ]; then
+        echo "NGC_PUBLISHING_PROJECT_PATH not set"
+        exit 1
+      fi
+
+      echo "publishing to ${NGC_PUBLISHING_PROJECT_PATH}"
+
+      rm -f ${VERSION_FILE}
+      echo "${IN_IMAGE_TAG} ${OUT_IMAGE_TAG}" >> ${VERSION_FILE}
+      cat ${VERSION_FILE}
+  script:
+    - |
+      cnt-ngc-publish render \
+        --project-name "${PROJECT_NAME}" \
+        --versions-file "${VERSION_FILE}" \
+        --output "${PROJECT_NAME}".yaml \
+        --nspect-program-version "${NSPECT_PROGRAM_VERSION}"
+    - cnt-ngc-publish merge-request --files "${PROJECT_NAME}.yaml"
+  artifacts:
+    paths:
+      - "${VERSION_FILE}"
+      - "${PROJECT_NAME}.yaml"
+
+
+publish-images-to-ngc:
+  extends:
+    - .publish-images
+  rules:
+    - if: $CI_COMMIT_TAG
+
+# We create a dummy MR that exercises the publishing logic.
+# TODO: This MR should be closed automatically.
+publish-images-dummy:
+  extends:
+    - .publish-images
+  variables:
+    OVERRIDE_PUBLISHING_PROJECT_PATH: "dl/container-dev/ngc-automation"
+    OUT_IMAGE_TAG: "publish-${CI_COMMIT_SHORT_SHA}"
+  rules:
+    - if: $CI_COMMIT_TAG == null || $CI_COMMIT_TAG == ""