From aeb9b53c7cf2ddfe8e3f1dacc1455191ff2ae5ad Mon Sep 17 00:00:00 2001
From: vr4manta
Date: Tue, 14 Oct 2025 13:16:59 -0400
Subject: [PATCH 1/2] Added provision/deprovision logic for dedicated hosts

---
 .../ipi/conf/aws/ipi-conf-aws-commands.sh     | 55 +++++++++++++++++++
 .../ipi/conf/aws/ipi-conf-aws-ref.yaml        |  6 ++
 .../deprovision/aws/dedicated-hosts/OWNERS    |  1 +
 ...eprovision-aws-dedicated-hosts-commands.sh | 44 +++++++++++++++
 ...sion-aws-dedicated-hosts-ref.metadata.json | 10 ++++
 ...i-deprovision-aws-dedicated-hosts-ref.yaml | 11 ++++
 6 files changed, 127 insertions(+)
 create mode 120000 ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/OWNERS
 create mode 100644 ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-commands.sh
 create mode 100644 ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-ref.metadata.json
 create mode 100644 ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-ref.yaml

diff --git a/ci-operator/step-registry/ipi/conf/aws/ipi-conf-aws-commands.sh b/ci-operator/step-registry/ipi/conf/aws/ipi-conf-aws-commands.sh
index 41c3bf6532833..32047eecfe30b 100755
--- a/ci-operator/step-registry/ipi/conf/aws/ipi-conf-aws-commands.sh
+++ b/ci-operator/step-registry/ipi/conf/aws/ipi-conf-aws-commands.sh
@@ -386,3 +386,58 @@ platform:
 EOF
   yq-go m -a -x -i "${CONFIG}" "${patch_user_provisioned_dns}"
 fi
+
+# Add config for dedicated hosts to compute nodes if job is configured
+if [[ "${DEDICATED_HOST}" == "yes" ]]; then
+  echo "Detected dedicated host configured. Starting install-config patching."
+  patch_dedicated_host="${SHARED_DIR}/install-config-dedicated-host.yaml.patch"
+
+  # Create Host for each zone. If no zones configured, error out. Zones can exist before script execution so we'll pull zone listing out for workers.
+  WORKER_ZONES=$(yq-v4 '.compute[] | select(.name == "worker") | .platform.aws.zones[]' "${CONFIG}")
+  if [[ -z "${WORKER_ZONES}" ]]; then
+    echo "No zones configured, Unable to determine where to create dedicated hosts."
+    # Exit non-zero explicitly: a bare "exit" would reuse the status of the echo above (0) and report success.
+    exit 1
+  fi
+
+  # Seed the patch file before allocating anything; each allocated host is appended to it right away. The deprovision
+  # step reads the host ids back from this file.
+  cat > "${patch_dedicated_host}" << EOF
+compute:
+- name: worker
+  platform:
+    aws:
+      dedicatedHosts:
+        hostAffinity: Host
+        hosts: []
+EOF
+
+  # Instance family of the compute nodes (the part of COMPUTE_NODE_TYPE before the first "."); the same for every zone.
+  HOST_TYPE="${COMPUTE_NODE_TYPE%%.*}"
+  # Word splitting on WORKER_ZONES is intentional: the zone names are whitespace-separated words in the yq output.
+  for zone in ${WORKER_ZONES}; do
+    echo "Creating dedicated host. Region='${aws_source_region}' Zone='${zone}' InstanceFamily='${HOST_TYPE}'"
+
+    EXPIRATION_DATE=$(date -d '6 hours' --iso=minutes --utc)
+    # Build the tag specification with jq so the values are passed as data and always end up as valid, escaped JSON.
+    HOST_SPECS=$(jq -cn \
+      --arg name "${JOB_NAME_SAFE}-${zone}" \
+      --arg job "${JOB_NAME_SAFE}" \
+      --arg expiration "${EXPIRATION_DATE}" \
+      --arg build "${BUILD_ID}_${JOB_NAME}" \
+      '{ResourceType: "dedicated-host", Tags: [{Key: "Name", Value: $name}, {Key: "CI-JOB", Value: $job}, {Key: "expirationDate", Value: $expiration}, {Key: "ci-build-info", Value: $build}]}')
+    HOST_ID=$(aws ec2 allocate-hosts --instance-type "${HOST_TYPE}.4xlarge" --auto-placement 'off' --host-recovery 'off' --tag-specifications "${HOST_SPECS}" --host-maintenance 'off' --quantity '1' --availability-zone "${zone}" --region "${aws_source_region}" | jq -r '.HostIds[0] // empty')
+    if [[ -z "${HOST_ID}" ]]; then
+      echo "Failed to allocate a dedicated host in zone '${zone}'."
+      exit 1
+    fi
+
+    # yq reads these through strenv(), so they have to be in yq's environment; plain shell variables are not visible to it.
+    ZONE_NAME="${zone}" HOST_ID="${HOST_ID}" yq-v4 -i '.compute[] |= (select(.name == "worker") | .platform.aws.dedicatedHosts.hosts += [ { "id": strenv(HOST_ID), "zone": strenv(ZONE_NAME) } ])' "${patch_dedicated_host}"
+  done
+
+  # Update config with host ID
+  echo "Patching install-config.yaml for dedicated hosts."
+  yq-go m -x -i "${CONFIG}" "${patch_dedicated_host}"
+  cp "${patch_dedicated_host}" "${ARTIFACT_DIR}/"
+fi
diff --git a/ci-operator/step-registry/ipi/conf/aws/ipi-conf-aws-ref.yaml b/ci-operator/step-registry/ipi/conf/aws/ipi-conf-aws-ref.yaml
index 6161ac5e75576..a2fb4a6a2315d 100644
--- a/ci-operator/step-registry/ipi/conf/aws/ipi-conf-aws-ref.yaml
+++ b/ci-operator/step-registry/ipi/conf/aws/ipi-conf-aws-ref.yaml
@@ -106,5 +106,11 @@ ref:
     documentation: |-
       Experimental feature allowing jobs to use NAT instances instead of NAT gateways, in certain accounts, for cost
       reduction purposes.
+  - name: DEDICATED_HOST
+    default: "no"
+    documentation: |-
+      Allows users to enable configuration of dedicated hosts for compute nodes. Valid options are "yes" and "no". When "yes",
+      one dedicated host is created for each zone configured on the "worker" compute pool, and the step fails if that pool has
+      no zones. Jobs that enable this must run the ipi-deprovision-aws-dedicated-hosts step in post to release the hosts.
   documentation: |-
     The IPI AWS configure step generates the AWS-specific install-config.yaml contents based on the cluster profile and optional input files.
diff --git a/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/OWNERS b/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/OWNERS
new file mode 120000
index 0000000000000..ec405d65a79df
--- /dev/null
+++ b/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/OWNERS
@@ -0,0 +1 @@
+../OWNERS
\ No newline at end of file
diff --git a/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-commands.sh b/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-commands.sh
new file mode 100644
index 0000000000000..bd990d8b52e16
--- /dev/null
+++ b/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-commands.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+set -o nounset
+set -o errexit
+set -o pipefail
+
+export AWS_SHARED_CREDENTIALS_FILE="${CLUSTER_PROFILE_DIR}/.awscred"
+CONFIG="${SHARED_DIR}/install-config.yaml"
+patch_dedicated_host="${SHARED_DIR}/install-config-dedicated-host.yaml.patch"
+
+if [[ ! -f "${patch_dedicated_host}" ]]; then
+  echo "No dedicated hosts patch file found, so assuming patch never occurred."
+  exit 0
+fi
+
+echo "Deprovisioning dedicated hosts..."
+
+# We get the region information from the install-config.yaml. For the dedicated hosts, we are pulling from the patch file in
+# the event that an error occurred during creation of the dedicated host.
+REGION=$(yq-v4 -r '.platform.aws.region' "${CONFIG}")
+# Resolve the host list up front: a yq failure inside the "for" word list would not be caught by errexit.
+HOST_IDS=$(yq-v4 -r '.compute[] | select(.name == "worker") | .platform.aws.dedicatedHosts.hosts[] | .id' "${patch_dedicated_host}")
+
+# Keep going after a failed release so that one bad host does not leak all the remaining ones.
+ret=0
+for HOST in ${HOST_IDS}; do
+  # Skip entries that carry no real host id.
+  if [[ "${HOST}" == "null" ]]; then
+    continue
+  fi
+  echo "Release host ${HOST}"
+  if ! RESULT=$(aws ec2 release-hosts --region "${REGION}" --host-ids "${HOST}" --output json); then
+    echo "Failed to release host ${HOST}"
+    ret=1
+    continue
+  fi
+  echo "${RESULT}"
+  # release-hosts lists hosts it could not release under "Unsuccessful" instead of failing the command.
+  if [[ "$(jq -r '(.Unsuccessful // []) | length' <<< "${RESULT}")" != "0" ]]; then
+    echo "Host ${HOST} was not released"
+    ret=1
+  fi
+done
+
+exit "${ret}"
diff --git a/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-ref.metadata.json b/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-ref.metadata.json
new file mode 100644
index 0000000000000..2cd57dcd5665b
--- /dev/null
+++ b/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-ref.metadata.json
@@ -0,0 +1,10 @@
+{
+	"path": "ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-ref.yaml",
+	"owners": {
+		"approvers": [
+			"jhixson74",
+			"patrickdillon",
+			"barbacbd"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-ref.yaml b/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-ref.yaml
new file mode 100644
index 0000000000000..9799edfa51e0d
--- /dev/null
+++ b/ci-operator/step-registry/ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-ref.yaml
@@ -0,0 +1,11 @@
+ref:
+  as: ipi-deprovision-aws-dedicated-hosts
+  from: upi-installer
+  grace_period: 10m
+  commands: ipi-deprovision-aws-dedicated-hosts-commands.sh
+  resources:
+    requests:
+      cpu: 300m
+      memory: 300Mi
+  documentation: |-
+    This deprovision step tears down any dedicated hosts that were provisioned for AWS IPI dedicated host feature.
From 007bd06fab3789ea7791b223fb3c4619ee2ac97d Mon Sep 17 00:00:00 2001 From: vr4manta Date: Tue, 14 Oct 2025 13:31:46 -0400 Subject: [PATCH 2/2] Added AWS dedicated hosts job --- ...penshift-release-master__nightly-4.21.yaml | 19 +++++ .../openshift-release-master-periodics.yaml | 77 +++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/ci-operator/config/openshift/release/openshift-release-master__nightly-4.21.yaml b/ci-operator/config/openshift/release/openshift-release-master__nightly-4.21.yaml index f27d0cee80c7e..d318de209d71c 100644 --- a/ci-operator/config/openshift/release/openshift-release-master__nightly-4.21.yaml +++ b/ci-operator/config/openshift/release/openshift-release-master__nightly-4.21.yaml @@ -783,6 +783,25 @@ tests: FEATURE_SET: TechPreviewNoUpgrade workflow: openshift-e2e-vsphere timeout: 6h0m0s +- as: e2e-aws-ovn-dedicated + cron: '@yearly' + steps: + cluster_profile: aws + env: + DEDICATED_HOST: "yes" + leases: + - env: LEASED_RESOURCE + resource_type: aws-edge-zones-quota-slice + observers: + enable: + - observers-resource-watch + post: + - chain: gather-network + - chain: gather-core-dump + - chain: ipi-deprovision + - ref: ipi-deprovision-aws-dedicated-hosts + workflow: openshift-e2e-aws-ovn + timeout: 6h0m0s - as: e2e-aws-ovn-fips interval: 168h steps: diff --git a/ci-operator/jobs/openshift/release/openshift-release-master-periodics.yaml b/ci-operator/jobs/openshift/release/openshift-release-master-periodics.yaml index b1d6267bc69af..e45d0799bcb1a 100644 --- a/ci-operator/jobs/openshift/release/openshift-release-master-periodics.yaml +++ b/ci-operator/jobs/openshift/release/openshift-release-master-periodics.yaml @@ -162029,6 +162029,83 @@ periodics: - name: result-aggregator secret: secretName: result-aggregator +- agent: kubernetes + cluster: build11 + cron: '@yearly' + decorate: true + decoration_config: + skip_cloning: true + timeout: 6h0m0s + extra_refs: + - base_ref: master + org: openshift + repo: release + labels: + 
ci-operator.openshift.io/cloud: aws + ci-operator.openshift.io/cloud-cluster-profile: aws + ci-operator.openshift.io/variant: nightly-4.21 + ci.openshift.io/generator: prowgen + ci.openshift.io/no-builds: "true" + job-release: "4.21" + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: periodic-ci-openshift-release-master-nightly-4.21-e2e-aws-ovn-dedicated + spec: + containers: + - args: + - --gcs-upload-secret=/secrets/gcs/service-account.json + - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson + - --lease-server-credentials-file=/etc/boskos/credentials + - --report-credentials-file=/etc/report/credentials + - --secret-dir=/secrets/ci-pull-credentials + - --target=e2e-aws-ovn-dedicated + - --variant=nightly-4.21 + command: + - ci-operator + image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest + imagePullPolicy: Always + name: "" + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /etc/boskos + name: boskos + readOnly: true + - mountPath: /secrets/ci-pull-credentials + name: ci-pull-credentials + readOnly: true + - mountPath: /secrets/gcs + name: gcs-credentials + readOnly: true + - mountPath: /secrets/manifest-tool + name: manifest-tool-local-pusher + readOnly: true + - mountPath: /etc/pull-secret + name: pull-secret + readOnly: true + - mountPath: /etc/report + name: result-aggregator + readOnly: true + serviceAccountName: ci-operator + volumes: + - name: boskos + secret: + items: + - key: credentials + path: credentials + secretName: boskos-credentials + - name: ci-pull-credentials + secret: + secretName: ci-pull-credentials + - name: manifest-tool-local-pusher + secret: + secretName: manifest-tool-local-pusher + - name: pull-secret + secret: + secretName: registry-pull-credentials + - name: result-aggregator + secret: + secretName: result-aggregator - agent: kubernetes cluster: build11 cron: '@weekly'