diff --git a/cli/src/pcluster/resources/imagebuilder/parallelcluster.yaml b/cli/src/pcluster/resources/imagebuilder/parallelcluster.yaml index 326a8c25f9..b20c29c7d7 100644 --- a/cli/src/pcluster/resources/imagebuilder/parallelcluster.yaml +++ b/cli/src/pcluster/resources/imagebuilder/parallelcluster.yaml @@ -42,8 +42,8 @@ phases: commands: - | set -v - COOKBOOK_URL="https://${AWS::Region}-aws-parallelcluster.s3.${AWS::Region}.${AWS::URLSuffix}/parallelcluster/${CfnParamCookbookVersion}/cookbooks/{{ build.PClusterCookbookVersionName.outputs.stdout }}.tgz" - [ -n "${CfnParamChefCookbook}" ] && COOKBOOK_URL="${CfnParamChefCookbook}" + COOKBOOK_URL="${CfnParamChefCookbook}" + [ -z "${!COOKBOOK_URL}" ] && COOKBOOK_URL="https://${AWS::Region}-aws-parallelcluster.s3.${AWS::Region}.${AWS::URLSuffix}/parallelcluster/${CfnParamCookbookVersion}/cookbooks/{{ build.PClusterCookbookVersionName.outputs.stdout }}.tgz" echo "${!COOKBOOK_URL}" # Get Cinc Url @@ -81,7 +81,6 @@ phases: - | set -v RELEASE='{{ build.OperatingSystemRelease.outputs.stdout }}' - if [ `echo "${!RELEASE}" | grep -w '^amzn\.2'` ]; then OS='alinux2' elif [ `echo "${!RELEASE}" | grep -w '^amzn\.2023'` ]; then @@ -104,7 +103,6 @@ phases: echo "Operating System '${!RELEASE}' is not supported. Failing build." exit {{ FailExitCode }} fi - echo ${!OS} - name: OperatingSystemVersion @@ -130,13 +128,11 @@ phases: - | set -v OS='{{ build.OperatingSystemName.outputs.stdout }}' - if [ `echo "${!OS}" | grep -E '^(alinux|rhel|rocky)'` ]; then PLATFORM='RHEL' elif [ `echo "${!OS}" | grep -E '^ubuntu'` ]; then PLATFORM='DEBIAN' fi - echo ${!PLATFORM} # Get input base AMI Architecture @@ -173,7 +169,6 @@ phases: echo "This component does not support '${!RELEASE}'. Failing build." 
exit {{ FailExitCode }} fi - # This component only supports aarch64 CPUs on Amazon Linux 2, Ubuntu2004, Ubuntu2204, RHEL8, Rocky8, RHEL9 and Rocky9 ARCH=$(uname -m) if [[ `echo ${!ARCH}` == 'aarch64' ]]; then @@ -206,7 +201,6 @@ phases: if [[ ${!OS} != "rocky8" ]] && [[ ${!OS} != "rhel8" ]]; then PACKAGE_LIST+=" kernel-devel-matched-$(uname -r)" fi - if [[ ${!OS} == "rocky8" ]] || [[ ${!OS} == "rocky9" ]] ; then for PACKAGE in ${!PACKAGE_LIST} do @@ -224,7 +218,6 @@ phases: yum -y install ${!PACKAGE} done fi - yum install -y yum-plugin-versionlock # listing all the packages because wildcard does not work as expected yum versionlock kernel kernel-core kernel-modules @@ -263,7 +256,6 @@ phases: set -v OS='{{ build.OperatingSystemName.outputs.stdout }}' PLATFORM='{{ build.PlatformName.outputs.stdout }}' - if [[ ${!PLATFORM} == RHEL ]]; then yum -y update krb5-libs yum -y groupinstall development && sudo yum -y install wget jq @@ -299,7 +291,6 @@ phases: - | set -v PLATFORM='{{ build.PlatformName.outputs.stdout }}' - if [[ ${!PLATFORM} == RHEL ]]; then CA_CERTS_FILE=/etc/ssl/certs/ca-bundle.crt yum -y upgrade ca-certificates @@ -307,17 +298,13 @@ phases: CA_CERTS_FILE=/etc/ssl/certs/ca-certificates.crt apt-get -y --only-upgrade install ca-certificates fi - curl --retry 3 -L {{ build.CincUrl.outputs.stdout }} | bash -s -- -v {{ ChefVersion }} - if [[ -e ${!CA_CERTS_FILE} ]]; then mkdir -p /opt/cinc/embedded/ssl/certs ln -sf ${!CA_CERTS_FILE} /opt/cinc/embedded/ssl/certs/cacert.pem fi - curl --retry 3 -L -o gems.tgz https://${AWS::Region}-aws-parallelcluster.s3.${AWS::Region}.${AWS::URLSuffix}/archives/dependencies/ruby/gems.tgz tar -xf gems.tgz - cd vendor/cache /opt/cinc/embedded/bin/gem install --no-document minitar:0.9 /opt/cinc/embedded/bin/gem install --local --no-document berkshelf:{{ BerkshelfVersion }} @@ -330,13 +317,10 @@ phases: - | set -v mkdir -p /etc/chef && sudo chown -R root:root /etc/chef - curl --retry 3 -L -o /etc/chef/aws-parallelcluster-cookbook.tgz 
"{{ build.CookbookUrl.outputs.stdout }}" - mkdir -p /tmp/cookbooks cd /tmp/cookbooks tar -xzf /etc/chef/aws-parallelcluster-cookbook.tgz - export HOME="/tmp" for dir in $(ls /tmp/cookbooks); do cd /tmp/cookbooks/${!dir} @@ -388,7 +372,6 @@ phases: # Remove kernel version lock if [[ ${!PLATFORM} == RHEL ]]; then yum versionlock delete kernel kernel-core kernel-modules - if [[ ${!OS} == "rocky8" ]] || [[ ${!OS} == "rocky9" ]] ; then yum versionlock delete rocky-release rocky-repos elif [[ ${!OS} == "rhel8" ]] || [[ ${!OS} == "rhel9" ]] ; then diff --git a/cli/src/pcluster/resources/imagebuilder/parallelcluster_test.yaml b/cli/src/pcluster/resources/imagebuilder/parallelcluster_test.yaml index cbc5acb6dd..983550a74c 100644 --- a/cli/src/pcluster/resources/imagebuilder/parallelcluster_test.yaml +++ b/cli/src/pcluster/resources/imagebuilder/parallelcluster_test.yaml @@ -31,7 +31,6 @@ phases: - | set -v RELEASE='{{ test.OSRelease.outputs.stdout }}' - if [ `echo "${RELEASE}" | grep -w '^amzn\.2'` ]; then OS='alinux2' elif [ `echo "${RELEASE}" | grep -w '^amzn\.2023'` ]; then @@ -53,7 +52,6 @@ phases: else echo "Operating System '${RELEASE}' is not supported. Failing build." && exit 1 fi - echo ${OS} - name: OSArchitecture @@ -82,13 +80,11 @@ phases: - | set -v OS='{{ test.OSName.outputs.stdout }}' - if [ `echo "${OS}" | grep -E '^(alinux|centos|rhel|rocky)'` ]; then PLATFORM='RHEL' elif [ `echo "${OS}" | grep -E '^ubuntu'` ]; then PLATFORM='DEBIAN' fi - echo ${PLATFORM} - name: IntelMPISupported @@ -212,13 +208,11 @@ phases: - | set -vx PLATFORM='{{ test.PlatformName.outputs.stdout }}' - if [ {{ test.IntelMPISupported.outputs.stdout }} == true ]; then echo "Checking efa packages installed..." if [ ${PLATFORM} == RHEL ]; then rpm -qa | grep libfabric && rpm -qa | grep efa- [[ $? -ne 0 ]] && echo "Check efa rpm failed" && exit 1 - echo "Checking Intel MPI 20xx installed and module available..." 
unset MODULEPATH source /etc/profile.d/modules.sh @@ -238,22 +232,18 @@ phases: - | set -vx PLATFORM='{{ test.PlatformName.outputs.stdout }}' - if [[ {{ test.NvidiaEnabled.outputs.stdout }} == 'no' ]]; then echo "Nvidia recipe not enabled, skipping." && exit 0 fi if [ {{ test.HasGPU.outputs.stdout }} == "false" ]; then echo "No GPU detected, skipping." && exit 0 fi - driver_ver="{{ test.NvidiaVersion.outputs.stdout }}" export PATH="/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/opt/aws/bin" - echo "Testing Nvidia driver version" driver_output=$(nvidia-smi | grep -E -o "Driver Version: [0-9.]+") - [[ "${driver_output}" != "Driver Version: ${driver_ver}" ]] && "ERROR Installed version ${driver_output} but expected ${driver_ver}" && exit 1 + [[ "${driver_output}" != "Driver Version: ${driver_ver}" ]] && echo "ERROR Installed version ${driver_output} but expected ${driver_ver}" && exit 1 echo "Correctly installed Nvidia ${driver_output}" - if [ {{ test.FabricManagerSupported.outputs.stdout }} == "true" ]; then echo "Testing Nvidia Fabric Manager version" nvidia_driver_version=$(modinfo -F version nvidia) @@ -266,7 +256,6 @@ phases: fi echo "Fabric Manager match Nvidia driver and version is locked" fi - echo "Testing CUDA installation with nvcc" cuda_ver="{{ test.CudaVersion.outputs.stdout }}" export PATH=/usr/local/cuda-${cuda_ver}/bin:${PATH} @@ -274,7 +263,6 @@ phases: cuda_output=$(nvcc -V | grep -E -o "release [0-9]+.[0-9]+") [[ "${cuda_output}" != "release ${cuda_ver}" ]] && echo "ERROR Installed version ${cuda_output} but expected ${cuda_ver}" && exit 1 echo "Correctly installed CUDA ${cuda_output}" - echo "Testing CUDA with deviceQuery..." if [ {{ test.OSArchitecture.outputs.stdout }} != 'arm64' ]; then /usr/local/cuda-${cuda_ver}/extras/demo_suite/deviceQuery | grep -o "Result = PASS" @@ -322,7 +310,6 @@ phases: - | set -vx OS='{{ test.OSName.outputs.stdout }}' - [[ $? 
-ne 0 ]] && echo "Check for Lustre client failed" && exit 1 echo "FSx Lustre test passed" diff --git a/cli/tests/pcluster/templates/test_imagebuilder_stack.py b/cli/tests/pcluster/templates/test_imagebuilder_stack.py index ca2917686c..50de32cb15 100644 --- a/cli/tests/pcluster/templates/test_imagebuilder_stack.py +++ b/cli/tests/pcluster/templates/test_imagebuilder_stack.py @@ -2203,6 +2203,63 @@ def test_imagebuilder_lambda_execution_role( {"ComponentArn": {"Ref": "ParallelClusterTestComponent"}}, ], ), + ( + { + "imagebuilder": { + "build": { + "parent_image": "ami-0185634c5a8a37250", + "installation": {"nvidia_software": {"enabled": True}, "lustre_client": {"enabled": True}}, + "imds": {"imds_support": "v2.0"}, + "subnet_id": "subnet-0292c5356eadc531f", + "iam": { + "instance_role": "arn:aws:iam::123456789012:role/pcluster", + "instance_profile": "arn:aws:iam::123456789012:instance-profile/pcluster", + "cleanup_lambda_role": "arn:aws:iam::123456789012:role/pcluster", + "additional_iam_policies": [{"policy": "arn:aws:iam::aws:policy/AmazonEC2ReadOnlyAccess"}] + }, + "instance_type": "c5.xlarge", + "security_group_ids": ["sg-b0bbeacc", "sg-0fc70b22048995b07"], + "components": [ + { + "type": "arn", + "value": "arn:aws:imagebuilder:us-east-1:aws:component/apache-tomcat-9-linux/1.0.0", + }, + { + "type": "arn", + "value": "arn:aws:imagebuilder:us-east-1:" + "aws:component/amazon-cloudwatch-agent-linux/1.0.0", + }, + ], + "update_os_packages": {"enabled": True}, + }, + "dev_settings": { + "cookbook": { + "chef_cookbook": "https://tests/aws-parallelcluster-cookbook-3.0.tgz", + "extra_chef_attributes": '{"cluster": {"test_cluster_attribute": "test_cluster_attribute_values"}}', + }, + "node_package": "https://tests/aws-parallelcluster-node-3.0.tgz", + }, + } + }, + { + "Architecture": "x86_64", + "BlockDeviceMappings": [ + { + "DeviceName": "/dev/xvda", + "Ebs": { + "VolumeSize": 50, + }, + } + ], + }, + [ + {"ComponentArn": {"Ref": "UpdateOSComponent"}}, + 
{"ComponentArn": {"Ref": "ParallelClusterComponent"}}, + {"ComponentArn": {"Ref": "ParallelClusterTagComponent"}}, + {"ComponentArn": "arn:aws:imagebuilder:us-east-1:aws:component/apache-tomcat-9-linux/1.0.0"}, + {"ComponentArn": "arn:aws:imagebuilder:us-east-1:aws:component/amazon-cloudwatch-agent-linux/1.0.0"}, + ], + ), ], ) def test_imagebuilder_components(mocker, resource, response, expected_components): @@ -2222,6 +2279,13 @@ def test_imagebuilder_components(mocker, resource, response, expected_components assert_that(generated_template.get("Resources").get("ImageRecipe").get("Properties").get("Components")).is_equal_to( expected_components ) + # Check that the inline Data of every ImageBuilder component stays below the 16000-character size limit + imagebuilder_resources = generated_template.get("Resources") + for component_name, component_content in imagebuilder_resources.items(): + if component_content and component_content.get("Type") == "AWS::ImageBuilder::Component": + component_size = len(str(component_content.get("Properties").get("Data"))) + print("Component {} has size {}".format(component_name, component_size)) + assert_that(component_size).is_less_than(16000) @pytest.mark.parametrize(