diff --git a/ci/baseimage.cuda.Dockerfile b/ci/baseimage.cuda.Dockerfile
index a022c37..9f1fadd 100644
--- a/ci/baseimage.cuda.Dockerfile
+++ b/ci/baseimage.cuda.Dockerfile
@@ -1,6 +1,6 @@
 FROM ubuntu:22.04 as builder
 
-ARG CUDA_ARCH=60
+ARG CUDA_ARCH=90
 
 ENV DEBIAN_FRONTEND noninteractive
 
@@ -10,7 +10,7 @@ ENV PATH="/spack/bin:${PATH}"
 
 ENV MPICH_VERSION=3.4.3
 
-ENV CMAKE_VERSION=3.27.9
+ENV CMAKE_VERSION=3.30.3
 
 RUN apt-get -y update
 
@@ -23,11 +23,12 @@ RUN apt-get install -y --no-install-recommends gcc g++ gfortran clang libomp-14-
     liblzma-dev libbz2-dev
 
 # install CMake
-RUN wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz -O cmake.tar.gz && \
+# (`uname -m` picks the release tarball matching the build host: x86_64 or aarch64)
+RUN wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz -O cmake.tar.gz && \
     tar zxvf cmake.tar.gz --strip-components=1 -C /usr
 
 # get latest version of spack
-RUN git clone -b v0.21.0 https://github.com/spack/spack.git
+RUN git clone -b v0.23.0 https://github.com/spack/spack.git
 
 # set the location of packages built by spack
 RUN spack config add config:install_tree:root:/opt/local
@@ -45,13 +46,7 @@ RUN spack external find --all --exclude python
 RUN spack compiler find
 
 # install yq (utility to manipulate the yaml files)
-RUN wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_386 && chmod a+x /usr/local/bin/yq
-
-# change the fortran compilers: for gcc the gfortran is already properly set and the change has no effect; add it for clang
-RUN yq -i '.compilers[0].compiler.paths.f77 = "/usr/bin/gfortran"' /root/.spack/linux/compilers.yaml && \
-    yq -i '.compilers[0].compiler.paths.fc = "/usr/bin/gfortran"' /root/.spack/linux/compilers.yaml && \
-    yq -i '.compilers[1].compiler.paths.f77 = "/usr/bin/gfortran"' /root/.spack/linux/compilers.yaml && \
-    yq -i '.compilers[1].compiler.paths.fc = "/usr/bin/gfortran"' /root/.spack/linux/compilers.yaml
+RUN wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_$(dpkg --print-architecture) && chmod a+x /usr/local/bin/yq
 
 # install MPICH
 RUN spack install mpich@${MPICH_VERSION} %gcc
diff --git a/ci/daint-alps.yml b/ci/daint-alps.yml
new file mode 100644
index 0000000..dc2480e
--- /dev/null
+++ b/ci/daint-alps.yml
@@ -0,0 +1,95 @@
+include:
+  - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'
+
+stages:
+  - baseimage
+  - build
+  - test
+
+build base image:
+  extends: [.dynamic-image-name, .container-builder-cscs-gh200]
+  stage: baseimage
+  timeout: 2h
+  variables:
+    SLURM_RESERVATION: 'NCCL'
+    DOCKERFILE: ci/baseimage.cuda.Dockerfile
+    WATCH_FILECHANGES: ci/baseimage.cuda.Dockerfile
+    PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/base/cosma-ci
+
+build tiled-mm:
+  extends: .container-builder-cscs-gh200
+  needs: ["build base image"]
+  stage: build
+  variables:
+    SLURM_RESERVATION: 'NCCL'
+    DOCKERFILE: ci/build.Dockerfile
+    PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/cosma/cosma-ci:$CI_COMMIT_SHA
+    ENVPATH: "/cosma-env-cuda"
+    DOCKER_BUILD_ARGS: '["BASE_IMAGE=${BASE_IMAGE}", "ENVPATH=$ENVPATH"]'
+
+.run_tests:
+  extends: [.container-runner-todi-gh200]
+  needs: ["build tiled-mm"]
+  stage: test
+  image: $CSCS_REGISTRY_PATH/cosma/cosma-ci:$CI_COMMIT_SHA
+  variables:
+    GIT_STRATEGY: none
+    MPICH_MAX_THREAD_SAFETY: multiple
+    CSCS_REGISTRY_LOGIN: 'YES'
+    PULL_IMAGE: 'YES'
+    SLURM_HINT: nomultithread
+    SLURM_UNBUFFEREDIO: ''
+    SLURM_RESERVATION: 'NCCL'
+    SLURM_CPU_BIND: 'socket'
+    SLURM_MPI: "pmi2"
+    CRAY_CUDA_MPS: 'YES'
+    # Workaround after update until hooks are fixed
+    ENROOT_LIBRARY_PATH: /capstor/scratch/cscs/fmohamed/enrootlibn
+    # SLURM_WAIT: 0
+    COSMA_GPU_MAX_TILE_K: 100
+    COSMA_GPU_MAX_TILE_M: 100
+    COSMA_GPU_MAX_TILE_N: 100
+
+mapper:
+  extends: .run_tests
+  stage: test
+  script: /cosma-env-cuda/.spack-env/view/bin/test.mapper
+  variables:
+    SLURM_JOB_NUM_NODES: 1
+    SLURM_NTASKS: 1
+    USE_MPI: 'YES'
+
+pdgemm:
+  extends: .run_tests
+  stage: test
+  script: /cosma-env-cuda/.spack-env/view/bin/test.pdgemm
+  variables:
+    SLURM_JOB_NUM_NODES: 2
+    SLURM_NTASKS: 16
+    USE_MPI: 'YES'
+
+multiply:
+  extends: .run_tests
+  stage: test
+  script: /cosma-env-cuda/.spack-env/view/bin/test.multiply
+  variables:
+    SLURM_JOB_NUM_NODES: 2
+    SLURM_NTASKS: 16
+    USE_MPI: 'YES'
+
+scalar_matmul:
+  extends: .run_tests
+  stage: test
+  script: /cosma-env-cuda/.spack-env/view/bin/test.scalar_matmul
+  variables:
+    SLURM_JOB_NUM_NODES: 1
+    SLURM_NTASKS: 8
+    USE_MPI: 'YES'
+
+multiply_using_layout:
+  extends: .run_tests
+  stage: test
+  script: /cosma-env-cuda/.spack-env/view/bin/test.multiply_using_layout
+  variables:
+    SLURM_JOB_NUM_NODES: 1
+    SLURM_NTASKS: 4
diff --git a/ci/mps-wrapper.sh b/ci/mps-wrapper.sh
new file mode 100755
index 0000000..5a6e9fd
--- /dev/null
+++ b/ci/mps-wrapper.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Example mps-wrapper.sh usage:
+# > srun --cpu-bind=socket [...] mps-wrapper.sh <command> [args...]
+#
+# Starts the CUDA MPS control daemon from one rank per node, then execs
+# <command> under numactl with CUDA_VISIBLE_DEVICES and the memory binding
+# set to the NUMA node(s) that this rank's CPU affinity mask intersects.
+
+export CUDA_MPS_PIPE_DIRECTORY=/tmp/nvidia-mps
+export CUDA_MPS_LOG_DIRECTORY=/tmp/nvidia-log
+# Launch MPS from a single rank per node. The quoted string comparison keeps
+# the test well-formed (and false) when SLURM_LOCALID is unset.
+if [ "${SLURM_LOCALID:-}" = "0" ]; then
+    CUDA_VISIBLE_DEVICES=0,1,2,3 nvidia-cuda-mps-control -d
+fi
+
+# set cuda device: physical NUMA node index(es) covered by this process's
+# CPU affinity mask (taskset prints the mask, in hex, as its 6th field)
+# NOTE(review): assumes GPU index == NUMA node index on the target nodes - confirm
+numa_nodes=$(hwloc-calc --physical --intersect NUMAnode "$(taskset -p $$ | awk '{print "0x"$6}')")
+export CUDA_VISIBLE_DEVICES="$numa_nodes"
+# Run the command
+exec numactl --membind="$numa_nodes" "$@"
diff --git a/spack/packages/cosma/fj-ssl2.patch b/spack/packages/cosma/fj-ssl2.patch
new file mode 100644
index 0000000..3e09383
--- /dev/null
+++ b/spack/packages/cosma/fj-ssl2.patch
@@ -0,0 +1,104 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 1fd1e55..41a041b 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -19,7 +19,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS "YES") # always write compile_commands.json
+ 
+ set(COSMA_GPU_BACKENDS_LIST "CUDA" "ROCM")
+ set(COSMA_SCALAPACK_LIST "OFF" "MKL" "CRAY_LIBSCI" "CUSTOM")
+-set(COSMA_BLAS_LIST "auto" "MKL" "OPENBLAS" "CRAY_LIBSCI" "CUSTOM" "BLIS" "ATLAS" "CUDA" "ROCM" "OFF")
++set(COSMA_BLAS_LIST "auto" "MKL" "SSL2" "OPENBLAS" "CRAY_LIBSCI" "CUSTOM" "BLIS" "ATLAS" "CUDA" "ROCM" "OFF")
+ option(COSMA_WITH_TESTS "Generate the test target." ON)
+ option(COSMA_WITH_APPS "Generate the miniapp targets." ON)
+ option(COSMA_WITH_BENCHMARKS "Generate the benchmark targets." ON)
+@@ -45,7 +45,7 @@ if (COSMA_BLAS MATCHES "CUDA|ROCM")
+   set(COSMA_GPU_BACKEND ${COSMA_BLAS})
+ else()
+   if(COSMA_BLAS STREQUAL "OFF")
+-    message(FATAL_ERROR "A Blas implementation is needed when running on CPU only: choices are : auto, MKL, OPENBLAS, CRAY_LIBSCI, CUSTOM, BLIS, ATLAS, FLEXIBLAS, ARMPL, GenericBLAS")
++    message(FATAL_ERROR "A Blas implementation is needed when running on CPU only: choices are : auto, MKL, SSL2, OPENBLAS, CRAY_LIBSCI, CUSTOM, BLIS, ATLAS, FLEXIBLAS, ARMPL, GenericBLAS")
+   else()
+     set(COSMA_BLAS_VENDOR ${COSMA_BLAS})
+   endif()
+@@ -190,6 +190,7 @@ install(FILES "${cosma_BINARY_DIR}/cosmaConfig.cmake"
+   "${cosma_BINARY_DIR}/cosmaConfigVersion.cmake"
+   "${cosma_BINARY_DIR}/cosmaConfigVersion.cmake"
+   "${cosma_SOURCE_DIR}/cmake/FindMKL.cmake"
++  "${cosma_SOURCE_DIR}/cmake/FindSSL2.cmake"
+   "${cosma_SOURCE_DIR}/cmake/FindBlas.cmake"
+   "${cosma_SOURCE_DIR}/cmake/FindSCALAPACK.cmake"
+   "${cosma_SOURCE_DIR}/cmake/FindOPENBLAS.cmake"
+diff --git a/cmake/FindBlas.cmake b/cmake/FindBlas.cmake
+index aef956c..3c47561 100644
+--- a/cmake/FindBlas.cmake
++++ b/cmake/FindBlas.cmake
+@@ -14,6 +14,7 @@ endif()
+ set(COSMA_BLAS_VENDOR_LIST
+   "auto"
+   "MKL"
++  "SSL2"
+   "OPENBLAS"
+   "FLEXIBLAS"
+   "ARMPL"
+diff --git a/cmake/FindSSL2.cmake b/cmake/FindSSL2.cmake
+new file mode 100644
+index 0000000..f0e11bf
+--- /dev/null
++++ b/cmake/FindSSL2.cmake
+@@ -0,0 +1,56 @@
++#.rst:
++# FindSSL2
++# -----------
++#
++# This module tries to find the SSL2 library.
++#
++# The following variables are set
++#
++# ::
++#
++#   SSL2_FOUND                 - True if ssl2 is found
++#   COSMA_SSL2_LINK_LIBRARIES  - The required libraries
++#   COSMA_SSL2_INCLUDE_DIRS    - The required include directory
++#
++# The following import target is created
++#
++# ::
++#
++#   cosma::BLAS::SSL2::ssl2 (alias: cosma::BLAS::SSL2::blas)
++
++# Set paths to look for the library from ROOT variables. If the new policy (CMP0074) is set, find_library() uses them automatically.
++# if(NOT POLICY CMP0074)
++set(_SSL2_PATHS ${SSL2_ROOT}
++                $ENV{SSL2_ROOT}
++                $ENV{SSL2ROOT}
++                $ENV{SSL2_DIR}
++                $ENV{SSL2DIR})
++# endif()
++
++find_library(
++    COSMA_SSL2_LINK_LIBRARIES
++    NAMES "fjlapackex"
++    HINTS ${_SSL2_PATHS}
++    PATH_SUFFIXES "lib64"
++)
++find_path(
++    COSMA_SSL2_INCLUDE_DIRS
++    NAMES "cblas.h"
++    HINTS ${_SSL2_PATHS}
++    PATH_SUFFIXES "include"
++)
++
++# check if found
++include(FindPackageHandleStandardArgs)
++find_package_handle_standard_args(SSL2 REQUIRED_VARS COSMA_SSL2_INCLUDE_DIRS COSMA_SSL2_LINK_LIBRARIES)
++
++# add target to link against
++if(NOT TARGET cosma::BLAS::SSL2::ssl2)
++    add_library(cosma::BLAS::SSL2::ssl2 INTERFACE IMPORTED)
++    add_library(cosma::BLAS::SSL2::blas ALIAS cosma::BLAS::SSL2::ssl2)
++endif()
++set_property(TARGET cosma::BLAS::SSL2::ssl2 PROPERTY INTERFACE_LINK_LIBRARIES ${COSMA_SSL2_LINK_LIBRARIES})
++set_property(TARGET cosma::BLAS::SSL2::ssl2 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${COSMA_SSL2_INCLUDE_DIRS})
++
++# prevent clutter in cache: hide the entries find_library()/find_path() created above
++mark_as_advanced(COSMA_SSL2_LINK_LIBRARIES COSMA_SSL2_INCLUDE_DIRS)
diff --git a/spack/packages/cosma/package.py b/spack/packages/cosma/package.py
index 2273812..6e62f71 100644
--- a/spack/packages/cosma/package.py
+++ b/spack/packages/cosma/package.py
@@ -1,4 +1,4 @@
-# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
+# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
 # Spack Project Developers. See the top-level COPYRIGHT file for details.
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
@@ -17,13 +17,15 @@ class Cosma(CMakePackage):
     url = "https://github.com/eth-cscs/COSMA/archive/refs/tags/v2.6.6.tar.gz"
     git = "https://github.com/eth-cscs/COSMA.git"
 
+    license("BSD-3-Clause")
+
     # note: The default archives produced with github do not have the archives
     # of the submodules.
     version("master", branch="master", submodules=False)
     version("2.6.6", sha256="1604be101e77192fbcc5551236bc87888d336e402f5409bbdd9dea900401cc37")
     version("2.6.5", sha256="10d9b7ecc1ce44ec5b9e0c0bf89278a63029912ec3ea99661be8576b553ececf")
     version("2.6.4", sha256="6d7bd5e3005874af9542a329c93e7ccd29ca1a5573dae27618fac2704fa2b6ab")
-    version("2.6.3", sha256="8ca96ca41458f1e9d0da70d524c5a03c677dba7238d23a578f852163b6d45ac9")
+    version("2.6.3", sha256="c2a3735ea8f860930bea6706d968497d72a1be0498c689b5bc4a951ffc2d1146")
     version("2.6.2", sha256="2debb5123cc35aeebc5fd2f8a46cfd6356d1e27618c9bb57129ecd09aa400940")
     version("2.6.1", sha256="69aa6634a030674f0d9be61e7b0bf0dc17acf0fc9e7a90b40e3179e2254c8d67")
     version("2.5.1", sha256="085b7787597374244bbb1eb89bc69bf58c35f6c85be805e881e1c0b25166c3ce")
@@ -34,12 +36,14 @@ class Cosma(CMakePackage):
     version("2.0.7", sha256="8d70bfcbda6239b6a8fbeaca138790bbe58c0c3aa576879480d2632d4936cf7e")
     version("2.0.2", sha256="4f3354828bc718f3eef2f0098c3bdca3499297497a220da32db1acd57920c68d")
 
+    depends_on("cxx", type="build")  # generated
+
     # We just need the libraries of cuda and rocm, so no need to extend
     # CudaPackage or ROCmPackage.
     variant("cuda", default=False, description="Build with cuBLAS support")
     variant("rocm", default=False, description="Build with rocBLAS support")
     variant("scalapack", default=False, description="Build with ScaLAPACK API")
-    variant("shared", default=False, description="Build the shared library version")
+    variant("shared", default=True, description="Build the shared library version")
     variant("tests", default=False, description="Build tests")
     variant("apps", default=False, description="Build miniapp")
     variant("profiling", default=False, description="Enable profiling")
@@ -76,8 +80,10 @@ class Cosma(CMakePackage):
     depends_on("semiprof", when="+profiling")
     depends_on("costa+profiling", when="+profiling")
 
+    patch("fj-ssl2.patch", when="^fujitsu-ssl2")
+
     def setup_build_environment(self, env):
-        if "+cuda" in self.spec:
+        if self.spec.satisfies("+cuda"):
             env.set("CUDA_PATH", self.spec["cuda"].prefix)
 
     def cosma_blas_cmake_arg(self):
@@ -89,6 +95,7 @@ def cosma_blas_cmake_arg(self):
             ("^cray-libsci", "CRAY_LIBSCI"),
             ("^netlib-lapack", "CUSTOM"),
             ("^openblas", "OPENBLAS"),
+            ("^fujitsu-ssl2", "SSL2"),
         ]
 
         if self.version >= Version("2.4.0"):
@@ -105,11 +112,11 @@ def cosma_blas_cmake_arg(self):
     def cosma_scalapack_cmake_arg(self):
         spec = self.spec
 
-        if "~scalapack" in spec:
+        if spec.satisfies("~scalapack"):
             return "OFF"
-        elif "^intel-mkl" in spec or "^intel-oneapi-mkl" in spec:
+        elif spec.satisfies("^intel-mkl") or spec.satisfies("^intel-oneapi-mkl"):
             return "MKL"
-        elif "^cray-libsci" in spec:
+        elif spec.satisfies("^cray-libsci"):
             return "CRAY_LIBSCI"
         return "CUSTOM"
 
diff --git a/spack/packages/tiled-mm/package.py b/spack/packages/tiled-mm/package.py
index c057a35..9a2ea91 100644
--- a/spack/packages/tiled-mm/package.py
+++ b/spack/packages/tiled-mm/package.py
@@ -25,6 +25,8 @@ class TiledMm(CMakePackage, CudaPackage, ROCmPackage):
     version("2.2", sha256="6d0b49c9588ece744166822fd44a7bc5bec3dc666b836de8bf4bf1a7bb675aac")
     version("2.0", sha256="ea554aea8c53d7c8e40044e6d478c0e8137d7e8b09d7cb9650703430d92cf32e")
 
+    depends_on("cxx", type="build")  # generated
+
     variant("shared", default=True, description="Build shared libraries")
     variant("examples", default=False, description="Enable examples")
     variant("tests", default=False, description="Enable tests")