Skip to content

Commit db16c37

Browse files
authored
Merge pull request #810 from trz42/2023.06-software.eessi.io-use-accel-target-with-eessi-extend
{2023.06} Rebuild `EESSI-extend` module to use `$EESSI_ACCELERATOR_TARGET`
2 parents 2626840 + 8e87c33 commit db16c37

File tree

7 files changed

+188
-31
lines changed

7 files changed

+188
-31
lines changed

EESSI-extend-2023.06-easybuild.eb

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,16 @@ if (os.getenv("EESSI_CVMFS_INSTALL") ~= nil) then
8787
end
8888
eessi_cvmfs_install = true
8989
easybuild_installpath = os.getenv("EESSI_SOFTWARE_PATH")
90+
eessi_accelerator_target = os.getenv("EESSI_ACCELERATOR_TARGET")
91+
if (eessi_accelerator_target ~= nil) then
92+
cuda_compute_capability = string.match(eessi_accelerator_target, "^nvidia/cc([0-9][0-9])$")
93+
if (cuda_compute_capability ~= nil) then
94+
easybuild_installpath = pathJoin(easybuild_installpath, 'accel', eessi_accelerator_target)
95+
easybuild_cuda_compute_capabilities = cuda_compute_capability:sub(1, 1) .. "." .. cuda_compute_capability:sub(2, 2)
96+
else
97+
LmodError("Incorrect value for $EESSI_ACCELERATOR_TARGET: " .. eessi_accelerator_target)
98+
end
99+
end
90100
elseif (os.getenv("EESSI_SITE_INSTALL") ~= nil) then
91101
-- Make sure no other EESSI install environment variables are set
92102
if ((os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then
@@ -146,6 +156,11 @@ setenv ("EASYBUILD_UMASK", "022")
146156
-- Allow this module to be loaded when running EasyBuild
147157
setenv ("EASYBUILD_ALLOW_LOADED_MODULES", "EasyBuild,EESSI-extend")
148158
159+
-- Set environment variables if building for CUDA compute capabilities
160+
if (easybuild_cuda_compute_capabilities ~= nil) then
161+
setenv ("EASYBUILD_CUDA_COMPUTE_CAPABILITIES", easybuild_cuda_compute_capabilities)
162+
end
163+
149164
-- Set all related environment variables if we have project or user installations (including extending MODULEPATH)
150165
if (user_modulepath ~= nil) then
151166
-- Use a more restrictive umask for this case

EESSI-install-software.sh

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -229,22 +229,34 @@ if [[ "${EESSI_CVMFS_REPO}" != /cvmfs/dev.eessi.io ]]; then
229229
${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}
230230
fi
231231

232+
echo ">> Configuring EasyBuild..."
233+
234+
# Make sure EESSI-extend is not loaded, and configure location variables for a
235+
# CVMFS installation
236+
module unload EESSI-extend
237+
unset EESSI_USER_INSTALL
238+
unset EESSI_PROJECT_INSTALL
239+
unset EESSI_SITE_INSTALL
240+
export EESSI_CVMFS_INSTALL=1
241+
242+
# We now run 'source load_eessi_extend_module.sh' to load or install and load the
243+
# EESSI-extend module which sets up all build environment settings.
244+
# The script requires the EESSI_VERSION given as argument, a couple of
245+
# environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the
246+
# function check_exit_code defined.
247+
# NOTE 1, the script exits if those variables/functions are undefined.
248+
# NOTE 2, loading the EESSI-extend module may adjust the value of EASYBUILD_INSTALLPATH,
249+
# e.g., to point to the installation directory for accelerators.
250+
# NOTE 3, we have to set a default for EASYBUILD_INSTALLPATH here in case the
251+
# EESSI-extend module itself needs to be installed.
252+
export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
253+
source load_eessi_extend_module.sh ${EESSI_VERSION}
254+
232255
# Install full CUDA SDK and cu* libraries in host_injections
233256
# Hardcode this for now, see if it works
234257
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
235258
# Allow skipping CUDA SDK install in e.g. CI environments
236-
# The install_cuda... script uses EasyBuild. So, we need to check if we have EB
237-
# or skip this step.
238259
echo "Going to install full CUDA SDK and cu* libraries under host_injections if necessary"
239-
module_avail_out=$TMPDIR/ml.out
240-
module avail 2>&1 | grep EasyBuild &> ${module_avail_out}
241-
if [[ $? -eq 0 ]]; then
242-
echo_green ">> Found an EasyBuild module"
243-
else
244-
echo_yellow ">> No EasyBuild module found: skipping step to install CUDA (see output in ${module_avail_out})"
245-
export skip_cuda_install=True
246-
fi
247-
248260
temp_install_storage=${TMPDIR}/temp_install_storage
249261
mkdir -p ${temp_install_storage}
250262
if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
@@ -253,7 +265,7 @@ if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
253265
--accept-cuda-eula \
254266
--accept-cudnn-eula
255267
else
256-
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found"
268+
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed"
257269
fi
258270

259271
# Install NVIDIA drivers in host_injections (if they exist)
@@ -263,18 +275,6 @@ if command_exists "nvidia-smi"; then
263275
fi
264276

265277

266-
echo ">> Configuring EasyBuild..."
267-
268-
# Make sure that we use the EESSI_CVMFS_INSTALL
269-
# Since the path is set when loading EESSI-extend, we reload it to make sure it works - even if it is already loaded
270-
# Note we need to do this after running install_cuda_and_libraries, since that does installations in the EESSI_SITE_INSTALL
271-
unset EESSI_USER_INSTALL
272-
unset EESSI_PROJECT_INSTALL
273-
unset EESSI_SITE_INSTALL
274-
export EESSI_CVMFS_INSTALL=1
275-
module unload EESSI-extend
276-
module load EESSI-extend/${EESSI_VERSION}-easybuild
277-
278278
if [ ! -z "${shared_fs_path}" ]; then
279279
shared_eb_sourcepath=${shared_fs_path}/easybuild/sources
280280
echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path"

EESSI-remove-software.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ if [ $EUID -eq 0 ]; then
129129
echo_yellow "Removing ${app_dir} and ${app_module}..."
130130
rm -rf ${app_dir}
131131
rm -rf ${app_module}
132+
# recreate some directory to work around permission denied
133+
# issues when rebuilding the package
134+
mkdir -p ${app_dir}/easybuild
132135
done
133136
else
134137
fatal_error "Easystack file ${easystack_file} not found!"

bot/build.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ else
208208
declare -a REMOVAL_STEP_ARGS=()
209209
REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}")
210210
REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}")
211+
211212
# add fakeroot option in order to be able to remove software, see:
212213
# https://github.com/EESSI/software-layer/issues/312
213214
REMOVAL_STEP_ARGS+=("--fakeroot")
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# 2024.11.12
2+
# for installations under /cvmfs, if EESSI_ACCELERATOR_TARGET is set,
3+
# EESSI-extend should adjust EASYBUILD_INSTALLPATH and set
4+
# EASYBUILD_CUDA_COMPUTE_CAPABILITIES
5+
easyconfigs:
6+
- EESSI-extend-2023.06-easybuild.eb

load_eessi_extend_module.sh

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# Script to load the environment module for EESSI-extend.
2+
# If that module is not available yet, a specific version will be installed using the latest EasyBuild.
3+
#
4+
# This script must be sourced, since it makes changes in the current environment, like loading an EESSI-extend module.
5+
#
6+
# Assumptions (if one is not satisfied the script prints a message and exits)
7+
# - EESSI version is given as first argument
8+
# - TMPDIR is set
9+
# - EB is set
10+
# - EASYBUILD_INSTALLPATH needs to be set
11+
# - Function check_exit_code is defined;
12+
# scripts/utils.sh in EESSI/software-layer repository defines this function, hence
13+
# scripts/utils.sh shall be sourced before this script is run
14+
#
15+
# This script is part of the EESSI software layer, see
16+
# https://github.com/EESSI/software-layer.git
17+
#
18+
# author: Kenneth Hoste (@boegel, HPC-UGent)
19+
# author: Alan O'Cais (@ocaisa, CECAM)
20+
# author: Thomas Roeblitz (@trz42, University of Bergen)
21+
#
22+
# license: GPLv2
23+
#
24+
#
25+
set -o pipefail
26+
27+
# this script is *sourced*, not executed, so can't rely on $0 to determine path to self or script name
28+
# $BASH_SOURCE points to correct path or script name, see also http://mywiki.wooledge.org/BashFAQ/028
29+
if [ $# -ne 1 ]; then
30+
echo "Usage: source ${BASH_SOURCE} <EESSI-extend version>" >&2
31+
exit 1
32+
fi
33+
34+
EESSI_EXTEND_VERSION="${1}-easybuild"
35+
36+
# make sure that environment variables that we expect to be set are indeed set
37+
if [ -z "${TMPDIR}" ]; then
38+
echo "\$TMPDIR is not set; exiting" >&2
39+
exit 2
40+
fi
41+
42+
# ${EB} is used to specify which 'eb' command should be used;
43+
# can potentially be more than just 'eb', for example when using 'eb --optarch=GENERIC'
44+
if [ -z "${EB}" ]; then
45+
echo "\$EB is not set; exiting" >&2
46+
exit 2
47+
fi
48+
49+
# ${EASYBUILD_INSTALLPATH} points to the installation path and needs to be set
50+
if [ -z "${EASYBUILD_INSTALLPATH}" ]; then
51+
echo "\$EASYBUILD_INSTALLPATH is not set; exiting" >&2
52+
exit 2
53+
fi
54+
55+
# make sure that utility functions are defined (cfr. scripts/utils.sh script in EESSI/software-layer repo)
56+
type check_exit_code
57+
if [ $? -ne 0 ]; then
58+
echo "check_exit_code function is not defined; exiting" >&2
59+
exit 3
60+
fi
61+
62+
echo ">> Checking for EESSI-extend module..."
63+
64+
ml_av_eessi_extend_out=${TMPDIR}/ml_av_eessi_extend.out
65+
# need to use --ignore_cache to avoid the case that the module was removed (to be
66+
# rebuilt) but it is still in the cache
67+
module --ignore_cache avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out}
68+
69+
if [[ $? -eq 0 ]]; then
70+
echo_green ">> Module for EESSI-extend/${EESSI_EXTEND_VERSION} found!"
71+
else
72+
echo_yellow ">> No module yet for EESSI-extend/${EESSI_EXTEND_VERSION}, installing it..."
73+
74+
EB_TMPDIR=${TMPDIR}/ebtmp
75+
echo ">> Using temporary installation of EasyBuild (in ${EB_TMPDIR})..."
76+
pip_install_out=${TMPDIR}/pip_install.out
77+
pip3 install --prefix ${EB_TMPDIR} easybuild &> ${pip_install_out}
78+
79+
# keep track of original $PATH and $PYTHONPATH values, so we can restore them
80+
ORIG_PATH=${PATH}
81+
ORIG_PYTHONPATH=${PYTHONPATH}
82+
83+
# run the installation in a subshell, so the EasyBuild settings exported below do not leak into the current environment
84+
(
85+
export EASYBUILD_PREFIX=${TMPDIR}/easybuild
86+
export EASYBUILD_READ_ONLY_INSTALLDIR=1
87+
88+
echo ">> Final installation in ${EASYBUILD_INSTALLPATH}..."
89+
export PATH=${EB_TMPDIR}/bin:${PATH}
90+
export PYTHONPATH=$(ls -d ${EB_TMPDIR}/lib/python*/site-packages):${PYTHONPATH}
91+
eb_install_out=${TMPDIR}/eb_install.out
92+
ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!"
93+
fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... (output: ${eb_install_out})"
94+
# while always adding --try-amend=keep... may do no harm, we could make
95+
# an attempt to figure out if it is needed, e.g., when we are rebuilding
96+
${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" --try-amend=keeppreviousinstall=True 2>&1 | tee ${eb_install_out}
97+
check_exit_code $? "${ok_msg}" "${fail_msg}"
98+
)
99+
100+
# restore original $PATH and $PYTHONPATH values, and clean up environment variables that are no longer needed
101+
export PATH=${ORIG_PATH}
102+
export PYTHONPATH=${ORIG_PYTHONPATH}
103+
unset EB_TMPDIR ORIG_PATH ORIG_PYTHONPATH
104+
105+
# the exit status of 'module avail' is not a reliable indicator for a missing module,
# so filter its output with grep (same approach as the initial check in this script)
module --ignore_cache avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out}
106+
if [[ $? -eq 0 ]]; then
107+
echo_green ">> EESSI-extend/${EESSI_EXTEND_VERSION} module installed!"
108+
else
109+
# eb_install_out is only assigned inside the subshell that runs EasyBuild and is
# not visible here, so the path to the 'eb' log file is spelled out explicitly
fatal_error "EESSI-extend/${EESSI_EXTEND_VERSION} module failed to install?! (output of 'pip install' in ${pip_install_out}, output of 'eb' in ${TMPDIR}/eb_install.out, output of 'module avail EESSI-extend' in ${ml_av_eessi_extend_out})"
110+
fi
111+
fi
112+
113+
echo ">> Loading EESSI-extend/${EESSI_EXTEND_VERSION} module..."
114+
module --ignore_cache load EESSI-extend/${EESSI_EXTEND_VERSION}
115+
116+
unset EESSI_EXTEND_VERSION

scripts/gpu_support/nvidia/install_cuda_and_libraries.sh

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,6 @@ done
7777
# Make sure EESSI is initialised
7878
check_eessi_initialised
7979

80-
# Make sure that `EESSI-extend` will install in the site installation path EESSI_SITE_SOFTWARE_PATH
81-
export EESSI_SITE_INSTALL=1
82-
echo "EESSI_SITE_SOFTWARE_PATH=${EESSI_SITE_SOFTWARE_PATH}"
83-
8480
# we need a directory we can use for temporary storage
8581
if [[ -z "${TEMP_DIR}" ]]; then
8682
tmpdir=$(mktemp -d)
@@ -93,7 +89,7 @@ else
9389
fi
9490
echo "Created temporary directory '${tmpdir}'"
9591

96-
# use EESSI_SITE_SOFTWARE_PATH/.modules/all as MODULEPATH
92+
# Store MODULEPATH so it can be restored at the end of each loop iteration
9793
SAVE_MODULEPATH=${MODULEPATH}
9894

9995
for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do
@@ -103,16 +99,36 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do
10399
eb_version=$(echo ${EASYSTACK_FILE} | sed 's/.*eb-\([0-9.]*\).*/\1/g')
104100

105101
# Load EasyBuild version for this easystack file _before_ loading EESSI-extend
106-
module avail EasyBuild
102+
module_avail_out=${tmpdir}/ml.out
103+
module avail 2>&1 | grep EasyBuild/${eb_version} &> ${module_avail_out}
104+
if [[ $? -eq 0 ]]; then
105+
echo_green ">> Found an EasyBuild/${eb_version} module"
106+
else
107+
echo_yellow ">> No EasyBuild/${eb_version} module found: skipping step to install easystack file ${EASYSTACK_FILE} (see output in ${module_avail_out})"
108+
continue
109+
fi
107110
module load EasyBuild/${eb_version}
111+
108112
# Make sure EESSI-extend does a site install here
109113
# We need to reload it with the current environment variables set
110114
unset EESSI_CVMFS_INSTALL
111115
unset EESSI_PROJECT_INSTALL
112116
unset EESSI_USER_INSTALL
113117
export EESSI_SITE_INSTALL=1
114118
module unload EESSI-extend
115-
module load EESSI-extend/${EESSI_VERSION}-easybuild
119+
ml_av_eessi_extend_out=${tmpdir}/ml_av_eessi_extend.out
120+
# need to use --ignore_cache to avoid the case that the module was removed (to be
121+
# rebuilt) but it is still in the cache and the rebuild failed
122+
EESSI_EXTEND_VERSION=${EESSI_VERSION}-easybuild
123+
module --ignore_cache avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out}
124+
if [[ $? -eq 0 ]]; then
125+
echo_green ">> Module for EESSI-extend/${EESSI_EXTEND_VERSION} found!"
126+
else
127+
error="\nNo module for EESSI-extend/${EESSI_EXTEND_VERSION} found\nwhile EESSI has been initialised to use software under ${EESSI_SOFTWARE_PATH}\n"
128+
fatal_error "${error}"
129+
fi
130+
module --ignore_cache load EESSI-extend/${EESSI_EXTEND_VERSION}
131+
unset EESSI_EXTEND_VERSION
116132

117133
# Install modules in hidden .modules dir to keep track of what was installed before
118134
# (this action is temporary, and we do not call Lmod again within the current shell context, but in EasyBuild

0 commit comments

Comments
 (0)