3232# else.
3333GPURUN_BYPASS=${GPURUN_BYPASS:- 0}
3434
35- if [ " $GPURUN_BYPASS " = " 1 " ] ; then
35+ function execOnError() {
3636 exec " $@ "
37+ }
38+
39+ if [ " $GPURUN_BYPASS " = " 1" ]; then
40+ execOnError " $@ "
3741fi
3842
3943# PROGVERSION string is updated by cmake when component is installed
220224if [ ! -d $AOMP ] ; then
221225 >&2 echo " ERROR: AOMP not found at $AOMP "
222226 >&2 echo " Please install AOMP or correctly set env-var AOMP"
223- exit 1
227+ execOnError " $@ "
224228fi
225229ROCMINFO_BINARY=${ROCMINFO_BINARY:- $AOMP / bin/ rocminfo}
226230[ ! -f $ROCMINFO_BINARY ] && ROCMINFO_BINARY=$AOMP /../bin/rocminfo
227231[ ! -f $ROCMINFO_BINARY ] && ROCMINFO_BINARY=$AOMP /../../bin/rocminfo
228232if [ ! -f $ROCMINFO_BINARY ] ; then
229233 >&2 echo " ERROR: Could not find binary for rocminfo,"
230234 >&2 echo " Please correct installation of ROCM or AOMP compiler"
231- exit 1
235+ execOnError " $@ "
232236fi
233237
234238# Use rocminfo to find number number of CUs and gfxids for each GPU.
@@ -238,7 +242,7 @@ _tfile_lines=`wc -l $_tfile | cut -d" " -f1`
238242if [ $_tfile_lines == 0 ] ; then
239243 >&2 echo " ERROR: $ROCMINFO_BINARY failed to find GPU devices"
240244 rm $_tfile
241- exit 1
245+ execOnError " $@ "
242246fi
243247# Create 3 _ri_ arrays by parsing rocminfo (ri), one array entry per device
244248_ri_all_gfxids=" "
@@ -312,9 +316,9 @@ if [ $_ri_num_devices == 0 ] ; then
312316 >&2 echo " ROCR_VISIBLE_DEVICES was preset to $ROCR_VISIBLE_DEVICES "
313317 >&2 echo " Consider unset ROCR_VISIBLE_DEVICES and let gpurun set it correctly."
314318 fi
315- exit 1
319+ execOnError " $@ "
316320 else
317- exit
321+ execOnError " $@ "
318322 fi
319323fi
320324
@@ -399,21 +403,21 @@ if [[ $_ss_num_devices -lt 1 ]] ; then
399403 else
400404 >&2 echo " ERROR: No amdgpu devices found in $_sysdevdir ."
401405 fi
402- exit 1
406+ execOnError " $@ "
403407fi
404408
405409# check for taskset or numactl cmd
406410if [ " $_use_numactl_membind " == " 1" ] || [ " $_use_numactl_localalloc " == " 1" ] ; then
407411 _launch_process_cmd_binary=` which numactl`
408412 if [ $? != 0 ] ; then
409413 >&2 echo " ERROR: The -m (membind) or -l (localalloc) require numactl to be installed."
410- exit 1
414+ execOnError " $@ "
411415 fi
412416else
413417 _launch_process_cmd_binary=` which taskset`
414418 if [ $? != 0 ] ; then
415419 >&2 echo " ERROR: $0 requires the taskset command to be installed."
416- exit 1
420+ execOnError " $@ "
417421 fi
418422fi
419423if [ " $_use_numactl_membind " == " 1" ] && [ " $_use_numactl_localalloc " == " 1" ] ; then
448452_node_cus=$(( $_ri_num_devices * ${_ss_cucount[$_device_num]} ))
449453if [ $_num_local_ranks -gt $_node_cus ] ; then
450454 >&2 echo " ERROR: Not enough CUs ($_node_cus ) for $_num_local_ranks ranks "
451- exit 1
455+ execOnError " $@ "
452456fi
453457
454458if [ $_uses_multi_device == 1 ]; then
455459 # Enforce some rules on the use of -md option
456460 # Note -md forces GPURUN_MASK_POLICY=nomask
457461 if [[ ! -z $ROCR_VISIBLE_DEVICES ]] ; then
458462 >&2 echo " ERROR: DO NOT PRESET ROCR_VISIBLE_DEVICES in gpurun multi-device (-md) mode"
459- exit 1
463+ execOnError " $@ "
460464 fi
461465 if [ $_devices_per_mdset -gt $_ri_num_devices ] ; then
462466 >&2 echo " ERROR: More devices requested ($_devices_per_mdset ) than available ($_ri_num_devices )"
463- exit 1
467+ execOnError " $@ "
464468 fi
465469 _md_total_devices=$(( $_num_local_ranks * $_devices_per_mdset ))
466470 if [ $_md_total_devices -gt $_ri_num_devices ] && [ $_local_rank_num == 0 ] ; then
@@ -507,7 +511,7 @@ _gfxid=${_ss_gfxid[$_device_num]}
507511_node_cus=$(( $_ri_num_devices * ${_ss_cucount[$_device_num]} ))
508512if [ $_num_local_ranks -gt $_node_cus ] ; then
509513 >&2 echo " ERROR: Not enough CUs ($_node_cus ) for $_num_local_ranks ranks "
510- exit 1
514+ execOnError " $@ "
511515fi
512516
513517_utilized_CUs_per_device=$_available_CUs_per_device
0 commit comments