1
1
#! /bin/bash
2
2
3
3
# # Uncomment for CLI debugging
4
- set -o xtrace
4
+ # set -o xtrace
5
+ # set -v
6
+
7
+ exec > logfile 2>&1
5
8
6
9
# # while loop
7
10
while
@@ -10,114 +13,82 @@ while
10
13
date
11
14
12
15
# # Read configuration file
13
- source $( pwd) /configuration.sh
16
+ # source $(pwd)/configuration.sh
17
+ source $( pwd) /variablecreation.sh
14
18
15
19
# # Beginning of variable creation and main loop
16
20
do
17
- # # Count the number of idle jobs
18
- IDLEJOBS=$( condor_q -l -submitter galaxy -submitter centos | grep -wc ' JobStatus = 1' )
19
- echo " The number of idle jobs is $IDLEJOBS "
20
-
21
- # # Count how many slots are available to calculate max jobs/slots
22
- MAXJOBS=$( condor_status -l | grep -i " TotalSlotCpus = [2,4,8]" | awk ' BEGIN{ total=0 } { total=total+$3 } END{ printf total }' )
23
- echo " The execute node(s) can currently run " $MAXJOBS " jobs/threads"
24
-
25
- # # Count how many jobs are currently running
26
- RUNNINGJOBS=$( condor_q -l -submitter galaxy -submitter centos | grep -wc ' JobStatus = 2' )
27
- echo " The number of running jobs is $RUNNINGJOBS "
28
-
29
- # # Create array with IP numbers of idle nodes
30
- readarray IDLENODES < <( condor_status -l | grep -iEo ' StartdIpAddr = "<[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | uniq -u | grep -Eo " [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}" )
31
- # echo "The number of idle execute nodes is ${#IDLENODES[@]} and the idle node IP(s) is "$(printf '%s\n' "${IDLENODES[@]}")""
32
-
33
- # # Create array with IP numbers of nodes that are running jobs
34
- readarray BUSYMACHINES < <( condor_q -l $( echo ${SUBMITTINGUSERS[@]} ) | grep -oE " [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}" | sort -u)
35
- echo " The following execute nodes are running jobs: " $( printf ' %s\n' " ${BUSYMACHINES[@]} " ) " "
36
-
37
- # # Create array with name and IP address information of the execute nodes that have been created on openstack
38
- readarray EXECUTENODES < <( openstack server list --name $CONDORINSTANCENAME -c Name -c Networks -c Status -f value)
39
- echo " The total number of execute nodes in the pool is: ${# EXECUTENODES[@]} "
40
- i=0
41
- while [ $i -lt ${# EXECUTENODES[@]} ]; do
42
- printf " ${EXECUTENODES[$i]} "
43
- let i=i+1;
44
- done
45
-
46
- # # Variable that chooses which node to kill based on the conditionals below
47
- MACHINETOKILL=$( echo ${EXECUTENODES[@]} | grep -Eo " $CONDORINSTANCENAME -[0-9]* ACTIVE dualStack=${IDLENODES[0]} " | awk {' print $1 ' })
48
- # echo "\$MACHINETOKILL is $MACHINETOKILL"
49
-
50
- # # True or false variable that determines if a larger than standard VM should be created or not, only checks idle jobs
51
- REQCPUS=$( condor_q -l $( echo ${SUBMITTINGUSERS[@]} ) | grep -o ' ^JobStatus = 1\|^RequestCpus = [4,8]' | grep -c " RequestCpus = 4" )
52
-
53
- IPV6MACHINETOKILL=$( echo ${EXECUTENODES[@]} | grep -Eo " htcondorexecute-[0-9]* ACTIVE dualStack=[0-9]{4}\:[0-9]{3}\:[0-9]{1}\:[0-9]{4}\:\:[0-9]{2}[a-z]{1}[0-9]{1}" | awk {' print $1 ' })
54
-
55
- # # Display information about how many jobs are idle and how many execute nodes are available
56
- # echo "$IDLEJOBS jobs are idle and there's ${#EXECUTENODES[@]} execute node(s) available"
57
-
58
21
# # Delete idle nodes that are not needed
59
- if [[ " ${# IDLENODES[@]} " -ge " ${# BUSYMACHINES[@]} " && " ${# IDLENODES[@]} " -gt " $MINNODES " ]] 2>> logfile; then
60
- echo " Deleting idle node " $MACHINETOKILL " " &&
61
- condor_off -fast -name $MACHINETOKILL .novalocal &&
62
- openstack server delete $MACHINETOKILL &&
63
- date &&
22
+ # if [[ "${#IDLENODES[@]}" -ge "${#BUSYMACHINES[@]}" && "${#IDLENODES[@]}" -gt "$MINNODES" ]] 2>>logfile; then
23
+ if [[ " ${# IDLENODES[@]} " -ge " $MINNODES " && " ${# IDLENODES[@]} " -gt " $REDUNDANTNODES " ]] 2>> logfile; then
24
+ echo " Deleting idle node " $MACHINETOKILL " "
25
+ condor_off -fast -name $MACHINETOKILL .novalocal
26
+ openstack server delete $MACHINETOKILL
27
+ date
64
28
sleep $SHORTSLEEP
65
29
66
30
# # Do nothing if max number of execute nodes has been reached
67
31
elif [[ " ${# EXECUTENODES[@]} " -eq " $MAXNODES " ]] 2>> logfile; then
68
- echo " Max execute node limit has been reached" &&
69
- date &&
32
+ echo " Max execute node limit has been reached"
33
+ date
70
34
sleep $SHORTSLEEP
71
35
72
36
# # Create an execute node if fewer than the minimum number ($MINNODES) are running
73
37
elif [[ " ${# EXECUTENODES[@]} " -lt " $MINNODES " && " ${# EXECUTENODES[@]} " -le " $MAXNODES " ]] 2>> logfile; then
74
- VM=$( date +%s) &&
75
- echo " All execute nodes are full, or the minimum number of machines is not running, create command will execute" &&
38
+ VM=$( date +%s)
39
+ echo " All execute nodes are full, or the minimum number of machines is not running, create command will execute"
76
40
./createvm.sh $SMALL 2>&1>> logfile
77
- echo " Create command for " $CONDORINSTANCENAME " -" ${VM} " sent" &&
78
- date &&
41
+ echo " Create command for " $CONDORINSTANCENAME " -" ${VM} " sent"
42
+ date
79
43
sleep $LONGSLEEP
80
44
81
45
# # Create execute node if there are idle jobs and the max vm quota is not exceeded
82
46
elif [[ " $IDLEJOBS " -gt 0 && " ${# EXECUTENODES[@]} " -le " $MAXNODES " ]] 2>> logfile; then if [[ " $REQCPUS " -ge 1 ]] || [[ " $IDLEJOBS " -gt " $IDLEJOBVMC " ]] 2>> logfile; then
83
- for i in $( seq 1 $STARTMANY ) ; do
84
- VM=$( date +%s)
85
- echo " There are idle jobs, sending create command for " $CONDORINSTANCENAME " -" ${VM} " " &&
86
- ./createvm.sh $LARGE 2>&1>> logfile && sleep 1;
87
- echo " Create command for " $CONDORINSTANCENAME " -" ${VM} " sent"
88
- done
89
- date &&
90
- sleep $LONGSLEEP
47
+ while [ " ${# EXECUTENODES[@]} " -lt " $MAXNODES " ]; do
48
+ VM=$( date +%s)
49
+ echo " There are idle jobs, sending create command for " $CONDORINSTANCENAME " -" ${VM} " "
50
+ ./createvm.sh $LARGE 2>&1>> logfile
51
+ sleep 1
52
+ echo " Create command for " $CONDORINSTANCENAME " -" ${VM} " sent"
53
+ source $( pwd) /variablecreation.sh
54
+ done
55
+ date
56
+ sleep $LONGSLEEP
91
57
else
92
- VM=$( date +%s) &&
93
- echo " There are idle jobs, sending create command for " $CONDORINSTANCENAME " -" ${VM} " " &&
58
+ VM=$( date +%s)
59
+ echo " There are idle jobs, sending create command for " $CONDORINSTANCENAME " -" ${VM} " "
94
60
./createvm.sh $SMALL 2>&1>> logfile
95
- echo " Create command for " $CONDORINSTANCENAME " -" ${VM} " sent" &&
96
- date &&
61
+ echo " Create command for " $CONDORINSTANCENAME " -" ${VM} " sent"
62
+ date
97
63
sleep $LONGSLEEP
98
64
fi
99
65
# # Create $REDUNDANTNODES redundant execute node(s) if all currently running execute nodes are full
100
66
elif [[ " $IDLEJOBS " -eq 0 && " $RUNNINGJOBS " -gt 1 && " $RUNNINGJOBS " -eq " $MAXJOBS " && ${# EXECUTENODES[@]} -le " $MAXNODES " ]] 2>> logfile; then
101
- VM=$( date +%s) &&
102
- echo " Redundant node is needed, sending create command for " $CONDORINSTANCENAME " -" ${VM} " " &&
67
+ VM=$( date +%s)
68
+ echo " Redundant node is needed, sending create command for " $CONDORINSTANCENAME " -" ${VM} " "
103
69
for i in $( seq 1 $REDUNDANTNODES ) ; do
104
70
./createvm.sh $SMALL 2>&1>> logfile && sleep 1;
105
71
done
106
- echo " Create command for " $CONDORINSTANCENAME " -" ${VM} " sent" &&
107
- date &&
72
+ echo " Create command for " $CONDORINSTANCENAME " -" ${VM} " sent"
73
+ date
108
74
sleep $LONGSLEEP
109
75
110
76
# # Do nothing if minimum node limit has been reached
111
77
elif [[ " ${# EXECUTENODES[@]} " -eq " $MINNODES " ]] 2>> logfile; then
112
78
echo " The minimum number of execute nodes are running, do nothing."
113
79
fi
114
- if [ " $IPV6MACHINETOKILL " = true ] ; then
80
+
81
+ # Sometimes OpenStack creates an instance whose network information in `openstack server list` is in reverse order,
82
+ # meaning the IPv6 address comes before the IPv4 address, which breaks the logic that kills redundant nodes.
83
+ # These incorrectly formatted instances are deleted immediately, since the bug is on the OpenStack side.
84
+ # No clean solution is available for now, so this workaround has to do.
85
+ if [ ! -z " $IPV6MACHINETOKILL " ] 2>> logfile; then
115
86
openstack server delete " $IPV6MACHINETOKILL "
116
- echo " Deleted faulty formated instance $IPV6MACHINETOKILL "
117
- elif [ " $IPV6MACHINETOKILL " = false ] ; then
118
- echo " No faulty VM's created, it's all good."
87
+ echo " Instance with IPv6 where IPv4 IP should be has been created, killing $IPV6MACHINETOKILL "
88
+ elif [ -z " $IPV6MACHINETOKILL " ] 2>> logfile ; then
89
+ echo " No instance with IPv6 where IPv4 IP should be has been created, it's all good."
119
90
fi
120
- echo " Nothing is happening, sleeping for 60 seconds" &&
121
- sleep $LONGSLEEP &&
91
+ echo " Nothing is happening, sleeping for 60 seconds"
92
+ sleep $LONGSLEEP
122
93
clear
123
94
done
0 commit comments