Skip to content

Commit 5d49699

Browse files
committed
Added default logfile writing and other things
1 parent f51a8d8 commit 5d49699

File tree

5 files changed

+112
-87
lines changed

5 files changed

+112
-87
lines changed

EHOS.sh

+47-76
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
#!/bin/bash
22

33
## Uncomment for CLI debugging
4-
set -o xtrace
4+
#set -o xtrace
5+
#set -v
6+
7+
exec >logfile 2>&1
58

69
## while loop
710
while
@@ -10,114 +13,82 @@ while
1013
date
1114

1215
## Read configuration file
13-
source $(pwd)/configuration.sh
16+
# source $(pwd)/configuration.sh
17+
source $(pwd)/variablecreation.sh
1418

1519
## Beginning of variable creation and main loop
1620
do
17-
## Count the number of idle jobs
18-
IDLEJOBS=$(condor_q -l -submitter galaxy -submitter centos | grep -wc 'JobStatus = 1')
19-
echo "The number of idle jobs is $IDLEJOBS"
20-
21-
## Count how many slots are available to calculate max jobs/slots
22-
MAXJOBS=$(condor_status -l | grep -i "TotalSlotCpus = [2,4,8]" | awk 'BEGIN{ total=0 } { total=total+$3 } END{ printf total }')
23-
echo "The execute node(s) can currently run "$MAXJOBS" jobs/threads"
24-
25-
## Count how many jobs are currently running
26-
RUNNINGJOBS=$(condor_q -l -submitter galaxy -submitter centos | grep -wc 'JobStatus = 2')
27-
echo "The number of running jobs is $RUNNINGJOBS"
28-
29-
## Create array with IP numbers of idle nodes
30-
readarray IDLENODES < <(condor_status -l | grep -iEo 'StartdIpAddr = "<[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | uniq -u | grep -Eo "[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}")
31-
# echo "The number of idle execute nodes is ${#IDLENODES[@]} and the idle node IP(s) is "$(printf '%s\n' "${IDLENODES[@]}")""
32-
33-
## Create array with IP numbers of nodes that are running jobs
34-
readarray BUSYMACHINES < <(condor_q -l $(echo ${SUBMITTINGUSERS[@]}) | grep -oE "[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}" | sort -u)
35-
echo "The following execute nodes are running jobs: "$(printf '%s\n' "${BUSYMACHINES[@]}")""
36-
37-
## Create array with name and IP address information of the execute nodes that have been created on openstack
38-
readarray EXECUTENODES < <(openstack server list --name $CONDORINSTANCENAME -c Name -c Networks -c Status -f value)
39-
echo "The total number of execute nodes in the pool is: ${#EXECUTENODES[@]}"
40-
i=0
41-
while [ $i -lt ${#EXECUTENODES[@]} ]; do
42-
printf "${EXECUTENODES[$i]}"
43-
let i=i+1;
44-
done
45-
46-
## Variable that chooses which node to kill based on the conditionals below
47-
MACHINETOKILL=$(echo ${EXECUTENODES[@]} | grep -Eo "$CONDORINSTANCENAME-[0-9]* ACTIVE dualStack=${IDLENODES[0]}" | awk {' print $1 '})
48-
# echo "\$MACHINETOKILL is $MACHINETOKILL"
49-
50-
## True or false variable that determines if a larger than standard VM should be created or not, only checks idle jobs
51-
REQCPUS=$(condor_q -l $(echo ${SUBMITTINGUSERS[@]}) | grep -o '^JobStatus = 1\|^RequestCpus = [4,8]' | grep -c "RequestCpus = 4")
52-
53-
IPV6MACHINETOKILL=$(echo ${EXECUTENODES[@]} | grep -Eo "htcondorexecute-[0-9]* ACTIVE dualStack=[0-9]{4}\:[0-9]{3}\:[0-9]{1}\:[0-9]{4}\:\:[0-9]{2}[a-z]{1}[0-9]{1}" | awk {' print $1 '})
54-
55-
## Display information about how many jobs are idle and how many execute nodes are available
56-
# echo "$IDLEJOBS jobs are idle and there's ${#EXECUTENODES[@]} execute node(s) available"
57-
5821
## Delete idle nodes that are not needed
59-
if [[ "${#IDLENODES[@]}" -ge "${#BUSYMACHINES[@]}" && "${#IDLENODES[@]}" -gt "$MINNODES" ]] 2>>logfile; then
60-
echo "Deleting idle node "$MACHINETOKILL"" &&
61-
condor_off -fast -name $MACHINETOKILL.novalocal &&
62-
openstack server delete $MACHINETOKILL &&
63-
date &&
22+
# if [[ "${#IDLENODES[@]}" -ge "${#BUSYMACHINES[@]}" && "${#IDLENODES[@]}" -gt "$MINNODES" ]] 2>>logfile; then
23+
if [[ "${#IDLENODES[@]}" -ge "$MINNODES" && "${#IDLENODES[@]}" -gt "$REDUNDANTNODES" ]] 2>>logfile; then
24+
echo "Deleting idle node "$MACHINETOKILL""
25+
condor_off -fast -name $MACHINETOKILL.novalocal
26+
openstack server delete $MACHINETOKILL
27+
date
6428
sleep $SHORTSLEEP
6529

6630
## Do nothing if max number of execute nodes has been reached
6731
elif [[ "${#EXECUTENODES[@]}" -eq "$MAXNODES" ]] 2>>logfile; then
68-
echo "Max execute node limit has been reached" &&
69-
date &&
32+
echo "Max execute node limit has been reached"
33+
date
7034
sleep $SHORTSLEEP
7135

7236
## Create execute node if fewer than the minimum number of execute nodes ($MINNODES) are running
7337
elif [[ "${#EXECUTENODES[@]}" -lt "$MINNODES" && "${#EXECUTENODES[@]}" -le "$MAXNODES" ]] 2>>logfile; then
74-
VM=$(date +%s) &&
75-
echo "All execute nodes are full, or the minimum number of machines is not running, create command will execute" &&
38+
VM=$(date +%s)
39+
echo "All execute nodes are full, or the minimum number of machines is not running, create command will execute"
7640
./createvm.sh $SMALL 2>&1>>logfile
77-
echo "Create command for "$CONDORINSTANCENAME"-"${VM}" sent" &&
78-
date &&
41+
echo "Create command for "$CONDORINSTANCENAME"-"${VM}" sent"
42+
date
7943
sleep $LONGSLEEP
8044

8145
## Create execute node if there are idle jobs and the max vm quota is not exceeded
8246
elif [[ "$IDLEJOBS" -gt 0 && "${#EXECUTENODES[@]}" -le "$MAXNODES" ]] 2>>logfile; then if [[ "$REQCPUS" -ge 1 ]] || [[ "$IDLEJOBS" -gt "$IDLEJOBVMC" ]] 2>>logfile; then
83-
for i in $(seq 1 $STARTMANY); do
84-
VM=$(date +%s)
85-
echo "There are idle jobs, sending create command for "$CONDORINSTANCENAME"-"${VM}"" &&
86-
./createvm.sh $LARGE 2>&1>>logfile && sleep 1;
87-
echo "Create command for "$CONDORINSTANCENAME"-"${VM}" sent"
88-
done
89-
date &&
90-
sleep $LONGSLEEP
47+
while [ "${#EXECUTENODES[@]}" -lt "$MAXNODES" ]; do
48+
VM=$(date +%s)
49+
echo "There are idle jobs, sending create command for "$CONDORINSTANCENAME"-"${VM}""
50+
./createvm.sh $LARGE 2>&1>>logfile
51+
sleep 1
52+
echo "Create command for "$CONDORINSTANCENAME"-"${VM}" sent"
53+
source $(pwd)/variablecreation.sh
54+
done
55+
date
56+
sleep $LONGSLEEP
9157
else
92-
VM=$(date +%s) &&
93-
echo "There are idle jobs, sending create command for "$CONDORINSTANCENAME"-"${VM}"" &&
58+
VM=$(date +%s)
59+
echo "There are idle jobs, sending create command for "$CONDORINSTANCENAME"-"${VM}""
9460
./createvm.sh $SMALL 2>&1>>logfile
95-
echo "Create command for "$CONDORINSTANCENAME"-"${VM}" sent" &&
96-
date &&
61+
echo "Create command for "$CONDORINSTANCENAME"-"${VM}" sent"
62+
date
9763
sleep $LONGSLEEP
9864
fi
9965
## Create $REDUNDANTNODES redundant execute node(s) if all currently running execute nodes are full
10066
elif [[ "$IDLEJOBS" -eq 0 && "$RUNNINGJOBS" -gt 1 && "$RUNNINGJOBS" -eq "$MAXJOBS" && ${#EXECUTENODES[@]} -le "$MAXNODES" ]] 2>>logfile; then
101-
VM=$(date +%s) &&
102-
echo "Redundant node is needed, sending create command for "$CONDORINSTANCENAME"-"${VM}"" &&
67+
VM=$(date +%s)
68+
echo "Redundant node is needed, sending create command for "$CONDORINSTANCENAME"-"${VM}""
10369
for i in $(seq 1 $REDUNDANTNODES); do
10470
./createvm.sh $SMALL 2>&1>>logfile && sleep 1;
10571
done
106-
echo "Create command for "$CONDORINSTANCENAME"-"${VM}" sent" &&
107-
date &&
72+
echo "Create command for "$CONDORINSTANCENAME"-"${VM}" sent"
73+
date
10874
sleep $LONGSLEEP
10975

11076
## Do nothing if minimum node limit has been reached
11177
elif [[ "${#EXECUTENODES[@]}" -eq "$MINNODES" ]] 2>>logfile; then
11278
echo "The minimum number of execute nodes are running, do nothing."
11379
fi
114-
if [ "$IPV6MACHINETOKILL"=true ] ; then
80+
81+
# Sometimes OpenStack creates an instance where the Network information in `openstack server list` is in reverse order
82+
# Meaning the IPv6 IP comes before the IPv4 IP, and that breaks the kill redundant node function
83+
# These incorrectly formatted instances are deleted immediately, since the bug is on the OpenStack side
84+
# No clean solution can be implemented as of now, so this workaround will have to do
85+
if [ ! -z "$IPV6MACHINETOKILL" ] 2>>logfile; then
11586
openstack server delete "$IPV6MACHINETOKILL"
116-
echo "Deleted faulty formated instance $IPV6MACHINETOKILL"
117-
elif [ "$IPV6MACHINETOKILL"=false ] ; then
118-
echo "No faulty VM's created, it's all good."
87+
echo "Instance with IPv6 where IPv4 IP should be has been created, killing $IPV6MACHINETOKILL"
88+
elif [ -z "$IPV6MACHINETOKILL" ] 2>>logfile; then
89+
echo "No instance with IPv6 where IPv4 IP should be has been created, it's all good."
11990
fi
120-
echo "Nothing is happening, sleeping for 60 seconds" &&
121-
sleep $LONGSLEEP &&
91+
echo "Nothing is happening, sleeping for 60 seconds"
92+
sleep $LONGSLEEP
12293
clear
12394
done

configuration.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ readarray SUBMITTINGUSERS < <(echo "-submitter galaxy"; echo "-submitter centos"
1414
CONDORINSTANCENAME=htcondorexecute
1515

1616
## Name of the base VM image that will be used as execute node
17-
CONDORIMAGENAME=HTCondorVanilla-11-07-18
18-
#CONDORIMAGENAME=("GOLD CentOS 7")
17+
#CONDORIMAGENAME=HTCondorVanilla-11-07-18
18+
CONDORIMAGENAME=("GOLD CentOS 7")
1919

2020
## Security group name(s) openstack
2121
readarray SECURITYGROUPS < <(echo "--security-group Pipeline-development"; echo "--security-group test"; echo "--security-group default")
@@ -47,7 +47,7 @@ SHORTSLEEP=15
4747
LONGSLEEP=60
4848

4949
## How many redundant nodes to create
50-
REDUNDANTNODES=1
50+
REDUNDANTNODES=2
5151

5252
## How many nodes to start when there are many queued jobs
5353
STARTMANY=4

createvm.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22

33
## Uncomment for CLI debugging
4-
set -o xtrace
4+
#set -o xtrace
55

66
source "$(pwd)"/configuration.sh
77

@@ -13,7 +13,7 @@ VM=$(date +%s)
1313
#openstack server create --flavor $1 --image $CONDORIMAGENAME ${NIC[@]} ${SECURITYGROUPS[@]} --key-name $SSHKEY --user-data vm-configs/master-config.yaml $CONDORINSTANCENAME-"${VM}"
1414

1515
##Execute node
16-
openstack server create --flavor $1 --image $CONDORIMAGENAME ${NIC[@]} ${SECURITYGROUPS[@]} --key-name $SSHKEY --user-data vm-configs/execute-config.yaml $CONDORINSTANCENAME-"${VM}"
16+
#openstack server create --flavor $1 --image $CONDORIMAGENAME ${NIC[@]} ${SECURITYGROUPS[@]} --key-name $SSHKEY --user-data vm-configs/execute-config.yaml $CONDORINSTANCENAME-"${VM}"
1717

1818
## Submit node
1919
#openstack server create --flavor $1 --image $CONDORIMAGENAME ${NIC[@]} ${SECURITYGROUPS[@]} --key-name $SSHKEY --user-data vm-configs/submit-config.yaml $CONDORINSTANCENAME-"${VM}"
@@ -22,4 +22,4 @@ openstack server create --flavor $1 --image $CONDORIMAGENAME ${NIC[@]} ${SECURIT
2222
#openstack server create --flavor $1 --image $CONDORIMAGENAME ${NIC[@]} ${SECURITYGROUPS[@]} --key-name $SSHKEY --user-data vm-configs/kek.yaml $CONDORINSTANCENAME-"${VM}"
2323

2424
## Vanilla HTCondor
25-
#openstack server create --flavor $1 --image "${CONDORIMAGENAME[@]}" ${NIC[@]} ${SECURITYGROUPS[@]} --key-name $SSHKEY --user-data vm-configs/vanilla-condor.sh $CONDORINSTANCENAME-"${VM}"
25+
openstack server create --flavor $1 --image "${CONDORIMAGENAME[@]}" ${NIC[@]} ${SECURITYGROUPS[@]} --key-name $SSHKEY --user-data vm-configs/vanilla-condor.sh $CONDORINSTANCENAME-"${VM}"

scripts/cloud-monitor.sh

+7-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
#!/bin/bash
22
#You must run `source keystone_rc.sh` for the `openstack` command to work
33

4-
source configuration.sh
54

6-
watch '
7-
echo -e "EHOS cloud monitor (CTRL-C to exit):\n";
8-
condor_status;
9-
condor_q ${SUBMITTINGUSERS[@]};
5+
#while true; do source configuration.sh && echo "EHOS cloud monitor (CTRL-C to exit):\n" && condor_status && condor_q ${SUBMITTINGUSERS[@]} && openstack server list && sleep 5 && clear; done
6+
source ../configuration.sh
7+
8+
watch -d '
9+
echo "EHOS cloud monitor (CTRL-C to exit)"
10+
condor_status
11+
condor_q -submitter centos -submitter galaxy
1012
openstack server list'

variablecreation.sh

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#!/bin/bash
2+
3+
## Uncomment for CLI debugging
4+
#set -o xtrace
5+
6+
## Display date
7+
date
8+
9+
## Read configuration file
10+
source $(pwd)/configuration.sh
11+
12+
## Beginning of variable creation (the main loop itself is in EHOS.sh, which sources this file)
13+
## Count the number of idle jobs
14+
IDLEJOBS=$(condor_q -l -submitter galaxy -submitter centos | grep -wc 'JobStatus = 1')
15+
echo "The number of idle jobs is $IDLEJOBS"
16+
17+
## Count how many slots are available to calculate max jobs/slots
18+
MAXJOBS=$(condor_status -l | grep -i "TotalSlotCpus = [2,4,8]" | awk 'BEGIN{ total=0 } { total=total+$3 } END{ printf total }')
19+
echo "The execute node(s) can currently run "$MAXJOBS" jobs/threads"
20+
21+
## Count how many jobs are currently running
22+
RUNNINGJOBS=$(condor_q -l -submitter galaxy -submitter centos | grep -wc 'JobStatus = 2')
23+
echo "The number of running jobs is $RUNNINGJOBS"
24+
25+
## Create array with IP addresses of idle nodes
26+
readarray IDLENODES < <(condor_status -l | grep -iEo 'StartdIpAddr = "<[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | uniq -u | grep -Eo "[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}")
27+
# echo "The number of idle execute nodes is ${#IDLENODES[@]} and the idle node IP(s) is "$(printf '%s\n' "${IDLENODES[@]}")""
28+
29+
## Create array with IP addresses of nodes that are running jobs
30+
readarray BUSYMACHINES < <(condor_q -l $(echo ${SUBMITTINGUSERS[@]}) | grep -oE "[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}" | sort -u)
31+
echo "The following execute nodes are running jobs: "$(printf '%s\n' "${BUSYMACHINES[@]}")""
32+
33+
## Create array with name and IP address information of the execute nodes that have been created on openstack
34+
readarray EXECUTENODES < <(openstack server list --name $CONDORINSTANCENAME -c Name -c Networks -c Status -f value)
35+
echo "The total number of execute nodes in the pool is: ${#EXECUTENODES[@]}"
36+
i=0
37+
while [ $i -lt ${#EXECUTENODES[@]} ]; do
38+
printf "${EXECUTENODES[$i]}"
39+
let i=i+1;
40+
done
41+
42+
## Variable that chooses which node to kill based on the conditionals below
43+
MACHINETOKILL=$(echo ${EXECUTENODES[@]} | grep -Eo "$CONDORINSTANCENAME-[0-9]* ACTIVE dualStack=${IDLENODES[0]}" | awk {' print $1 '})
44+
# echo "\$MACHINETOKILL is $MACHINETOKILL"
45+
46+
## Count of "RequestCpus = 4" lines in the condor_q output (not a true/false value); 1 or more means a larger than standard VM should be created. Intended to consider idle jobs only
47+
REQCPUS=$(condor_q -l $(echo ${SUBMITTINGUSERS[@]}) | grep -o '^JobStatus = 1\|^RequestCpus = [4,8]' | grep -c "RequestCpus = 4")
48+
49+
IPV6MACHINETOKILL=$(echo ${EXECUTENODES[@]} | grep -Eo "htcondorexecute-[0-9]* ACTIVE dualStack=[0-9]{4}\:[0-9]{3}\:[0-9]{1}\:[0-9]{4}\:\:[0-9]{2}[a-z]{1}[0-9]{1}" | awk {' print $1 '})
50+
51+
## Display information about how many jobs are idle and how many execute nodes are available
52+
# echo "$IDLEJOBS jobs are idle and there's ${#EXECUTENODES[@]} execute node(s) available"

0 commit comments

Comments
 (0)