#!/bin/bash
# Library for smoketests.
# Includes common testing functionality
# We only need to be sourced once.
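# Typical use (a sketch; the exact path may differ in your checkout):
#   . "$CROWBAR_DIR/test_framework/test_lib.sh"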
[[ $SMOKETEST_LIB_SOURCED = true ]] && exit 0
[[ $ADMIN_MEM ]] || ADMIN_MEM=4G
# Make sure we know where to find our test binaries
[[ -d $CROWBAR_DIR/testing/cli ]] || mkdir -p "$CROWBAR_DIR/testing/cli"
export PATH="$CROWBAR_DIR/testing/cli:$CROWBAR_DIR/test_framework:$CROWBAR_DIR:$CROWBAR_DIR/change-image/dell:$PATH:/sbin:/usr/sbin"
set -o pipefail
SMOKETEST_RESULTS=()
# Commands we have to run under sudo -n
SUDO_CMDS="brctl ip umount mount make_cgroups.sh"
# Commands we need to run as $USER
NEEDED_CMDS="ruby gem screen qemu-img sudo"
# Gems we have to have installed.
NEEDED_GEMS="json net-http-digest_auth"
declare -a SMOKETEST_VLANS
SMOKETEST_VLANS[200]="192.168.125.1/24"
SMOKETEST_VLANS[300]="192.168.126.1/24"
SMOKETEST_VLANS[500]="192.168.127.1/24"
SMOKETEST_VLANS[600]="192.168.128.1/24"
ADMIN_HOSTNAMES=("cr0wbar.pwns.joo"
"vltima.ratio.regvm"
"admin.nihil.fit"
"omnia.fines.bon"
"admin.smoke.test"
"eheyeh.asher.eheyeh"
"bork.bork.bork")
ADMIN_HOSTNAME=${ADMIN_HOSTNAMES[$(($RANDOM % ${#ADMIN_HOSTNAMES[@]}))]}
debug "Picked $ADMIN_HOSTNAME"
export SMOKETEST_DOMAIN=${ADMIN_HOSTNAME#*.}
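# Find a usable KVM binary. If none of these exist, KVM ends up empty and
# the VM launches below will fail.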
for KVM in kvm qemu-kvm qemu-system-x86_64 ''; do
type $KVM &>/dev/null && break
done
# This lock is held whenever we are running tests. It exists to
# prevent multiple instances of the smoketest from running at once.
SMOKETEST_LOCK="$CROWBAR_DIR/testing/.testing.lck"
# This lock is held whenever we are starting or killing KVM.
# We use it to ensure that only one virtual machine is being set
# up or torn down at any given time, and that our teardown
# function does not race with our launching function.
SMOKETEST_KVM_LOCK="$CROWBAR_DIR/testing/.kvm.lck"
# This lock is held whenever we are performing a cleanup.
# We want to ensure that we never try to run two
# cleanups in parallel.
SMOKETEST_CLEANUP_LOCK="$CROWBAR_DIR/testing/.cleanup.lck"
SMOKETEST_DIR="$CROWBAR_DIR/test_framework"
# We run all KVM instances in a screen session.
SMOKETEST_SCREEN="crowbar-smoketest"
# The number of compute nodes we will end up deploying.
# Each node has 2 gigs of memory, 2 cores, and a 10 gig primary disk
# image (plus two 4 gig data disks), so make sure your test machine can handle the load.
SMOKETEST_VIRT_NODES=(virt-1 virt-2 virt-3 virt-4)
# Number of seconds we wait for compute nodes to go through a full deploy.
COMPUTE_DEPLOY_WAIT=2400
# IP address of the node that ends up as the admin node.
export CROWBAR_IP=192.168.124.10
export CROWBAR_KEY="crowbar:crowbar"
# The names of the bridges we will create. Bridge names must be
# 15 characters or less due to kernel name constraints.
# The last part of the name will be used in tap interface name generation.
# Please keep it at 4 characters or less.
SMOKETEST_BRIDGES=(crowbar-pub)
NICS_PER_BRIDGE=3
# An array of physical interfaces and the bridges they should be bound to.
# We need to use real physical interfaces because Crowbar assumes
# it can create and destroy vlans as needed.
# Each entry in this array is of the form ifname,bridgename
# PHYSICAL_INTERFACES=(eth1,crowbar-pub)
PHYSICAL_INTERFACES=()
# An array of MAC addresses of the primary interfaces of the physical machines.
# We need to have this information beforehand so that we can send
# Wake On LAN packets to them.
PHYSICAL_MACS=()
# An associative array for test results.
declare -A test_hook_results
[[ $SMOKETEST_ISO && -f $SMOKETEST_ISO ]] || \
SMOKETEST_ISO="$ISO_DEST/$BUILT_ISO"
# Source a local config file. Settings in it will override the
# ones here.
if [[ -f $HOME/.test-crowbar ]]; then
. "$HOME/.test-crowbar"
fi
# A hash containing slave name -> hostname mappings.
declare -A SMOKETEST_SLAVES
grep -q cgroup /proc/filesystems || die "We must have cgroups to track our processes for proper cleanup!"
smoketest_update_status() {
# $1 = VM status to update.
# $2 = Status update
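# Example: smoketest_update_status admin "Creating disk image"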
local current_date=$(date '+%F %T %z')
echo "$current_date: $1 - $2"
{
flock 66
echo "$current_date: $2" >> "$smoketest_dir/$1.status"
} 66>"$smoketest_dir/.$1.status.lck"
}
# make the bridges we will use for testing.
smoketest_make_bridges() {
local pub_re='pub$'
local bridge
for bridge in "${SMOKETEST_BRIDGES[@]}"; do
sudo -n brctl show |grep -q "$bridge" || \
sudo -n brctl addbr "$bridge" || \
die "Could not create $bridge bridge!"
# Emulate a switch with STP but no portfast.
sudo -n brctl stp "$bridge" on || \
die "Could not enable spanning tree protocol on $bridge!"
sudo -n brctl setfd "$bridge" 2 || \
die "Could not set forwarding time on $bridge!"
sudo -n brctl sethello "$bridge" 1 || \
die "Could not set hello time for $bridge!"
sudo -n ip link set "$bridge" up || \
die "Could not set link on $bridge up!"
if [[ $bridge =~ $pub_re ]]; then
sudo -n ip addr add 192.168.124.1/24 dev "$bridge"
for vlan in "${!SMOKETEST_VLANS[@]}"; do
if [[ $network_mode = *novlan ]]; then
sudo -n ip addr add "${SMOKETEST_VLANS[$vlan]}" \
dev $bridge
else
sudo -n ip link add link "$bridge" \
name "$bridge.$vlan" type vlan id $vlan
sudo -n ip link set "$bridge.$vlan" up
sudo -n ip addr add "${SMOKETEST_VLANS[$vlan]}" \
dev "$bridge.$vlan"
fi
done
fi
done
# Bind the physical nics we want to use to the appropriate bridges.
for iface in "${PHYSICAL_INTERFACES[@]}"; do
local ifname=${iface%%,*}
local bridge=${iface##*,}
sudo -n ip link set "$ifname" up
sudo -n brctl addif "$bridge" "$ifname"
done
}
# remove our bridges after we are done with them.
smoketest_kill_bridges() {
# Ignore any simple errors in this function.
set +e
local pub_re='pub$'
# Unbind any physical nics we have bound.
for iface in "${PHYSICAL_INTERFACES[@]}"; do
local ifname=${iface%%,*}
local bridge=${iface##*,}
sudo -n ip link set "$ifname" down
sudo -n brctl delif "$bridge" "$ifname"
done
# Tear down the bridges we created.
for bridge in "${SMOKETEST_BRIDGES[@]}"; do
if [[ $bridge =~ $pub_re ]]; then
sudo -n ip addr del 192.168.124.1/24 dev "$bridge"
for vlan in "${!SMOKETEST_VLANS[@]}"; do
if [[ $network_mode = *-novlan ]]; then
sudo -n ip addr del "${SMOKETEST_VLAN[$vlan]}" \
dev "$bridge"
else
sudo -n ip addr del "${SMOKETEST_VLAN[$vlan]}" \
dev "$bridge.$vlan"
sudo -n ip link set "$bridge.$vlan" down
sudo -n ip link del dev "$bridge.$vlan" type vlan id "$vlan"
fi
done
fi
sudo -n ip link set "$bridge" down
sudo -n brctl delbr "$bridge"
done
}
smoketest_get_cluster_logs() (
set +e
echo "$(date '+%F %T %z'): Gathering $1 logs, please wait."
cd "$LOGDIR"
[[ -f $smoketest_dir/admin.pid ]] || return 1
local curlargs=(-L -o "$1-$(date '+%Y%m%d-%H%M%S').tar" \
--connect-timeout 120 --max-time 120)
[[ $CROWBAR_KEY ]] && curlargs+=(-u "$CROWBAR_KEY" --digest) || :
http_proxy='' curl "${curlargs[@]}" "http://$CROWBAR_IP:3000/support/logs"
echo "$(date '+%F %T %z'): Done gathering $1 logs."
)
# Tidy up after ourselves when we exit.
# This takes care to make sure we don't have any stray
# child processes left behind (including VMs), tear down any
# network infrastructure we created, and create any success
# or failure logs we need to create.
declare -a smoketest_cleanup_cmds taps
smoketest_cleanup() {
# We ignore errors in this function.
set +e
flock 75
killall check_ready
[[ $develop_mode = true ]] && pause
# Gather final logs if our admin node is up.
smoketest_get_cluster_logs final
# Make sure our virtual machines have been torn down.
if [[ -d $smoketest_dir ]]; then
for pidfile in "$smoketest_dir/"*.pid; do
local vmname=${pidfile##*/}
vmname=${vmname%.pid}
[[ $vmname = '*' ]] && continue
kill_vm "$vmname" || :
done
target="${smoketest_dir##*/}-$(date '+%Y%m%d-%H%M%S')-${final_status}"
rm -f "$smoketest_dir/"*.disk || :
cp "$CROWBAR_DIR/smoketest.log" "$smoketest_dir"
(cd "$smoketest_dir/.."; \
tar czf "$currdir/$target.tar.gz" "${smoketest_dir##*/}")
echo "Logs are available at $currdir/$target.tar.gz."
rm -rf "$smoketest_dir"
fi
# If there are any commands to run at smoketest_cleanup, run them now.
for c in "${smoketest_cleanup_cmds[@]}"; do
eval $c || :
done
# If our .iso is still mounted, umount it.
sudo -n /bin/umount -d "$LOOPDIR" &>/dev/null
# Tear down our network.
kill_virt_net &>/dev/null
# Kill our screen session
screen -S "$SMOKETEST_SCREEN" -X quit &>/dev/null
# Kill anything else we may have left behind.
for task in $(cat "$CGROUP_DIR/tasks"); do
[[ $task = $$ || $task = $CROWBAR_BUILD_PID ]] && continue
kill -9 $task
done
# Make sure we exit with the right status code.
# Copy passed and failed logs to the right location.
[[ $SMOKETEST_RESULTS ]] && {
for result in "${SMOKETEST_RESULTS[@]}"; do
echo "$result"
[[ $result = *Passed ]] && continue
final_status=Failed
done
}
[[ $final_status ]] || final_status=Passed
echo "Deploy $final_status."
[[ $final_status = Passed ]]
} 75>"$CROWBAR_DIR/.smoketest_cleanup.lock"
# Simple unique mac address generation.
# This will not work if you end up spinning up more than 65535 vm interfaces.
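# Example: the first call sets MACADDR=52:54:00:12:34:01, the second 52:54:00:12:34:02, and so on.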
MACNR=1
getmac() {
MACADDR=$(printf "52:54:%02x:12:34:%02x" $((MACNR/256)) $((MACNR%256)))
((MACNR++))
:
}
# Make a tap interface, and attach it to the right bridge.
maketap() {
# $1 = preferred device name
# $2 = bridge to attach it to.
# preemptively arrange to clean up.
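# Example: maketap admin-0-pub crowbar-pub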
sudo -n ip tuntap add dev "$1" mode tap || \
die "Could not create tap device $1"
sudo -n ip link set "$1" up || \
die "Could not bring link on tap device $1 up!"
sudo -n brctl addif "$2" "$1" || \
die "Could not add tap $1 to bridge $2!"
}
# Remove a tap interface we created.
killtap() {
set +e
local res_re='(does not exist|Cannot find device)'
# $1 = device to kill
# $2 = bridge to detach it from
while ! [[ $(sudo -n ip link show "$1" 2>&1) =~ $res_re ]]; do
sudo -n brctl delif "$2" "$1"
sudo -n ip link set "$1" down
sudo -n ip tuntap del dev "$1" mode tap
done
}
# Build up our local virtual net infrastructure.
make_virt_net() {
smoketest_make_bridges
local node bridge idx
for node in admin ${SMOKETEST_VIRT_NODES[@]}; do
for bridge in "${SMOKETEST_BRIDGES[@]}"; do
for ((idx=0; idx < NICS_PER_BRIDGE; idx++)); do
local nic_name="${node}-${idx}-${bridge##*-}"
maketap "$nic_name" "$bridge"
done
done
done
}
# Tear down our local virtual net infrastructure.
kill_virt_net() {
set +e
local node bridge idx
for node in admin ${SMOKETEST_VIRT_NODES[@]}; do
for bridge in ${SMOKETEST_BRIDGES[@]}; do
for ((idx=0; idx < NICS_PER_BRIDGE; idx++)); do
local nic_name="${node}-${idx}-${bridge##*-}"
killtap "$nic_name" "$bridge"
done
done
done
smoketest_kill_bridges
}
# Make MAC addresses for the network interfaces for a VM.
makenics() {
# $1 = base name for each nic. Must be 9 characters or less.
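# On return, vm_nics holds "mac,tapname" pairs, e.g. 52:54:00:12:34:02,virt-1-0-pub.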
vm_nics=()
local node bridge idx
for bridge in ${SMOKETEST_BRIDGES[@]}; do
for ((idx=0; idx < NICS_PER_BRIDGE; idx++)); do
local nic_name="$1-${idx}-${bridge##*-}"
getmac
if [[ $nic_name =~ virt-.-0-pub ]]; then
SMOKETEST_SLAVES["$1"]="d${MACADDR//:/-}.${SMOKETEST_DOMAIN}"
fi
vm_nics+=("$MACADDR,$nic_name")
done
done
}
# Kill a running VM.
kill_vm() (
flock 65
# $1 = vmname
# $2 = state to assign, defaults to killed.
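# Example: kill_vm virt-1 exited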
set +e
local vmname=$1 killsig=TERM killtries=0 killstate="${2:-killed}"
# If there is no PID file, assume that the VM is already dead.
[[ -f $smoketest_dir/$1.pid ]] || {
> "$smoketest_dir/$1.$killstate"
flock -u 65
return 0
}
local pid=$(cat "$smoketest_dir/$1.pid")
# If there is no entry for the VM process in /proc/ assume the VM is dead.
# If there is, cd into that directory so that we can be sure
# we are killing the right process and so that we can tell when the process
# is dead -- when the process has finished exiting, the kernel will
# unlink /proc/$pid and everything in it. Even if the kernel manages
# to cycle through pidspace to create another process with the same pid
# between kill attempts, we will not see an entry for that process in
# our $PWD, because it will be a different dentry and set of inodes that
# happen to have the same name.
cd "/proc/$pid" &>/dev/null || {
rm -f "$smoketest_dir/$1.pid" &>/dev/null
> "$smoketest_dir/$1.$killstate"
flock -u 65
return 0
}
# If the cmdline entry does not exist in /proc/$pid, the process is
# already dead but the kernel has not finished cleaning up.
[[ -f cmdline ]] || {
rm -f "$smoketest_dir/$1.pid" &>/dev/null
> "$smoketest_dir/$1.$killstate"
flock -u 65
return 0
}
# If the /proc/$pid/cmdline does not contain the name of our VM,
# the kernel has finished cleaning up our process and enough processes
# have been spawned that our PID has been reused.
grep -q "$1" cmdline || {
rm -f "$smoketest_dir/$1.pid" &>/dev/null
> "$smoketest_dir/$1.$killstate"
flock -u 65
return 0
}
# Loop trying to kill this VM. Escalate our violence and sleep longer
# the more tries we take.
while (( killtries++ < 10)); do
smoketest_update_status $vmname "Killing $vm_gen (try $killtries, signal $killsig)"
kill "-$killsig" "$pid"
((killtries < 5)) || killsig=KILL
sleep $killtries
# if /proc/$pid/cmdline (or any other file that normally exists there)
# is gone, the process is dead, and our work is done.
if [[ ! -f cmdline ]]; then
smoketest_update_status $vmname "Killed with SIG${killsig}"
rm -f "$smoketest_dir/$vmname.pid" &>/dev/null
> "$smoketest_dir/$vmname.$killstate"
flock -u 65
return 0
fi
done
flock -u 65
echo "Could not kill $vmname, something went horribly wrong."
return 1
) 65>"$SMOKETEST_KVM_LOCK"
# Wait for a KVM instance to die naturally, for a timeout to expire,
# or for a daemonization condition to be reached.
wait_for_kvm() {
# $1 = name of KVM instance.
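# Remaining arguments are optional flags: -timeout SECS, -daemonif CMD (stop
# watching once CMD succeeds), and -dieif CMD (fail as soon as CMD succeeds).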
local vmname=$1
shift
local pidfile="$smoketest_dir/$vmname.pid"
[[ -f $pidfile ]] || {
smoketest_update_status "$vmanme" "No pid file for KVM."
return 1 # no pidfile? Bad Things happened.
}
local kvmpid=$(cat "$pidfile")
while [[ $1 ]]; do
case $1 in
-timeout) local deadline=$(($(date +%s) + $2)) timeout=$2; shift;;
-daemonif) local daemonif=$2; shift;;
-dieif) local dieif=$2; shift;;
*) break;;
esac
shift
done
local lastres= thisres=
# Use the same /proc/$kvmpid trick we used in kill_vm to
# make sure we are watching the right process.
if [[ ! -d /proc/$kvmpid ]]; then
smoketest_update_status $vmname "No process ID $kvmpid, what happened to our VM?"
ps aux |grep kvm
die "We are done here."
fi
( cd "/proc/$kvmpid"
# if /proc/$kvmpid/cmdline does not contain the name of our
# VM, something went horribly wrong.
[[ -f cmdline && $(cat cmdline) =~ $vmname ]] || {
smoketest_update_status "$vmname" "/proc/$kvmpid is not for our VM."
return 1
}
while [[ -f cmdline ]]; do
# If there is a condition on which we should kill the VM
# immediately, test and see if it is true.
if [[ $dieif ]] && $dieif; then
smoketest_update_status "$vmname" "Ran into instant-kill condition."
return 1
fi
# If there is a condition on which we should stop waiting for
# a VM, test to see if it is true.
if [[ $daemonif ]]; then
# We assign the output of $daemonif to a variable so that
# we don't spam up the test run transcript.
if thisres=$($daemonif 2>&1); then
# If it is, stop watching this VM.
smoketest_update_status "$vmname" "$thisres"
smoketest_update_status "$vmname" \
"Daemonizing node with $(($deadline - $(date +%s))) seconds left."
return 0
elif [[ $thisres && $lastres != $thisres ]]; then
smoketest_update_status "$vmname" "$thisres"
lastres="$thisres"
fi
fi
# If we were supposed to test for a deadline and we overran it,
# return with the appropriate status code.
if [[ $deadline && ! $develop_mode ]] && (($(date +%s) > $deadline)); then
smoketest_update_status "$vmname" "Node ran for more than $timeout seconds."
return 1
fi
sleep 10
done
# If we wanted to be daemonized but were not, game over man.
if [[ $daemonif ]]; then
smoketest_update_status "$vmname" "Node failed to daemonize."
return 1
else
# We appear to have exited normally.
smoketest_update_status "$vmname" "Node exited."
return 0
fi
)
}
# Hash that allows us to track the number of reboots a VM has had.
declare -A kvm_generations
# Run a KVM session.
run_kvm() {
# run_kvm will try to process the arguments it knows about first.
# It will assume that the first argument it does not know how to process
# is the name of the VM you want to create, and that any remaining arguments
# are to be passed to KVM verbatim.
# Note that you must provide either a -bootc or a -bootn argument
# before the name of the VM if you want the VM to boot off the first
# hard drive or to PXE boot. If you do not, or do not otherwise arrange
# for a way to boot the VM, it will never boot. The reason for this
# is the design decision that any hard drives not attached to IDE channels
# are not considered boot candidates without a special argument passed
# to the -drive parameter, and we have to use SCSI attached hard drives to
# work around device naming differences in CentOS 5 (for Sledgehammer) and
# more current kernels.
# $1 = name of KVM to run. Disk image and logfile names are
# derived from this.
# KVMs are always oneshot virtual machines -- this works around
# a few booting and rebooting bugs.
# $@ = after shifting, other args to be passed to kvm.
# In addition, we expect that the caller has set vm_nics appropriately.
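# Example invocation (as used elsewhere in this library):
#   makenics virt-1                       # populates vm_nics
#   run_kvm -bootn -timeout 1200 virt-1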
local waitargs=() reboot=false
local pxeboot='' driveboot=''
while true; do
case $1 in
# -daemon tells the framework to stop active monitoring
# once the PID file is written.
-daemon) waitargs+=('-daemonif' 'true');;
# -bootc tells the VM to boot off the first hard drive
-bootc) driveboot=true;;
# -bootn tells the VM to PXE boot.
-bootn) pxeboot=true;;
# -timeout will wait up to $2 seconds before killing the VM if
# we are actively monitoring the VM.
-timeout) waitargs+=("$1" "$2"); shift;;
# -daemonif will have the framework stop actively monitoring the
# VM once $2 exits with a zero status.
-daemonif) waitargs+=("$1" "$2"); shift;;
-dieif) waitargs+=("$1" "$2"); shift;;
# -reboot allows the VM to reboot instead of halting on reboot.
-reboot) reboot=true;;
*) break;;
esac
shift
done
local vmname=$1
shift
# Track the number of times that this VM has been started by the
# framework.
if [[ ! ${kvm_generations["$vmname"]} ]]; then
kvm_generations["$vmname"]=1
else
kvm_generations["$vmname"]=$((${kvm_generations["$vmname"]} + 1))
fi
local cpu_count=2 mem_size=2G
if [[ $vmname = admin ]] ; then
cpu_count=4
# set admin node memory in .build_crowbar.conf file as ADMIN_MEM=xG
mem_size=$ADMIN_MEM
fi
# Hack to pick the fastest disk caching mode.
# We use unsafe caching if we can on the vms because we will just
# rebuild the filesystems from scratch if anything goes wrong.
if ! [[ $drive_cache ]]; then
if "$KVM" --help |grep -q 'cache.*unsafe'; then
drive_cache=unsafe
else
drive_cache=writeback
fi
if "$KVM" -device \? 2>&1 |grep -q ahci && \
[[ $("$KVM" -version) =~ kvm-1 ]]; then
kvm_use_ahci=true
fi
fi
local vm_gen="$vmname.${kvm_generations[$vmname]}"
# create a new log directory for us. vm_logdir needs to be global
# because other things will need to read the logs to see what is happening.
vm_logdir="$smoketest_dir/logs/$vm_gen"
mkdir -p "$vm_logdir"
local pidfile="$smoketest_dir/$vmname.pid"
[[ -f $pidfile ]] && rm "$pidfile" || :
# Our initial array of args for kvm.
# We pass in our own ROM for PXE booting to work around some old
# PXE boot bugs in older versions of the E1000 etherboot roms.
local kvmargs=(-enable-kvm
-m $mem_size
-smp $cpu_count
-pidfile "$pidfile"
-serial "file:$vm_logdir/ttyS0.log"
-serial "file:$vm_logdir/ttyS1.log"
-name "kvm-$vm_gen")
if [[ $kvm_use_ahci = true ]]; then
kvmargs+=(-device "ahci,id=ahci0,bus=pci.0,multifunction=on")
kvmargs+=(-drive "file=$smoketest_dir/$vmname.disk,if=none,format=raw,cache=$drive_cache,id=drive-ahci-0")
kvmargs+=(-device "ide-drive,bus=ahci0.0,drive=drive-ahci-0,id=drive-0")
local drive_idx=1
for image in "$smoketest_dir/$vmname-"*".disk"; do
[[ -f $image ]] || continue
kvmargs+=(-device "ahci,id=ahci${drive_idx},bus=pci.0,multifunction=on")
kvmargs+=(-drive "file=$image,if=none,cache=$drive_cache,id=drive-ahci-${drive_idx}")
kvmargs+=(-device "ide-drive,bus=ahci${drive_idx}.0,drive=drive-ahci-${drive_idx},id=drive-${drive_idx}")
drive_idx=$((drive_idx + 1))
done
unset drive_idx
else
local drivestr="file=$smoketest_dir/$vmname.disk,if=scsi,format=raw,cache=$drive_cache"
if [[ $driveboot ]]; then
drivestr+=",boot=on"
fi
kvmargs+=(-drive "$drivestr")
# Add additional disks if we have any.
for image in "$smoketest_dir/$vmname-"*".disk"; do
[[ -f $image ]] || continue
kvmargs+=(-drive "file=$image,if=scsi,format=qcow2,cache=$drive_cache")
done
fi
# Add appropriate nics based on the contents of vm_nics.
local vlan=0
for line in "${vm_nics[@]}"; do
kvmargs+=(-net "nic,macaddr=${line%%,*},model=e1000,vlan=$vlan")
kvmargs+=(-net "tap,ifname=${line##*,},script=no,downscript=no,vlan=$vlan")
vlan=$(($vlan + 1))
done
unset vlan
if [[ $pxeboot ]]; then
kvmargs+=(-boot "order=n")
elif [[ $driveboot ]]; then
kvmargs+=(-boot "order=nc")
fi
if [[ $reboot = false ]]; then
kvmargs+=(-no-reboot)
fi
{
# Make sure that we serialize KVM launches.
# Sometimes we get a boot failure if we try launching
# KVM instances in parallel. As long as we wait for
# the pidfile to be created, we should be OK -- kvm
# does not create the pidfile until it has finished launching
# the VM.
flock 65
# If we are running under X, then use a graphical display.
if [[ $DISPLAY ]]; then
kvmargs+=( -sdl -daemonize )
"$KVM" "${kvmargs[@]}" "$@"
else
# otherwise, launch ourselves under screen.
# Derive the VNC display number from the VM name. For virt-123 this should be 123.
vncport="$(echo "$vmname" | sed 's|.*[^0-9]\([0-9]\+\)$|\1|')"
kvmargs+=( -vnc :$vncport )
screen -S "$SMOKETEST_SCREEN" -X screen \
-t "$vm_gen" "$KVM" "${kvmargs[@]}" "$@"
screen -S "$SMOKETEST_SCREEN" -p "$vm_gen" -X log on
fi
# wait up to 10 seconds for a PID file
local s
for ((s=0; s<10; s++)); do
[[ -f $pidfile ]] && break
sleep 1
done
flock -u 65
} 65>"$SMOKETEST_KVM_LOCK"
# Now that we have launched the KVM instance, wait for it using the
# waitargs we were passed.
wait_for_kvm "$vmname" "${waitargs[@]}" || return 1
}
get_boot_info() {
if [[ -e ${LOOPDIR}/isolinux ]]; then
TARGET=${LOOPDIR}/isolinux
elif [[ -e ${LOOPDIR}/boot/x86_64/loader ]]; then
TARGET=${LOOPDIR}/boot/x86_64/loader
else
die "DVD isolinux not found on $TARGET"
fi
while read line; do
[[ ! $kernel && ( $line =~ $kernel_re ) ]] && \
kernel="${BASH_REMATCH[1]}" || :
[[ ! $kernel_params && ( $line =~ $append_re ) ]] && \
kernel_params=${BASH_REMATCH[2]} || :
[[ ! $initrd && $kernel_params && ( $kernel_params =~ $initrd_re ) ]] && {
kernel_params=${kernel_params/initrd=${BASH_REMATCH[1]}/}
initrd="${BASH_REMATCH[1]}"
} || :
done < "$TARGET/isolinux.cfg"
# Fix up our paths to the initrd and the kernel
for d in "$TARGET/isolinux" "$TARGET" "$LOOPDIR"; do
[[ -f $d/$kernel && -f $d/$initrd ]] || continue
kernel="$d/$kernel"
initrd="$d/$initrd"
break
done
}
# This expects to be run with the testing lock held.
# It boots the admin node from the .iso in the testing directory,
# injecting the appropriate kernel parameters to make things
# work in the test environment.
run_admin_node() {
# Mount our .iso file as a loopback device. Since the default
# setup tries to go straight to graphics mode and we want to
# test everything using a serial console, some Special Help
# is needed.
# First, set up our admin node.
local nodename="admin"
local vm_nics
makenics admin
smoketest_update_status admin "Creating disk image"
screen -S "$SMOKETEST_SCREEN" -X screen -t Status "$CROWBAR_DIR/test_framework/watch_Status.sh"
qemu-img create -f raw "$smoketest_dir/admin.disk" 20G &>/dev/null
if [[ $online = true ]]; then
if ! run_kvm "$nodename" -cdrom "$SMOKETEST_ISO" ; then
smoketest_update_status "$nodename" "Failed to install admin node after 1200 seconds."
smoketest_update_status "$nodename" "Node failed to deploy."
return 1
fi
else
smoketest_update_status admin "Mounting .iso"
sudo -n /bin/mount -o loop "$SMOKETEST_ISO" "$LOOPDIR" &>/dev/null || \
die "Could not loopback mount $SMOKETEST_ISO on $LOOPDIR."
smoketest_cleanup_cmds+=("sudo -n /bin/umount -d '$LOOPDIR'")
smoketest_update_status admin "Hacking up kernel parameters"
# OK, now figure out what we need to grab by reading the
# isolinux.cfg file.
kernel_re='kernel (.+)$'
append_re='append([^a-zA-Z/=?]+)(.*)$'
initrd_re='initrd=([^ ]+)'
console_re='console=([^ ]+)'
unset kernel kernel_params initrd
get_boot_info
[[ $kernel && -f $kernel && $kernel_params && $initrd && -f $initrd ]] || \
die "Could not find our kernel!"
kernel_params+=" crowbar.url=http://192.168.124.10:8091/config crowbar.debug.logdest=/dev/ttyS0 crowbar.use_serial_console=true"
[[ $DISPLAY ]] || kernel_params+=" console=ttyS1,115200n81"
[[ -r $HOME/.ssh/id_rsa.pub ]] && kernel_params+=" crowbar.authkey=$(sed 's/ /\\040/g' <"$HOME/.ssh/id_rsa.pub")"
if ! [[ $manual_deploy = true ]]; then
kernel_params+=" crowbar.hostname=$ADMIN_HOSTNAME"
fi
if [[ $develop_mode ]]; then
kernel_params+=" crowbar.debug"
fi
smoketest_update_status admin "Performing install from ${SMOKETEST_ISO##*/}"
# First run of the admin node. Note that we do not actually boot off the
# .iso image, instead we boot the vm directly using the extracted kernel
# and initrd, and arrange for the kernel arguments to contain the
# extra arguments that the test framework needs.
if ! run_kvm -timeout 1200 "$nodename" \
-cdrom "$SMOKETEST_ISO" -kernel "$kernel" -initrd "$initrd" \
-append "$kernel_params"; then
smoketest_update_status "$nodename" "Failed to install admin node after 1200 seconds."
smoketest_update_status "$nodename" "Node failed to deploy."
return 1
fi
# Once this is finished, we no longer need the .iso image mounted.
sudo -n /bin/umount -d "$LOOPDIR" &>/dev/null
fi
# restart the admin node as a daemon, and wait for it to be ready to
# start installing compute nodes.
smoketest_update_status admin "Deploying admin node crowbar tasks"
if ! run_kvm -reboot -timeout 1800 -bootc \
-daemonif "ping -q -c 1 -t 5 192.168.124.10" \
"$nodename"; then
smoketest_update_status admin "Node failed to deploy."
return 1
fi
if [[ $online = true ]]; then
local cont=false
while [[ $cont != "continue" ]]; do
read -p "Type 'continue' when the admin node is deployed." cont
done
return 0
fi
# Once the KVM instance has launched, start watching the system log.
screen -S "$SMOKETEST_SCREEN" -X screen -t "Syslog Capture" \
tail -f "$LOGDIR/admin.2/ttyS0.log"
# Spin until we can SSH into the admin node.
while ! ssh root@192.168.124.10 true; do
sleep 5
done
# Copy over the network.json we want to use.
scp "$CROWBAR_DIR/test_framework/network-${network_mode}.json" \
"root@192.168.124.10:/opt/dell/barclamps/network/chef/data_bags/crowbar/bc-template-network.json"
# Copy over our post-install hooks
ssh root@192.168.124.10 mkdir -p /opt/dell/.hooks/admin-post-install.d
scp -r "$CROWBAR_DIR/test_framework/admin-post-hooks/." \
"root@192.168.124.10:/opt/dell/.hooks/admin-post-install.d/"
# If we want to manually deploy, then exit now.
[[ $manual_deploy ]] && return 0
# Otherwise, kick off the install.
ssh root@192.168.124.10 /opt/dell/bin/install-crowbar $ADMIN_HOSTNAME
sleep 5
# Wait for the screen session to terminate
printf "Waiting for crowbar to install: "
while grep -q crowbar-install < <(ssh root@192.168.124.10 screen -ls); do
sleep 30
printf "."
done
echo
if ! ssh root@192.168.124.10 \
test -f /opt/dell/crowbar_framework/.crowbar-installed-ok; then
smoketest_update_status admin "Install of Crowbar failed."
return 1
fi
# Grab the latest crowbar CLI code off the admin node.
(
cd "$CROWBAR_DIR/testing/cli"
curlargs=(-L -o - --connect-timeout 120 --max-time 120)
[[ $CROWBAR_KEY ]] && curlargs+=(-u "$CROWBAR_KEY" --digest) || :
http_proxy='' curl "${curlargs[@]}" \
"http://${CROWBAR_IP}:3000/support/get_cli" | tar xzvf -
# make run_on
cat >"$CROWBAR_DIR/testing/cli/run_on" <<"EOF"
#!/bin/bash
target="$1"
shift
[[ $1 ]] || exit 1
ip_re='([0-9]{1,3}\.){3}[0-9]{1,3}'
addr_re='Address: ([0-9.]+)'
if ! [[ $target =~ $ip_re ]]; then
if [[ ! $(nslookup "$target" "$CROWBAR_IP") =~ $addr_re ]]; then
echo "Could not find IP address $target" >&2
exit 1
fi
target=${BASH_REMATCH[1]}
fi
ssh -q -o "UserKnownHostsFile /dev/null" -o "StrictHostKeyChecking no" \
-o "PasswordAuthentication no" root@"$target" "$@"
EOF
# make a cheesy knife command
cat >"$CROWBAR_DIR/testing/cli/knife" <<"EOF"
#!/bin/bash
run_on "$CROWBAR_IP" knife "$@"
EOF
chmod 755 "$CROWBAR_DIR/testing/cli"/*
) || :
# if there are any tests we should run on the admin node, do them now.
if run_admin_hooks admin_deployed; then
smoketest_update_status admin "Admin deploy tests passed"
else
smoketest_update_status admin "Admin deploy tests failed."
local ret=1
fi
# If we were asked to pause, do so.
if [[ $pause_after_admin ]]; then
pause
fi
smoketest_update_status admin "Node deployed."
smoketest_get_cluster_logs admin-deployed
return ${ret:-0}
}
# Deregister a node from Crowbar.
# The next time it boots Crowbar will restart the discovery cycle for it.
# $1 = the hostname of the node to deregister.
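# Example: deregister_slave "${SMOKETEST_SLAVES[virt-1]}"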
deregister_slave() {
# make Chef forget about the node.
crowbar machines delete "$1"
}
# Create infrastructure for our slave nodes, but do not start them.
create_slaves() {
local nodename
# First, handle virtual machines.
for nodename in ${SMOKETEST_VIRT_NODES[@]}; do
local vm_nics=()
makenics "$nodename"
# Clear out our status, if one is lying around.
>"$smoketest_dir/$nodename.status"
>"$smoketest_dir/$nodename.reset"
qemu-img create -f raw "$smoketest_dir/$nodename.disk" 10G &>/dev/null
# Create a second and third image for Swift testing
qemu-img create -f qcow2 "$smoketest_dir/$nodename-01.disk" 4G &>/dev/null
qemu-img create -f qcow2 "$smoketest_dir/$nodename-02.disk" 4G &>/dev/null
# Run our VMs in the background, looping as we go.
(
trap - 0 INT QUIT TERM
# Keep rebooting as long as the admin node is alive and
# the node has not been explicitly killed by kill_vm.
count=0
in_reset=false
while [[ -f $smoketest_dir/admin.pid && \
! -f $smoketest_dir/$nodename.killed ]]; do
# If we are in a reset state, spin until the .reset file is gone
# or we were asked to die.
if [[ -f $smoketest_dir/$nodename.reset ]]; then
# If we were asked to reset the node, nuke it.
# For whatever reason, if we just nuke the MBR, d/i
# complains about the LVM VG even though it should not.
if [[ $in_reset != true ]]; then
deregister_slave ${SMOKETEST_SLAVES["$nodename"]}
sleep 30
qemu-img create -f raw \
"$smoketest_dir/$nodename.disk" 10G &>/dev/null
in_reset=true
fi
sleep 10
continue
fi
in_reset=false
# If we have a valid MBR on our primary disk, boot to it.
# This works around a bug in KVM where you cannot boot to a
# local disk if you are asked to while PXE booting.
if [[ $(hexdump -n 2 -s 0x1fe "$smoketest_dir/$nodename.disk") =~ \
aa55 ]]; then
smoketest_update_status "$nodename" "Booting node to disk ($((count++)))"
if run_kvm -bootc "$nodename"; then
kill_vm "$nodename" exited
else
smoketest_update_status "$nodename" "Node failed to deploy."
kill_vm "$nodename" exited
fi
else
# Otherwise, PXE boot the machine.
smoketest_update_status "$nodename" "PXE booting node ($((count++)))"
if run_kvm -bootn -timeout 1200 "$nodename"; then
kill_vm "$nodename" exited
else
smoketest_update_status "$nodename" "Node failed to deploy"
kill_vm "$nodename" timeout
fi
fi
done
smoketest_update_status "$nodename" "Asked to be killed, giving up."
exit 0
) &
done
# Second, handle our physical machines.
local mac i=0
for mac in "${PHYSICAL_MACS[@]}"; do
nodename="physical-$((i++))"
SMOKETEST_SLAVES["$nodename"]="d${mac//:/-}.${SMOKETEST_DOMAIN}"
> "$smoketest_dir/$nodename.status"
done
}
kill_slave() {
local hname=${SMOKETEST_SLAVES["$1"]}
smoketest_update_status "$1" "Killing $hname"
if [[ $1 =~ virt ]]; then
kill_vm "$1" "$@"
else
crowbar machines shutdown "$1" || :
fi
}
kill_slaves() {
local slave
for slave in "${!SMOKETEST_SLAVES[@]}"; do
kill_slave "$slave" "$@"
done
}
reset_slave() {
if [[ $1 =~ virt ]]; then
> "$smoketest_dir/$1.reset"
fi
kill_slave "$1" reset
}
# Reset nodes from Crowbar's standpoint -- that is,
# remove them from the Chef database and power them off.
# If the node is a VM, place it in the reset state.
reset_slaves() {
local slave
if [[ $develop_mode = true ]]; then
pause "Press any key to reset the slaves."
fi
for slave in "${!SMOKETEST_SLAVES[@]}"; do
reset_slave "$slave"
done
}
# Wake up any slaves that are either powered off or in a reset state.
wakeup_slaves() {
local i=0 slave='' mac=''
for slave in "${!SMOKETEST_SLAVES[@]}"; do
> "$smoketest_dir/$slave.status"
echo "${SMOKETEST_SLAVES["$slave"]}" >"$smoketest_dir/$slave.hostname"
case $slave in
virt*) rm -f "$smoketest_dir/$slave.reset";;