Skip to content

Commit 182ab64

Browse files
Abhinandan Prateekabhinandanprateek
authored andcommitted
CLOUDSTACK-9323: Fix cancel maintenance so that if maintenance is cancelled the host comes back to its normal state gracefully.
Added Marvin tests for host maintenance.
1 parent 8df8094 commit 182ab64

File tree

3 files changed

+320
-5
lines changed

3 files changed

+320
-5
lines changed

server/src/com/cloud/resource/ResourceManagerImpl.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2112,11 +2112,13 @@ private boolean doCancelMaintenance(final long hostId) {
21122112

21132113
/* TODO: move to listener */
21142114
_haMgr.cancelScheduledMigrations(host);
2115+
2116+
boolean vms_migrating = false;
21152117
final List<VMInstanceVO> vms = _haMgr.findTakenMigrationWork();
21162118
for (final VMInstanceVO vm : vms) {
2117-
if (vm != null && vm.getHostId() != null && vm.getHostId() == hostId) {
2118-
s_logger.info("Unable to cancel migration because the vm is being migrated: " + vm);
2119-
return false;
2119+
if (vm.getHostId() != null && vm.getHostId() == hostId) {
2120+
s_logger.warn("Unable to cancel migration because the vm is being migrated: " + vm + ", hostId = " + hostId);
2121+
vms_migrating = true;
21202122
}
21212123
}
21222124

@@ -2125,7 +2127,7 @@ private boolean doCancelMaintenance(final long hostId) {
21252127
_agentMgr.pullAgentOutMaintenance(hostId);
21262128

21272129
// for kvm, need to log into kvm host, restart cloudstack-agent
2128-
if (host.getHypervisorType() == HypervisorType.KVM || host.getHypervisorType() == HypervisorType.LXC) {
2130+
if ((host.getHypervisorType() == HypervisorType.KVM && !vms_migrating) || host.getHypervisorType() == HypervisorType.LXC) {
21292131

21302132
final boolean sshToAgent = Boolean.parseBoolean(_configDao.getValue(Config.KvmSshToAgentEnabled.key()));
21312133
if (!sshToAgent) {
Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
""" BVT tests for Hosts Maintenance
18+
"""
19+
20+
# Import Local Modules
21+
from marvin.codes import FAILED
22+
from marvin.cloudstackTestCase import *
23+
from marvin.cloudstackAPI import *
24+
from marvin.lib.utils import *
25+
from marvin.lib.base import *
26+
from marvin.lib.common import *
27+
from nose.plugins.attrib import attr
28+
29+
from time import sleep
30+
31+
# NOTE(review): looks like the nose multiprocess-plugin flag; presumably False
# means module-level fixtures are not shared between worker processes -- confirm.
_multiprocess_shared_ = False
32+
33+
34+
class TestHostMaintenance(cloudstackTestCase):
    """Tests for cancelling host maintenance.

    Each test puts a host into "prepare for maintenance", polls the other
    host for VMs in the ``Migrating`` state, and then cancels maintenance.
    """

    def setUp(self):
        """Create clients, look up the zone/pod and define the test data."""
        self.logger = logging.getLogger('TestHM')
        self.stream_handler = logging.StreamHandler()
        self.logger.setLevel(logging.DEBUG)
        self.logger.addHandler(self.stream_handler)
        self.apiclient = self.testClient.getApiClient()
        self.hypervisor = self.testClient.getHypervisorInfo()
        self.dbclient = self.testClient.getDbConnection()
        self.zone = get_zone(self.apiclient, self.testClient.getZoneForTests())
        self.pod = get_pod(self.apiclient, self.zone.id)
        self.cleanup = []
        self.services = {
            "service_offering": {
                "name": "Ultra Tiny Instance",
                "displaytext": "Ultra Tiny Instance",
                "cpunumber": 1,
                "cpuspeed": 100,
                "memory": 128,
            },
            "vm": {
                "username": "root",
                "password": "password",
                "ssh_port": 22,
                # Hypervisor type should be same as
                # hypervisor type of cluster
                "privateport": 22,
                "publicport": 22,
                "protocol": 'TCP',
            },
            "natrule": {
                "privateport": 22,
                "publicport": 22,
                "startport": 22,
                "endport": 22,
                "protocol": "TCP",
                "cidrlist": '0.0.0.0/0',
            },
            "ostype": 'CentOS 5.3 (64-bit)',
            "sleep": 60,
            "timeout": 10,
        }

    def tearDown(self):
        """Delete the resources registered in ``self.cleanup``.

        Raises:
            Exception: if cleaning up any resource fails.
        """
        try:
            cleanup_resources(self.apiclient, self.cleanup)
        except Exception as e:
            raise Exception("Warning: Exception during cleanup : %s" % e)
        finally:
            # The 'TestHM' logger is shared by every test; detach the handler
            # added in setUp so log lines are not duplicated test after test.
            self.logger.removeHandler(self.stream_handler)

    def createVMs(self, hostId, number):
        """Deploy ``number`` VMs on host ``hostId`` and register them for cleanup.

        Args:
            hostId: id of the host the VMs are deployed on.
            number: how many VMs to create.

        Returns:
            list of the created VirtualMachine objects.
        """
        self.template = get_template(
            self.apiclient,
            self.zone.id,
            self.services["ostype"]
        )
        if self.template == FAILED:
            self.fail(
                "get_template() failed to return template with description %s"
                % self.services["ostype"])

        self.logger.debug("Using template %s " % self.template.id)

        self.service_offering = ServiceOffering.create(
            self.apiclient,
            self.services["service_offering"]
        )
        self.logger.debug("Using service offering %s " % self.service_offering.id)

        # These settings are identical for every VM; only the name varies.
        vm_data = self.services["vm"]
        vm_data["zoneid"] = self.zone.id
        vm_data["template"] = self.template.id
        vm_data["hypervisor"] = self.hypervisor

        vms = []
        for i in range(number):
            vm_data["displayname"] = 'vm' + str(i)
            vm = VirtualMachine.create(
                self.apiclient,
                vm_data,
                serviceofferingid=self.service_offering.id,
                hostid=hostId
            )
            vms.append(vm)
            self.cleanup.append(vm)
            self.logger.debug("VM create = {}".format(vm.id))
        return vms

    def _anyVmMigrating(self, hostId):
        """Return True if any VM listed on host ``hostId`` is in state ``Migrating``."""
        vms_on_host = VirtualMachine.list(
            self.apiclient,
            hostid=hostId
        )
        if vms_on_host is None:
            return False

        self.logger.debug('Vms found = {} '.format(len(vms_on_host)))
        for vm in vms_on_host:
            if vm.state == "Migrating":
                self.logger.debug(
                    'VirtualMachine on Hyp id = {} is in {}'.format(vm.id, vm.state))
                return True
        return False

    def checkVmMigratingOnHost(self, hostId):
        """``wait_until`` callback: succeeds once a VM on the host is migrating.

        Returns:
            tuple ``(vm_migrating, None)``.
        """
        return (self._anyVmMigrating(hostId), None)

    def checkNoVmMigratingOnHost(self, hostId):
        """``wait_until`` callback: succeeds once no VM on the host is migrating.

        Returns:
            tuple ``(no_vm_migrating, None)``.
        """
        return (not self._anyVmMigrating(hostId), None)

    def noOfVMsOnHost(self, hostId):
        """Return the number of VMs listed on host ``hostId`` (0 if none)."""
        listVms = VirtualMachine.list(
            self.apiclient,
            hostid=hostId
        )
        if listVms is None:
            return 0

        for vm in listVms:
            self.logger.debug('VirtualMachine on Hyp 1 = {}'.format(vm.id))
        return len(listVms)

    def hostPrepareAndCancelMaintenance(self, target_host_id, other_host_id, checkVMMigration):
        """Prepare a host for maintenance, poll a condition, then cancel.

        Args:
            target_host_id: host that enters and then leaves maintenance.
            other_host_id: host passed to ``checkVMMigration`` on every poll.
            checkVMMigration: callback for ``wait_until``; must return a
                ``(bool, value)`` pair.

        Returns:
            bool: the boolean reported by the last ``checkVMMigration`` poll.
        """
        cmd = prepareHostForMaintenance.prepareHostForMaintenanceCmd()
        cmd.id = target_host_id
        self.apiclient.prepareHostForMaintenance(cmd)

        self.logger.debug('Host with id {} is in prepareHostForMaintenance'.format(target_host_id))

        # wait_until returns (result, value); only the boolean is meaningful
        # here, and callers compare it directly against True/False.
        condition_met, _ = wait_until(1, 10, checkVMMigration, other_host_id)

        cmd = cancelHostMaintenance.cancelHostMaintenanceCmd()
        cmd.id = target_host_id
        self.apiclient.cancelHostMaintenance(cmd)

        self.logger.debug('Host with id {} is in cancelHostMaintenance'.format(target_host_id))

        return condition_met

    @attr(
        tags=[
            "advanced",
            "advancedns",
            "smoke",
            "basic",
            "eip",
            "sg"],
        required_hardware="true")
    def test_01_cancel_host_maintenace_with_no_migration_jobs(self):
        """Cancel maintenance on two hosts while no VM migration is running."""
        # Host.list may return None when nothing matches.
        listHost = Host.list(
            self.apiclient,
            type='Routing',
            zoneid=self.zone.id,
            podid=self.pod.id,
        ) or []
        for host in listHost:
            self.logger.debug('1 Hypervisor = {}'.format(host.id))

        if len(listHost) < 2:
            raise unittest.SkipTest("Cancel host maintenance when VMs are migrating should be tested for 2 or more hosts")

        # NOTE(review): only the result of the second cycle decides the skip.
        no_vm_migrating = True
        try:
            no_vm_migrating = self.hostPrepareAndCancelMaintenance(
                listHost[0].id, listHost[1].id, self.checkNoVmMigratingOnHost)
            no_vm_migrating = self.hostPrepareAndCancelMaintenance(
                listHost[1].id, listHost[0].id, self.checkNoVmMigratingOnHost)
        except Exception as e:
            self.logger.debug("Exception {}".format(e))
            self.fail("Cancel host maintenance failed {}".format(e))

        # The callback reports True when nothing is migrating, so a False
        # result means migrations were still running for the whole poll.
        if not no_vm_migrating:
            raise unittest.SkipTest("VMs are migrating and the test will not be able to check the conditions the test is intended for")

    @attr(
        tags=[
            "advanced",
            "advancedns",
            "smoke",
            "basic",
            "eip",
            "sg"],
        required_hardware="true")
    def test_02_cancel_host_maintenace_with_migration_jobs(self):
        """Cancel maintenance on two hosts while VM migrations are running."""
        # Host.list may return None when nothing matches.
        listHost = Host.list(
            self.apiclient,
            type='Routing',
            zoneid=self.zone.id,
            podid=self.pod.id,
        ) or []
        for host in listHost:
            self.logger.debug('2 Hypervisor = {}'.format(host.id))

        if len(listHost) != 2:
            raise unittest.SkipTest("Cancel host maintenance when VMs are migrating can only be tested with 2 hosts")

        no_of_vms = self.noOfVMsOnHost(listHost[0].id) + self.noOfVMsOnHost(listHost[1].id)

        # Make sure at least 5 VMs exist across both hosts so that entering
        # maintenance has something to migrate.
        if no_of_vms < 5:
            self.logger.debug("Create VMs as there are not enough vms to check host maintenance")
            no_vm_req = 5 - no_of_vms
            self.logger.debug("Creating vms = {}".format(no_vm_req))
            self.vmlist = self.createVMs(listHost[0].id, no_vm_req)

        # NOTE(review): only the result of the second cycle decides the skip.
        vm_migrating = False
        try:
            vm_migrating = self.hostPrepareAndCancelMaintenance(
                listHost[0].id, listHost[1].id, self.checkVmMigratingOnHost)
            vm_migrating = self.hostPrepareAndCancelMaintenance(
                listHost[1].id, listHost[0].id, self.checkVmMigratingOnHost)
        except Exception as e:
            self.logger.debug("Exception {}".format(e))
            self.fail("Cancel host maintenance failed {}".format(e))

        if not vm_migrating:
            raise unittest.SkipTest("No VM is migrating and the test will not be able to check the conditions the test is intended for")
291+
292+

tools/marvin/marvin/lib/utils.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,4 +520,25 @@ def verifyRouterState(apiclient, routerid, allowedstates):
520520
if routers[0].state.lower() not in allowedstates:
521521
return [FAIL, "state of the router should be in %s but is %s" %
522522
(allowedstates, routers[0].state)]
523-
return [PASS, None]
523+
return [PASS, None]
524+
525+
526+
527+
def wait_until(retry_interval=2, no_of_times=2, callback=None, *callback_args):
    """Poll ``callback`` until it reports success or the attempts run out.

    Before every attempt (including the first) the function sleeps
    ``retry_interval`` seconds, then calls ``callback(*callback_args)``.
    Polling stops as soon as the callback reports True.

    :param retry_interval: seconds to sleep before each attempt
    :param no_of_times: maximum number of attempts
    :param callback: callable returning a two-item sequence whose first item
        is a bool (True meaning "stop waiting") and whose second item is an
        arbitrary value handed back to the caller
    :param callback_args: positional arguments forwarded to ``callback``
    :return: tuple ``(wait_result, return_val)`` from the last attempt, or
        ``(False, None)`` when no attempt was made
    :raises TypeError: if ``callback`` is None or the first item it returns
        is not a bool
    """
    if callback is None:
        raise TypeError("Bad value for callback method !")

    wait_result = False
    # Defined up front so a zero-attempt call still has a value to return.
    return_val = None
    for _ in range(no_of_times):
        time.sleep(retry_interval)
        wait_result, return_val = callback(*callback_args)
        if not isinstance(wait_result, bool):
            raise TypeError("Bad parameter returned from callback !")
        if wait_result:
            break

    return wait_result, return_val
544+

0 commit comments

Comments
 (0)