about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- rumba/model.py 35
-rw-r--r-- rumba/testbeds/qemu.py 22
2 files changed, 49 insertions, 8 deletions
diff --git a/rumba/model.py b/rumba/model.py
index 094babc..bb7d213 100644
--- a/rumba/model.py
+++ b/rumba/model.py
@@ -31,6 +31,7 @@ import time
import rumba.log as log
from rumba import ssh_support
+from rumba.ssh_support import SSHException
logger = log.get_logger(__name__)
@@ -851,14 +852,24 @@ class ClientProcess(Client):
opt_str = self.options if self.options is not None else ""
cmd = "./startup.sh %s %s" % (self.ap, opt_str)
self.running = True
- self.pid = self.node.execute_command(cmd)
+ try:
+ self.pid = self.node.execute_command(cmd)
+ except SSHException:
+ logger.warn('Could not start client %s on node %s.',
+ self.ap, node.name)
+ logger.debug('Client app %s on node %s got pid %s.',
+ self.ap, self.node.name, self.pid)
def stop(self):
logger.debug(
'Killing client %s on node %s.',
self.ap, self.node.name
)
- self.node.execute_command("kill %s" % self.pid)
+ try:
+ self.node.execute_command("kill %s" % self.pid)
+ except SSHException:
+ logger.warn('Could not kill client %s on node %s.',
+ self.ap, self.node.name)
def check(self):
"""Check if the process should keep running, stop it if not,
@@ -868,6 +879,7 @@ class ClientProcess(Client):
return False
if now - self.start_time >= self.duration:
self.stop()
+ self.running = False
return False
return True
@@ -942,7 +954,11 @@ class Server:
'Starting server %s on node %s with logfile %s.',
self.ap, node.name, logfile
)
- self.pids[node] = (node.execute_commands(cmds))
+ try:
+ self.pids[node] = (node.execute_commands(cmds))
+ except SSHException:
+ logger.warn('Could not start server %s on node %s.',
+ self.ap, node.name)
def stop(self):
for node, pid in self.pids.items():
@@ -950,7 +966,11 @@ class Server:
'Killing server %s on node %s.',
self.ap, node.name
)
- node.execute_command("kill %s" % pid)
+ try:
+ node.execute_command("kill %s" % pid)
+ except SSHException:
+ logger.warn('Could not kill server %s on node %s.',
+ self.ap, node.name)
# Base class for ARCFIRE storyboards
@@ -997,8 +1017,11 @@ class StoryBoard:
client_node = random.choice(self.client_nodes)
new_client.run(client_node)
self.active_clients.append(new_client)
- self.active_clients = \
- [x for x in self.active_clients if x.check()]
+ surviving = []
+ for x in self.active_clients:
+ if x.check():
+ surviving.append(x)
+ self.active_clients = surviving
time.sleep(self.DEFAULT_INTERVAL)
finally:
for client in self.active_clients:
diff --git a/rumba/testbeds/qemu.py b/rumba/testbeds/qemu.py
index 80a3d4c..4d6804d 100644
--- a/rumba/testbeds/qemu.py
+++ b/rumba/testbeds/qemu.py
@@ -88,6 +88,8 @@ class Testbed(mod.Testbed):
errors += 1
if not ignore_errors:
break
+ except KeyboardInterrupt as e:
+ error_queue.put('Interrupted')
if errors == 0:
results_queue.put("Command chain ran correctly")
else:
@@ -185,6 +187,7 @@ class Testbed(mod.Testbed):
'brctl addif %(br)s %(tap)s'
% {'tap': tap_id, 'br': shim.name}
).split('\n')
+ command_list += ['sleep 15']
if shim.link_speed > 0:
speed = '%dmbit' % shim.link_speed
@@ -229,14 +232,21 @@ class Testbed(mod.Testbed):
# Wait for all processes to be over.
total_processes = len(shim_processes)
- max_waiting_time = 2 * total_processes
+ max_waiting_time = 4 * total_processes
over_processes = 0
- while max_waiting_time > 0 and over_processes < total_processes:
+ while over_processes < total_processes and max_waiting_time > 0:
+ # Check for deadlock
+
# Check for errors
if not e_queue.empty():
error_str = str(e_queue.get())
logger.error('Testbed instantiation failed: %s', error_str)
+
+ # Wait for the running processes to quit before swapping out
+ for process in shim_processes:
+ process.join()
+
raise Exception('Failure: %s' % error_str)
try:
# Check for results
@@ -248,6 +258,14 @@ class Testbed(mod.Testbed):
except:
max_waiting_time -= 1
+ if max_waiting_time == 0:
+ logger.error("Swap in is in deadlock, aborting.")
+ for process in shim_processes:
+ process.terminate()
+ time.sleep(0.1)
+ process.join()
+ raise Exception('Swap in is in deadlock')
+
for process in shim_processes:
process.join()