diff options
-rw-r--r-- | rumba/model.py | 35 | ||||
-rw-r--r-- | rumba/testbeds/qemu.py | 22 |
2 files changed, 49 insertions, 8 deletions
diff --git a/rumba/model.py b/rumba/model.py index 094babc..bb7d213 100644 --- a/rumba/model.py +++ b/rumba/model.py @@ -31,6 +31,7 @@ import time import rumba.log as log from rumba import ssh_support +from rumba.ssh_support import SSHException logger = log.get_logger(__name__) @@ -851,14 +852,24 @@ class ClientProcess(Client): opt_str = self.options if self.options is not None else "" cmd = "./startup.sh %s %s" % (self.ap, opt_str) self.running = True - self.pid = self.node.execute_command(cmd) + try: + self.pid = self.node.execute_command(cmd) + except SSHException: + logger.warn('Could not start client %s on node %s.', + self.ap, node.name) + logger.debug('Client app %s on node %s got pid %s.', + self.ap, self.node.name, self.pid) def stop(self): logger.debug( 'Killing client %s on node %s.', self.ap, self.node.name ) - self.node.execute_command("kill %s" % self.pid) + try: + self.node.execute_command("kill %s" % self.pid) + except SSHException: + logger.warn('Could not kill client %s on node %s.', + self.ap, self.node.name) def check(self): """Check if the process should keep running, stop it if not, @@ -868,6 +879,7 @@ class ClientProcess(Client): return False if now - self.start_time >= self.duration: self.stop() + self.running = False return False return True @@ -942,7 +954,11 @@ class Server: 'Starting server %s on node %s with logfile %s.', self.ap, node.name, logfile ) - self.pids[node] = (node.execute_commands(cmds)) + try: + self.pids[node] = (node.execute_commands(cmds)) + except SSHException: + logger.warn('Could not start server %s on node %s.', + self.ap, node.name) def stop(self): for node, pid in self.pids.items(): @@ -950,7 +966,11 @@ class Server: 'Killing server %s on node %s.', self.ap, node.name ) - node.execute_command("kill %s" % pid) + try: + node.execute_command("kill %s" % pid) + except SSHException: + logger.warn('Could not kill server %s on node %s.', + self.ap, node.name) # Base class for ARCFIRE storyboards @@ -997,8 +1017,11 @@ class StoryBoard: client_node = random.choice(self.client_nodes) new_client.run(client_node) self.active_clients.append(new_client) - self.active_clients = \ - [x for x in self.active_clients if x.check()] + surviving = [] + for x in self.active_clients: + if x.check(): + surviving.append(x) + self.active_clients = surviving time.sleep(self.DEFAULT_INTERVAL) finally: for client in self.active_clients: diff --git a/rumba/testbeds/qemu.py b/rumba/testbeds/qemu.py index 80a3d4c..4d6804d 100644 --- a/rumba/testbeds/qemu.py +++ b/rumba/testbeds/qemu.py @@ -88,6 +88,8 @@ class Testbed(mod.Testbed): errors += 1 if not ignore_errors: break + except KeyboardInterrupt as e: + error_queue.put('Interrupted') if errors == 0: results_queue.put("Command chain ran correctly") else: @@ -185,6 +187,7 @@ class Testbed(mod.Testbed): 'brctl addif %(br)s %(tap)s' % {'tap': tap_id, 'br': shim.name} ).split('\n') + command_list += ['sleep 15'] if shim.link_speed > 0: speed = '%dmbit' % shim.link_speed @@ -229,14 +232,21 @@ class Testbed(mod.Testbed): # Wait for all processes to be over. total_processes = len(shim_processes) - max_waiting_time = 2 * total_processes + max_waiting_time = 4 * total_processes over_processes = 0 - while max_waiting_time > 0 and over_processes < total_processes: + while over_processes < total_processes and max_waiting_time > 0: + # Check for deadlock + # Check for errors if not e_queue.empty(): error_str = str(e_queue.get()) logger.error('Testbed instantiation failed: %s', error_str) + + # Wait for the running processes to quit before swapping out + for process in shim_processes: + process.join() + raise Exception('Failure: %s' % error_str) try: # Check for results @@ -248,6 +258,14 @@ class Testbed(mod.Testbed): except: max_waiting_time -= 1 + if max_waiting_time == 0: + logger.error("Swap in is in deadlock, aborting.") + for process in shim_processes: + process.terminate() + time.sleep(0.1) + process.join() + raise Exception('Swap in is in deadlock') + for process in shim_processes: process.join() |