aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSander Vrijders <sander.vrijders@ugent.be>2018-05-08 17:05:17 +0200
committerSander Vrijders <sander.vrijders@ugent.be>2018-05-14 11:03:34 +0200
commit2e250f0235b80a052ec14f1da35dd2df93d8327c (patch)
tree1cde6fdc295f9f17911d9fc7b066a80afa99f12f
parent031cdc38878686d7ae5532f3968e2b4d428ab7cd (diff)
downloadrumba-2e250f0235b80a052ec14f1da35dd2df93d8327c.tar.gz
rumba-2e250f0235b80a052ec14f1da35dd2df93d8327c.zip
rumba: Reconnect SSH in case of SIGPIPE
This reconnects the SSH connection in case the other side unilaterally closed the connection. This often happens with the exogeni testbed.
-rw-r--r--rumba/prototypes/ouroboros.py2
-rw-r--r--rumba/ssh_support.py77
2 files changed, 49 insertions, 30 deletions
diff --git a/rumba/prototypes/ouroboros.py b/rumba/prototypes/ouroboros.py
index 016183e..1402180 100644
--- a/rumba/prototypes/ouroboros.py
+++ b/rumba/prototypes/ouroboros.py
@@ -113,7 +113,7 @@ class Experiment(mod.Experiment):
" " + fs_loc,
"cd " + fs_loc + " && mkdir build && cd build && " +
"cmake -DCMAKE_BUILD_TYPE=Debug -DIPCP_FLOW_STATS=True " +
- "-DCONNECT_TIMEOUT=60000 " +
+ "-DCONNECT_TIMEOUT=60000 -DIPCP_CONN_WAIT_DIR=True " +
"-DREG_TIMEOUT=60000 -DQUERY_TIMEOUT=4000 .. && " +
"sudo make install -j$(nproc)"]
diff --git a/rumba/ssh_support.py b/rumba/ssh_support.py
index 1a95d7f..cfe8c26 100644
--- a/rumba/ssh_support.py
+++ b/rumba/ssh_support.py
@@ -29,6 +29,8 @@ import paramiko
import re
import time
+from paramiko.ssh_exception import *
+
import rumba.log as log
# Fix input reordering
@@ -118,6 +120,41 @@ def ssh_connect(hostname, port, username, password, time_out, proxy_server):
if retry == max_retries:
raise SSHException('Failed to connect to host')
+def ssh_connect_check(ssh_config, testbed, time_out, force_reconnect=False):
+ if ssh_config.client is None or \
+ not ssh_config.client.get_transport().is_active or force_reconnect:
+ client, proxy_client = ssh_connect(ssh_config.hostname, ssh_config.port,
+ testbed.username, testbed.password,
+ time_out, ssh_config.proxy_server)
+ ssh_config.client = client
+ ssh_config.proxy_client = proxy_client
+
+def ssh_chan(ssh_config, testbed, time_out):
+ retry = 0
+ max_retries = 2
+ while retry < max_retries:
+ try:
+ chan = ssh_config.client.get_transport().open_session()
+ return chan
+ except paramiko.ssh_exception.SSHException as e:
+ if str(e) == 'No existing session' or \
+ str(e) == 'SSH session not active':
+ logger.debug('Failed to open transport: ' + str(e))
+ retry += 1
+ logger.debug('Trying to reconnect: ' +
+ str(retry) + '/' + str(max_retries) + ' retries.')
+ ssh_connect_check(ssh_config, testbed, time_out,
+ force_reconnect=True)
+ else:
+ raise e
+
+ if retry == max_retries:
+ raise SSHException('Unable to re-establish SSH connection.')
+
+def ssh_sftp(ssh_config, testbed):
+ chan = ssh_chan(ssh_config, testbed, None)
+ chan.invoke_subsystem("sftp")
+ return paramiko.sftp_client.SFTPClient(chan)
def execute_proxy_commands(testbed, ssh_config, commands, time_out=3):
"""
@@ -177,13 +214,7 @@ def execute_commands(testbed, ssh_config, commands, time_out=3):
be used when no timeout is needed
"""
- if ssh_config.client is None:
- client, proxy_client = ssh_connect(ssh_config.hostname, ssh_config.port,
- testbed.username, testbed.password,
- time_out, ssh_config.proxy_server)
- ssh_config.client = client
- ssh_config.proxy_client = proxy_client
-
+ ssh_connect_check(ssh_config, testbed, time_out)
o = ""
for command in commands:
logger.debug("%s@%s:%s >> %s" % (testbed.username,
@@ -192,7 +223,9 @@ def execute_commands(testbed, ssh_config, commands, time_out=3):
command))
envars = '. /etc/profile;'
command = envars + ' ' + command
- chan = ssh_config.client.get_transport().open_session()
+
+ chan = ssh_chan(ssh_config, testbed, time_out)
+
stdout = chan.makefile()
stderr = chan.makefile_stderr()
try:
@@ -246,12 +279,8 @@ def write_text_to_file(testbed, ssh_config, text, file_name):
@param text: string to be written in file
@param file_name: file name (including full path) on the host
"""
- if ssh_config.client is None:
- client, proxy_client = ssh_connect(ssh_config.hostname, ssh_config.port,
- testbed.username, testbed.password,
- None, ssh_config.proxy_server)
- ssh_config.client = client
- ssh_config.proxy_client = proxy_client
+
+ ssh_connect_check(ssh_config, testbed, time_out=None)
cmd = "touch " + file_name + "; chmod a+rwx " + file_name
@@ -262,7 +291,7 @@ def write_text_to_file(testbed, ssh_config, text, file_name):
if err != "":
logger.error(err)
- sftp_client = ssh_config.client.open_sftp()
+ sftp_client = ssh_sftp(ssh_config, testbed)
remote_file = sftp_client.open(file_name, 'w')
remote_file.write(text)
@@ -284,15 +313,10 @@ def copy_files_to_testbed(testbed, ssh_config, paths, destination):
if destination is not '' and not destination.endswith('/'):
destination = destination + '/'
- if ssh_config.client is None:
- client, proxy_client = ssh_connect(ssh_config.hostname, ssh_config.port,
- testbed.username, testbed.password,
- None, ssh_config.proxy_server)
- ssh_config.client = client
- ssh_config.proxy_client = proxy_client
+ ssh_connect_check(ssh_config, testbed, time_out=None)
try:
- sftp_client = ssh_config.client.open_sftp()
+ sftp_client = ssh_sftp(ssh_config, testbed)
for path in paths:
file_name = os.path.basename(path)
@@ -341,15 +365,10 @@ def copy_files_from_testbed(testbed, ssh_config, paths,
cmd = "sudo %s" % cmd
execute_command(testbed, ssh_config, cmd)
- if ssh_config.client is None:
- client, proxy_client = ssh_connect(ssh_config.hostname, ssh_config.port,
- testbed.username, testbed.password,
- None, ssh_config.proxy_server)
- ssh_config.client = client
- ssh_config.proxy_client = proxy_client
+ ssh_connect_check(ssh_config, testbed, time_out=None)
try:
- sftp_client = ssh_config.client.open_sftp()
+ sftp_client = ssh_sftp(ssh_config, testbed)
for path in paths:
file_name = os.path.basename(path)