From b3e733957d1ab71a783e2049a52989f31bbbe2b1 Mon Sep 17 00:00:00 2001 From: vamahaja Date: Thu, 9 Apr 2020 16:38:47 +0530 Subject: [LibFix] Fix 'node_reboot_by_command' library Fix consists of - 1. Currently the waiter in the 'node_reboot_by_command' function uses the 'g.rpyc_get_connection' method to check for a connection, which is causing issues. Use the 'wait_for_ssh_connection' method from the same library, which uses the 'g.run' method. 2. Add parameter 'wait_for_connection' Change-Id: I81aecb126b1e4914a2dfb6dc8901da0a3a4329d0 Signed-off-by: vamahaja --- .../openshiftstoragelibs/node_ops.py | 68 +++++++++------------- 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/openshift-storage-libs/openshiftstoragelibs/node_ops.py b/openshift-storage-libs/openshiftstoragelibs/node_ops.py index 74e67998..943ad194 100644 --- a/openshift-storage-libs/openshiftstoragelibs/node_ops.py +++ b/openshift-storage-libs/openshiftstoragelibs/node_ops.py @@ -13,47 +13,6 @@ from openshiftstoragelibs import waiter CLOUD_PROVIDER = None -def node_reboot_by_command(node, timeout=600, wait_step=10): - """Reboot node and wait to start for given timeout. - - Args: - node (str) : Node which needs to be rebooted. - timeout (int) : Seconds to wait before node to be started. - wait_step (int): Interval in seconds to wait before checking - status of node again. 
- """ - cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'" - ret, out, err = g.run(node, cmd) - if ret != 255: - err_msg = "failed to reboot host '%s' error %s" % (node, err) - g.log.error(err_msg) - raise AssertionError(err_msg) - - try: - g.ssh_close_connection(node) - except Exception as e: - g.log.error("failed to close connection with host %s " - "with error: %s" % (node, e)) - raise - - # added sleep as node will restart after 3 sec - time.sleep(3) - - for w in waiter.Waiter(timeout=timeout, interval=wait_step): - try: - if g.rpyc_get_connection(node, user="root"): - g.rpyc_close_connection(node, user="root") - return - except Exception as err: - g.log.info("exception while getting connection: '%s'" % err) - - if w.expired: - error_msg = ("exceeded timeout %s sec, node '%s' is " - "not reachable" % (timeout, node)) - g.log.error(error_msg) - raise exceptions.ExecutionError(error_msg) - - def wait_for_ssh_connection(hostname, timeout=600, interval=10): """Wait for ssh conection to be ready within given timeout. @@ -77,6 +36,33 @@ def wait_for_ssh_connection(hostname, timeout=600, interval=10): raise exceptions.CloudProviderError(msg) +def node_reboot_by_command( + node, timeout=600, wait_step=10, wait_for_connection=True): + """Reboot node and wait to start for given timeout. + + Args: + node (str) : Node which needs to be rebooted. + timeout (int) : Seconds to wait before node to be started. + wait_step (int) : Interval in seconds to wait before checking + status of node again. + wait_for_connection : Flag to wait for to check SSH connection to node. + Raises: + CloudProviderError: In case of any failures. 
+ """ + cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'" + ret, out, err = g.run(node, cmd) + if ret != 255: + err_msg = "failed to reboot host '{}' error {}".format(node, err) + g.log.error(err_msg) + raise AssertionError(err_msg) + + # added sleep as node will restart after 3 sec + time.sleep(3) + + if wait_for_connection: + wait_for_ssh_connection(node, timeout=timeout, interval=wait_step) + + def _get_cloud_provider(): """Gather cloud provider facts""" -- cgit