 openshift-storage-libs/openshiftstoragelibs/node_ops.py (new) | 47
 tests/functional/test_node_restart.py                         | 92
 2 files changed, 70 insertions(+), 69 deletions(-)
diff --git a/openshift-storage-libs/openshiftstoragelibs/node_ops.py b/openshift-storage-libs/openshiftstoragelibs/node_ops.py
new file mode 100644
index 00000000..fb4aaa26
--- /dev/null
+++ b/openshift-storage-libs/openshiftstoragelibs/node_ops.py
@@ -0,0 +1,47 @@
+import time
+
+from glusto.core import Glusto as g
+
+from openshiftstoragelibs import exceptions
+from openshiftstoragelibs import waiter
+
+
+def node_reboot_by_command(node, timeout=600, wait_step=10):
+    """Reboot node and wait for it to come back up within given timeout.
+
+    Args:
+        node (str) : Node which needs to be rebooted.
+        timeout (int) : Seconds to wait for the node to come back up.
+        wait_step (int): Interval in seconds to wait before checking
+            the status of the node again.
+    """
+    cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'"
+    ret, out, err = g.run(node, cmd)
+    if ret != 255:
+        err_msg = "failed to reboot host '%s' with error: %s" % (node, err)
+        g.log.error(err_msg)
+        raise AssertionError(err_msg)
+
+    try:
+        g.ssh_close_connection(node)
+    except Exception as e:
+        g.log.error("failed to close connection with host %s "
+                    "with error: %s" % (node, e))
+        raise
+
+    # the shutdown command above delays the reboot by 3 seconds
+    time.sleep(3)
+
+    for w in waiter.Waiter(timeout=timeout, interval=wait_step):
+        try:
+            if g.rpyc_get_connection(node, user="root"):
+                g.rpyc_close_connection(node, user="root")
+                return
+        except Exception as err:
+            g.log.info("exception while getting connection: '%s'" % err)
+
+    if w.expired:
+        error_msg = ("exceeded timeout %s sec, node '%s' is "
+                     "not reachable" % (timeout, node))
+        g.log.error(error_msg)
+        raise exceptions.ExecutionError(error_msg)
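
For reference, a minimal usage sketch of the new helper (the hostname below is hypothetical; it assumes glusto's connection config already covers the node):

    from openshiftstoragelibs.node_ops import node_reboot_by_command

    # Reboot a storage node and block until it accepts root connections
    # again, polling every 10 seconds for up to 10 minutes.
    node_reboot_by_command(
        "gluster-node.example.com", timeout=600, wait_step=10)
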
diff --git a/tests/functional/test_node_restart.py b/tests/functional/test_node_restart.py
index 2452c616..3d342af6 100644
--- a/tests/functional/test_node_restart.py
+++ b/tests/functional/test_node_restart.py
@@ -1,17 +1,16 @@
-import time
 from unittest import skip
 
 from glusto.core import Glusto as g
 
 from openshiftstoragelibs.baseclass import BaseClass
 from openshiftstoragelibs.exceptions import ExecutionError
+from openshiftstoragelibs.node_ops import node_reboot_by_command
 from openshiftstoragelibs.openshift_ops import (
     check_service_status_on_pod,
     get_ocp_gluster_pod_details,
     oc_rsh,
     wait_for_pod_be_ready,
 )
-from openshiftstoragelibs.waiter import Waiter
 
 
 class TestNodeRestart(BaseClass):
@@ -68,76 +67,30 @@ class TestNodeRestart(BaseClass):
         )
         self.assertEqual(ret, 0, err_msg % (second_cmd, err))
 
-    def _wait_for_gluster_pod_to_be_ready(self):
-        for gluster_pod in self.gluster_pod_list:
-            for w in Waiter(timeout=600, interval=10):
-                try:
-                    success = wait_for_pod_be_ready(
-                        self.oc_node, gluster_pod, timeout=1, wait_step=1
-                    )
-                    if success:
-                        break
-                except ExecutionError as e:
-                    g.log.info("exception %s while validating gluster "
-                               "pod %s" % (e, gluster_pod))
-
-            if w.expired:
-                error_msg = ("exceeded timeout 600 sec, pod '%s' is "
-                             "not in 'running' state" % gluster_pod)
-                g.log.error(error_msg)
-                raise ExecutionError(error_msg)
-
-    def _node_reboot(self):
-        storage_hostname = (g.config["gluster_servers"]
-                            [self.gluster_servers[0]]["storage"])
-
-        cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'"
-        ret, out, err = g.run(storage_hostname, cmd)
-
-        self.addCleanup(self._wait_for_gluster_pod_to_be_ready)
-
-        if ret != 255:
-            err_msg = "failed to reboot host %s error: %s" % (
-                storage_hostname, err)
-            g.log.error(err_msg)
-            raise AssertionError(err_msg)
-
-        try:
-            g.ssh_close_connection(storage_hostname)
-        except Exception as e:
-            g.log.error("failed to close connection with host %s"
-                        " with error: %s" % (storage_hostname, e))
-            raise
-
-        # added sleep as node will restart after 3 sec
-        time.sleep(3)
-
-        for w in Waiter(timeout=600, interval=10):
-            try:
-                if g.rpyc_get_connection(storage_hostname, user="root"):
-                    g.rpyc_close_connection(storage_hostname, user="root")
-                    break
-            except Exception as err:
-                g.log.info("exception while getting connection: '%s'" % err)
-
-        if w.expired:
-            error_msg = ("exceeded timeout 600 sec, node '%s' is "
-                         "not reachable" % storage_hostname)
-            g.log.error(error_msg)
-            raise ExecutionError(error_msg)
+    def reboot_gluster_node_and_wait_for_services(self):
+        gluster_node_ip = (
+            g.config["gluster_servers"][self.gluster_servers[0]]["storage"])
+        gluster_pod = list(filter(
+            lambda pod: (pod["pod_host_ip"] == gluster_node_ip),
+            get_ocp_gluster_pod_details(self.oc_node)))
+        if not gluster_pod:
+            raise ExecutionError(
+                "No gluster pod found with host IP '%s'." % gluster_node_ip)
+        gluster_pod = gluster_pod[0]["pod_name"]
+        self.addCleanup(
+            wait_for_pod_be_ready, self.oc_node, gluster_pod)
+        node_reboot_by_command(gluster_node_ip, timeout=600, wait_step=10)
 
         # wait for the gluster pod to be in 'Running' state
-        self._wait_for_gluster_pod_to_be_ready()
+        wait_for_pod_be_ready(self.oc_node, gluster_pod)
 
         # glusterd and gluster-blockd service should be up and running
-        service_names = ("glusterd", "gluster-blockd", "tcmu-runner")
-        for gluster_pod in self.gluster_pod_list:
-            for service in service_names:
-                g.log.info("gluster_pod - '%s' : gluster_service '%s'" % (
-                    gluster_pod, service))
-                check_service_status_on_pod(
-                    self.oc_node, gluster_pod, service, "active", "running"
-                )
+        services = (
+            ("glusterd", "running"), ("gluster-blockd", "running"),
+            ("tcmu-runner", "running"), ("gluster-block-target", "exited"))
+        for service, state in services:
+            check_service_status_on_pod(
+                self.oc_node, gluster_pod, service, "active", state)
 
     @skip("Blocked by BZ-1652913")
     def test_node_restart_check_volume(self):
@@ -145,7 +98,8 @@ class TestNodeRestart(BaseClass):
         fstab_cmd = "grep '%s' /var/lib/heketi/fstab"
         self._check_fstab_and_df_entries(df_cmd, fstab_cmd)
 
-        self._node_reboot()
+        # reboot gluster node
+        self.reboot_gluster_node_and_wait_for_services()
 
         fstab_cmd = ("grep '/var/lib/heketi' /var/lib/heketi/fstab "
                      "| cut -f2 -d ' '")