-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/baseclass.py                64
-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/cloundproviders/vmware.py    4
-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/openshift_ops.py            77
-rw-r--r--  tests/functional/gluster_stability/test_gluster_block_stability.py     107
4 files changed, 247 insertions(+), 5 deletions(-)
diff --git a/openshift-storage-libs/openshiftstoragelibs/baseclass.py b/openshift-storage-libs/openshiftstoragelibs/baseclass.py
index ebf5c77..4ca48f0 100644
--- a/openshift-storage-libs/openshiftstoragelibs/baseclass.py
+++ b/openshift-storage-libs/openshiftstoragelibs/baseclass.py
@@ -7,6 +7,7 @@ import six
from openshiftstoragelibs import command
from openshiftstoragelibs.exceptions import (
+ CloudProviderError,
ConfigError,
ExecutionError,
)
@@ -26,6 +27,8 @@ from openshiftstoragelibs.heketi_ops import (
from openshiftstoragelibs.node_ops import (
node_add_iptables_rules,
node_delete_iptables_rules,
+ power_off_vm_by_name,
+ power_on_vm_by_name,
)
from openshiftstoragelibs.openshift_ops import (
get_block_provisioner,
@@ -41,9 +44,12 @@ from openshiftstoragelibs.openshift_ops import (
oc_label,
scale_dcs_pod_amount_and_wait,
switch_oc_project,
+ wait_for_gluster_pod_be_ready_on_specific_node,
+ wait_for_ocp_node_be_ready,
wait_for_pvcs_be_bound,
wait_for_pods_be_ready,
wait_for_resources_absence,
+ wait_for_service_status_on_gluster_pod_or_node,
)
from openshiftstoragelibs.openshift_storage_libs import (
get_iscsi_block_devices_by_path,
@@ -549,6 +555,64 @@ class BaseClass(unittest.TestCase):
g.log.warn(msg)
return super(BaseClass, cls).doClassCleanups()
+ def power_on_vm(self, vm_name):
+ try:
+ power_on_vm_by_name(vm_name)
+ except CloudProviderError as e:
+            # If the VM is already powered on, swallow the error;
+            # re-raise any other cloud provider error.
+            if 'VM %s is already powered On' % vm_name not in str(e):
+                raise
+
+ def power_off_vm(self, vm_name):
+ self.addCleanup(self.power_on_vm, vm_name)
+ power_off_vm_by_name(vm_name)
+
+ def power_on_gluster_node_vm(
+ self, vm_name, gluster_hostname, timeout=300, wait_step=3):
+        # NOTE(Nitin Goyal): the same timeout is reused by every wait call.
+
+ # Bring up the target node
+ power_on_vm_by_name(vm_name)
+
+ # Wait for gluster node and pod to be ready
+ if self.is_containerized_gluster():
+ wait_for_ocp_node_be_ready(
+ self.node, gluster_hostname,
+ timeout=timeout, wait_step=wait_step)
+ wait_for_gluster_pod_be_ready_on_specific_node(
+ self.node, gluster_hostname,
+ timeout=timeout, wait_step=wait_step)
+
+ # Wait for gluster services to be up
+ for service in ('glusterd', 'gluster-blockd'):
+ wait_for_service_status_on_gluster_pod_or_node(
+ self.node, service, 'active', 'running', gluster_hostname,
+ raise_on_error=False, timeout=timeout, wait_step=wait_step)
+
+ def power_off_gluster_node_vm(
+ self, vm_name, gluster_hostname, timeout=300, wait_step=3):
+        # NOTE(Nitin Goyal): the same timeout is reused by every wait call.
+
+        # Register cleanups that wait for the gluster services to come up
+ for service in ('gluster-blockd', 'glusterd'):
+ self.addCleanup(
+ wait_for_service_status_on_gluster_pod_or_node,
+ self.node, service, 'active', 'running', gluster_hostname,
+ raise_on_error=False, timeout=timeout, wait_step=wait_step)
+
+        # Register cleanups that wait for the gluster pod and the OCP node
+        # to be ready
+ if self.is_containerized_gluster():
+ self.addCleanup(
+ wait_for_gluster_pod_be_ready_on_specific_node, self.node,
+ gluster_hostname, timeout=timeout, wait_step=wait_step)
+ self.addCleanup(
+ wait_for_ocp_node_be_ready, self.node, gluster_hostname,
+ timeout=timeout, wait_step=wait_step)
+
+        # Power off the VM; self.power_off_vm() registers power-on as a
+        # cleanup, so the node is restored even if the test fails
+        self.power_off_vm(vm_name)
+
class GlusterBlockBaseClass(BaseClass):
"""Base class for gluster-block test cases."""
diff --git a/openshift-storage-libs/openshiftstoragelibs/cloundproviders/vmware.py b/openshift-storage-libs/openshiftstoragelibs/cloundproviders/vmware.py
index fe69b53..1d4d4c3 100644
--- a/openshift-storage-libs/openshiftstoragelibs/cloundproviders/vmware.py
+++ b/openshift-storage-libs/openshiftstoragelibs/cloundproviders/vmware.py
@@ -198,6 +198,8 @@ class VmWare(object):
g.log.error(msg)
raise exceptions.CloudProviderError(msg)
+    # TODO(Nitin Goyal): raise this exact exception from the other cloud
+    # providers (e.g. AWS) in the future, so that callers can match on it.
if vm[0].summary.runtime.powerState == 'poweredOn':
msg = 'VM %s is already powered On' % vm_name
g.log.error(msg)
@@ -228,6 +230,8 @@ class VmWare(object):
g.log.error(msg)
raise exceptions.CloudProviderError(msg)
+    # TODO(Nitin Goyal): raise this exact exception from the other cloud
+    # providers (e.g. AWS) in the future, so that callers can match on it.
if vm[0].summary.runtime.powerState == 'poweredOff':
msg = 'VM %s is already powered Off' % vm_name
g.log.error(msg)
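
Until the other cloud providers raise the same exception, callers can only
rely on the message-based check; a minimal sketch of the idempotent power-on
pattern (it assumes only that CloudProviderError carries the message shown
above):

    from openshiftstoragelibs.exceptions import CloudProviderError
    from openshiftstoragelibs.node_ops import power_on_vm_by_name

    def ensure_vm_powered_on(vm_name):
        # Treat 'already powered On' as success; re-raise anything else.
        try:
            power_on_vm_by_name(vm_name)
        except CloudProviderError as e:
            if 'VM %s is already powered On' % vm_name not in str(e):
                raise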
diff --git a/openshift-storage-libs/openshiftstoragelibs/openshift_ops.py b/openshift-storage-libs/openshiftstoragelibs/openshift_ops.py
index a20e91e..56b0629 100644
--- a/openshift-storage-libs/openshiftstoragelibs/openshift_ops.py
+++ b/openshift-storage-libs/openshiftstoragelibs/openshift_ops.py
@@ -467,7 +467,8 @@ def oc_delete(ocp_node, rtype, name, raise_on_absence=True):
g.log.info('Deleted resource: %r %r', rtype, name)
-def oc_get_custom_resource(ocp_node, rtype, custom, name=None, selector=None):
+def oc_get_custom_resource(ocp_node, rtype, custom, name=None, selector=None,
+ field_selector=None):
"""Get an OCP resource by custom column names.
Args:
@@ -477,6 +478,8 @@ def oc_get_custom_resource(ocp_node, rtype, custom, name=None, selector=None):
name (str|None): Name of the resource to fetch.
selector (str|list|None): Column Name or list of column
names select to.
+        field_selector (str|list|None): object field selector, e.g.
+            'spec.nodeName=foo_node_1'; a list or a comma-separated
+            string of such expressions is also accepted.
Returns:
        list: List containing data about the resource custom columns
Raises:
@@ -489,15 +492,25 @@ def oc_get_custom_resource(ocp_node, rtype, custom, name=None, selector=None):
"""
cmd = ['oc', 'get', rtype, '--no-headers']
- cmd.append('-o=custom-columns=%s' % (
- ','.join(custom) if isinstance(custom, list) else custom))
+ if name:
+ cmd.append(name)
if selector:
cmd.append('--selector %s' % (
','.join(selector) if isinstance(selector, list) else selector))
- if name:
- cmd.append(name)
+ if field_selector:
+        # NOTE(Nitin Goyal): '--field-selector' is not supported on OCP 3.6
+        # and 3.7, so append the field-selector keys as extra custom columns
+        # and filter the rows in Python afterwards.
+ custom = ','.join(custom) if isinstance(custom, list) else custom
+ field_selector = (field_selector.split(',') if isinstance(
+ field_selector, six.string_types) else field_selector)
+
+ for fs in field_selector:
+ custom += ',:' + re.split('=|!=', fs)[0]
+
+ cmd.append('-o=custom-columns=%s' % (
+ ','.join(custom) if isinstance(custom, list) else custom))
out = command.cmd_run(cmd, hostname=ocp_node)
@@ -508,7 +521,21 @@ def oc_get_custom_resource(ocp_node, rtype, custom, name=None, selector=None):
for line in (out.strip()).split('\n'):
out_list.append(
list(filter(None, map(str.strip, line.split(' ')))))
+
+ if not field_selector:
return out_list
+    # Filter the rows by the field-selector expressions and drop the
+    # helper columns appended above
+    for fs in field_selector[::-1]:
+        fs_value = re.split('=|!=', fs)[1]
+        found = fs.find('!=')
+        for out in out_list[:]:
+            # '!=' selector and the value matches: drop the row;
+            # '=' selector and the value differs: drop the row.
+            if (found > 0 and out[-1] == fs_value
+                    or found == -1 and out[-1] != fs_value):
+                out_list.remove(out)
+            # Strip the helper column appended for this selector
+            out.pop()
+ return out_list
def get_block_provisioner(ocp_node):
@@ -802,6 +829,46 @@ def get_gluster_pod_names_by_pvc_name(
return None
+def wait_for_gluster_pod_be_ready_on_specific_node(
+ ocp_client_node, gluster_hostname, selector='glusterfs=storage-pod',
+ timeout=300, wait_step=10):
+ """Wait for gluster pod to be ready on specific node.
+
+    Args:
+        ocp_client_node (str): Node to execute OCP commands on.
+        gluster_hostname (str): Name of the node hosting the gluster pod.
+        selector (str): Selector for the gluster pod.
+        timeout (int): Seconds to wait before giving up.
+        wait_step (int): Interval in seconds between readiness checks.
+
+    Returns:
+        None
+    """
+ g_pod_name = get_gluster_pod_name_for_specific_node(
+ ocp_client_node, gluster_hostname, selector=selector)
+ wait_for_pod_be_ready(
+ ocp_client_node, g_pod_name, timeout=timeout, wait_step=wait_step)
+
+
+def get_gluster_pod_name_for_specific_node(
+ ocp_client_node, gluster_hostname, selector='glusterfs=storage-pod'):
+ """Get gluster pod name on specific gluster node.
+
+ Args:
+ ocp_client_node (str): Node to execute OCP commands on.
+ gluster_hostname (str): Name of node hosting gluster pod.
+ selector (str): Selector for gluster pod.
+
+    Returns:
+        str: Name of the gluster pod.
+
+    Raises:
+        AssertionError: if no gluster pod is found on the given node.
+    """
+ g_pod_name = oc_get_custom_resource(
+ ocp_client_node, 'pod', ':.metadata.name', selector=selector,
+ field_selector='spec.nodeName=%s' % gluster_hostname)
+ if not g_pod_name:
+ raise AssertionError(
+ 'Gluster pod was not found for node %s' % gluster_hostname)
+ return g_pod_name[0][0]
+
+
def cmd_run_on_gluster_pod_or_node(
ocp_client_node, cmd, gluster_node=None, raise_on_error=True):
"""Run shell command on either Gluster PODs or Gluster nodes.
diff --git a/tests/functional/gluster_stability/test_gluster_block_stability.py b/tests/functional/gluster_stability/test_gluster_block_stability.py
index e0c4378..e81949f 100644
--- a/tests/functional/gluster_stability/test_gluster_block_stability.py
+++ b/tests/functional/gluster_stability/test_gluster_block_stability.py
@@ -1138,3 +1138,110 @@ class TestGlusterBlockStability(GlusterBlockBaseClass):
node_add_iptables_rules(path_node, chain, rules % tcmu_port)
oc_rsh(self.node, pod_name, cmd_run_io % file1)
self.verify_iscsi_sessions_and_multipath(self.pvc_name, dc_name)
+
+ def test_initiator_failures_reboot_initiator_node_when_target_node_is_down(
+ self):
+        """Restart the initiator node while a gluster (target) node is down
+        to make sure path rediscovery happens once both are back up.
+        """
+ # Skip test if not able to connect to Cloud Provider
+ try:
+ find_vm_name_by_ip_or_hostname(self.node)
+ except (NotImplementedError, ConfigError) as e:
+ self.skipTest(e)
+
+ ini_node = self.get_initiator_node_and_mark_other_nodes_unschedulable()
+
+        # Create 5 PVCs and DCs
+ pvcs = self.create_and_wait_for_pvcs(pvc_amount=5)
+ dcs = self.create_dcs_with_pvc(pvcs)
+
+ # Run I/O on app pods
+ _file, base_size, count = '/mnt/file', 4096, 1000
+ file_size = base_size * count
+ cmd_run_io = 'dd if=/dev/urandom of=%s bs=%s count=%s' % (
+ _file, base_size, count)
+ for _, pod_name in dcs.values():
+ oc_rsh(self.node, pod_name, cmd_run_io)
+
+ vol_info = {}
+ for pvc, (dc_name, _) in dcs.items():
+ # Get target portals
+ pv_name = get_pv_name_from_pvc(self.node, pvc)
+ targets = oc_get_custom_resource(
+ self.node, 'pv', ':.spec.iscsi.portals,'
+ ':.spec.iscsi.targetPortal', name=pv_name)
+ targets = [item.strip('[').strip(
+ ']') for item in targets if isinstance(item, six.string_types)]
+
+ iqn, hacount, _ = self.verify_iscsi_sessions_and_multipath(
+ pvc, dc_name)
+ vol_info[pvc] = (iqn, hacount, targets)
+
+ target = targets[0]
+
+        # Get hostname of the target node from heketi
+        target_hostname = None
+        h_node_list = heketi_node_list(
+            self.heketi_client_node, self.heketi_server_url)
+        for node_id in h_node_list:
+            node_info = heketi_node_info(
+                self.heketi_client_node, self.heketi_server_url, node_id,
+                json=True)
+            if node_info['hostnames']['storage'][0] == target:
+                target_hostname = node_info['hostnames']['manage'][0]
+                break
+        self.assertTrue(
+            target_hostname,
+            'No heketi node found for target portal %s' % target)
+
+ # Find VM Name and power it off
+ target_vm_name = find_vm_name_by_ip_or_hostname(target_hostname)
+ self.power_off_gluster_node_vm(target_vm_name, target_hostname)
+
+ # Sync I/O
+ for _, pod_name in dcs.values():
+ oc_rsh(self.node, pod_name, "/bin/sh -c 'cd /mnt && sync'")
+
+ # Reboot initiator node where all the app pods are running
+ node_reboot_by_command(ini_node)
+ wait_for_ocp_node_be_ready(self.node, ini_node)
+
+ # Wait for pods to be ready after node reboot
+ dc_names = [dc[0] for dc in dcs.values()]
+ pod_names = scale_dcs_pod_amount_and_wait(self.node, dc_names)
+
+        # Verify one path is down, because one gluster node is down
+        for iqn, hacount, _ in vol_info.values():
+ devices = get_iscsi_block_devices_by_path(ini_node, iqn).keys()
+ mpath = get_mpath_name_from_device_name(ini_node, list(devices)[0])
+ with self.assertRaises(AssertionError):
+ self.verify_all_paths_are_up_in_multipath(
+ mpath, hacount, ini_node, timeout=1)
+
+ # Power on gluster node and wait for the services to be up
+ self.power_on_gluster_node_vm(target_vm_name, target_hostname)
+
+        # Recreate the pods so that the new pods rediscover the paths
+ scale_dcs_pod_amount_and_wait(self.node, dc_names, pod_amount=0)
+ pod_names = scale_dcs_pod_amount_and_wait(self.node, dc_names)
+
+ # Verify file
+ for pvc, (dc_name, _) in dcs.items():
+ pod_name = pod_names[dc_name][0]
+ _, out, _ = oc_rsh(self.node, pod_name, 'ls -l %s' % _file)
+            msg = ("Expected size %s of file '%s' not found in 'ls' "
+                   "output '%s'" % (file_size, _file, out))
+ self.assertIn(six.text_type(file_size), out, msg)
+
+ # Verify all paths are up and running
+ for iqn, hacount, _ in vol_info.values():
+ devices = get_iscsi_block_devices_by_path(ini_node, iqn).keys()
+
+ # Get mpath names and verify that only one mpath is there
+ mpaths = set()
+ for device in devices:
+ mpaths.add(get_mpath_name_from_device_name(ini_node, device))
+ msg = ("Only one mpath was expected on Node %s, but got %s" % (
+ ini_node, mpaths))
+ self.assertEqual(1, len(mpaths), msg)
+
+ self.verify_all_paths_are_up_in_multipath(
+ list(mpaths)[0], hacount, ini_node, timeout=1)
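
The device-to-mpath bookkeeping used twice in the test above could be
factored into a small helper; a sketch, assuming the same
openshift_storage_libs helpers this module already imports:

    from openshiftstoragelibs.openshift_storage_libs import (
        get_iscsi_block_devices_by_path,
        get_mpath_name_from_device_name,
    )

    def get_mpaths_for_iqn(node, iqn):
        # Map the iSCSI devices behind an IQN to their multipath names.
        devices = get_iscsi_block_devices_by_path(node, iqn).keys()
        return set(
            get_mpath_name_from_device_name(node, d) for d in devices)

With this, both verification loops reduce to a single call plus the
assertion on len(mpaths).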