10 files changed, 1212 insertions, 5 deletions
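The library change below centers on a new helper, _get_gluster_cmd, which resolves a target (the 'auto_get_gluster_endpoint' sentinel, a gluster host IP, or a hostname) to either a bare gluster server or a podcmd.Pod and, in the pod case, wraps the command in 'oc rsh'. A minimal usage sketch of the get_peer_status helper built on top of it (this call site is illustrative and not part of the patch; get_peer_status returns None on command or parse failure, so callers should guard for it):

    from openshiftstoragelibs import gluster_ops

    # The sentinel lets the helper pick a gluster pod when containerized
    # gluster is deployed, else the first configured gluster server.
    peers = gluster_ops.get_peer_status('auto_get_gluster_endpoint')
    assert peers is not None, "peer status failed or XML was unparsable"

    # Each entry is a dict parsed from 'gluster peer status --xml'.
    disconnected = [p['hostname'] for p in peers if p['connected'] != '1']
    assert not disconnected, "disconnected peers: {}".format(disconnected)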
diff --git a/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py b/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py
index f621a860..7f5f5535 100644
--- a/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py
+++ b/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py
@@ -4,7 +4,12 @@ try:
 except ImportError:
     # py2
     import json
+try:
+    import xml.etree.cElementTree as etree
+except ImportError:
+    import xml.etree.ElementTree as etree
 import re
+import six
 import time

 from glusto.core import Glusto as g
@@ -20,7 +25,10 @@ from glustolibs.gluster.volume_ops import (
 from openshiftstoragelibs import exceptions
 from openshiftstoragelibs.heketi_ops import heketi_blockvolume_info
-from openshiftstoragelibs.openshift_ops import cmd_run_on_gluster_pod_or_node
+from openshiftstoragelibs.openshift_ops import (
+    cmd_run_on_gluster_pod_or_node,
+    get_ocp_gluster_pod_details,
+)
 from openshiftstoragelibs import podcmd
 from openshiftstoragelibs import waiter
@@ -352,3 +360,84 @@ def get_gluster_vol_free_inodes_with_hosts_of_bricks(vol_name):
             inodes_info = {brick_process: process_data["inodesFree"]}
             hosts_with_inodes_info[g_node].update(inodes_info)
     return hosts_with_inodes_info
+
+
+def _get_gluster_cmd(target, command):
+
+    if isinstance(command, six.string_types):
+        command = [command]
+    ocp_client_node = list(g.config['ocp_servers']['client'].keys())[0]
+    gluster_pods = get_ocp_gluster_pod_details(ocp_client_node)
+
+    if target == 'auto_get_gluster_endpoint':
+        if gluster_pods:
+            target = podcmd.Pod(ocp_client_node, gluster_pods[0]["pod_name"])
+        else:
+            target = list(g.config.get("gluster_servers", {}).keys())[0]
+    elif not isinstance(target, podcmd.Pod) and gluster_pods:
+        for g_pod in gluster_pods:
+            if target in (g_pod['pod_host_ip'], g_pod['pod_hostname']):
+                target = podcmd.Pod(ocp_client_node, g_pod['pod_name'])
+                break
+
+    if isinstance(target, podcmd.Pod):
+        return target.node, ' '.join(['oc', 'rsh', target.podname] + command)
+
+    return target, ' '.join(command)
+
+
+def get_peer_status(mnode):
+    """Run 'gluster peer status' using run_async and parse its XML output.
+
+    Args:
+        mnode (str): Node on which command has to be executed.
+
+    Returns:
+        NoneType: None if command execution fails or the output cannot
+            be parsed.
+        list: list of dicts on success.
+
+    Examples:
+        >>> get_peer_status(mnode = 'abc.lab.eng.xyz.com')
+        [{'uuid': '77dc299a-32f7-43d8-9977-7345a344c398',
+        'hostname': 'ijk.lab.eng.xyz.com',
+        'state': '3',
+        'hostnames' : ['ijk.lab.eng.xyz.com'],
+        'connected': '1',
+        'stateStr': 'Peer in Cluster'},
+
+        {'uuid': 'b15b8337-9f8e-4ec3-8bdb-200d6a67ae12',
+        'hostname': 'def.lab.eng.xyz.com',
+        'state': '3',
+        'hostnames': ['def.lab.eng.xyz.com'],
+        'connected': '1',
+        'stateStr': 'Peer in Cluster'}
+        ]
+    """
+    mnode, cmd = _get_gluster_cmd(mnode, "gluster peer status --xml")
+    obj = g.run_async(mnode, cmd, log_level='DEBUG')
+    ret, out, err = obj.async_communicate()
+
+    if ret:
+        g.log.error(
+            "Failed to execute peer status command on node {} with error "
+            "{}".format(mnode, err))
+        return None
+
+    try:
+        root = etree.XML(out)
+    except etree.ParseError:
+        g.log.error("Failed to parse the gluster peer status xml output.")
+        return None
+
+    peer_status_list = []
+    for peer in root.findall("peerStatus/peer"):
+        peer_dict = {}
+        for element in peer.getchildren():
+            if element.tag == "hostnames":
+                hostnames_list = []
+                for hostname in element.getchildren():
+                    hostnames_list.append(hostname.text)
+                element.text = hostnames_list
+            peer_dict[element.tag] = element.text
+        peer_status_list.append(peer_dict)
+    return peer_status_list
diff --git a/tests/functional/heketi/test_heketi_brick_evict.py b/tests/functional/heketi/test_heketi_brick_evict.py
index 27cc1ebf..1cba24c4 100644
--- a/tests/functional/heketi/test_heketi_brick_evict.py
+++ b/tests/functional/heketi/test_heketi_brick_evict.py
@@ -1,11 +1,16 @@
 import pytest
 from glustolibs.gluster import volume_ops
+import six

 from openshiftstoragelibs.baseclass import BaseClass
+from openshiftstoragelibs import exceptions
 from openshiftstoragelibs import heketi_ops
 from openshiftstoragelibs import heketi_version
+from openshiftstoragelibs import node_ops
+from openshiftstoragelibs import openshift_ops
 from openshiftstoragelibs import podcmd
+from openshiftstoragelibs import waiter


 class TestHeketiBrickEvict(BaseClass):
@@ -20,6 +25,8 @@ class TestHeketiBrickEvict(BaseClass):
                 "heketi-client package {} does not support brick evict".format(
                     version.v_str))

+        self.ocp_client = self.ocp_master_node[0]
+
         node_list = heketi_ops.heketi_node_list(
             self.heketi_client_node, self.heketi_server_url)

@@ -88,3 +95,86 @@ class TestHeketiBrickEvict(BaseClass):
             bricks_new, gbricks, "gluster vol info and heketi vol info "
             "mismatched after brick evict {} \n {}".format(
                 gvol_info, vol_info_new))
+
+    def _wait_for_gluster_pod_after_node_reboot(self, node_hostname):
+        """Wait for glusterfs pod to be ready after node reboot"""
+        openshift_ops.wait_for_ocp_node_be_ready(
+            self.ocp_client, node_hostname)
+        gluster_pod = openshift_ops.get_gluster_pod_name_for_specific_node(
+            self.ocp_client, node_hostname)
+        openshift_ops.wait_for_pod_be_ready(self.ocp_client, gluster_pod)
+        services = (
+            ("glusterd", "running"), ("gluster-blockd", "running"),
+            ("tcmu-runner", "running"), ("gluster-block-target", "exited"))
+        for service, state in services:
+            openshift_ops.check_service_status_on_pod(
+                self.ocp_client, gluster_pod, service, "active", state)
+
+    @pytest.mark.tier4
+    def test_brick_evict_with_node_down(self):
+        """Test basic brick evict functionality and verify brick evict
+        after a node goes down"""
+
+        h_node, h_server = self.heketi_client_node, self.heketi_server_url
+
+        # Disable extra nodes if there are more than 3
+        node_list = heketi_ops.heketi_node_list(h_node, h_server)
+        if len(node_list) > 3:
+            for node_id in node_list[3:]:
+                heketi_ops.heketi_node_disable(h_node, h_server, node_id)
+                self.addCleanup(
+                    heketi_ops.heketi_node_enable, h_node, h_server, node_id)
+
+        # Create heketi volume
+        vol_info = heketi_ops.heketi_volume_create(
+            h_node, h_server, 1, json=True)
+        self.addCleanup(
+            heketi_ops.heketi_volume_delete,
+            h_node, h_server, vol_info.get('id'))
+
+        # Get node on which heketi pod is scheduled
+        heketi_pod = openshift_ops.get_pod_name_from_dc(
+            self.ocp_client, self.heketi_dc_name)
+        heketi_node = openshift_ops.oc_get_custom_resource(
+            self.ocp_client, 'pod', '.:spec.nodeName', heketi_pod)[0]
+
+        # Get list of hostnames from node ids
+        host_list = []
+        for node_id in node_list[3:]:
+            node_info = heketi_ops.heketi_node_info(
+                h_node, h_server, node_id, json=True)
+            host_list.append(node_info.get('hostnames').get('manage')[0])
+
+        # Get brick id on a glusterfs node which is not the heketi node
+        for node in vol_info.get('bricks', {}):
+            node_info = heketi_ops.heketi_node_info(
+                h_node, h_server, node.get('node'), json=True)
+            hostname = node_info.get('hostnames').get('manage')[0]
+            if (hostname != heketi_node) and (hostname not in host_list):
+                brick_id = node.get('id')
+                break
+
+        # Bring down the glusterfs node
+        vm_name = node_ops.find_vm_name_by_ip_or_hostname(hostname)
+        self.addCleanup(
+            self._wait_for_gluster_pod_after_node_reboot, hostname)
+        self.addCleanup(node_ops.power_on_vm_by_name, vm_name)
+        node_ops.power_off_vm_by_name(vm_name)
+
+        # Wait for the glusterfs node to become NotReady
+        custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
+        for w in waiter.Waiter(300, 20):
+            status = openshift_ops.oc_get_custom_resource(
+                self.ocp_client, 'node', custom, hostname)
+            if status[0] in ['False', 'Unknown']:
+                break
+        if w.expired:
+            raise exceptions.ExecutionError(
+                "Failed to bring down node {}".format(hostname))
+
+        # Perform brick evict operation
+        try:
+            heketi_ops.heketi_brick_evict(h_node, h_server, brick_id)
+        except AssertionError as e:
+            if ('No Replacement was found' not in six.text_type(e)):
+                raise
diff --git a/tests/functional/heketi/test_heketi_device_operations.py b/tests/functional/heketi/test_heketi_device_operations.py
index a6831e98..05f16ef9 100755
--- a/tests/functional/heketi/test_heketi_device_operations.py
+++ b/tests/functional/heketi/test_heketi_device_operations.py
@@ -1,6 +1,7 @@
 import ddt
 from glusto.core import Glusto as g
 import pytest
+import six

 from openshiftstoragelibs.baseclass import BaseClass
 from openshiftstoragelibs.heketi_ops import (
@@ -17,6 +18,8 @@ from openshiftstoragelibs.heketi_ops import (
     heketi_topology_info,
     heketi_volume_create,
     heketi_volume_delete,
+    rm_tags,
+    set_tags,
     validate_dev_path_vg_and_uuid,
 )
 from openshiftstoragelibs import utils
@@ -600,3 +603,96 @@ class TestHeketiDeviceOperations(BaseClass):
                 h_node, h_url, node, dev)
             self.assertTrue(is_true, "Failed to verify dv_path for the "
                             "device {}".format(dev))
+
+    @pytest.mark.tier3
+    def test_volume_create_as_tag_matching_rule(self):
+        """Validate volume creation with a tag-matching rule when only
+        one device in the cluster is tagged"""
+
+        h_node, h_server = self.heketi_client_node, self.heketi_server_url
+
+        # Set tag on any one device in cluster
+        node_list = heketi_node_list(h_node, h_server, json=True)
+        node_info = heketi_node_info(h_node, h_server, node_list[0], json=True)
+        device_id = node_info.get('devices', {})[0].get('id')
+        set_tags(h_node, h_server, 'device', device_id, "tier:it")
+        self.addCleanup(rm_tags, h_node, h_server, 'device', device_id, 'tier')
+
+        # Volume creation should fail, as only one device carries the tag
+        try:
+            heketi_volume_create(
+                h_node, h_server, 2,
+                gluster_volume_options="user.heketi.device-tag-match tier=it")
+        except AssertionError as e:
+            if ("Failed to allocate new volume" not in six.text_type(e)):
+                raise
+
+    @pytest.mark.tier4
+    def test_device_settags_tier_option(self):
+        """Validate volume placement with device tags set via settags"""
+
+        h_node, h_server = self.heketi_client_node, self.heketi_server_url
+        initial_brick_count, before_brick_count, after_brick_count = [], [], []
+
+        # Set tag on device on 3 different nodes
+        node_list = heketi_node_list(h_node, h_server, json=True)
+        device_list = []
+        for node_id in node_list[:3]:
+            node_info = heketi_node_info(h_node, h_server, node_id, json=True)
+            device_id = node_info.get('devices', {})[0].get('id')
+            device_list.append(device_id)
+            set_tags(h_node, h_server, 'device', device_id, "tier:test")
+            self.addCleanup(
+                rm_tags, h_node, h_server, 'device', device_id, "tier",
+                raise_on_error=False)
+
+        # Get initial number of bricks present on the devices
+        for device_id in device_list:
+            device_info = heketi_device_info(
+                h_node, h_server, device_id, json=True)
+            initial_brick_count.append(len(device_info.get("bricks")))
+
+        # Create volume with device tag option
+        volume_info = heketi_volume_create(
+            h_node, h_server, 2,
+            gluster_volume_options="user.heketi.device-tag-match tier=test",
+            json=True)
+        self.addCleanup(
+            heketi_volume_delete, h_node, h_server, volume_info.get("id"))
+
+        # Get number of bricks present on the devices after volume create
+        for device_id in device_list:
+            device_info = heketi_device_info(
+                h_node, h_server, device_id, json=True)
+            before_brick_count.append(len(device_info.get("bricks")))
+
+        # Validate volume has been created on the tagged devices
+        self.assertGreater(
+            sum(before_brick_count), sum(initial_brick_count),
+            "Volume {} has not been created on the tagged devices".format(
+                volume_info.get("id")))
+
+        # Create volume with a not-equal tag option
+        volume_info = heketi_volume_create(
+            h_node, h_server, 2,
+            gluster_volume_options="user.heketi.device-tag-match tier!=test",
+            json=True)
+        self.addCleanup(
+            heketi_volume_delete, h_node, h_server, volume_info.get("id"))
+
+        # Get number of bricks present on the devices after volume create
+        for device_id in device_list:
+            device_info = heketi_device_info(
+                h_node, h_server, device_id, json=True)
+            after_brick_count.append(len(device_info.get("bricks")))
+
+        # Validate volume has not been created on the tagged devices
+        self.assertEqual(
+            before_brick_count, after_brick_count,
+            "Volume {} has been created on the tagged devices".format(
+                volume_info.get("id")))
+
+        # Update the tag on the devices
+        for device_id in device_list:
+            set_tags(h_node, h_server, 'device', device_id, "tier:test_update")
+            self.addCleanup(
+                rm_tags, h_node, h_server, 'device', device_id, "tier")
diff --git a/tests/functional/heketi/test_volume_expansion_and_devices.py b/tests/functional/heketi/test_volume_expansion_and_devices.py
index df064e76..fa78b1aa 100644
--- a/tests/functional/heketi/test_volume_expansion_and_devices.py
+++ b/tests/functional/heketi/test_volume_expansion_and_devices.py
@@ -10,6 +10,7 @@ from openshiftstoragelibs import (
     heketi_ops,
     podcmd,
 )
+from openshiftstoragelibs import utils


 class TestVolumeExpansionAndDevicesTestCases(BaseClass):
@@ -521,3 +522,44 @@ class TestVolumeExpansionAndDevicesTestCases(BaseClass):
             free_space_after_deletion > free_space_after_expansion,
             "Free space is not reclaimed after volume deletion of %s"
             % volume_id)
+
+    @pytest.mark.tier2
+    @podcmd.GlustoPod()
+    def test_replica_volume_expand(self):
+        """
+        Test expansion of a replica volume
+        """
+        h_node, h_server = self.heketi_client_node, self.heketi_server_url
+        volume_name = (
+            "autotests-heketi-volume-{}".format(utils.get_random_str()))
+        volume_size = 10
+        creation_info = self.create_heketi_volume_with_name_and_wait(
+            volume_name, volume_size, json=True, raise_on_cleanup_error=False)
+        volume_id = creation_info["id"]
+        volume_info = heketi_ops.heketi_volume_info(
+            h_node, h_server, volume_id, json=True)
+
+        # Get gluster volume info
+        gluster_vol = volume_ops.get_volume_info(
+            'auto_get_gluster_endpoint', volname=volume_name)
+        self.assertTrue(
+            gluster_vol, "Failed to get volume {} info".format(volume_name))
+        vol_name = gluster_vol[volume_name]
+        self.assertEqual(
+            vol_name['replicaCount'], "3",
+            "Replica count is different for volume {}. Actual: {}, "
+            "Expected: 3".format(volume_name, vol_name['replicaCount']))
+
+        expand_size = 5
+        heketi_ops.heketi_volume_expand(
+            h_node, h_server, volume_id, expand_size)
+        volume_info = heketi_ops.heketi_volume_info(
+            h_node, h_server, volume_id, json=True)
+        expected_size = volume_size + expand_size
+        self.assertEqual(
+            volume_info['size'], expected_size,
+            "Volume Expansion failed, Expected Size: {}, Actual "
+            "Size: {}".format(str(expected_size), str(volume_info['size'])))
+
+        self.get_brick_and_volume_status(volume_name)
+        self.get_rebalance_status(volume_name)
diff --git a/tests/functional/logging/test_logging_validations.py b/tests/functional/logging/test_logging_validations.py
index a160fd7a..509c71d8 100644
--- a/tests/functional/logging/test_logging_validations.py
+++ b/tests/functional/logging/test_logging_validations.py
@@ -9,6 +9,7 @@ from openshiftstoragelibs import command
 from openshiftstoragelibs import exceptions
 from openshiftstoragelibs import gluster_ops
 from openshiftstoragelibs import openshift_ops
+from openshiftstoragelibs import waiter


 @ddt.ddt
@@ -352,3 +353,44 @@ class TestLoggingAndGlusterRegistryValidation(GlusterBlockBaseClass):
             pvc_name, self._logging_es_dc,
             heketi_server_url=self._registry_heketi_server_url,
             is_registry_gluster=True)
+
+    @pytest.mark.tier3
+    def test_run_workload_with_logging(self):
+        """Validate logs are being generated after running workload"""
+
+        # Get the initial size of used space of logs
+        es_pod = openshift_ops.get_pod_name_from_dc(
+            self._master, self._logging_es_dc)
+        mount_point = "/elasticsearch/persistent"
+        cmd_space_check = ('df -kh --output=used {} | sed "/Used/d" |'
+                           'sed "s/G//"'.format(mount_point))
+        ret, initial_used_percent, err = openshift_ops.oc_rsh(
+            self._master, es_pod, cmd_space_check)
+        err_msg = "Failed to fetch the size of used space, error {}"
+        self.assertFalse(ret, err_msg.format(err))
+
+        # Create 20 PVCs and app pods with I/O
+        openshift_ops.switch_oc_project(
+            self._master, self.storage_project_name)
+        pvc_count, batch_count = 5, 4
+        for _ in range(batch_count):
+            pvcs = self.create_and_wait_for_pvcs(pvc_amount=pvc_count)
+            self.create_dcs_with_pvc(pvcs)
+        self.addCleanup(
+            openshift_ops.switch_oc_project,
+            self._master, self.storage_project_name)
+
+        # Get and verify the final size of used space of logs
+        openshift_ops.switch_oc_project(
+            self._master, self._logging_project_name)
+        for w in waiter.Waiter(600, 30):
+            ret, final_used_percent, err = openshift_ops.oc_rsh(
+                self._master, es_pod, cmd_space_check)
+            self.assertFalse(ret, err_msg.format(err))
+            if int(initial_used_percent) < int(final_used_percent):
+                break
+        if w.expired:
+            raise AssertionError(
+ "Initial used space {} for logs is not less than final " + "used space {}".format( + initial_used_percent, final_used_percent)) diff --git a/tests/functional/metrics/test_metrics_validation.py b/tests/functional/metrics/test_metrics_validation.py index ce7e843f..e16fe349 100644 --- a/tests/functional/metrics/test_metrics_validation.py +++ b/tests/functional/metrics/test_metrics_validation.py @@ -27,6 +27,7 @@ from openshiftstoragelibs.openshift_storage_libs import ( get_iscsi_block_devices_by_path, get_mpath_name_from_device_name, ) +from openshiftstoragelibs import waiter @ddt.ddt @@ -274,3 +275,41 @@ class TestMetricsAndGlusterRegistryValidation(GlusterBlockBaseClass): restart_gluster_vol_brick_processes( self.master, bhv_name, list(self.registry_servers_info.keys())) self.addCleanup(self.cassandra_pod_delete_cleanup, raise_on_error=True) + + @pytest.mark.tier3 + def test_run_workload_with_metrics(self): + """Validate if logs are being generated after running workload""" + + # Get the size of used space of logs + cassandra_pod = get_pod_name_from_rc( + self.master, self.metrics_rc_hawkular_cassandra) + mount_point = "/cassandra_data" + cmd_space_check = ('df -k --output=used {} | sed "/Used/d" |' + 'sed "s/G//"'.format(mount_point)) + ret, initial_used_percent, err = oc_rsh( + self.master, cassandra_pod, cmd_space_check) + err_msg = "Failed to fetch the size of used space, error {}" + self.assertFalse(ret, err_msg.format(err)) + + # Create 20 PVCs and app pods with IO + switch_oc_project(self.master, self.storage_project_name) + pvc_count, batch_count = 5, 4 + for _ in range(batch_count): + pvcs = self.create_and_wait_for_pvcs(pvc_amount=pvc_count) + self.create_dcs_with_pvc(pvcs) + self.addCleanup( + switch_oc_project, self.master, self.storage_project_name) + + # Get and verify the final size of used space of logs + switch_oc_project(self.master, self.metrics_project_name) + for w in waiter.Waiter(600, 30): + ret, final_used_percent, err = oc_rsh( + self.master, cassandra_pod, cmd_space_check) + self.assertFalse(ret, err_msg.format(err)) + if int(initial_used_percent) < int(final_used_percent): + break + if w.expired: + raise AssertionError( + "Initial used space {} for logs is not less than final " + "used space {}".format( + initial_used_percent, final_used_percent)) diff --git a/tests/functional/prometheous/test_prometheus_validations.py b/tests/functional/prometheous/test_prometheus_validations.py index cfd9ab1b..68b69212 100644 --- a/tests/functional/prometheous/test_prometheus_validations.py +++ b/tests/functional/prometheous/test_prometheus_validations.py @@ -5,6 +5,7 @@ except ImportError: # py2 import json from pkg_resources import parse_version +from functools import reduce import ddt from glusto.core import Glusto as g @@ -19,6 +20,7 @@ from openshiftstoragelibs import heketi_ops from openshiftstoragelibs import gluster_ops from openshiftstoragelibs import node_ops from openshiftstoragelibs import openshift_ops +from openshiftstoragelibs import openshift_storage_libs from openshiftstoragelibs import podcmd from openshiftstoragelibs import waiter @@ -132,6 +134,52 @@ class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass): self.assertFalse( start_vol, "Failed to start volume using force") + def _get_newly_deployed_gluster_pod(self, g_pod_list_before): + + # Fetch pod after delete + g_pod_list_after = [ + pod["pod_name"] + for pod in openshift_ops.get_ocp_gluster_pod_details(self._master)] + + # Fetch the new gluster pod + g_new_pod = 
+        g_new_pod = list(set(g_pod_list_after) - set(g_pod_list_before))
+        self.assertTrue(g_new_pod, "No new gluster pod deployed after delete")
+        return g_new_pod
+
+    def _gluster_pod_delete(self, g_pod_list_before):
+        """Wait for the new gluster pod to be ready, force-deleting the
+        old pod if it fails to come up"""
+        openshift_ops.switch_oc_project(
+            self._master, self._registry_project_name)
+
+        # Fetch newly deployed gluster pod after delete
+        try:
+            pod_name = self._get_newly_deployed_gluster_pod(g_pod_list_before)
+            openshift_ops.wait_for_pod_be_ready(
+                self._master,
+                pod_name[0] if pod_name else g_pod_list_before[0],
+                timeout=120, wait_step=6)
+        except exceptions.ExecutionError:
+            openshift_ops.oc_delete(
+                self._master, 'pod', g_pod_list_before[0], is_force=True)
+            openshift_ops.wait_for_resource_absence(
+                self._master, 'pod', g_pod_list_before[0])
+            g_new_pod = self._get_newly_deployed_gluster_pod(g_pod_list_before)
+            openshift_ops.wait_for_pod_be_ready(self._master, g_new_pod[0])
+
+    def _wait_for_gluster_pod_be_ready(self, g_pod_list_before):
+        """Wait for the gluster pods to be in ready state"""
+        openshift_ops.switch_oc_project(
+            self._master, self._registry_project_name)
+
+        # Check if the gluster pods are in ready state
+        try:
+            pod_count = len(self._registry_servers_info.keys())
+            openshift_ops.wait_for_pods_be_ready(
+                self._master, pod_count, "glusterfs-node=pod",
+                timeout=120, wait_step=6)
+        except exceptions.ExecutionError:
+            self._gluster_pod_delete(g_pod_list_before)
+
     @pytest.mark.tier2
     def test_promethoues_pods_and_pvcs(self):
         """Validate prometheus pods and PVC"""
@@ -711,3 +759,218 @@ class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass):
         self.assertEqual(
             initial_prometheus, final_prometheus, err_msg.format(
                 initial_prometheus, final_prometheus))
+
+    @pytest.mark.tier4
+    @ddt.data('add', 'delete')
+    def test_heketi_metrics_validation_after_node(self, condition):
+        """Validate heketi metrics after adding and removing a node"""
+
+        # Get additional node
+        additional_host_info = g.config.get("additional_gluster_servers")
+        if not additional_host_info:
+            self.skipTest(
+                "Skipping this test case as additional gluster server is "
+                "not provided in config file")
+
+        additional_host_info = list(additional_host_info.values())[0]
+        storage_hostname = additional_host_info.get("manage")
+        storage_ip = additional_host_info.get("storage")
+        if not (storage_hostname and storage_ip):
+            self.skipTest(
+                "Config options 'additional_gluster_servers.manage' "
+                "and 'additional_gluster_servers.storage' must be set.")
+
+        h_client, h_server = self.heketi_client_node, self.heketi_server_url
+        initial_node_count, final_node_count = 0, 0
+
+        # Get initial node count from prometheus metrics
+        metric_result = self._fetch_metric_from_promtheus_pod(
+            metric='heketi_nodes_count')
+        initial_node_count = reduce(
+            lambda x, y: x + y,
+            [result.get('value')[1] for result in metric_result])
+
+        # Switch to storage project
+        openshift_ops.switch_oc_project(
+            self._master, self.storage_project_name)
+
+        # Configure node before adding node
+        self.configure_node_to_run_gluster(storage_hostname)
+
+        # Get cluster list
+        cluster_info = heketi_ops.heketi_cluster_list(
+            h_client, h_server, json=True)
+
+        # Add node to the cluster
+        heketi_node_info = heketi_ops.heketi_node_add(
+            h_client, h_server,
+            len(self.gluster_servers), cluster_info.get('clusters')[0],
+            storage_hostname, storage_ip, json=True)
+        heketi_node_id = heketi_node_info.get("id")
+        self.addCleanup(
+            heketi_ops.heketi_node_delete,
+            h_client, h_server, heketi_node_id, raise_on_error=False)
+        self.addCleanup(
+            heketi_ops.heketi_node_remove,
+            h_client, h_server, heketi_node_id, raise_on_error=False)
+        self.addCleanup(
+            heketi_ops.heketi_node_disable,
+            h_client, h_server, heketi_node_id, raise_on_error=False)
+        self.addCleanup(
+            openshift_ops.switch_oc_project,
+            self._master, self.storage_project_name)
+
+        if condition == 'delete':
+            # Switch to openshift-monitoring project
+            openshift_ops.switch_oc_project(
+                self.ocp_master_node[0], self._prometheus_project_name)
+
+            # Wait for the node count to be updated in prometheus metrics
+            for w in waiter.Waiter(timeout=60, interval=10):
+                metric_result = self._fetch_metric_from_promtheus_pod(
+                    metric='heketi_nodes_count')
+                node_count = reduce(
+                    lambda x, y: x + y,
+                    [result.get('value')[1] for result in metric_result])
+                if node_count != initial_node_count:
+                    break
+
+            if w.expired:
+                raise exceptions.ExecutionError(
+                    "Failed to get updated node details from prometheus")
+
+            # Remove node from cluster
+            heketi_ops.heketi_node_disable(h_client, h_server, heketi_node_id)
+            heketi_ops.heketi_node_remove(h_client, h_server, heketi_node_id)
+            for device in heketi_node_info.get('devices'):
+                heketi_ops.heketi_device_delete(
+                    h_client, h_server, device.get('id'))
+            heketi_ops.heketi_node_delete(h_client, h_server, heketi_node_id)
+
+        # Switch to openshift-monitoring project
+        openshift_ops.switch_oc_project(
+            self.ocp_master_node[0], self._prometheus_project_name)
+
+        # Get final node count from prometheus metrics
+        for w in waiter.Waiter(timeout=60, interval=10):
+            metric_result = self._fetch_metric_from_promtheus_pod(
+                metric='heketi_nodes_count')
+            final_node_count = reduce(
+                lambda x, y: x + y,
+                [result.get('value')[1] for result in metric_result])
+
+            if condition == 'delete':
+                if final_node_count < node_count:
+                    break
+            else:
+                if final_node_count > initial_node_count:
+                    break
+
+        if w.expired:
+            raise exceptions.ExecutionError(
+                "Failed to update node details in prometheus")
+
+    @pytest.mark.tier2
+    def test_restart_prometheus_glusterfs_pod(self):
+        """Validate restarting glusterfs pod"""
+
+        # Check for CRS version
+        openshift_ops.switch_oc_project(
+            self._master, self._registry_project_name)
+        if not self.is_containerized_gluster():
+            self.skipTest(
+                "Skipping this test case as CRS version check "
+                "can not be implemented")
+
+        # Get one of the prometheus pod names and the respective pvc name
+        openshift_ops.switch_oc_project(
+            self._master, self._prometheus_project_name)
+        prometheus_pods = openshift_ops.oc_get_pods(
+            self._master, selector=self._prometheus_resources_selector)
+        if not prometheus_pods:
+            self.skipTest(
+                "Skipping test as prometheus pod is not present")
+        prometheus_pod = list(prometheus_pods.keys())[0]
+        pvc_name = openshift_ops.oc_get_custom_resource(
+            self._master, "pod",
+            ":.spec.volumes[*].persistentVolumeClaim.claimName",
+            prometheus_pod)[0]
+        self.assertTrue(
+            pvc_name,
+            "Failed to get pvc name from {} pod".format(prometheus_pod))
+        iqn, _, node = self.verify_iscsi_sessions_and_multipath(
+            pvc_name, prometheus_pod, rtype='pod',
+            heketi_server_url=self._registry_heketi_server_url,
+            is_registry_gluster=True)
+
+        # Get the ip of active path
+        devices = openshift_storage_libs.get_iscsi_block_devices_by_path(
+            node, iqn)
+        mpath = openshift_storage_libs.get_mpath_name_from_device_name(
+            node, list(devices.keys())[0])
+        mpath_dev = (
+            openshift_storage_libs.get_active_and_enabled_devices_from_mpath(
+                node, mpath))
+        node_ip = devices[mpath_dev['active'][0]]
+
+        # Get the name of the gluster pod from the ip
+        openshift_ops.switch_oc_project(
+            self._master, self._registry_project_name)
+        gluster_pods = openshift_ops.get_ocp_gluster_pod_details(
+            self._master)
+        active_pod_name = list(
+            filter(lambda pod: (pod["pod_host_ip"] == node_ip), gluster_pods)
+        )[0]["pod_name"]
+        err_msg = "Failed to get the gluster pod name {} with the active path"
+        self.assertTrue(active_pod_name, err_msg.format(active_pod_name))
+        g_pods = [pod['pod_name'] for pod in gluster_pods]
+        g_pods.remove(active_pod_name)
+        pod_list = [active_pod_name, g_pods[0]]
+        for pod_name in pod_list:
+
+            # Delete the glusterfs pods
+            openshift_ops.switch_oc_project(
+                self._master, self._prometheus_project_name)
+            self._fetch_metric_from_promtheus_pod(
+                metric='heketi_device_brick_count')
+
+            openshift_ops.switch_oc_project(
+                self._master, self._registry_project_name)
+            g_pod_list_before = [
+                pod["pod_name"]
+                for pod in openshift_ops.get_ocp_gluster_pod_details(
+                    self._master)]
+
+            openshift_ops.oc_delete(self._master, 'pod', pod_name)
+            self.addCleanup(
+                self._gluster_pod_delete, g_pod_list_before)
+
+            # Wait for gluster pod to be absent
+            openshift_ops.wait_for_resource_absence(
+                self._master, 'pod', pod_name)
+
+            # Try to fetch metric from prometheus pod
+            openshift_ops.switch_oc_project(
+                self._master, self._prometheus_project_name)
+            self._fetch_metric_from_promtheus_pod(
+                metric='heketi_device_brick_count')
+
+            # Wait for new pod to come up
+            openshift_ops.switch_oc_project(
+                self._master, self._registry_project_name)
+            self.assertTrue(self._get_newly_deployed_gluster_pod(
+                g_pod_list_before), "Failed to get new pod")
+            self._wait_for_gluster_pod_be_ready(g_pod_list_before)
+
+            # Validate iscsi and multipath
+            openshift_ops.switch_oc_project(
+                self._master, self._prometheus_project_name)
+            self.verify_iscsi_sessions_and_multipath(
+                pvc_name, prometheus_pod, rtype='pod',
+                heketi_server_url=self._registry_heketi_server_url,
+                is_registry_gluster=True)
+
+            # Try to fetch metric from prometheus pod
+            self._fetch_metric_from_promtheus_pod(
+                metric='heketi_device_brick_count')
diff --git a/tests/functional/prometheous/test_prometheus_validations_file.py b/tests/functional/prometheous/test_prometheus_validations_file.py
index b4186627..bbf4aedc 100644
--- a/tests/functional/prometheous/test_prometheus_validations_file.py
+++ b/tests/functional/prometheous/test_prometheus_validations_file.py
@@ -8,11 +8,14 @@ import time

 import ddt
 from glusto.core import Glusto as g
+from glustolibs.gluster import rebalance_ops
 import pytest

 from openshiftstoragelibs import baseclass
 from openshiftstoragelibs import exceptions
+from openshiftstoragelibs import heketi_ops
 from openshiftstoragelibs import openshift_ops
+from openshiftstoragelibs import podcmd
 from openshiftstoragelibs import waiter
@@ -83,15 +86,18 @@ class TestPrometheusValidationFile(baseclass.BaseClass):
                     "__name__"]] = matric_result["value"][1]
         return metric_data

-    def _fetch_initial_metrics(self, volume_expansion=False):
+    def _fetch_initial_metrics(self, vol_name_prefix=None,
+                               volume_expansion=False):

         # Create PVC and wait for it to be in 'Bound' state
         sc_name = self.create_storage_class(
+            vol_name_prefix=vol_name_prefix,
             allow_volume_expansion=volume_expansion)
-        pvc_name = self.create_and_wait_for_pvc(sc_name=sc_name)
+        pvc_name = self.create_and_wait_for_pvc(
+            pvc_name_prefix=vol_name_prefix, sc_name=sc_name)

         # Create DC and attach with pvc
-        dc_name, pod_name = self.create_dc_with_pvc(pvc_name)
+        self.dc_name, pod_name = self.create_dc_with_pvc(pvc_name)
         for w in waiter.Waiter(120, 10):
             initial_metrics = self._get_and_manipulate_metric_data(
                 self.metrics, pvc_name)
@@ -146,6 +152,24 @@ class TestPrometheusValidationFile(baseclass.BaseClass):
         self.assertFalse(ret, "Failed to run the IO with error msg {}".
                          format(err))

+    @podcmd.GlustoPod()
+    def _rebalance_completion(self, volume_name):
+        """Start rebalance and wait for its completion after expansion."""
+        ret, _, err = rebalance_ops.rebalance_start(
+            'auto_get_gluster_endpoint', volume_name)
+        self.assertFalse(
+            ret, "Rebalance for volume {} not started with error {}".format(
+                volume_name, err))
+
+        for w in waiter.Waiter(240, 10):
+            reb_status = rebalance_ops.get_rebalance_status(
+                'auto_get_gluster_endpoint', volume_name)
+            if reb_status["aggregate"]["statusStr"] == "completed":
+                break
+        if w.expired:
+            raise AssertionError(
+                "Failed to complete the rebalance in 240 seconds")
+
     @pytest.mark.tier2
     def test_prometheus_volume_metrics_on_pod_restart(self):
         """Validate volume metrics using prometheus before and after pod
@@ -245,3 +269,67 @@ class TestPrometheusValidationFile(baseclass.BaseClass):
             pod_name=pod_name, pvc_name=pvc_name,
             filename="filename1", dirname="dirname1",
             metric_data=half_io_metrics, operation="delete")
+
+    @pytest.mark.tier2
+    def test_prometheus_pv_resize(self):
+        """Validate prometheus metrics with pv resize"""
+
+        # Fetch the metrics and store initial_metrics as a dictionary
+        pvc_name, pod_name, initial_metrics = self._fetch_initial_metrics(
+            vol_name_prefix="for-pv-resize", volume_expansion=True)
+
+        # Write data on the pvc and confirm it is reflected in prometheus
+        self._perform_io_and_fetch_metrics(
+            pod_name=pod_name, pvc_name=pvc_name,
+            filename="filename1", dirname="dirname1",
+            metric_data=initial_metrics, operation="create")
+
+        # Resize the pvc to 2GiB
+        openshift_ops.switch_oc_project(
+            self._master, self.storage_project_name)
+        pvc_size = 2
+        openshift_ops.resize_pvc(self._master, pvc_name, pvc_size)
+        openshift_ops.wait_for_events(self._master, obj_name=pvc_name,
+                                      event_reason='VolumeResizeSuccessful')
+        openshift_ops.verify_pvc_size(self._master, pvc_name, pvc_size)
+        pv_name = openshift_ops.get_pv_name_from_pvc(
+            self._master, pvc_name)
+        openshift_ops.verify_pv_size(self._master, pv_name, pvc_size)
+
+        heketi_volume_name = heketi_ops.heketi_volume_list_by_name_prefix(
+            self.heketi_client_node, self.heketi_server_url,
+            "for-pv-resize", json=True)[0][2]
+        self.assertIsNotNone(
+            heketi_volume_name, "Failed to fetch volume with prefix {}".
+ format("for-pv-resize")) + + openshift_ops.oc_delete(self._master, 'pod', pod_name) + openshift_ops.wait_for_resource_absence(self._master, 'pod', pod_name) + pod_name = openshift_ops.get_pod_name_from_dc( + self._master, self.dc_name) + openshift_ops.wait_for_pod_be_ready(self._master, pod_name) + + # Check whether the metrics are updated or not + for w in waiter.Waiter(120, 10): + resize_metrics = self._get_and_manipulate_metric_data( + self.metrics, pvc_name) + if bool(resize_metrics) and int(resize_metrics[ + 'kubelet_volume_stats_capacity_bytes']) > int( + initial_metrics['kubelet_volume_stats_capacity_bytes']): + break + if w.expired: + raise AssertionError("Failed to reflect PVC Size after resizing") + openshift_ops.switch_oc_project( + self._master, self.storage_project_name) + time.sleep(240) + + # Lookup and trigger rebalance and wait for the its completion + for _ in range(100): + self.cmd_run("oc rsh {} ls /mnt/".format(pod_name)) + self._rebalance_completion(heketi_volume_name) + + # Write data on the resized pvc and compared with the resized_metrics + self._perform_io_and_fetch_metrics( + pod_name=pod_name, pvc_name=pvc_name, + filename="secondfilename", dirname="seconddirname", + metric_data=resize_metrics, operation="create") diff --git a/tests/functional/provisioning/test_dev_path_mapping_file.py b/tests/functional/provisioning/test_dev_path_mapping_file.py index 741ad51f..fe4e9834 100644 --- a/tests/functional/provisioning/test_dev_path_mapping_file.py +++ b/tests/functional/provisioning/test_dev_path_mapping_file.py @@ -374,3 +374,421 @@ class TestDevPathMapping(baseclass.BaseClass): use_percent, use_percent_after, "Failed to execute IO's in the app pod {} after respin".format( pod_name)) + + def _get_bricks_and_device_details(self): + """Fetch bricks count and device id list from the node where dev path + operation is performed + """ + + h_client, h_url = self.heketi_client_node, self.heketi_server_url + h_node_details = [] + + # Fetch bricks on the devices + h_nodes = heketi_ops.heketi_node_list(h_client, h_url) + for h_node in h_nodes: + h_node_info = heketi_ops.heketi_node_info( + h_client, h_url, h_node, json=True) + h_node_hostname = h_node_info.get("hostnames").get("manage")[0] + + # Fetch bricks count and device list + if h_node_hostname == self.node_hostname: + h_node_details = [ + [node_info['id'], len(node_info['bricks']), + node_info['name']] + for node_info in h_node_info['devices']] + return h_node_details, h_node + + @pytest.mark.tier4 + @podcmd.GlustoPod() + def test_dev_path_mapping_heketi_device_delete(self): + """Validate dev path mapping for heketi device delete lifecycle""" + h_client, h_url = self.heketi_client_node, self.heketi_server_url + + node_ids = heketi_ops.heketi_node_list(h_client, h_url) + self.assertTrue(node_ids, "Failed to get heketi node list") + + # Fetch #4th node for the operations + h_disable_node = node_ids[3] + + # Fetch bricks on the devices before volume create + h_node_details_before, h_node = self._get_bricks_and_device_details() + + # Bricks count on the node before pvc creation + brick_count_before = [count[1] for count in h_node_details_before] + + # Create file volume with app pod and verify IO's + # and compare path, UUID, vg_name + pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs() + + # Check if IO's are running + use_percent_after = self._get_space_use_percent_in_app_pod(pod_name) + self.assertNotEqual( + use_percent, use_percent_after, + "Failed to execute IO's in the app pod {} after 
respin".format( + pod_name)) + + # Fetch bricks on the devices after volume create + h_node_details_after, h_node = self._get_bricks_and_device_details() + + # Bricks count on the node after pvc creation + brick_count_after = [count[1] for count in h_node_details_after] + + self.assertGreater( + sum(brick_count_after), sum(brick_count_before), + "Failed to add bricks on the node {}".format(h_node)) + + # Enable the #4th node + heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node) + node_info = heketi_ops.heketi_node_info( + h_client, h_url, h_disable_node, json=True) + h_node_id = node_info['id'] + self.assertEqual( + node_info['state'], "online", + "Failed to enable node {}".format(h_disable_node)) + + # Fetch device list i.e to be deleted + h_node_info = heketi_ops.heketi_node_info( + h_client, h_url, h_node, json=True) + devices_list = [ + [device['id'], device['name']] + for device in h_node_info['devices']] + + # Device deletion operation + for device in devices_list: + device_id, device_name = device[0], device[1] + self.addCleanup( + heketi_ops.heketi_device_enable, h_client, h_url, + device_id, raise_on_error=False) + + # Disable device from heketi + device_disable = heketi_ops.heketi_device_disable( + h_client, h_url, device_id) + self.assertTrue( + device_disable, + "Device {} could not be disabled".format(device_id)) + + device_info = heketi_ops.heketi_device_info( + h_client, h_url, device_id, json=True) + self.assertEqual( + device_info['state'], "offline", + "Failed to disable device {}".format(device_id)) + + # Remove device from heketi + device_remove = heketi_ops.heketi_device_remove( + h_client, h_url, device_id) + self.assertTrue( + device_remove, + "Device {} could not be removed".format(device_id)) + + # Bricks after device removal + device_info = heketi_ops.heketi_device_info( + h_client, h_url, device_id, json=True) + bricks_count_after = len(device_info['bricks']) + self.assertFalse( + bricks_count_after, + "Failed to remove the bricks from the device {}".format( + device_id)) + + # Delete device from heketi + self.addCleanup( + heketi_ops. 
+                heketi_ops.heketi_device_add, h_client, h_url,
+                device_name, h_node, raise_on_error=False)
+            device_delete = heketi_ops.heketi_device_delete(
+                h_client, h_url, device_id)
+            self.assertTrue(
+                device_delete,
+                "Device {} could not be deleted".format(device_id))
+
+        # Check if IO's are running after device is deleted
+        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+        self.assertNotEqual(
+            use_percent, use_percent_after,
+            "Failed to execute IO's in the app pod {} after device "
+            "delete".format(pod_name))
+
+        # Add device operations
+        for device in devices_list:
+            device_name = device[1]
+
+            # Add device back to the node
+            heketi_ops.heketi_device_add(h_client, h_url, device_name, h_node)
+
+            # Fetch device info after device add
+            node_info = heketi_ops.heketi_node_info(
+                h_client, h_url, h_node, json=True)
+            device_id = None
+            for device in node_info["devices"]:
+                if device["name"] == device_name:
+                    device_id = device["id"]
+                    break
+            self.assertTrue(
+                device_id,
+                "Failed to add device {} on node"
+                " {}".format(device_name, h_node))
+
+        # Disable the 4th node
+        heketi_ops.heketi_node_disable(h_client, h_url, h_node_id)
+        node_info = heketi_ops.heketi_node_info(
+            h_client, h_url, h_node_id, json=True)
+        self.assertEqual(
+            node_info['state'], "offline",
+            "Failed to disable node {}".format(h_node_id))
+        pvc_amount, pvc_size = 5, 1
+
+        # Fetch bricks on the devices before volume create
+        h_node_details_before, h_node = self._get_bricks_and_device_details()
+
+        # Bricks count on the node before pvc creation
+        brick_count_before = [count[1] for count in h_node_details_before]
+
+        # Create file volumes
+        pvc_name = self.create_and_wait_for_pvcs(
+            pvc_size=pvc_size, pvc_amount=pvc_amount)
+        self.assertEqual(
+            len(pvc_name), pvc_amount,
+            "Failed to create {} pvcs".format(pvc_amount))
+
+        # Fetch bricks on the devices after volume create
+        h_node_details_after, h_node = self._get_bricks_and_device_details()
+
+        # Bricks count on the node after pvc creation
+        brick_count_after = [count[1] for count in h_node_details_after]
+
+        self.assertGreater(
+            sum(brick_count_after), sum(brick_count_before),
+            "Failed to add bricks on the node {}".format(h_node))
+
+        # Check if IO's are running after new device is added
+        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+        self.assertNotEqual(
+            use_percent, use_percent_after,
+            "Failed to execute IO's in the app pod {} after device "
+            "add".format(pod_name))
+
+    def _get_bricks_counts_and_device_name(self):
+        """Fetch bricks count and device name from all the nodes"""
+        h_client, h_url = self.heketi_client_node, self.heketi_server_url
+
+        # Fetch bricks on the devices
+        h_nodes = heketi_ops.heketi_node_list(h_client, h_url)
+
+        node_details = {}
+        for h_node in h_nodes:
+            h_node_info = heketi_ops.heketi_node_info(
+                h_client, h_url, h_node, json=True)
+            node_details[h_node] = [[], []]
+            for device in h_node_info['devices']:
+                node_details[h_node][0].append(len(device['bricks']))
+                node_details[h_node][1].append(device['id'])
+        return node_details
+
+    @pytest.mark.tier4
+    @podcmd.GlustoPod()
+    def test_dev_path_mapping_heketi_node_delete(self):
+        """Validate dev path mapping for heketi node deletion lifecycle"""
+        h_client, h_url = self.heketi_client_node, self.heketi_server_url
+
+        node_ids = heketi_ops.heketi_node_list(h_client, h_url)
+        self.assertTrue(node_ids, "Failed to get heketi node list")
+
+        # Fetch the 4th node for the operations
+        h_disable_node = node_ids[3]
+
+        # Fetch bricks on the devices before volume create
+        h_node_details_before, h_node = self._get_bricks_and_device_details()
+
+        # Bricks count on the node before pvc creation
+        brick_count_before = [count[1] for count in h_node_details_before]
+
+        # Create file volume with app pod and verify IO's
+        # and compare path, UUID, vg_name
+        pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()
+
+        # Check if IO's are running
+        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+        self.assertNotEqual(
+            use_percent, use_percent_after,
+            "Failed to execute IO's in the app pod {} after volume "
+            "create".format(pod_name))
+
+        # Fetch bricks on the devices after volume create
+        h_node_details_after, h_node = self._get_bricks_and_device_details()
+
+        # Bricks count on the node after pvc creation
+        brick_count_after = [count[1] for count in h_node_details_after]
+
+        self.assertGreater(
+            sum(brick_count_after), sum(brick_count_before),
+            "Failed to add bricks on the node {}".format(h_node))
+        self.addCleanup(
+            heketi_ops.heketi_node_disable, h_client, h_url, h_disable_node)
+
+        # Enable the 4th node
+        heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node)
+        node_info = heketi_ops.heketi_node_info(
+            h_client, h_url, h_disable_node, json=True)
+        h_node_id = node_info['id']
+        self.assertEqual(
+            node_info['state'], "online",
+            "Failed to enable node {}".format(h_disable_node))
+
+        # Disable the node and check for brick migrations
+        self.addCleanup(
+            heketi_ops.heketi_node_enable, h_client, h_url, h_node,
+            raise_on_error=False)
+        heketi_ops.heketi_node_disable(h_client, h_url, h_node)
+        node_info = heketi_ops.heketi_node_info(
+            h_client, h_url, h_node, json=True)
+        self.assertEqual(
+            node_info['state'], "offline",
+            "Failed to disable node {}".format(h_node))
+
+        # Before bricks migration
+        h_node_info = heketi_ops.heketi_node_info(
+            h_client, h_url, h_node, json=True)
+
+        # Bricks before migration on the node, i.e. the node to be deleted
+        bricks_counts_before = 0
+        for device in h_node_info['devices']:
+            bricks_counts_before += (len(device['bricks']))
+
+        # Remove the node
+        heketi_ops.heketi_node_remove(h_client, h_url, h_node)
+
+        # After bricks migration
+        h_node_info_after = heketi_ops.heketi_node_info(
+            h_client, h_url, h_node, json=True)
+
+        # Bricks after migration on the node, i.e. the node to be deleted
+        bricks_counts = 0
+        for device in h_node_info_after['devices']:
+            bricks_counts += (len(device['bricks']))
+
+        self.assertFalse(
+            bricks_counts,
+            "Failed to remove all the bricks from node {}".format(h_node))
+
+        # Old node which is to be deleted and new node where bricks reside
+        old_node, new_node = h_node, h_node_id
+
+        # Node info for the new node where bricks reside after migration
+        h_node_info_new = heketi_ops.heketi_node_info(
+            h_client, h_url, new_node, json=True)
+
+        bricks_counts_after = 0
+        for device in h_node_info_new['devices']:
+            bricks_counts_after += (len(device['bricks']))
+
+        self.assertEqual(
+            bricks_counts_before, bricks_counts_after,
+            "Failed to migrate bricks from node {} to node {}".format(
+                old_node, new_node))
+
+        # Fetch the device list, i.e. the devices to be deleted
+        h_node_info = heketi_ops.heketi_node_info(
+            h_client, h_url, h_node, json=True)
+        devices_list = [
+            [device['id'], device['name']]
+            for device in h_node_info['devices']]
+
+        for device in devices_list:
+            device_id = device[0]
+            device_name = device[1]
+            self.addCleanup(
+                heketi_ops.heketi_device_add, h_client, h_url,
+                device_name, h_node, raise_on_error=False)
+
+            # Device deletion from heketi node
+            device_delete = heketi_ops.heketi_device_delete(
+                h_client, h_url, device_id)
+            self.assertTrue(
+                device_delete,
+                "Failed to delete the device {}".format(device_id))
+
+        node_info = heketi_ops.heketi_node_info(
+            h_client, h_url, h_node, json=True)
+        cluster_id = node_info['cluster']
+        zone = node_info['zone']
+        storage_hostname = node_info['hostnames']['manage'][0]
+        storage_ip = node_info['hostnames']['storage'][0]
+
+        # Delete the node
+        self.addCleanup(
+            heketi_ops.heketi_node_add, h_client, h_url,
+            zone, cluster_id, storage_hostname, storage_ip,
+            raise_on_error=False)
+        heketi_ops.heketi_node_delete(h_client, h_url, h_node)
+
+        # Verify if the node is deleted
+        node_ids = heketi_ops.heketi_node_list(h_client, h_url)
+        self.assertNotIn(
+            old_node, node_ids,
+            "Failed to delete the node {}".format(old_node))
+
+        # Check if IO's are running
+        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+        self.assertNotEqual(
+            use_percent, use_percent_after,
+            "Failed to execute IO's in the app pod {} after node "
+            "delete".format(pod_name))
+
+        # Adding node back
+        h_node_info = heketi_ops.heketi_node_add(
+            h_client, h_url, zone, cluster_id,
+            storage_hostname, storage_ip, json=True)
+        self.assertTrue(
+            h_node_info,
+            "Failed to add the node in the cluster {}".format(cluster_id))
+        h_node_id = h_node_info["id"]
+
+        # Adding devices to the new node
+        for device in devices_list:
+            storage_device = device[1]
+
+            # Add device to the new heketi node
+            heketi_ops.heketi_device_add(
+                h_client, h_url, storage_device, h_node_id)
+            heketi_node_info = heketi_ops.heketi_node_info(
+                h_client, h_url, h_node_id, json=True)
+            device_id = None
+            for device in heketi_node_info["devices"]:
+                if device["name"] == storage_device:
+                    device_id = device["id"]
+                    break
+
+            self.assertTrue(
+                device_id, "Failed to add device {} on node {}".format(
                    storage_device, h_node_id))
+
+        # Create n pvcs in order to verify if the bricks reside on the new node
+        pvc_amount, pvc_size = 5, 1
+
+        # Fetch bricks on the devices before volume create
+        h_node_details_before, h_node = self._get_bricks_and_device_details()
+
+        # Bricks count on the node before pvc creation
+        brick_count_before = [count[1] for count in h_node_details_before]
+
+        # Create file volumes
+        pvc_name = self.create_and_wait_for_pvcs(
+            pvc_size=pvc_size, pvc_amount=pvc_amount)
+        self.assertEqual(
+            len(pvc_name), pvc_amount,
+            "Failed to create {} pvcs".format(pvc_amount))
+
+        # Fetch bricks on the devices after volume create
+        h_node_details_after, h_node = self._get_bricks_and_device_details()
+
+        # Bricks count on the node after pvc creation
+        brick_count_after = [count[1] for count in h_node_details_after]
+
+        self.assertGreater(
+            sum(brick_count_after), sum(brick_count_before),
+            "Failed to add bricks on the new node {}".format(new_node))
+
+        # Check if IO's are running after new node is added
+        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+        self.assertNotEqual(
+            use_percent, use_percent_after,
+            "Failed to execute IO's in the app pod {} after node "
+            "add".format(pod_name))
diff --git a/tests/functional/provisioning/test_dynamic_provisioning_file.py b/tests/functional/provisioning/test_dynamic_provisioning_file.py
index cdffdbf6..87ff754a 100644
--- a/tests/functional/provisioning/test_dynamic_provisioning_file.py
+++ b/tests/functional/provisioning/test_dynamic_provisioning_file.py
@@ -4,6 +4,7 @@ from glusto.core import Glusto as g
 import pytest

 from openshiftstoragelibs.baseclass import BaseClass
+from openshiftstoragelibs import command
 from openshiftstoragelibs.exceptions import ExecutionError
 from openshiftstoragelibs.heketi_ops import (
     heketi_node_info,
@@ -13,7 +14,12 @@ from openshiftstoragelibs.heketi_ops import (
     heketi_volume_list,
     verify_volume_name_prefix,
 )
-from openshiftstoragelibs.node_ops import node_reboot_by_command
+from openshiftstoragelibs.node_ops import (
+    find_vm_name_by_ip_or_hostname,
+    node_reboot_by_command,
+    power_off_vm_by_name,
+    power_on_vm_by_name
+)
 from openshiftstoragelibs.openshift_ops import (
     cmd_run_on_gluster_pod_or_node,
     get_gluster_host_ips_by_pvc_name,
@@ -545,3 +551,37 @@ class TestDynamicProvisioningP0(BaseClass):
                  "-o=custom-columns=:.spec.storageClassName" % pvc_name)
         out = self.cmd_run(get_sc_of_pvc_cmd)
         self.assertEqual(out, self.sc_name)
+
+    @pytest.mark.tier2
+    def test_node_failure_pv_mounted(self):
+        """Test node failure when PV is mounted with app pods running"""
+        filepath = "/mnt/file_for_testing_volume.log"
+        pvc_name = self.create_and_wait_for_pvc()
+
+        dc_and_pod_names = self.create_dcs_with_pvc(pvc_name)
+        dc_name, pod_name = dc_and_pod_names[pvc_name]
+
+        # Get the gluster host backing the PV mount of the app pod
+        mount_source_cmd = "df -kh /mnt -P | tail -1 | awk '{print $1}'"
+        pod_cmd = "oc exec {} -- {}".format(pod_name, mount_source_cmd)
+        hostname = command.cmd_run(pod_cmd, hostname=self.node)
+        hostname = hostname.split(":")[0]
+
+        vm_name = find_vm_name_by_ip_or_hostname(hostname)
+        self.addCleanup(power_on_vm_by_name, vm_name)
+        power_off_vm_by_name(vm_name)
+
+        cmd = "dd if=/dev/urandom of={} bs=1K count=100".format(filepath)
+        ret, _, err = oc_rsh(self.node, pod_name, cmd)
+        self.assertFalse(
+            ret, "Failed to execute command {} on {} with error {}"
+            .format(cmd, self.node, err))
+
+        oc_delete(self.node, 'pod', pod_name)
+        wait_for_resource_absence(self.node, 'pod', pod_name)
+        pod_name = get_pod_name_from_dc(self.node, dc_name)
+        wait_for_pod_be_ready(self.node, pod_name)
+
+        ret, _, err = oc_rsh(self.node, pod_name, cmd)
+        self.assertFalse(
+            ret, "Failed to execute command {} on {} with error {}"
+            .format(cmd, self.node, err))
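Several of the new tests (logging, metrics, prometheus) repeat one polling idiom: sample a value inside a waiter.Waiter(timeout, interval) loop, break once the condition holds, and fail if the waiter expires. A condensed sketch of that pattern, assuming only the waiter and exceptions modules used above (wait_until_grows and its sample_fn callable are hypothetical names, not library API):

    from openshiftstoragelibs import exceptions
    from openshiftstoragelibs import waiter


    def wait_until_grows(sample_fn, baseline, timeout=600, interval=30):
        """Poll sample_fn() until its value exceeds baseline."""
        for w in waiter.Waiter(timeout, interval):
            current = sample_fn()
            if int(current) > int(baseline):
                return current
        if w.expired:
            raise exceptions.ExecutionError(
                "sampled value {} never exceeded baseline {}".format(
                    current, baseline))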