-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/gluster_ops.py         91
-rw-r--r--  tests/functional/heketi/test_heketi_brick_evict.py                 90
-rwxr-xr-x  tests/functional/heketi/test_heketi_device_operations.py           96
-rw-r--r--  tests/functional/heketi/test_volume_expansion_and_devices.py       42
-rw-r--r--  tests/functional/logging/test_logging_validations.py               42
-rw-r--r--  tests/functional/metrics/test_metrics_validation.py                39
-rw-r--r--  tests/functional/prometheous/test_prometheus_validations.py       499
-rw-r--r--  tests/functional/prometheous/test_prometheus_validations_file.py   94
-rw-r--r--  tests/functional/provisioning/test_dev_path_mapping_file.py       418
-rw-r--r--  tests/functional/provisioning/test_dynamic_provisioning_file.py    42
10 files changed, 1448 insertions, 5 deletions
diff --git a/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py b/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py
index f621a860..7f5f5535 100644
--- a/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py
+++ b/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py
@@ -4,7 +4,12 @@ try:
except ImportError:
# py2
import json
+try:
+ import xml.etree.cElementTree as etree
+except ImportError:
+ import xml.etree.ElementTree as etree
import re
+import six
import time
from glusto.core import Glusto as g
@@ -20,7 +25,10 @@ from glustolibs.gluster.volume_ops import (
from openshiftstoragelibs import exceptions
from openshiftstoragelibs.heketi_ops import heketi_blockvolume_info
-from openshiftstoragelibs.openshift_ops import cmd_run_on_gluster_pod_or_node
+from openshiftstoragelibs.openshift_ops import (
+ cmd_run_on_gluster_pod_or_node,
+ get_ocp_gluster_pod_details,
+)
from openshiftstoragelibs import podcmd
from openshiftstoragelibs import waiter
@@ -352,3 +360,84 @@ def get_gluster_vol_free_inodes_with_hosts_of_bricks(vol_name):
inodes_info = {brick_process: process_data["inodesFree"]}
hosts_with_inodes_info[g_node].update(inodes_info)
return hosts_with_inodes_info
+
+
+def _get_gluster_cmd(target, command):
+
+ if isinstance(command, six.string_types):
+ command = [command]
+ ocp_client_node = list(g.config['ocp_servers']['client'].keys())[0]
+ gluster_pods = get_ocp_gluster_pod_details(ocp_client_node)
+
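+    # Resolve the target: prefer a gluster pod when the deployment is
+    # containerized, otherwise fall back to a standalone gluster server
+    # from the config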
+ if target == 'auto_get_gluster_endpoint':
+ if gluster_pods:
+ target = podcmd.Pod(ocp_client_node, gluster_pods[0]["pod_name"])
+ else:
+ target = list(g.config.get("gluster_servers", {}).keys())[0]
+ elif not isinstance(target, podcmd.Pod) and gluster_pods:
+ for g_pod in gluster_pods:
+ if target in (g_pod['pod_host_ip'], g_pod['pod_hostname']):
+ target = podcmd.Pod(ocp_client_node, g_pod['pod_name'])
+ break
+
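+    # Pod targets are reached via 'oc rsh' from the OCP client node;
+    # plain hosts run the command directly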
+ if isinstance(target, podcmd.Pod):
+ return target.node, ' '.join(['oc', 'rsh', target.podname] + command)
+
+ return target, ' '.join(command)
+
+
+def get_peer_status(mnode):
+ """Parse the output of command 'gluster peer status' using run_async.
+
+ Args:
+ mnode (str): Node on which command has to be executed.
+
+ Returns:
+ NoneType: None if command execution fails or parse errors.
+ list: list of dicts on success.
+
+ Examples:
+        >>> get_peer_status(mnode='abc.lab.eng.xyz.com')
+ [{'uuid': '77dc299a-32f7-43d8-9977-7345a344c398',
+ 'hostname': 'ijk.lab.eng.xyz.com',
+ 'state': '3',
+ 'hostnames' : ['ijk.lab.eng.xyz.com'],
+ 'connected': '1',
+ 'stateStr': 'Peer in Cluster'},
+
+ {'uuid': 'b15b8337-9f8e-4ec3-8bdb-200d6a67ae12',
+ 'hostname': 'def.lab.eng.xyz.com',
+ 'state': '3',
+ 'hostnames': ['def.lab.eng.xyz.com'],
+ 'connected': '1',
+ 'stateStr': 'Peer in Cluster'}
+ ]
+ """
+ mnode, cmd = _get_gluster_cmd(mnode, "gluster peer status --xml")
+ obj = g.run_async(mnode, cmd, log_level='DEBUG')
+ ret, out, err = obj.async_communicate()
+
+ if ret:
+ g.log.error(
+ "Failed to execute peer status command on node {} with error "
+ "{}".format(mnode, err))
+ return None
+
+ try:
+ root = etree.XML(out)
+ except etree.ParseError:
+ g.log.error("Failed to parse the gluster peer status xml output.")
+ return None
+
+ peer_status_list = []
+ for peer in root.findall("peerStatus/peer"):
+ peer_dict = {}
+        for element in peer:
+            if element.tag == "hostnames":
+                # Collect the nested <hostname> entries as a list
+                peer_dict[element.tag] = [
+                    hostname.text for hostname in element]
+            else:
+                peer_dict[element.tag] = element.text
+ peer_status_list.append(peer_dict)
+ return peer_status_list
diff --git a/tests/functional/heketi/test_heketi_brick_evict.py b/tests/functional/heketi/test_heketi_brick_evict.py
index 27cc1ebf..1cba24c4 100644
--- a/tests/functional/heketi/test_heketi_brick_evict.py
+++ b/tests/functional/heketi/test_heketi_brick_evict.py
@@ -1,11 +1,16 @@
import pytest
from glustolibs.gluster import volume_ops
+import six
from openshiftstoragelibs.baseclass import BaseClass
+from openshiftstoragelibs import exceptions
from openshiftstoragelibs import heketi_ops
from openshiftstoragelibs import heketi_version
+from openshiftstoragelibs import node_ops
+from openshiftstoragelibs import openshift_ops
from openshiftstoragelibs import podcmd
+from openshiftstoragelibs import waiter
class TestHeketiBrickEvict(BaseClass):
@@ -20,6 +25,8 @@ class TestHeketiBrickEvict(BaseClass):
"heketi-client package {} does not support brick evict".format(
version.v_str))
+ self.ocp_client = self.ocp_master_node[0]
+
node_list = heketi_ops.heketi_node_list(
self.heketi_client_node, self.heketi_server_url)
@@ -88,3 +95,86 @@ class TestHeketiBrickEvict(BaseClass):
bricks_new, gbricks, "gluster vol info and heketi vol info "
"mismatched after brick evict {} \n {}".format(
gvol_info, vol_info_new))
+
+ def _wait_for_gluster_pod_after_node_reboot(self, node_hostname):
+ """Wait for glusterfs pod to be ready after node reboot"""
+ openshift_ops.wait_for_ocp_node_be_ready(
+ self.ocp_client, node_hostname)
+ gluster_pod = openshift_ops.get_gluster_pod_name_for_specific_node(
+ self.ocp_client, node_hostname)
+ openshift_ops.wait_for_pod_be_ready(self.ocp_client, gluster_pod)
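+        # 'exited' is the healthy state for the oneshot
+        # gluster-block-target unit; the daemons must be 'running'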
+ services = (
+ ("glusterd", "running"), ("gluster-blockd", "running"),
+ ("tcmu-runner", "running"), ("gluster-block-target", "exited"))
+ for service, state in services:
+ openshift_ops.check_service_status_on_pod(
+ self.ocp_client, gluster_pod, service, "active", state)
+
+ @pytest.mark.tier4
+ def test_brick_evict_with_node_down(self):
+ """Test brick evict basic functionality and verify brick evict
+ after node down"""
+
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+
+        # Disable extra nodes so that only 3 nodes remain enabled
+ node_list = heketi_ops.heketi_node_list(h_node, h_server)
+ if len(node_list) > 3:
+ for node_id in node_list[3:]:
+ heketi_ops.heketi_node_disable(h_node, h_server, node_id)
+ self.addCleanup(
+ heketi_ops.heketi_node_enable, h_node, h_server, node_id)
+
+ # Create heketi volume
+ vol_info = heketi_ops.heketi_volume_create(
+ h_node, h_server, 1, json=True)
+ self.addCleanup(
+ heketi_ops.heketi_volume_delete,
+ h_node, h_server, vol_info.get('id'))
+
+ # Get node on which heketi pod is scheduled
+ heketi_pod = openshift_ops.get_pod_name_from_dc(
+ self.ocp_client, self.heketi_dc_name)
+ heketi_node = openshift_ops.oc_get_custom_resource(
+ self.ocp_client, 'pod', '.:spec.nodeName', heketi_pod)[0]
+
+        # Get hostnames of the disabled nodes
+ host_list = []
+ for node_id in node_list[3:]:
+ node_info = heketi_ops.heketi_node_info(
+ h_node, h_server, node_id, json=True)
+ host_list.append(node_info.get('hostnames').get('manage')[0])
+
+        # Get a brick id on a glusterfs node which is neither the
+        # heketi node nor a disabled node
+        for brick in vol_info.get('bricks', []):
+            node_info = heketi_ops.heketi_node_info(
+                h_node, h_server, brick.get('node'), json=True)
+            hostname = node_info.get('hostnames').get('manage')[0]
+            if (hostname != heketi_node) and (hostname not in host_list):
+                brick_id = brick.get('id')
+                break
+
+ # Bring down the glusterfs node
+ vm_name = node_ops.find_vm_name_by_ip_or_hostname(hostname)
+ self.addCleanup(
+ self._wait_for_gluster_pod_after_node_reboot, hostname)
+ self.addCleanup(node_ops.power_on_vm_by_name, vm_name)
+ node_ops.power_off_vm_by_name(vm_name)
+
+        # Wait for the glusterfs node to become NotReady
+ custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
+ for w in waiter.Waiter(300, 20):
+ status = openshift_ops.oc_get_custom_resource(
+ self.ocp_client, 'node', custom, hostname)
+ if status[0] in ['False', 'Unknown']:
+ break
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to bring down node {}".format(hostname))
+
+ # Perform brick evict operation
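+        # With the node down, evict may legitimately fail to find a
+        # replacement brick; tolerate only that specific error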
+ try:
+ heketi_ops.heketi_brick_evict(h_node, h_server, brick_id)
+ except AssertionError as e:
+ if ('No Replacement was found' not in six.text_type(e)):
+ raise
diff --git a/tests/functional/heketi/test_heketi_device_operations.py b/tests/functional/heketi/test_heketi_device_operations.py
index a6831e98..05f16ef9 100755
--- a/tests/functional/heketi/test_heketi_device_operations.py
+++ b/tests/functional/heketi/test_heketi_device_operations.py
@@ -1,6 +1,7 @@
import ddt
from glusto.core import Glusto as g
import pytest
+import six
from openshiftstoragelibs.baseclass import BaseClass
from openshiftstoragelibs.heketi_ops import (
@@ -17,6 +18,8 @@ from openshiftstoragelibs.heketi_ops import (
heketi_topology_info,
heketi_volume_create,
heketi_volume_delete,
+ rm_tags,
+ set_tags,
validate_dev_path_vg_and_uuid,
)
from openshiftstoragelibs import utils
@@ -600,3 +603,96 @@ class TestHeketiDeviceOperations(BaseClass):
h_node, h_url, node, dev)
self.assertTrue(is_true, "Failed to verify dv_path for the "
"device {}".format(dev))
+
+ @pytest.mark.tier3
+    def test_volume_create_as_tag_matching_rule(self):
+        """Validate volume creation fails when the tag-matching rule
+        matches only one device in the cluster"""
+
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+
+ # Set tag on any one device in cluster
+ node_list = heketi_node_list(h_node, h_server, json=True)
+ node_info = heketi_node_info(h_node, h_server, node_list[0], json=True)
+ device_id = node_info.get('devices', {})[0].get('id')
+ set_tags(h_node, h_server, 'device', device_id, "tier:it")
+ self.addCleanup(rm_tags, h_node, h_server, 'device', device_id, 'tier')
+
+ # Volume creation should fail
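+        # Only one device carries the matching tag, so heketi cannot
+        # allocate all the replicas and the create must fail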
+        try:
+            vol_info = heketi_volume_create(
+                h_node, h_server, 2, json=True,
+                gluster_volume_options="user.heketi.device-tag-match tier=it")
+        except AssertionError as e:
+            if ("Failed to allocate new volume" not in six.text_type(e)):
+                raise
+        else:
+            self.addCleanup(
+                heketi_volume_delete, h_node, h_server, vol_info.get('id'))
+            raise AssertionError(
+                "Volume creation with the tag-matching rule was expected "
+                "to fail but succeeded")
+
+ @pytest.mark.tier4
+ def test_device_settags_tier_option(self):
+ """Validate volume creation with a tag-matching rule"""
+
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+ initial_brick_count, before_brick_count, after_brick_count = [], [], []
+
+ # Set tag on device on 3 different nodes
+ node_list = heketi_node_list(h_node, h_server, json=True)
+ device_list = []
+ for node_id in node_list[:3]:
+ node_info = heketi_node_info(h_node, h_server, node_id, json=True)
+ device_id = node_info.get('devices', {})[0].get('id')
+ device_list.append(device_id)
+ set_tags(h_node, h_server, 'device', device_id, "tier:test")
+ self.addCleanup(
+ rm_tags, h_node, h_server, 'device', device_id, "tier",
+ raise_on_error=False)
+
+ # Get initial number of bricks present on device
+ for device_id in device_list:
+ device_info = heketi_device_info(
+ h_node, h_server, device_id, json=True)
+ initial_brick_count.append(len(device_info.get("bricks")))
+
+ # Create volume with device tag option
+ volume_info = heketi_volume_create(
+ h_node, h_server, 2,
+ gluster_volume_options="user.heketi.device-tag-match tier=test",
+ json=True)
+ self.addCleanup(
+ heketi_volume_delete, h_node, h_server, volume_info.get("id"))
+
+ # Get number of bricks present on device after volume create
+ for device_id in device_list:
+ device_info = heketi_device_info(
+ h_node, h_server, device_id, json=True)
+ before_brick_count.append(len(device_info.get("bricks")))
+
+        # Validate volume was created on the tagged devices
+        self.assertGreater(
+            sum(before_brick_count), sum(initial_brick_count),
+            "Volume {} was not created on the tagged devices".format(
+                volume_info.get("id")))
+
+        # Create volume with the not-equal tag-match option
+ volume_info = heketi_volume_create(
+ h_node, h_server, 2,
+ gluster_volume_options="user.heketi.device-tag-match tier!=test",
+ json=True)
+ self.addCleanup(
+ heketi_volume_delete, h_node, h_server, volume_info.get("id"))
+
+ # Get number of bricks present on device after volume create
+ for device_id in device_list:
+ device_info = heketi_device_info(
+ h_node, h_server, device_id, json=True)
+ after_brick_count.append(len(device_info.get("bricks")))
+
+        # Validate volume was not created on the tagged devices
+        self.assertEqual(
+            before_brick_count, after_brick_count,
+            "Volume {} was created on the tagged devices".format(
+                volume_info.get("id")))
+
+ # Update the tag on device
+ for device_id in device_list:
+ set_tags(h_node, h_server, 'device', device_id, "tier:test_update")
+ self.addCleanup(
+ rm_tags, h_node, h_server, 'device', device_id, "tier")
diff --git a/tests/functional/heketi/test_volume_expansion_and_devices.py b/tests/functional/heketi/test_volume_expansion_and_devices.py
index df064e76..fa78b1aa 100644
--- a/tests/functional/heketi/test_volume_expansion_and_devices.py
+++ b/tests/functional/heketi/test_volume_expansion_and_devices.py
@@ -10,6 +10,7 @@ from openshiftstoragelibs import (
heketi_ops,
podcmd,
)
+from openshiftstoragelibs import utils
class TestVolumeExpansionAndDevicesTestCases(BaseClass):
@@ -521,3 +522,44 @@ class TestVolumeExpansionAndDevicesTestCases(BaseClass):
free_space_after_deletion > free_space_after_expansion,
"Free space is not reclaimed after volume deletion of %s"
% volume_id)
+
+ @pytest.mark.tier2
+ @podcmd.GlustoPod()
+ def test_replica_volume_expand(self):
+ """
+ Test expansion of a replica volume
+ """
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+ volume_name = (
+ "autotests-heketi-volume-{}".format(utils.get_random_str()))
+ volume_size = 10
+ creation_info = self.create_heketi_volume_with_name_and_wait(
+ volume_name, volume_size, json=True, raise_on_cleanup_error=False)
+ volume_id = creation_info["id"]
+ volume_info = heketi_ops.heketi_volume_info(
+ h_node, h_server, volume_id, json=True)
+
+ # Get gluster volume info
+ gluster_vol = volume_ops.get_volume_info(
+ 'auto_get_gluster_endpoint', volname=volume_name)
+ self.assertTrue(
+ gluster_vol, "Failed to get volume {} info".format(volume_name))
+        g_vol_info = gluster_vol[volume_name]
+        self.assertEqual(
+            g_vol_info['replicaCount'], "3",
+            "Replica count is different for volume {}, Actual: {}, "
+            "Expected: 3".format(volume_name, g_vol_info['replicaCount']))
+
+ expand_size = 5
+ heketi_ops.heketi_volume_expand(
+ h_node, h_server, volume_id, expand_size)
+ volume_info = heketi_ops.heketi_volume_info(
+ h_node, h_server, volume_id, json=True)
+ expected_size = volume_size + expand_size
+ self.assertEqual(
+ volume_info['size'], expected_size,
+ "Volume Expansion failed, Expected Size: {}, Actual "
+ "Size: {}".format(str(expected_size), str(volume_info['size'])))
+
+ self.get_brick_and_volume_status(volume_name)
+ self.get_rebalance_status(volume_name)
diff --git a/tests/functional/logging/test_logging_validations.py b/tests/functional/logging/test_logging_validations.py
index a160fd7a..509c71d8 100644
--- a/tests/functional/logging/test_logging_validations.py
+++ b/tests/functional/logging/test_logging_validations.py
@@ -9,6 +9,7 @@ from openshiftstoragelibs import command
from openshiftstoragelibs import exceptions
from openshiftstoragelibs import gluster_ops
from openshiftstoragelibs import openshift_ops
+from openshiftstoragelibs import waiter
@ddt.ddt
@@ -352,3 +353,44 @@ class TestLoggingAndGlusterRegistryValidation(GlusterBlockBaseClass):
pvc_name, self._logging_es_dc,
heketi_server_url=self._registry_heketi_server_url,
is_registry_gluster=True)
+
+ @pytest.mark.tier3
+ def test_run_workload_with_logging(self):
+ """Validate logs are being generated aifter running workload"""
+
+ # Get the size of used space of logs
+ es_pod = openshift_ops.get_pod_name_from_dc(
+ self._master, self._logging_es_dc)
+ mount_point = "/elasticsearch/persistent"
+        cmd_space_check = ('df -k --output=used {} | sed "/Used/d" |'
+                           'sed "s/G//"'.format(mount_point))
+ ret, initial_used_percent, err = openshift_ops.oc_rsh(
+ self._master, es_pod, cmd_space_check)
+ err_msg = "Failed to fetch the size of used space, error {}"
+ self.assertFalse(ret, err_msg.format(err))
+
+        # Create 20 PVCs and app pods with IO
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+ pvc_count, batch_count = 5, 4
+ for _ in range(batch_count):
+ pvcs = self.create_and_wait_for_pvcs(pvc_amount=pvc_count)
+ self.create_dcs_with_pvc(pvcs)
+ self.addCleanup(
+ openshift_ops.switch_oc_project,
+ self._master, self.storage_project_name)
+
+        # Get and verify the final size of used space of logs
+ openshift_ops.switch_oc_project(
+ self._master, self._logging_project_name)
+ for w in waiter.Waiter(600, 30):
+ ret, final_used_percent, err = openshift_ops.oc_rsh(
+ self._master, es_pod, cmd_space_check)
+ self.assertFalse(ret, err_msg.format(err))
+ if int(initial_used_percent) < int(final_used_percent):
+ break
+ if w.expired:
+ raise AssertionError(
+ "Initial used space {} for logs is not less than final "
+ "used space {}".format(
+ initial_used_percent, final_used_percent))
diff --git a/tests/functional/metrics/test_metrics_validation.py b/tests/functional/metrics/test_metrics_validation.py
index ce7e843f..e16fe349 100644
--- a/tests/functional/metrics/test_metrics_validation.py
+++ b/tests/functional/metrics/test_metrics_validation.py
@@ -27,6 +27,7 @@ from openshiftstoragelibs.openshift_storage_libs import (
get_iscsi_block_devices_by_path,
get_mpath_name_from_device_name,
)
+from openshiftstoragelibs import waiter
@ddt.ddt
@@ -274,3 +275,41 @@ class TestMetricsAndGlusterRegistryValidation(GlusterBlockBaseClass):
restart_gluster_vol_brick_processes(
self.master, bhv_name, list(self.registry_servers_info.keys()))
self.addCleanup(self.cassandra_pod_delete_cleanup, raise_on_error=True)
+
+ @pytest.mark.tier3
+ def test_run_workload_with_metrics(self):
+ """Validate if logs are being generated after running workload"""
+
+        # Get the size of used space of metrics data
+ cassandra_pod = get_pod_name_from_rc(
+ self.master, self.metrics_rc_hawkular_cassandra)
+ mount_point = "/cassandra_data"
+ cmd_space_check = ('df -k --output=used {} | sed "/Used/d" |'
+ 'sed "s/G//"'.format(mount_point))
+ ret, initial_used_percent, err = oc_rsh(
+ self.master, cassandra_pod, cmd_space_check)
+ err_msg = "Failed to fetch the size of used space, error {}"
+ self.assertFalse(ret, err_msg.format(err))
+
+ # Create 20 PVCs and app pods with IO
+ switch_oc_project(self.master, self.storage_project_name)
+ pvc_count, batch_count = 5, 4
+ for _ in range(batch_count):
+ pvcs = self.create_and_wait_for_pvcs(pvc_amount=pvc_count)
+ self.create_dcs_with_pvc(pvcs)
+ self.addCleanup(
+ switch_oc_project, self.master, self.storage_project_name)
+
+        # Get and verify the final size of used space of metrics data
+ switch_oc_project(self.master, self.metrics_project_name)
+ for w in waiter.Waiter(600, 30):
+ ret, final_used_percent, err = oc_rsh(
+ self.master, cassandra_pod, cmd_space_check)
+ self.assertFalse(ret, err_msg.format(err))
+ if int(initial_used_percent) < int(final_used_percent):
+ break
+ if w.expired:
+ raise AssertionError(
+ "Initial used space {} for logs is not less than final "
+ "used space {}".format(
+ initial_used_percent, final_used_percent))
diff --git a/tests/functional/prometheous/test_prometheus_validations.py b/tests/functional/prometheous/test_prometheus_validations.py
index 6617b647..68b69212 100644
--- a/tests/functional/prometheous/test_prometheus_validations.py
+++ b/tests/functional/prometheous/test_prometheus_validations.py
@@ -5,22 +5,41 @@ except ImportError:
# py2
import json
from pkg_resources import parse_version
+from functools import reduce
import ddt
from glusto.core import Glusto as g
+from glustolibs.gluster import brick_libs
+from glustolibs.gluster import volume_ops
import pytest
from openshiftstoragelibs.baseclass import GlusterBlockBaseClass
from openshiftstoragelibs import command
from openshiftstoragelibs import exceptions
from openshiftstoragelibs import heketi_ops
+from openshiftstoragelibs import gluster_ops
+from openshiftstoragelibs import node_ops
from openshiftstoragelibs import openshift_ops
+from openshiftstoragelibs import openshift_storage_libs
+from openshiftstoragelibs import podcmd
from openshiftstoragelibs import waiter
@ddt.ddt
class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass):
+ @classmethod
+ def setUpClass(cls):
+ super(TestPrometheusAndGlusterRegistryValidation, cls).setUpClass()
+
+ cls.metrics = ('heketi_volumes_count',
+ 'heketi_block_volumes_count',
+ 'heketi_device_brick_count',
+ 'heketi_device_free_bytes',
+ 'heketi_nodes_count',
+ 'heketi_device_used_bytes',
+ 'heketi_device_size_bytes')
+
def setUp(self):
"""Initialize all the variables which are necessary for test cases"""
super(TestPrometheusAndGlusterRegistryValidation, self).setUp()
@@ -38,6 +57,8 @@ class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass):
'heketi_server_url'])
self._registry_project_name = (
g.config['openshift']['registry_project_name'])
+ self._registry_servers_info = (
+ g.config['gluster_registry_servers'])
except KeyError as err:
self.skipTest("Config file doesn't have key {}".format(err))
@@ -97,6 +118,68 @@ class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass):
return pod_names, pvc_names
+ @podcmd.GlustoPod()
+    def _gluster_volume_cleanup(self, vol_name):
+ # Check brick status. Restart vol if bricks are offline
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ brick_list = brick_libs.get_all_bricks(
+ "auto_get_gluster_endpoint", vol_name)
+ self.assertIsNotNone(brick_list, "Failed to get brick list")
+ check_bricks = brick_libs.are_bricks_online(
+ "auto_get_gluster_endpoint", vol_name, brick_list)
+ if not check_bricks:
+ start_vol, _, _ = volume_ops.volume_start(
+ "auto_get_gluster_endpoint", vol_name, force=True)
+ self.assertFalse(
+ start_vol, "Failed to start volume using force")
+
+ def _get_newly_deployed_gluster_pod(self, g_pod_list_before):
+
+ # Fetch pod after delete
+ g_pod_list_after = [
+ pod["pod_name"]
+ for pod in openshift_ops.get_ocp_gluster_pod_details(self._master)]
+
+ # Fetch the new gluster pod
+ g_new_pod = list(set(g_pod_list_after) - set(g_pod_list_before))
+ self.assertTrue(g_new_pod, "No new gluster pod deployed after delete")
+ return g_new_pod
+
+    def _gluster_pod_delete(self, g_pod_list_before):
+        """Wait for a new gluster pod to be ready; force delete the
+        old pod and wait for its replacement if it does not come up"""
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+
+ # Fetch newly deployed gluster pod after delete
+ try:
+ pod_name = self._get_newly_deployed_gluster_pod(g_pod_list_before)
+ openshift_ops.wait_for_pod_be_ready(
+ self._master,
+ pod_name[0] if pod_name else g_pod_list_before[0],
+ timeout=120, wait_step=6)
+ except exceptions.ExecutionError:
+ openshift_ops.oc_delete(
+ self._master, 'pod', g_pod_list_before[0], is_force=True)
+ openshift_ops.wait_for_resource_absence(
+ self._master, 'pod', g_pod_list_before[0])
+ g_new_pod = self._get_newly_deployed_gluster_pod(g_pod_list_before)
+ openshift_ops.wait_for_pod_be_ready(self._master, g_new_pod[0])
+
+ def _wait_for_gluster_pod_be_ready(self, g_pod_list_before):
+ """Wait for the gluster pods to be in ready state"""
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+
+ # Check if the gluster pods are in ready state
+ try:
+ pod_count = len(self._registry_servers_info.keys())
+ openshift_ops.wait_for_pods_be_ready(
+ self._master, pod_count, "glusterfs-node=pod",
+ timeout=120, wait_step=6)
+ except exceptions.ExecutionError:
+            self._gluster_pod_delete(g_pod_list_before)
+
@pytest.mark.tier2
def test_promethoues_pods_and_pvcs(self):
"""Validate prometheus pods and PVC"""
@@ -475,3 +558,419 @@ class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass):
if w.expired:
raise exceptions.ExecutionError(
"Failed to update device details in prometheus")
+
+ @pytest.mark.tier2
+ @podcmd.GlustoPod()
+    def test_prometheus_kill_bhv_brick_process(self):
+        """Validate killing a brick process of the block hosting
+        volume while the prometheus workload is running"""
+
+ # Add check for CRS version
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ if not self.is_containerized_gluster():
+ self.skipTest("Skipping this test case as CRS"
+ " version check can not be implemented")
+
+ # Get one of the prometheus pod name and respective pvc name
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ prometheus_pods = openshift_ops.oc_get_pods(
+ self._master, selector=self._prometheus_resources_selector)
+ if not prometheus_pods:
+            self.skipTest(
+                "Skipping test as prometheus pod is not present")
+
+ # Validate iscsi and multipath
+ prometheus_pod = list(prometheus_pods.keys())[0]
+ pvc_name = openshift_ops.oc_get_custom_resource(
+ self._master, "pod",
+ ":.spec.volumes[*].persistentVolumeClaim.claimName",
+ prometheus_pod)
+ self.assertTrue(pvc_name, "Failed to get PVC name")
+ pvc_name = pvc_name[0]
+ self.verify_iscsi_sessions_and_multipath(
+ pvc_name, prometheus_pod, rtype='pod',
+ heketi_server_url=self._registry_heketi_server_url,
+ is_registry_gluster=True)
+
+ # Try to fetch metric from prometheus pod
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
+
+ # Kill the brick process of a BHV
+ gluster_node = list(self._registry_servers_info.keys())[0]
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ bhv_name = self.get_block_hosting_volume_by_pvc_name(
+ pvc_name, heketi_server_url=self._registry_heketi_server_url,
+ gluster_node=gluster_node, ocp_client_node=self._master)
+ vol_status = gluster_ops.get_gluster_vol_status(bhv_name)
+ gluster_node_ip, brick_pid = None, None
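+        # Brick processes are keyed by their brick path (starting with
+        # '/var') in the gluster vol status output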
+ for g_node, g_node_data in vol_status.items():
+ for process_name, process_data in g_node_data.items():
+ if process_name.startswith("/var"):
+ gluster_node_ip = g_node
+ brick_pid = process_data["pid"]
+ break
+ if gluster_node_ip and brick_pid:
+ break
+ self.assertIsNotNone(brick_pid, "Could not find pid for brick")
+ cmd = "kill -9 {}".format(brick_pid)
+ openshift_ops.cmd_run_on_gluster_pod_or_node(
+ self._master, cmd, gluster_node_ip)
+        self.addCleanup(self._gluster_volume_cleanup, bhv_name)
+
+        # Check if the brick process has been killed; 'ps -p'
+        # succeeding would mean the process is still alive
+        killed_pid_cmd = (
+            "ps -p {} -o pid --no-headers".format(brick_pid))
+        try:
+            openshift_ops.cmd_run_on_gluster_pod_or_node(
+                self._master, killed_pid_cmd, gluster_node_ip)
+        except exceptions.ExecutionError:
+            g.log.info("Brick process {} was killed "
+                       "successfully".format(brick_pid))
+        else:
+            raise exceptions.ExecutionError(
+                "Brick process {} is still running".format(brick_pid))
+
+ # Try to fetch metric from prometheus pod
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
+
+ # Start the bhv using force
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ start_vol, _, _ = volume_ops.volume_start(
+ gluster_node_ip, bhv_name, force=True)
+ self.assertFalse(
+ start_vol, "Failed to start volume {}"
+ " using force".format(bhv_name))
+
+ # Validate iscsi and multipath
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ self.verify_iscsi_sessions_and_multipath(
+ pvc_name, prometheus_pod, rtype='pod',
+ heketi_server_url=self._registry_heketi_server_url,
+ is_registry_gluster=True)
+
+ # Try to fetch metric from prometheus pod
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
+
+ def _check_heketi_and_gluster_pod_after_node_reboot(self, heketi_node):
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+ heketi_pod = openshift_ops.get_pod_names_from_dc(
+ self._master, self.heketi_dc_name)[0]
+
+ # Wait for heketi pod to become ready and running
+ openshift_ops.wait_for_pod_be_ready(self._master, heketi_pod)
+ heketi_ops.hello_heketi(self._master, self.heketi_server_url)
+
+ # Wait for glusterfs pods to become ready if hosted on same node
+ heketi_node_ip = openshift_ops.oc_get_custom_resource(
+ self._master, 'pod', '.:status.hostIP', heketi_pod)[0]
+ if heketi_node_ip in self.gluster_servers:
+ gluster_pod = openshift_ops.get_gluster_pod_name_for_specific_node(
+ self._master, heketi_node)
+
+ # Wait for glusterfs pod to become ready
+ openshift_ops.wait_for_pod_be_ready(self._master, gluster_pod)
+ services = (
+ ("glusterd", "running"), ("gluster-blockd", "running"),
+ ("tcmu-runner", "running"), ("gluster-block-target", "exited"))
+ for service, state in services:
+ openshift_ops.check_service_status_on_pod(
+ self._master, gluster_pod, service, "active", state)
+
+ @pytest.mark.tier4
+ def test_heketi_metrics_validation_with_node_reboot(self):
+ """Validate heketi metrics after node reboot using prometheus"""
+
+ initial_metrics, final_metrics = {}, {}
+
+ # Use storage project
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+
+ # Get initial metrics result
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+ initial_metrics = tuple(
+ heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
+ for metric in self.metrics)
+
+ # Use prometheus project
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+
+ # Get initial prometheus result
+ initial_prometheus = self._get_and_manipulate_metric_data(
+ self.metrics)
+
+ # Get hosted node IP of heketi pod
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+ heketi_pod = openshift_ops.get_pod_name_from_dc(
+ self._master, self.heketi_dc_name)
+ heketi_node = openshift_ops.oc_get_custom_resource(
+ self._master, 'pod', '.:spec.nodeName', heketi_pod)[0]
+
+ # Reboot the node on which heketi pod is scheduled
+ self.addCleanup(
+ self._check_heketi_and_gluster_pod_after_node_reboot, heketi_node)
+ node_ops.node_reboot_by_command(heketi_node)
+
+        # Wait for the node to become NotReady
+ custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
+ for w in waiter.Waiter(300, 10):
+ status = openshift_ops.oc_get_custom_resource(
+ self._master, 'node', custom, heketi_node)
+ if status[0] == 'False':
+ break
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to bring down node {}".format(heketi_node))
+
+ # Wait for node to become ready
+ openshift_ops.wait_for_ocp_node_be_ready(self._master, heketi_node)
+
+ # Wait for heketi and glusterfs pod to become ready
+ self._check_heketi_and_gluster_pod_after_node_reboot(heketi_node)
+
+ # Use prometheus project
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+
+ # Get final metrics result
+ final_metrics = tuple(
+ heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
+ for metric in self.metrics)
+
+ # Get final prometheus result
+ final_prometheus = self._get_and_manipulate_metric_data(
+ self.metrics)
+
+ err_msg = "Initial value {} is not same as final value {}"
+ self.assertEqual(
+ initial_metrics, final_metrics, err_msg.format(
+ initial_metrics, final_metrics))
+ self.assertEqual(
+ initial_prometheus, final_prometheus, err_msg.format(
+ initial_prometheus, final_prometheus))
+
+ @pytest.mark.tier4
+ @ddt.data('add', 'delete')
+ def test_heketi_metrics_validation_after_node(self, condition):
+ """Validate heketi metrics after adding and remove node"""
+
+ # Get additional node
+ additional_host_info = g.config.get("additional_gluster_servers")
+ if not additional_host_info:
+ self.skipTest(
+ "Skipping this test case as additional gluster server is "
+ "not provied in config file")
+
+ additional_host_info = list(additional_host_info.values())[0]
+ storage_hostname = additional_host_info.get("manage")
+ storage_ip = additional_host_info.get("storage")
+ if not (storage_hostname and storage_ip):
+ self.skipTest(
+ "Config options 'additional_gluster_servers.manage' "
+ "and 'additional_gluster_servers.storage' must be set.")
+
+ h_client, h_server = self.heketi_client_node, self.heketi_server_url
+ initial_node_count, final_node_count = 0, 0
+
+ # Get initial node count from prometheus metrics
+ metric_result = self._fetch_metric_from_promtheus_pod(
+ metric='heketi_nodes_count')
+        # Prometheus reports values as strings; cast before summing
+        initial_node_count = reduce(
+            lambda x, y: x + y,
+            [int(result.get('value')[1]) for result in metric_result])
+
+ # Switch to storage project
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+
+ # Configure node before adding node
+ self.configure_node_to_run_gluster(storage_hostname)
+
+ # Get cluster list
+ cluster_info = heketi_ops.heketi_cluster_list(
+ h_client, h_server, json=True)
+
+ # Add node to the cluster
+ heketi_node_info = heketi_ops.heketi_node_add(
+ h_client, h_server,
+ len(self.gluster_servers), cluster_info.get('clusters')[0],
+ storage_hostname, storage_ip, json=True)
+ heketi_node_id = heketi_node_info.get("id")
+ self.addCleanup(
+ heketi_ops.heketi_node_delete,
+ h_client, h_server, heketi_node_id, raise_on_error=False)
+ self.addCleanup(
+ heketi_ops.heketi_node_remove,
+ h_client, h_server, heketi_node_id, raise_on_error=False)
+ self.addCleanup(
+ heketi_ops.heketi_node_disable,
+ h_client, h_server, heketi_node_id, raise_on_error=False)
+ self.addCleanup(
+ openshift_ops.switch_oc_project,
+ self._master, self.storage_project_name)
+
+ if condition == 'delete':
+ # Switch to openshift-monitoring project
+ openshift_ops.switch_oc_project(
+ self.ocp_master_node[0], self._prometheus_project_name)
+
+            # Wait for prometheus to report the updated node count
+            for w in waiter.Waiter(timeout=60, interval=10):
+                metric_result = self._fetch_metric_from_promtheus_pod(
+                    metric='heketi_nodes_count')
+                node_count = reduce(
+                    lambda x, y: x + y,
+                    [int(result.get('value')[1])
+                     for result in metric_result])
+ if node_count != initial_node_count:
+ break
+
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to get updated node details from prometheus")
+
+ # Remove node from cluster
+ heketi_ops.heketi_node_disable(h_client, h_server, heketi_node_id)
+ heketi_ops.heketi_node_remove(h_client, h_server, heketi_node_id)
+ for device in heketi_node_info.get('devices'):
+ heketi_ops.heketi_device_delete(
+ h_client, h_server, device.get('id'))
+ heketi_ops.heketi_node_delete(h_client, h_server, heketi_node_id)
+
+ # Switch to openshift-monitoring project
+ openshift_ops.switch_oc_project(
+ self.ocp_master_node[0], self._prometheus_project_name)
+
+ # Get final node count from prometheus metrics
+ for w in waiter.Waiter(timeout=60, interval=10):
+ metric_result = self._fetch_metric_from_promtheus_pod(
+ metric='heketi_nodes_count')
+            final_node_count = reduce(
+                lambda x, y: x + y,
+                [int(result.get('value')[1]) for result in metric_result])
+
+ if condition == 'delete':
+ if final_node_count < node_count:
+ break
+ else:
+ if final_node_count > initial_node_count:
+ break
+
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to update node details in prometheus")
+
+ @pytest.mark.tier2
+ def test_restart_prometheus_glusterfs_pod(self):
+ """Validate restarting glusterfs pod"""
+
+ # Add check for CRS version
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ if not self.is_containerized_gluster():
+ self.skipTest(
+ "Skipping this test case as CRS version check "
+ "can not be implemented")
+
+ # Get one of the prometheus pod name and respective pvc name
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ prometheus_pods = openshift_ops.oc_get_pods(
+ self._master, selector=self._prometheus_resources_selector)
+ if not prometheus_pods:
+            self.skipTest(
+                "Skipping test as prometheus pod is not present")
+ prometheus_pod = list(prometheus_pods.keys())[0]
+ pvc_name = openshift_ops.oc_get_custom_resource(
+ self._master, "pod",
+ ":.spec.volumes[*].persistentVolumeClaim.claimName",
+ prometheus_pod)[0]
+ self.assertTrue(
+ pvc_name,
+ "Failed to get pvc name from {} pod".format(prometheus_pod))
+ iqn, _, node = self.verify_iscsi_sessions_and_multipath(
+ pvc_name, prometheus_pod, rtype='pod',
+ heketi_server_url=self._registry_heketi_server_url,
+ is_registry_gluster=True)
+
+ # Get the ip of active path
+ devices = openshift_storage_libs.get_iscsi_block_devices_by_path(
+ node, iqn)
+ mpath = openshift_storage_libs.get_mpath_name_from_device_name(
+ node, list(devices.keys())[0])
+ mpath_dev = (
+ openshift_storage_libs.get_active_and_enabled_devices_from_mpath(
+ node, mpath))
+ node_ip = devices[mpath_dev['active'][0]]
+
+ # Get the name of gluster pod from the ip
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ gluster_pods = openshift_ops.get_ocp_gluster_pod_details(
+ self._master)
+ active_pod_name = list(
+ filter(lambda pod: (pod["pod_host_ip"] == node_ip), gluster_pods)
+ )[0]["pod_name"]
+ err_msg = "Failed to get the gluster pod name {} with active path"
+ self.assertTrue(active_pod_name, err_msg.format(active_pod_name))
+ g_pods = [pod['pod_name'] for pod in gluster_pods]
+ g_pods.remove(active_pod_name)
+ pod_list = [active_pod_name, g_pods[0]]
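+        # Exercise both the active-path gluster pod and one passive pod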
+ for pod_name in pod_list:
+
+ # Delete the glusterfs pods
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
+
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ g_pod_list_before = [
+ pod["pod_name"]
+ for pod in openshift_ops.get_ocp_gluster_pod_details(
+ self._master)]
+
+ openshift_ops.oc_delete(self._master, 'pod', pod_name)
+ self.addCleanup(
+                self._gluster_pod_delete, g_pod_list_before)
+
+ # Wait for gluster pod to be absent
+ openshift_ops.wait_for_resource_absence(
+ self._master, 'pod', pod_name)
+
+ # Try to fetch metric from prometheus pod
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
+
+ # Wait for new pod to come up
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ self.assertTrue(self._get_newly_deployed_gluster_pod(
+ g_pod_list_before), "Failed to get new pod")
+ self._wait_for_gluster_pod_be_ready(g_pod_list_before)
+
+ # Validate iscsi and multipath
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ self.verify_iscsi_sessions_and_multipath(
+ pvc_name, prometheus_pod, rtype='pod',
+ heketi_server_url=self._registry_heketi_server_url,
+ is_registry_gluster=True)
+
+ # Try to fetch metric from prometheus pod
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
diff --git a/tests/functional/prometheous/test_prometheus_validations_file.py b/tests/functional/prometheous/test_prometheus_validations_file.py
index b4186627..bbf4aedc 100644
--- a/tests/functional/prometheous/test_prometheus_validations_file.py
+++ b/tests/functional/prometheous/test_prometheus_validations_file.py
@@ -8,11 +8,14 @@ import time
import ddt
from glusto.core import Glusto as g
+from glustolibs.gluster import rebalance_ops
import pytest
from openshiftstoragelibs import baseclass
from openshiftstoragelibs import exceptions
+from openshiftstoragelibs import heketi_ops
from openshiftstoragelibs import openshift_ops
+from openshiftstoragelibs import podcmd
from openshiftstoragelibs import waiter
@@ -83,15 +86,18 @@ class TestPrometheusValidationFile(baseclass.BaseClass):
"__name__"]] = matric_result["value"][1]
return metric_data
- def _fetch_initial_metrics(self, volume_expansion=False):
+ def _fetch_initial_metrics(self, vol_name_prefix=None,
+ volume_expansion=False):
# Create PVC and wait for it to be in 'Bound' state
sc_name = self.create_storage_class(
+ vol_name_prefix=vol_name_prefix,
allow_volume_expansion=volume_expansion)
- pvc_name = self.create_and_wait_for_pvc(sc_name=sc_name)
+ pvc_name = self.create_and_wait_for_pvc(
+ pvc_name_prefix=vol_name_prefix, sc_name=sc_name)
# Create DC and attach with pvc
- dc_name, pod_name = self.create_dc_with_pvc(pvc_name)
+ self.dc_name, pod_name = self.create_dc_with_pvc(pvc_name)
for w in waiter.Waiter(120, 10):
initial_metrics = self._get_and_manipulate_metric_data(
self.metrics, pvc_name)
@@ -146,6 +152,24 @@ class TestPrometheusValidationFile(baseclass.BaseClass):
self.assertFalse(ret, "Failed to run the IO with error msg {}".
format(err))
+ @podcmd.GlustoPod()
+ def _rebalance_completion(self, volume_name):
+ """Rebalance start and completion after expansion."""
+ ret, _, err = rebalance_ops.rebalance_start(
+ 'auto_get_gluster_endpoint', volume_name)
+ self.assertFalse(
+ ret, "Rebalance for {} volume not started with error {}".format(
+ volume_name, err))
+
+ for w in waiter.Waiter(240, 10):
+ reb_status = rebalance_ops.get_rebalance_status(
+ 'auto_get_gluster_endpoint', volume_name)
+ if reb_status["aggregate"]["statusStr"] == "completed":
+ break
+ if w.expired:
+ raise AssertionError(
+ "Failed to complete the rebalance in 240 seconds")
+
@pytest.mark.tier2
def test_prometheus_volume_metrics_on_pod_restart(self):
"""Validate volume metrics using prometheus before and after pod
@@ -245,3 +269,67 @@ class TestPrometheusValidationFile(baseclass.BaseClass):
pod_name=pod_name, pvc_name=pvc_name,
filename="filename1", dirname="dirname1",
metric_data=half_io_metrics, operation="delete")
+
+ @pytest.mark.tier2
+ def test_prometheus_pv_resize(self):
+ """ Validate prometheus metrics with pv resize"""
+
+ # Fetch the metrics and storing initial_metrics as dictionary
+ pvc_name, pod_name, initial_metrics = self._fetch_initial_metrics(
+ vol_name_prefix="for-pv-resize", volume_expansion=True)
+
+ # Write data on the pvc and confirm it is reflected in the prometheus
+ self._perform_io_and_fetch_metrics(
+ pod_name=pod_name, pvc_name=pvc_name,
+ filename="filename1", dirname="dirname1",
+ metric_data=initial_metrics, operation="create")
+
+ # Resize the pvc to 2GiB
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+ pvc_size = 2
+ openshift_ops.resize_pvc(self._master, pvc_name, pvc_size)
+ openshift_ops.wait_for_events(self._master, obj_name=pvc_name,
+ event_reason='VolumeResizeSuccessful')
+ openshift_ops.verify_pvc_size(self._master, pvc_name, pvc_size)
+ pv_name = openshift_ops.get_pv_name_from_pvc(
+ self._master, pvc_name)
+ openshift_ops.verify_pv_size(self._master, pv_name, pvc_size)
+
+ heketi_volume_name = heketi_ops.heketi_volume_list_by_name_prefix(
+ self.heketi_client_node, self.heketi_server_url,
+ "for-pv-resize", json=True)[0][2]
+ self.assertIsNotNone(
+ heketi_volume_name, "Failed to fetch volume with prefix {}".
+ format("for-pv-resize"))
+
+ openshift_ops.oc_delete(self._master, 'pod', pod_name)
+ openshift_ops.wait_for_resource_absence(self._master, 'pod', pod_name)
+ pod_name = openshift_ops.get_pod_name_from_dc(
+ self._master, self.dc_name)
+ openshift_ops.wait_for_pod_be_ready(self._master, pod_name)
+
+ # Check whether the metrics are updated or not
+ for w in waiter.Waiter(120, 10):
+ resize_metrics = self._get_and_manipulate_metric_data(
+ self.metrics, pvc_name)
+ if bool(resize_metrics) and int(resize_metrics[
+ 'kubelet_volume_stats_capacity_bytes']) > int(
+ initial_metrics['kubelet_volume_stats_capacity_bytes']):
+ break
+ if w.expired:
+ raise AssertionError("Failed to reflect PVC Size after resizing")
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
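+        # Allow the expanded size to settle on the backend before
+        # triggering rebalance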
+ time.sleep(240)
+
+        # Trigger lookups, then rebalance, and wait for its completion
+ for _ in range(100):
+ self.cmd_run("oc rsh {} ls /mnt/".format(pod_name))
+ self._rebalance_completion(heketi_volume_name)
+
+ # Write data on the resized pvc and compared with the resized_metrics
+ self._perform_io_and_fetch_metrics(
+ pod_name=pod_name, pvc_name=pvc_name,
+ filename="secondfilename", dirname="seconddirname",
+ metric_data=resize_metrics, operation="create")
diff --git a/tests/functional/provisioning/test_dev_path_mapping_file.py b/tests/functional/provisioning/test_dev_path_mapping_file.py
index 741ad51f..fe4e9834 100644
--- a/tests/functional/provisioning/test_dev_path_mapping_file.py
+++ b/tests/functional/provisioning/test_dev_path_mapping_file.py
@@ -374,3 +374,421 @@ class TestDevPathMapping(baseclass.BaseClass):
use_percent, use_percent_after,
"Failed to execute IO's in the app pod {} after respin".format(
pod_name))
+
+ def _get_bricks_and_device_details(self):
+ """Fetch bricks count and device id list from the node where dev path
+ operation is performed
+ """
+
+ h_client, h_url = self.heketi_client_node, self.heketi_server_url
+ h_node_details = []
+
+ # Fetch bricks on the devices
+ h_nodes = heketi_ops.heketi_node_list(h_client, h_url)
+ for h_node in h_nodes:
+ h_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ h_node_hostname = h_node_info.get("hostnames").get("manage")[0]
+
+            # Fetch bricks count and device list of the matching node
+            if h_node_hostname == self.node_hostname:
+                h_node_details = [
+                    [device['id'], len(device['bricks']), device['name']]
+                    for device in h_node_info['devices']]
+                return h_node_details, h_node
+
+ @pytest.mark.tier4
+ @podcmd.GlustoPod()
+ def test_dev_path_mapping_heketi_device_delete(self):
+ """Validate dev path mapping for heketi device delete lifecycle"""
+ h_client, h_url = self.heketi_client_node, self.heketi_server_url
+
+ node_ids = heketi_ops.heketi_node_list(h_client, h_url)
+ self.assertTrue(node_ids, "Failed to get heketi node list")
+
+        # Fetch the 4th node for the operations
+ h_disable_node = node_ids[3]
+
+ # Fetch bricks on the devices before volume create
+ h_node_details_before, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node before pvc creation
+ brick_count_before = [count[1] for count in h_node_details_before]
+
+ # Create file volume with app pod and verify IO's
+ # and compare path, UUID, vg_name
+ pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()
+
+ # Check if IO's are running
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute IO's in the app pod {} after respin".format(
+ pod_name))
+
+ # Fetch bricks on the devices after volume create
+ h_node_details_after, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node after pvc creation
+ brick_count_after = [count[1] for count in h_node_details_after]
+
+ self.assertGreater(
+ sum(brick_count_after), sum(brick_count_before),
+ "Failed to add bricks on the node {}".format(h_node))
+
+ # Enable the #4th node
+ heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node)
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_disable_node, json=True)
+ h_node_id = node_info['id']
+ self.assertEqual(
+ node_info['state'], "online",
+ "Failed to enable node {}".format(h_disable_node))
+
+ # Fetch device list i.e to be deleted
+ h_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ devices_list = [
+ [device['id'], device['name']]
+ for device in h_node_info['devices']]
+
+ # Device deletion operation
+ for device in devices_list:
+ device_id, device_name = device[0], device[1]
+ self.addCleanup(
+ heketi_ops.heketi_device_enable, h_client, h_url,
+ device_id, raise_on_error=False)
+
+ # Disable device from heketi
+ device_disable = heketi_ops.heketi_device_disable(
+ h_client, h_url, device_id)
+ self.assertTrue(
+ device_disable,
+ "Device {} could not be disabled".format(device_id))
+
+ device_info = heketi_ops.heketi_device_info(
+ h_client, h_url, device_id, json=True)
+ self.assertEqual(
+ device_info['state'], "offline",
+ "Failed to disable device {}".format(device_id))
+
+ # Remove device from heketi
+ device_remove = heketi_ops.heketi_device_remove(
+ h_client, h_url, device_id)
+ self.assertTrue(
+ device_remove,
+ "Device {} could not be removed".format(device_id))
+
+ # Bricks after device removal
+ device_info = heketi_ops.heketi_device_info(
+ h_client, h_url, device_id, json=True)
+ bricks_count_after = len(device_info['bricks'])
+ self.assertFalse(
+ bricks_count_after,
+ "Failed to remove the bricks from the device {}".format(
+ device_id))
+
+ # Delete device from heketi
+ self.addCleanup(
+                heketi_ops.heketi_device_add, h_client, h_url,
+ device_name, h_node, raise_on_error=False)
+ device_delete = heketi_ops.heketi_device_delete(
+ h_client, h_url, device_id)
+ self.assertTrue(
+ device_delete,
+ "Device {} could not be deleted".format(device_id))
+
+ # Check if IO's are running after device is deleted
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute IO's in the app pod {} after respin".format(
+ pod_name))
+
+ # Add device operations
+ for device in devices_list:
+ device_name = device[1]
+
+ # Add device back to the node
+ heketi_ops.heketi_device_add(h_client, h_url, device_name, h_node)
+
+ # Fetch device info after device add
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ device_id = None
+ for device in node_info["devices"]:
+ if device["name"] == device_name:
+ device_id = device["id"]
+ break
+ self.assertTrue(
+ device_id,
+ "Failed to add device {} on node"
+ " {}".format(device_name, h_node))
+
+ # Disable the #4th node
+ heketi_ops.heketi_node_disable(h_client, h_url, h_node_id)
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node_id, json=True)
+ self.assertEqual(
+ node_info['state'], "offline",
+ "Failed to disable node {}".format(h_node_id))
+ pvc_amount, pvc_size = 5, 1
+
+ # Fetch bricks on the devices before volume create
+ h_node_details_before, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node before pvc creation
+ brick_count_before = [count[1] for count in h_node_details_before]
+
+ # Create file volumes
+ pvc_name = self.create_and_wait_for_pvcs(
+ pvc_size=pvc_size, pvc_amount=pvc_amount)
+ self.assertEqual(
+ len(pvc_name), pvc_amount,
+ "Failed to create {} pvc".format(pvc_amount))
+
+ # Fetch bricks on the devices after volume create
+ h_node_details_after, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node after pvc creation
+ brick_count_after = [count[1] for count in h_node_details_after]
+
+ self.assertGreater(
+ sum(brick_count_after), sum(brick_count_before),
+ "Failed to add bricks on the node {}".format(h_node))
+
+ # Check if IO's are running after new device is added
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute IO's in the app pod {} after respin".format(
+ pod_name))
+
+ def _get_bricks_counts_and_device_name(self):
+ """Fetch bricks count and device name from all the nodes"""
+ h_client, h_url = self.heketi_client_node, self.heketi_server_url
+
+ # Fetch bricks on the devices
+ h_nodes = heketi_ops.heketi_node_list(h_client, h_url)
+
+ node_details = {}
+ for h_node in h_nodes:
+ h_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ node_details[h_node] = [[], []]
+ for device in h_node_info['devices']:
+ node_details[h_node][0].append(len(device['bricks']))
+ node_details[h_node][1].append(device['id'])
+ return node_details
+
+ @pytest.mark.tier4
+ @podcmd.GlustoPod()
+ def test_dev_path_mapping_heketi_node_delete(self):
+ """Validate dev path mapping for heketi node deletion lifecycle"""
+ h_client, h_url = self.heketi_client_node, self.heketi_server_url
+
+ node_ids = heketi_ops.heketi_node_list(h_client, h_url)
+ self.assertTrue(node_ids, "Failed to get heketi node list")
+
+ # Fetch #4th node for the operations
+ h_disable_node = node_ids[3]
+
+ # Fetch bricks on the devices before volume create
+ h_node_details_before, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node before pvc creation
+ brick_count_before = [count[1] for count in h_node_details_before]
+
+ # Create file volume with app pod and verify IO's
+ # and compare path, UUID, vg_name
+ pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()
+
+ # Check if IO's are running
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute IO's in the app pod {} after respin".format(
+ pod_name))
+
+ # Fetch bricks on the devices after volume create
+ h_node_details_after, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node after pvc creation
+ brick_count_after = [count[1] for count in h_node_details_after]
+
+ self.assertGreater(
+ sum(brick_count_after), sum(brick_count_before),
+ "Failed to add bricks on the node {}".format(h_node))
+ self.addCleanup(
+ heketi_ops.heketi_node_disable, h_client, h_url, h_disable_node)
+
+ # Enable the #4th node
+ heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node)
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_disable_node, json=True)
+ h_node_id = node_info['id']
+ self.assertEqual(
+ node_info['state'], "online",
+ "Failed to enable node {}".format(h_disable_node))
+
+ # Disable the node and check for brick migrations
+ self.addCleanup(
+ heketi_ops.heketi_node_enable, h_client, h_url, h_node,
+ raise_on_error=False)
+ heketi_ops.heketi_node_disable(h_client, h_url, h_node)
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ self.assertEqual(
+ node_info['state'], "offline",
+ "Failed to disable node {}".format(h_node))
+
+ # Before bricks migration
+ h_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+
+        # Bricks before migration on the node to be deleted
+ bricks_counts_before = 0
+ for device in h_node_info['devices']:
+ bricks_counts_before += (len(device['bricks']))
+
+ # Remove the node
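+        # 'node remove' migrates bricks off the node so that it can be
+        # deleted afterwards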
+ heketi_ops.heketi_node_remove(h_client, h_url, h_node)
+
+ # After bricks migration
+ h_node_info_after = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+
+        # Bricks after migration on the node to be deleted
+ bricks_counts = 0
+ for device in h_node_info_after['devices']:
+ bricks_counts += (len(device['bricks']))
+
+ self.assertFalse(
+ bricks_counts,
+ "Failed to remove all the bricks from node {}".format(h_node))
+
+        # Old node which is to be deleted, new node where bricks reside
+ old_node, new_node = h_node, h_node_id
+
+        # Node info for the new node where bricks reside after migration
+ h_node_info_new = heketi_ops.heketi_node_info(
+ h_client, h_url, new_node, json=True)
+
+ bricks_counts_after = 0
+ for device in h_node_info_new['devices']:
+ bricks_counts_after += (len(device['bricks']))
+
+ self.assertEqual(
+ bricks_counts_before, bricks_counts_after,
+ "Failed to migrated bricks from {} node to {}".format(
+ old_node, new_node))
+
+ # Fetch device list i.e to be deleted
+ h_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ devices_list = [
+ [device['id'], device['name']]
+ for device in h_node_info['devices']]
+
+ for device in devices_list:
+ device_id = device[0]
+ device_name = device[1]
+ self.addCleanup(
+ heketi_ops.heketi_device_add, h_client, h_url,
+ device_name, h_node, raise_on_error=False)
+
+ # Device deletion from heketi node
+ device_delete = heketi_ops.heketi_device_delete(
+ h_client, h_url, device_id)
+ self.assertTrue(
+ device_delete,
+ "Failed to delete the device {}".format(device_id))
+
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ cluster_id = node_info['cluster']
+ zone = node_info['zone']
+ storage_hostname = node_info['hostnames']['manage'][0]
+ storage_ip = node_info['hostnames']['storage'][0]
+
+ # Delete the node
+ self.addCleanup(
+ heketi_ops.heketi_node_add, h_client, h_url,
+ zone, cluster_id, storage_hostname, storage_ip,
+ raise_on_error=False)
+ heketi_ops.heketi_node_delete(h_client, h_url, h_node)
+
+ # Verify if the node is deleted
+ node_ids = heketi_ops.heketi_node_list(h_client, h_url)
+ self.assertNotIn(
+ old_node, node_ids,
+ "Failed to delete the node {}".format(old_node))
+
+ # Check if IO's are running
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute IO's in the app pod {} after respin".format(
+ pod_name))
+
+ # Adding node back
+ h_node_info = heketi_ops.heketi_node_add(
+ h_client, h_url, zone, cluster_id,
+ storage_hostname, storage_ip, json=True)
+ self.assertTrue(
+ h_node_info,
+ "Failed to add the node in the cluster {}".format(cluster_id))
+ h_node_id = h_node_info["id"]
+
+ # Adding devices to the new node
+ for device in devices_list:
+ storage_device = device[1]
+
+ # Add device to the new heketi node
+ heketi_ops.heketi_device_add(
+ h_client, h_url, storage_device, h_node_id)
+ heketi_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node_id, json=True)
+ device_id = None
+ for device in heketi_node_info["devices"]:
+ if device["name"] == storage_device:
+ device_id = device["id"]
+ break
+
+ self.assertTrue(
+ device_id, "Failed to add device {} on node {}".format(
+ storage_device, h_node_id))
+
+        # Create PVCs in order to verify that bricks reside on the new node
+ pvc_amount, pvc_size = 5, 1
+
+ # Fetch bricks on the devices before volume create
+ h_node_details_before, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node before pvc creation
+ brick_count_before = [count[1] for count in h_node_details_before]
+
+ # Create file volumes
+ pvc_name = self.create_and_wait_for_pvcs(
+ pvc_size=pvc_size, pvc_amount=pvc_amount)
+ self.assertEqual(
+ len(pvc_name), pvc_amount,
+ "Failed to create {} pvc".format(pvc_amount))
+
+        # Fetch bricks on the devices after volume create
+ h_node_details_after, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node after pvc creation
+ brick_count_after = [count[1] for count in h_node_details_after]
+
+ self.assertGreater(
+ sum(brick_count_after), sum(brick_count_before),
+ "Failed to add bricks on the new node {}".format(new_node))
+
+ # Check if IO's are running after new node is added
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute IO's in the app pod {} after respin".format(
+ pod_name))
diff --git a/tests/functional/provisioning/test_dynamic_provisioning_file.py b/tests/functional/provisioning/test_dynamic_provisioning_file.py
index cdffdbf6..87ff754a 100644
--- a/tests/functional/provisioning/test_dynamic_provisioning_file.py
+++ b/tests/functional/provisioning/test_dynamic_provisioning_file.py
@@ -4,6 +4,7 @@ from glusto.core import Glusto as g
import pytest
from openshiftstoragelibs.baseclass import BaseClass
+from openshiftstoragelibs import command
from openshiftstoragelibs.exceptions import ExecutionError
from openshiftstoragelibs.heketi_ops import (
heketi_node_info,
@@ -13,7 +14,12 @@ from openshiftstoragelibs.heketi_ops import (
heketi_volume_list,
verify_volume_name_prefix,
)
-from openshiftstoragelibs.node_ops import node_reboot_by_command
+from openshiftstoragelibs.node_ops import (
+ find_vm_name_by_ip_or_hostname,
+ node_reboot_by_command,
+ power_off_vm_by_name,
+ power_on_vm_by_name
+)
from openshiftstoragelibs.openshift_ops import (
cmd_run_on_gluster_pod_or_node,
get_gluster_host_ips_by_pvc_name,
@@ -545,3 +551,37 @@ class TestDynamicProvisioningP0(BaseClass):
"-o=custom-columns=:.spec.storageClassName" % pvc_name)
out = self.cmd_run(get_sc_of_pvc_cmd)
self.assertEqual(out, self.sc_name)
+
+ @pytest.mark.tier2
+ def test_node_failure_pv_mounted(self):
+ """Test node failure when PV is mounted with app pods running"""
+ filepath = "/mnt/file_for_testing_volume.log"
+ pvc_name = self.create_and_wait_for_pvc()
+
+ dc_and_pod_names = self.create_dcs_with_pvc(pvc_name)
+ dc_name, pod_name = dc_and_pod_names[pvc_name]
+
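+        # Identify the gluster host backing /mnt by parsing the mount
+        # source (host:/volume) reported by 'df'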
+ mount_point = "df -kh /mnt -P | tail -1 | awk '{{print $1}}'"
+ pod_cmd = "oc exec {} -- {}".format(pod_name, mount_point)
+ hostname = command.cmd_run(pod_cmd, hostname=self.node)
+ hostname = hostname.split(":")[0]
+
+ vm_name = find_vm_name_by_ip_or_hostname(hostname)
+ self.addCleanup(power_on_vm_by_name, vm_name)
+ power_off_vm_by_name(vm_name)
+
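+        # With one gluster node powered off, the replica volume should
+        # still serve writes from the app pod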
+ cmd = "dd if=/dev/urandom of={} bs=1K count=100".format(filepath)
+ ret, _, err = oc_rsh(self.node, pod_name, cmd)
+ self.assertFalse(
+ ret, "Failed to execute command {} on {} with error {}"
+ .format(cmd, self.node, err))
+
+ oc_delete(self.node, 'pod', pod_name)
+ wait_for_resource_absence(self.node, 'pod', pod_name)
+ pod_name = get_pod_name_from_dc(self.node, dc_name)
+ wait_for_pod_be_ready(self.node, pod_name)
+
+ ret, _, err = oc_rsh(self.node, pod_name, cmd)
+ self.assertFalse(
+ ret, "Failed to execute command {} on {} with error {}"
+ .format(cmd, self.node, err))