Diffstat (limited to 'tests/functional/prometheous')
-rw-r--r-- | tests/functional/prometheous/test_prometheus_validations.py      | 263
-rw-r--r-- | tests/functional/prometheous/test_prometheus_validations_file.py |  94
2 files changed, 354 insertions, 3 deletions
diff --git a/tests/functional/prometheous/test_prometheus_validations.py b/tests/functional/prometheous/test_prometheus_validations.py
index cfd9ab1b..68b69212 100644
--- a/tests/functional/prometheous/test_prometheus_validations.py
+++ b/tests/functional/prometheous/test_prometheus_validations.py
@@ -5,6 +5,7 @@ except ImportError:
     # py2
     import json
 from pkg_resources import parse_version
+from functools import reduce
 
 import ddt
 from glusto.core import Glusto as g
@@ -19,6 +20,7 @@ from openshiftstoragelibs import heketi_ops
 from openshiftstoragelibs import gluster_ops
 from openshiftstoragelibs import node_ops
 from openshiftstoragelibs import openshift_ops
+from openshiftstoragelibs import openshift_storage_libs
 from openshiftstoragelibs import podcmd
 from openshiftstoragelibs import waiter
 
@@ -132,6 +134,52 @@ class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass):
         self.assertFalse(
             start_vol, "Failed to start volume using force")
 
+    def _get_newly_deployed_gluster_pod(self, g_pod_list_before):
+
+        # Fetch the pod list after delete
+        g_pod_list_after = [
+            pod["pod_name"]
+            for pod in openshift_ops.get_ocp_gluster_pod_details(self._master)]
+
+        # Fetch the new gluster pod
+        g_new_pod = list(set(g_pod_list_after) - set(g_pod_list_before))
+        self.assertTrue(g_new_pod, "No new gluster pod deployed after delete")
+        return g_new_pod
+
+    def _gluster_pod_delete(self, g_pod_list_before):
+        """Delete the gluster pod using force delete"""
+        openshift_ops.switch_oc_project(
+            self._master, self._registry_project_name)
+
+        # Fetch newly deployed gluster pod after delete
+        try:
+            pod_name = self._get_newly_deployed_gluster_pod(g_pod_list_before)
+            openshift_ops.wait_for_pod_be_ready(
+                self._master,
+                pod_name[0] if pod_name else g_pod_list_before[0],
+                timeout=120, wait_step=6)
+        except exceptions.ExecutionError:
+            openshift_ops.oc_delete(
+                self._master, 'pod', g_pod_list_before[0], is_force=True)
+            openshift_ops.wait_for_resource_absence(
+                self._master, 'pod', g_pod_list_before[0])
+            g_new_pod = self._get_newly_deployed_gluster_pod(g_pod_list_before)
+            openshift_ops.wait_for_pod_be_ready(self._master, g_new_pod[0])
+
+    def _wait_for_gluster_pod_be_ready(self, g_pod_list_before):
+        """Wait for the gluster pods to be in ready state"""
+        openshift_ops.switch_oc_project(
+            self._master, self._registry_project_name)
+
+        # Check if the gluster pods are in ready state
+        try:
+            pod_count = len(self._registry_servers_info.keys())
+            openshift_ops.wait_for_pods_be_ready(
+                self._master, pod_count, "glusterfs-node=pod",
+                timeout=120, wait_step=6)
+        except exceptions.ExecutionError:
+            self._gluster_pod_delete(g_pod_list_before)
+
     @pytest.mark.tier2
     def test_promethoues_pods_and_pvcs(self):
         """Validate prometheus pods and PVC"""
@@ -711,3 +759,218 @@ class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass):
         self.assertEqual(
             initial_prometheus, final_prometheus, err_msg.format(
                 initial_prometheus, final_prometheus))
+
+    @pytest.mark.tier4
+    @ddt.data('add', 'delete')
+    def test_heketi_metrics_validation_after_node(self, condition):
+        """Validate heketi metrics after adding and removing a node"""
+
+        # Get additional node
+        additional_host_info = g.config.get("additional_gluster_servers")
+        if not additional_host_info:
+            self.skipTest(
+                "Skipping this test case as additional gluster server is "
+                "not provided in config file")
+
+        additional_host_info = list(additional_host_info.values())[0]
+        storage_hostname = additional_host_info.get("manage")
+        storage_ip = additional_host_info.get("storage")
+        if not (storage_hostname and storage_ip):
+            self.skipTest(
+                "Config options 'additional_gluster_servers.manage' "
+                "and 'additional_gluster_servers.storage' must be set.")
+
+        h_client, h_server = self.heketi_client_node, self.heketi_server_url
+        initial_node_count, final_node_count = 0, 0
+
+        # Get initial node count from prometheus metrics
+        metric_result = self._fetch_metric_from_promtheus_pod(
+            metric='heketi_nodes_count')
+        initial_node_count = reduce(
+            lambda x, y: x + y,
+            [result.get('value')[1] for result in metric_result])
+
+        # Switch to storage project
+        openshift_ops.switch_oc_project(
+            self._master, self.storage_project_name)
+
+        # Configure node before adding it to the cluster
+        self.configure_node_to_run_gluster(storage_hostname)
+
+        # Get cluster list
+        cluster_info = heketi_ops.heketi_cluster_list(
+            h_client, h_server, json=True)
+
+        # Add node to the cluster
+        heketi_node_info = heketi_ops.heketi_node_add(
+            h_client, h_server,
+            len(self.gluster_servers), cluster_info.get('clusters')[0],
+            storage_hostname, storage_ip, json=True)
+        heketi_node_id = heketi_node_info.get("id")
+        self.addCleanup(
+            heketi_ops.heketi_node_delete,
+            h_client, h_server, heketi_node_id, raise_on_error=False)
+        self.addCleanup(
+            heketi_ops.heketi_node_remove,
+            h_client, h_server, heketi_node_id, raise_on_error=False)
+        self.addCleanup(
+            heketi_ops.heketi_node_disable,
+            h_client, h_server, heketi_node_id, raise_on_error=False)
+        self.addCleanup(
+            openshift_ops.switch_oc_project,
+            self._master, self.storage_project_name)
+
+        if condition == 'delete':
+            # Switch to openshift-monitoring project
+            openshift_ops.switch_oc_project(
+                self.ocp_master_node[0], self._prometheus_project_name)
+
+            # Wait for the node count to be updated in prometheus metrics
+            for w in waiter.Waiter(timeout=60, interval=10):
+                metric_result = self._fetch_metric_from_promtheus_pod(
+                    metric='heketi_nodes_count')
+                node_count = reduce(
+                    lambda x, y: x + y,
+                    [result.get('value')[1] for result in metric_result])
+                if node_count != initial_node_count:
+                    break
+
+            if w.expired:
+                raise exceptions.ExecutionError(
+                    "Failed to get updated node details from prometheus")
+
+            # Remove node from cluster
+            heketi_ops.heketi_node_disable(h_client, h_server, heketi_node_id)
+            heketi_ops.heketi_node_remove(h_client, h_server, heketi_node_id)
+            for device in heketi_node_info.get('devices'):
+                heketi_ops.heketi_device_delete(
+                    h_client, h_server, device.get('id'))
+            heketi_ops.heketi_node_delete(h_client, h_server, heketi_node_id)
+
+        # Switch to openshift-monitoring project
+        openshift_ops.switch_oc_project(
+            self.ocp_master_node[0], self._prometheus_project_name)
+
+        # Get final node count from prometheus metrics
+        for w in waiter.Waiter(timeout=60, interval=10):
+            metric_result = self._fetch_metric_from_promtheus_pod(
+                metric='heketi_nodes_count')
+            final_node_count = reduce(
+                lambda x, y: x + y,
+                [result.get('value')[1] for result in metric_result])
+
+            if condition == 'delete':
+                if final_node_count < node_count:
+                    break
+            else:
+                if final_node_count > initial_node_count:
+                    break
+
+        if w.expired:
+            raise exceptions.ExecutionError(
+                "Failed to update node details in prometheus")
+
+    @pytest.mark.tier2
+    def test_restart_prometheus_glusterfs_pod(self):
+        """Validate restarting glusterfs pod"""
+
+        # Add check for CRS version
+        openshift_ops.switch_oc_project(
+            self._master, self._registry_project_name)
+        if not self.is_containerized_gluster():
+            self.skipTest(
+                "Skipping this test case as CRS version check "
+                "cannot be implemented")
+
+        # Get one of the prometheus pod names and the respective pvc name
+        openshift_ops.switch_oc_project(
+            self._master, self._prometheus_project_name)
+        prometheus_pods = openshift_ops.oc_get_pods(
+            self._master, selector=self._prometheus_resources_selector)
+        if not prometheus_pods:
+            self.skipTest(
+                "Skipping test as prometheus"
+                " pod is not present")
+        prometheus_pod = list(prometheus_pods.keys())[0]
+        pvc_name = openshift_ops.oc_get_custom_resource(
+            self._master, "pod",
+            ":.spec.volumes[*].persistentVolumeClaim.claimName",
+            prometheus_pod)[0]
+        self.assertTrue(
+            pvc_name,
+            "Failed to get pvc name from {} pod".format(prometheus_pod))
+        iqn, _, node = self.verify_iscsi_sessions_and_multipath(
+            pvc_name, prometheus_pod, rtype='pod',
+            heketi_server_url=self._registry_heketi_server_url,
+            is_registry_gluster=True)
+
+        # Get the ip of the active path
+        devices = openshift_storage_libs.get_iscsi_block_devices_by_path(
+            node, iqn)
+        mpath = openshift_storage_libs.get_mpath_name_from_device_name(
+            node, list(devices.keys())[0])
+        mpath_dev = (
+            openshift_storage_libs.get_active_and_enabled_devices_from_mpath(
+                node, mpath))
+        node_ip = devices[mpath_dev['active'][0]]
+
+        # Get the name of the gluster pod from the ip
+        openshift_ops.switch_oc_project(
+            self._master, self._registry_project_name)
+        gluster_pods = openshift_ops.get_ocp_gluster_pod_details(
+            self._master)
+        active_pod_name = list(
+            filter(lambda pod: (pod["pod_host_ip"] == node_ip), gluster_pods)
+        )[0]["pod_name"]
+        err_msg = "Failed to get the gluster pod name {} with active path"
+        self.assertTrue(active_pod_name, err_msg.format(active_pod_name))
+        g_pods = [pod['pod_name'] for pod in gluster_pods]
+        g_pods.remove(active_pod_name)
+        pod_list = [active_pod_name, g_pods[0]]
+        for pod_name in pod_list:
+
+            # Delete the glusterfs pods
+            openshift_ops.switch_oc_project(
+                self._master, self._prometheus_project_name)
+            self._fetch_metric_from_promtheus_pod(
+                metric='heketi_device_brick_count')
+
+            openshift_ops.switch_oc_project(
+                self._master, self._registry_project_name)
+            g_pod_list_before = [
+                pod["pod_name"]
+                for pod in openshift_ops.get_ocp_gluster_pod_details(
+                    self._master)]
+
+            openshift_ops.oc_delete(self._master, 'pod', pod_name)
+            self.addCleanup(
+                self._gluster_pod_delete, g_pod_list_before)
+
+            # Wait for gluster pod to be absent
+            openshift_ops.wait_for_resource_absence(
+                self._master, 'pod', pod_name)
+
+            # Try to fetch metric from prometheus pod
+            openshift_ops.switch_oc_project(
+                self._master, self._prometheus_project_name)
+            self._fetch_metric_from_promtheus_pod(
+                metric='heketi_device_brick_count')
+
+            # Wait for new pod to come up
+            openshift_ops.switch_oc_project(
+                self._master, self._registry_project_name)
+            self.assertTrue(self._get_newly_deployed_gluster_pod(
+                g_pod_list_before), "Failed to get new pod")
+            self._wait_for_gluster_pod_be_ready(g_pod_list_before)
+
+            # Validate iscsi and multipath
+            openshift_ops.switch_oc_project(
+                self._master, self._prometheus_project_name)
+            self.verify_iscsi_sessions_and_multipath(
+                pvc_name, prometheus_pod, rtype='pod',
+                heketi_server_url=self._registry_heketi_server_url,
+                is_registry_gluster=True)
+
+            # Try to fetch metric from prometheus pod
+            self._fetch_metric_from_promtheus_pod(
+                metric='heketi_device_brick_count')
diff --git a/tests/functional/prometheous/test_prometheus_validations_file.py b/tests/functional/prometheous/test_prometheus_validations_file.py
index b4186627..bbf4aedc 100644
--- a/tests/functional/prometheous/test_prometheus_validations_file.py
+++ b/tests/functional/prometheous/test_prometheus_validations_file.py
@@ -8,11 +8,14 @@ import time
 
 import ddt
 from glusto.core import Glusto as g
+from glustolibs.gluster import rebalance_ops
 import pytest
 
 from openshiftstoragelibs import baseclass
 from openshiftstoragelibs import exceptions
+from openshiftstoragelibs import heketi_ops
 from openshiftstoragelibs import openshift_ops
+from openshiftstoragelibs import podcmd
 from openshiftstoragelibs import waiter
 
 
@@ -83,15 +86,18 @@ class TestPrometheusValidationFile(baseclass.BaseClass):
                     "__name__"]] = matric_result["value"][1]
         return metric_data
 
-    def _fetch_initial_metrics(self, volume_expansion=False):
+    def _fetch_initial_metrics(self, vol_name_prefix=None,
+                               volume_expansion=False):
 
         # Create PVC and wait for it to be in 'Bound' state
         sc_name = self.create_storage_class(
+            vol_name_prefix=vol_name_prefix,
             allow_volume_expansion=volume_expansion)
-        pvc_name = self.create_and_wait_for_pvc(sc_name=sc_name)
+        pvc_name = self.create_and_wait_for_pvc(
+            pvc_name_prefix=vol_name_prefix, sc_name=sc_name)
 
         # Create DC and attach with pvc
-        dc_name, pod_name = self.create_dc_with_pvc(pvc_name)
+        self.dc_name, pod_name = self.create_dc_with_pvc(pvc_name)
         for w in waiter.Waiter(120, 10):
             initial_metrics = self._get_and_manipulate_metric_data(
                 self.metrics, pvc_name)
@@ -146,6 +152,24 @@ class TestPrometheusValidationFile(baseclass.BaseClass):
         self.assertFalse(ret, "Failed to run the IO with error msg {}".
                          format(err))
 
+    @podcmd.GlustoPod()
+    def _rebalance_completion(self, volume_name):
+        """Start rebalance after expansion and wait for its completion."""
+        ret, _, err = rebalance_ops.rebalance_start(
+            'auto_get_gluster_endpoint', volume_name)
+        self.assertFalse(
+            ret, "Rebalance for {} volume not started with error {}".format(
+                volume_name, err))
+
+        for w in waiter.Waiter(240, 10):
+            reb_status = rebalance_ops.get_rebalance_status(
+                'auto_get_gluster_endpoint', volume_name)
+            if reb_status["aggregate"]["statusStr"] == "completed":
+                break
+        if w.expired:
+            raise AssertionError(
+                "Failed to complete the rebalance in 240 seconds")
+
     @pytest.mark.tier2
     def test_prometheus_volume_metrics_on_pod_restart(self):
         """Validate volume metrics using prometheus before and after pod
@@ -245,3 +269,67 @@ class TestPrometheusValidationFile(baseclass.BaseClass):
             pod_name=pod_name, pvc_name=pvc_name,
             filename="filename1", dirname="dirname1",
             metric_data=half_io_metrics, operation="delete")
+
+    @pytest.mark.tier2
+    def test_prometheus_pv_resize(self):
+        """Validate prometheus metrics with pv resize"""
+
+        # Fetch the metrics and store initial_metrics as a dictionary
+        pvc_name, pod_name, initial_metrics = self._fetch_initial_metrics(
+            vol_name_prefix="for-pv-resize", volume_expansion=True)
+
+        # Write data on the pvc and confirm it is reflected in prometheus
+        self._perform_io_and_fetch_metrics(
+            pod_name=pod_name, pvc_name=pvc_name,
+            filename="filename1", dirname="dirname1",
+            metric_data=initial_metrics, operation="create")
+
+        # Resize the pvc to 2GiB
+        openshift_ops.switch_oc_project(
+            self._master, self.storage_project_name)
+        pvc_size = 2
+        openshift_ops.resize_pvc(self._master, pvc_name, pvc_size)
+        openshift_ops.wait_for_events(self._master, obj_name=pvc_name,
+                                      event_reason='VolumeResizeSuccessful')
+        openshift_ops.verify_pvc_size(self._master, pvc_name, pvc_size)
+        pv_name = openshift_ops.get_pv_name_from_pvc(
+            self._master, pvc_name)
+        openshift_ops.verify_pv_size(self._master, pv_name, pvc_size)
+
+        heketi_volume_name = heketi_ops.heketi_volume_list_by_name_prefix(
+            self.heketi_client_node, self.heketi_server_url,
+            "for-pv-resize", json=True)[0][2]
+        self.assertIsNotNone(
+            heketi_volume_name, "Failed to fetch volume with prefix {}".
+            format("for-pv-resize"))
+
+        openshift_ops.oc_delete(self._master, 'pod', pod_name)
+        openshift_ops.wait_for_resource_absence(self._master, 'pod', pod_name)
+        pod_name = openshift_ops.get_pod_name_from_dc(
+            self._master, self.dc_name)
+        openshift_ops.wait_for_pod_be_ready(self._master, pod_name)
+
+        # Check whether the metrics are updated or not
+        for w in waiter.Waiter(120, 10):
+            resize_metrics = self._get_and_manipulate_metric_data(
+                self.metrics, pvc_name)
+            if bool(resize_metrics) and int(resize_metrics[
+                    'kubelet_volume_stats_capacity_bytes']) > int(
+                    initial_metrics['kubelet_volume_stats_capacity_bytes']):
+                break
+        if w.expired:
+            raise AssertionError("Failed to reflect PVC size after resizing")
+        openshift_ops.switch_oc_project(
+            self._master, self.storage_project_name)
+        time.sleep(240)
+
+        # Lookup and trigger rebalance and wait for its completion
+        for _ in range(100):
+            self.cmd_run("oc rsh {} ls /mnt/".format(pod_name))
+        self._rebalance_completion(heketi_volume_name)
+
+        # Write data on the resized pvc and compare with resize_metrics
+        self._perform_io_and_fetch_metrics(
+            pod_name=pod_name, pvc_name=pvc_name,
+            filename="secondfilename", dirname="seconddirname",
+            metric_data=resize_metrics, operation="create")