author     Valerii Ponomarov <vponomar@redhat.com>  2019-10-24 22:43:34 +0530
committer  vponomar <vponomar@redhat.com>           2019-11-22 12:03:55 +0000
commit     3fb8e567d71452117f4993c0355174dde9df3b91 (patch)
tree       594e366dfb5cb15382498d946feeab2c70ffdc41
parent     d39cf3033eff89ed570176bec1f78deb7113230f (diff)

Add tc where targetcli fails during block PVC creation

Add tc where we simulate targetcli failure in parallel to the block PVC
creations.

Change-Id: I6f94079593d8ff978c321b64729b4c0b67810c23

-rw-r--r--  tests/functional/provisioning/test_dynamic_provisioning_block.py  127
1 file changed, 127 insertions, 0 deletions
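The new test case keeps killing the targetcli process on one of three online
Gluster nodes while a batch of block PVCs is created, then expects most of
those PVCs to report provisioning failures. A condensed sketch of that idea,
reusing helpers the test itself relies on (gluster_node_ip, master_node and
sc_name are placeholders, not names taken from the change):

    from glusto.core import Glusto as g
    from openshiftstoragelibs.openshift_ops import oc_create_pvc

    # Keep killing targetcli on one Gluster node in the background.
    kill_loop = g.run_async(
        gluster_node_ip,
        "while true; do pkill targetcli || echo 'kill failed'; done",
        "root")
    try:
        # Create block PVCs while targetcli keeps dying on that node.
        pvc_names = [oc_create_pvc(master_node, sc_name, pvc_size=1)
                     for _ in range(10)]
        # Each PVC is then expected to report either a 'ProvisioningSucceeded'
        # or a 'ProvisioningFailed' event, with failures in the majority.
    finally:
        kill_loop._proc.terminate()
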
diff --git a/tests/functional/provisioning/test_dynamic_provisioning_block.py b/tests/functional/provisioning/test_dynamic_provisioning_block.py
index 01b14a6e..3372e331 100644
--- a/tests/functional/provisioning/test_dynamic_provisioning_block.py
+++ b/tests/functional/provisioning/test_dynamic_provisioning_block.py
@@ -11,6 +11,8 @@ from openshiftstoragelibs.heketi_ops import (
     heketi_blockvolume_create,
     heketi_blockvolume_delete,
     heketi_blockvolume_list,
+    heketi_node_disable,
+    heketi_node_enable,
     heketi_node_info,
     heketi_node_list,
     heketi_volume_create,
@@ -22,6 +24,7 @@ from openshiftstoragelibs.node_ops import node_reboot_by_command
 from openshiftstoragelibs.openshift_ops import (
     cmd_run_on_gluster_pod_or_node,
     get_default_block_hosting_volume_size,
+    get_events,
     get_gluster_host_ips_by_pvc_name,
     get_gluster_pod_names_by_pvc_name,
     get_pod_name_from_dc,
@@ -36,7 +39,9 @@ from openshiftstoragelibs.openshift_ops import (
     verify_pvc_status_is_bound,
     wait_for_events,
     wait_for_pod_be_ready,
+    wait_for_pvcs_be_bound,
     wait_for_resource_absence,
+    wait_for_resources_absence,
 )
 from openshiftstoragelibs.openshift_version import get_openshift_version
 from openshiftstoragelibs.waiter import Waiter
@@ -584,3 +589,125 @@ class TestDynamicProvisioningBlockP0(GlusterBlockBaseClass):
         # create more PVCs in expanded BHV
         pvcs = self.create_and_wait_for_pvcs(
             pvc_size=(expand_size - 1), pvc_amount=1)
+
+    @skip("Blocked by BZ-1769426")
+    def test_targetcli_failure_during_block_pvc_creation(self):
+        h_node, h_server = self.heketi_client_node, self.heketi_server_url
+
+        # Disable redundant nodes and leave just 3 nodes online
+        h_node_id_list = heketi_node_list(h_node, h_server)
+        self.assertGreater(len(h_node_id_list), 2)
+        for node_id in h_node_id_list[3:]:
+            heketi_node_disable(h_node, h_server, node_id)
+            self.addCleanup(heketi_node_enable, h_node, h_server, node_id)
+
+        # Gather info about the Gluster node we are going to use for killing
+        # targetcli processes.
+        chosen_g_node_id = h_node_id_list[0]
+        chosen_g_node_info = heketi_node_info(
+            h_node, h_server, chosen_g_node_id, json=True)
+        chosen_g_node_ip = chosen_g_node_info['hostnames']['storage'][0]
+        chosen_g_node_hostname = chosen_g_node_info['hostnames']['manage'][0]
+        chosen_g_node_ip_and_hostname = set((
+            chosen_g_node_ip, chosen_g_node_hostname))
+
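+        # Look for a Gluster pod running on the chosen node. If one is found
+        # (containerized Gluster), commands are run from the master node via
+        # 'oc exec'; otherwise they are run directly on the node itself.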
+        g_pods = oc_get_custom_resource(
+            self.node, 'pod', [':.metadata.name', ':.status.hostIP',
+                               ':.status.podIP', ':.spec.nodeName'],
+            selector='glusterfs-node=pod')
+        if g_pods and g_pods[0]:
+            for g_pod in g_pods:
+                if chosen_g_node_ip_and_hostname.intersection(set(g_pod[1:])):
+                    host_to_run_cmds = self.node
+                    g_pod_prefix, g_pod = 'oc exec %s -- ' % g_pod[0], g_pod[0]
+                    break
+            else:
+                err_msg = (
+                    'Failed to find Gluster pod filtering it by following IPs '
+                    'and hostnames: %s\nFound following Gluster pods: %s'
+                ) % (chosen_g_node_ip_and_hostname, g_pods)
+                g.log.error(err_msg)
+                raise AssertionError(err_msg)
+        else:
+            host_to_run_cmds, g_pod_prefix, g_pod = chosen_g_node_ip, '', ''
+
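+        # Back up the targetcli preferences file: repeatedly killing targetcli
+        # can leave it truncated (BZ-1769426), and the 'finally' block below
+        # restores it from this backup in that case.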
+        # Schedule deletion of targetcli process
+        file_for_bkp, pvc_number = "~/.targetcli/prefs.bin", 10
+        self.cmd_run(
+            "%scp %s %s_backup" % (g_pod_prefix, file_for_bkp, file_for_bkp),
+            hostname=host_to_run_cmds)
+        self.addCleanup(
+            self.cmd_run,
+            "%srm -f %s_backup" % (g_pod_prefix, file_for_bkp),
+            hostname=host_to_run_cmds)
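+        # Endlessly kill the targetcli process on the chosen node to simulate
+        # targetcli failure while the block PVCs below are being provisioned.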
+        kill_targetcli_services_cmd = (
+            "while true; do "
+            " %spkill targetcli || echo 'failed to kill targetcli process'; "
+            "done" % g_pod_prefix)
+        loop_for_killing_targetcli_process = g.run_async(
+            host_to_run_cmds, kill_targetcli_services_cmd, "root")
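+        # The kill loop keeps running in the background and is terminated in
+        # the 'finally' block once the provisioning checks are finished.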
+        try:
+            # Create a bunch of PVCs
+            sc_name, pvc_names = self.create_storage_class(), []
+            for i in range(pvc_number):
+                pvc_names.append(oc_create_pvc(self.node, sc_name, pvc_size=1))
+            self.addCleanup(
+                wait_for_resources_absence, self.node, 'pvc', pvc_names)
+            self.addCleanup(oc_delete, self.node, 'pvc', ' '.join(pvc_names))
+
+            # Check that we get expected number of provisioning errors
+            timeout, wait_step, succeeded_pvcs, failed_pvcs = 120, 1, [], []
+            _waiter, err_msg = Waiter(timeout=timeout, interval=wait_step), ""
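+            # Note: a single Waiter instance is shared by all the PVCs below;
+            # only its attempt counter is reset before each next PVC.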
+            for pvc_name in pvc_names:
+                _waiter._attempt = 0
+                for w in _waiter:
+                    events = get_events(
+                        self.node, pvc_name, obj_type="PersistentVolumeClaim")
+                    for event in events:
+                        if event['reason'] == 'ProvisioningSucceeded':
+                            succeeded_pvcs.append(pvc_name)
+                            break
+                        elif event['reason'] == 'ProvisioningFailed':
+                            failed_pvcs.append(pvc_name)
+                            break
+                    else:
+                        continue
+                    break
+                if w.expired:
+                    err_msg = (
+                        "Failed to get either 'ProvisioningSucceeded' or "
+                        "'ProvisioningFailed' statuses for all the PVCs in "
+                        "time. Timeout was %ss, interval was %ss." % (
+                            timeout, wait_step))
+                    g.log.error(err_msg)
+                    raise AssertionError(err_msg)
+            self.assertGreater(len(failed_pvcs), len(succeeded_pvcs))
+        finally:
+            # Restore targetcli workability
+            loop_for_killing_targetcli_process._proc.terminate()
+
+            # Revert the breakage which can be caused by BZ-1769426
+            check_bkp_file_size_cmd = (
+                "%sls -lah %s | awk '{print $5}'" % (
+                    g_pod_prefix, file_for_bkp))
+            bkp_file_size = self.cmd_run(
+                check_bkp_file_size_cmd, hostname=host_to_run_cmds).strip()
+            if bkp_file_size == "0":
+                self.cmd_run(
+                    "%smv %s_backup %s" % (
+                        g_pod_prefix, file_for_bkp, file_for_bkp),
+                    hostname=host_to_run_cmds)
+                breakage_err_msg = (
+                    "File located at '%s' was corrupted (zero size) on the "
+                    "%s. Looks like BZ-1769426 took effect.\n"
+                    "Don't worry, it has been restored after test failure." % (
+                        file_for_bkp,
+                        "'%s' Gluster pod" % g_pod if g_pod
+                        else "'%s' Gluster node" % chosen_g_node_ip))
+                g.log.error(breakage_err_msg)
+                if err_msg:
+                    breakage_err_msg = "%s\n%s" % (err_msg, breakage_err_msg)
+                raise AssertionError(breakage_err_msg)
+
+        # Wait for all the PVCs to be in bound state
+        wait_for_pvcs_be_bound(self.node, pvc_names, timeout=300, wait_step=5)