diff options
author | vamahaja <vamahaja@redhat.com> | 2018-10-04 17:30:11 +0530 |
---|---|---|
committer | vamahaja <vamahaja@redhat.com> | 2018-12-10 09:22:38 +0530 |
commit | 024f0f0bc9f8c969c8f5a3ec494cee0c019f1868 (patch) | |
tree | 8a000350e82a43e01e90b31d437cf3973d29f9ff /cns-libs | |
parent | 31492fa754bd77e583564d8356822500078d1e2c (diff) |
[CNS-1314][CNS-1285] Restart gluster block volumes and validate
Change-Id: Ib7e3125e5120a91fe431816b33be4d4e6f15078e
Signed-off-by: vamahaja <vamahaja@redhat.com>
Diffstat (limited to 'cns-libs')
-rw-r--r-- | cns-libs/cnslibs/common/gluster_ops.py | 296 | ||||
-rw-r--r-- | cns-libs/cnslibs/common/heketi_ops.py | 46 | ||||
-rw-r--r-- | cns-libs/cnslibs/common/openshift_ops.py | 47 |
3 files changed, 342 insertions, 47 deletions
import json
import re
import time

import six
from glusto.core import Glusto as g
from glustolibs.gluster.block_ops import block_list
from glustolibs.gluster.heal_libs import is_heal_complete
from glustolibs.gluster.volume_ops import (
    get_volume_list,
    get_volume_status,
    volume_start,
    volume_status,
    volume_stop,
)

from cnslibs.common import exceptions, podcmd, waiter
from cnslibs.common.heketi_ops import heketi_blockvolume_info
from cnslibs.common.openshift_ops import (
    oc_get_pods,
    oc_rsh,
    wait_for_process_to_kill_on_pod,
)


def _get_gluster_pod(gluster_pod, hostname=None):
    """Normalize *gluster_pod* into a ``podcmd.Pod`` object.

    Args:
        gluster_pod (podcmd.Pod | str): a ready ``podcmd.Pod`` object, or
            the name of a gluster pod.
        hostname (str): OCP master node hosting the gluster pod; required
            (and only used) when *gluster_pod* is given as a string.

    Returns:
        podcmd.Pod: object wrapping the pod name and its master node.

    Raises:
        exceptions.ExecutionError: when *gluster_pod* is a string but no
            hostname was supplied, or *gluster_pod* has an unsupported type.
    """
    if isinstance(gluster_pod, podcmd.Pod):
        return gluster_pod

    if isinstance(gluster_pod, six.string_types):
        if hostname:
            return podcmd.Pod(hostname, gluster_pod)
        raise exceptions.ExecutionError(
            "gluster pod is string '%s' but hostname '%s' not valid" % (
                gluster_pod, hostname)
        )

    raise exceptions.ExecutionError(
        "invalid gluster pod parameter '%s', '%s'" % (
            gluster_pod, type(gluster_pod))
    )


@podcmd.GlustoPod()
def wait_to_heal_complete(
        gluster_pod, hostname=None, timeout=300, wait_step=5):
    """Wait until self-heal reports complete for every gluster volume.

    Args:
        gluster_pod (podcmd.Pod | str): gluster pod object or gluster
            pod name.
        hostname (str): master node on which gluster pod exists.
        timeout (int): total seconds to wait; NOTE: one waiter is shared
            by all volumes, so this caps the overall wait, not the
            per-volume wait.
        wait_step (int): seconds between heal-status polls.

    Raises:
        AssertionError: volume list could not be fetched, or heal did not
            complete within *timeout*.
    """
    gluster_pod = _get_gluster_pod(gluster_pod, hostname)

    gluster_vol_list = get_volume_list(gluster_pod)
    if not gluster_vol_list:
        raise AssertionError("failed to get gluster volume list")

    _waiter = waiter.Waiter(timeout=timeout, interval=wait_step)
    for gluster_vol in gluster_vol_list:
        for w in _waiter:
            if is_heal_complete(gluster_pod, gluster_vol):
                break

        # checked per volume so a timeout on any volume fails fast
        if w.expired:
            err_msg = ("reached timeout waiting for all the gluster "
                       "volumes to reach the 'healed' state.")
            g.log.error(err_msg)
            raise AssertionError(err_msg)


@podcmd.GlustoPod()
def get_brick_pids(gluster_pod, block_hosting_vol, hostname=None):
    """Return brick process ids of a block hosting volume.

    Args:
        gluster_pod (podcmd.Pod | str): gluster pod object or gluster
            pod name.
        block_hosting_vol (str): block hosting volume name.
        hostname (str): master node on which gluster pod exists.

    Returns:
        dict: mapping of brick host -> brick pid, as reported by
        ``gluster volume status``.

    Raises:
        AssertionError: volume status unavailable, volume missing from the
            status output, or a brick is down (pid reported as -1).
    """
    gluster_pod = _get_gluster_pod(gluster_pod, hostname)

    gluster_volume_status = get_volume_status(gluster_pod, block_hosting_vol)
    if not gluster_volume_status:
        # BUG FIX: volume name and pod were swapped in the message
        raise AssertionError("failed to get volume status for gluster "
                             "volume '%s' on pod '%s'" % (
                                 block_hosting_vol, gluster_pod))

    gluster_volume_status = gluster_volume_status.get(block_hosting_vol)
    if not gluster_volume_status:
        # explicit raise instead of bare assert: assert is stripped
        # under ``python -O`` and this is runtime validation
        raise AssertionError(
            "gluster volume %s not present" % block_hosting_vol)

    pids = {}
    for parent_key, parent_val in gluster_volume_status.items():
        for child_key, child_val in parent_val.items():
            # only brick entries (paths under /var) carry a brick pid
            if not child_key.startswith("/var"):
                continue

            pid = child_val["pid"]
            # When a brick is down, the pid of the brick is returned
            # as -1, which is an unexpected situation, hence an error.
            if str(pid) == "-1":
                raise AssertionError("Something went wrong brick pid is -1")

            pids[parent_key] = pid

    return pids


@podcmd.GlustoPod()
def restart_brick_process(hostname, gluster_pod, block_hosting_vol):
    """Restart brick processes of a block hosting volume.

    Kills up to two brick processes, waits for them to die, then
    force-starts the volume so the bricks are respawned.

    Args:
        hostname (str): hostname on which gluster pod exists.
        gluster_pod (podcmd.Pod | str): gluster pod object or gluster
            pod name.
        block_hosting_vol (str): block hosting volume name.

    Raises:
        AssertionError: a kill command or the volume start failed.
    """
    pids = get_brick_pids(gluster_pod, block_hosting_vol, hostname)

    # using count variable to limit the max pod process kill to 2
    count = 0
    killed_process = {}
    pid_keys = pids.keys()
    oc_pods = oc_get_pods(hostname)
    for pod in oc_pods.keys():
        if not (oc_pods[pod]["ip"] in pid_keys and count <= 1):
            continue

        ret, out, err = oc_rsh(
            hostname, pod, "kill -9 %s" % pids[oc_pods[pod]["ip"]]
        )
        if ret != 0:
            err_msg = "failed to kill process id %s error: %s" % (
                pids[oc_pods[pod]["ip"]], err)
            g.log.error(err_msg)
            raise AssertionError(err_msg)

        killed_process[pod] = pids[oc_pods[pod]["ip"]]
        count += 1

    for pod, pid in killed_process.items():
        wait_for_process_to_kill_on_pod(pod, pid, hostname)

    ret, out, err = volume_start(gluster_pod, block_hosting_vol, force=True)
    if ret != 0:
        err_msg = "failed to start gluster volume %s on pod %s error: %s" % (
            block_hosting_vol, gluster_pod, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)


@podcmd.GlustoPod()
def restart_block_hosting_volume(
        gluster_pod, block_hosting_vol, sleep_time=120, hostname=None):
    """Restart (stop, wait, force-start) a block hosting volume.

    Args:
        gluster_pod (podcmd.Pod | str): gluster pod object or gluster
            pod name.
        block_hosting_vol (str): name of block hosting volume.
        sleep_time (int): seconds to sleep between stop and start so
            in-flight ios and pvc creation settle down.
        hostname (str): master node on which gluster pod exists.

    Raises:
        AssertionError: any of the volume stop/start/status calls failed.
    """
    gluster_pod = _get_gluster_pod(gluster_pod, hostname)

    gluster_volume_status = get_volume_status(gluster_pod, block_hosting_vol)
    if not gluster_volume_status:
        raise AssertionError("failed to get gluster volume status")

    g.log.info("Gluster volume %s status\n%s : " % (
        block_hosting_vol, gluster_volume_status)
    )

    ret, out, err = volume_stop(gluster_pod, block_hosting_vol)
    if ret != 0:
        err_msg = "failed to stop gluster volume %s on pod %s error: %s" % (
            block_hosting_vol, gluster_pod, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    # Explicit wait to stop ios and pvc creation (sleep_time seconds)
    time.sleep(sleep_time)

    ret, out, err = volume_start(gluster_pod, block_hosting_vol, force=True)
    if ret != 0:
        err_msg = "failed to start gluster volume %s on pod %s error: %s" % (
            block_hosting_vol, gluster_pod, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    ret, out, err = volume_status(gluster_pod, block_hosting_vol)
    if ret != 0:
        err_msg = ("failed to get status for gluster volume %s on pod %s "
                   "error: %s" % (block_hosting_vol, gluster_pod, err))
        g.log.error(err_msg)
        raise AssertionError(err_msg)


@podcmd.GlustoPod()
def match_heketi_and_gluster_block_volumes_by_prefix(
        gluster_pod, heketi_block_volumes, block_vol_prefix, hostname=None):
    """Match block volumes from heketi and gluster. This function can't
    be used for block volumes with custom prefixes

    Args:
        gluster_pod (podcmd.Pod | str): gluster pod object or gluster
            pod name.
        heketi_block_volumes (list): list of heketi block volumes with
            which gluster block volumes need to be matched; expected to
            be sorted, since it is compared against a sorted list.
        block_vol_prefix (str): block volume prefix by which the block
            volumes needs to be filtered (and stripped before compare).
        hostname (str): ocp master node on which oc command gets executed.

    Raises:
        AssertionError: the two block volume lists differ.
    """
    gluster_pod = _get_gluster_pod(gluster_pod, hostname)

    gluster_vol_list = get_volume_list(gluster_pod)

    gluster_vol_block_list = []
    # skip the first volume — presumably the heketi db volume, which
    # hosts no block volumes (TODO(review): confirm against deployment)
    for gluster_vol in gluster_vol_list[1:]:
        ret, out, err = block_list(gluster_pod, gluster_vol)
        try:
            if ret != 0 and json.loads(out)["RESULT"] == "FAIL":
                msg = "failed to get block volume list with error: %s" % err
                g.log.error(msg)
                raise AssertionError(msg)
        except Exception as e:
            # also covers non-JSON output from a failed block list call
            g.log.error(e)
            raise

        gluster_vol_block_list.extend([
            block_vol.replace(block_vol_prefix, "")
            for block_vol in json.loads(out)["blocks"]
            if block_vol.startswith(block_vol_prefix)
        ])

    # BUG FIX: cmp() was removed in Python 3; ``!=`` is equivalent here
    if sorted(gluster_vol_block_list) != heketi_block_volumes:
        err_msg = "Gluster and Heketi Block volume list match failed"
        err_msg += "\nGluster Volumes: %s, " % gluster_vol_block_list
        err_msg += "\nBlock volumes %s" % heketi_block_volumes
        err_msg += "\nDifference: %s" % (set(gluster_vol_block_list) ^
                                         set(heketi_block_volumes))
        raise AssertionError(err_msg)


@podcmd.GlustoPod()
def get_block_hosting_volume_name(heketi_client_node, heketi_server_url,
                                  block_volume, gluster_pod, hostname=None):
    """Returns block hosting volume name of given block volume

    Args:
        heketi_client_node (str): Node on which cmd has to be executed.
        heketi_server_url (str): Heketi server url
        block_volume (str): Block volume of which block hosting volume
                            returned
        gluster_pod (podcmd.Pod | str): Gluster pod object or gluster
                                        pod name
        hostname (str): OCP master node on which ocp commands get executed

    Returns:
        str: Name of the block hosting volume for given block volume, or
        ``None`` when the hosting volume id matches no gluster volume.
    """
    gluster_pod = _get_gluster_pod(gluster_pod, hostname)

    block_vol_info = heketi_blockvolume_info(
        heketi_client_node, heketi_server_url, block_volume
    )

    for line in block_vol_info.splitlines():
        block_hosting_vol_match = re.search(
            r"^Block Hosting Volume: (.*)$", line
        )
        if not block_hosting_vol_match:
            continue

        gluster_vol_list = get_volume_list(gluster_pod)
        for vol in gluster_vol_list:
            if block_hosting_vol_match.group(1).strip() in vol:
                return vol
gluster - - Args: - gluster_pod (podcmd | str): gluster pod class object has gluster - pod and ocp master node or gluster - pod name - heketi_block_volumes (list): list of heketi block volumes with - which gluster block volumes need to - be matched - block_vol_prefix (str): block volume prefix by which the block - volumes needs to be filtered - hostname (str): master node on which gluster pod exists - - """ - if isinstance(gluster_pod, podcmd.Pod): - g.log.info("Recieved gluster pod object using same") - elif isinstance(gluster_pod, six.string_types) and hostname: - g.log.info("Recieved gluster pod name and hostname") - gluster_pod = podcmd.Pod(hostname, gluster_pod) - else: - raise exceptions.ExecutionError("Invalid glsuter pod parameter") - - gluster_vol_list = get_volume_list(gluster_pod) - - gluster_vol_block_list = [] - for gluster_vol in gluster_vol_list[1:]: - ret, out, err = block_list(gluster_pod, gluster_vol) - gluster_vol_block_list.extend([ - block_vol.replace(block_vol_prefix, "") - for block_vol in json.loads(out)["blocks"] - if block_vol.startswith(block_vol_prefix) - ]) - - assert sorted(gluster_vol_block_list) == heketi_block_volumes, ( - "Gluster and Heketi Block volume list match failed") - - def get_heketi_metrics(heketi_client_node, heketi_server_url, prometheus_format=False): ''' Execute curl command to get metrics output diff --git a/cns-libs/cnslibs/common/openshift_ops.py b/cns-libs/cnslibs/common/openshift_ops.py index 7e000bc7..3a6f38b3 100644 --- a/cns-libs/cnslibs/common/openshift_ops.py +++ b/cns-libs/cnslibs/common/openshift_ops.py @@ -1422,7 +1422,12 @@ def match_pvc_and_pv(hostname, prefix): if pv[0].startswith(prefix) ]) - assert pvc_list == pv_list, "PVC and PV list match failed" + if cmp(pvc_list, pv_list) != 0: + err_msg = "PVC and PV list match failed" + err_msg += "\nPVC list: %s, " % pvc_list + err_msg += "\nPV list %s" % pv_list + err_msg += "\nDifference: %s" % (set(pvc_list) ^ set(pv_list)) + raise AssertionError(err_msg) 
def wait_for_process_to_kill_on_pod(
        pod, pid, hostname, timeout=60, interval=3):
    """Wait until the given process disappears from the pod.

    Polls ``ps`` on the pod and succeeds once *pid* is no longer listed;
    raises if the process is still present after *timeout* seconds.

    Args:
        pid (int | str): process id expected to be killed on pod
        pod (str): pod name on which process id was killed
        hostname (str): hostname on which pod is present
        timeout (int): max seconds to wait for the process to die
        interval (int): seconds between polls

    Raises:
        AssertionError: the ``ps`` lookup itself failed on the pod.
        exceptions.ExecutionError: process still alive after *timeout*.
    """
    killed_pid_cmd = "ps -eaf | grep %s | grep -v grep | awk '{print $2}'"
    # BUG FIX: honor the timeout/interval parameters; they were
    # previously ignored in favor of hard-coded 60/3.
    _waiter = waiter.Waiter(timeout=timeout, interval=interval)
    for w in _waiter:
        ret, out, err = oc_rsh(hostname, pod, killed_pid_cmd % pid)
        if ret != 0:
            err_msg = ("failed to get killed process id '%s' details "
                       "from pod '%s' err: %s" % (pid, pod, err))
            g.log.error(err_msg)
            raise AssertionError(err_msg)

        # BUG FIX: compare as strings so an int pid (per the docstring)
        # is detected correctly; previously ``out.strip() == pid`` could
        # never be True for an int and the wait ended immediately.
        if out.strip() != str(pid):
            g.log.info("brick process '%s' killed on pod '%s'" % (pid, pod))
            break

    if w.expired:
        error_msg = ("process id '%s' still exists on pod '%s' after waiting "
                     "for it '%s' seconds to get kill" % (pid, pod, timeout))
        g.log.error(error_msg)
        raise exceptions.ExecutionError(error_msg)