author     vamahaja <vamahaja@redhat.com>    2018-10-04 17:30:11 +0530
committer  vamahaja <vamahaja@redhat.com>    2018-12-10 09:22:38 +0530
commit     024f0f0bc9f8c969c8f5a3ec494cee0c019f1868 (patch)
tree       8a000350e82a43e01e90b31d437cf3973d29f9ff /cns-libs
parent     31492fa754bd77e583564d8356822500078d1e2c (diff)
[CNS-1314][CNS-1285] Restart gluster block volumes and validate
Change-Id: Ib7e3125e5120a91fe431816b33be4d4e6f15078e
Signed-off-by: vamahaja <vamahaja@redhat.com>
Diffstat (limited to 'cns-libs')
-rw-r--r--  cns-libs/cnslibs/common/gluster_ops.py    296
-rw-r--r--  cns-libs/cnslibs/common/heketi_ops.py      46
-rw-r--r--  cns-libs/cnslibs/common/openshift_ops.py   47
3 files changed, 342 insertions(+), 47 deletions(-)
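
The new helpers added by this patch can be chained in a test case roughly as in the minimal sketch below. The sketch is not part of the patch; the master node, gluster pod name, heketi endpoint and block volume id are placeholder assumptions.

    from cnslibs.common.gluster_ops import (
        get_block_hosting_volume_name,
        restart_block_hosting_volume,
        wait_to_heal_complete,
    )

    master_node = "ocp-master.example.com"      # assumed OCP master hostname
    gluster_pod = "glusterfs-storage-abc12"     # assumed gluster pod name
    heketi_node = "heketi-cli-node.example.com" # assumed heketi client node
    heketi_url = "http://heketi-storage:8080"   # assumed heketi server url
    block_vol_id = "af5a6a1ac3b4f02b3c1b2e3d"   # assumed heketi block volume id

    # Resolve the block hosting volume backing the heketi block volume,
    # restart it, then wait for self-heal to finish on all gluster volumes.
    bhv = get_block_hosting_volume_name(
        heketi_node, heketi_url, block_vol_id, gluster_pod,
        hostname=master_node)
    restart_block_hosting_volume(gluster_pod, bhv, hostname=master_node)
    wait_to_heal_complete(gluster_pod, hostname=master_node)
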
diff --git a/cns-libs/cnslibs/common/gluster_ops.py b/cns-libs/cnslibs/common/gluster_ops.py
new file mode 100644
index 00000000..76b3bc7d
--- /dev/null
+++ b/cns-libs/cnslibs/common/gluster_ops.py
@@ -0,0 +1,296 @@
+import six
+import time
+import json
+import re
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.heal_libs import is_heal_complete
+from glustolibs.gluster.volume_ops import (
+ get_volume_status,
+ get_volume_list,
+ volume_status,
+ volume_start,
+ volume_stop
+)
+from glustolibs.gluster.block_ops import block_list
+from cnslibs.common.openshift_ops import (
+ oc_get_pods,
+ oc_rsh,
+ wait_for_process_to_kill_on_pod
+)
+from cnslibs.common.heketi_ops import heketi_blockvolume_info
+from cnslibs.common import exceptions, podcmd
+from cnslibs.common import waiter
+
+
+def _get_gluster_pod(gluster_pod, hostname=None):
+ """create glusto.podcmd object if gluster_pod is string and
+ hostname is given else returns gluster_pod object given
+
+ Args:
+ gluster_pod (podcmd | str): gluster pod class object has gluster
+ pod and ocp master node or gluster
+ pod name
+ hostname (str): master node on which gluster pod exists
+ """
+ if isinstance(gluster_pod, podcmd.Pod):
+ return gluster_pod
+ elif isinstance(gluster_pod, six.string_types):
+ if hostname:
+ return podcmd.Pod(hostname, gluster_pod)
+ else:
+ raise exceptions.ExecutionError(
+ "gluster pod is string '%s' but hostname '%s' not valid" % (
+ gluster_pod, hostname)
+ )
+ else:
+ raise exceptions.ExecutionError(
+ "invalid gluster pod parameter '%s', '%s'" % (
+ gluster_pod, type(gluster_pod))
+ )
+
+
+@podcmd.GlustoPod()
+def wait_to_heal_complete(
+ gluster_pod, hostname=None, timeout=300, wait_step=5):
+ """Monitors heal for volumes on gluster
+ gluster_pod (podcmd | str): gluster pod class object has gluster
+ pod and ocp master node or gluster
+ pod name
+ hostname (str): master node on which gluster pod exists
+ """
+ gluster_pod = _get_gluster_pod(gluster_pod, hostname)
+
+ gluster_vol_list = get_volume_list(gluster_pod)
+ if not gluster_vol_list:
+ raise AssertionError("failed to get gluster volume list")
+
+ _waiter = waiter.Waiter(timeout=timeout, interval=wait_step)
+ for gluster_vol in gluster_vol_list:
+ for w in _waiter:
+ if is_heal_complete(gluster_pod, gluster_vol):
+ break
+
+ if w.expired:
+ err_msg = ("reached timeout waiting for all the gluster volumes "
+ "to reach the 'healed' state.")
+ g.log.error(err_msg)
+ raise AssertionError(err_msg)
+
+
+@podcmd.GlustoPod()
+def get_brick_pids(gluster_pod, block_hosting_vol, hostname=None):
+ """gets brick pids from gluster pods
+
+ Args:
+ hostname (str): hostname on which gluster pod exists
+ gluster_pod (podcmd | str): gluster pod class object has gluster
+ pod and ocp master node or gluster
+ pod name
+ block_hosting_vol (str): Block hosting volume id
+ """
+ gluster_pod = _get_gluster_pod(gluster_pod, hostname)
+
+ gluster_volume_status = get_volume_status(gluster_pod, block_hosting_vol)
+ if not gluster_volume_status:
+ raise AssertionError("failed to get volume status for gluster "
+ "volume '%s' on pod '%s'" % (
+ gluster_pod, block_hosting_vol))
+
+ gluster_volume_status = gluster_volume_status.get(block_hosting_vol)
+ assert gluster_volume_status, ("gluster volume %s not present" % (
+ block_hosting_vol))
+
+ pids = {}
+ for parent_key, parent_val in gluster_volume_status.items():
+ for child_key, child_val in parent_val.items():
+ if not child_key.startswith("/var"):
+ continue
+
+ pid = child_val["pid"]
+            # When a brick is down, its pid is reported as -1, which is
+            # an unexpected situation here, hence raise an error.
+            if pid == "-1":
+                raise AssertionError("something went wrong, brick pid is -1")
+
+ pids[parent_key] = pid
+
+ return pids
+
+
+@podcmd.GlustoPod()
+def restart_brick_process(hostname, gluster_pod, block_hosting_vol):
+ """restarts brick process of block hosting volumes
+
+ Args:
+ hostname (str): hostname on which gluster pod exists
+ gluster_pod (podcmd | str): gluster pod class object has gluster
+ pod and ocp master node or gluster
+ pod name
+ block_hosting_vol (str): block hosting volume name
+ """
+ pids = get_brick_pids(gluster_pod, block_hosting_vol, hostname)
+
+    # Use 'count' to limit brick process kills to a maximum of two pods
+ count = 0
+ killed_process = {}
+ pid_keys = pids.keys()
+ oc_pods = oc_get_pods(hostname)
+ for pod in oc_pods.keys():
+ if not (oc_pods[pod]["ip"] in pid_keys and count <= 1):
+ continue
+
+ ret, out, err = oc_rsh(
+ hostname, pod, "kill -9 %s" % pids[oc_pods[pod]["ip"]]
+ )
+ if ret != 0:
+ err_msg = "failed to kill process id %s error: %s" % (
+ pids[oc_pods[pod]["ip"]], err)
+ g.log.error(err_msg)
+ raise AssertionError(err_msg)
+
+ killed_process[pod] = pids[oc_pods[pod]["ip"]]
+ count += 1
+
+ for pod, pid in killed_process.items():
+ wait_for_process_to_kill_on_pod(pod, pid, hostname)
+
+ ret, out, err = volume_start(gluster_pod, block_hosting_vol, force=True)
+ if ret != 0:
+ err_msg = "failed to start gluster volume %s on pod %s error: %s" % (
+ block_hosting_vol, gluster_pod, err)
+ g.log.error(err_msg)
+ raise AssertionError(err_msg)
+
+
+@podcmd.GlustoPod()
+def restart_block_hosting_volume(
+ gluster_pod, block_hosting_vol, sleep_time=120, hostname=None):
+ """restars block hosting volume service
+
+ Args:
+ hostname (str): hostname on which gluster pod exists
+ gluster_pod (podcmd | str): gluster pod class object has gluster
+ pod and ocp master node or gluster
+ pod name
+ block_hosting_vol (str): name of block hosting volume
+ """
+ gluster_pod = _get_gluster_pod(gluster_pod, hostname)
+
+ gluster_volume_status = get_volume_status(gluster_pod, block_hosting_vol)
+ if not gluster_volume_status:
+ raise AssertionError("failed to get gluster volume status")
+
+ g.log.info("Gluster volume %s status\n%s : " % (
+ block_hosting_vol, gluster_volume_status)
+ )
+
+ ret, out, err = volume_stop(gluster_pod, block_hosting_vol)
+ if ret != 0:
+ err_msg = "failed to stop gluster volume %s on pod %s error: %s" % (
+ block_hosting_vol, gluster_pod, err)
+ g.log.error(err_msg)
+ raise AssertionError(err_msg)
+
+    # Explicit wait (default 2 minutes) for in-flight I/O and PVC creation to stop
+ time.sleep(sleep_time)
+ ret, out, err = volume_start(gluster_pod, block_hosting_vol, force=True)
+ if ret != 0:
+ err_msg = "failed to start gluster volume %s on pod %s error: %s" % (
+ block_hosting_vol, gluster_pod, err)
+ g.log.error(err_msg)
+ raise AssertionError(err_msg)
+
+ ret, out, err = volume_status(gluster_pod, block_hosting_vol)
+ if ret != 0:
+ err_msg = ("failed to get status for gluster volume %s on pod %s "
+ "error: %s" % (block_hosting_vol, gluster_pod, err))
+ g.log.error(err_msg)
+ raise AssertionError(err_msg)
+
+
+@podcmd.GlustoPod()
+def match_heketi_and_gluster_block_volumes_by_prefix(
+ gluster_pod, heketi_block_volumes, block_vol_prefix, hostname=None):
+ """Match block volumes from heketi and gluster. This function can't
+ be used for block volumes with custom prefixes
+
+ Args:
+ gluster_pod (podcmd | str): gluster pod class object has gluster
+ pod and ocp master node or gluster
+ pod name
+ heketi_block_volumes (list): list of heketi block volumes with
+ which gluster block volumes need to
+ be matched
+        block_vol_prefix (str): block volume prefix by which the block
+                                volumes need to be filtered
+ hostname (str): ocp master node on which oc command gets executed
+
+ """
+ gluster_pod = _get_gluster_pod(gluster_pod, hostname)
+
+ gluster_vol_list = get_volume_list(gluster_pod)
+
+ gluster_vol_block_list = []
+ for gluster_vol in gluster_vol_list[1:]:
+ ret, out, err = block_list(gluster_pod, gluster_vol)
+ try:
+ if ret != 0 and json.loads(out)["RESULT"] == "FAIL":
+ msg = "failed to get block volume list with error: %s" % err
+ g.log.error(msg)
+ raise AssertionError(msg)
+ except Exception as e:
+ g.log.error(e)
+ raise
+
+ gluster_vol_block_list.extend([
+ block_vol.replace(block_vol_prefix, "")
+ for block_vol in json.loads(out)["blocks"]
+ if block_vol.startswith(block_vol_prefix)
+ ])
+
+ if cmp(sorted(gluster_vol_block_list), heketi_block_volumes) != 0:
+ err_msg = "Gluster and Heketi Block volume list match failed"
+ err_msg += "\nGluster Volumes: %s, " % gluster_vol_block_list
+ err_msg += "\nBlock volumes %s" % heketi_block_volumes
+ err_msg += "\nDifference: %s" % (set(gluster_vol_block_list) ^
+ set(heketi_block_volumes))
+ raise AssertionError(err_msg)
+
+
+@podcmd.GlustoPod()
+def get_block_hosting_volume_name(heketi_client_node, heketi_server_url,
+ block_volume, gluster_pod, hostname=None):
+ """Returns block hosting volume name of given block volume
+
+ Args:
+ heketi_client_node (str): Node on which cmd has to be executed.
+ heketi_server_url (str): Heketi server url
+        block_volume (str): Block volume for which the block hosting
+                            volume name is returned
+ gluster_pod (podcmd | str): Gluster pod class object has gluster
+ pod and ocp master node or gluster
+ pod name
+ hostname (str): OCP master node on which ocp commands get executed
+
+ Returns:
+ str : Name of the block hosting volume for given block volume
+ """
+ gluster_pod = _get_gluster_pod(gluster_pod, hostname)
+
+ block_vol_info = heketi_blockvolume_info(
+ heketi_client_node, heketi_server_url, block_volume
+ )
+
+ for line in block_vol_info.splitlines():
+ block_hosting_vol_match = re.search(
+ "^Block Hosting Volume: (.*)$", line
+ )
+
+ if not block_hosting_vol_match:
+ continue
+
+ gluster_vol_list = get_volume_list(gluster_pod)
+ for vol in gluster_vol_list:
+ if block_hosting_vol_match.group(1).strip() in vol:
+ return vol
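
For the brick-level restart path, a usage sketch under the same placeholder assumptions (master node, gluster pod and block hosting volume names are not taken from this patch):

    from cnslibs.common.gluster_ops import (
        get_brick_pids,
        restart_brick_process,
    )

    master_node = "ocp-master.example.com"      # assumed OCP master hostname
    gluster_pod = "glusterfs-storage-abc12"     # assumed gluster pod name
    block_hosting_vol = "vol_0123456789abcdef"  # assumed block hosting volume

    old_pids = get_brick_pids(
        gluster_pod, block_hosting_vol, hostname=master_node)

    # Kills up to two brick processes and force-starts the volume again.
    restart_brick_process(master_node, gluster_pod, block_hosting_vol)

    # After the force start, the restarted bricks get new process ids.
    new_pids = get_brick_pids(
        gluster_pod, block_hosting_vol, hostname=master_node)
    assert old_pids != new_pids, "brick processes were not restarted"
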
diff --git a/cns-libs/cnslibs/common/heketi_ops.py b/cns-libs/cnslibs/common/heketi_ops.py
index 534017ff..12910492 100644
--- a/cns-libs/cnslibs/common/heketi_ops.py
+++ b/cns-libs/cnslibs/common/heketi_ops.py
@@ -3,20 +3,18 @@
"""
import json
-import six
from glusto.core import Glusto as g
-from glustolibs.gluster.block_ops import block_list
-from glustolibs.gluster.volume_ops import get_volume_list
from collections import OrderedDict
try:
from heketi import HeketiClient
except ImportError:
g.log.error("Please install python-client for heketi and re-run the test")
-from cnslibs.common import exceptions, podcmd
+from cnslibs.common import exceptions
from cnslibs.common.utils import parse_prometheus_data
+
HEKETI_SSH_KEY = "/etc/heketi/heketi_key"
HEKETI_CONFIG_FILE = "/etc/heketi/heketi.json"
@@ -2351,46 +2349,6 @@ def rm_arbiter_tag(heketi_client_node, heketi_server_url, source, source_id,
source, source_id, 'arbiter', **kwargs)
-@podcmd.GlustoPod()
-def match_heketi_and_gluster_block_volumes(
- gluster_pod, heketi_block_volumes, block_vol_prefix, hostname=None):
- """Match block volumes from heketi and gluster
-
- Args:
- gluster_pod (podcmd | str): gluster pod class object has gluster
- pod and ocp master node or gluster
- pod name
- heketi_block_volumes (list): list of heketi block volumes with
- which gluster block volumes need to
- be matched
- block_vol_prefix (str): block volume prefix by which the block
- volumes needs to be filtered
- hostname (str): master node on which gluster pod exists
-
- """
- if isinstance(gluster_pod, podcmd.Pod):
- g.log.info("Recieved gluster pod object using same")
- elif isinstance(gluster_pod, six.string_types) and hostname:
- g.log.info("Recieved gluster pod name and hostname")
- gluster_pod = podcmd.Pod(hostname, gluster_pod)
- else:
- raise exceptions.ExecutionError("Invalid glsuter pod parameter")
-
- gluster_vol_list = get_volume_list(gluster_pod)
-
- gluster_vol_block_list = []
- for gluster_vol in gluster_vol_list[1:]:
- ret, out, err = block_list(gluster_pod, gluster_vol)
- gluster_vol_block_list.extend([
- block_vol.replace(block_vol_prefix, "")
- for block_vol in json.loads(out)["blocks"]
- if block_vol.startswith(block_vol_prefix)
- ])
-
- assert sorted(gluster_vol_block_list) == heketi_block_volumes, (
- "Gluster and Heketi Block volume list match failed")
-
-
def get_heketi_metrics(heketi_client_node, heketi_server_url,
prometheus_format=False):
''' Execute curl command to get metrics output
diff --git a/cns-libs/cnslibs/common/openshift_ops.py b/cns-libs/cnslibs/common/openshift_ops.py
index 7e000bc7..3a6f38b3 100644
--- a/cns-libs/cnslibs/common/openshift_ops.py
+++ b/cns-libs/cnslibs/common/openshift_ops.py
@@ -1422,7 +1422,12 @@ def match_pvc_and_pv(hostname, prefix):
if pv[0].startswith(prefix)
])
- assert pvc_list == pv_list, "PVC and PV list match failed"
+ if cmp(pvc_list, pv_list) != 0:
+ err_msg = "PVC and PV list match failed"
+ err_msg += "\nPVC list: %s, " % pvc_list
+ err_msg += "\nPV list %s" % pv_list
+ err_msg += "\nDifference: %s" % (set(pvc_list) ^ set(pv_list))
+ raise AssertionError(err_msg)
def match_pv_and_heketi_block_volumes(
@@ -1446,8 +1451,13 @@ def match_pv_and_heketi_block_volumes(
if pv[0].startswith(pvc_prefix) and pv[1] == "gluster.org/glusterblock"
])
- assert pv_block_volumes == heketi_block_volumes, (
- "PV and Heketi Block list match failed")
+ if cmp(pv_block_volumes, heketi_block_volumes) != 0:
+ err_msg = "PV block volumes and Heketi Block volume list match failed"
+ err_msg += "\nPV Block Volumes: %s, " % pv_block_volumes
+ err_msg += "\nHeketi Block volumes %s" % heketi_block_volumes
+ err_msg += "\nDifference: %s" % (set(pv_block_volumes) ^
+ set(heketi_block_volumes))
+ raise AssertionError(err_msg)
def check_service_status(
@@ -1502,3 +1512,34 @@ def restart_service_on_pod(hostname, podname, service):
(service, podname))
g.log.error(err_msg)
raise AssertionError(err_msg)
+
+
+def wait_for_process_to_kill_on_pod(
+ pod, pid, hostname, timeout=60, interval=3):
+ """check for process presence if process is present for more than
+ timeout sec raise exception
+
+ Args:
+        pod (str): pod name on which the process is running
+        pid (int | str): process id expected to be killed on the pod
+        hostname (str): hostname on which pod is present
+        timeout (int): seconds to wait for the process to disappear
+        interval (int): polling interval in seconds
+    """
+ killed_pid_cmd = "ps -eaf | grep %s | grep -v grep | awk '{print $2}'"
+    _waiter = waiter.Waiter(timeout=timeout, interval=interval)
+ for w in _waiter:
+ ret, out, err = oc_rsh(hostname, pod, killed_pid_cmd % pid)
+ if ret != 0:
+ err_msg = ("failed to get killed process id '%s' details "
+ "from pod '%s' err: %s" % (pid, pod, err))
+ g.log.error(err_msg)
+ raise AssertionError(err_msg)
+
+        if out.strip() != str(pid):
+ g.log.info("brick process '%s' killed on pod '%s'" % (pid, pod))
+ break
+
+ if w.expired:
+ error_msg = ("process id '%s' still exists on pod '%s' after waiting "
+ "for it '%s' seconds to get kill" % (pid, pod, timeout))
+ g.log.error(error_msg)
+ raise exceptions.ExecutionError(error_msg)
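
A short sketch of how the new wait helper pairs with oc_rsh when a process is killed by hand; the pod name, PID and hostname below are assumed placeholders:

    from cnslibs.common.openshift_ops import (
        oc_rsh,
        wait_for_process_to_kill_on_pod,
    )

    master_node = "ocp-master.example.com"  # assumed OCP master hostname
    pod_name = "glusterfs-storage-abc12"    # assumed pod name
    pid = "4242"                            # assumed process id inside the pod

    ret, out, err = oc_rsh(master_node, pod_name, "kill -9 %s" % pid)
    assert ret == 0, "failed to kill pid %s on pod %s: %s" % (
        pid, pod_name, err)

    # Polls every 3 seconds, for up to 60 seconds, until the pid is gone
    # from the pod's process table; raises ExecutionError on timeout.
    wait_for_process_to_kill_on_pod(
        pod_name, pid, master_node, timeout=60, interval=3)
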