-rw-r--r--  README.rst | 2
-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/baseclass.py | 25
-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/gluster_ops.py | 91
-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/heketi_ops.py | 86
-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/openshift_ops.py | 148
-rwxr-xr-x  tests/functional/arbiter/test_arbiter.py | 4
-rw-r--r--  tests/functional/heketi/test_block_volumes_heketi.py | 136
-rw-r--r--  tests/functional/heketi/test_heketi_authentication.py | 33
-rw-r--r--  tests/functional/heketi/test_heketi_brick_evict.py | 180
-rw-r--r--  tests/functional/heketi/test_heketi_create_volume.py | 139
-rwxr-xr-x  tests/functional/heketi/test_heketi_device_operations.py | 96
-rw-r--r--  tests/functional/heketi/test_heketi_lvm_wrapper.py | 69
-rw-r--r--  tests/functional/heketi/test_heketi_zones.py | 3
-rw-r--r--  tests/functional/heketi/test_volume_expansion_and_devices.py | 42
-rw-r--r--  tests/functional/logging/test_logging_validations.py | 122
-rw-r--r--  tests/functional/metrics/test_metrics_validation.py | 39
-rw-r--r--  tests/functional/prometheous/test_prometheus_validations.py | 709
-rw-r--r--  tests/functional/prometheous/test_prometheus_validations_file.py | 335
-rw-r--r--  tests/functional/provisioning/test_dev_path_mapping_block.py | 4
-rw-r--r--  tests/functional/provisioning/test_dev_path_mapping_file.py | 422
-rw-r--r--  tests/functional/provisioning/test_dynamic_provisioning_file.py | 60
-rw-r--r--  tests/functional/provisioning/test_pv_resize.py | 9
-rw-r--r--  tests/functional/provisioning/test_storage_class_cases.py | 66
-rw-r--r--  tests/glusterfs-containers-tests-config.yaml | 6
24 files changed, 2767 insertions, 59 deletions
diff --git a/README.rst b/README.rst
index dc7c32e1..65d3c137 100644
--- a/README.rst
+++ b/README.rst
@@ -173,7 +173,7 @@ For example:
$ tox -e functional -- glusto -c 'config.yml' \
-l /tmp/glustotests-ocp.log --log-level DEBUG \
- --pytest='-v -x tests -m ocp'
+ '--pytest=-v -x tests -m ocp'
One can configure log files, log levels in the test cases as well. For details
on how to use `glusto` framework for configuring logs in tests, refer `docs
diff --git a/openshift-storage-libs/openshiftstoragelibs/baseclass.py b/openshift-storage-libs/openshiftstoragelibs/baseclass.py
index 81327c11..b5969764 100644
--- a/openshift-storage-libs/openshiftstoragelibs/baseclass.py
+++ b/openshift-storage-libs/openshiftstoragelibs/baseclass.py
@@ -45,7 +45,6 @@ from openshiftstoragelibs.openshift_ops import (
get_pod_name_from_rc,
get_pv_name_from_pvc,
oc_create_app_dc_with_io,
- oc_create_busybox_app_dc_with_io,
oc_create_pvc,
oc_create_sc,
oc_create_secret,
@@ -127,6 +126,11 @@ class BaseClass(unittest.TestCase):
cls.heketi_logs_before_delete = bool(
g.config.get("common", {}).get("heketi_logs_before_delete", False))
+ cls.io_container_image_cirros = cls.openshift_config.get(
+ "io_container_images", {}).get("cirros", "cirros")
+ cls.io_container_image_busybox = cls.openshift_config.get(
+ "io_container_images", {}).get("busybox", "busybox")
+
cmd = "echo -n %s | base64" % cls.heketi_cli_key
ret, out, err = g.run(cls.ocp_master_node[0], cmd, "root")
if ret != 0:
@@ -434,7 +438,7 @@ class BaseClass(unittest.TestCase):
def create_dcs_with_pvc(
self, pvc_names, timeout=600, wait_step=5,
dc_name_prefix='autotests-dc', space_to_use=1048576, label=None,
- skip_cleanup=False, is_busybox=False):
+ skip_cleanup=False, image=None):
"""Create bunch of DCs with app PODs which use unique PVCs.
Args:
@@ -445,7 +449,7 @@ class BaseClass(unittest.TestCase):
dc_name_prefix(str): name prefix for deployement config.
space_to_use(int): space to use for io's in KB.
label (dict): keys and value for adding label into DC.
- is_busybox (bool): True for busybox app pod else default is False
+ image (str): container image used for I/O.
Returns: dictionary with following structure:
{
"pvc_name_1": ("dc_name_1", "pod_name_1"),
@@ -454,16 +458,17 @@ class BaseClass(unittest.TestCase):
"pvc_name_n": ("dc_name_n", "pod_name_n"),
}
"""
+ if not image:
+ image = self.io_container_image_cirros
+
pvc_names = (
pvc_names
if isinstance(pvc_names, (list, set, tuple)) else [pvc_names])
dc_and_pod_names, dc_names = {}, {}
- function = (oc_create_busybox_app_dc_with_io if is_busybox else
- oc_create_app_dc_with_io)
for pvc_name in pvc_names:
- dc_name = function(self.ocp_client[0], pvc_name,
- space_to_use=space_to_use,
- dc_name_prefix=dc_name_prefix, label=label)
+ dc_name = oc_create_app_dc_with_io(
+ self.ocp_client[0], pvc_name, space_to_use=space_to_use,
+ dc_name_prefix=dc_name_prefix, label=label, image=image)
dc_names[pvc_name] = dc_name
if not skip_cleanup:
self.addCleanup(oc_delete, self.ocp_client[0], 'dc', dc_name)
@@ -484,11 +489,11 @@ class BaseClass(unittest.TestCase):
def create_dc_with_pvc(
self, pvc_name, timeout=300, wait_step=10,
dc_name_prefix='autotests-dc', label=None,
- skip_cleanup=False, is_busybox=False):
+ skip_cleanup=False, image=None):
return self.create_dcs_with_pvc(
pvc_name, timeout, wait_step,
dc_name_prefix=dc_name_prefix, label=label,
- skip_cleanup=skip_cleanup, is_busybox=is_busybox)[pvc_name]
+ skip_cleanup=skip_cleanup, image=image)[pvc_name]
def create_heketi_volume_with_name_and_wait(
self, name, size, raise_on_cleanup_error=True,
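
A minimal usage sketch of the new image parameter (illustrative only; it simply mirrors how the updated tests below consume the images configured under openshift / io_container_images, and all names come from BaseClass):

    # Sketch: create an app DC backed by a PVC, using the busybox image
    # configured under openshift -> io_container_images (default "busybox").
    # Omitting image= falls back to self.io_container_image_cirros.
    dc_name, pod_name = self.create_dc_with_pvc(
        self.pvc_name, image=self.io_container_image_busybox)
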
diff --git a/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py b/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py
index f621a860..7f5f5535 100644
--- a/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py
+++ b/openshift-storage-libs/openshiftstoragelibs/gluster_ops.py
@@ -4,7 +4,12 @@ try:
except ImportError:
# py2
import json
+try:
+ import xml.etree.cElementTree as etree
+except ImportError:
+ import xml.etree.ElementTree as etree
import re
+import six
import time
from glusto.core import Glusto as g
@@ -20,7 +25,10 @@ from glustolibs.gluster.volume_ops import (
from openshiftstoragelibs import exceptions
from openshiftstoragelibs.heketi_ops import heketi_blockvolume_info
-from openshiftstoragelibs.openshift_ops import cmd_run_on_gluster_pod_or_node
+from openshiftstoragelibs.openshift_ops import (
+ cmd_run_on_gluster_pod_or_node,
+ get_ocp_gluster_pod_details,
+)
from openshiftstoragelibs import podcmd
from openshiftstoragelibs import waiter
@@ -352,3 +360,84 @@ def get_gluster_vol_free_inodes_with_hosts_of_bricks(vol_name):
inodes_info = {brick_process: process_data["inodesFree"]}
hosts_with_inodes_info[g_node].update(inodes_info)
return hosts_with_inodes_info
+
+
+def _get_gluster_cmd(target, command):
+
+ if isinstance(command, six.string_types):
+ command = [command]
+ ocp_client_node = list(g.config['ocp_servers']['client'].keys())[0]
+ gluster_pods = get_ocp_gluster_pod_details(ocp_client_node)
+
+ if target == 'auto_get_gluster_endpoint':
+ if gluster_pods:
+ target = podcmd.Pod(ocp_client_node, gluster_pods[0]["pod_name"])
+ else:
+ target = list(g.config.get("gluster_servers", {}).keys())[0]
+ elif not isinstance(target, podcmd.Pod) and gluster_pods:
+ for g_pod in gluster_pods:
+ if target in (g_pod['pod_host_ip'], g_pod['pod_hostname']):
+ target = podcmd.Pod(ocp_client_node, g_pod['pod_name'])
+ break
+
+ if isinstance(target, podcmd.Pod):
+ return target.node, ' '.join(['oc', 'rsh', target.podname] + command)
+
+ return target, ' '.join(command)
+
+
+def get_peer_status(mnode):
+ """Parse the output of command 'gluster peer status' using run_async.
+
+ Args:
+ mnode (str): Node on which command has to be executed.
+
+ Returns:
+        NoneType: None if command execution or XML parsing fails.
+ list: list of dicts on success.
+
+ Examples:
+ >>> get_peer_status(mnode = 'abc.lab.eng.xyz.com')
+ [{'uuid': '77dc299a-32f7-43d8-9977-7345a344c398',
+ 'hostname': 'ijk.lab.eng.xyz.com',
+ 'state': '3',
+ 'hostnames' : ['ijk.lab.eng.xyz.com'],
+ 'connected': '1',
+ 'stateStr': 'Peer in Cluster'},
+
+ {'uuid': 'b15b8337-9f8e-4ec3-8bdb-200d6a67ae12',
+ 'hostname': 'def.lab.eng.xyz.com',
+ 'state': '3',
+ 'hostnames': ['def.lab.eng.xyz.com'],
+ 'connected': '1',
+ 'stateStr': 'Peer in Cluster'}
+ ]
+ """
+ mnode, cmd = _get_gluster_cmd(mnode, "gluster peer status --xml")
+ obj = g.run_async(mnode, cmd, log_level='DEBUG')
+ ret, out, err = obj.async_communicate()
+
+ if ret:
+ g.log.error(
+ "Failed to execute peer status command on node {} with error "
+ "{}".format(mnode, err))
+ return None
+
+ try:
+ root = etree.XML(out)
+ except etree.ParseError:
+ g.log.error("Failed to parse the gluster peer status xml output.")
+ return None
+
+ peer_status_list = []
+ for peer in root.findall("peerStatus/peer"):
+ peer_dict = {}
+ for element in peer.getchildren():
+ if element.tag == "hostnames":
+ hostnames_list = []
+ for hostname in element.getchildren():
+ hostnames_list.append(hostname.text)
+ element.text = hostnames_list
+ peer_dict[element.tag] = element.text
+ peer_status_list.append(peer_dict)
+ return peer_status_list
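
A minimal usage sketch of get_peer_status (illustrative only; it assumes the caller can resolve 'auto_get_gluster_endpoint' the same way the other gluster_ops helpers do):

    # Sketch: fetch peer status and assert that every peer is connected.
    peers = get_peer_status('auto_get_gluster_endpoint')
    assert peers is not None, "peer status command or XML parsing failed"
    for peer in peers:
        assert peer['connected'] == '1', peer['stateStr']
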
diff --git a/openshift-storage-libs/openshiftstoragelibs/heketi_ops.py b/openshift-storage-libs/openshiftstoragelibs/heketi_ops.py
index 77be7883..1f6a7705 100644
--- a/openshift-storage-libs/openshiftstoragelibs/heketi_ops.py
+++ b/openshift-storage-libs/openshiftstoragelibs/heketi_ops.py
@@ -293,6 +293,55 @@ def heketi_volume_expand(heketi_client_node, heketi_server_url, volume_id,
return out
+def heketi_blockvolume_expand(heketi_client_node, heketi_server_url,
+ blockvolume_id, new_size, raise_on_error=True,
+ **kwargs):
+ """Executes heketi blockvolume expand command.
+
+ Args:
+ heketi_client_node (str): Node on which cmd has to be executed.
+ heketi_server_url (str): Heketi server url
+ blockvolume_id (str): blockvolume ID
+        new_size (int): new size of the blockvolume
+ raise_on_error (bool): whether or not to raise exception
+ in case of an error.
+
+ Kwargs:
+ The keys, values in kwargs are:
+ - json : (bool)
+ - secret : (str)|None
+ - user : (str)|None
+
+ Returns:
+        dict: parsed JSON output if json is True.
+        string: raw string output if json is False.
+
+ Raises:
+ exceptions.ExecutionError: if command fails.
+ """
+
+ version = heketi_version.get_heketi_version(heketi_client_node)
+ if version < '9.0.0-14':
+ msg = ("heketi-client package {} does not support blockvolume "
+ "expand".format(version.v_str))
+ g.log.error(msg)
+ raise NotImplementedError(msg)
+
+ heketi_server_url, json_arg, admin_key, user = _set_heketi_global_flags(
+ heketi_server_url, **kwargs)
+
+ cmd = ("heketi-cli -s {} blockvolume expand {} "
+ "--new-size={} {} {} {}".format(
+ heketi_server_url, blockvolume_id, new_size, json_arg,
+ admin_key, user))
+ cmd = TIMEOUT_PREFIX + cmd
+ out = heketi_cmd_run(
+ heketi_client_node, cmd, raise_on_error=raise_on_error)
+ if json_arg and out:
+ return json.loads(out)
+ return out
+
+
def heketi_volume_delete(heketi_client_node, heketi_server_url, volume_id,
raise_on_error=True, **kwargs):
"""Executes heketi volume delete command.
@@ -2067,3 +2116,40 @@ def validate_dev_path_vg_and_uuid(
# Compare the uuid from node and heketi
return n_uuid == h_uuid
+
+
+def heketi_brick_evict(heketi_client_node, heketi_server_url, brick_id,
+ raise_on_error=True, **kwargs):
+ """Executes heketi brick evict command.
+
+ Args:
+ heketi_client_node (str): Node on which cmd has to be executed.
+ heketi_server_url (str): Heketi server url
+ brick_id (str): Brick ID
+ raise_on_error (bool): whether or not to raise exception
+ in case of an error.
+
+ Kwargs:
+ The keys, values in kwargs are:
+ - secret : (str)|None
+ - user : (str)|None
+
+ Raises:
+ exceptions.ExecutionError: if command fails.
+ """
+
+ version = heketi_version.get_heketi_version(heketi_client_node)
+ if version < '9.0.0-14':
+ msg = (
+ "heketi-client package {} does not support brick evict".format(
+ version.v_str))
+ raise NotImplementedError(msg)
+
+ heketi_server_url, _, admin_key, user = _set_heketi_global_flags(
+ heketi_server_url, **kwargs)
+
+ cmd = "heketi-cli -s {} brick evict {} {} {}".format(
+ heketi_server_url, brick_id, admin_key, user)
+ cmd = TIMEOUT_PREFIX + cmd
+ heketi_cmd_run(
+ heketi_client_node, cmd, raise_on_error=raise_on_error)
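
A minimal usage sketch of the two new heketi helpers (illustrative only; h_client, h_url, bvol_id and brick_id are placeholders for values obtained elsewhere):

    # Sketch: expand a block volume to size 2 and check that heketi reports
    # a matching usable size, then evict a brick by its ID.
    bvol_info = heketi_blockvolume_expand(h_client, h_url, bvol_id, 2, json=True)
    assert bvol_info["size"] == bvol_info["usablesize"], bvol_info
    heketi_brick_evict(h_client, h_url, brick_id)
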
diff --git a/openshift-storage-libs/openshiftstoragelibs/openshift_ops.py b/openshift-storage-libs/openshiftstoragelibs/openshift_ops.py
index 5f53dabb..a228e190 100644
--- a/openshift-storage-libs/openshiftstoragelibs/openshift_ops.py
+++ b/openshift-storage-libs/openshiftstoragelibs/openshift_ops.py
@@ -411,7 +411,7 @@ def _oc_create_app_dc_with_io_image(hostname, pvc_name, dc_name_prefix,
def oc_create_app_dc_with_io(
hostname, pvc_name, dc_name_prefix="autotests-dc-with-app-io",
- replicas=1, space_to_use=1048576, label=None):
+ replicas=1, space_to_use=1048576, label=None, image="cirros"):
"""Create DC with app PODs and attached PVC, constantly running I/O.
Args:
@@ -423,34 +423,15 @@ def oc_create_app_dc_with_io(
replicas (int): amount of application POD replicas.
space_to_use (int): value in bytes which will be used for I/O.
label (dict): dict of keys and values to add labels in DC.
+ image (str): Container image for I/O.
"""
return _oc_create_app_dc_with_io_image(
hostname, pvc_name, dc_name_prefix, replicas, space_to_use,
- label, "cirros")
-
-
-def oc_create_busybox_app_dc_with_io(
- hostname, pvc_name, dc_name_prefix="autotests-dc-with-app-io",
- replicas=1, space_to_use=1048576, label=None):
- """Create DC with app PODs and attached PVC, constantly running I/O.
-
- Args:
- hostname (str): Node on which 'oc create' command will be executed.
- pvc_name (str): name of the Persistent Volume Claim to attach to
- the application PODs where constant I/O will run.
- dc_name_prefix (str): DC name will consist of this prefix and
- random str.
- replicas (int): amount of application POD replicas.
- space_to_use (int): value in bytes which will be used for I/O.
- label (dict): dict of keys and values to add labels in DC.
- """
- return _oc_create_app_dc_with_io_image(
- hostname, pvc_name, dc_name_prefix, replicas, space_to_use,
- label, "busybox")
+ label, image=image)
def oc_create_tiny_pod_with_volume(hostname, pvc_name, pod_name_prefix='',
- mount_path='/mnt'):
+ mount_path='/mnt', image='cirros'):
"""Create tiny POD from image in 10Mb with attached volume at /mnt"""
pod_name = "%s-%s" % (pod_name_prefix, utils.get_random_str())
pod_data = json.dumps({
@@ -463,7 +444,7 @@ def oc_create_tiny_pod_with_volume(hostname, pvc_name, pod_name_prefix='',
"terminationGracePeriodSeconds": 20,
"containers": [{
"name": pod_name,
- "image": "cirros", # noqa: 10 Mb! linux image
+ "image": image, # noqa: 10 Mb! linux image
"volumeMounts": [{"mountPath": mount_path, "name": "vol"}],
"command": [
"/bin/sh", "-ec",
@@ -483,7 +464,8 @@ def oc_create_tiny_pod_with_volume(hostname, pvc_name, pod_name_prefix='',
def oc_delete(
- ocp_node, rtype, name, raise_on_absence=True, collect_logs=False):
+ ocp_node, rtype, name, raise_on_absence=True, collect_logs=False,
+ skip_res_validation=True, is_force=False):
"""Delete an OCP resource by name
Args:
@@ -495,8 +477,11 @@ def oc_delete(
else return
default value: True
collect_logs (bool): Collect logs before deleting resource
+        skip_res_validation (bool): If True (default), validate that the
+            resource exists before deletion.
+        is_force (bool): If True, delete forcefully. Default is False.
"""
- if not oc_get_yaml(ocp_node, rtype, name, raise_on_error=raise_on_absence):
+ if skip_res_validation and not oc_get_yaml(
+ ocp_node, rtype, name, raise_on_error=raise_on_absence):
return
if rtype == "pod" and collect_logs:
@@ -509,6 +494,10 @@ def oc_delete(
if openshift_version.get_openshift_version() >= '3.11':
cmd.append('--wait=false')
+ # Forcefully delete
+ if is_force:
+ cmd.append("--grace-period 0 --force")
+
command.cmd_run(cmd, hostname=ocp_node)
@@ -1084,7 +1073,7 @@ def wait_for_pod_be_ready(hostname, pod_name,
g.log.info("pod %s is in ready state and is "
"Running" % pod_name)
return True
- elif output[1] == "Error":
+        elif output[1] in ["Error", "CrashLoopBackOff"]:
msg = ("pod %s status error" % pod_name)
g.log.error(msg)
raise exceptions.ExecutionError(msg)
@@ -2055,3 +2044,108 @@ def match_pv_and_heketi_volumes(hostname, heketi_volumes, pvc_prefix):
"PV: {}, Heketi volumes {}, "
"Difference: {}".format(pv_volumes, heketi_volumes, vol_diff))
assert not vol_diff, err_msg
+
+
+def oc_create_offline_block_volume_expand_job(
+ hostname, pvc_name, job_name_prefix='block-expand-job',
+ mount_path='/mnt'):
+ """Create Block Volume Expand Job with block PVC mounted at /mnt
+
+ Args:
+ hostname (str): Hostname on which we want to run oc commands
+ pvc_name (str): Name of a block PVC to attach to block expand job
+ job_name_prefix (str): Job name prefix given by user at the time
+ of job creation
+ mount_path (str): Where PVC should be mounted
+
+ Returns:
+ string: Name of the created job
+ """
+
+    # Find the MOUNTPOINT on the host node corresponding to the mount_path
+    # in the pod and run xfs_growfs on that host MOUNTPOINT
+ command = [
+ 'sh', '-c', 'echo -e "# df -Th {0}" && df -Th {0} && '
+ 'DEVICE=$(df --output=source {0} | sed -e /^Filesystem/d) && '
+ 'MOUNTPOINT=$($EXEC_ON_HOST lsblk $DEVICE -n -o MOUNTPOINT) && '
+ '$EXEC_ON_HOST xfs_growfs $MOUNTPOINT > /dev/null && '
+ 'echo -e "\n# df -Th {0}" && df -Th {0}'.format(mount_path)
+ ]
+
+    # This is a privileged container; be careful when modifying it
+ job_name = "%s-%s" % (job_name_prefix, utils.get_random_str())
+ job_data = json.dumps({
+ "apiVersion": "batch/v1",
+ "kind": "Job",
+ "metadata": {"name": job_name},
+ "spec": {
+ "completions": 1,
+ "template": {
+ "spec": {
+ "containers": [{
+ "image": "rhel7",
+ "env": [
+ {
+ "name": "HOST_ROOTFS",
+ "value": "/rootfs"
+ },
+ {
+ "name": "EXEC_ON_HOST",
+ "value": "nsenter --root=$(HOST_ROOTFS) "
+ "nsenter -t 1 -m"
+ }
+ ],
+ "command": command,
+ "name": "rhel7",
+ "volumeMounts": [
+ {"mountPath": mount_path, "name": "block-pvc"},
+ {"mountPath": "/dev", "name": "host-dev"},
+ {"mountPath": "/rootfs", "name": "host-rootfs"}
+ ],
+ "securityContext": {"privileged": True}
+ }],
+ "volumes": [
+ {
+ "name": "block-pvc", "persistentVolumeClaim": {
+ "claimName": pvc_name
+ }
+ },
+ {
+ "name": "host-dev", "hostPath": {
+ "path": "/dev"
+ }
+ },
+ {
+ "name": "host-rootfs", "hostPath": {
+ "path": "/"
+ }
+ }
+ ],
+ "restartPolicy": "Never"
+ }
+ }
+ }
+ })
+
+ oc_create(hostname, job_data, 'stdin')
+ return job_name
+
+
+def is_job_complete(hostname, job_name, namespace=""):
+ """Check job completion status
+
+ Args:
+ hostname (str): Hostname on which we want to run command
+ job_name (str): k8s job name
+ namespace (str): k8s namespace name
+    Returns:
+        bool: True if the job completed successfully, otherwise False.
+ """
+
+ cmd = ['oc', 'get', 'jobs', '-o=custom-columns=:.status.succeeded',
+ '--no-headers', job_name]
+
+ cmd += ['-n', namespace] if namespace else []
+
+ out = command.cmd_run(cmd, hostname=hostname)
+ return out == "1"
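
A minimal usage sketch of the offline expand job helpers (illustrative only; master and pvc_name are placeholders, and waiter is the openshiftstoragelibs waiter module used elsewhere in this change):

    # Sketch: run the privileged expand job against a block PVC, poll until
    # it reports success, then delete the job.
    job_name = oc_create_offline_block_volume_expand_job(master, pvc_name)
    for w in waiter.Waiter(300, 5):
        if is_job_complete(master, job_name):
            break
    if w.expired:
        raise AssertionError("job {} did not complete".format(job_name))
    oc_delete(master, 'job', job_name)
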
diff --git a/tests/functional/arbiter/test_arbiter.py b/tests/functional/arbiter/test_arbiter.py
index a3c7279a..8a0a8f9e 100755
--- a/tests/functional/arbiter/test_arbiter.py
+++ b/tests/functional/arbiter/test_arbiter.py
@@ -155,7 +155,7 @@ class TestArbiterVolumeCreateExpandDelete(baseclass.BaseClass):
mount_path = "/mnt"
pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
self.node, self.pvc_name, "test-arbiter-pvc-mount-on-app-pod",
- mount_path=mount_path)
+ mount_path=mount_path, image=self.io_container_image_cirros)
self.addCleanup(openshift_ops.oc_delete, self.node, 'pod', pod_name)
# Wait for POD be up and running
@@ -1513,7 +1513,7 @@ class TestArbiterVolumeCreateExpandDelete(baseclass.BaseClass):
# Create PVC and corresponding App pod
self.create_and_wait_for_pvc(sc_name=sc_name)
dc_name, pod_name = self.create_dc_with_pvc(
- self.pvc_name, is_busybox=True)
+ self.pvc_name, image=self.io_container_image_busybox)
# Get vol info
vol_info = openshift_ops.get_gluster_vol_info_by_pvc_name(
diff --git a/tests/functional/heketi/test_block_volumes_heketi.py b/tests/functional/heketi/test_block_volumes_heketi.py
index 694a45ed..cee48242 100644
--- a/tests/functional/heketi/test_block_volumes_heketi.py
+++ b/tests/functional/heketi/test_block_volumes_heketi.py
@@ -21,6 +21,7 @@ from openshiftstoragelibs.heketi_ops import (
get_total_free_space,
heketi_blockvolume_create,
heketi_blockvolume_delete,
+ heketi_blockvolume_expand,
heketi_blockvolume_info,
heketi_blockvolume_list,
heketi_blockvolume_list_by_name_prefix,
@@ -31,14 +32,27 @@ from openshiftstoragelibs.heketi_ops import (
heketi_volume_info,
hello_heketi,
)
+from openshiftstoragelibs import heketi_version
from openshiftstoragelibs.openshift_ops import (
cmd_run_on_gluster_pod_or_node,
get_default_block_hosting_volume_size,
+ get_pod_name_from_dc,
+ get_pv_name_from_pvc,
+ is_job_complete,
+ oc_create_offline_block_volume_expand_job,
+ oc_delete,
+ oc_get_custom_resource,
oc_rsh,
restart_service_on_gluster_pod_or_node,
+ scale_dc_pod_amount_and_wait,
wait_for_service_status_on_gluster_pod_or_node,
)
+from openshiftstoragelibs.openshift_storage_libs import (
+ get_iscsi_block_devices_by_path,
+ get_mpath_name_from_device_name,
+)
from openshiftstoragelibs import podcmd
+from openshiftstoragelibs import waiter
from openshiftstoragelibs import utils
@@ -582,3 +596,125 @@ class TestBlockVolumeOps(GlusterBlockBaseClass):
"Expecting free space in app pod before {} should be greater than"
" {} as 100M file is created".format(
free_space_before, free_space_after))
+
+ def _block_vol_expand_common_offline_vs_online(self, is_online_expand):
+ node = self.ocp_master_node[0]
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+
+ version = heketi_version.get_heketi_version(h_node)
+ if version < '9.0.0-14':
+ self.skipTest("heketi-client package {} does not support "
+ "blockvolume expand".format(version.v_str))
+
+ pvc_name = self.create_and_wait_for_pvc()
+ dc_name = self.create_dc_with_pvc(pvc_name)
+ pv_name = get_pv_name_from_pvc(node, pvc_name)
+
+ # get block volume id
+ custom = r":.metadata.annotations.'gluster\.org\/volume-id'"
+ bvol_id = oc_get_custom_resource(node, 'pv', custom, pv_name)
+ self.assertNotEqual(
+ bvol_id[0], "<none>",
+ "volume name not found from pv {}".format(pv_name))
+ bvol_info = heketi_blockvolume_info(
+ h_node, h_server, bvol_id[0], json=True)
+
+ # verify required blockhostingvolume free size
+ bhv_id = bvol_info["blockhostingvolume"]
+ bhv_info = heketi_volume_info(h_node, h_server, bhv_id, json=True)
+ if bhv_info["blockinfo"]["freesize"] < 1:
+ self.skipTest("blockhostingvolume doesn't have required freespace")
+
+ if not is_online_expand:
+ scale_dc_pod_amount_and_wait(node, dc_name[0], pod_amount=0)
+
+ # expand block volume and verify usable size
+ bvol_info = heketi_blockvolume_expand(
+ h_node, h_server, bvol_id[0], 2, json=True)
+ self.assertEqual(
+            bvol_info["size"], 2, "Block volume expand did not work")
+ self.assertEqual(
+ bvol_info["size"], bvol_info["usablesize"],
+ "block volume size is not equal to the usablesize: {}".format(
+ bvol_info))
+
+ return pvc_name, dc_name, bvol_info
+
+ @pytest.mark.tier1
+ def test_block_vol_offline_expand(self):
+ """Test blockvol expansion while PVC is not in use"""
+ node = self.ocp_master_node[0]
+
+ pvc_name, dc_name, bvol_info = (
+ self._block_vol_expand_common_offline_vs_online(False))
+
+ # create and wait for job to be completed
+ jobname = oc_create_offline_block_volume_expand_job(node, pvc_name)
+ self.addCleanup(oc_delete, node, 'job', jobname)
+ for w in waiter.Waiter(300, 5):
+ if is_job_complete(node, jobname):
+ break
+ if w.expired:
+ raise AssertionError(
+ "block expand job {} is not completed".format(jobname))
+
+ # verify expand size
+ scale_dc_pod_amount_and_wait(node, dc_name[0], pod_amount=1)
+ pod_name = get_pod_name_from_dc(node, dc_name[0])
+ ret, size, _ = oc_rsh(
+ node, pod_name,
+ 'df -kh /mnt | sed "/Filesystem/d" | awk \'{print $2}\' '
+ '| sed "s/G//"')
+ self.assertFalse(ret, "Failed to get size from client side")
+ self.assertEqual(
+ int(float(size)), bvol_info["size"], "new size is not "
+ "reflected at mount point after block volume expand")
+
+ @pytest.mark.tier1
+ def test_block_vol_online_expand(self):
+ """Test blockvol expansion while PVC is in use"""
+ node = self.ocp_master_node[0]
+
+ pvc_name, dc_name, bvol_info = (
+ self._block_vol_expand_common_offline_vs_online(True))
+
+ # get pod hostname
+ iqn, _, pod_hostname = self.verify_iscsi_sessions_and_multipath(
+ pvc_name, dc_name[0])
+
+ # Get the paths info from the node
+ device = list(
+ get_iscsi_block_devices_by_path(pod_hostname, iqn).keys())[0]
+
+ # Get mpath name
+ mpath = get_mpath_name_from_device_name(pod_hostname, device)
+
+ # rescan the devices on pod_hostname
+ cmd = "iscsiadm -m node -R -T {}".format(iqn)
+ self.cmd_run(cmd, pod_hostname)
+
+ # refresh multipath device size
+ cmd = "multipathd -k'resize map {}'".format(mpath)
+ self.cmd_run(cmd, pod_hostname)
+
+ # get mount point
+ cmd = "lsblk /dev/{} --output MOUNTPOINT --noheadings".format(device)
+ mount_point = self.cmd_run(cmd, pod_hostname)
+
+ cmd = "xfs_growfs {}".format(mount_point)
+ self.cmd_run(cmd, pod_hostname)
+
+ cmd = ("df -h {} | sed '/Filesystem/d' | awk '{{print $2}}' |"
+ " sed 's/G//'")
+ size = self.cmd_run(cmd.format(mount_point), pod_hostname)
+ self.assertEqual(
+ int(float(size)), bvol_info["size"], "new size is not "
+ "reflected at host mount point after block volume expand")
+
+ # verify expand size
+ pod_name = get_pod_name_from_dc(node, dc_name[0])
+ ret, size, _ = oc_rsh(node, pod_name, cmd.format("/mnt"))
+ self.assertFalse(ret, "Failed to get size from client side")
+ self.assertEqual(
+ int(float(size)), bvol_info["size"], "new size is not "
+ "reflected at mount point after block volume expand")
diff --git a/tests/functional/heketi/test_heketi_authentication.py b/tests/functional/heketi/test_heketi_authentication.py
new file mode 100644
index 00000000..5979c808
--- /dev/null
+++ b/tests/functional/heketi/test_heketi_authentication.py
@@ -0,0 +1,33 @@
+from glusto.core import Glusto as g
+import pytest
+
+from openshiftstoragelibs import baseclass
+from openshiftstoragelibs import heketi_ops
+
+
+class TestHeketiAuthenticationFromOCPClient(baseclass.BaseClass):
+ """Class to test heketi-client authentication"""
+
+ @pytest.mark.tier1
+ def test_heketi_authentication_with_user_credentials(self):
+ """Heketi command authentication with invalid and valid credentials"""
+
+ h_client, h_server = self.heketi_client_node, self.heketi_server_url
+ err_msg = "Error: Invalid JWT token: Token missing iss claim"
+
+ # Run heketi commands with invalid credentials
+ for each_cmd in ("volume list", "topology info"):
+ cmd = "timeout 120 heketi-cli -s {} {}".format(
+ self.heketi_server_url, each_cmd)
+ ret, _, err = g.run(h_client, cmd)
+ self.assertTrue(ret, "Command execution with invalid credentials"
+ " should not succeed")
+ self.assertEqual(
+ err_msg, err.strip(), "Error is different from the command"
+ " execution {}".format(err.strip()))
+
+ # Run heketi commands with valid credentials
+ kwar = {'json_arg': True, 'secret': self.heketi_cli_key,
+ 'user': self.heketi_cli_user}
+ heketi_ops.heketi_volume_list(h_client, h_server, **kwar)
+ heketi_ops.heketi_topology_info(h_client, h_server, **kwar)
diff --git a/tests/functional/heketi/test_heketi_brick_evict.py b/tests/functional/heketi/test_heketi_brick_evict.py
new file mode 100644
index 00000000..1cba24c4
--- /dev/null
+++ b/tests/functional/heketi/test_heketi_brick_evict.py
@@ -0,0 +1,180 @@
+import pytest
+
+from glustolibs.gluster import volume_ops
+import six
+
+from openshiftstoragelibs.baseclass import BaseClass
+from openshiftstoragelibs import exceptions
+from openshiftstoragelibs import heketi_ops
+from openshiftstoragelibs import heketi_version
+from openshiftstoragelibs import node_ops
+from openshiftstoragelibs import openshift_ops
+from openshiftstoragelibs import podcmd
+from openshiftstoragelibs import waiter
+
+
+class TestHeketiBrickEvict(BaseClass):
+ """Test Heketi brick evict functionality."""
+
+ def setUp(self):
+ super(TestHeketiBrickEvict, self).setUp()
+
+ version = heketi_version.get_heketi_version(self.heketi_client_node)
+ if version < '9.0.0-14':
+ self.skipTest(
+ "heketi-client package {} does not support brick evict".format(
+ version.v_str))
+
+ self.ocp_client = self.ocp_master_node[0]
+
+ node_list = heketi_ops.heketi_node_list(
+ self.heketi_client_node, self.heketi_server_url)
+
+ if len(node_list) > 3:
+ return
+
+ for node_id in node_list:
+ node_info = heketi_ops.heketi_node_info(
+ self.heketi_client_node, self.heketi_server_url, node_id,
+ json=True)
+ if len(node_info["devices"]) < 2:
+ self.skipTest("does not have extra device/node to evict brick")
+
+ @podcmd.GlustoPod()
+ def _get_gluster_vol_info(self, file_vol):
+ """Get Gluster vol info.
+
+        Args:
+            file_vol (str): file volume name.
+
+ Returns:
+ dict: Info of the given gluster vol.
+ """
+ g_vol_info = volume_ops.get_volume_info(
+ "auto_get_gluster_endpoint", file_vol)
+
+ if not g_vol_info:
+ raise AssertionError("Failed to get volume info for gluster "
+ "volume {}".format(file_vol))
+ if file_vol in g_vol_info:
+ g_vol_info = g_vol_info.get(file_vol)
+ return g_vol_info
+
+ @pytest.mark.tier1
+ def test_heketi_brick_evict(self):
+        """Test brick evict basic functionality and verify it replaces a
+        brick properly
+ """
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+
+ size = 1
+ vol_info_old = heketi_ops.heketi_volume_create(
+ h_node, h_server, size, json=True)
+ self.addCleanup(
+ heketi_ops.heketi_volume_delete, h_node, h_server,
+ vol_info_old['id'])
+ heketi_ops.heketi_brick_evict(
+ h_node, h_server, vol_info_old["bricks"][0]['id'])
+
+ vol_info_new = heketi_ops.heketi_volume_info(
+ h_node, h_server, vol_info_old['id'], json=True)
+
+ bricks_old = set({brick['path'] for brick in vol_info_old["bricks"]})
+ bricks_new = set({brick['path'] for brick in vol_info_new["bricks"]})
+ self.assertEqual(
+ len(bricks_new - bricks_old), 1,
+ "Brick was not replaced with brick evict for vol \n {}".format(
+ vol_info_new))
+
+ gvol_info = self._get_gluster_vol_info(vol_info_new['name'])
+ gbricks = set(
+ {brick['name'].split(":")[1]
+ for brick in gvol_info["bricks"]["brick"]})
+ self.assertEqual(
+ bricks_new, gbricks, "gluster vol info and heketi vol info "
+ "mismatched after brick evict {} \n {}".format(
+ gvol_info, vol_info_new))
+
+ def _wait_for_gluster_pod_after_node_reboot(self, node_hostname):
+ """Wait for glusterfs pod to be ready after node reboot"""
+ openshift_ops.wait_for_ocp_node_be_ready(
+ self.ocp_client, node_hostname)
+ gluster_pod = openshift_ops.get_gluster_pod_name_for_specific_node(
+ self.ocp_client, node_hostname)
+ openshift_ops.wait_for_pod_be_ready(self.ocp_client, gluster_pod)
+ services = (
+ ("glusterd", "running"), ("gluster-blockd", "running"),
+ ("tcmu-runner", "running"), ("gluster-block-target", "exited"))
+ for service, state in services:
+ openshift_ops.check_service_status_on_pod(
+ self.ocp_client, gluster_pod, service, "active", state)
+
+ @pytest.mark.tier4
+ def test_brick_evict_with_node_down(self):
+        """Test brick evict basic functionality and verify brick evict
+        after a node goes down"""
+
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+
+ # Disable node if more than 3
+ node_list = heketi_ops.heketi_node_list(h_node, h_server)
+ if len(node_list) > 3:
+ for node_id in node_list[3:]:
+ heketi_ops.heketi_node_disable(h_node, h_server, node_id)
+ self.addCleanup(
+ heketi_ops.heketi_node_enable, h_node, h_server, node_id)
+
+ # Create heketi volume
+ vol_info = heketi_ops.heketi_volume_create(
+ h_node, h_server, 1, json=True)
+ self.addCleanup(
+ heketi_ops.heketi_volume_delete,
+ h_node, h_server, vol_info.get('id'))
+
+ # Get node on which heketi pod is scheduled
+ heketi_pod = openshift_ops.get_pod_name_from_dc(
+ self.ocp_client, self.heketi_dc_name)
+ heketi_node = openshift_ops.oc_get_custom_resource(
+ self.ocp_client, 'pod', '.:spec.nodeName', heketi_pod)[0]
+
+ # Get list of hostname from node id
+ host_list = []
+ for node_id in node_list[3:]:
+ node_info = heketi_ops.heketi_node_info(
+ h_node, h_server, node_id, json=True)
+ host_list.append(node_info.get('hostnames').get('manage')[0])
+
+ # Get brick id and glusterfs node which is not heketi node
+ for node in vol_info.get('bricks', {}):
+ node_info = heketi_ops.heketi_node_info(
+ h_node, h_server, node.get('node'), json=True)
+ hostname = node_info.get('hostnames').get('manage')[0]
+ if (hostname != heketi_node) and (hostname not in host_list):
+ brick_id = node.get('id')
+ break
+
+ # Bring down the glusterfs node
+ vm_name = node_ops.find_vm_name_by_ip_or_hostname(hostname)
+ self.addCleanup(
+ self._wait_for_gluster_pod_after_node_reboot, hostname)
+ self.addCleanup(node_ops.power_on_vm_by_name, vm_name)
+ node_ops.power_off_vm_by_name(vm_name)
+
+        # Wait for the glusterfs node to become NotReady
+ custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
+ for w in waiter.Waiter(300, 20):
+ status = openshift_ops.oc_get_custom_resource(
+ self.ocp_client, 'node', custom, hostname)
+ if status[0] in ['False', 'Unknown']:
+ break
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to bring down node {}".format(hostname))
+
+ # Perform brick evict operation
+ try:
+ heketi_ops.heketi_brick_evict(h_node, h_server, brick_id)
+ except AssertionError as e:
+ if ('No Replacement was found' not in six.text_type(e)):
+ raise
diff --git a/tests/functional/heketi/test_heketi_create_volume.py b/tests/functional/heketi/test_heketi_create_volume.py
index 50718ff2..938a568e 100644
--- a/tests/functional/heketi/test_heketi_create_volume.py
+++ b/tests/functional/heketi/test_heketi_create_volume.py
@@ -20,6 +20,7 @@ import six
from openshiftstoragelibs.baseclass import BaseClass
from openshiftstoragelibs import command
from openshiftstoragelibs.heketi_ops import (
+ get_block_hosting_volume_list,
get_heketi_volume_and_brick_count_list,
get_total_free_space,
heketi_blockvolume_create,
@@ -29,8 +30,11 @@ from openshiftstoragelibs.heketi_ops import (
heketi_cluster_list,
heketi_db_check,
heketi_node_delete,
+ heketi_node_enable,
heketi_node_info,
heketi_node_list,
+ heketi_node_disable,
+ heketi_server_operation_cleanup,
heketi_volume_create,
heketi_volume_delete,
heketi_volume_expand,
@@ -42,7 +46,9 @@ from openshiftstoragelibs.openshift_ops import (
cmd_run_on_gluster_pod_or_node,
get_default_block_hosting_volume_size,
get_pod_name_from_dc,
+ kill_service_on_gluster_pod_or_node,
oc_delete,
+ restart_service_on_gluster_pod_or_node,
wait_for_pod_be_ready,
wait_for_resource_absence,
wait_for_service_status_on_gluster_pod_or_node,
@@ -452,6 +458,13 @@ class TestHeketiVolume(BaseClass):
h_db_check_vol_before = (
h_db_check_before.get("{}volumes".format(vol_type)))
+ # Get existing heketi volume list
+ existing_volumes = heketi_volume_list(h_node, h_url, json=True)
+
+ # Add cleanup function to clean stale volumes created during test
+ self.addCleanup(
+ self._cleanup_heketi_volumes, existing_volumes.get("volumes"))
+
# Delete heketi pod to clean db operations
if(h_db_check_bricks_before.get("pending")
or h_db_check_vol_before.get("pending")):
@@ -773,3 +786,129 @@ class TestHeketiVolume(BaseClass):
info_cluster_id, creation_cluster_id,
"Volume creation cluster id {} not matching the info cluster id "
"{}".format(creation_cluster_id, info_cluster_id))
+
+ def _check_for_pending_operations(self, h_node, h_url):
+ # Check for pending operations
+ for w in waiter.Waiter(timeout=120, interval=10):
+ h_db_check = heketi_db_check(h_node, h_url)
+ h_db_check_vol = h_db_check.get("blockvolumes")
+ if h_db_check_vol.get("pending"):
+ break
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "No pending operations found during blockvolumes creation "
+ "{}".format(h_db_check_vol.get("pending")))
+
+ @pytest.mark.tier2
+ def test_heketi_manual_cleanup_operation_in_bhv(self):
+        """Validate that heketi db cleanup resolves the mismatch in the
+        free size of the block hosting volume caused by failed block
+        device create operations.
+ """
+ bhv_size_before, bhv_size_after, vol_count = [], [], 5
+ ocp_node, g_node = self.ocp_master_node[0], self.gluster_servers[0]
+ h_node, h_url = self.heketi_client_node, self.heketi_server_url
+
+ # Get existing heketi volume list
+ existing_volumes = heketi_volume_list(h_node, h_url, json=True)
+
+ # Add function to clean stale volumes created during test
+ self.addCleanup(
+ self._cleanup_heketi_volumes, existing_volumes.get("volumes"))
+
+ # Get nodes id list
+ node_id_list = heketi_node_list(h_node, h_url)
+
+ # Disable 4th and other nodes
+ for node_id in node_id_list[3:]:
+ heketi_node_disable(h_node, h_url, node_id)
+ self.addCleanup(heketi_node_enable, h_node, h_url, node_id)
+
+ # Calculate heketi volume size
+ free_space, nodenum = get_total_free_space(h_node, h_url)
+ free_space_available = int(free_space / nodenum)
+ if free_space_available > vol_count:
+ h_volume_size = int(free_space_available / vol_count)
+ if h_volume_size > 50:
+ h_volume_size = 50
+ else:
+ h_volume_size, vol_count = 1, free_space_available
+
+ # Create BHV in case blockvolume size is greater than default BHV size
+ default_bhv_size = get_default_block_hosting_volume_size(
+ h_node, self.heketi_dc_name)
+ if default_bhv_size < h_volume_size:
+ h_volume_name = "autotest-{}".format(utils.get_random_str())
+ bhv_info = self.create_heketi_volume_with_name_and_wait(
+ h_volume_name, free_space_available,
+ raise_on_cleanup_error=False, block=True, json=True)
+ free_space_available -= (
+ int(bhv_info.get("blockinfo").get("reservedsize")) + 1)
+ h_volume_size = int(free_space_available / vol_count)
+
+ # Get BHV list
+ h_bhv_list = get_block_hosting_volume_list(h_node, h_url).keys()
+ self.assertTrue(h_bhv_list, "Failed to get the BHV list")
+
+ # Get BHV size
+ for bhv in h_bhv_list:
+ vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
+ bhv_vol_size_before = vol_info.get("freesize")
+ bhv_size_before.append(bhv_vol_size_before)
+
+ # Kill Tcmu-runner service
+ services = ("tcmu-runner", "gluster-block-target", "gluster-blockd")
+ kill_service_on_gluster_pod_or_node(ocp_node, "tcmu-runner", g_node)
+
+ # Restart the services
+ for service in services:
+ state = (
+ 'exited' if service == 'gluster-block-target' else 'running')
+ self.addCleanup(
+ wait_for_service_status_on_gluster_pod_or_node,
+ ocp_node, service, 'active', state, g_node)
+ self.addCleanup(
+ restart_service_on_gluster_pod_or_node,
+ ocp_node, service, g_node)
+
+ def run_async(cmd, hostname, raise_on_error=True):
+ return g.run_async(host=hostname, command=cmd)
+
+ # Create stale block volumes in async
+ for count in range(vol_count):
+ with mock.patch.object(json, 'loads', side_effect=(lambda j: j)):
+ with mock.patch.object(
+ command, 'cmd_run', side_effect=run_async):
+ heketi_blockvolume_create(
+ h_node, h_url, h_volume_size, json=True)
+
+ # Wait for pending operation to get generated
+ self._check_for_pending_operations(h_node, h_url)
+
+ # Restart the services
+ for service in services:
+ state = (
+ 'exited' if service == 'gluster-block-target' else 'running')
+ restart_service_on_gluster_pod_or_node(
+ ocp_node, service, g_node)
+ wait_for_service_status_on_gluster_pod_or_node(
+ ocp_node, service, 'active', state, g_node)
+
+ # Cleanup pending operation
+ heketi_server_operation_cleanup(h_node, h_url)
+
+ # wait for pending operation to get cleaned up
+ for w in waiter.Waiter(timeout=120, interval=10):
+ # Get BHV size
+ for bhv in h_bhv_list:
+ vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
+ bhv_vol_size_after = vol_info.get("freesize")
+ bhv_size_after.append(bhv_vol_size_after)
+
+ if(set(bhv_size_before) == set(bhv_size_after)):
+ break
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to Validate volume size Actual:{},"
+ " Expected:{}".format(
+ set(bhv_size_before), set(bhv_size_after)))
diff --git a/tests/functional/heketi/test_heketi_device_operations.py b/tests/functional/heketi/test_heketi_device_operations.py
index a6831e98..05f16ef9 100755
--- a/tests/functional/heketi/test_heketi_device_operations.py
+++ b/tests/functional/heketi/test_heketi_device_operations.py
@@ -1,6 +1,7 @@
import ddt
from glusto.core import Glusto as g
import pytest
+import six
from openshiftstoragelibs.baseclass import BaseClass
from openshiftstoragelibs.heketi_ops import (
@@ -17,6 +18,8 @@ from openshiftstoragelibs.heketi_ops import (
heketi_topology_info,
heketi_volume_create,
heketi_volume_delete,
+ rm_tags,
+ set_tags,
validate_dev_path_vg_and_uuid,
)
from openshiftstoragelibs import utils
@@ -600,3 +603,96 @@ class TestHeketiDeviceOperations(BaseClass):
h_node, h_url, node, dev)
self.assertTrue(is_true, "Failed to verify dv_path for the "
"device {}".format(dev))
+
+ @pytest.mark.tier3
+ def test_volume_create_as_tag_maching_rule(self):
+ """Validate settags operation only on one device in the cluster"""
+
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+
+ # Set tag on any one device in cluster
+ node_list = heketi_node_list(h_node, h_server, json=True)
+ node_info = heketi_node_info(h_node, h_server, node_list[0], json=True)
+ device_id = node_info.get('devices', {})[0].get('id')
+ set_tags(h_node, h_server, 'device', device_id, "tier:it")
+ self.addCleanup(rm_tags, h_node, h_server, 'device', device_id, 'tier')
+
+ # Volume creation should fail
+ try:
+ heketi_volume_create(
+ h_node, h_server, 2,
+ gluster_volume_options="user.heketi.device-tag-match tier=it")
+ except AssertionError as e:
+ if ("Failed to allocate new volume" not in six.text_type(e)):
+ raise
+
+ @pytest.mark.tier4
+ def test_device_settags_tier_option(self):
+ """Validate volume creation with a tag-matching rule"""
+
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+ initial_brick_count, before_brick_count, after_brick_count = [], [], []
+
+ # Set tag on device on 3 different nodes
+ node_list = heketi_node_list(h_node, h_server, json=True)
+ device_list = []
+ for node_id in node_list[:3]:
+ node_info = heketi_node_info(h_node, h_server, node_id, json=True)
+ device_id = node_info.get('devices', {})[0].get('id')
+ device_list.append(device_id)
+ set_tags(h_node, h_server, 'device', device_id, "tier:test")
+ self.addCleanup(
+ rm_tags, h_node, h_server, 'device', device_id, "tier",
+ raise_on_error=False)
+
+ # Get initial number of bricks present on device
+ for device_id in device_list:
+ device_info = heketi_device_info(
+ h_node, h_server, device_id, json=True)
+ initial_brick_count.append(len(device_info.get("bricks")))
+
+ # Create volume with device tag option
+ volume_info = heketi_volume_create(
+ h_node, h_server, 2,
+ gluster_volume_options="user.heketi.device-tag-match tier=test",
+ json=True)
+ self.addCleanup(
+ heketi_volume_delete, h_node, h_server, volume_info.get("id"))
+
+ # Get number of bricks present on device after volume create
+ for device_id in device_list:
+ device_info = heketi_device_info(
+ h_node, h_server, device_id, json=True)
+ before_brick_count.append(len(device_info.get("bricks")))
+
+        # Validate volume has been created on tagged devices
+        self.assertGreater(
+            before_brick_count, initial_brick_count,
+            "Volume {} was not created on tagged devices".format(
+                volume_info.get("id")))
+
+        # Create volume with the tag-not-equal option
+ volume_info = heketi_volume_create(
+ h_node, h_server, 2,
+ gluster_volume_options="user.heketi.device-tag-match tier!=test",
+ json=True)
+ self.addCleanup(
+ heketi_volume_delete, h_node, h_server, volume_info.get("id"))
+
+ # Get number of bricks present on device after volume create
+ for device_id in device_list:
+ device_info = heketi_device_info(
+ h_node, h_server, device_id, json=True)
+ after_brick_count.append(len(device_info.get("bricks")))
+
+        # Validate volume has not been created on tagged devices
+        self.assertEqual(
+            before_brick_count, after_brick_count,
+            "Volume {} was created on tagged devices".format(
+                volume_info.get("id")))
+
+ # Update the tag on device
+ for device_id in device_list:
+ set_tags(h_node, h_server, 'device', device_id, "tier:test_update")
+ self.addCleanup(
+ rm_tags, h_node, h_server, 'device', device_id, "tier")
diff --git a/tests/functional/heketi/test_heketi_lvm_wrapper.py b/tests/functional/heketi/test_heketi_lvm_wrapper.py
index 93f76ef1..5817f57b 100644
--- a/tests/functional/heketi/test_heketi_lvm_wrapper.py
+++ b/tests/functional/heketi/test_heketi_lvm_wrapper.py
@@ -16,6 +16,8 @@ from openshiftstoragelibs import waiter
ENV_NAME = "HEKETI_LVM_WRAPPER"
ENV_VALUE = "/usr/sbin/exec-on-host"
ENV_FALSE_VALUE = "/usr/bin/false"
+DOCKER_SERVICE = "systemctl {} docker"
+SERVICE_STATUS_REGEX = r"Active: (.*) \((.*)\)"
@ddt.ddt
@@ -50,6 +52,24 @@ class TestHeketiLvmWrapper(baseclass.BaseClass):
openshift_ops.wait_for_pod_be_ready(
self.oc_node, new_heketi_pod, wait_step=20)
+ def _wait_for_docker_service_status(self, pod_host_ip, status, state):
+ for w in waiter.Waiter(30, 3):
+ out = command.cmd_run(DOCKER_SERVICE.format("status"), pod_host_ip)
+ for line in out.splitlines():
+ status_match = re.search(SERVICE_STATUS_REGEX, line)
+ if (status_match and status_match.group(1) == status
+ and status_match.group(2) == state):
+ return True
+
+ def _check_docker_status_is_active(self, pod_host_ip):
+ try:
+ command.cmd_run(DOCKER_SERVICE.format("is-active"), pod_host_ip)
+ except Exception as err:
+            if "inactive" in str(err):
+ command.cmd_run(DOCKER_SERVICE.format("start"), pod_host_ip)
+ self._wait_for_docker_service_status(
+ pod_host_ip, "active", "running")
+
@pytest.mark.tier1
def test_lvm_script_and_wrapper_environments(self):
"""Validate lvm script present on glusterfs pods
@@ -206,3 +226,52 @@ class TestHeketiLvmWrapper(baseclass.BaseClass):
err_msg = "Heketi unable to execute LVM commands with {}".format(
env_var_value)
self.assertTrue(status_match, err_msg)
+
+ @pytest.mark.tier2
+ def test_docker_service_restart(self):
+ """Validate docker service should not fail after restart"""
+
+ # Skip the TC if independent mode deployment
+ if not self.is_containerized_gluster():
+ self.skipTest(
+ "Skipping this test case as LVM script is not available in "
+ "independent mode deployment")
+
+ # Skip the TC if docker storage driver other than devicemapper
+ pod_host_ip = self.pod_name[0]["pod_host_ip"]
+ cmd = "docker info -f '{{json .Driver}}'"
+ device_driver = command.cmd_run(cmd, pod_host_ip)
+ if device_driver != '"devicemapper"':
+ self.skipTest(
+ "Skipping this test case as docker storage driver is not "
+ "set to devicemapper")
+
+ # Validate LVM environment is present
+ custom = (r'":spec.containers[*].env[?(@.name==\"{}\")]'
+ r'.value"'.format(ENV_NAME))
+ env_var_value = openshift_ops.oc_get_custom_resource(
+ self.oc_node, "pod", custom, self.h_pod_name)[0]
+        err_msg = "Heketi {} environment should have {}".format(
+ ENV_NAME, ENV_VALUE)
+ self.assertEqual(env_var_value, ENV_VALUE, err_msg)
+
+ # Check docker status is active
+ command.cmd_run(DOCKER_SERVICE.format("is-active"), pod_host_ip)
+
+ # Restart the docker service
+ self.addCleanup(self._check_docker_status_is_active, pod_host_ip)
+ command.cmd_run(DOCKER_SERVICE.format("restart"), pod_host_ip)
+
+ # Wait for docker service to become active
+ self._wait_for_docker_service_status(pod_host_ip, "active", "running")
+
+ # Wait for glusterfs pods to be ready
+ openshift_ops.wait_for_pods_be_ready(
+ self.oc_node, len(self.gluster_servers), "glusterfs=storage-pod")
+
+ # Check the docker pool is available after docker restart
+ cmd = "ls -lrt /dev/docker-vg/docker-pool"
+ command.cmd_run(cmd, pod_host_ip)
+
+ # Create PVC after docker restart
+ self.create_and_wait_for_pvcs()
diff --git a/tests/functional/heketi/test_heketi_zones.py b/tests/functional/heketi/test_heketi_zones.py
index e1cd5dd4..5c5d3df5 100644
--- a/tests/functional/heketi/test_heketi_zones.py
+++ b/tests/functional/heketi/test_heketi_zones.py
@@ -626,7 +626,8 @@ class TestHeketiZones(baseclass.BaseClass):
# Create app dcs with I/O
for pvc_name in pvc_names:
app_dc = openshift_ops.oc_create_app_dc_with_io(
- self.node, pvc_name=pvc_name, dc_name_prefix=prefix)
+ self.node, pvc_name=pvc_name,
+ dc_name_prefix=prefix, image=self.io_container_image_cirros)
self.addCleanup(openshift_ops.oc_delete, self.node, 'dc', app_dc)
# Get pod names and label them
diff --git a/tests/functional/heketi/test_volume_expansion_and_devices.py b/tests/functional/heketi/test_volume_expansion_and_devices.py
index df064e76..fa78b1aa 100644
--- a/tests/functional/heketi/test_volume_expansion_and_devices.py
+++ b/tests/functional/heketi/test_volume_expansion_and_devices.py
@@ -10,6 +10,7 @@ from openshiftstoragelibs import (
heketi_ops,
podcmd,
)
+from openshiftstoragelibs import utils
class TestVolumeExpansionAndDevicesTestCases(BaseClass):
@@ -521,3 +522,44 @@ class TestVolumeExpansionAndDevicesTestCases(BaseClass):
free_space_after_deletion > free_space_after_expansion,
"Free space is not reclaimed after volume deletion of %s"
% volume_id)
+
+ @pytest.mark.tier2
+ @podcmd.GlustoPod()
+ def test_replica_volume_expand(self):
+ """
+ Test expansion of a replica volume
+ """
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+ volume_name = (
+ "autotests-heketi-volume-{}".format(utils.get_random_str()))
+ volume_size = 10
+ creation_info = self.create_heketi_volume_with_name_and_wait(
+ volume_name, volume_size, json=True, raise_on_cleanup_error=False)
+ volume_id = creation_info["id"]
+ volume_info = heketi_ops.heketi_volume_info(
+ h_node, h_server, volume_id, json=True)
+
+ # Get gluster volume info
+ gluster_vol = volume_ops.get_volume_info(
+ 'auto_get_gluster_endpoint', volname=volume_name)
+ self.assertTrue(
+ gluster_vol, "Failed to get volume {} info".format(volume_name))
+ vol_name = gluster_vol[volume_name]
+ self.assertEqual(
+ vol_name['replicaCount'], "3",
+ "Replica count is different for volume {} Actual:{} "
+ "Expected : 3".format(vol_name, vol_name['replicaCount']))
+
+ expand_size = 5
+ heketi_ops.heketi_volume_expand(
+ h_node, h_server, volume_id, expand_size)
+ volume_info = heketi_ops.heketi_volume_info(
+ h_node, h_server, volume_id, json=True)
+ expected_size = volume_size + expand_size
+ self.assertEqual(
+ volume_info['size'], expected_size,
+ "Volume Expansion failed, Expected Size: {}, Actual "
+ "Size: {}".format(str(expected_size), str(volume_info['size'])))
+
+ self.get_brick_and_volume_status(volume_name)
+ self.get_rebalance_status(volume_name)
diff --git a/tests/functional/logging/test_logging_validations.py b/tests/functional/logging/test_logging_validations.py
index 63346f0a..509c71d8 100644
--- a/tests/functional/logging/test_logging_validations.py
+++ b/tests/functional/logging/test_logging_validations.py
@@ -9,6 +9,7 @@ from openshiftstoragelibs import command
from openshiftstoragelibs import exceptions
from openshiftstoragelibs import gluster_ops
from openshiftstoragelibs import openshift_ops
+from openshiftstoragelibs import waiter
@ddt.ddt
@@ -112,7 +113,7 @@ class TestLoggingAndGlusterRegistryValidation(GlusterBlockBaseClass):
@pytest.mark.tier3
def test_validate_logging_pods_and_pvc(self):
- """Validate metrics pods and PVC"""
+ """Validate logging pods and PVC"""
# Wait for kibana pod to be ready
kibana_pod = openshift_ops.get_pod_name_from_dc(
@@ -274,3 +275,122 @@ class TestLoggingAndGlusterRegistryValidation(GlusterBlockBaseClass):
openshift_ops.oc_rsh(self._master, es_pod, cmd_run_io)
self.addCleanup(
openshift_ops.oc_rsh, self._master, es_pod, cmd_remove_file)
+
+ def _delete_and_wait_for_new_es_pod_to_come_up(self):
+
+ # Force delete and wait for es pod to come up
+ openshift_ops.switch_oc_project(
+ self._master, self._logging_project_name)
+ pod_name = openshift_ops.get_pod_name_from_dc(
+ self._master, self._logging_es_dc)
+ openshift_ops.oc_delete(self._master, 'pod', pod_name, is_force=True)
+ openshift_ops.wait_for_resource_absence(self._master, 'pod', pod_name)
+ new_pod_name = openshift_ops.get_pod_name_from_dc(
+ self._master, self._logging_es_dc)
+ openshift_ops.wait_for_pod_be_ready(
+ self._master, new_pod_name, timeout=1800)
+
+ @pytest.mark.tier2
+ @ddt.data('delete', 'drain')
+ def test_respin_es_pod(self, motive):
+ """Validate respin of elastic search pod"""
+
+ # Get the pod name and PVC name
+ es_pod = openshift_ops.get_pod_name_from_dc(
+ self._master, self._logging_es_dc)
+ pvc_custom = ":.spec.volumes[*].persistentVolumeClaim.claimName"
+ pvc_name = openshift_ops.oc_get_custom_resource(
+ self._master, "pod", pvc_custom, es_pod)[0]
+
+ # Validate iscsi and multipath
+ _, _, node = self.verify_iscsi_sessions_and_multipath(
+ pvc_name, self._logging_es_dc,
+ heketi_server_url=self._registry_heketi_server_url,
+ is_registry_gluster=True)
+ if motive == 'delete':
+
+ # Delete the es pod
+ self.addCleanup(self._delete_and_wait_for_new_es_pod_to_come_up)
+ openshift_ops.oc_delete(self._master, "pod", es_pod)
+ elif motive == 'drain':
+
+ # Get the number of infra nodes
+ infra_node_count_cmd = (
+ 'oc get nodes '
+ '--no-headers -l node-role.kubernetes.io/infra=true|wc -l')
+ infra_node_count = command.cmd_run(
+ infra_node_count_cmd, self._master)
+
+            # Skip the test case if the number of infra nodes is less than 2
+ if int(infra_node_count) < 2:
+ self.skipTest('Available number of infra nodes "{}", it should'
+ ' be more than 1'.format(infra_node_count))
+
+ # Cleanup to make node schedulable
+ cmd_schedule = (
+ 'oc adm manage-node {} --schedulable=true'.format(node))
+ self.addCleanup(
+ command.cmd_run, cmd_schedule, hostname=self._master)
+
+ # Drain the node
+ drain_cmd = ('oc adm drain {} --force=true --ignore-daemonsets '
+ '--delete-local-data'.format(node))
+ command.cmd_run(drain_cmd, hostname=self._master)
+
+ # Wait for pod to get absent
+ openshift_ops.wait_for_resource_absence(self._master, "pod", es_pod)
+
+ # Wait for new pod to come up
+ try:
+ pod_name = openshift_ops.get_pod_name_from_dc(
+ self._master, self._logging_es_dc)
+ openshift_ops.wait_for_pod_be_ready(self._master, pod_name)
+ except exceptions.ExecutionError:
+ self._delete_and_wait_for_new_es_pod_to_come_up()
+
+ # Validate iscsi and multipath
+ self.verify_iscsi_sessions_and_multipath(
+ pvc_name, self._logging_es_dc,
+ heketi_server_url=self._registry_heketi_server_url,
+ is_registry_gluster=True)
+
+ @pytest.mark.tier3
+ def test_run_workload_with_logging(self):
+        """Validate logs are being generated after running workload"""
+
+ # Get the size of used space of logs
+ es_pod = openshift_ops.get_pod_name_from_dc(
+ self._master, self._logging_es_dc)
+ mount_point = "/elasticsearch/persistent"
+ cmd_space_check = ('df -kh --output=used {} | sed "/Used/d" |'
+ 'sed "s/G//"'.format(mount_point))
+ ret, initial_used_percent, err = openshift_ops.oc_rsh(
+ self._master, es_pod, cmd_space_check)
+ err_msg = "Failed to fetch the size of used space, error {}"
+ self.assertFalse(ret, err_msg.format(err))
+
+ # Create 20 pvcs and app pods with io
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+ pvc_count, batch_count = 5, 4
+ for _ in range(batch_count):
+ pvcs = self.create_and_wait_for_pvcs(pvc_amount=pvc_count)
+ self.create_dcs_with_pvc(pvcs)
+ self.addCleanup(
+ openshift_ops.switch_oc_project,
+ self._master, self.storage_project_name)
+
+        # Get and verify the final size of used space of logs
+ openshift_ops.switch_oc_project(
+ self._master, self._logging_project_name)
+ for w in waiter.Waiter(600, 30):
+ ret, final_used_percent, err = openshift_ops.oc_rsh(
+ self._master, es_pod, cmd_space_check)
+ self.assertFalse(ret, err_msg.format(err))
+ if int(initial_used_percent) < int(final_used_percent):
+ break
+ if w.expired:
+ raise AssertionError(
+ "Initial used space {} for logs is not less than final "
+ "used space {}".format(
+ initial_used_percent, final_used_percent))
diff --git a/tests/functional/metrics/test_metrics_validation.py b/tests/functional/metrics/test_metrics_validation.py
index ce7e843f..e16fe349 100644
--- a/tests/functional/metrics/test_metrics_validation.py
+++ b/tests/functional/metrics/test_metrics_validation.py
@@ -27,6 +27,7 @@ from openshiftstoragelibs.openshift_storage_libs import (
get_iscsi_block_devices_by_path,
get_mpath_name_from_device_name,
)
+from openshiftstoragelibs import waiter
@ddt.ddt
@@ -274,3 +275,41 @@ class TestMetricsAndGlusterRegistryValidation(GlusterBlockBaseClass):
restart_gluster_vol_brick_processes(
self.master, bhv_name, list(self.registry_servers_info.keys()))
self.addCleanup(self.cassandra_pod_delete_cleanup, raise_on_error=True)
+
+ @pytest.mark.tier3
+ def test_run_workload_with_metrics(self):
+        """Validate metrics data is being generated after running workload"""
+
+        # Get the size of used space of metrics data
+ cassandra_pod = get_pod_name_from_rc(
+ self.master, self.metrics_rc_hawkular_cassandra)
+ mount_point = "/cassandra_data"
+ cmd_space_check = ('df -k --output=used {} | sed "/Used/d" |'
+ 'sed "s/G//"'.format(mount_point))
+ ret, initial_used_percent, err = oc_rsh(
+ self.master, cassandra_pod, cmd_space_check)
+ err_msg = "Failed to fetch the size of used space, error {}"
+ self.assertFalse(ret, err_msg.format(err))
+
+ # Create 20 PVCs and app pods with IO
+ switch_oc_project(self.master, self.storage_project_name)
+ pvc_count, batch_count = 5, 4
+ for _ in range(batch_count):
+ pvcs = self.create_and_wait_for_pvcs(pvc_amount=pvc_count)
+ self.create_dcs_with_pvc(pvcs)
+ self.addCleanup(
+ switch_oc_project, self.master, self.storage_project_name)
+
+ # Get and verify the final size of used space of metrics
+ switch_oc_project(self.master, self.metrics_project_name)
+ for w in waiter.Waiter(600, 30):
+ ret, final_used_percent, err = oc_rsh(
+ self.master, cassandra_pod, cmd_space_check)
+ self.assertFalse(ret, err_msg.format(err))
+ if int(initial_used_percent) < int(final_used_percent):
+ break
+ if w.expired:
+ raise AssertionError(
+ "Initial used space {} for logs is not less than final "
+ "used space {}".format(
+ initial_used_percent, final_used_percent))
diff --git a/tests/functional/prometheous/test_prometheus_validations.py b/tests/functional/prometheous/test_prometheus_validations.py
index 79416aef..68b69212 100644
--- a/tests/functional/prometheous/test_prometheus_validations.py
+++ b/tests/functional/prometheous/test_prometheus_validations.py
@@ -5,21 +5,41 @@ except ImportError:
# py2
import json
from pkg_resources import parse_version
+from functools import reduce
import ddt
from glusto.core import Glusto as g
+from glustolibs.gluster import brick_libs
+from glustolibs.gluster import volume_ops
import pytest
from openshiftstoragelibs.baseclass import GlusterBlockBaseClass
from openshiftstoragelibs import command
from openshiftstoragelibs import exceptions
from openshiftstoragelibs import heketi_ops
+from openshiftstoragelibs import gluster_ops
+from openshiftstoragelibs import node_ops
from openshiftstoragelibs import openshift_ops
+from openshiftstoragelibs import openshift_storage_libs
+from openshiftstoragelibs import podcmd
+from openshiftstoragelibs import waiter
@ddt.ddt
class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass):
+ @classmethod
+ def setUpClass(cls):
+ super(TestPrometheusAndGlusterRegistryValidation, cls).setUpClass()
+
+ cls.metrics = ('heketi_volumes_count',
+ 'heketi_block_volumes_count',
+ 'heketi_device_brick_count',
+ 'heketi_device_free_bytes',
+ 'heketi_nodes_count',
+ 'heketi_device_used_bytes',
+ 'heketi_device_size_bytes')
+
def setUp(self):
"""Initialize all the variables which are necessary for test cases"""
super(TestPrometheusAndGlusterRegistryValidation, self).setUp()
@@ -37,6 +57,8 @@ class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass):
'heketi_server_url'])
self._registry_project_name = (
g.config['openshift']['registry_project_name'])
+ self._registry_servers_info = (
+ g.config['gluster_registry_servers'])
except KeyError as err:
self.skipTest("Config file doesn't have key {}".format(err))
@@ -96,6 +118,68 @@ class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass):
return pod_names, pvc_names
+ @podcmd.GlustoPod()
+ def _gluster_volume_cleanup(self, vol_name):
+ # Check brick status. Restart vol if bricks are offline
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ brick_list = brick_libs.get_all_bricks(
+ "auto_get_gluster_endpoint", vol_name)
+ self.assertIsNotNone(brick_list, "Failed to get brick list")
+ check_bricks = brick_libs.are_bricks_online(
+ "auto_get_gluster_endpoint", vol_name, brick_list)
+ if not check_bricks:
+ start_vol, _, _ = volume_ops.volume_start(
+ "auto_get_gluster_endpoint", vol_name, force=True)
+ self.assertFalse(
+ start_vol, "Failed to start volume using force")
+
+ def _get_newly_deployed_gluster_pod(self, g_pod_list_before):
+
+ # Fetch pod after delete
+ g_pod_list_after = [
+ pod["pod_name"]
+ for pod in openshift_ops.get_ocp_gluster_pod_details(self._master)]
+
+ # Fetch the new gluster pod
+ g_new_pod = list(set(g_pod_list_after) - set(g_pod_list_before))
+ self.assertTrue(g_new_pod, "No new gluster pod deployed after delete")
+ return g_new_pod
+
+ def _gluster_pod_delete(self, g_pod_list_before):
+ """Delete the gluster pod using force delete"""
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+
+ # Fetch newly deployed gluster pod after delete
+ try:
+ pod_name = self._get_newly_deployed_gluster_pod(g_pod_list_before)
+ openshift_ops.wait_for_pod_be_ready(
+ self._master,
+ pod_name[0] if pod_name else g_pod_list_before[0],
+ timeout=120, wait_step=6)
+ except exceptions.ExecutionError:
+ openshift_ops.oc_delete(
+ self._master, 'pod', g_pod_list_before[0], is_force=True)
+ openshift_ops.wait_for_resource_absence(
+ self._master, 'pod', g_pod_list_before[0])
+ g_new_pod = self._get_newly_deployed_gluster_pod(g_pod_list_before)
+ openshift_ops.wait_for_pod_be_ready(self._master, g_new_pod[0])
+
+ def _wait_for_gluster_pod_be_ready(self, g_pod_list_before):
+ """Wait for the gluster pods to be in ready state"""
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+
+ # Check if the gluster pods are in ready state
+ try:
+ pod_count = len(self._registry_servers_info.keys())
+ openshift_ops.wait_for_pods_be_ready(
+ self._master, pod_count, "glusterfs-node=pod",
+ timeout=120, wait_step=6)
+ except exceptions.ExecutionError:
+ self._gluster_pod_delete(g_pod_list_before)
+
@pytest.mark.tier2
def test_promethoues_pods_and_pvcs(self):
"""Validate prometheus pods and PVC"""
@@ -265,3 +349,628 @@ class TestPrometheusAndGlusterRegistryValidation(GlusterBlockBaseClass):
self.assertLess(
initial_result[metric], final_result[metric],
msg + " differnt")
+
+ @ddt.data('add', 'delete')
+ @pytest.mark.tier3
+ def test_heketi_prometheus_device_count_after_operation(self, operation):
+ """Do operation and validate device count in heketi and prometheus"""
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+
+ # Get list of additional devices for one of the Gluster nodes
+ gluster_server_0 = list(self.gluster_servers_info.values())[0]
+ manage_hostname = gluster_server_0.get("manage")
+ self.assertTrue(
+ manage_hostname, "IP Address is not specified for "
+ "node {}".format(gluster_server_0))
+ device_name = gluster_server_0.get("additional_devices")[0]
+ self.assertTrue(
+ device_name, "Additional devices are not specified for "
+ "node {}".format(gluster_server_0))
+
+ # Get node ID of the Gluster hostname
+ node_list = heketi_ops.heketi_topology_info(
+ h_node, h_server, json=True).get("clusters")[0].get("nodes")
+ self.assertTrue(
+ node_list, "Cluster info command returned empty list of nodes")
+ node_id = None
+ for node in node_list:
+ if manage_hostname == node.get("hostnames").get("manage")[0]:
+ node_id = node.get("id")
+ break
+ self.assertTrue(
+ node_id, "Failed to get node_id for {}".format(manage_hostname))
+
+ # Adding heketi device
+ heketi_ops.heketi_device_add(h_node, h_server, device_name, node_id)
+ node_info_after_addition = heketi_ops.heketi_node_info(
+ h_node, h_server, node_id, json=True)
+ device_id, bricks = None, None
+ for device in node_info_after_addition.get("devices"):
+ if device.get("name") == device_name:
+ device_id, bricks = (
+ device.get("id"), len(device.get("bricks")))
+ break
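+ # Cleanups below run in LIFO order: disable, then remove, then delete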
+ self.addCleanup(
+ heketi_ops.heketi_device_delete, h_node, h_server, device_id,
+ raise_on_error=False)
+ self.addCleanup(
+ heketi_ops.heketi_device_remove, h_node, h_server, device_id,
+ raise_on_error=False)
+ self.addCleanup(
+ heketi_ops.heketi_device_disable, h_node, h_server, device_id,
+ raise_on_error=False)
+
+ if operation == "delete":
+ # Disable, remove and delete the heketi device
+ heketi_ops.heketi_device_disable(h_node, h_server, device_id)
+ heketi_ops.heketi_device_remove(h_node, h_server, device_id)
+ heketi_ops.heketi_device_delete(h_node, h_server, device_id)
+ # Verify zero bricks on the deleted device and device deletion
+ msg = (
+ "Number of bricks on the device {} of the nodes should be"
+ "zero".format(device_name))
+ self.assertFalse(bricks, msg)
+ node_info_after_deletion = (
+ heketi_ops.heketi_node_info(h_node, h_server, node_id))
+ msg = ("Device {} should not be shown in node info of the node {}"
+ "after the device deletion".format(device_id, node_id))
+ self.assertNotIn(device_id, node_info_after_deletion, msg)
+
+ # Validate heketi and prometheus device count
+ for w in waiter.Waiter(timeout=60, interval=10):
+ total_value_prometheus, total_value_metrics = 0, 0
+ openshift_ops.switch_oc_project(
+ self.ocp_master_node[0], 'openshift-monitoring')
+ metric_result = self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_count')
+ for result in metric_result:
+ total_value_prometheus += int(result.get('value')[1])
+ openshift_ops.switch_oc_project(
+ self.ocp_master_node[0], 'glusterfs')
+ metrics = heketi_ops.get_heketi_metrics(h_node, h_server)
+ heketi_device_count_metric = metrics.get('heketi_device_count')
+ for result in heketi_device_count_metric:
+ total_value_metrics += int(result.get('value'))
+
+ if total_value_prometheus == total_value_metrics:
+ break
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to update device details in prometheus")
+
+ @ddt.data('usedbytes', 'brickcount')
+ @pytest.mark.tier3
+ def test_heketi_prometheus_usedbytes_brickcount_on_device_delete(
+ self, operation):
+ """Validate used bytes,device count on heketi and prometheus"""
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+
+ # Get list of additional devices for one of the Gluster nodes
+ gluster_server_0 = list(self.gluster_servers_info.values())[0]
+ manage_hostname = gluster_server_0.get("manage")
+ self.assertTrue(
+ manage_hostname, "IP Address is not specified for "
+ "node {}".format(gluster_server_0))
+ device_name = gluster_server_0.get("additional_devices")[0]
+ self.assertTrue(
+ device_name, "Additional devices are not specified for "
+ "node {}".format(gluster_server_0))
+
+ # Get node ID of the Gluster hostname
+ node_list = heketi_ops.heketi_topology_info(
+ h_node, h_server, json=True).get("clusters")[0].get("nodes")
+ self.assertTrue(
+ node_list, "Cluster info command returned empty list of nodes")
+ node_id = [
+ node.get("id")
+ for node in node_list
+ if manage_hostname == node.get("hostnames").get("manage")[0]]
+ self.assertTrue(
+ node_id, "Failed to get node_id for {}".format(manage_hostname))
+ node_id = node_id[0]
+
+ # Adding heketi device
+ heketi_ops.heketi_device_add(h_node, h_server, device_name, node_id)
+ node_info_after_addition = heketi_ops.heketi_node_info(
+ h_node, h_server, node_id, json=True)
+ device_id, bricks = None, None
+ for device in node_info_after_addition.get("devices"):
+ if device.get("name") == device_name:
+ device_id, bricks = (
+ device.get("id"), len(device.get("bricks")))
+ break
+
+ # Verify zero bricks on the device
+ msg = (
+ "Number of bricks on the device {} of the nodes should be"
+ "zero".format(device_name))
+ self.assertFalse(bricks, msg)
+ self.addCleanup(
+ heketi_ops.heketi_device_delete, h_node, h_server, device_id,
+ raise_on_error=False)
+ self.addCleanup(
+ heketi_ops.heketi_device_remove, h_node, h_server, device_id,
+ raise_on_error=False)
+ self.addCleanup(
+ heketi_ops.heketi_device_disable, h_node, h_server, device_id,
+ raise_on_error=False)
+
+ # Disable, remove and delete the heketi device
+ heketi_ops.heketi_device_disable(h_node, h_server, device_id)
+ heketi_ops.heketi_device_remove(h_node, h_server, device_id)
+ heketi_ops.heketi_device_delete(h_node, h_server, device_id)
+
+ # Verify device deletion
+ node_info_after_deletion = (
+ heketi_ops.heketi_node_info(h_node, h_server, node_id))
+ msg = ("Device {} should not be shown in node info of the node {}"
+ "after the device deletion".format(device_id, node_id))
+ self.assertNotIn(device_id, node_info_after_deletion, msg)
+
+ if operation == "usedbytes":
+ # Validate heketi and prometheus device used bytes
+ for w in waiter.Waiter(timeout=60, interval=10):
+ device_used_bytes_prometheus = 0
+ device_used_bytes_metrics = 0
+ openshift_ops.switch_oc_project(
+ self.ocp_master_node[0], 'openshift-monitoring')
+ metric_result = self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_used_bytes')
+ for result in metric_result:
+ if (node_id == result.get('cluster')
+ and device_name == result.get('device')):
+ device_used_bytes_prometheus += (
+ int(result.get('value')[1]))
+ openshift_ops.switch_oc_project(
+ self.ocp_master_node[0], 'glusterfs')
+ metrics = heketi_ops.get_heketi_metrics(h_node, h_server)
+ heketi_device_count_metric = (
+ metrics.get('heketi_device_used_bytes'))
+ for result in heketi_device_count_metric:
+ if (node_id == result.get('cluster')
+ and device_name == result.get('device')):
+ device_used_bytes_metrics = int(result.get('value'))
+ if device_used_bytes_prometheus == device_used_bytes_metrics:
+ break
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to update device details in prometheus")
+
+ elif operation == "brickcount":
+ # Validate heketi and prometheus device brick count
+ for w in waiter.Waiter(timeout=60, interval=10):
+ device_brick_count_prometheus = 0
+ device_brick_count_metrics = 0
+ metrics = heketi_ops.get_heketi_metrics(h_node, h_server)
+ heketi_device_count_metric = metrics.get(
+ 'heketi_device_brick_count')
+ for result in heketi_device_count_metric:
+ device_brick_count_metrics += int(result.get('value'))
+ openshift_ops.switch_oc_project(
+ self.ocp_master_node[0], 'openshift-monitoring')
+ metric_result = self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
+ for result in metric_result:
+ device_brick_count_prometheus += (
+ int(result.get('value')[1]))
+ if device_brick_count_prometheus == device_brick_count_metrics:
+ break
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to update device details in prometheus")
+
+ @pytest.mark.tier2
+ @podcmd.GlustoPod()
+ def test_prometheous_kill_bhv_brick_process(self):
+ """Validate kill brick process of block hosting
+ volume with prometheus workload running"""
+
+ # Add check for CRS version
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ if not self.is_containerized_gluster():
+ self.skipTest("Skipping this test case as CRS"
+ " version check can not be implemented")
+
+ # Get one of the prometheus pod name and respective pvc name
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ prometheus_pods = openshift_ops.oc_get_pods(
+ self._master, selector=self._prometheus_resources_selector)
+ if not prometheus_pods:
+ self.skipTest(
+ "Skipping test as prometheus pod is not present")
+
+ # Validate iscsi and multipath
+ prometheus_pod = list(prometheus_pods.keys())[0]
+ pvc_name = openshift_ops.oc_get_custom_resource(
+ self._master, "pod",
+ ":.spec.volumes[*].persistentVolumeClaim.claimName",
+ prometheus_pod)
+ self.assertTrue(pvc_name, "Failed to get PVC name")
+ pvc_name = pvc_name[0]
+ self.verify_iscsi_sessions_and_multipath(
+ pvc_name, prometheus_pod, rtype='pod',
+ heketi_server_url=self._registry_heketi_server_url,
+ is_registry_gluster=True)
+
+ # Try to fetch metric from prometheus pod
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
+
+ # Kill the brick process of a BHV
+ gluster_node = list(self._registry_servers_info.keys())[0]
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ bhv_name = self.get_block_hosting_volume_by_pvc_name(
+ pvc_name, heketi_server_url=self._registry_heketi_server_url,
+ gluster_node=gluster_node, ocp_client_node=self._master)
+ vol_status = gluster_ops.get_gluster_vol_status(bhv_name)
+ gluster_node_ip, brick_pid = None, None
+ for g_node, g_node_data in vol_status.items():
+ for process_name, process_data in g_node_data.items():
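+ # Brick entries in the parsed volume status are keyed by the
+ # brick path; bricks are assumed to live under /var here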
+ if process_name.startswith("/var"):
+ gluster_node_ip = g_node
+ brick_pid = process_data["pid"]
+ break
+ if gluster_node_ip and brick_pid:
+ break
+ self.assertIsNotNone(brick_pid, "Could not find pid for brick")
+ cmd = "kill -9 {}".format(brick_pid)
+ openshift_ops.cmd_run_on_gluster_pod_or_node(
+ self._master, cmd, gluster_node_ip)
+ self.addCleanup(self._gluster_volume_cleanup, bhv_name)
+
+ # Check if the brick-process has been killed
+ killed_pid_cmd = (
+ "ps -p {} -o pid --no-headers".format(brick_pid))
+ try:
+ openshift_ops.cmd_run_on_gluster_pod_or_node(
+ self._master, killed_pid_cmd, gluster_node_ip)
+ except exceptions.ExecutionError:
+ g.log.info("Brick process {} was killed"
+ "successfully".format(brick_pid))
+
+ # Try to fetch metric from prometheus pod
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
+
+ # Start the bhv using force
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ start_vol, _, _ = volume_ops.volume_start(
+ gluster_node_ip, bhv_name, force=True)
+ self.assertFalse(
+ start_vol, "Failed to start volume {}"
+ " using force".format(bhv_name))
+
+ # Validate iscsi and multipath
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ self.verify_iscsi_sessions_and_multipath(
+ pvc_name, prometheus_pod, rtype='pod',
+ heketi_server_url=self._registry_heketi_server_url,
+ is_registry_gluster=True)
+
+ # Try to fetch metric from prometheus pod
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
+
+ def _check_heketi_and_gluster_pod_after_node_reboot(self, heketi_node):
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+ heketi_pod = openshift_ops.get_pod_names_from_dc(
+ self._master, self.heketi_dc_name)[0]
+
+ # Wait for heketi pod to become ready and running
+ openshift_ops.wait_for_pod_be_ready(self._master, heketi_pod)
+ heketi_ops.hello_heketi(self._master, self.heketi_server_url)
+
+ # Wait for glusterfs pods to become ready if hosted on same node
+ heketi_node_ip = openshift_ops.oc_get_custom_resource(
+ self._master, 'pod', '.:status.hostIP', heketi_pod)[0]
+ if heketi_node_ip in self.gluster_servers:
+ gluster_pod = openshift_ops.get_gluster_pod_name_for_specific_node(
+ self._master, heketi_node)
+
+ # Wait for glusterfs pod to become ready
+ openshift_ops.wait_for_pod_be_ready(self._master, gluster_pod)
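+ # Verify the expected state of each gluster service inside the pod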
+ services = (
+ ("glusterd", "running"), ("gluster-blockd", "running"),
+ ("tcmu-runner", "running"), ("gluster-block-target", "exited"))
+ for service, state in services:
+ openshift_ops.check_service_status_on_pod(
+ self._master, gluster_pod, service, "active", state)
+
+ @pytest.mark.tier4
+ def test_heketi_metrics_validation_with_node_reboot(self):
+ """Validate heketi metrics after node reboot using prometheus"""
+
+ initial_metrics, final_metrics = {}, {}
+
+ # Use storage project
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+
+ # Get initial metrics result
+ h_node, h_server = self.heketi_client_node, self.heketi_server_url
+ initial_metrics = tuple(
+ heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
+ for metric in self.metrics)
+
+ # Use prometheus project
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+
+ # Get initial prometheus result
+ initial_prometheus = self._get_and_manipulate_metric_data(
+ self.metrics)
+
+ # Get hosted node IP of heketi pod
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+ heketi_pod = openshift_ops.get_pod_name_from_dc(
+ self._master, self.heketi_dc_name)
+ heketi_node = openshift_ops.oc_get_custom_resource(
+ self._master, 'pod', '.:spec.nodeName', heketi_pod)[0]
+
+ # Reboot the node on which heketi pod is scheduled
+ self.addCleanup(
+ self._check_heketi_and_gluster_pod_after_node_reboot, heketi_node)
+ node_ops.node_reboot_by_command(heketi_node)
+
+ # Wait for the node to become NotReady
+ custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
+ for w in waiter.Waiter(300, 10):
+ status = openshift_ops.oc_get_custom_resource(
+ self._master, 'node', custom, heketi_node)
+ if status[0] == 'False':
+ break
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to bring down node {}".format(heketi_node))
+
+ # Wait for node to become ready
+ openshift_ops.wait_for_ocp_node_be_ready(self._master, heketi_node)
+
+ # Wait for heketi and glusterfs pod to become ready
+ self._check_heketi_and_gluster_pod_after_node_reboot(heketi_node)
+
+ # Use prometheus project
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+
+ # Get final metrics result
+ final_metrics = tuple(
+ heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
+ for metric in self.metrics)
+
+ # Get final prometheus result
+ final_prometheus = self._get_and_manipulate_metric_data(
+ self.metrics)
+
+ err_msg = "Initial value {} is not same as final value {}"
+ self.assertEqual(
+ initial_metrics, final_metrics, err_msg.format(
+ initial_metrics, final_metrics))
+ self.assertEqual(
+ initial_prometheus, final_prometheus, err_msg.format(
+ initial_prometheus, final_prometheus))
+
+ @pytest.mark.tier4
+ @ddt.data('add', 'delete')
+ def test_heketi_metrics_validation_after_node(self, condition):
+ """Validate heketi metrics after adding and remove node"""
+
+ # Get additional node
+ additional_host_info = g.config.get("additional_gluster_servers")
+ if not additional_host_info:
+ self.skipTest(
+ "Skipping this test case as additional gluster server is "
+ "not provied in config file")
+
+ additional_host_info = list(additional_host_info.values())[0]
+ storage_hostname = additional_host_info.get("manage")
+ storage_ip = additional_host_info.get("storage")
+ if not (storage_hostname and storage_ip):
+ self.skipTest(
+ "Config options 'additional_gluster_servers.manage' "
+ "and 'additional_gluster_servers.storage' must be set.")
+
+ h_client, h_server = self.heketi_client_node, self.heketi_server_url
+ initial_node_count, final_node_count = 0, 0
+
+ # Get initial node count from prometheus metrics
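+ # (note: prometheus returns sample values as strings)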
+ metric_result = self._fetch_metric_from_promtheus_pod(
+ metric='heketi_nodes_count')
+ initial_node_count = reduce(
+ lambda x, y: x + y,
+ [int(result.get('value')[1]) for result in metric_result])
+
+ # Switch to storage project
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+
+ # Configure node before adding node
+ self.configure_node_to_run_gluster(storage_hostname)
+
+ # Get cluster list
+ cluster_info = heketi_ops.heketi_cluster_list(
+ h_client, h_server, json=True)
+
+ # Add node to the cluster
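+ # (len(self.gluster_servers) is passed as the zone for the new node)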
+ heketi_node_info = heketi_ops.heketi_node_add(
+ h_client, h_server,
+ len(self.gluster_servers), cluster_info.get('clusters')[0],
+ storage_hostname, storage_ip, json=True)
+ heketi_node_id = heketi_node_info.get("id")
+ self.addCleanup(
+ heketi_ops.heketi_node_delete,
+ h_client, h_server, heketi_node_id, raise_on_error=False)
+ self.addCleanup(
+ heketi_ops.heketi_node_remove,
+ h_client, h_server, heketi_node_id, raise_on_error=False)
+ self.addCleanup(
+ heketi_ops.heketi_node_disable,
+ h_client, h_server, heketi_node_id, raise_on_error=False)
+ self.addCleanup(
+ openshift_ops.switch_oc_project,
+ self._master, self.storage_project_name)
+
+ if condition == 'delete':
+ # Switch to openshift-monitoring project
+ openshift_ops.switch_oc_project(
+ self.ocp_master_node[0], self._prometheus_project_name)
+
+ # Wait for prometheus to reflect the newly added node
+ for w in waiter.Waiter(timeout=60, interval=10):
+ metric_result = self._fetch_metric_from_promtheus_pod(
+ metric='heketi_nodes_count')
+ node_count = reduce(
+ lambda x, y: x + y,
+ [int(result.get('value')[1]) for result in metric_result])
+ if node_count != initial_node_count:
+ break
+
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to get updated node details from prometheus")
+
+ # Remove node from cluster
+ heketi_ops.heketi_node_disable(h_client, h_server, heketi_node_id)
+ heketi_ops.heketi_node_remove(h_client, h_server, heketi_node_id)
+ for device in heketi_node_info.get('devices'):
+ heketi_ops.heketi_device_delete(
+ h_client, h_server, device.get('id'))
+ heketi_ops.heketi_node_delete(h_client, h_server, heketi_node_id)
+
+ # Switch to openshift-monitoring project
+ openshift_ops.switch_oc_project(
+ self.ocp_master_node[0], self._prometheus_project_name)
+
+ # Get final node count from prometheus metrics
+ for w in waiter.Waiter(timeout=60, interval=10):
+ metric_result = self._fetch_metric_from_promtheus_pod(
+ metric='heketi_nodes_count')
+ final_node_count = reduce(
+ lambda x, y: x + y,
+ [int(result.get('value')[1]) for result in metric_result])
+
+ if condition == 'delete':
+ if final_node_count < node_count:
+ break
+ else:
+ if final_node_count > initial_node_count:
+ break
+
+ if w.expired:
+ raise exceptions.ExecutionError(
+ "Failed to update node details in prometheus")
+
+ @pytest.mark.tier2
+ def test_restart_prometheus_glusterfs_pod(self):
+ """Validate restarting glusterfs pod"""
+
+ # Add check for CRS version
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ if not self.is_containerized_gluster():
+ self.skipTest(
+ "Skipping this test case as CRS version check "
+ "can not be implemented")
+
+ # Get one of the prometheus pod name and respective pvc name
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ prometheus_pods = openshift_ops.oc_get_pods(
+ self._master, selector=self._prometheus_resources_selector)
+ if not prometheus_pods:
+ self.skipTest(
+ "Skipping test as prometheus pod is not present")
+ prometheus_pod = list(prometheus_pods.keys())[0]
+ pvc_name = openshift_ops.oc_get_custom_resource(
+ self._master, "pod",
+ ":.spec.volumes[*].persistentVolumeClaim.claimName",
+ prometheus_pod)[0]
+ self.assertTrue(
+ pvc_name,
+ "Failed to get pvc name from {} pod".format(prometheus_pod))
+ iqn, _, node = self.verify_iscsi_sessions_and_multipath(
+ pvc_name, prometheus_pod, rtype='pod',
+ heketi_server_url=self._registry_heketi_server_url,
+ is_registry_gluster=True)
+
+ # Get the ip of active path
+ devices = openshift_storage_libs.get_iscsi_block_devices_by_path(
+ node, iqn)
+ mpath = openshift_storage_libs.get_mpath_name_from_device_name(
+ node, list(devices.keys())[0])
+ mpath_dev = (
+ openshift_storage_libs.get_active_and_enabled_devices_from_mpath(
+ node, mpath))
+ node_ip = devices[mpath_dev['active'][0]]
+
+ # Get the name of gluster pod from the ip
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ gluster_pods = openshift_ops.get_ocp_gluster_pod_details(
+ self._master)
+ active_pod_name = list(
+ filter(lambda pod: (pod["pod_host_ip"] == node_ip), gluster_pods)
+ )[0]["pod_name"]
+ err_msg = "Failed to get the gluster pod name {} with active path"
+ self.assertTrue(active_pod_name, err_msg.format(active_pod_name))
+ g_pods = [pod['pod_name'] for pod in gluster_pods]
+ g_pods.remove(active_pod_name)
+ pod_list = [active_pod_name, g_pods[0]]
+ for pod_name in pod_list:
+
+ # Delete the glusterfs pods
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
+
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ g_pod_list_before = [
+ pod["pod_name"]
+ for pod in openshift_ops.get_ocp_gluster_pod_details(
+ self._master)]
+
+ openshift_ops.oc_delete(self._master, 'pod', pod_name)
+ self.addCleanup(
+ self._gluster_pod_delete, g_pod_list_before)
+
+ # Wait for gluster pod to be absent
+ openshift_ops.wait_for_resource_absence(
+ self._master, 'pod', pod_name)
+
+ # Try to fetch metric from prometheus pod
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
+
+ # Wait for new pod to come up
+ openshift_ops.switch_oc_project(
+ self._master, self._registry_project_name)
+ self.assertTrue(self._get_newly_deployed_gluster_pod(
+ g_pod_list_before), "Failed to get new pod")
+ self._wait_for_gluster_pod_be_ready(g_pod_list_before)
+
+ # Validate iscsi and multipath
+ openshift_ops.switch_oc_project(
+ self._master, self._prometheus_project_name)
+ self.verify_iscsi_sessions_and_multipath(
+ pvc_name, prometheus_pod, rtype='pod',
+ heketi_server_url=self._registry_heketi_server_url,
+ is_registry_gluster=True)
+
+ # Try to fetch metric from prometheus pod
+ self._fetch_metric_from_promtheus_pod(
+ metric='heketi_device_brick_count')
diff --git a/tests/functional/prometheous/test_prometheus_validations_file.py b/tests/functional/prometheous/test_prometheus_validations_file.py
new file mode 100644
index 00000000..bbf4aedc
--- /dev/null
+++ b/tests/functional/prometheous/test_prometheus_validations_file.py
@@ -0,0 +1,335 @@
+try:
+ # py2/3
+ import simplejson as json
+except ImportError:
+ # py2
+ import json
+import time
+
+import ddt
+from glusto.core import Glusto as g
+from glustolibs.gluster import rebalance_ops
+import pytest
+
+from openshiftstoragelibs import baseclass
+from openshiftstoragelibs import exceptions
+from openshiftstoragelibs import heketi_ops
+from openshiftstoragelibs import openshift_ops
+from openshiftstoragelibs import podcmd
+from openshiftstoragelibs import waiter
+
+
+@ddt.ddt
+class TestPrometheusValidationFile(baseclass.BaseClass):
+ """Prometheus Validations for file volumes"""
+
+ @classmethod
+ def setUpClass(cls):
+ super(TestPrometheusValidationFile, cls).setUpClass()
+
+ # Metrics of which the data need to retrieve in this class
+ cls.metrics = ('kubelet_volume_stats_inodes_free',
+ 'kubelet_volume_stats_inodes',
+ 'kubelet_volume_stats_inodes_used',
+ 'kubelet_volume_stats_available_bytes',
+ 'kubelet_volume_stats_capacity_bytes',
+ 'kubelet_volume_stats_used_bytes')
+
+ def setUp(self):
+ """Initialize all the variables which are necessary for test cases"""
+ super(TestPrometheusValidationFile, self).setUp()
+
+ try:
+ prometheus_config = g.config['openshift']['prometheus']
+ self._prometheus_project_name = prometheus_config[
+ 'prometheus_project_name']
+ self._prometheus_resources_selector = prometheus_config[
+ 'prometheus_resources_selector']
+ self._alertmanager_resources_selector = prometheus_config[
+ 'alertmanager_resources_selector']
+ except KeyError as err:
+ self.skipTest("Config file doesn't have key {}".format(err))
+
+ self._master = self.ocp_master_node[0]
+
+ def _fetch_metric_from_promtheus_pod(self, metric):
+ """Fetch metric from prometheus pod using api call"""
+
+ prometheus_pods = list(openshift_ops.oc_get_pods(
+ self._master, selector=self._prometheus_resources_selector).keys())
+ fetch_metric_cmd = ("curl 'http://localhost:9090/api/v1/query"
+ "?query={}'".format(metric))
+ ret, metric_data, _ = openshift_ops.oc_rsh(
+ self._master, prometheus_pods[0], fetch_metric_cmd)
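+ # The query API answers with JSON shaped like
+ # {"data": {"result": [{"metric": {...}, "value": [<ts>, "<value>"]}]}}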
+ metric_result = json.loads(metric_data)["data"]["result"]
+ if (not metric_result) or ret:
+ raise exceptions.ExecutionError(
+ "Failed to fecth data for metric {}, output {}".format(
+ metric, metric_result))
+ return metric_result
+
+ def _get_and_manipulate_metric_data(self, metrics, pvc):
+ """Create a dict of metric names and total values"""
+
+ # Switch to namespace containing prometheus pods
+ openshift_ops.switch_oc_project(self._master,
+ self._prometheus_project_name)
+ self.addCleanup(openshift_ops.switch_oc_project,
+ self._master, self.storage_project_name)
+
+ metric_data = dict()
+ for metric in metrics:
+ out = self._fetch_metric_from_promtheus_pod(metric)
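+ # Keep only the samples whose 'persistentvolumeclaim' label
+ # matches the PVC under test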
+ for metric_result in out:
+ if metric_result["metric"]["persistentvolumeclaim"] == pvc:
+ metric_data[metric_result["metric"][
+ "__name__"]] = metric_result["value"][1]
+ return metric_data
+
+ def _fetch_initial_metrics(self, vol_name_prefix=None,
+ volume_expansion=False):
+
+ # Create PVC and wait for it to be in 'Bound' state
+ sc_name = self.create_storage_class(
+ vol_name_prefix=vol_name_prefix,
+ allow_volume_expansion=volume_expansion)
+ pvc_name = self.create_and_wait_for_pvc(
+ pvc_name_prefix=vol_name_prefix, sc_name=sc_name)
+
+ # Create DC and attach with pvc
+ self.dc_name, pod_name = self.create_dc_with_pvc(pvc_name)
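+ # Poll until all six kubelet_volume_stats_* metrics are reported
+ # for the new PVC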
+ for w in waiter.Waiter(120, 10):
+ initial_metrics = self._get_and_manipulate_metric_data(
+ self.metrics, pvc_name)
+ if bool(initial_metrics) and len(initial_metrics) == 6:
+ break
+ if w.expired:
+ raise AssertionError("Unable to fetch metrics for the pvc")
+ return pvc_name, pod_name, initial_metrics
+
+ def _perform_io_and_fetch_metrics(
+ self, pod_name, pvc_name, filename, dirname,
+ metric_data, operation):
+ """Create 1000 files and dirs and validate with old metrics"""
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+ if operation == "create":
+ cmds = ("touch /mnt/{}{{1..1000}}".format(filename),
+ "mkdir /mnt/{}{{1..1000}}".format(dirname))
+ else:
+ cmds = ("rm -rf /mnt/large_file",
+ "rm -rf /mnt/{}{{1..1000}}".format(filename),
+ "rm -rf /mnt/{}{{1..1000}}".format(dirname))
+ for cmd in cmds:
+ self.cmd_run("oc rsh {} {}".format(pod_name, cmd))
+
+ # Fetch the new metrics and compare the inodes used and bytes used
+ for w in waiter.Waiter(120, 10):
+ after_io_metrics = self._get_and_manipulate_metric_data(
+ self.metrics, pvc_name)
+ if operation == "create":
+ if (int(after_io_metrics[
+ 'kubelet_volume_stats_inodes_used']) > int(
+ metric_data['kubelet_volume_stats_inodes_used']) and int(
+ after_io_metrics[
+ 'kubelet_volume_stats_used_bytes']) > int(
+ metric_data['kubelet_volume_stats_used_bytes'])):
+ break
+ else:
+ if int(metric_data[
+ 'kubelet_volume_stats_used_bytes']) > int(
+ after_io_metrics['kubelet_volume_stats_used_bytes']):
+ break
+ if w.expired:
+ raise AssertionError(
+ "After data is modified metrics like bytes used and inodes "
+ "used are not reflected in prometheus")
+
+ def _run_io_on_the_pod(self, pod_name, number_of_files):
+ for each in range(number_of_files):
+ cmd = "touch /mnt/file{}".format(each)
+ ret, _, err = openshift_ops.oc_rsh(self._master, pod_name, cmd)
+ self.assertFalse(ret, "Failed to run the IO with error msg {}".
+ format(err))
+
+ @podcmd.GlustoPod()
+ def _rebalance_completion(self, volume_name):
+ """Rebalance start and completion after expansion."""
+ ret, _, err = rebalance_ops.rebalance_start(
+ 'auto_get_gluster_endpoint', volume_name)
+ self.assertFalse(
+ ret, "Rebalance for {} volume not started with error {}".format(
+ volume_name, err))
+
+ for w in waiter.Waiter(240, 10):
+ reb_status = rebalance_ops.get_rebalance_status(
+ 'auto_get_gluster_endpoint', volume_name)
+ if reb_status["aggregate"]["statusStr"] == "completed":
+ break
+ if w.expired:
+ raise AssertionError(
+ "Failed to complete the rebalance in 240 seconds")
+
+ @pytest.mark.tier2
+ def test_prometheus_volume_metrics_on_pod_restart(self):
+ """Validate volume metrics using prometheus before and after pod
+ restart"""
+
+ # Create PVC and wait for it to be in 'Bound' state
+ pvc_name = self.create_and_wait_for_pvc()
+ pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
+ self._master, pvc_name, "autotest-volume",
+ image=self.io_container_image_cirros)
+ self.addCleanup(openshift_ops.oc_delete, self._master, 'pod', pod_name,
+ raise_on_absence=False)
+
+ # Wait for POD be up and running
+ openshift_ops.wait_for_pod_be_ready(
+ self._master, pod_name, timeout=60, wait_step=2)
+
+ # Write data on the volume and wait for 2 mins; the sleep is required
+ # for prometheus to report the exact values of the metrics
+ self._run_io_on_the_pod(pod_name, 30)
+ time.sleep(120)
+
+ # Fetching the metrics and storing in initial_metrics as dictionary
+ initial_metrics = self._get_and_manipulate_metric_data(
+ self.metrics, pvc_name)
+
+ # Mark the node on which the app pod is running as unschedulable
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+ pod_info = openshift_ops.oc_get_pods(self._master, name=pod_name)
+ openshift_ops.oc_adm_manage_node(
+ self._master, '--schedulable=false',
+ nodes=[pod_info[pod_name]["node"]])
+ self.addCleanup(
+ openshift_ops.oc_adm_manage_node, self._master,
+ '--schedulable=true', nodes=[pod_info[pod_name]["node"]])
+
+ # Delete the existing pod and create a new pod
+ openshift_ops.oc_delete(self._master, 'pod', pod_name)
+ pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
+ self._master, pvc_name, "autotest-volume")
+ self.addCleanup(openshift_ops.oc_delete, self._master, 'pod', pod_name)
+
+ # Wait for POD be up and running and prometheus to refresh the data
+ openshift_ops.wait_for_pod_be_ready(
+ self._master, pod_name, timeout=60, wait_step=2)
+ time.sleep(120)
+
+ # Fetching the metrics and storing in final_metrics as dictionary and
+ # validating with initial_metrics
+ final_metrics = self._get_and_manipulate_metric_data(
+ self.metrics, pvc_name)
+ self.assertEqual(dict(initial_metrics), dict(final_metrics),
+ "Metrics are different post pod restart")
+
+ @pytest.mark.tier2
+ def test_prometheus_basic_validation(self):
+ """ Validate basic volume metrics using prometheus """
+
+ # Fetch the metrics and storing initial_metrics as dictionary
+ pvc_name, pod_name, initial_metrics = self._fetch_initial_metrics(
+ volume_expansion=False)
+
+ # Create 1000 files and verify from the fetched metrics that the data is updated
+ self._perform_io_and_fetch_metrics(
+ pod_name=pod_name, pvc_name=pvc_name,
+ filename="filename1", dirname="dirname1",
+ metric_data=initial_metrics, operation="create")
+
+ # Write IO of half the volume size and validate from the
+ # prometheus pod that the size change is reflected
+ size_to_write = int(initial_metrics[
+ 'kubelet_volume_stats_capacity_bytes']) // 2
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
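+ # bs * count below adds up to roughly half of the reported capacity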
+ cmd = ("dd if=/dev/urandom of=/mnt/large_file bs={} count=1024".
+ format(size_to_write // 1024))
+ ret, _, err = openshift_ops.oc_rsh(self._master, pod_name, cmd)
+ self.assertFalse(ret, 'Failed to write file due to err {}'.format(err))
+
+ # Fetching the metrics and validating the data change is reflected
+ for w in waiter.Waiter(120, 10):
+ half_io_metrics = self._get_and_manipulate_metric_data(
+ ['kubelet_volume_stats_used_bytes'], pvc_name)
+ if bool(half_io_metrics) and (int(
+ half_io_metrics['kubelet_volume_stats_used_bytes'])
+ > size_to_write):
+ break
+ if w.expired:
+ raise AssertionError(
+ "After Data is written on the pvc, metrics like inodes used "
+ "and bytes used are not reflected in the prometheus")
+
+ # Delete the files from the volume and wait for the
+ # updated details reflected in prometheus
+ self._perform_io_and_fetch_metrics(
+ pod_name=pod_name, pvc_name=pvc_name,
+ filename="filename1", dirname="dirname1",
+ metric_data=half_io_metrics, operation="delete")
+
+ @pytest.mark.tier2
+ def test_prometheus_pv_resize(self):
+ """ Validate prometheus metrics with pv resize"""
+
+ # Fetch the metrics and storing initial_metrics as dictionary
+ pvc_name, pod_name, initial_metrics = self._fetch_initial_metrics(
+ vol_name_prefix="for-pv-resize", volume_expansion=True)
+
+ # Write data on the pvc and confirm it is reflected in the prometheus
+ self._perform_io_and_fetch_metrics(
+ pod_name=pod_name, pvc_name=pvc_name,
+ filename="filename1", dirname="dirname1",
+ metric_data=initial_metrics, operation="create")
+
+ # Resize the pvc to 2GiB
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+ pvc_size = 2
+ openshift_ops.resize_pvc(self._master, pvc_name, pvc_size)
+ openshift_ops.wait_for_events(self._master, obj_name=pvc_name,
+ event_reason='VolumeResizeSuccessful')
+ openshift_ops.verify_pvc_size(self._master, pvc_name, pvc_size)
+ pv_name = openshift_ops.get_pv_name_from_pvc(
+ self._master, pvc_name)
+ openshift_ops.verify_pv_size(self._master, pv_name, pvc_size)
+
+ heketi_volume_name = heketi_ops.heketi_volume_list_by_name_prefix(
+ self.heketi_client_node, self.heketi_server_url,
+ "for-pv-resize", json=True)[0][2]
+ self.assertIsNotNone(
+ heketi_volume_name, "Failed to fetch volume with prefix {}".
+ format("for-pv-resize"))
+
+ openshift_ops.oc_delete(self._master, 'pod', pod_name)
+ openshift_ops.wait_for_resource_absence(self._master, 'pod', pod_name)
+ pod_name = openshift_ops.get_pod_name_from_dc(
+ self._master, self.dc_name)
+ openshift_ops.wait_for_pod_be_ready(self._master, pod_name)
+
+ # Check whether the metrics are updated or not
+ for w in waiter.Waiter(120, 10):
+ resize_metrics = self._get_and_manipulate_metric_data(
+ self.metrics, pvc_name)
+ if bool(resize_metrics) and int(resize_metrics[
+ 'kubelet_volume_stats_capacity_bytes']) > int(
+ initial_metrics['kubelet_volume_stats_capacity_bytes']):
+ break
+ if w.expired:
+ raise AssertionError("Failed to reflect PVC Size after resizing")
+ openshift_ops.switch_oc_project(
+ self._master, self.storage_project_name)
+ time.sleep(240)
+
+ # Do lookups, then trigger rebalance and wait for its completion
+ for _ in range(100):
+ self.cmd_run("oc rsh {} ls /mnt/".format(pod_name))
+ self._rebalance_completion(heketi_volume_name)
+
+ # Write data on the resized pvc and compare with the resized metrics
+ self._perform_io_and_fetch_metrics(
+ pod_name=pod_name, pvc_name=pvc_name,
+ filename="secondfilename", dirname="seconddirname",
+ metric_data=resize_metrics, operation="create")
diff --git a/tests/functional/provisioning/test_dev_path_mapping_block.py b/tests/functional/provisioning/test_dev_path_mapping_block.py
index 21caf852..b0955f5d 100644
--- a/tests/functional/provisioning/test_dev_path_mapping_block.py
+++ b/tests/functional/provisioning/test_dev_path_mapping_block.py
@@ -32,11 +32,11 @@ class TestDevPathMapping(baseclass.GlusterBlockBaseClass):
# Disable 4th and other nodes
for node_id in h_nodes_list[3:]:
- heketi_ops.heketi_node_disable(
- self.h_node, self.h_server, node_id)
self.addCleanup(
heketi_ops.heketi_node_enable,
self.h_node, self.h_server, node_id)
+ heketi_ops.heketi_node_disable(
+ self.h_node, self.h_server, node_id)
h_info = heketi_ops.heketi_node_info(
self.h_node, self.h_server, h_nodes_list[0], json=True)
diff --git a/tests/functional/provisioning/test_dev_path_mapping_file.py b/tests/functional/provisioning/test_dev_path_mapping_file.py
index 8d237a9b..fe4e9834 100644
--- a/tests/functional/provisioning/test_dev_path_mapping_file.py
+++ b/tests/functional/provisioning/test_dev_path_mapping_file.py
@@ -33,11 +33,11 @@ class TestDevPathMapping(baseclass.BaseClass):
# Disable 4th and other nodes
for node_id in h_nodes_list[3:]:
- heketi_ops.heketi_node_disable(
- self.h_node, self.h_server, node_id)
self.addCleanup(
heketi_ops.heketi_node_enable,
self.h_node, self.h_server, node_id)
+ heketi_ops.heketi_node_disable(
+ self.h_node, self.h_server, node_id)
h_info = heketi_ops.heketi_node_info(
self.h_node, self.h_server, h_nodes_list[0], json=True)
@@ -374,3 +374,421 @@ class TestDevPathMapping(baseclass.BaseClass):
use_percent, use_percent_after,
"Failed to execute IO's in the app pod {} after respin".format(
pod_name))
+
+ def _get_bricks_and_device_details(self):
+ """Fetch bricks count and device id list from the node where dev path
+ operation is performed
+ """
+
+ h_client, h_url = self.heketi_client_node, self.heketi_server_url
+ h_node_details = []
+
+ # Fetch bricks on the devices
+ h_nodes = heketi_ops.heketi_node_list(h_client, h_url)
+ for h_node in h_nodes:
+ h_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ h_node_hostname = h_node_info.get("hostnames").get("manage")[0]
+
+ # Fetch bricks count and device list
+ if h_node_hostname == self.node_hostname:
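+ # Each entry: [device_id, brick_count, device_name]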
+ h_node_details = [
+ [node_info['id'], len(node_info['bricks']),
+ node_info['name']]
+ for node_info in h_node_info['devices']]
+ return h_node_details, h_node
+
+ @pytest.mark.tier4
+ @podcmd.GlustoPod()
+ def test_dev_path_mapping_heketi_device_delete(self):
+ """Validate dev path mapping for heketi device delete lifecycle"""
+ h_client, h_url = self.heketi_client_node, self.heketi_server_url
+
+ node_ids = heketi_ops.heketi_node_list(h_client, h_url)
+ self.assertTrue(node_ids, "Failed to get heketi node list")
+
+ # Fetch #4th node for the operations
+ h_disable_node = node_ids[3]
+
+ # Fetch bricks on the devices before volume create
+ h_node_details_before, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node before pvc creation
+ brick_count_before = [count[1] for count in h_node_details_before]
+
+ # Create file volume with app pod and verify IO's
+ # and compare path, UUID, vg_name
+ pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()
+
+ # Check if IO's are running
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute IO's in the app pod {} after respin".format(
+ pod_name))
+
+ # Fetch bricks on the devices after volume create
+ h_node_details_after, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node after pvc creation
+ brick_count_after = [count[1] for count in h_node_details_after]
+
+ self.assertGreater(
+ sum(brick_count_after), sum(brick_count_before),
+ "Failed to add bricks on the node {}".format(h_node))
+
+ # Enable the #4th node
+ heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node)
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_disable_node, json=True)
+ h_node_id = node_info['id']
+ self.assertEqual(
+ node_info['state'], "online",
+ "Failed to enable node {}".format(h_disable_node))
+
+ # Fetch the list of devices to be deleted
+ h_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ devices_list = [
+ [device['id'], device['name']]
+ for device in h_node_info['devices']]
+
+ # Device deletion operation
+ for device in devices_list:
+ device_id, device_name = device[0], device[1]
+ self.addCleanup(
+ heketi_ops.heketi_device_enable, h_client, h_url,
+ device_id, raise_on_error=False)
+
+ # Disable device from heketi
+ device_disable = heketi_ops.heketi_device_disable(
+ h_client, h_url, device_id)
+ self.assertTrue(
+ device_disable,
+ "Device {} could not be disabled".format(device_id))
+
+ device_info = heketi_ops.heketi_device_info(
+ h_client, h_url, device_id, json=True)
+ self.assertEqual(
+ device_info['state'], "offline",
+ "Failed to disable device {}".format(device_id))
+
+ # Remove device from heketi
+ device_remove = heketi_ops.heketi_device_remove(
+ h_client, h_url, device_id)
+ self.assertTrue(
+ device_remove,
+ "Device {} could not be removed".format(device_id))
+
+ # Bricks after device removal
+ device_info = heketi_ops.heketi_device_info(
+ h_client, h_url, device_id, json=True)
+ bricks_count_after = len(device_info['bricks'])
+ self.assertFalse(
+ bricks_count_after,
+ "Failed to remove the bricks from the device {}".format(
+ device_id))
+
+ # Delete device from heketi
+ self.addCleanup(
+ heketi_ops.heketi_device_add, h_client, h_url,
+ device_name, h_node, raise_on_error=False)
+ device_delete = heketi_ops.heketi_device_delete(
+ h_client, h_url, device_id)
+ self.assertTrue(
+ device_delete,
+ "Device {} could not be deleted".format(device_id))
+
+ # Check if IO's are running after device is deleted
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute IO's in the app pod {} after respin".format(
+ pod_name))
+
+ # Add device operations
+ for device in devices_list:
+ device_name = device[1]
+
+ # Add device back to the node
+ heketi_ops.heketi_device_add(h_client, h_url, device_name, h_node)
+
+ # Fetch device info after device add
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ device_id = None
+ for device in node_info["devices"]:
+ if device["name"] == device_name:
+ device_id = device["id"]
+ break
+ self.assertTrue(
+ device_id,
+ "Failed to add device {} on node"
+ " {}".format(device_name, h_node))
+
+ # Disable the #4th node
+ heketi_ops.heketi_node_disable(h_client, h_url, h_node_id)
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node_id, json=True)
+ self.assertEqual(
+ node_info['state'], "offline",
+ "Failed to disable node {}".format(h_node_id))
+ pvc_amount, pvc_size = 5, 1
+
+ # Fetch bricks on the devices before volume create
+ h_node_details_before, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node before pvc creation
+ brick_count_before = [count[1] for count in h_node_details_before]
+
+ # Create file volumes
+ pvc_name = self.create_and_wait_for_pvcs(
+ pvc_size=pvc_size, pvc_amount=pvc_amount)
+ self.assertEqual(
+ len(pvc_name), pvc_amount,
+ "Failed to create {} pvc".format(pvc_amount))
+
+ # Fetch bricks on the devices after volume create
+ h_node_details_after, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node after pvc creation
+ brick_count_after = [count[1] for count in h_node_details_after]
+
+ self.assertGreater(
+ sum(brick_count_after), sum(brick_count_before),
+ "Failed to add bricks on the node {}".format(h_node))
+
+ # Check if IO's are running after new device is added
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute IO's in the app pod {} after respin".format(
+ pod_name))
+
+ def _get_bricks_counts_and_device_name(self):
+ """Fetch bricks count and device name from all the nodes"""
+ h_client, h_url = self.heketi_client_node, self.heketi_server_url
+
+ # Fetch bricks on the devices
+ h_nodes = heketi_ops.heketi_node_list(h_client, h_url)
+
+ node_details = {}
+ for h_node in h_nodes:
+ h_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ node_details[h_node] = [[], []]
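+ # First sub-list collects per-device brick counts, the second device ids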
+ for device in h_node_info['devices']:
+ node_details[h_node][0].append(len(device['bricks']))
+ node_details[h_node][1].append(device['id'])
+ return node_details
+
+ @pytest.mark.tier4
+ @podcmd.GlustoPod()
+ def test_dev_path_mapping_heketi_node_delete(self):
+ """Validate dev path mapping for heketi node deletion lifecycle"""
+ h_client, h_url = self.heketi_client_node, self.heketi_server_url
+
+ node_ids = heketi_ops.heketi_node_list(h_client, h_url)
+ self.assertTrue(node_ids, "Failed to get heketi node list")
+
+ # Fetch #4th node for the operations
+ h_disable_node = node_ids[3]
+
+ # Fetch bricks on the devices before volume create
+ h_node_details_before, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node before pvc creation
+ brick_count_before = [count[1] for count in h_node_details_before]
+
+ # Create file volume with app pod and verify IO's
+ # and compare path, UUID, vg_name
+ pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()
+
+ # Check if IO's are running
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute IO's in the app pod {} after respin".format(
+ pod_name))
+
+ # Fetch bricks on the devices after volume create
+ h_node_details_after, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node after pvc creation
+ brick_count_after = [count[1] for count in h_node_details_after]
+
+ self.assertGreater(
+ sum(brick_count_after), sum(brick_count_before),
+ "Failed to add bricks on the node {}".format(h_node))
+ self.addCleanup(
+ heketi_ops.heketi_node_disable, h_client, h_url, h_disable_node)
+
+ # Enable the #4th node
+ heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node)
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_disable_node, json=True)
+ h_node_id = node_info['id']
+ self.assertEqual(
+ node_info['state'], "online",
+ "Failed to enable node {}".format(h_disable_node))
+
+ # Disable the node and check for brick migrations
+ self.addCleanup(
+ heketi_ops.heketi_node_enable, h_client, h_url, h_node,
+ raise_on_error=False)
+ heketi_ops.heketi_node_disable(h_client, h_url, h_node)
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ self.assertEqual(
+ node_info['state'], "offline",
+ "Failed to disable node {}".format(h_node))
+
+ # Before bricks migration
+ h_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+
+ # Bricks before migration on the node which is to be deleted
+ bricks_counts_before = 0
+ for device in h_node_info['devices']:
+ bricks_counts_before += (len(device['bricks']))
+
+ # Remove the node
+ heketi_ops.heketi_node_remove(h_client, h_url, h_node)
+
+ # After bricks migration
+ h_node_info_after = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+
+ # Bricks after migration on the node which is to be deleted
+ bricks_counts = 0
+ for device in h_node_info_after['devices']:
+ bricks_counts += (len(device['bricks']))
+
+ self.assertFalse(
+ bricks_counts,
+ "Failed to remove all the bricks from node {}".format(h_node))
+
+ # Old node which is to be deleted, new node where the bricks reside
+ old_node, new_node = h_node, h_node_id
+
+ # Node info for the new node where the bricks reside after migration
+ h_node_info_new = heketi_ops.heketi_node_info(
+ h_client, h_url, new_node, json=True)
+
+ bricks_counts_after = 0
+ for device in h_node_info_new['devices']:
+ bricks_counts_after += (len(device['bricks']))
+
+ self.assertEqual(
+ bricks_counts_before, bricks_counts_after,
+ "Failed to migrated bricks from {} node to {}".format(
+ old_node, new_node))
+
+ # Fetch the list of devices to be deleted
+ h_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ devices_list = [
+ [device['id'], device['name']]
+ for device in h_node_info['devices']]
+
+ for device in devices_list:
+ device_id = device[0]
+ device_name = device[1]
+ self.addCleanup(
+ heketi_ops.heketi_device_add, h_client, h_url,
+ device_name, h_node, raise_on_error=False)
+
+ # Device deletion from heketi node
+ device_delete = heketi_ops.heketi_device_delete(
+ h_client, h_url, device_id)
+ self.assertTrue(
+ device_delete,
+ "Failed to delete the device {}".format(device_id))
+
+ node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node, json=True)
+ cluster_id = node_info['cluster']
+ zone = node_info['zone']
+ storage_hostname = node_info['hostnames']['manage'][0]
+ storage_ip = node_info['hostnames']['storage'][0]
+
+ # Delete the node
+ self.addCleanup(
+ heketi_ops.heketi_node_add, h_client, h_url,
+ zone, cluster_id, storage_hostname, storage_ip,
+ raise_on_error=False)
+ heketi_ops.heketi_node_delete(h_client, h_url, h_node)
+
+ # Verify if the node is deleted
+ node_ids = heketi_ops.heketi_node_list(h_client, h_url)
+ self.assertNotIn(
+ old_node, node_ids,
+ "Failed to delete the node {}".format(old_node))
+
+ # Check if IO's are running
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute IO's in the app pod {} after respin".format(
+ pod_name))
+
+ # Adding node back
+ h_node_info = heketi_ops.heketi_node_add(
+ h_client, h_url, zone, cluster_id,
+ storage_hostname, storage_ip, json=True)
+ self.assertTrue(
+ h_node_info,
+ "Failed to add the node in the cluster {}".format(cluster_id))
+ h_node_id = h_node_info["id"]
+
+ # Adding devices to the new node
+ for device in devices_list:
+ storage_device = device[1]
+
+ # Add device to the new heketi node
+ heketi_ops.heketi_device_add(
+ h_client, h_url, storage_device, h_node_id)
+ heketi_node_info = heketi_ops.heketi_node_info(
+ h_client, h_url, h_node_id, json=True)
+ device_id = None
+ for device in heketi_node_info["devices"]:
+ if device["name"] == storage_device:
+ device_id = device["id"]
+ break
+
+ self.assertTrue(
+ device_id, "Failed to add device {} on node {}".format(
+ storage_device, h_node_id))
+
+ # Create PVCs in order to verify if the bricks reside on the new node
+ pvc_amount, pvc_size = 5, 1
+
+ # Fetch bricks on the devices before volume create
+ h_node_details_before, h_node = self._get_bricks_and_device_details()
+
+ # Bricks count on the node before pvc creation
+ brick_count_before = [count[1] for count in h_node_details_before]
+
+ # Create file volumes
+ pvc_name = self.create_and_wait_for_pvcs(
+ pvc_size=pvc_size, pvc_amount=pvc_amount)
+ self.assertEqual(
+ len(pvc_name), pvc_amount,
+ "Failed to create {} pvc".format(pvc_amount))
+
+ # Fetch bricks on the devices after volume create
+ h_node_details_after, h_node = self._get_bricks_and_device_details()
+
+ # Brick count on the node after PVC creation
+ brick_count_after = [count[1] for count in h_node_details_after]
+
+ self.assertGreater(
+ sum(brick_count_after), sum(brick_count_before),
+ "Failed to add bricks on the new node {}".format(new_node))
+
+ # Check that I/O is still running after the new node is added
+ use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
+ self.assertNotEqual(
+ use_percent, use_percent_after,
+ "Failed to execute I/O in the app pod {} after adding the new node"
+ .format(pod_name))
diff --git a/tests/functional/provisioning/test_dynamic_provisioning_file.py b/tests/functional/provisioning/test_dynamic_provisioning_file.py
index 3a11cbe5..87ff754a 100644
--- a/tests/functional/provisioning/test_dynamic_provisioning_file.py
+++ b/tests/functional/provisioning/test_dynamic_provisioning_file.py
@@ -4,6 +4,7 @@ from glusto.core import Glusto as g
import pytest
from openshiftstoragelibs.baseclass import BaseClass
+from openshiftstoragelibs import command
from openshiftstoragelibs.exceptions import ExecutionError
from openshiftstoragelibs.heketi_ops import (
heketi_node_info,
@@ -13,7 +14,12 @@ from openshiftstoragelibs.heketi_ops import (
heketi_volume_list,
verify_volume_name_prefix,
)
-from openshiftstoragelibs.node_ops import node_reboot_by_command
+from openshiftstoragelibs.node_ops import (
+ find_vm_name_by_ip_or_hostname,
+ node_reboot_by_command,
+ power_off_vm_by_name,
+ power_on_vm_by_name
+)
from openshiftstoragelibs.openshift_ops import (
cmd_run_on_gluster_pod_or_node,
get_gluster_host_ips_by_pvc_name,
@@ -56,7 +62,8 @@ class TestDynamicProvisioningP0(BaseClass):
pvc_name = self.create_and_wait_for_pvc()
# Create DC with POD and attached PVC to it.
- dc_name = oc_create_app_dc_with_io(self.node, pvc_name)
+ dc_name = oc_create_app_dc_with_io(
+ self.node, pvc_name, image=self.io_container_image_cirros)
self.addCleanup(oc_delete, self.node, 'dc', dc_name)
self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)
@@ -144,7 +151,7 @@ class TestDynamicProvisioningP0(BaseClass):
# Create app POD with attached volume
app_1_pod_name = oc_create_tiny_pod_with_volume(
self.node, app_1_pvc_name, "test-pvc-mount-on-app-pod",
- mount_path=mount_path)
+ mount_path=mount_path, image=self.io_container_image_cirros)
self.addCleanup(
wait_for_resource_absence, self.node, 'pod', app_1_pod_name)
self.addCleanup(oc_delete, self.node, 'pod', app_1_pod_name)
@@ -184,7 +191,7 @@ class TestDynamicProvisioningP0(BaseClass):
# Create second app POD
app_2_pod_name = oc_create_tiny_pod_with_volume(
self.node, app_2_pvc_name, "test-pvc-mount-on-app-pod",
- mount_path=mount_path)
+ mount_path=mount_path, image=self.io_container_image_cirros)
self.addCleanup(
wait_for_resource_absence, self.node, 'pod', app_2_pod_name)
self.addCleanup(oc_delete, self.node, 'pod', app_2_pod_name)
@@ -225,7 +232,7 @@ class TestDynamicProvisioningP0(BaseClass):
# Create app POD with attached volume
pod_name = oc_create_tiny_pod_with_volume(
self.node, pvc_name, "test-pvc-mount-on-app-pod",
- mount_path=mount_path)
+ mount_path=mount_path, image=self.io_container_image_cirros)
self.addCleanup(
wait_for_resource_absence, self.node, 'pod', pod_name)
self.addCleanup(oc_delete, self.node, 'pod', pod_name)
@@ -312,7 +319,8 @@ class TestDynamicProvisioningP0(BaseClass):
pvc_name = self.create_and_wait_for_pvc(sc_name=sc_name)
# Create DC with POD and attached PVC to it.
- dc_name = oc_create_app_dc_with_io(self.node, pvc_name)
+ dc_name = oc_create_app_dc_with_io(
+ self.node, pvc_name, image=self.io_container_image_cirros)
self.addCleanup(oc_delete, self.node, 'dc', dc_name)
self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)
@@ -385,7 +393,8 @@ class TestDynamicProvisioningP0(BaseClass):
# Create DC with application PODs
dc_name = oc_create_app_dc_with_io(
- self.node, pvc_name, replicas=replicas)
+ self.node, pvc_name, replicas=replicas,
+ image=self.io_container_image_cirros)
self.addCleanup(oc_delete, self.node, 'dc', dc_name)
self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)
@@ -456,7 +465,8 @@ class TestDynamicProvisioningP0(BaseClass):
# Create DC with POD and attached PVC to it.
try:
- dc_name = oc_create_app_dc_with_io(self.node, self.pvc_name)
+ dc_name = oc_create_app_dc_with_io(
+ self.node, self.pvc_name, image=self.io_container_image_cirros)
pod_name = get_pod_name_from_dc(self.node, dc_name)
wait_for_pod_be_ready(self.node, pod_name)
finally:
@@ -541,3 +551,37 @@ class TestDynamicProvisioningP0(BaseClass):
"-o=custom-columns=:.spec.storageClassName" % pvc_name)
out = self.cmd_run(get_sc_of_pvc_cmd)
self.assertEqual(out, self.sc_name)
+
+ @pytest.mark.tier2
+ def test_node_failure_pv_mounted(self):
+ """Test node failure when PV is mounted with app pods running"""
+ filepath = "/mnt/file_for_testing_volume.log"
+ pvc_name = self.create_and_wait_for_pvc()
+
+ dc_and_pod_names = self.create_dcs_with_pvc(pvc_name)
+ dc_name, pod_name = dc_and_pod_names[pvc_name]
+
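+ # 'df' reports the mount source as '<gluster-host>:<volume>', so the
+ # first field of the last line gives the gluster host backing the PV;
+ # that host's VM is the one to power off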
+ mount_point = "df -kh /mnt -P | tail -1 | awk '{{print $1}}'"
+ pod_cmd = "oc exec {} -- {}".format(pod_name, mount_point)
+ hostname = command.cmd_run(pod_cmd, hostname=self.node)
+ hostname = hostname.split(":")[0]
+
+ vm_name = find_vm_name_by_ip_or_hostname(hostname)
+ self.addCleanup(power_on_vm_by_name, vm_name)
+ power_off_vm_by_name(vm_name)
+
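+ # With one gluster node powered off, writes from the app pod are still
+ # expected to succeed, since the volume keeps serving I/O from the
+ # remaining replicas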
+ cmd = "dd if=/dev/urandom of={} bs=1K count=100".format(filepath)
+ ret, _, err = oc_rsh(self.node, pod_name, cmd)
+ self.assertFalse(
+ ret, "Failed to execute command {} on {} with error {}"
+ .format(cmd, self.node, err))
+
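+ # Respin the app pod and verify that the new pod can mount the PV and
+ # run I/O while the gluster node is still down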
+ oc_delete(self.node, 'pod', pod_name)
+ wait_for_resource_absence(self.node, 'pod', pod_name)
+ pod_name = get_pod_name_from_dc(self.node, dc_name)
+ wait_for_pod_be_ready(self.node, pod_name)
+
+ ret, _, err = oc_rsh(self.node, pod_name, cmd)
+ self.assertFalse(
+ ret, "Failed to execute command {} on {} with error {}"
+ .format(cmd, self.node, err))
diff --git a/tests/functional/provisioning/test_pv_resize.py b/tests/functional/provisioning/test_pv_resize.py
index dacdd992..f5833a99 100644
--- a/tests/functional/provisioning/test_pv_resize.py
+++ b/tests/functional/provisioning/test_pv_resize.py
@@ -70,7 +70,8 @@ class TestPvResizeClass(BaseClass):
pvc_name = self.create_and_wait_for_pvc()
# Create DC with POD and attached PVC to it.
- dc_name = oc_create_app_dc_with_io(node, pvc_name)
+ dc_name = oc_create_app_dc_with_io(
+ node, pvc_name, image=self.io_container_image_cirros)
self.addCleanup(oc_delete, node, 'dc', dc_name)
self.addCleanup(scale_dc_pod_amount_and_wait,
node, dc_name, 0)
@@ -194,7 +195,8 @@ class TestPvResizeClass(BaseClass):
pvc_name = self.create_and_wait_for_pvc(pvc_size=pvc_size_gb)
# Create DC with POD and attached PVC to it
- dc_name = oc_create_app_dc_with_io(self.node, pvc_name)
+ dc_name = oc_create_app_dc_with_io(
+ self.node, pvc_name, image=self.io_container_image_cirros)
self.addCleanup(oc_delete, self.node, 'dc', dc_name)
self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)
pod_name = get_pod_name_from_dc(self.node, dc_name)
@@ -270,7 +272,8 @@ class TestPvResizeClass(BaseClass):
pvc_name = self.create_and_wait_for_pvc(pvc_size=pv_size)
# Create DC with POD and attached PVC to it.
- dc_name = oc_create_app_dc_with_io(node, pvc_name)
+ dc_name = oc_create_app_dc_with_io(
+ node, pvc_name, image=self.io_container_image_cirros)
self.addCleanup(oc_delete, node, 'dc', dc_name)
self.addCleanup(scale_dc_pod_amount_and_wait,
node, dc_name, 0)
diff --git a/tests/functional/provisioning/test_storage_class_cases.py b/tests/functional/provisioning/test_storage_class_cases.py
index 3a2b9dfa..976398db 100644
--- a/tests/functional/provisioning/test_storage_class_cases.py
+++ b/tests/functional/provisioning/test_storage_class_cases.py
@@ -16,6 +16,7 @@ from openshiftstoragelibs.openshift_storage_libs import (
validate_multipath_pod,
)
from openshiftstoragelibs.openshift_ops import (
+ cmd_run_on_gluster_pod_or_node,
get_amount_of_gluster_nodes,
get_gluster_blockvol_info_by_pvc_name,
get_pod_name_from_dc,
@@ -27,11 +28,13 @@ from openshiftstoragelibs.openshift_ops import (
oc_delete,
oc_get_custom_resource,
oc_get_pods,
+ restart_service_on_gluster_pod_or_node,
scale_dc_pod_amount_and_wait,
verify_pvc_status_is_bound,
wait_for_events,
wait_for_pod_be_ready,
wait_for_resource_absence,
+ wait_for_service_status_on_gluster_pod_or_node,
)
from openshiftstoragelibs.openshift_storage_version import (
get_openshift_storage_version
@@ -143,7 +146,8 @@ class TestStorageClassCases(BaseClass):
"""
# create pod using pvc created
dc_name = oc_create_app_dc_with_io(
- self.ocp_master_node[0], self.pvc_name
+ self.ocp_master_node[0], self.pvc_name,
+ image=self.io_container_image_cirros
)
pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
self.addCleanup(oc_delete, self.ocp_master_node[0], "dc", dc_name)
@@ -421,3 +425,63 @@ class TestStorageClassCases(BaseClass):
"Cluster ID %s has NOT been used to"
"create the PVC %s. Found %s" %
(cluster_id, pvc_name, volume_info["cluster"]))
+
+ def _validate_permission(
+ self, ocp_node, gluster_node, dir_perm, file_perm):
+ """Validate /etc/target and /etc/target/backup permissions"""
+
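+ # 'ls -ld' prints the mode of the directory itself; for the backup dir,
+ # 'sed 1D' drops the leading 'total' line so only the file modes remain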
+ target_dir_perm = "ls -ld /etc/target | awk '{print $1}'"
+ target_file_perm = (
+ "ls -l /etc/target/backup | awk '{print $1}' | sed 1D")
+
+ dir_perm_result = cmd_run_on_gluster_pod_or_node(
+ ocp_node, target_dir_perm, gluster_node)
+ self.assertEqual(
+ dir_perm_result, dir_perm,
+ "Failed to validate permission of '/etc/target'")
+ results = cmd_run_on_gluster_pod_or_node(
+ ocp_node, target_file_perm, gluster_node)
+ file_perm_results = list(results.split("\n"))
+ for perm in file_perm_results:
+ self.assertEqual(
+ perm, file_perm, "Failed to validate permission"
+ " in '/etc/target/backup'")
+
+ @pytest.mark.tier1
+ def test_targetcli_weak_permissions_config_files(self):
+ """Validate permissions on config files"""
+
+ ocp_node = self.ocp_master_node[0]
+ gluster_node = self.gluster_servers[0]
+ dir_perm_before, dir_perm_after = "drwxrwxrwx.", "drw-------."
+ file_perm_before, file_perm_after = "-rwxrwxrwx.", "-rw-------."
+ services = ("tcmu-runner", "gluster-block-target", "gluster-blockd")
+ cmd = "chmod -R 777 /etc/target/"
+
+ # Weaken the permissions on '/etc/target' and '/etc/target/backup'
+ cmd_run_on_gluster_pod_or_node(ocp_node, cmd, gluster_node)
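+ # Register service restarts as cleanup so that the default permissions
+ # are restored after the test; 'gluster-block-target' reports 'exited'
+ # rather than 'running' once active, hence the different expected state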
+ for service in services:
+ state = (
+ 'exited' if service == 'gluster-block-target' else 'running')
+ self.addCleanup(
+ wait_for_service_status_on_gluster_pod_or_node,
+ ocp_node, service, 'active', state, gluster_node)
+ self.addCleanup(
+ restart_service_on_gluster_pod_or_node,
+ ocp_node, service, gluster_node)
+
+ self._validate_permission(
+ ocp_node, gluster_node, dir_perm_before, file_perm_before)
+
+ # Restart the services
+ for service in services:
+ state = (
+ 'exited' if service == 'gluster-block-target' else 'running')
+ restart_service_on_gluster_pod_or_node(
+ ocp_node, service, gluster_node)
+ wait_for_service_status_on_gluster_pod_or_node(
+ ocp_node, service, 'active', state, gluster_node)
+
+ # Permissions on '/etc/target' should be restored to the defaults
+ self._validate_permission(
+ ocp_node, gluster_node, dir_perm_after, file_perm_after)
diff --git a/tests/glusterfs-containers-tests-config.yaml b/tests/glusterfs-containers-tests-config.yaml
index 65409c4d..cbccfbf6 100644
--- a/tests/glusterfs-containers-tests-config.yaml
+++ b/tests/glusterfs-containers-tests-config.yaml
@@ -114,6 +114,12 @@ openshift:
prometheus_resources_selector: "<prometheus_recources_selector>"
alertmanager_resources_selector: "<alertmanager_resources_selector>"
+ # 'io_container_images' section covers the details of container images
+ # used for I/O
+ io_container_images:
+ cirros: quay.io/libpod/cirros
+ busybox: quay.io/prometheus/busybox
+
common:
allow_heketi_zones_update: False
check_heketi_db_inconsistencies: True