Diffstat (limited to 'glustolibs-gluster/glustolibs/gluster/gluster_base_class.py')
 glustolibs-gluster/glustolibs/gluster/gluster_base_class.py (-rw-r--r-- -> -rwxr-xr-x) | 350
 1 file changed, 332 insertions(+), 18 deletions(-)
diff --git a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
index 0d8731994..65061cb13 100644..100755
--- a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
+++ b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2018-2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -47,6 +47,7 @@ from glustolibs.gluster.peer_ops import (
from glustolibs.gluster.gluster_init import (
restart_glusterd, stop_glusterd, wait_for_glusterd_to_start)
from glustolibs.gluster.samba_libs import share_volume_over_smb
+from glustolibs.gluster.shared_storage_ops import is_shared_volume_mounted
from glustolibs.gluster.volume_libs import (
cleanup_volume,
log_volume_info_and_status,
@@ -59,6 +60,9 @@ from glustolibs.gluster.volume_ops import (
set_volume_options, volume_reset, volume_start)
from glustolibs.io.utils import log_mounts_info
from glustolibs.gluster.geo_rep_libs import setup_master_and_slave_volumes
+from glustolibs.gluster.nfs_ganesha_ops import (
+ teardown_nfs_ganesha_cluster)
+from glustolibs.misc.misc_libs import kill_process
class runs_on(g.CarteTestClass):
@@ -192,6 +196,11 @@ class GlusterBaseClass(TestCase):
Returns (bool): True if all peers are in connected with other peers.
False otherwise.
"""
+
+ # If the setup has a single server node, bypass this validation.
+ if len(cls.servers) == 1:
+ return True
+
# Validate if peer is connected from all the servers
g.log.info("Validating if servers %s are connected from other servers "
"in the cluster", cls.servers)
@@ -256,10 +265,13 @@ class GlusterBaseClass(TestCase):
False otherwise.
"""
if error_or_failure_exists:
+ shared_storage_mounted = False
+ if is_shared_volume_mounted(cls.mnode):
+ shared_storage_mounted = True
ret = stop_glusterd(cls.servers)
if not ret:
g.log.error("Failed to stop glusterd")
- cmd_list = ("pkill pidof glusterd",
+ cmd_list = ("pkill `pidof glusterd`",
"rm /var/run/glusterd.socket")
for server in cls.servers:
for cmd in cmd_list:
@@ -268,11 +280,29 @@ class GlusterBaseClass(TestCase):
g.log.error("Failed to stop glusterd")
return False
for server in cls.servers:
- cmd_list = ("rm -rf /var/lib/glusterd/vols/*",
- "rm -rf /var/lib/glusterd/snaps/*",
- "rm -rf /var/lib/glusterd/peers/*",
- "rm -rf {}/*/*".format(
- cls.all_servers_info[server]['brick_root']))
+ ret, out, _ = g.run(server, "pgrep glusterfsd", "root")
+ if not ret:
+ ret = kill_process(server,
+ process_ids=out.strip().split('\n'))
+ if not ret:
+ g.log.error("Unable to kill process {}".format(
+ out.strip().split('\n')))
+ return False
+ if not shared_storage_mounted:
+ cmd_list = (
+ "rm -rf /var/lib/glusterd/vols/*",
+ "rm -rf /var/lib/glusterd/snaps/*",
+ "rm -rf /var/lib/glusterd/peers/*",
+ "rm -rf {}/*/*".format(
+ cls.all_servers_info[server]['brick_root']))
+ else:
+ cmd_list = (
+ "for vol in `ls /var/lib/glusterd/vols/ | "
+ "grep -v gluster_shared_storage`;do "
+ "rm -rf /var/lib/glusterd/vols/$vol;done",
+ "rm -rf /var/lib/glusterd/snaps/*"
+ "rm -rf {}/*/*".format(
+ cls.all_servers_info[server]['brick_root']))
for cmd in cmd_list:
ret, _, _ = g.run(server, cmd, "root")
if ret:
@@ -288,10 +318,11 @@ class GlusterBaseClass(TestCase):
if not ret:
g.log.error("Failed to bring glusterd up")
return False
- ret = peer_probe_servers(cls.mnode, cls.servers)
- if not ret:
- g.log.error("Failed to peer probe servers")
- return False
+ if not shared_storage_mounted:
+ ret = peer_probe_servers(cls.mnode, cls.servers)
+ if not ret:
+ g.log.error("Failed to peer probe servers")
+ return False
for client in cls.clients:
cmd_list = ("umount /mnt/*", "rm -rf /mnt/*")
for cmd in cmd_list:
@@ -303,10 +334,10 @@ class GlusterBaseClass(TestCase):
return True
@classmethod
- def setup_volume(cls, volume_create_force=False):
+ def setup_volume(cls, volume_create_force=False, only_volume_create=False):
"""Setup the volume:
- Create the volume, Start volume, Set volume
- options, enable snapshot/quota/tier if specified in the config
+ options, enable snapshot/quota if specified in the config
file.
- Wait for volume processes to be online
- Export volume as NFS/SMB share if mount_type is NFS or SMB
@@ -315,6 +346,9 @@ class GlusterBaseClass(TestCase):
Args:
volume_create_force(bool): True if create_volume should be
executed with 'force' option.
+ only_volume_create(bool): If True, only create the volume.
+ If False (default), create and start the volume.
Returns (bool): True if all the steps mentioned in the descriptions
passes. False otherwise.
@@ -337,12 +371,19 @@ class GlusterBaseClass(TestCase):
g.log.info("Setting up volume %s", cls.volname)
ret = setup_volume(mnode=cls.mnode,
all_servers_info=cls.all_servers_info,
- volume_config=cls.volume, force=force_volume_create)
+ volume_config=cls.volume, force=force_volume_create,
+ create_only=only_volume_create)
if not ret:
g.log.error("Failed to Setup volume %s", cls.volname)
return False
g.log.info("Successful in setting up volume %s", cls.volname)
+ # Return without proceeding to the next steps
+ if only_volume_create and ret:
+ g.log.info("Volume creation of {} is successful".format(
+ cls.volname))
+ return True
+
# Wait for volume processes to be online
g.log.info("Wait for volume %s processes to be online", cls.volname)
ret = wait_for_volume_process_to_be_online(cls.mnode, cls.volname)
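For reviewers, a minimal sketch of how a test could use the new only_volume_create flag. The test class and method names are hypothetical; the imports are existing glusto-tests helpers (volume_start is already imported by this module):

from glusto.core import Glusto as g

from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.volume_ops import volume_start


@runs_on([['replicated'], ['glusterfs']])
class TestVolumeCreateOnly(GlusterBaseClass):
    """Hypothetical test: create the volume in setUp, start it in the test."""

    def setUp(self):
        self.get_super_method(self, 'setUp')()
        # Create the volume but do not start it; the test starts it itself.
        if not self.setup_volume(only_volume_create=True):
            raise ExecutionError("Volume creation failed")

    def test_start_created_volume(self):
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         "Failed to start volume %s" % self.volname)
        g.log.info("Volume %s started successfully", self.volname)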
@@ -433,6 +474,9 @@ class GlusterBaseClass(TestCase):
"""
g.log.info("Starting to mount volume %s", cls.volname)
for mount_obj in mounts:
+ # For nfs-ganesha, mount is done via vip
+ if cls.enable_nfs_ganesha:
+ mount_obj.server_system = cls.vips[0]
g.log.info("Mounting volume '%s:%s' on '%s:%s'",
mount_obj.server_system, mount_obj.volname,
mount_obj.client_system, mount_obj.mountpoint)
@@ -952,8 +996,8 @@ class GlusterBaseClass(TestCase):
mount_dict['volname'] = cls.slave_volume
mount_dict['server'] = cls.mnode_slave
mount_dict['mountpoint'] = path_join(
- "/mnt", '_'.join([cls.slave_volname,
- cls.mount_type]))
+ "/mnt", '_'.join([cls.slave_volname,
+ cls.mount_type]))
cls.slave_mounts = create_mount_objs(slave_mount_dict_list)
# Defining clients from mounts.
@@ -993,6 +1037,31 @@ class GlusterBaseClass(TestCase):
datetime.now().strftime('%H_%M_%d_%m_%Y'))
cls.glustotest_run_id = g.config['glustotest_run_id']
+ if cls.enable_nfs_ganesha:
+ g.log.info("Setup NFS_Ganesha")
+ cls.num_of_nfs_ganesha_nodes = int(cls.num_of_nfs_ganesha_nodes)
+ cls.servers_in_nfs_ganesha_cluster = (
+ cls.servers[:cls.num_of_nfs_ganesha_nodes])
+ cls.vips_in_nfs_ganesha_cluster = (
+ cls.vips[:cls.num_of_nfs_ganesha_nodes])
+
+ # Obtain hostname of servers in ganesha cluster
+ cls.ganesha_servers_hostname = []
+ for ganesha_server in cls.servers_in_nfs_ganesha_cluster:
+ ret, hostname, _ = g.run(ganesha_server, "hostname")
+ if ret:
+ raise ExecutionError("Failed to obtain hostname of %s"
+ % ganesha_server)
+ hostname = hostname.strip()
+ g.log.info("Obtained hostname: IP- %s, hostname- %s",
+ ganesha_server, hostname)
+ cls.ganesha_servers_hostname.append(hostname)
+ from glustolibs.gluster.nfs_ganesha_libs import setup_nfs_ganesha
+ ret = setup_nfs_ganesha(cls)
+ if not ret:
+ raise ExecutionError("Failed to setup nfs ganesha")
+ g.log.info("Successful in setting up NFS Ganesha Cluster")
+
msg = "Setupclass: %s : %s" % (cls.__name__, cls.glustotest_run_id)
g.log.info(msg)
cls.inject_msg_in_gluster_logs(msg)
@@ -1020,7 +1089,7 @@ class GlusterBaseClass(TestCase):
if (self.error_or_failure_exists or
self._is_error_or_failure_exists()):
ret = self.scratch_cleanup(self.error_or_failure_exists)
- g.log.warn(ret)
+ g.log.info(ret)
return self.get_super_method(self, 'doCleanups')()
@classmethod
@@ -1029,5 +1098,250 @@ class GlusterBaseClass(TestCase):
cls._is_error_or_failure_exists()):
ret = cls.scratch_cleanup(
GlusterBaseClass.error_or_failure_exists)
- g.log.warn(ret)
+ g.log.info(ret)
return cls.get_super_method(cls, 'doClassCleanups')()
+
+ @classmethod
+ def delete_nfs_ganesha_cluster(cls):
+ ret = teardown_nfs_ganesha_cluster(
+ cls.servers_in_nfs_ganesha_cluster)
+ if not ret:
+ g.log.error("Teardown got failed. Hence, cleaning up "
+ "nfs-ganesha cluster forcefully")
+ ret = teardown_nfs_ganesha_cluster(
+ cls.servers_in_nfs_ganesha_cluster, force=True)
+ if not ret:
+ raise ExecutionError("Force cleanup of nfs-ganesha "
+ "cluster failed")
+ g.log.info("Teardown nfs ganesha cluster succeeded")
+
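A hedged sketch of where a suite might call the new classmethod, for example from tearDownClass of an NFS-Ganesha test once the run is configured with nfs-ganesha enabled; the test class name is hypothetical:

from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on


@runs_on([['replicated'], ['nfs']])
class TestNfsGaneshaExample(GlusterBaseClass):
    """Hypothetical NFS-Ganesha test."""

    @classmethod
    def tearDownClass(cls):
        # Tear down the nfs-ganesha cluster created in setUpClass;
        # delete_nfs_ganesha_cluster() raises ExecutionError if even the
        # forced cleanup fails.
        cls.delete_nfs_ganesha_cluster()
        cls.get_super_method(cls, 'tearDownClass')()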
+ @classmethod
+ def start_memory_and_cpu_usage_logging(cls, test_id, interval=60,
+ count=100):
+ """Upload logger script and start logging usage on cluster
+
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
+ Kwargs:
+ interval(int): Time interval after which logs are to be collected
+ (Default: 60)
+ count(int): Number of samples to be collected (Default: 100)
+
+ Returns:
+ proc_dict(dict): Dictionary of logging processes
+ """
+ # Imports are added inside the function to make them
+ # optional and to avoid breakage on installations
+ # which don't use the resource leak library
+ from glustolibs.io.memory_and_cpu_utils import (
+ check_upload_memory_and_cpu_logger_script,
+ log_memory_and_cpu_usage_on_cluster)
+
+ # Check if the script is present on the servers; if not,
+ # upload it to the servers.
+ if not check_upload_memory_and_cpu_logger_script(cls.servers):
+ return None
+
+ # Check if the script is present on the clients; if not,
+ # upload it to the clients.
+ if not check_upload_memory_and_cpu_logger_script(cls.clients):
+ return None
+
+ # Start logging on servers and clients
+ proc_dict = log_memory_and_cpu_usage_on_cluster(
+ cls.servers, cls.clients, test_id, interval, count)
+
+ return proc_dict
+
+ @classmethod
+ def compute_and_print_usage_stats(cls, test_id, proc_dict,
+ kill_proc=False):
+ """Compute and print CPU and memory usage statistics
+
+ Args:
+ proc_dict(dict): Dictionary of logging processes
+ test_id(str): ID of the test running fetched from self.id()
+
+ Kwargs:
+ kill_proc(bool): If True, kill the logging processes; else wait
+ for the processes to complete execution
+ """
+ # Imports are added inside the function to make them
+ # optional and to avoid breakage on installations
+ # which don't use the resource leak library
+ from glustolibs.io.memory_and_cpu_utils import (
+ wait_for_logging_processes_to_stop, kill_all_logging_processes,
+ compute_data_usage_stats_on_servers,
+ compute_data_usage_stats_on_clients)
+
+ # Wait or kill running logging process
+ if kill_proc:
+ nodes = cls.servers + cls.clients
+ ret = kill_all_logging_processes(proc_dict, nodes, cluster=True)
+ if not ret:
+ g.log.error("Unable to stop logging processes.")
+ else:
+ ret = wait_for_logging_processes_to_stop(proc_dict, cluster=True)
+ if not ret:
+ g.log.error("Processes didn't complete still running.")
+
+ # Compute and print stats for servers
+ ret = compute_data_usage_stats_on_servers(cls.servers, test_id)
+ g.log.info('*' * 50)
+ g.log.info(ret) # TODO: Make logged message more structured
+ g.log.info('*' * 50)
+
+ # Compute and print stats for clients
+ ret = compute_data_usage_stats_on_clients(cls.clients, test_id)
+ g.log.info('*' * 50)
+ g.log.info(ret) # TODO: Make logged message more structured
+ g.log.info('*' * 50)
+
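For context, a hedged end-to-end sketch of the two helpers above: start the loggers inside a test, run the workload, then compute and print the statistics. The test class and method names are hypothetical and the interval/count values are arbitrary:

from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on


@runs_on([['replicated'], ['glusterfs']])
class TestUsageLoggingExample(GlusterBaseClass):
    """Hypothetical test illustrating the usage-logging helpers."""

    def test_usage_during_io(self):
        # Start sampling memory and CPU usage every 30 seconds, 90 samples,
        # tagged with this test's id so the stats can be located later.
        proc_dict = self.start_memory_and_cpu_usage_logging(
            self.id(), interval=30, count=90)
        self.assertIsNotNone(
            proc_dict, "Failed to start memory and CPU usage logging")

        # ... run the actual I/O workload here ...

        # Stop the loggers early and print the per-process usage statistics.
        self.compute_and_print_usage_stats(self.id(), proc_dict,
                                           kill_proc=True)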
+ @classmethod
+ def check_for_memory_leaks_and_oom_kills_on_servers(cls, test_id,
+ gain=30.0):
+ """Check for memory leaks and OOM kills on servers
+
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
+ Kwargs:
+ gain(float): Accepted amount of leak for a given testcase in MB
+ (Default: 30.0)
+
+ Returns:
+ bool: True if memory leaks or OOM kills are observed else False
+ """
+ # Imports are added inside the function to make them
+ # optional and to avoid breakage on installations
+ # which don't use the resource leak library
+ from glustolibs.io.memory_and_cpu_utils import (
+ check_for_memory_leaks_in_glusterd,
+ check_for_memory_leaks_in_glusterfs,
+ check_for_memory_leaks_in_glusterfsd,
+ check_for_oom_killers_on_servers)
+
+ # Check for memory leaks on glusterd
+ if check_for_memory_leaks_in_glusterd(cls.servers, test_id, gain):
+ g.log.error("Memory leak on glusterd.")
+ return True
+
+ if cls.volume_type != "distributed":
+ # Check for memory leaks on shd
+ if check_for_memory_leaks_in_glusterfs(cls.servers, test_id,
+ gain):
+ g.log.error("Memory leak on shd.")
+ return True
+
+ # Check for memory leaks on brick processes
+ if check_for_memory_leaks_in_glusterfsd(cls.servers, test_id, gain):
+ g.log.error("Memory leak on brick process.")
+ return True
+
+ # Check OOM kills on servers for all gluster server processes
+ if check_for_oom_killers_on_servers(cls.servers):
+ g.log.error('OOM kills present on servers.')
+ return True
+ return False
+
+ @classmethod
+ def check_for_memory_leaks_and_oom_kills_on_clients(cls, test_id, gain=30):
+ """Check for memory leaks and OOM kills on clients
+
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
+ Kwargs:
+ gain(float): Accepted amount of leak for a given testcase in MB
+ (Default: 30)
+
+ Returns:
+ bool: True if memory leaks or OOM kills are observed else False
+ """
+ # Imports are added inside the function to make them
+ # optional and to avoid breakage on installations
+ # which don't use the resource leak library
+ from glustolibs.io.memory_and_cpu_utils import (
+ check_for_memory_leaks_in_glusterfs_fuse,
+ check_for_oom_killers_on_clients)
+
+ # Check for memory leak on glusterfs fuse process
+ if check_for_memory_leaks_in_glusterfs_fuse(cls.clients, test_id,
+ gain):
+ g.log.error("Memory leaks observed on FUSE clients.")
+ return True
+
+ # Check for oom kills on clients
+ if check_for_oom_killers_on_clients(cls.clients):
+ g.log.error("OOM kills present on clients.")
+ return True
+ return False
+
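A minimal sketch of how a test's tearDown might consume the two leak/OOM helpers above; the test class is hypothetical and it assumes the usage loggers were started earlier via start_memory_and_cpu_usage_logging:

from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on


@runs_on([['distributed-replicated'], ['glusterfs']])
class TestLeakCheckExample(GlusterBaseClass):
    """Hypothetical test illustrating the leak/OOM check helpers."""

    def tearDown(self):
        test_id = self.id()
        # Fail the test if a leak or an OOM kill was recorded on any node.
        self.assertFalse(
            self.check_for_memory_leaks_and_oom_kills_on_servers(test_id),
            "Memory leaks or OOM kills observed on servers")
        self.assertFalse(
            self.check_for_memory_leaks_and_oom_kills_on_clients(test_id),
            "Memory leaks or OOM kills observed on clients")
        self.get_super_method(self, 'tearDown')()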
+ @classmethod
+ def check_for_cpu_usage_spikes_on_servers(cls, test_id, threshold=3):
+ """Check for CPU usage spikes on servers
+
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
+ Kwargs:
+ threshold(int): Accepted number of instances of 100% CPU usage
+ (Default: 3)
+ Returns:
+ bool: True if CPU spikes are more than threshold else False
+ """
+ # Imports are added inside the function to make them
+ # optional and to avoid breakage on installations
+ # which don't use the resource leak library
+ from glustolibs.io.memory_and_cpu_utils import (
+ check_for_cpu_usage_spikes_on_glusterd,
+ check_for_cpu_usage_spikes_on_glusterfs,
+ check_for_cpu_usage_spikes_on_glusterfsd)
+
+ # Check for CPU usage spikes on glusterd
+ if check_for_cpu_usage_spikes_on_glusterd(cls.servers, test_id,
+ threshold):
+ g.log.error("CPU usage spikes observed more than threshold "
+ "on glusterd.")
+ return True
+
+ if cls.volume_type != "distributed":
+ # Check for CPU usage spikes on shd
+ if check_for_cpu_usage_spikes_on_glusterfs(cls.servers, test_id,
+ threshold):
+ g.log.error("CPU usage spikes observed more than threshold "
+ "on shd.")
+ return True
+
+ # Check for CPU usage spikes on brick processes
+ if check_for_cpu_usage_spikes_on_glusterfsd(cls.servers, test_id,
+ threshold):
+ g.log.error("CPU usage spikes observed more than threshold "
+ "on shd.")
+ return True
+ return False
+
+ @classmethod
+ def check_for_cpu_spikes_on_clients(cls, test_id, threshold=3):
+ """Check for CPU usage spikes on clients
+
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
+ Kwargs:
+ threshold(int): Accepted number of instances of 100% CPU usage
+ (Default: 3)
+ Returns:
+ bool: True if CPU spikes are more than threshold else False
+ """
+ # Imports are added inside the function to make them
+ # optional and to avoid breakage on installations
+ # which don't use the resource leak library
+ from glustolibs.io.memory_and_cpu_utils import (
+ check_for_cpu_usage_spikes_on_glusterfs_fuse)
+
+ ret = check_for_cpu_usage_spikes_on_glusterfs_fuse(cls.clients,
+ test_id,
+ threshold)
+ return ret
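And, analogously, a hedged sketch for the CPU-spike helpers; hypothetical test class, default thresholds, and again assuming the usage loggers ran during the test:

from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on


@runs_on([['replicated'], ['glusterfs']])
class TestCpuSpikeCheckExample(GlusterBaseClass):
    """Hypothetical test illustrating the CPU-spike check helpers."""

    def tearDown(self):
        test_id = self.id()
        self.assertFalse(
            self.check_for_cpu_usage_spikes_on_servers(test_id),
            "CPU usage spikes observed on servers")
        self.assertFalse(
            self.check_for_cpu_spikes_on_clients(test_id),
            "CPU usage spikes observed on clients")
        self.get_super_method(self, 'tearDown')()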