Diffstat (limited to 'glustolibs-gluster/glustolibs/gluster/gluster_base_class.py')
 glustolibs-gluster/glustolibs/gluster/gluster_base_class.py (mode -rw-r--r-- -> -rwxr-xr-x) | 350 +++++++++++-
1 file changed, 332 insertions(+), 18 deletions(-)
diff --git a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
index 0d8731994..65061cb13 100644..100755
--- a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
+++ b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2018-2021 Red Hat, Inc. <http://www.redhat.com>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -47,6 +47,7 @@ from glustolibs.gluster.peer_ops import (
 from glustolibs.gluster.gluster_init import (
     restart_glusterd, stop_glusterd, wait_for_glusterd_to_start)
 from glustolibs.gluster.samba_libs import share_volume_over_smb
+from glustolibs.gluster.shared_storage_ops import is_shared_volume_mounted
 from glustolibs.gluster.volume_libs import (
     cleanup_volume,
     log_volume_info_and_status,
@@ -59,6 +60,9 @@ from glustolibs.gluster.volume_ops import (
     set_volume_options, volume_reset, volume_start)
 from glustolibs.io.utils import log_mounts_info
 from glustolibs.gluster.geo_rep_libs import setup_master_and_slave_volumes
+from glustolibs.gluster.nfs_ganesha_ops import (
+    teardown_nfs_ganesha_cluster)
+from glustolibs.misc.misc_libs import kill_process


 class runs_on(g.CarteTestClass):
@@ -192,6 +196,11 @@ class GlusterBaseClass(TestCase):
         Returns (bool): True if all peers are in connected with other peers.
                         False otherwise.
         """
+
+        # If the setup has a single server node, bypass this validation.
+        if len(cls.servers) == 1:
+            return True
+
         # Validate if peer is connected from all the servers
         g.log.info("Validating if servers %s are connected from other servers "
                    "in the cluster", cls.servers)
@@ -256,10 +265,13 @@ class GlusterBaseClass(TestCase):
                        False otherwise.
""" if error_or_failure_exists: + shared_storage_mounted = False + if is_shared_volume_mounted(cls.mnode): + shared_storage_mounted = True ret = stop_glusterd(cls.servers) if not ret: g.log.error("Failed to stop glusterd") - cmd_list = ("pkill pidof glusterd", + cmd_list = ("pkill `pidof glusterd`", "rm /var/run/glusterd.socket") for server in cls.servers: for cmd in cmd_list: @@ -268,11 +280,29 @@ class GlusterBaseClass(TestCase): g.log.error("Failed to stop glusterd") return False for server in cls.servers: - cmd_list = ("rm -rf /var/lib/glusterd/vols/*", - "rm -rf /var/lib/glusterd/snaps/*", - "rm -rf /var/lib/glusterd/peers/*", - "rm -rf {}/*/*".format( - cls.all_servers_info[server]['brick_root'])) + ret, out, _ = g.run(server, "pgrep glusterfsd", "root") + if not ret: + ret = kill_process(server, + process_ids=out.strip().split('\n')) + if not ret: + g.log.error("Unable to kill process {}".format( + out.strip().split('\n'))) + return False + if not shared_storage_mounted: + cmd_list = ( + "rm -rf /var/lib/glusterd/vols/*", + "rm -rf /var/lib/glusterd/snaps/*", + "rm -rf /var/lib/glusterd/peers/*", + "rm -rf {}/*/*".format( + cls.all_servers_info[server]['brick_root'])) + else: + cmd_list = ( + "for vol in `ls /var/lib/glusterd/vols/ | " + "grep -v gluster_shared_storage`;do " + "rm -rf /var/lib/glusterd/vols/$vol;done", + "rm -rf /var/lib/glusterd/snaps/*" + "rm -rf {}/*/*".format( + cls.all_servers_info[server]['brick_root'])) for cmd in cmd_list: ret, _, _ = g.run(server, cmd, "root") if ret: @@ -288,10 +318,11 @@ class GlusterBaseClass(TestCase): if not ret: g.log.error("Failed to bring glusterd up") return False - ret = peer_probe_servers(cls.mnode, cls.servers) - if not ret: - g.log.error("Failed to peer probe servers") - return False + if not shared_storage_mounted: + ret = peer_probe_servers(cls.mnode, cls.servers) + if not ret: + g.log.error("Failed to peer probe servers") + return False for client in cls.clients: cmd_list = ("umount /mnt/*", "rm -rf /mnt/*") for cmd in cmd_list: @@ -303,10 +334,10 @@ class GlusterBaseClass(TestCase): return True @classmethod - def setup_volume(cls, volume_create_force=False): + def setup_volume(cls, volume_create_force=False, only_volume_create=False): """Setup the volume: - Create the volume, Start volume, Set volume - options, enable snapshot/quota/tier if specified in the config + options, enable snapshot/quota if specified in the config file. - Wait for volume processes to be online - Export volume as NFS/SMB share if mount_type is NFS or SMB @@ -315,6 +346,9 @@ class GlusterBaseClass(TestCase): Args: volume_create_force(bool): True if create_volume should be executed with 'force' option. + only_volume_create(bool): True, only volume creation is needed + False, by default volume creation and + start. Returns (bool): True if all the steps mentioned in the descriptions passes. False otherwise. 
@@ -337,12 +371,19 @@ class GlusterBaseClass(TestCase):
         g.log.info("Setting up volume %s", cls.volname)
         ret = setup_volume(mnode=cls.mnode,
                            all_servers_info=cls.all_servers_info,
-                           volume_config=cls.volume, force=force_volume_create)
+                           volume_config=cls.volume, force=force_volume_create,
+                           create_only=only_volume_create)
         if not ret:
             g.log.error("Failed to Setup volume %s", cls.volname)
             return False
         g.log.info("Successful in setting up volume %s", cls.volname)

+        # Return early when only volume creation was requested
+        if only_volume_create and ret:
+            g.log.info("Setup of volume {} with volume creation only "
+                       "is successful".format(cls.volname))
+            return True
+
         # Wait for volume processes to be online
         g.log.info("Wait for volume %s processes to be online", cls.volname)
         ret = wait_for_volume_process_to_be_online(cls.mnode, cls.volname)
@@ -433,6 +474,9 @@ class GlusterBaseClass(TestCase):
         """
         g.log.info("Starting to mount volume %s", cls.volname)
         for mount_obj in mounts:
+            # For nfs-ganesha, mount is done via vip
+            if cls.enable_nfs_ganesha:
+                mount_obj.server_system = cls.vips[0]
             g.log.info("Mounting volume '%s:%s' on '%s:%s'",
                        mount_obj.server_system, mount_obj.volname,
                        mount_obj.client_system, mount_obj.mountpoint)
@@ -952,8 +996,8 @@ class GlusterBaseClass(TestCase):
                 mount_dict['volname'] = cls.slave_volume
                 mount_dict['server'] = cls.mnode_slave
                 mount_dict['mountpoint'] = path_join(
-                    "/mnt", '_'.join([cls.slave_volname,
-                                     cls.mount_type]))
+                    "/mnt", '_'.join([cls.slave_volname,
+                                      cls.mount_type]))
             cls.slave_mounts = create_mount_objs(slave_mount_dict_list)

         # Defining clients from mounts.
@@ -993,6 +1037,31 @@ class GlusterBaseClass(TestCase):
                                           datetime.now().strftime('%H_%M_%d_%m_%Y'))
             cls.glustotest_run_id = g.config['glustotest_run_id']

+        if cls.enable_nfs_ganesha:
+            g.log.info("Setup NFS_Ganesha")
+            cls.num_of_nfs_ganesha_nodes = int(cls.num_of_nfs_ganesha_nodes)
+            cls.servers_in_nfs_ganesha_cluster = (
+                cls.servers[:cls.num_of_nfs_ganesha_nodes])
+            cls.vips_in_nfs_ganesha_cluster = (
+                cls.vips[:cls.num_of_nfs_ganesha_nodes])
+
+            # Obtain hostname of servers in ganesha cluster
+            cls.ganesha_servers_hostname = []
+            for ganesha_server in cls.servers_in_nfs_ganesha_cluster:
+                ret, hostname, _ = g.run(ganesha_server, "hostname")
+                if ret:
+                    raise ExecutionError("Failed to obtain hostname of %s"
+                                         % ganesha_server)
+                hostname = hostname.strip()
+                g.log.info("Obtained hostname: IP- %s, hostname- %s",
+                           ganesha_server, hostname)
+                cls.ganesha_servers_hostname.append(hostname)
+            from glustolibs.gluster.nfs_ganesha_libs import setup_nfs_ganesha
+            ret = setup_nfs_ganesha(cls)
+            if not ret:
+                raise ExecutionError("Failed to setup nfs ganesha")
+            g.log.info("Successful in setting up NFS Ganesha Cluster")
+
         msg = "Setupclass: %s : %s" % (cls.__name__, cls.glustotest_run_id)
         g.log.info(msg)
         cls.inject_msg_in_gluster_logs(msg)
@@ -1020,7 +1089,7 @@ class GlusterBaseClass(TestCase):
         if (self.error_or_failure_exists or
                 self._is_error_or_failure_exists()):
             ret = self.scratch_cleanup(self.error_or_failure_exists)
-            g.log.warn(ret)
+            g.log.info(ret)
         return self.get_super_method(self, 'doCleanups')()

     @classmethod
@@ -1029,5 +1098,250 @@ class GlusterBaseClass(TestCase):
                 cls._is_error_or_failure_exists()):
             ret = cls.scratch_cleanup(
                 GlusterBaseClass.error_or_failure_exists)
-            g.log.warn(ret)
+            g.log.info(ret)
         return cls.get_super_method(cls, 'doClassCleanups')()
+
+    @classmethod
+    def delete_nfs_ganesha_cluster(cls):
+        ret = teardown_nfs_ganesha_cluster(
+            cls.servers_in_nfs_ganesha_cluster)
+        if not ret:
g.log.error("Teardown got failed. Hence, cleaning up " + "nfs-ganesha cluster forcefully") + ret = teardown_nfs_ganesha_cluster( + cls.servers_in_nfs_ganesha_cluster, force=True) + if not ret: + raise ExecutionError("Force cleanup of nfs-ganesha " + "cluster failed") + g.log.info("Teardown nfs ganesha cluster succeeded") + + @classmethod + def start_memory_and_cpu_usage_logging(cls, test_id, interval=60, + count=100): + """Upload logger script and start logging usage on cluster + + Args: + test_id(str): ID of the test running fetched from self.id() + + Kawrgs: + interval(int): Time interval after which logs are to be collected + (Default: 60) + count(int): Number of samples to be collected(Default: 100) + + Returns: + proc_dict(dict):Dictionary of logging processes + """ + # imports are added inside function to make it them + # optional and not cause breakage on installation + # which don't use the resource leak library + from glustolibs.io.memory_and_cpu_utils import ( + check_upload_memory_and_cpu_logger_script, + log_memory_and_cpu_usage_on_cluster) + + # Checking if script is present on servers or not if not then + # upload it to servers. + if not check_upload_memory_and_cpu_logger_script(cls.servers): + return None + + # Checking if script is present on clients or not if not then + # upload it to clients. + if not check_upload_memory_and_cpu_logger_script(cls.clients): + return None + + # Start logging on servers and clients + proc_dict = log_memory_and_cpu_usage_on_cluster( + cls.servers, cls.clients, test_id, interval, count) + + return proc_dict + + @classmethod + def compute_and_print_usage_stats(cls, test_id, proc_dict, + kill_proc=False): + """Compute and print CPU and memory usage statistics + + Args: + proc_dict(dict):Dictionary of logging processes + test_id(str): ID of the test running fetched from self.id() + + Kwargs: + kill_proc(bool): Kill logging process if true else wait + for process to complete execution + """ + # imports are added inside function to make it them + # optional and not cause breakage on installation + # which don't use the resource leak library + from glustolibs.io.memory_and_cpu_utils import ( + wait_for_logging_processes_to_stop, kill_all_logging_processes, + compute_data_usage_stats_on_servers, + compute_data_usage_stats_on_clients) + + # Wait or kill running logging process + if kill_proc: + nodes = cls.servers + cls.clients + ret = kill_all_logging_processes(proc_dict, nodes, cluster=True) + if not ret: + g.log.error("Unable to stop logging processes.") + else: + ret = wait_for_logging_processes_to_stop(proc_dict, cluster=True) + if not ret: + g.log.error("Processes didn't complete still running.") + + # Compute and print stats for servers + ret = compute_data_usage_stats_on_servers(cls.servers, test_id) + g.log.info('*' * 50) + g.log.info(ret) # TODO: Make logged message more structured + g.log.info('*' * 50) + + # Compute and print stats for clients + ret = compute_data_usage_stats_on_clients(cls.clients, test_id) + g.log.info('*' * 50) + g.log.info(ret) # TODO: Make logged message more structured + g.log.info('*' * 50) + + @classmethod + def check_for_memory_leaks_and_oom_kills_on_servers(cls, test_id, + gain=30.0): + """Check for memory leaks and OOM kills on servers + + Args: + test_id(str): ID of the test running fetched from self.id() + + Kwargs: + gain(float): Accepted amount of leak for a given testcase in MB + (Default:30) + + Returns: + bool: True if memory leaks or OOM kills are observed else false + """ + # imports are added inside 
+        # imports are added inside the function to make them optional
+        # and not cause breakage on installations which don't use the
+        # resource leak library
+        from glustolibs.io.memory_and_cpu_utils import (
+            check_for_memory_leaks_in_glusterd,
+            check_for_memory_leaks_in_glusterfs,
+            check_for_memory_leaks_in_glusterfsd,
+            check_for_oom_killers_on_servers)
+
+        # Check for memory leaks on glusterd
+        if check_for_memory_leaks_in_glusterd(cls.servers, test_id, gain):
+            g.log.error("Memory leak on glusterd.")
+            return True
+
+        if cls.volume_type != "distributed":
+            # Check for memory leaks on shd
+            if check_for_memory_leaks_in_glusterfs(cls.servers, test_id,
+                                                   gain):
+                g.log.error("Memory leak on shd.")
+                return True
+
+        # Check for memory leaks on brick processes
+        if check_for_memory_leaks_in_glusterfsd(cls.servers, test_id, gain):
+            g.log.error("Memory leak on brick process.")
+            return True
+
+        # Check OOM kills on servers for all gluster server processes
+        if check_for_oom_killers_on_servers(cls.servers):
+            g.log.error('OOM kills present on servers.')
+            return True
+        return False
+
+    @classmethod
+    def check_for_memory_leaks_and_oom_kills_on_clients(cls, test_id,
+                                                        gain=30.0):
+        """Check for memory leaks and OOM kills on clients
+
+        Args:
+            test_id(str): ID of the test running fetched from self.id()
+
+        Kwargs:
+            gain(float): Accepted amount of leak for a given testcase in MB
+                         (Default: 30)
+
+        Returns:
+            bool: True if memory leaks or OOM kills are observed else False
+        """
+        # imports are added inside the function to make them optional
+        # and not cause breakage on installations which don't use the
+        # resource leak library
+        from glustolibs.io.memory_and_cpu_utils import (
+            check_for_memory_leaks_in_glusterfs_fuse,
+            check_for_oom_killers_on_clients)
+
+        # Check for memory leak on glusterfs fuse process
+        if check_for_memory_leaks_in_glusterfs_fuse(cls.clients, test_id,
+                                                    gain):
+            g.log.error("Memory leaks observed on FUSE clients.")
+            return True
+
+        # Check for oom kills on clients
+        if check_for_oom_killers_on_clients(cls.clients):
+            g.log.error("OOM kills present on clients.")
+            return True
+        return False
+
+    @classmethod
+    def check_for_cpu_usage_spikes_on_servers(cls, test_id, threshold=3):
+        """Check for CPU usage spikes on servers
+
+        Args:
+            test_id(str): ID of the test running fetched from self.id()
+
+        Kwargs:
+            threshold(int): Accepted amount of instances of 100% CPU usage
+                            (Default: 3)
+
+        Returns:
+            bool: True if CPU spikes are more than threshold else False
+        """
+        # imports are added inside the function to make them optional
+        # and not cause breakage on installations which don't use the
+        # resource leak library
+        from glustolibs.io.memory_and_cpu_utils import (
+            check_for_cpu_usage_spikes_on_glusterd,
+            check_for_cpu_usage_spikes_on_glusterfs,
+            check_for_cpu_usage_spikes_on_glusterfsd)
+
+        # Check for CPU usage spikes on glusterd
+        if check_for_cpu_usage_spikes_on_glusterd(cls.servers, test_id,
+                                                  threshold):
+            g.log.error("CPU usage spikes observed more than threshold "
+                        "on glusterd.")
+            return True
+
+        if cls.volume_type != "distributed":
+            # Check for CPU usage spikes on shd
+            if check_for_cpu_usage_spikes_on_glusterfs(cls.servers, test_id,
+                                                       threshold):
+                g.log.error("CPU usage spikes observed more than threshold "
+                            "on shd.")
+                return True
+
+        # Check for CPU usage spikes on brick processes
+        if check_for_cpu_usage_spikes_on_glusterfsd(cls.servers, test_id,
+                                                    threshold):
+            g.log.error("CPU usage spikes observed more than threshold "
+                        "on brick processes.")
+            return True
+        return False
+
+    @classmethod
+    def check_for_cpu_spikes_on_clients(cls, test_id, threshold=3):
+        """Check for CPU usage spikes on clients
+
+        Args:
+            test_id(str): ID of the test running fetched from self.id()
+
+        Kwargs:
+            threshold(int): Accepted amount of instances of 100% CPU usage
+                            (Default: 3)
+
+        Returns:
+            bool: True if CPU spikes are more than threshold else False
+        """
+        # imports are added inside the function to make them optional
+        # and not cause breakage on installations which don't use the
+        # resource leak library
+        from glustolibs.io.memory_and_cpu_utils import (
+            check_for_cpu_usage_spikes_on_glusterfs_fuse)
+
+        ret = check_for_cpu_usage_spikes_on_glusterfs_fuse(cls.clients,
+                                                           test_id,
+                                                           threshold)
+        return ret
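Taken together, the logging and leak/spike helpers above are designed to bracket a test run: start the loggers in setUp(), then stop them and evaluate the results in tearDown(). A rough sketch of that flow, assuming a test class derived from GlusterBaseClass; the class name, decorator arguments, test body and assertions are illustrative only, while the helper methods are the ones added in this patch:

    # Illustrative sketch wiring the resource-leak helpers into a test.
    from glustolibs.gluster.gluster_base_class import (GlusterBaseClass,
                                                       runs_on)

    @runs_on([['replicated'], ['glusterfs']])
    class TestWithLeakChecks(GlusterBaseClass):

        def setUp(self):
            self.get_super_method(self, 'setUp')()
            self.test_id = self.id()
            # Sample memory/CPU usage with the defaults (every 60s,
            # at most 100 samples); returns None on failure
            self.proc_dict = self.start_memory_and_cpu_usage_logging(
                self.test_id)
            self.assertIsNotNone(self.proc_dict,
                                 "Failed to start usage logging")

        def test_something(self):
            """Actual test steps go here."""

        def tearDown(self):
            # Stop the loggers and print the collected statistics
            self.compute_and_print_usage_stats(self.test_id, self.proc_dict,
                                               kill_proc=True)
            # Fail if any leak, OOM kill or CPU spike was observed
            for check in (
                    self.check_for_memory_leaks_and_oom_kills_on_servers,
                    self.check_for_memory_leaks_and_oom_kills_on_clients,
                    self.check_for_cpu_usage_spikes_on_servers,
                    self.check_for_cpu_spikes_on_clients):
                self.assertFalse(check(self.test_id),
                                 "%s reported a problem" % check.__name__)
            self.get_super_method(self, 'tearDown')()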