From 08faae06ab07b56b815aec5bfbfcf72d653e8055 Mon Sep 17 00:00:00 2001 From: kshithijiyer Date: Tue, 6 Oct 2020 09:05:44 +0530 Subject: [Test] Add 2 memory leak tests and fix library issues Scenarios added: ---------------- Test case: 1. Create a volume, start it and mount it. 2. Start I/O from mount point. 3. Check if there are any memory leaks and OOM killers. Test case: 1. Create a volume, start it and mount it. 2. Set features.cache-invalidation to ON. 3. Start I/O from mount point. 4. Run gluster volume heal command in a loop 5. Check if there are any memory leaks and OOM killers on servers. Design change: -------------- - self.id() is moved into test class as it was hitting bound errors in the original logic. - Logic changed for checking fuse leaks. - Fixed breakage in methods wherever needed. Change-Id: Icb600d833d0c08636b6002abb489342ea1f946d7 Signed-off-by: kshithijiyer --- .../glustolibs/gluster/gluster_base_class.py | 75 ++++++++++++++-------- 1 file changed, 48 insertions(+), 27 deletions(-) (limited to 'glustolibs-gluster/glustolibs/gluster/gluster_base_class.py') diff --git a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py index baec1be8a..3ce38a304 100755 --- a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py +++ b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py @@ -1107,9 +1107,13 @@ class GlusterBaseClass(TestCase): g.log.info("Teardown nfs ganesha cluster succeeded") @classmethod - def start_memory_and_cpu_usage_logging(cls, interval=60, count=100): + def start_memory_and_cpu_usage_logging(cls, test_id, interval=60, + count=100): """Upload logger script and start logging usage on cluster + Args: + test_id(str): ID of the test running fetched from self.id() + Kwargs: interval(int): Time interval after which logs are to be collected (Default: 60) @@ -1137,16 +1141,18 @@ class GlusterBaseClass(TestCase): # Start logging on servers and clients proc_dict 
= log_memory_and_cpu_usage_on_cluster( - cls.servers, cls.clients, cls.id(), interval, count) + cls.servers, cls.clients, test_id, interval, count) return proc_dict @classmethod - def compute_and_print_usage_stats(cls, proc_dict, kill_proc=False): + def compute_and_print_usage_stats(cls, test_id, proc_dict, + kill_proc=False): """Compute and print CPU and memory usage statistics Args: proc_dict(dict):Dictionary of logging processes + test_id(str): ID of the test running fetched from self.id() Kwargs: kill_proc(bool): Kill logging process if true else wait @@ -1172,21 +1178,25 @@ class GlusterBaseClass(TestCase): g.log.error("Processes didn't complete still running.") # Compute and print stats for servers - ret = compute_data_usage_stats_on_servers(cls.servers, cls.id()) + ret = compute_data_usage_stats_on_servers(cls.servers, test_id) g.log.info('*' * 50) g.log.info(ret) # TODO: Make logged message more structured g.log.info('*' * 50) # Compute and print stats for clients - ret = compute_data_usage_stats_on_clients(cls.clients, cls.id()) + ret = compute_data_usage_stats_on_clients(cls.clients, test_id) g.log.info('*' * 50) g.log.info(ret) # TODO: Make logged message more structured g.log.info('*' * 50) @classmethod - def check_for_memory_leaks_and_oom_kills_on_servers(cls, gain=30.0): + def check_for_memory_leaks_and_oom_kills_on_servers(cls, test_id, + gain=30.0): """Check for memory leaks and OOM kills on servers + Args: + test_id(str): ID of the test running fetched from self.id() + Kwargs: gain(float): Accepted amount of leak for a given testcase in MB (Default:30) @@ -1204,31 +1214,35 @@ class GlusterBaseClass(TestCase): check_for_oom_killers_on_servers) # Check for memory leaks on glusterd - if check_for_memory_leaks_in_glusterd(cls.servers, cls.id(), gain): + if check_for_memory_leaks_in_glusterd(cls.servers, test_id, gain): g.log.error("Memory leak on glusterd.") return True - # Check for memory leaks on shd - if 
check_for_memory_leaks_in_glusterfs(cls.servers, cls.id(), gain): - g.log.error("Memory leak on shd.") - return True + if cls.volume_type != "distributed": + # Check for memory leaks on shd + if check_for_memory_leaks_in_glusterfs(cls.servers, test_id, + gain): + g.log.error("Memory leak on shd.") + return True # Check for memory leaks on brick processes - if check_for_memory_leaks_in_glusterfsd(cls.servers, cls.id(), gain): + if check_for_memory_leaks_in_glusterfsd(cls.servers, test_id, gain): g.log.error("Memory leak on brick process.") return True # Check OOM kills on servers for all gluster server processes - ret = check_for_oom_killers_on_servers(cls.servers) - if not ret: + if check_for_oom_killers_on_servers(cls.servers): g.log.error('OOM kills present on servers.') return True return False @classmethod - def check_for_memory_leaks_and_oom_kills_on_clients(cls, gain=30): + def check_for_memory_leaks_and_oom_kills_on_clients(cls, test_id, gain=30): """Check for memory leaks and OOM kills on clients + Args: + test_id(str): ID of the test running fetched from self.id() + Kwargs: gain(float): Accepted amount of leak for a given testcase in MB (Default:30) @@ -1244,7 +1258,7 @@ class GlusterBaseClass(TestCase): check_for_oom_killers_on_clients) # Check for memory leak on glusterfs fuse process - if check_for_memory_leaks_in_glusterfs_fuse(cls.clients, cls.id(), + if check_for_memory_leaks_in_glusterfs_fuse(cls.clients, test_id, gain): g.log.error("Memory leaks observed on FUSE clients.") return True @@ -1256,9 +1270,12 @@ class GlusterBaseClass(TestCase): return False @classmethod - def check_for_cpu_usage_spikes_on_servers(cls, threshold=3): + def check_for_cpu_usage_spikes_on_servers(cls, test_id, threshold=3): """Check for CPU usage spikes on servers + Args: + test_id(str): ID of the test running fetched from self.id() + Kwargs: threshold(int): Accepted amount of instances of 100% CPU usage (Default:3) @@ -1274,21 +1291,22 @@ class GlusterBaseClass(TestCase): 
check_for_cpu_usage_spikes_on_glusterfsd) # Check for CPU usage spikes on glusterd - if check_for_cpu_usage_spikes_on_glusterd(cls.servers, cls.id(), + if check_for_cpu_usage_spikes_on_glusterd(cls.servers, test_id, threshold): g.log.error("CPU usage spikes observed more than threshold " "on glusterd.") return True - # Check for CPU usage spikes on shd - if check_for_cpu_usage_spikes_on_glusterfs(cls.servers, cls.id(), - threshold): - g.log.error("CPU usage spikes observed more than threshold " - "on shd.") - return True + if cls.volume_type != "distributed": + # Check for CPU usage spikes on shd + if check_for_cpu_usage_spikes_on_glusterfs(cls.servers, test_id, + threshold): + g.log.error("CPU usage spikes observed more than threshold " + "on shd.") + return True # Check for CPU usage spikes on brick processes - if check_for_cpu_usage_spikes_on_glusterfsd(cls.servers, cls.id(), + if check_for_cpu_usage_spikes_on_glusterfsd(cls.servers, test_id, threshold): g.log.error("CPU usage spikes observed more than threshold " "on shd.") @@ -1296,9 +1314,12 @@ class GlusterBaseClass(TestCase): return False @classmethod - def check_for_cpu_spikes_on_clients(cls, threshold=3): + def check_for_cpu_spikes_on_clients(cls, test_id, threshold=3): """Check for CPU usage spikes on clients + Args: + test_id(str): ID of the test running fetched from self.id() + Kwargs: threshold(int): Accepted amount of instances of 100% CPU usage (Default:3) @@ -1312,6 +1333,6 @@ class GlusterBaseClass(TestCase): check_for_cpu_usage_spikes_on_glusterfs_fuse) ret = check_for_cpu_usage_spikes_on_glusterfs_fuse(cls.clients, - cls.id(), + test_id, threshold) return ret -- cgit