From 08faae06ab07b56b815aec5bfbfcf72d653e8055 Mon Sep 17 00:00:00 2001 From: kshithijiyer Date: Tue, 6 Oct 2020 09:05:44 +0530 Subject: [Test] Add 2 memory leak tests and fix library issues Scenarios added: ---------------- Test case: 1. Create a volume, start it and mount it. 2. Start I/O from mount point. 3. Check if there are any memory leaks and OOM killers. Test case: 1. Create a volume, start it and mount it. 2. Set features.cache-invalidation to ON. 3. Start I/O from mount point. 4. Run gluster volume heal command in a loop 5. Check if there are any memory leaks and OOM killers on servers. Design change: -------------- - self.id() is moved into test class as it was hitting bound errors in the original logic. - Logic changed for checking fuse leaks. - Fixed breakage in methods wherever needed. Change-Id: Icb600d833d0c08636b6002abb489342ea1f946d7 Signed-off-by: kshithijiyer --- .../glustolibs/gluster/gluster_base_class.py | 75 ++++++++++++++-------- 1 file changed, 48 insertions(+), 27 deletions(-) (limited to 'glustolibs-gluster/glustolibs/gluster/gluster_base_class.py') diff --git a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py index baec1be8a..3ce38a304 100755 --- a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py +++ b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py @@ -1107,9 +1107,13 @@ class GlusterBaseClass(TestCase): g.log.info("Teardown nfs ganesha cluster succeeded") @classmethod - def start_memory_and_cpu_usage_logging(cls, interval=60, count=100): + def start_memory_and_cpu_usage_logging(cls, test_id, interval=60, + count=100): """Upload logger script and start logging usage on cluster + Args: + test_id(str): ID of the test running fetched from self.id() + Kwargs: interval(int): Time interval after which logs are to be collected (Default: 60) @@ -1137,16 +1141,18 @@ class GlusterBaseClass(TestCase): # Start logging on servers and clients proc_dict 
= log_memory_and_cpu_usage_on_cluster( - cls.servers, cls.clients, cls.id(), interval, count) + cls.servers, cls.clients, test_id, interval, count) return proc_dict @classmethod - def compute_and_print_usage_stats(cls, proc_dict, kill_proc=False): + def compute_and_print_usage_stats(cls, test_id, proc_dict, + kill_proc=False): """Compute and print CPU and memory usage statistics Args: proc_dict(dict):Dictionary of logging processes + test_id(str): ID of the test running fetched from self.id() Kwargs: kill_proc(bool): Kill logging process if true else wait @@ -1172,21 +1178,25 @@ class GlusterBaseClass(TestCase): g.log.error("Processes didn't complete still running.") # Compute and print stats for servers - ret = compute_data_usage_stats_on_servers(cls.servers, cls.id()) + ret = compute_data_usage_stats_on_servers(cls.servers, test_id) g.log.info('*' * 50) g.log.info(ret) # TODO: Make logged message more structured g.log.info('*' * 50) # Compute and print stats for clients - ret = compute_data_usage_stats_on_clients(cls.clients, cls.id()) + ret = compute_data_usage_stats_on_clients(cls.clients, test_id) g.log.info('*' * 50) g.log.info(ret) # TODO: Make logged message more structured g.log.info('*' * 50) @classmethod - def check_for_memory_leaks_and_oom_kills_on_servers(cls, gain=30.0): + def check_for_memory_leaks_and_oom_kills_on_servers(cls, test_id, + gain=30.0): """Check for memory leaks and OOM kills on servers + Args: + test_id(str): ID of the test running fetched from self.id() + Kwargs: gain(float): Accepted amount of leak for a given testcase in MB (Default:30) @@ -1204,31 +1214,35 @@ class GlusterBaseClass(TestCase): check_for_oom_killers_on_servers) # Check for memory leaks on glusterd - if check_for_memory_leaks_in_glusterd(cls.servers, cls.id(), gain): + if check_for_memory_leaks_in_glusterd(cls.servers, test_id, gain): g.log.error("Memory leak on glusterd.") return True - # Check for memory leaks on shd - if 
check_for_memory_leaks_in_glusterfs(cls.servers, cls.id(), gain): - g.log.error("Memory leak on shd.") - return True + if cls.volume_type != "distributed": + # Check for memory leaks on shd + if check_for_memory_leaks_in_glusterfs(cls.servers, test_id, + gain): + g.log.error("Memory leak on shd.") + return True # Check for memory leaks on brick processes - if check_for_memory_leaks_in_glusterfsd(cls.servers, cls.id(), gain): + if check_for_memory_leaks_in_glusterfsd(cls.servers, test_id, gain): g.log.error("Memory leak on brick process.") return True # Check OOM kills on servers for all gluster server processes - ret = check_for_oom_killers_on_servers(cls.servers) - if not ret: + if check_for_oom_killers_on_servers(cls.servers): g.log.error('OOM kills present on servers.') return True return False @classmethod - def check_for_memory_leaks_and_oom_kills_on_clients(cls, gain=30): + def check_for_memory_leaks_and_oom_kills_on_clients(cls, test_id, gain=30): """Check for memory leaks and OOM kills on clients + Args: + test_id(str): ID of the test running fetched from self.id() + Kwargs: gain(float): Accepted amount of leak for a given testcase in MB (Default:30) @@ -1244,7 +1258,7 @@ class GlusterBaseClass(TestCase): check_for_oom_killers_on_clients) # Check for memory leak on glusterfs fuse process - if check_for_memory_leaks_in_glusterfs_fuse(cls.clients, cls.id(), + if check_for_memory_leaks_in_glusterfs_fuse(cls.clients, test_id, gain): g.log.error("Memory leaks observed on FUSE clients.") return True @@ -1256,9 +1270,12 @@ class GlusterBaseClass(TestCase): return False @classmethod - def check_for_cpu_usage_spikes_on_servers(cls, threshold=3): + def check_for_cpu_usage_spikes_on_servers(cls, test_id, threshold=3): """Check for CPU usage spikes on servers + Args: + test_id(str): ID of the test running fetched from self.id() + Kwargs: threshold(int): Accepted amount of instances of 100% CPU usage (Default:3) @@ -1274,21 +1291,22 @@ class GlusterBaseClass(TestCase): 
check_for_cpu_usage_spikes_on_glusterfsd) # Check for CPU usage spikes on glusterd - if check_for_cpu_usage_spikes_on_glusterd(cls.servers, cls.id(), + if check_for_cpu_usage_spikes_on_glusterd(cls.servers, test_id, threshold): g.log.error("CPU usage spikes observed more than threshold " "on glusterd.") return True - # Check for CPU usage spikes on shd - if check_for_cpu_usage_spikes_on_glusterfs(cls.servers, cls.id(), - threshold): - g.log.error("CPU usage spikes observed more than threshold " - "on shd.") - return True + if cls.volume_type != "distributed": + # Check for CPU usage spikes on shd + if check_for_cpu_usage_spikes_on_glusterfs(cls.servers, test_id, + threshold): + g.log.error("CPU usage spikes observed more than threshold " + "on shd.") + return True # Check for CPU usage spikes on brick processes - if check_for_cpu_usage_spikes_on_glusterfsd(cls.servers, cls.id(), + if check_for_cpu_usage_spikes_on_glusterfsd(cls.servers, test_id, threshold): g.log.error("CPU usage spikes observed more than threshold " "on shd.") @@ -1296,9 +1314,12 @@ class GlusterBaseClass(TestCase): return False @classmethod - def check_for_cpu_spikes_on_clients(cls, threshold=3): + def check_for_cpu_spikes_on_clients(cls, test_id, threshold=3): """Check for CPU usage spikes on clients + Args: + test_id(str): ID of the test running fetched from self.id() + Kwargs: threshold(int): Accepted amount of instances of 100% CPU usage (Default:3) @@ -1312,6 +1333,6 @@ class GlusterBaseClass(TestCase): check_for_cpu_usage_spikes_on_glusterfs_fuse) ret = check_for_cpu_usage_spikes_on_glusterfs_fuse(cls.clients, - cls.id(), + test_id, threshold) return ret -- cgit