diff options
Diffstat (limited to 'tests/functional/glusterd')
3 files changed, 401 insertions, 43 deletions
diff --git a/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py b/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py new file mode 100644 index 000000000..92c48da6f --- /dev/null +++ b/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py @@ -0,0 +1,207 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Increase in glusterd memory consumption on repetetive operations + for 100 volumes +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.volume_ops import (volume_stop, volume_delete, + get_volume_list, + volume_start) +from glustolibs.gluster.gluster_init import (restart_glusterd, + wait_for_glusterd_to_start) +from glustolibs.gluster.volume_libs import (bulk_volume_creation, + cleanup_volume) +from glustolibs.gluster.volume_ops import set_volume_options + + +class TestGlusterMemoryConsumptionIncrease(GlusterBaseClass): + def tearDown(self): + # Clean up all volumes + if self.volume_present: + vol_list = get_volume_list(self.mnode) + if vol_list is None: + raise ExecutionError("Failed to get the volume list") + + for volume in vol_list: + ret = cleanup_volume(self.mnode, volume) + if not ret: + raise ExecutionError("Unable to delete volume %s" % volume) + g.log.info("Volume deleted successfully : %s", volume) + + # Disable multiplex + ret = set_volume_options(self.mnode, 'all', + {'cluster.brick-multiplex': 'disable'}) + self.assertTrue(ret, "Failed to enable brick-multiplex" + " for the cluster") + + # Calling baseclass tearDown method + self.get_super_method(self, 'tearDown')() + + def _volume_operations_in_loop(self): + """ Create, start, stop and delete 100 volumes in a loop """ + # Create and start 100 volumes in a loop + self.volume_config = { + 'name': 'volume-', + 'servers': self.servers, + 'voltype': {'type': 'distributed-replicated', + 'dist_count': 2, + 'replica_count': 3}, + } + + ret = bulk_volume_creation(self.mnode, 100, self.all_servers_info, + self.volume_config, "", False, True) + self.assertTrue(ret, "Failed to create volumes") + + self.volume_present = True + + g.log.info("Successfully created all the volumes") + + # Start 100 volumes in loop + for i in range(100): + self.volname = "volume-%d" % i + ret, _, _ = volume_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start volume: %s" + % self.volname) + + g.log.info("Successfully started all the volumes") + + # Stop 100 volumes in loop + for i in range(100): + self.volname = "volume-%d" % i + ret, _, _ = volume_stop(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to stop volume: %s" + % self.volname) + + g.log.info("Successfully stopped all the volumes") + + # Delete 100 volumes in loop + for i in range(100): + self.volname = "volume-%d" % i + ret = volume_delete(self.mnode, self.volname) + self.assertTrue(ret, "Failed to delete volume: %s" + % self.volname) + + self.volume_present = False + + g.log.info("Successfully deleted all the volumes") + + def _memory_consumption_for_all_nodes(self, pid_list): + """Fetch the memory consumption by glusterd process for + all the nodes + """ + memory_consumed_list = [] + for i, server in enumerate(self.servers): + # Get the memory consumption of glusterd in each node + cmd = "top -b -n 1 -p %d | awk 'FNR==8 {print $6}'" % pid_list[i] + ret, mem, _ = g.run(server, cmd) + self.assertEqual(ret, 0, "Failed to get the memory usage of" + " glusterd process") + mem = int(mem)//1024 + memory_consumed_list.append(mem) + + return memory_consumed_list + + def test_glusterd_memory_consumption_increase(self): + """ + Test Case: + 1) Enable brick-multiplex and set max-bricks-per-process to 3 in + the cluster + 2) Get the glusterd memory consumption + 3) Perform create,start,stop,delete operation for 100 volumes + 4) Check glusterd memory consumption, it should not increase by + more than 50MB + 5) Repeat steps 3-4 for two more time + 6) Check glusterd memory consumption it should not increase by + more than 10MB + """ + # pylint: disable=too-many-locals + # Restarting glusterd to refresh its memory consumption + ret = restart_glusterd(self.servers) + self.assertTrue(ret, "Restarting glusterd failed") + + # check if glusterd is running post reboot + ret = wait_for_glusterd_to_start(self.servers) + self.assertTrue(ret, "Glusterd service is not running post reboot") + + # Enable brick-multiplex, set max-bricks-per-process to 3 in cluster + for key, value in (('cluster.brick-multiplex', 'enable'), + ('cluster.max-bricks-per-process', '3')): + ret = set_volume_options(self.mnode, 'all', {key: value}) + self.assertTrue(ret, "Failed to set {} to {} " + " for the cluster".format(key, value)) + + # Get the pidof of glusterd process + pid_list = [] + for server in self.servers: + # Get the pidof of glusterd process + cmd = "pidof glusterd" + ret, pid, _ = g.run(server, cmd) + self.assertEqual(ret, 0, "Failed to get the pid of glusterd") + pid = int(pid) + pid_list.append(pid) + + # Fetch the list of memory consumed in all the nodes + mem_consumed_list = self._memory_consumption_for_all_nodes(pid_list) + + # Perform volume operations for 100 volumes for first time + self._volume_operations_in_loop() + + # Fetch the list of memory consumed in all the nodes after 1 iteration + mem_consumed_list_1 = self._memory_consumption_for_all_nodes(pid_list) + + for i, mem in enumerate(mem_consumed_list_1): + condition_met = False + if mem - mem_consumed_list[i] <= 50: + condition_met = True + + self.assertTrue(condition_met, "Unexpected: Memory consumption" + " glusterd increased more than the expected" + " of value") + + # Perform volume operations for 100 volumes for second time + self._volume_operations_in_loop() + + # Fetch the list of memory consumed in all the nodes after 2 iterations + mem_consumed_list_2 = self._memory_consumption_for_all_nodes(pid_list) + + for i, mem in enumerate(mem_consumed_list_2): + condition_met = False + if mem - mem_consumed_list_1[i] <= 10: + condition_met = True + + self.assertTrue(condition_met, "Unexpected: Memory consumption" + " glusterd increased more than the expected" + " of value") + + # Perform volume operations for 100 volumes for third time + self._volume_operations_in_loop() + + # Fetch the list of memory consumed in all the nodes after 3 iterations + mem_consumed_list_3 = self._memory_consumption_for_all_nodes(pid_list) + + for i, mem in enumerate(mem_consumed_list_3): + condition_met = False + if mem - mem_consumed_list_2[i] <= 10: + condition_met = True + + self.assertTrue(condition_met, "Unexpected: Memory consumption" + " glusterd increased more than the expected" + " of value") diff --git a/tests/functional/glusterd/test_probe_glusterd_down.py b/tests/functional/glusterd/test_probe_glusterd_down.py index 3705904a9..c851bf104 100644 --- a/tests/functional/glusterd/test_probe_glusterd_down.py +++ b/tests/functional/glusterd/test_probe_glusterd_down.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2020-2021 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,17 +14,14 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -from time import sleep - from glusto.core import Glusto as g from glustolibs.gluster.gluster_base_class import GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.peer_ops import peer_probe from glustolibs.gluster.lib_utils import is_core_file_created from glustolibs.gluster.peer_ops import peer_detach, is_peer_connected -from glustolibs.gluster.gluster_init import (stop_glusterd, start_glusterd, - wait_for_glusterd_to_start) -from glustolibs.misc.misc_libs import are_nodes_online +from glustolibs.gluster.gluster_init import stop_glusterd, start_glusterd +from glustolibs.misc.misc_libs import bring_down_network_interface class PeerProbeWhenGlusterdDown(GlusterBaseClass): @@ -57,7 +54,7 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): ret, test_timestamp, _ = g.run_local('date +%s') test_timestamp = test_timestamp.strip() - # detach one of the nodes which is part of the cluster + # Detach one of the nodes which is part of the cluster g.log.info("detaching server %s ", self.servers[1]) ret, _, err = peer_detach(self.mnode, self.servers[1]) msg = 'peer detach: failed: %s is not part of cluster\n' \ @@ -66,12 +63,12 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): self.assertEqual(err, msg, "Failed to detach %s " % (self.servers[1])) - # bring down glusterd of the server which has been detached + # Bring down glusterd of the server which has been detached g.log.info("Stopping glusterd on %s ", self.servers[1]) ret = stop_glusterd(self.servers[1]) self.assertTrue(ret, "Fail to stop glusterd on %s " % self.servers[1]) - # trying to peer probe the node whose glusterd was stopped using its IP + # Trying to peer probe the node whose glusterd was stopped using IP g.log.info("Peer probing %s when glusterd down ", self.servers[1]) ret, _, err = peer_probe(self.mnode, self.servers[1]) self.assertNotEqual(ret, 0, "Peer probe should not pass when " @@ -79,7 +76,7 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): self.assertEqual(err, "peer probe: failed: Probe returned with " "Transport endpoint is not connected\n") - # trying to peer probe the same node with hostname + # Trying to peer probe the same node with hostname g.log.info("Peer probing node %s using hostname with glusterd down ", self.servers[1]) hostname = g.run(self.servers[1], "hostname") @@ -89,27 +86,24 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): self.assertEqual(err, "peer probe: failed: Probe returned with" " Transport endpoint is not connected\n") - # start glusterd again for the next set of test steps + # Start glusterd again for the next set of test steps g.log.info("starting glusterd on %s ", self.servers[1]) ret = start_glusterd(self.servers[1]) self.assertTrue(ret, "glusterd couldn't start successfully on %s" % self.servers[1]) - # reboot a server and then trying to peer probe at the time of reboot - g.log.info("Rebooting %s and checking peer probe", self.servers[1]) - reboot = g.run_async(self.servers[1], "reboot") - - # Mandatory sleep for 3 seconds to make sure node is in halted state - sleep(3) + # Bring down the network for sometime + network_status = bring_down_network_interface(self.servers[1], 150) # Peer probing the node using IP when it is still not online - g.log.info("Peer probing node %s which has been issued a reboot ", + g.log.info("Peer probing node %s when network is down", self.servers[1]) ret, _, err = peer_probe(self.mnode, self.servers[1]) self.assertNotEqual(ret, 0, "Peer probe passed when it was expected to" " fail") - self.assertEqual(err, "peer probe: failed: Probe returned with " - "Transport endpoint is not connected\n") + self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe " + "returned with Transport endpoint" + " is not connected") # Peer probing the node using hostname when it is still not online g.log.info("Peer probing node %s using hostname which is still " @@ -118,35 +112,21 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): ret, _, err = peer_probe(self.mnode, hostname[1].strip()) self.assertNotEqual(ret, 0, "Peer probe should not pass when node " "has not come online") - self.assertEqual(err, "peer probe: failed: Probe returned with " - "Transport endpoint is not connected\n") + self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe " + "returned with Transport endpoint" + " is not connected") + + ret, _, _ = network_status.async_communicate() + if ret != 0: + g.log.error("Failed to perform network interface ops") - ret, _, _ = reboot.async_communicate() - self.assertEqual(ret, 255, "reboot failed") - - # Validate if rebooted node is online or not - count = 0 - while count < 40: - sleep(15) - ret, _ = are_nodes_online(self.servers[1]) - if ret: - g.log.info("Node %s is online", self.servers[1]) - break - count += 1 - self.assertTrue(ret, "Node in test not yet online") - - # check if glusterd is running post reboot - ret = wait_for_glusterd_to_start(self.servers[1], - glusterd_start_wait_timeout=120) - self.assertTrue(ret, "Glusterd service is not running post reboot") - - # peer probe the node must pass + # Peer probe the node must pass g.log.info("peer probing node %s", self.servers[1]) ret, _, err = peer_probe(self.mnode, self.servers[1]) self.assertEqual(ret, 0, "Peer probe has failed unexpectedly with " "%s " % err) - # checking if core file created in "/", "/tmp" and "/var/log/core" + # Checking if core file created in "/", "/tmp" and "/var/log/core" ret = is_core_file_created(self.servers, test_timestamp) self.assertTrue(ret, "core file found") diff --git a/tests/functional/glusterd/test_verify_df_output.py b/tests/functional/glusterd/test_verify_df_output.py new file mode 100644 index 000000000..4eac9193b --- /dev/null +++ b/tests/functional/glusterd/test_verify_df_output.py @@ -0,0 +1,171 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, + runs_on) +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.io.utils import validate_io_procs +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import (replace_brick_from_volume, + shrink_volume, expand_volume) +from glustolibs.gluster.brick_libs import get_all_bricks + + +@runs_on([['distributed-dispersed', 'distributed-replicated', + 'distributed-arbiter', 'dispersed', 'replicated', + 'arbiter'], + ['glusterfs']]) +class VerifyDFWithReplaceBrick(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + if not upload_scripts(cls.clients, [cls.script_upload_path]): + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def _perform_io_and_validate(self): + """ Performs IO on the mount points and validates it""" + all_mounts_procs, count = [], 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 3 --max-num-of-dirs 3 " + "--num-of-files 2 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + count = count + 10 + + # Validating IO's on mount point and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("Successfully validated IO's") + + def _replace_bricks_and_wait_for_heal_completion(self): + """ Replaces all the bricks and waits for the heal to complete""" + existing_bricks = get_all_bricks(self.mnode, self.volname) + for brick_to_replace in existing_bricks: + ret = replace_brick_from_volume(self.mnode, self.volname, + self.servers, + self.all_servers_info, + src_brick=brick_to_replace) + self.assertTrue(ret, + "Replace of %s failed" % brick_to_replace) + g.log.info("Replace of brick %s successful for volume %s", + brick_to_replace, self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + g.log.info('Heal has completed successfully') + + def _get_mount_size_from_df_h_output(self): + """ Extracts the mount size from the df -h output""" + + split_cmd = " | awk '{split($0,a,\" \");print a[2]}' | sed 's/.$//'" + cmd = ("cd {};df -h | grep {} {}".format(self.mounts[0].mountpoint, + self.volname, split_cmd)) + ret, mount_size, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "Failed to extract mount size") + return float(mount_size.split("\n")[0]) + + def test_verify_df_output_when_brick_replaced(self): + """ + - Take the output of df -h. + - Replace any one brick for the volumes. + - Wait till the heal is completed + - Repeat steps 1, 2 and 3 for all bricks for all volumes. + - Check if there are any inconsistencies in the output of df -h + - Remove bricks from volume and check output of df -h + - Add bricks to volume and check output of df -h + """ + + # Perform some IO on the mount point + self._perform_io_and_validate() + + # Get the mount size from df -h output + initial_mount_size = self._get_mount_size_from_df_h_output() + + # Replace all the bricks and wait till the heal completes + self._replace_bricks_and_wait_for_heal_completion() + + # Get df -h output after brick replace + mount_size_after_replace = self._get_mount_size_from_df_h_output() + + # Verify the mount point size remains the same after brick replace + self.assertEqual(initial_mount_size, mount_size_after_replace, + "The mount sizes before and after replace bricks " + "are not same") + + # Add bricks + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info, force=True) + self.assertTrue(ret, "Failed to add-brick to volume") + + # Get df -h output after volume expand + mount_size_after_expand = self._get_mount_size_from_df_h_output() + + # Verify df -h output returns greater value + self.assertGreater(mount_size_after_expand, initial_mount_size, + "The mount size has not increased after expanding") + + # Remove bricks + ret = shrink_volume(self.mnode, self.volname, force=True) + self.assertTrue(ret, ("Remove brick operation failed on " + "%s", self.volname)) + g.log.info("Remove brick operation is successful on " + "volume %s", self.volname) + + # Get df -h output after volume shrink + mount_size_after_shrink = self._get_mount_size_from_df_h_output() + + # Verify the df -h output returns smaller value + self.assertGreater(mount_size_after_expand, mount_size_after_shrink, + "The mount size has not reduced after shrinking") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts): + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() |