Diffstat (limited to 'tests/functional')
-rw-r--r-- | tests/functional/glusterd/test_rebalance_hang.py | 201
1 file changed, 201 insertions, 0 deletions
diff --git a/tests/functional/glusterd/test_rebalance_hang.py b/tests/functional/glusterd/test_rebalance_hang.py
new file mode 100644
index 000000000..8dde6d7d5
--- /dev/null
+++ b/tests/functional/glusterd/test_rebalance_hang.py
@@ -0,0 +1,201 @@
+# Copyright (C) 2018 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from time import sleep
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.volume_ops import (volume_create, volume_start,
+                                           get_volume_list, get_volume_status)
+from glustolibs.gluster.brick_libs import get_all_bricks
+from glustolibs.gluster.volume_libs import cleanup_volume
+from glustolibs.gluster.peer_ops import (peer_probe, peer_detach,
+                                         peer_probe_servers,
+                                         peer_detach_servers,
+                                         nodes_from_pool_list)
+from glustolibs.gluster.lib_utils import form_bricks_list
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.rebalance_ops import (rebalance_start,
+                                              wait_for_rebalance_to_complete)
+from glustolibs.gluster.mount_ops import mount_volume, umount_volume
+from glustolibs.io.utils import validate_io_procs
+from glustolibs.gluster.gluster_init import (start_glusterd, stop_glusterd,
+                                             is_glusterd_running)
+
+
+@runs_on([['distributed'], ['glusterfs']])
+class TestRebalanceHang(GlusterBaseClass):
+
+    def setUp(self):
+
+        # Performing peer detach
+        ret = peer_detach_servers(self.mnode, self.servers)
+        if not ret:
+            raise ExecutionError("Failed to detach servers %s"
+                                 % self.servers)
+        g.log.info("Peer detach SUCCESSFUL.")
+        GlusterBaseClass.setUp.im_func(self)
+
+    def tearDown(self):
+
+        # Unmount the volume
+        g.log.info("Starting to unmount volume %s", self.volname)
+        ret = umount_volume(self.mounts[0].client_system,
+                            self.mounts[0].mountpoint, mtype=self.mount_type)
+        self.assertTrue(ret, ("Failed to unmount volume %s" % self.volname))
+        g.log.info("Successfully unmounted volume %s", self.volname)
+
+        # Clean up all volumes and peer probe to form cluster
+        vol_list = get_volume_list(self.mnode)
+        if vol_list is not None:
+            for volume in vol_list:
+                ret = cleanup_volume(self.mnode, volume)
+                if not ret:
+                    raise ExecutionError("Failed to cleanup volume")
+                g.log.info("Volume deleted successfully : %s", volume)
+
+        # Peer probe detached servers
+        pool = nodes_from_pool_list(self.mnode)
+        for node in pool:
+            peer_detach(self.mnode, node)
+        ret = peer_probe_servers(self.mnode, self.servers)
+        if not ret:
+            raise ExecutionError("Failed to probe peer "
+                                 "servers %s" % self.servers)
+        g.log.info("Peer probe success for detached "
+                   "servers %s", self.servers)
+        GlusterBaseClass.tearDown.im_func(self)
+
+    def test_rebalance_hang(self):
+        """
+        In this test case:
+        1. Create a trusted storage pool of 2 nodes
+        2. Create a distributed volume with 2 bricks
+        3. Start the volume
+        4. Mount the volume
+        5. Create some data files on the mount point
+        6. Start rebalance with force
+        7. Kill glusterd on the 2nd node
+        8. Issue a volume status command and verify it does not hang
+        """
+
+        # pylint: disable=too-many-statements
+        my_server_info = {
+            self.servers[0]: self.all_servers_info[self.servers[0]]
+        }
+        my_servers = self.servers[0:2]
+        index = 1
+        ret, _, _ = peer_probe(self.servers[0], self.servers[index])
+        self.assertEqual(ret, 0, ("Peer probe from %s to %s failed",
+                                  self.servers[0], self.servers[index]))
+        g.log.info("Peer probe is successful from %s to "
+                   "%s", self.servers[0], self.servers[index])
+        key = self.servers[index]
+        my_server_info[key] = self.all_servers_info[key]
+
+        self.volname = "testvol"
+        bricks_list = form_bricks_list(self.mnode, self.volname, 2,
+                                       my_servers,
+                                       my_server_info)
+        g.log.info("Creating a volume %s ", self.volname)
+        ret, _, _ = volume_create(self.mnode, self.volname,
+                                  bricks_list, force=False)
+        self.assertEqual(ret, 0, ("Unable to create volume %s"
+                                  % self.volname))
+        g.log.info("Volume created successfully %s", self.volname)
+
+        ret, _, _ = volume_start(self.mnode, self.volname, False)
+        self.assertEqual(ret, 0, ("Failed to start the "
+                                  "volume %s", self.volname))
+        g.log.info("Get all the bricks of the volume")
+        bricks_list = get_all_bricks(self.mnode, self.volname)
+        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
+        g.log.info("Successfully got the list of bricks of volume")
+
+        # Mounting a volume
+        ret, _, _ = mount_volume(self.volname, mtype=self.mount_type,
+                                 mpoint=self.mounts[0].mountpoint,
+                                 mserver=self.mnode,
+                                 mclient=self.mounts[0].client_system)
+        self.assertEqual(ret, 0, ("Volume %s is not mounted" % self.volname))
+        g.log.info("Volume mounted successfully : %s", self.volname)
+
+        self.all_mounts_procs = []
+        # Create a nested directory tree with files of increasing size
+        command = ("cd %s/ ; "
+                   "for i in `seq 1 10` ; "
+                   "do mkdir l1_dir.$i ; "
+                   "for j in `seq 1 5` ; "
+                   "do mkdir l1_dir.$i/l2_dir.$j ; "
+                   "for k in `seq 1 10` ; "
+                   "do dd if=/dev/urandom of=l1_dir.$i/l2_dir.$j/test.$k "
+                   "bs=128k count=$k ; "
+                   "done ; "
+                   "done ; "
+                   "done ; "
+                   % (self.mounts[0].mountpoint))
+
+        proc = g.run_async(self.mounts[0].client_system, command,
+                           user=self.mounts[0].user)
+        self.all_mounts_procs.append(proc)
+        self.io_validation_complete = False
+        # Validate IO
+        g.log.info("Wait for IO to complete and validate IO ...")
+        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+        self.io_validation_complete = True
+        self.assertTrue(ret, "IO failed on some of the clients")
+        g.log.info("IO is successful on all mounts")
+
+        g.log.info("Starting rebalance with force on the volume")
+        ret, _, _ = rebalance_start(self.mnode, self.volname, False, True)
+        self.assertEqual(ret, 0, ("Failed to start rebalance for volume %s",
+                                  self.volname))
+        g.log.info("Successfully started rebalance on the volume %s",
+                   self.volname)
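+
+        # Stop glusterd on the second node while the rebalance is
+        # running (step 7); the rebalance and the volume commands
+        # issued afterwards must not hang because of this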
+        ret = stop_glusterd(self.servers[1])
+        self.assertTrue(ret, "Failed to stop glusterd on one of the nodes")
+        ret = is_glusterd_running(self.servers[1])
+        self.assertNotEqual(ret, 0, ("Glusterd is not stopped on server %s",
+                                     self.servers[1]))
+        g.log.info("Glusterd stop on the node : %s succeeded",
+                   self.servers[1])
+
+        # Wait for rebalance to complete
+        g.log.info("Waiting for rebalance to complete")
+        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
+        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
+                              "%s", self.volname))
+        g.log.info("Rebalance is successfully complete on the volume %s",
+                   self.volname)
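+
+        # Issue a volume status query (step 8): with glusterd down on
+        # the second node this must return a result instead of hanging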
+        vol_status = get_volume_status(self.mnode, self.volname)
+        self.assertIsNotNone(vol_status, "Failed to get volume "
+                             "status for %s" % self.volname)
+
+        # Start glusterd on the node where it was stopped
+        ret = start_glusterd(self.servers[1])
+        self.assertTrue(ret, "glusterd start on the node failed")
+        # Poll for up to 120 seconds until glusterd is back up
+        count = 0
+        while count < 60:
+            ret = is_glusterd_running(self.servers[1])
+            if not ret:
+                break
+            sleep(2)
+            count += 1
+        self.assertEqual(ret, 0, "glusterd is not running on %s"
+                         % self.servers[1])
+        g.log.info("Glusterd start on the node %s succeeded",
+                   self.servers[1])