From 28b490b73f1e673ef6f7518c4e4640871ac699fc Mon Sep 17 00:00:00 2001
From: Rajesh Madaka
Date: Tue, 12 Jun 2018 15:59:31 +0530
Subject: Test replace brick when quorum not met

-> Create volume
-> Set quorum type
-> Set quorum ratio to 95%
-> Start the volume
-> Stop the glusterd on one node
-> Now quorum is in not met condition
-> Check all bricks went to offline or not
-> Perform replace brick operation
-> Start glusterd on same node which is already stopped
-> Check all bricks are in online or not
-> Verify in vol info that old brick not replaced with new brick

Change-Id: Iab84df9449feeaba66ff0df2d0acbddb6b4e7591
Signed-off-by: Rajesh Madaka
---
 .../glusterd/test_replace_brick_quorum_not_met.py | 225 +++++++++++++++++++++
 1 file changed, 225 insertions(+)
 create mode 100644 tests/functional/glusterd/test_replace_brick_quorum_not_met.py

diff --git a/tests/functional/glusterd/test_replace_brick_quorum_not_met.py b/tests/functional/glusterd/test_replace_brick_quorum_not_met.py
new file mode 100644
index 000000000..8af731048
--- /dev/null
+++ b/tests/functional/glusterd/test_replace_brick_quorum_not_met.py
@@ -0,0 +1,225 @@
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# pylint: disable= too-many-statements
+""" Description:
+      Test replace brick when quorum not met
+"""
+
+import random
+from time import sleep
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.lib_utils import form_bricks_list
+from glustolibs.gluster.volume_ops import (set_volume_options, volume_start,
+                                           volume_create, get_volume_status)
+from glustolibs.gluster.gluster_init import (stop_glusterd,
+                                             is_glusterd_running,
+                                             start_glusterd)
+from glustolibs.gluster.brick_libs import (are_bricks_offline,
+                                           are_bricks_online, get_all_bricks)
+from glustolibs.gluster.brick_ops import replace_brick
+
+
+@runs_on([['distributed-replicated'], ['glusterfs']])
+class TestReplaceBrickWhenQuorumNotMet(GlusterBaseClass):
+
+    def tearDown(self):
+        """
+        tearDown for every test
+        """
+        ret = is_glusterd_running(self.servers)
+        if ret:
+            ret = start_glusterd(self.servers)
+            if not ret:
+                raise ExecutionError("Glusterd not started on some of "
+                                     "the servers")
+
+        # Checking for peer status from every node
+        count = 0
+        while count < 80:
+            ret = self.validate_peers_are_connected()
+            if ret:
+                break
+            sleep(2)
+            count += 1
+
+        if not ret:
+            raise ExecutionError("Servers are not in peer probed state")
+
+        # Stopping the volume and cleaning up the volume
+        ret = self.cleanup_volume()
+        if not ret:
+            raise ExecutionError("Failed to cleanup the volume %s"
+                                 % self.volname)
+        g.log.info("Volume deleted successfully : %s", self.volname)
+
+        # Removing brick directories
+        if not self.replace_brick_failed:
+            node, brick_path = self.random_brick.split(r':')
+            cmd = "rm -rf " + brick_path
+            ret, _, _ = g.run(node, cmd)
+            if ret:
+                raise ExecutionError("Failed to delete the brick dirs")
+
+        # Calling GlusterBaseClass tearDown
+        GlusterBaseClass.tearDown.im_func(self)
+
+    def test_replace_brick_quorum(self):
+
+        '''
+        -> Create volume
+        -> Set quorum type
+        -> Set quorum ratio to 95%
+        -> Start the volume
+        -> Stop the glusterd on one node
+        -> Now quorum is in not met condition
+        -> Check all bricks went to offline or not
+        -> Perform replace brick operation
+        -> Start glusterd on same node which is already stopped
+        -> Check all bricks are in online or not
+        -> Verify in vol info that old brick not replaced with new brick
+        '''
+
+        # Forming brick list, 6 bricks for creating volume, 7th brick for
+        # performing replace brick operation
+        brick_list = form_bricks_list(self.mnode, self.volname, 7,
+                                      self.servers, self.all_servers_info)
+
+        # Create Volume
+        ret, _, _ = volume_create(self.mnode, self.volname,
+                                  brick_list[0:6], replica_count=3)
+        self.assertEqual(ret, 0, "Failed to create volume %s" % self.volname)
+        g.log.info("Volume created successfully %s", self.volname)
+
+        # Enabling server quorum
+        ret = set_volume_options(self.mnode, self.volname,
+                                 {'cluster.server-quorum-type': 'server'})
+        self.assertTrue(ret, "Failed to set server quorum on volume %s"
+                        % self.volname)
+        g.log.info("Able to set server quorum successfully on volume %s",
+                   self.volname)
+
+        # Setting quorum ratio in percentage
+        ret = set_volume_options(self.mnode, 'all',
+                                 {'cluster.server-quorum-ratio': '95%'})
+        self.assertTrue(ret, "Failed to set server quorum ratio on %s"
+                        % self.servers)
+        g.log.info("Able to set server quorum ratio successfully on %s",
+                   self.servers)
+
+        # Start the volume
+        ret, _, _ = volume_start(self.mnode, self.volname)
+        self.assertEqual(ret, 0, "Failed to start volume %s" % self.volname)
%s" % self.volname) + g.log.info("Volume started successfully %s", self.volname) + + # Stop glusterd on one of the node + random_server = random.choice(self.servers[1:]) + ret = stop_glusterd(random_server) + self.assertTrue(ret, "Failed to stop glusterd for %s" + % random_server) + g.log.info("Glusterd stopped successfully on server %s", + random_server) + + # Checking whether glusterd is running or not + ret = is_glusterd_running(random_server) + self.assertEqual(ret, 1, "Glusterd is still running on the node %s " + "where glusterd stopped" + % random_server) + g.log.info("Glusterd is not running on the server %s", + random_server) + + # Verifying node count in volume status after glusterd stopped + # on one of the server, Its not possible to check the brick status + # immediately in volume status after glusterd stop + count = 0 + while count < 100: + vol_status = get_volume_status(self.mnode, self.volname) + servers_count = len(vol_status[self.volname].keys()) + if servers_count == 5: + break + sleep(2) + count += 1 + + # creating brick list from volume status + offline_bricks = [] + vol_status = get_volume_status(self.mnode, self.volname) + for node in vol_status[self.volname]: + for brick_path in vol_status[self.volname][node]: + if brick_path != 'Self-heal Daemon': + offline_bricks.append(':'.join([node, brick_path])) + + # Checking bricks are offline or not with quorum ratio(95%) + ret = are_bricks_offline(self.mnode, self.volname, offline_bricks) + self.assertTrue(ret, "Bricks are online when quorum is in not met " + "condition for %s" % self.volname) + g.log.info("Bricks are offline when quorum is in not met " + "condition for %s", self.volname) + + # Getting random brick from offline brick list + self.random_brick = random.choice(offline_bricks) + + # Performing replace brick commit force when quorum not met + self.replace_brick_failed = False + ret, _, _ = replace_brick(self.mnode, self.volname, self.random_brick, + brick_list[6]) + self.assertNotEqual(ret, 0, "Replace brick should fail when quorum is " + "in not met condition but replace brick " + "success on %s" % self.volname) + g.log.info("Failed to replace brick when quorum is in not met " + "condition %s", self.volname) + self.replace_brick_failed = True + + # Start glusterd on one of the node + ret = start_glusterd(random_server) + self.assertTrue(ret, "Failed to start glusterd on server %s" + % random_server) + g.log.info("Glusterd started successfully on server %s", + random_server) + + # Verifying node count in volume status after glusterd started + # on one of the servers, Its not possible to check the brick status + # immediately in volume status after glusterd start + count = 0 + while count < 100: + vol_status = get_volume_status(self.mnode, self.volname) + servers_count = len(vol_status[self.volname].keys()) + if servers_count == 6: + break + sleep(2) + count += 1 + + # Checking bricks are online or not + count = 0 + while count < 100: + ret = are_bricks_online(self.mnode, self.volname, + brick_list[0:6]) + if ret: + break + sleep(2) + count += 1 + self.assertTrue(ret, "All bricks are not online for %s" + % self.volname) + g.log.info("All bricks are online for volume %s", self.volname) + + # Comparing brick lists of before and after performing replace brick + # operation + after_brick_list = get_all_bricks(self.mnode, self.volname) + self.assertListEqual(after_brick_list, brick_list[0:6], + "Bricks are not same before and after performing " + "replace brick operation for volume %s" + % self.volname) + 
g.log.info("Bricks are same before and after performing replace " + "brick operation for volume %s", self.volname) -- cgit