# Provenance (recovered from the patch header this module was extracted from):
#   Commit:        54274586953a19b0962980e19908e0f00f2f83c1
#   From:          karthik-us
#   Date:          Mon, 25 Jun 2018 17:00:26 +0530
#   Subject:       afr: Test data split-brain resolution using heal CLI
#   Change-Id:     I525f50a42e29270d9ac445d62e12c7e7e25a7ae3
#   Signed-off-by: karthik-us
#   Path:          tests/functional/afr/heal/test_data_split_brain_resolution.py
#
#  Copyright (C) 2017-2018  Red Hat, Inc. <http://www.redhat.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# pylint: disable=too-many-statements, too-many-locals

""" Description:
        Test cases in this module test whether the heal command for
        resolving split-brain resolves all the files that are in data
        split-brain, using one of the available methods
        (bigger-file/latest-mtime/source-brick).
"""

from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.gluster.volume_ops import set_volume_options
from glustolibs.gluster.heal_ops import trigger_heal
from glustolibs.gluster.heal_libs import (monitor_heal_completion,
                                          is_volume_in_split_brain)
from glustolibs.gluster.brick_libs import (get_all_bricks,
                                           bring_bricks_offline,
                                           bring_bricks_online,
                                           are_bricks_offline,
                                           are_bricks_online)


@runs_on([['replicated'],
          ['glusterfs']])
class HealDataSplitBrain(GlusterBaseClass):
    """Create data split-brain on a replica 2 volume and resolve it via CLI.

    The test writes different data to the same files while each of the two
    bricks is offline in turn, then resolves the resulting split-brain with
    ``gluster v heal <vol> split-brain source-brick <brick>`` and checks that
    all bricks end up with identical arequal checksums.
    """

    @classmethod
    def setUpClass(cls):
        """Force a replica 2 volume, upload the IO script, set up and mount.

        Raises:
            ExecutionError: if the IO script upload or the volume
                setup/mount fails.
        """
        # Calling GlusterBaseClass setUpClass
        # NOTE(review): `im_func` only exists on Python 2 unbound methods;
        # this call style needs revisiting before running under Python 3.
        GlusterBaseClass.setUpClass.im_func(cls)

        # Override Volume: the scenario below alternates between exactly two
        # bricks, so pin the replica count to 2.
        if cls.volume_type == "replicated":
            cls.volume['voltype'] = {
                'type': 'replicated',
                'replica_count': 2,
                'transport': 'tcp'}

        # Upload io scripts for running IO on mounts
        g.log.info("Upload io scripts to clients %s for running IO on "
                   "mounts", cls.clients)
        script_local_path = ("/usr/share/glustolibs/io/scripts/"
                             "file_dir_ops.py")
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        ret = upload_scripts(cls.clients, script_local_path)
        if not ret:
            raise ExecutionError("Failed to upload IO scripts "
                                 "to clients %s" % cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

        # Setup Volume and Mount Volume
        g.log.info("Starting to Setup Volume and Mount Volume")
        ret = cls.setup_volume_and_mount_volume(cls.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

    @classmethod
    def tearDownClass(cls):
        """Unmount and clean up the volume, then run the base teardown.

        Raises:
            ExecutionError: if unmounting or cleaning up the volume fails.
        """
        # Cleanup Volume
        g.log.info("Starting to clean up Volume %s", cls.volname)
        ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
        if not ret:
            # The failure here is in unmount/cleanup, not in volume creation.
            raise ExecutionError("Failed to unmount and cleanup volume %s"
                                 % cls.volname)
        g.log.info("Successful in cleaning up Volume %s", cls.volname)

        # NOTE(review): Python 2 only call style, see setUpClass.
        GlusterBaseClass.tearDownClass.im_func(cls)

    def verify_brick_arequals(self):
        """Assert that every brick of the volume has the same arequal total.

        Runs ``arequal-checksum`` on each brick's backend path (ignoring the
        .glusterfs, .landfill and .trashcan directories) and compares the
        value after the last ':' on the final output line of each brick with
        that of the first brick.
        """
        g.log.info("Fetching bricks for the volume: %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        # Fail with a clear message instead of a TypeError/silent pass when
        # no bricks could be fetched.
        self.assertTrue(bricks_list, 'Failed to get the bricks list of '
                        'volume %s' % self.volname)

        g.log.info('Getting arequal on bricks...')
        reference_total = None
        for brick in bricks_list:
            g.log.info('Getting arequal on bricks %s...', brick)
            node, brick_path = brick.split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan'
                       % brick_path)
            ret, arequal, _ = g.run(node, command)
            self.assertEqual(ret, 0, 'Failed to get arequal on brick %s'
                             % brick)
            g.log.info('Getting arequal for %s is successful', brick)

            # Only the trailing field of the last output line is compared.
            brick_total = arequal.splitlines()[-1].split(':')[-1]
            if reference_total is None:
                # First brick becomes the reference for all the others.
                reference_total = brick_total
            else:
                self.assertEqual(brick_total, reference_total,
                                 'Arequal for %s and %s are not equal'
                                 % (bricks_list[0], brick))
        g.log.info('All arequals are equal on all the bricks')

    def _rewrite_files_with_brick_offline(self, bricks_list, index, count):
        """Take one brick offline, overwrite all files, bring it back online.

        Args:
            bricks_list (list): all bricks of the volume.
            index (int): position in `bricks_list` of the brick to take
                offline while the files are rewritten.
            count (int): `count=` value passed to `dd` for both the files
                under dir.$i (bs=1M) and the top-level file.$i (bs=1K).
        """
        brick = bricks_list[index]
        mount = self.mounts[0]

        # Bring the brick offline and confirm it
        g.log.info('Bringing brick %s offline', brick)
        ret = bring_bricks_offline(self.volname, [brick])
        self.assertTrue(ret, 'Failed to bring bricks %s offline' % brick)

        ret = are_bricks_offline(self.mnode, self.volname, [brick])
        self.assertTrue(ret, 'Brick %s is not offline' % brick)
        g.log.info('Bringing brick %s offline is successful', brick)

        # Modify the contents of the files
        cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`;"
               "do dd if=/dev/urandom of=%s/dir.$i/file.$j bs=1M count=%d;"
               "done; dd if=/dev/urandom of=%s/file.$i bs=1K count=%d; done"
               % (mount.mountpoint, count, mount.mountpoint, count))
        ret, _, _ = g.run(mount.client_system, cmd)
        self.assertEqual(ret, 0, "Updating file contents failed")
        g.log.info("File contents updated successfully")

        # Bring the brick online and check the status
        g.log.info('Bringing brick %s online', brick)
        ret = bring_bricks_online(self.mnode, self.volname, [brick])
        self.assertTrue(ret, 'Failed to bring brick %s online' % brick)
        g.log.info('Bringing brick %s online is successful', brick)

        # All bricks are checked here, not just the one that was restarted.
        g.log.info("Verifying if brick %s is online", brick)
        ret = are_bricks_online(self.mnode, self.volname, bricks_list)
        self.assertTrue(ret, "Brick %s did not come up" % brick)
        g.log.info("Brick %s has come online.", brick)

    def test_data_split_brain_resolution(self):
        """Resolve data split-brain with the heal CLI `source-brick` option.

        Steps:
            1. Turn off metadata/entry/data self-heal and create files.
            2. Verify arequals match, then turn off self-heal-daemon.
            3. Rewrite every file with brick 1 offline, then again with
               brick 2 offline.
            4. Turn self-heal-daemon on and confirm the volume reports
               split-brain.
            5. Resolve with `source-brick` set to the second brick, trigger
               heal and wait for it to complete.
            6. Read every file from the mount, confirm the volume is no
               longer in split-brain and that arequals match again.
        """
        mountpoint = self.mounts[0].mountpoint
        client = self.mounts[0].client_system

        # Setting options
        g.log.info('Setting options...')
        options = {"metadata-self-heal": "off",
                   "entry-self-heal": "off",
                   "data-self-heal": "off"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Successfully set %s for volume %s",
                   options, self.volname)

        # Creating files and directories on client side
        g.log.info('Creating files and directories...')
        cmd = ("for i in `seq 1 10`; do mkdir %s/dir.$i; for j in `seq 1 5`;"
               "do dd if=/dev/urandom of=%s/dir.$i/file.$j bs=1K count=1;"
               "done; dd if=/dev/urandom of=%s/file.$i bs=1K count=1; done"
               % (mountpoint, mountpoint, mountpoint))
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, "Creating files and directories failed")
        g.log.info("Files & directories created successfully")

        # Check arequals for all the bricks
        g.log.info('Getting arequal before getting bricks offline...')
        self.verify_brick_arequals()
        g.log.info('Getting arequal before getting bricks offline '
                   'is successful')

        # Set option self-heal-daemon to OFF
        g.log.info('Setting option self-heal-daemon to off...')
        options = {"self-heal-daemon": "off"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'self-heal-daemon' is set to 'off' successfully")

        bricks_list = get_all_bricks(self.mnode, self.volname)
        # Both bricks are indexed below; fail early with a clear message.
        self.assertTrue(bricks_list and len(bricks_list) >= 2,
                        'Expected at least two bricks for volume %s, got %s'
                        % (self.volname, bricks_list))

        # Write while brick1 is offline, then different data while brick2
        # is offline, so each brick holds data the other one missed.
        self._rewrite_files_with_brick_offline(bricks_list, 0, 1)
        self._rewrite_files_with_brick_offline(bricks_list, 1, 2)

        # Set option self-heal-daemon to ON
        g.log.info('Setting option self-heal-daemon to on...')
        options = {"self-heal-daemon": "on"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

        g.log.info("Checking if files are in split-brain")
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertTrue(ret, "Unable to create split-brain scenario")
        g.log.info("Successfully created split brain scenario")

        g.log.info("Resolving split-brain by using the source-brick option "
                   "by choosing second brick as source for all the files")
        node, _ = bricks_list[1].split(':')
        command = ("gluster v heal %s split-brain source-brick %s"
                   % (self.volname, bricks_list[1]))
        ret, _, _ = g.run(node, command)
        self.assertEqual(ret, 0, "Command execution not successful")

        # triggering heal
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, "Heal not triggered")

        # waiting for heal to complete
        ret = monitor_heal_completion(self.mnode, self.volname,
                                      timeout_period=120)
        self.assertTrue(ret, "Heal not completed")

        # Try accessing the file content from the mount
        cmd = ("for i in `seq 1 10`; do cat %s/file.$i > /dev/null;"
               "for j in `seq 1 5` ; do cat %s/dir.$i/file.$j > /dev/null;"
               "done ; done"
               % (mountpoint, mountpoint))
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, "Unable to access the file contents")
        g.log.info("File contents are accessible")

        # checking if file is in split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, "File still in split-brain")
        g.log.info("Successfully resolved split brain situation using "
                   "CLI based resolution")

        # Check arequals for all the bricks
        g.log.info('Getting arequal for all the bricks after heal...')
        self.verify_brick_arequals()
        g.log.info('Getting arequal after heal is successful')