Diffstat (limited to 'tests/functional/afr/heal')
-rw-r--r-- | tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py | 175
1 file changed, 175 insertions, 0 deletions
diff --git a/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py
new file mode 100644
index 000000000..163596bb7
--- /dev/null
+++ b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py
@@ -0,0 +1,175 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from time import sleep
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.brick_libs import (get_all_bricks, are_bricks_offline,
+                                           bring_bricks_offline,
+                                           get_online_bricks_list,
+                                           are_bricks_online)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.gluster_init import restart_glusterd
+from glustolibs.gluster.glusterfile import set_fattr, get_fattr
+from glustolibs.gluster.heal_libs import (is_volume_in_split_brain,
+                                          monitor_heal_completion)
+from glustolibs.gluster.lib_utils import collect_bricks_arequal
+
+
+@runs_on([['replicated'], ['glusterfs']])
+class TestHealForConservativeMergeWithTwoBricksBlame(GlusterBaseClass):
+
+    def setUp(self):
+        # Calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+
+        # Setup volume and mount it
+        if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+
+    def tearDown(self):
+        # Unmount and cleanup the volume
+        if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+            raise ExecutionError("Unable to unmount and cleanup volume")
+
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
+
+    def _bring_brick_offline_and_check(self, brick):
+        """Bring the given brick offline and validate that it is offline"""
+        ret = bring_bricks_offline(self.volname, [brick])
+        self.assertTrue(ret, "Unable to bring brick: {} offline".format(brick))
+
+        # Validate the brick is offline
+        ret = are_bricks_offline(self.mnode, self.volname, [brick])
+        self.assertTrue(ret, "Brick: {} is still online".format(brick))
+
+    def _get_fattr_for_the_brick(self, brick):
+        """Get the trusted.afr.<volname>-client-0 xattr of dir1 on a brick"""
+        host, fqpath = brick.split(":")
+        fqpath = fqpath + "/dir1"
+        fattr = "trusted.afr.{}-client-0".format(self.volname)
+        return get_fattr(host, fqpath, fattr, encode="hex")
+
+    def _check_peers_status(self):
+        """Validate that all peers are connected, retrying up to 4 times"""
+        count = 0
+        while count < 4:
+            if self.validate_peers_are_connected():
+                return
+            sleep(5)
+            count += 1
+        self.fail("Peers are not in connected state")
+
+    def test_heal_for_conservative_merge_with_two_bricks_blame(self):
+        """
+        1) Create a 1x3 volume and fuse mount it
+        2) On the mount point, create a directory dir1
+        3) Pkill glusterfsd of b1 on node1 (b2 on node2 and b3 on node3 up)
+        4) touch f{1..10} under dir1
+        5) The xattrs of b2 and b3 blame b1, as the files were created
+           while b1 was down
+        6) Reset the xattr on b3 so that it does NOT blame b1, using setfattr
+        7) Now pkill glusterfsd of b2 on node2
+        8) Restart glusterd on node1 to bring up b1
+        9) Now b1 is online, b2 is down and b3 is online
+        10) touch x{1..10} under dir1 itself
+        11) Again reset the xattr on b3 so that it doesn't blame b2,
+            as done for b1 in step 6
+        12) Restart glusterd on node2 hosting b2 to bring all bricks online
+        13) Check heal info, split-brain status and arequal of the bricks
+        """
+        # pylint: disable=too-many-locals
+        # Create dir1 on the mountpoint
+        path = self.mounts[0].mountpoint + "/dir1"
+        ret = mkdir(self.mounts[0].client_system, path, parents=True)
+        self.assertTrue(ret, "Directory {} creation failed".format(path))
+
+        all_bricks = get_all_bricks(self.mnode, self.volname)
+        self.assertIsNotNone(all_bricks, "Unable to fetch bricks of volume")
+        brick1, brick2, brick3 = all_bricks
+
+        # Bring the first brick offline
+        self._bring_brick_offline_and_check(brick1)
+
+        # Create files f{1..10} under dir1
+        cmd = ("cd {mpt}; for i in `seq 1 10`; do touch f$i"
+               "; done".format(mpt=path))
+        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+        self.assertEqual(ret, 0, "Unable to create files under dir1")
+
+        # Check the xattrs of b2 and b3 blame b1 and are identical
+        self.assertEqual(self._get_fattr_for_the_brick(brick2),
+                         self._get_fattr_for_the_brick(brick3),
+                         "Xattrs of bricks b2 and b3 differ; both "
+                         "should blame brick: {}".format(brick1))
+
+        # On b3, reset the xattr of dir1 that blames b1
+        first_xattr_to_reset = "trusted.afr.{}-client-0".format(self.volname)
+        xattr_value = "0x000000000000000000000000"
+        host, brick_path = brick3.split(":")
+        brick_path = brick_path + "/dir1"
+        ret = set_fattr(host, brick_path, first_xattr_to_reset, xattr_value)
+        self.assertTrue(ret, "Unable to set xattr for the directory")
+
+        # Kill brick2 on node2
+        self._bring_brick_offline_and_check(brick2)
+
+        # Restart glusterd on node1 to bring brick1 online
+        self.assertTrue(restart_glusterd([brick1.split(":")[0]]),
+                        "Unable to restart glusterd")
+        # Check peer status after the glusterd restart
+        self._check_peers_status()
+
+        # Verify brick b1 on node1 is back online
+        online_bricks = get_online_bricks_list(self.mnode, self.volname)
+        self.assertIsNotNone(online_bricks, "Unable to fetch online bricks")
+        self.assertIn(brick1, online_bricks, "Brick: {} is still offline "
+                      "after glusterd restart".format(brick1))
+
+        # Create 10 files x{1..10} under dir1
+        cmd = ("cd {mpt}; for i in `seq 1 10`; do touch x$i"
+               "; done".format(mpt=path))
+        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+        self.assertEqual(ret, 0, "Unable to create files under dir1")
+
+        # On b3, reset the xattr of dir1 that blames b2
+        second_xattr_to_reset = "trusted.afr.{}-client-1".format(self.volname)
+        ret = set_fattr(host, brick_path, second_xattr_to_reset, xattr_value)
+        self.assertTrue(ret, "Unable to set xattr for the directory")
+
+        # Restart glusterd on node2 to bring brick2 online
+        self.assertTrue(restart_glusterd([brick2.split(":")[0]]),
+                        "Unable to restart glusterd")
+        self._check_peers_status()
+
+        self.assertTrue(are_bricks_online(self.mnode, self.volname,
+                                          [brick2]),
+                        "Brick: {} is not online".format(brick2))
+
+        # Verify no files are in split-brain and heal completes
+        self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname),
+                         "Some files are in split brain for "
+                         "volume: {}".format(self.volname))
+        self.assertTrue(monitor_heal_completion(self.mnode, self.volname),
+                        "Conservative merge of files failed")
+
+        # Check the arequal checksum is identical on all the bricks
+        ret, arequal_from_the_bricks = collect_bricks_arequal(all_bricks)
+        self.assertTrue(ret, "Failed to collect arequal across the bricks"
+                        " in the subvol {}".format(all_bricks))
+        self.assertEqual(len(set(arequal_from_the_bricks)), 1, "Arequal is "
+                         "not the same on all the bricks in the subvol")
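Note on the xattr this test manipulates: the trusted.afr.<volname>-client-N attributes are AFR's changelog ("blame") entries, 12 bytes holding three big-endian 32-bit counters for pending data, metadata and entry operations against brick N. A minimal sketch of how the hex string returned by get_fattr could be decoded; the helper name and sample values are illustrative, not part of the patch:

import struct

def decode_afr_pending(hex_value):
    """Split a 12-byte AFR changelog xattr into its three counters.

    E.g. '0x000000000000000100000002' -> (0, 1, 2): no pending data
    ops, 1 pending metadata op and 2 pending entry ops blamed on the
    peer brick.
    """
    raw = bytes.fromhex(hex_value.replace("0x", "", 1))
    return struct.unpack(">III", raw)

# The all-zero value the test writes via set_fattr means "no blame":
print(decode_afr_pending("0x000000000000000000000000"))  # (0, 0, 0)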
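For reference, steps 6 and 11 reduce to the following use of the glustolibs helpers the test imports. The host, brick path and volume name below are placeholders; zeroing the counters on b3 leaves only one brick blaming each peer, so the self-heal daemon can reconcile dir1 with a conservative merge (a union of the directory entries) rather than flagging split-brain:

from glustolibs.gluster.glusterfile import get_fattr, set_fattr

# Placeholder values -- substitute a real brick host and path
host = "node3.example.com"
dir_on_brick = "/bricks/brick3/dir1"
blame_xattr = "trusted.afr.testvol-client-0"   # blames the first brick

# Read the current blame counters, hex-encoded
print(get_fattr(host, dir_on_brick, blame_xattr, encode="hex"))

# Zero the counters so this brick no longer blames client-0
set_fattr(host, dir_on_brick, blame_xattr, "0x000000000000000000000000")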