From b585613e73ef8a2906e21d03e2050a4308c5b772 Mon Sep 17 00:00:00 2001
From: Vitalii Koriakov <vkoriako@redhat.com>
Date: Fri, 15 Jun 2018 14:41:21 +0300
Subject: Resolving meta-data split-brain from client using extended attributes

Change-Id: I2ba674b8ea97964040f2e7d47a169c1e41808116
Signed-off-by: Vitalii Koriakov <vkoriako@redhat.com>
---
 ...ng_meta_data_split_brain_extended_attributes.py | 257 +++++++++++++++++++++
 1 file changed, 257 insertions(+)
 create mode 100644 tests/functional/arbiter/test_resolving_meta_data_split_brain_extended_attributes.py

(limited to 'tests')

diff --git a/tests/functional/arbiter/test_resolving_meta_data_split_brain_extended_attributes.py b/tests/functional/arbiter/test_resolving_meta_data_split_brain_extended_attributes.py
new file mode 100644
index 000000000..e6f68c0cf
--- /dev/null
+++ b/tests/functional/arbiter/test_resolving_meta_data_split_brain_extended_attributes.py
@@ -0,0 +1,257 @@
+#  Copyright (C) 2015-2018  Red Hat, Inc. <http://www.redhat.com>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License along
+#  with this program; if not, write to the Free Software Foundation, Inc.,
+#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+                                           bring_bricks_online,
+                                           are_bricks_offline)
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+                                          is_heal_complete,
+                                          is_volume_in_split_brain)
+from glustolibs.io.utils import (validate_io_procs,
+                                 list_all_files_and_dirs_mounts,
+                                 wait_for_io_to_complete)
+
+
+@runs_on([['distributed-replicated'],
+          ['glusterfs', 'nfs', 'cifs']])
+class TestArbiterSelfHeal(GlusterBaseClass):
+    """
+    Description:
+        Arbiter Test cases related to
+        healing in default configuration of the volume
+    """
+
+    def setUp(self):
+        # Calling GlusterBaseClass setUp
+        GlusterBaseClass.setUp.im_func(self)
+
+        # Setup Volumes
+        if self.volume_type == "distributed-replicated":
+            # Redefine distributed-replicated volume
+            self.volume['voltype'] = {
+                'type': 'distributed-replicated',
+                'replica_count': 3,
+                'dist_count': 2,
+                'arbiter_count': 1,
+                'transport': 'tcp'}
+
+        self.all_mounts_procs = []
+        self.io_validation_complete = False
+
+        # Setup Volume and Mount Volume
+        g.log.info("Starting to Setup Volume and Mount Volume")
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+    def tearDown(self):
+        """
+        If test method failed before validating IO, tearDown waits for the
+        IO's to complete and checks for the IO exit status
+
+        Cleanup and umount volume
+        """
+        if not self.io_validation_complete:
+            g.log.info("Wait for IO to complete as IO validation did not "
+                       "succeed in test method")
+            ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
+            if not ret:
+                raise ExecutionError("IO failed on some of the clients")
+            g.log.info("IO is successful on all mounts")
+
+            # List all files and dirs created
+            g.log.info("List all files and directories:")
+            ret = list_all_files_and_dirs_mounts(self.mounts)
+            if not ret:
+                raise ExecutionError("Failed to list all files and dirs")
+            g.log.info("Listing all files and directories is successful")
+
+        # Cleanup and umount volume
+        g.log.info("Starting to Unmount Volume and Cleanup Volume")
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to umount the vol & cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
+
+        # Calling GlusterBaseClass teardown
+        GlusterBaseClass.tearDown.im_func(self)
+
+    def test_resolving_meta_data(self):
+        """
+        - Create a file test_file.txt
+        - Find out which brick the file resides on and kill arbiter brick
+        in the replica pair
+        - Modify the permissions of the file
+        - Bring back the killed brick
+        - Kill the other brick in the replica pair
+        - Modify the permissions of the file
+        - Bring back the killed brick
+        - Trigger heal
+        - Check if heal is completed
+        - Check for split-brain
+        """
+        # pylint: disable=too-many-locals,too-many-statements
+        # Creating files on client side
+        file_to_create = 'test_file.txt'
+        for mount_obj in self.mounts:
+            g.log.info("Generating data for %s:%s",
+                       mount_obj.client_system, mount_obj.mountpoint)
+            # Create file
+            g.log.info('Creating file...')
+            command = ("cd %s ; "
+                       "touch %s"
+                       % (mount_obj.mountpoint, file_to_create))
+
+            proc = g.run_async(mount_obj.client_system, command,
+                               user=mount_obj.user)
+            self.all_mounts_procs.append(proc)
+        self.io_validation_complete = False
+
+        # Validate IO
+        g.log.info("Wait for IO to complete and validate IO ...")
+        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+        self.assertTrue(ret, "IO failed on some of the clients")
+        self.io_validation_complete = True
+        g.log.info("IO is successful on all mounts")
+
+        # get bricks with file
+        g.log.info('Getting bricks with file...')
+        subvols_dict = get_subvols(self.mnode, self.volname)
+        brick_list_with_file = []
+        for subvol in subvols_dict['volume_subvols']:
+            for brick in subvol:
+                node, brick_path = brick.split(':')
+                ret, brick_file_list, _ = g.run(node, 'ls %s' % brick_path)
+                if 'test_file.txt' in brick_file_list:
+                    brick_list_with_file.append(brick)
+        g.log.info('Bricks with file: %s', brick_list_with_file)
+
+        # Bring arbiter brick offline
+        bricks_to_bring_offline = [brick_list_with_file[-1]]
+        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
+        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+                        bricks_to_bring_offline)
+
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks %s are not offline'
+                        % bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s offline is successful',
+                   bricks_to_bring_offline)
+
+        # Modify the data
+        self.all_mounts_procs = []
+        for mount_obj in self.mounts:
+            g.log.info("Modifying data for %s:%s",
+                       mount_obj.client_system, mount_obj.mountpoint)
+            # Modify the permissions
+            g.log.info('Modifying the permissions of the file...')
+            command = ("cd %s ; "
+                       "chmod 600 %s"
+                       % (mount_obj.mountpoint, file_to_create))
+
+            proc = g.run_async(mount_obj.client_system, command,
+                               user=mount_obj.user)
+            self.all_mounts_procs.append(proc)
+        self.io_validation_complete = False
+
+        # Validate IO
+        g.log.info("Wait for IO to complete and validate IO ...")
+        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+        self.assertTrue(ret, "IO failed on some of the clients")
+        self.io_validation_complete = True
+        g.log.info("IO is successful on all mounts")
+
+        # Bring arbiter brick online
+        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s online' %
+                        bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s online is successful',
+                   bricks_to_bring_offline)
+
+        # Bring 1-st data brick offline
+        bricks_to_bring_offline = [brick_list_with_file[0]]
+        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
+        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+                        bricks_to_bring_offline)
+
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks %s are not offline'
+                        % bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s offline is successful',
+                   bricks_to_bring_offline)
+
+        # Modify the data
+        self.all_mounts_procs = []
+        for mount_obj in self.mounts:
+            g.log.info("Modifying data for %s:%s",
+                       mount_obj.client_system, mount_obj.mountpoint)
+            # Modify the permissions
+            g.log.info('Modifying the permissions of the file...')
+            command = ("cd %s ; "
+                       "chmod 644 %s"
+                       % (mount_obj.mountpoint, file_to_create))
+
+            proc = g.run_async(mount_obj.client_system, command,
+                               user=mount_obj.user)
+            self.all_mounts_procs.append(proc)
+        self.io_validation_complete = False
+
+        # Validate IO
+        g.log.info("Wait for IO to complete and validate IO ...")
+        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+        self.assertTrue(ret, "IO failed on some of the clients")
+        self.io_validation_complete = True
+        g.log.info("IO is successful on all mounts")
+
+        # Bring 1-st data brick online
+        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s online' %
+                        bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s online is successful',
+                   bricks_to_bring_offline)
+
+        # Start healing
+        ret = trigger_heal(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal is not started')
+        g.log.info('Healing is started')
+
+        # Monitor heal completion
+        ret = monitor_heal_completion(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal has not yet completed')
+
+        # Check if heal is completed
+        ret = is_heal_complete(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal is not complete')
+        g.log.info('Heal is completed successfully')
+
+        # Check for split-brain
+        ret = is_volume_in_split_brain(self.mnode, self.volname)
+        self.assertFalse(ret, 'Volume is in split-brain state')
+        g.log.info('Volume is not in split-brain state')
-- 
cgit