1 files changed, 376 insertions, 0 deletions
diff --git a/tests/functional/dht/test_excerise_rebalance_command.py b/tests/functional/dht/test_excerise_rebalance_command.py
new file mode 100644
index 000000000..b582df518
--- /dev/null
+++ b/tests/functional/dht/test_excerise_rebalance_command.py
@@ -0,0 +1,376 @@
+#  Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License along
+#  with this program; if not, write to the Free Software Foundation, Inc.,
+#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from time import sleep
+from glusto.core import Glusto as g
+from glustolibs.gluster.constants import \
+    TEST_LAYOUT_IS_COMPLETE as LAYOUT_IS_COMPLETE
+from glustolibs.gluster.constants import FILETYPE_DIRS
+from glustolibs.gluster.dht_test_utils import validate_files_in_dir
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.rebalance_ops import (
+    get_rebalance_status,
+    rebalance_start,
+    rebalance_status,
+    rebalance_stop,
+    wait_for_fix_layout_to_complete,
+    wait_for_rebalance_to_complete)
+from glustolibs.gluster.volume_libs import (
+    expand_volume,
+    log_volume_info_and_status,
+    verify_all_process_of_volume_are_online,
+    form_bricks_list_to_add_brick,
+    wait_for_volume_process_to_be_online)
+from glustolibs.io.utils import (
+    collect_mounts_arequal,
+    list_all_files_and_dirs_mounts,
+    wait_for_io_to_complete)
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.gluster.brick_ops import add_brick
+
+
+@runs_on([['distributed', 'replicated', 'distributed-replicated',
+           'dispersed', 'distributed-dispersed'],
+          ['glusterfs']])
+class TestExerciseRebalanceCommand(GlusterBaseClass):
+    @classmethod
+    def setUpClass(cls):
+
+        # Calling GlusterBaseClass setUpClass
+        GlusterBaseClass.setUpClass.im_func(cls)
+
+        # Setup Volume and Mount Volume
+        g.log.info("Starting to Setup Volume and Mount Volume")
+        ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+        # Upload io scripts for running IO on mounts
+        g.log.info("Upload io scripts to clients %s for running IO on "
+                   "mounts", cls.clients)
+        script_local_path = ("/usr/share/glustolibs/io/scripts/"
+                             "file_dir_ops.py")
+        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+                                  "file_dir_ops.py")
+        ret = upload_scripts(cls.clients, script_local_path)
+        if not ret:
+            raise ExecutionError("Failed to upload IO scripts to clients %s" %
+                                 cls.clients)
+        g.log.info("Successfully uploaded IO scripts to clients %s",
+                   cls.clients)
+
+    def setUp(self):
+        # Calling GlusterBaseClass setUp
+        GlusterBaseClass.setUp.im_func(self)
+
+        # Start IO on mounts
+        g.log.info("Starting IO on all mounts...")
+        self.all_mounts_procs = []
+        for index, mount_obj in enumerate(self.mounts, start=1):
+            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
+                       mount_obj.mountpoint)
+            cmd = ("python %s create_deep_dirs_with_files "
+                   "--dirname-start-num %d "
+                   "--dir-depth 2 "
+                   "--dir-length 2 "
+                   "--max-num-of-dirs 2 "
+                   "--num-of-files 10 %s" % (self.script_upload_path,
+                                             index + 10,
+                                             mount_obj.mountpoint))
+            proc = g.run_async(mount_obj.client_system, cmd,
+                               user=mount_obj.user)
+            self.all_mounts_procs.append(proc)
+
+        # Wait for IO to complete
+        g.log.info("Wait for IO to complete")
+        ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
+        if not ret:
+            raise ExecutionError("IO failed on some of the clients")
+        g.log.info("IO is successful on all mounts")
+
+        # List all files and dirs created
+        g.log.info("List all files and directories:")
+        ret = list_all_files_and_dirs_mounts(self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to list all files and dirs")
+        g.log.info("Listing all files and directories is successful")
+
+        # DHT Layout validation
+        g.log.debug("Verifying hash layout values %s:%s",
+                    self.clients[0], self.mounts[0].mountpoint)
+        ret = validate_files_in_dir(self.clients[0], self.mounts[0].mountpoint,
+                                    test_type=LAYOUT_IS_COMPLETE,
+                                    file_type=FILETYPE_DIRS)
+        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
+        g.log.info("LAYOUT_IS_COMPLETE: PASS")
+
+    def test_fix_layout_start(self):
+        # pylint: disable=too-many-statements
+        # Get arequal checksum before starting fix-layout
+        g.log.info("Getting arequal checksum before fix-layout")
+        arequal_checksum_before_fix_layout = collect_mounts_arequal(self.
+                                                                    mounts)
+
+        # Log Volume Info and Status before expanding the volume.
+        g.log.info("Logging volume info and Status before expanding volume")
+        ret = log_volume_info_and_status(self.mnode, self.volname)
+        self.assertTrue(ret, "Logging volume info and status failed on "
+                        "volume %s" % self.volname)
+        g.log.info("Successful in logging volume info and status of volume "
+                   "%s", self.volname)
+
+        # Form brick list for expanding volume
+        add_brick_list = form_bricks_list_to_add_brick(
+            self.mnode, self.volname, self.servers, self.all_servers_info,
+            distribute_count=1)
+        self.assertIsNotNone(add_brick_list, ("Volume %s: Failed to form "
+                                              "bricks list to expand",
+                                              self.volname))
+        g.log.info("Volume %s: Formed bricks list to expand", self.volname)
+
+        # Expanding volume by adding bricks to the volume
+        g.log.info("Volume %s: Expand start")
+        ret, _, _ = add_brick(self.mnode, self.volname, add_brick_list)
+        self.assertEqual(ret, 0, ("Volume %s: Expand failed", self.volname))
+        g.log.info("Volume %s: Expand successful", self.volname)
+
+        # Wait for gluster processes to come online
+        g.log.info("Wait for gluster processes to come online")
+        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+        self.assertTrue(ret, ("Volume %s: one or more volume process are "
+                              "not up", self.volname))
+        g.log.info("All volume %s processes are online", self.volname)
+
+        # Log Volume Info and Status after expanding the volume
+        g.log.info("Logging volume info and Status after expanding volume")
+        ret = log_volume_info_and_status(self.mnode, self.volname)
+        self.assertTrue(ret, "Logging volume info and status failed on "
+                             "volume %s" % self.volname)
+        g.log.info("Successful in logging volume info and status of volume "
+                   "%s", self.volname)
+
+        # Verify volume's all process are online
+        g.log.info("Verifying volume's all process are online")
+        ret = verify_all_process_of_volume_are_online(self.mnode,
+                                                      self.volname)
+        self.assertTrue(ret, ("Volume %s : All process are not online",
+                              self.volname))
+        g.log.info("Volume %s : All process are online", self.volname)
+
+        # Start Rebalance fix-layout
+        g.log.info("Starting fix-layout on the volume")
+        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
+        self.assertEqual(ret, 0, ("Volume %s: fix-layout start failed"
+                                  "%s", self.volname))
+        g.log.info("Volume %s: fix-layout start success", self.volname)
+
+        # Wait for fix-layout to complete
+        g.log.info("Waiting for fix-layout to complete")
+        ret = wait_for_fix_layout_to_complete(self.mnode, self.volname)
+        self.assertTrue(ret, ("Volume %s: Fix-layout is still in-progress",
+                              self.volname))
+        g.log.info("Volume %s: Fix-layout completed successfully",
+                   self.volname)
+
+        # Check Rebalance status after fix-layout is complete
+        g.log.info("Checking Rebalance status")
+        ret, _, _ = rebalance_status(self.mnode, self.volname)
+        self.assertEqual(ret, 0, ("Volume %s: Failed to get rebalance status",
+                                  self.volname))
+        g.log.info("Volume %s: Successfully got rebalance status",
+                   self.volname)
+
+        # Get arequal checksum after fix-layout is complete
+        g.log.info("arequal after fix-layout is complete")
+        arequal_checksum_after_fix_layout = collect_mounts_arequal(self.mounts)
+
+        # Compare arequals checksum before and after fix-layout
+        g.log.info("Comparing checksum before and after fix-layout")
+        self.assertEqual(arequal_checksum_before_fix_layout,
+                         arequal_checksum_after_fix_layout,
+                         "arequal checksum is NOT MATCHNG")
+        g.log.info("arequal checksum is SAME")
+
+        # Check if there are any file migrations after fix-layout
+        status_info = get_rebalance_status(self.mnode, self.volname)
+        for node in range(len(status_info['node'])):
+            status_info = get_rebalance_status(self.mnode, self.volname)
+            file_migration_count = status_info['node'][node]['files']
+            self.assertEqual(int(file_migration_count), 0, (
+                "Server %s: Few files are migrated", self.servers[node]))
+            g.log.info("Server %s: No files are migrated")
+
+        # Check if new bricks contains any files
+        for brick in add_brick_list:
+            brick_node, brick_path = brick.split(":")
+            cmd = ('find %s -type f ! -perm 1000 | grep -ve .glusterfs'
+                   % brick_path)
+            _, out, _ = g.run(brick_node, cmd)
+            self.assertEqual(len(out), 0, (
+                ("Files(excluded linkto files) are present on %s:%s"),
+                (brick_node, brick_path)))
+            g.log.info("No files (excluded linkto files) are present on %s:%s",
+                       brick_node, brick_path)
+
+    def test_rebalance_start_status_stop(self):
+
+        # Getting arequal checksum before rebalance start
+        g.log.info("Getting arequal before rebalance start")
+        arequal_checksum_before_rebalance_start = collect_mounts_arequal(
+            self.mounts)
+
+        # Start Rebalance
+        g.log.info("Starting Rebalance on the volume")
+        ret, _, _ = rebalance_start(self.mnode, self.volname)
+        self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance",
+                                  self.volname))
+        g.log.info("Volume %s: Rebalance started ", self.volname)
+
+        # Stop on-going rebalance
+        g.log.info("Stop rebalance on the volume")
+        ret, _, _ = rebalance_stop(self.mnode, self.volname)
+        self.assertEqual(ret, 0, ("Volume %s: Failed to stop rebalance",
+                                  self.volname))
+        g.log.info("Checking whether the migration is stopped or not")
+        # Wait till the on-going file migration completes on all servers
+        count = 0
+        while count < 80:
+            rebalance_count = 0
+            for server in self.servers:
+                ret, _, _ = g.run(server, "pgrep rebalance")
+                if ret != 0:
+                    rebalance_count += 1
+            if rebalance_count == len(self.servers):
+                break
+            sleep(2)
+            count += 1
+        g.log.info("Volume %s: Rebalance process is not running on servers",
+                   self.volname)
+
+        # List all files and dirs from mount point
+        g.log.info("List all files and directories:")
+        ret = list_all_files_and_dirs_mounts(self.mounts)
+        g.log.info("Listing all files and directories is successful")
+
+        # Getting arequal checksum after the rebalance is stopped
+        g.log.info("Getting arequal checksum after the rebalance is stopped")
+        arequal_checksum_after_rebalance_stop = collect_mounts_arequal(self.
+                                                                       mounts)
+
+        # Comparing arequals checksum before start of rebalance and
+        #                       after the rebalance is stopped
+        g.log.info("Comparing arequals checksum before start of rebalance and"
+                   "after the rebalance is stopped")
+        self.assertEqual(arequal_checksum_before_rebalance_start,
+                         arequal_checksum_after_rebalance_stop,
+                         "arequal checksum is NOT MATCHNG")
+        g.log.info("arequal checksum is SAME")
+
+    def test_rebalance_with_force(self):
+
+        # Getting arequal checksum before rebalance
+        g.log.info("Getting arequal checksum before rebalance")
+        arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts)
+
+        # Log Volume Info and Status before expanding the volume.
+        g.log.info("Logging volume info and Status before expanding volume")
+        ret = log_volume_info_and_status(self.mnode, self.volname)
+        self.assertTrue(ret, "Logging volume info and status failed on "
+                             "volume %s" % self.volname)
+        g.log.info("Successful in logging volume info and"
+                   "status of volume %s", self.volname)
+
+        # Expanding volume by adding bricks to the volume
+        g.log.info("Start adding bricks to volume")
+        ret = expand_volume(self.mnode, self.volname, self.servers,
+                            self.all_servers_info)
+        self.assertTrue(ret, ("Volume %s: Expand failed", self.volname))
+        g.log.info("Volume %s: Expand successful", self.volname)
+
+        # Wait for gluster processes to come online
+        g.log.info("Wait for gluster processes to come online")
+        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+        self.assertTrue(ret, ("Volume %s: one or more volume process are "
+                              "not up", self.volname))
+        g.log.info("All volume %s processes are online", self.volname)
+
+        # Verify volume's all process are online
+        g.log.info("Verifying volume's all process are online")
+        ret = verify_all_process_of_volume_are_online(self.mnode,
+                                                      self.volname)
+        self.assertTrue(ret, ("Volume %s : All process are not online",
+                              self.volname))
+        g.log.info("Volume %s : All process are online", self.volname)
+
+        # Log Volume Info and Status after expanding the volume
+        g.log.info("Logging volume info and Status after expanding volume")
+        ret = log_volume_info_and_status(self.mnode, self.volname)
+        self.assertTrue(ret, ("Logging volume info and status failed on "
+                              "volume %s", self.volname))
+        g.log.info("Successful in logging volume info and"
+                   "status of volume %s", self.volname)
+
+        # Start Rebalance with force
+        g.log.info("Starting Rebalance on the volume")
+        ret, _, _ = rebalance_start(self.mnode, self.volname, force=True)
+        self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance with "
+                                  "force", self.volname))
+        g.log.info("Volume %s: Started rebalance with force option",
+                   self.volname)
+
+        # Wait for rebalance to complete
+        g.log.info("Waiting for rebalance to complete")
+        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+                                             timeout=600)
+        self.assertTrue(ret, ("Volume %s: Rebalance is still in-progress ",
+                              self.volname))
+        g.log.info("Volume %s: Rebalance completed", self.volname)
+
+        # Getting arequal checksum after rebalance
+        g.log.info("Getting arequal checksum after rebalance with force "
+                   "option")
+        arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts)
+
+        # Comparing arequals checksum before and after rebalance with force
+        # option
+        g.log.info("Comparing arequals checksum before and after rebalance"
+                   "with force option")
+        self.assertEqual(arequal_checksum_before_rebalance,
+                         arequal_checksum_after_rebalance,
+                         "arequal checksum is NOT MATCHNG")
+        g.log.info("arequal checksum is SAME")
+
+        # Checking if rebalance skipped any files
+        status = get_rebalance_status(self.mnode, self.volname)
+        for each_node in status['node']:
+            self.assertEqual(int(each_node['skipped']), 0,
+                             "Few files are skipped on node %s" %
+                             each_node['nodeName'])
+            g.log.info("No files are skipped on %s", each_node['nodeName'])
+
+    @classmethod
+    def tearDownClass(cls):
+        # Unmount Volume and Cleanup Volume
+        g.log.info("Starting to Unmount Volume and Cleanup Volume")
+        ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
+        g.log.info("Successful in Unmount Volume and Cleanup Volume")
+
+        # Calling GlusterBaseClass tearDown
+        GlusterBaseClass.tearDownClass.im_func(cls)