Diffstat (limited to 'tests/functional/afr')
-rwxr-xr-xtests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py262
-rwxr-xr-x[-rw-r--r--]tests/functional/afr/heal/test_self_heal_daemon_process.py234
2 files changed, 266 insertions, 230 deletions
diff --git a/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py
new file mode 100755
index 000000000..64f5254a5
--- /dev/null
+++ b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py
@@ -0,0 +1,262 @@
+# Copyright (C) 2016-2018 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+""" Description:
+ Test cases in this module test the self-heal daemon process.
+"""
+
+import time
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (
+ bring_bricks_offline, bring_bricks_online,
+ select_volume_bricks_to_bring_offline, get_online_bricks_list)
+from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
+ is_shd_daemonized,
+ monitor_heal_completion,
+ is_heal_complete)
+from glustolibs.gluster.heal_ops import get_heal_info_summary
+from glustolibs.io.utils import validate_io_procs
+
+
+@runs_on([['replicated', 'distributed-replicated'],
+ ['glusterfs', 'cifs', 'nfs']])
+class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass):
+ """
+ SelfHealDaemonProcessTestsWithHealing contains tests which verify the
+ self-heal daemon process with healing.
+ """
+ def setUp(self):
+ """
+ Setup volume, mount volume and initialize the variables
+ used in the tests
+ """
+ # calling GlusterBaseClass setUp
+ GlusterBaseClass.setUp.im_func(self)
+
+ # Setup Volume and Mount Volume
+ g.log.info("Starting to Setup Volume and Mount Volume")
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ # Verify that the glustershd process has released its parent process
+ g.log.info("Verifying Self Heal Daemon process is daemonized")
+ ret = is_shd_daemonized(self.servers)
+ if not ret:
+ raise ExecutionError("Self Heal Daemon process was still"
+ " holding parent process.")
+ g.log.info("Self Heal Daemon processes are online")
+
+ def tearDown(self):
+ """
+ Unmount the volume from the client and clean up the volume
+ """
+ # unmounting and cleaning up the volume
+ g.log.info("Starting to Unmount Volume and Cleanup Volume")
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
+ g.log.info("Successful in Unmount Volume and Cleanup Volume")
+
+ # calling GlusterBaseClass tearDown
+ GlusterBaseClass.tearDown.im_func(self)
+
+ def test_existing_glustershd_should_take_care_of_self_healing(self):
+ """
+ Test script which verifies that the existing glustershd takes
+ care of self-healing
+
+ * Create and start the Replicate volume
+ * Check the glustershd processes - note the pids
+ * Bring down one brick (say, brick1) without affecting
+ the cluster
+ * Create 1000 files on the volume
+ * Bring up brick1, which was killed in the previous step
+ * Check the heal info - proactive self-healing should start
+ * Bring down brick1 again
+ * Wait for 60 sec and bring brick1 back up
+ * Check the glustershd processes - the pids should be different
+ * Monitor the heal till it is complete
+
+ """
+ # pylint: disable=too-many-locals,too-many-lines,too-many-statements
+ nodes = self.servers
+
+ # check the self-heal daemon process
+ g.log.info("Starting to get self-heal daemon process on "
+ "nodes %s", nodes)
+ ret, pids = get_self_heal_daemon_pid(nodes)
+ self.assertTrue(ret, ("Either No self heal daemon process found or "
+ "more than One self heal daemon process "
+ "found : %s" % pids))
+ g.log.info("Successful in getting Single self heal daemon process"
+ " on all nodes %s", nodes)
+ glustershd_pids = pids
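+ # baseline shd pids; the test later asserts that these change once
+ # the bricks are brought back online via a glusterd restart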
+
+ # select the bricks to bring offline
+ g.log.info("Selecting bricks to bring offline for volume %s",
+ self.volname)
+ bricks_to_bring_offline = \
+ select_volume_bricks_to_bring_offline(self.mnode,
+ self.volname)
+ g.log.info("Brick List to bring offline : %s",
+ bricks_to_bring_offline)
+
+ # Bring down the selected bricks
+ g.log.info("Going to bring down the brick process "
+ "for %s", bricks_to_bring_offline)
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+ self.assertTrue(ret, ("Failed to bring down the bricks. Please "
+ "check the log file for more details."))
+ g.log.info("Brought down the brick process "
+ "for %s successfully", bricks_to_bring_offline)
+
+ # get the bricks which are still online
+ g.log.info("Getting the list of bricks which are online")
+ online_bricks = get_online_bricks_list(self.mnode, self.volname)
+ g.log.info("Online Bricks for volume %s : %s",
+ self.volname, online_bricks)
+
+ # write 1000 files of 1 MB each to the mount
+ g.log.info("Starting IO on all mounts...")
+ g.log.info("mounts: %s", self.mounts)
+ all_mounts_procs = []
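+ # with one brick down, each file written below accumulates as a
+ # pending heal entry against the offline brick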
+ cmd = ("for i in `seq 1 1000`; "
+ "do dd if=/dev/urandom of=%s/file_$i "
+ "bs=1M count=1; "
+ "done"
+ % self.mounts[0].mountpoint)
+ g.log.info(cmd)
+ proc = g.run_async(self.mounts[0].client_system, cmd,
+ user=self.mounts[0].user)
+ all_mounts_procs.append(proc)
+
+ # Validate IO
+ self.assertTrue(
+ validate_io_procs(all_mounts_procs, self.mounts),
+ "IO failed on some of the clients"
+ )
+
+ # check the heal info
+ g.log.info("Get the pending heal info for the volume %s",
+ self.volname)
+ heal_info = get_heal_info_summary(self.mnode, self.volname)
+ g.log.info("Successfully got heal info for the volume %s",
+ self.volname)
+ g.log.info("Heal Info for volume %s : %s", self.volname, heal_info)
+
+ # Bring bricks online
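+ # 'glusterd_restart' brings the bricks back by restarting glusterd,
+ # which also respawns glustershd with a new pid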
+ g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
+ ret = bring_bricks_online(self.mnode, self.volname,
+ bricks_to_bring_offline, 'glusterd_restart')
+ self.assertTrue(ret, ("Failed to bring bricks: %s online"
+ % bricks_to_bring_offline))
+ g.log.info("Successfully brought all bricks: %s online",
+ bricks_to_bring_offline)
+
+ # Wait for 90 sec for proactive self-healing to make progress
+ g.log.info('Waiting for 90 sec for proactive self-healing to '
+ 'make progress')
+ time.sleep(90)
+
+ # check the heal info
+ g.log.info("Get the pending heal info for the volume %s",
+ self.volname)
+ heal_info_after_brick_online = get_heal_info_summary(self.mnode,
+ self.volname)
+ g.log.info("Successfully got heal info for the volume %s",
+ self.volname)
+ g.log.info("Heal Info for volume %s : %s",
+ self.volname, heal_info_after_brick_online)
+
+ # check that the pending heal count has decreased on at least one brick
+ flag = False
+ for brick in online_bricks:
+ if int(heal_info_after_brick_online[brick]['numberOfEntries'])\
+ < int(heal_info[brick]['numberOfEntries']):
+ flag = True
+ break
+
+ self.assertTrue(flag, "Pro-active self heal is not started")
+ g.log.info("Pro-active self heal is started")
+
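+ # Cycle the bricks once more so that glustershd is restarted while
+ # heals are still pending; the existing shd should pick them up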
+ # bring down bricks again
+ g.log.info("Going to bring down the brick process "
+ "for %s", bricks_to_bring_offline)
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+ self.assertTrue(ret, ("Failed to bring down the bricks. Please "
+ "check the log file for more details."))
+ g.log.info("Brought down the brick process "
+ "for %s successfully", bricks_to_bring_offline)
+
+ # wait for 60 sec and bring up the bricks again
+ g.log.info('Waiting for 60 sec before bringing up the bricks again')
+ time.sleep(60)
+ g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
+ ret = bring_bricks_online(self.mnode, self.volname,
+ bricks_to_bring_offline, 'glusterd_restart')
+ self.assertTrue(ret, ("Failed to bring bricks: %s online"
+ % bricks_to_bring_offline))
+ g.log.info("Successfully brought all bricks: %s online",
+ bricks_to_bring_offline)
+
+ # Verify that the glustershd process has released its parent process
+ ret = is_shd_daemonized(nodes)
+ self.assertTrue(ret, ("Self Heal Daemon process is not daemonized "
+ "on one or more nodes"))
+
+ # check the self-heal daemon process
+ g.log.info("Starting to get self-heal daemon process on "
+ "nodes %s", nodes)
+ ret, pids = get_self_heal_daemon_pid(nodes)
+ self.assertTrue(ret, ("Either No self heal daemon process found or "
+ "more than One self heal daemon process "
+ "found : %s" % pids))
+ g.log.info("Successful in getting Single self heal daemon process"
+ " on all nodes %s", nodes)
+ shd_pids_after_bricks_online = pids
+
+ # compare the glustershd pids
+ self.assertNotEqual(glustershd_pids,
+ shd_pids_after_bricks_online,
+ ("self heal daemon pids are the same before "
+ "and after bringing the bricks online"))
+ g.log.info("EXPECTED : self heal daemon pids are different before "
+ "and after bringing the bricks online")
+
+ # wait for heal to complete
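+ # monitor_heal_completion polls the heal info until no entries
+ # remain (or an internal timeout expires)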
+ g.log.info("Monitoring the heal.....")
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, ("Heal is not completed on volume %s"
+ % self.volname))
+ g.log.info("Heal Completed on volume %s", self.volname)
+
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
diff --git a/tests/functional/afr/heal/test_self_heal_daemon_process.py b/tests/functional/afr/heal/test_self_heal_daemon_process.py
index 15cd43951..b66ae25d2 100644..100755
--- a/tests/functional/afr/heal/test_self_heal_daemon_process.py
+++ b/tests/functional/afr/heal/test_self_heal_daemon_process.py
@@ -32,15 +32,12 @@ from glustolibs.gluster.rebalance_ops import (rebalance_start,
rebalance_status)
from glustolibs.gluster.brick_libs import (
get_all_bricks, bring_bricks_offline, bring_bricks_online,
- are_bricks_online, select_bricks_to_bring_offline, are_bricks_offline,
- select_volume_bricks_to_bring_offline, get_online_bricks_list)
+ are_bricks_online, select_bricks_to_bring_offline, are_bricks_offline)
from glustolibs.gluster.brick_ops import replace_brick
from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
do_bricks_exist_in_shd_volfile,
is_shd_daemonized,
- are_all_self_heal_daemons_are_online,
- monitor_heal_completion)
-from glustolibs.gluster.heal_ops import get_heal_info_summary
+ are_all_self_heal_daemons_are_online)
from glustolibs.gluster.volume_ops import (volume_stop, volume_start,
get_volume_list)
from glustolibs.gluster.gluster_init import restart_glusterd
@@ -48,6 +45,8 @@ from glustolibs.io.utils import validate_io_procs
from glustolibs.misc.misc_libs import upload_scripts
+# pylint: disable=too-many-lines
+
@runs_on([['replicated', 'distributed-replicated', 'dispersed',
'distributed-dispersed'], ['glusterfs', 'nfs', 'cifs']])
class SelfHealDaemonProcessTests(GlusterBaseClass):
@@ -815,231 +814,6 @@ class ImpactOfReplaceBrickForGlustershdTests(GlusterBaseClass):
g.log.info("Successfully parsed %s file", self.glustershd)
-@runs_on([['replicated', 'distributed-replicated'],
- ['glusterfs', 'nfs', 'cifs']])
-class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass):
- """
- SelfHealDaemonProcessTestsWithHealing contains tests which verifies the
- self-heal daemon process with healing.
- """
- @classmethod
- def setUpClass(cls):
- """
- setup volume, mount volume and initialize necessary variables
- which is used in tests
- """
-
- # calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(cls)
-
- # Setup Volume and Mount Volume
- g.log.info("Starting to Setup Volume and Mount Volume")
- ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts)
- if not ret:
- raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
- g.log.info("Successful in Setup Volume and Mount Volume")
-
- # Verfiy glustershd process releases its parent process
- g.log.info("Verifying Self Heal Daemon process is daemonized")
- ret = is_shd_daemonized(cls.servers)
- if not ret:
- raise ExecutionError("Self Heal Daemon process was still"
- " holding parent process.")
- g.log.info("Self Heal Daemon processes are online")
-
- # upload script
- script_abs_path = "/usr/share/glustolibs/io/scripts/file_dir_ops.py"
- cls.script_upload_path = "/usr/share/glustolibs/io/scripts/" \
- "file_dir_ops.py"
-
- ret = upload_scripts(cls.clients, script_abs_path)
- if not ret:
- raise ExecutionError("Failed to upload IO scripts to clients")
-
- cls.GLUSTERSHD = "/var/lib/glusterd/glustershd/glustershd-server.vol"
-
- @classmethod
- def tearDownClass(cls):
- """
- Clean up the volume and umount volume from client
- """
-
- # stopping the volume
- g.log.info("Starting to Unmount Volume and Cleanup Volume")
- ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts)
- if not ret:
- raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
- g.log.info("Successful in Unmount Volume and Cleanup Volume")
-
- # calling GlusterBaseClass tearDownClass
- GlusterBaseClass.tearDownClass.im_func(cls)
-
- def test_existing_glustershd_should_take_care_of_self_healing(self):
- """
- Test Script which verifies that the existing glustershd should take
- care of self healing
-
- * Create and start the Replicate volume
- * Check the glustershd processes - Note the pids
- * Bring down the One brick ( lets say brick1) without affecting
- the cluster
- * Create 5000 files on volume
- * bring the brick1 up which was killed in previous steps
- * check the heal info - proactive self healing should start
- * Bring down brick1 again
- * wait for 60 sec and brought up the brick1
- * Check the glustershd processes - pids should be different
- * Monitor the heal till its complete
-
- """
- # pylint: disable=too-many-locals,too-many-lines,too-many-statements
- nodes = self.servers
-
- # check the self-heal daemon process
- g.log.info("Starting to get self-heal daemon process on "
- "nodes %s", nodes)
- ret, pids = get_self_heal_daemon_pid(nodes)
- self.assertTrue(ret, ("Either No self heal daemon process found or "
- "more than One self heal daemon process "
- "found : %s" % pids))
- g.log.info("Successful in getting Single self heal daemon process"
- " on all nodes %s", nodes)
- glustershd_pids = pids
-
- # select the bricks to bring offline
- g.log.info("Selecting bricks to brought offline for volume %s",
- self.volname)
- bricks_to_bring_offline = \
- select_volume_bricks_to_bring_offline(self.mnode,
- self.volname)
- g.log.info("Brick List to bring offline : %s",
- bricks_to_bring_offline)
-
- # Bring down the selected bricks
- g.log.info("Going to bring down the brick process "
- "for %s", bricks_to_bring_offline)
- ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
- self.assertTrue(ret, ("Failed to bring down the bricks. Please "
- "check the log file for more details."))
- g.log.info("Brought down the brick process "
- "for %s successfully", bricks_to_bring_offline)
-
- # get the bricks which are running
- g.log.info("getting the brick list which are online")
- online_bricks = get_online_bricks_list(self.mnode, self.volname)
- g.log.info("Online Bricks for volume %s : %s",
- self.volname, online_bricks)
-
- # write 1MB files to the mounts
- g.log.info("Starting IO on all mounts...")
- g.log.info("mounts: %s", self.mounts)
- all_mounts_procs = []
- for mount_obj in self.mounts:
- cmd = "for i in `seq 1 5000`;do dd if=/dev/urandom " \
- "of=%s/file_$i bs=1M count=1;done" % mount_obj.mountpoint
- g.log.info(cmd)
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
- all_mounts_procs.append(proc)
-
- # Validate IO
- self.assertTrue(
- validate_io_procs(all_mounts_procs, self.mounts),
- "IO failed on some of the clients"
- )
-
- # check the heal info
- g.log.info("Get the pending heal info for the volume %s",
- self.volname)
- heal_info = get_heal_info_summary(self.mnode, self.volname)
- g.log.info("Successfully got heal info for the volume %s",
- self.volname)
- g.log.info("Heal Info for volume %s : %s", self.volname, heal_info)
-
- # Bring bricks online
- g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
- ret = bring_bricks_online(self.mnode, self.volname,
- bricks_to_bring_offline, 'glusterd_restart')
- self.assertTrue(ret, ("Failed to bring bricks: %s online"
- % bricks_to_bring_offline))
- g.log.info("Successfully brought all bricks: %s online",
- bricks_to_bring_offline)
-
- # Wait for 90 sec to start self healing
- time.sleep(90)
-
- # check the heal info
- g.log.info("Get the pending heal info for the volume %s",
- self.volname)
- heal_info_after_brick_online = get_heal_info_summary(self.mnode,
- self.volname)
- g.log.info("Successfully got heal info for the volume %s",
- self.volname)
- g.log.info("Heal Info for volume %s : %s",
- self.volname, heal_info_after_brick_online)
-
- # check heal pending is decreased
- flag = False
- for brick in online_bricks:
- if int(heal_info_after_brick_online[brick]['numberOfEntries'])\
- < int(heal_info[brick]['numberOfEntries']):
- flag = True
- break
-
- self.assertTrue(flag, ("Pro-active self heal is not started"))
- g.log.info("Pro-active self heal is started")
-
- # bring down bricks again
- g.log.info("Going to bring down the brick process "
- "for %s", bricks_to_bring_offline)
- ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
- self.assertTrue(ret, ("Failed to bring down the bricks. Please "
- "check the log file for more details."))
- g.log.info("Brought down the brick process "
- "for %s successfully", bricks_to_bring_offline)
-
- # wait for 60 sec and brought up the brick again
- time.sleep(60)
- g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
- ret = bring_bricks_online(self.mnode, self.volname,
- bricks_to_bring_offline, 'glusterd_restart')
- self.assertTrue(ret, ("Failed to bring bricks: %s online"
- % bricks_to_bring_offline))
- g.log.info("Successfully brought all bricks: %s online",
- bricks_to_bring_offline)
-
- # Verfiy glustershd process releases its parent process
- ret = is_shd_daemonized(nodes)
- self.assertTrue(ret, ("Either No self heal daemon process found or "
- "more than One self heal daemon process found"))
-
- # check the self-heal daemon process
- g.log.info("Starting to get self-heal daemon process on "
- "nodes %s", nodes)
- ret, pids = get_self_heal_daemon_pid(nodes)
- self.assertTrue(ret, ("Either No self heal daemon process found or "
- "more than One self heal daemon process "
- "found : %s" % pids))
- g.log.info("Successful in getting Single self heal daemon process"
- " on all nodes %s", nodes)
- glustershd_pids_after_bricks_online = pids
-
- # compare the glustershd pids
- self.assertNotEqual(glustershd_pids,
- glustershd_pids_after_bricks_online,
- ("self heal daemon process are same before and "
- "after bringing up bricks online"))
- g.log.info("EXPECTED : self heal daemon process are different before "
- "and after bringing up bricks online")
-
- # wait for heal to complete
- g.log.info("Monitoring the heal.....")
- ret = monitor_heal_completion(self.mnode, self.volname)
- self.assertTrue(ret, ("Heal is not completed on volume %s"
- % self.volname))
- g.log.info("Heal Completed on volume %s", self.volname)
-
-
class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass):
"""
SelfHealDaemonProcessTestsWithMultipleVolumes contains tests which