From 99aedb316e3c9491e4d752d1bbc49b6496a7e627 Mon Sep 17 00:00:00 2001
From: Vijay Avuthu <vavuthu@redhat.com>
Date: Mon, 18 Dec 2017 15:57:44 +0530
Subject: Adding AFR self heal daemon test cases; gave meaningful names to
 functions; returning -1 if there is no process running; replaced numbers with
 words; reworded the msg "More than 1 or 0 self heal daemon"; review comments
 incorporated

Change-Id: If424a6f78536279c178ee45d62099fd8f63421dd
Signed-off-by: Vijay Avuthu <vavuthu@redhat.com>
---
 .../glustolibs/gluster/gluster_init.py             |  55 ++++
 glustolibs-gluster/glustolibs/gluster/heal_libs.py | 149 ++++++++++
 tests/functional/afr/__init__.py                   |   0
 .../afr/test_self_heal_daemon_process.py           | 299 +++++++++++++++++++++
 4 files changed, 503 insertions(+)
 create mode 100644 tests/functional/afr/__init__.py
 create mode 100644 tests/functional/afr/test_self_heal_daemon_process.py

diff --git a/glustolibs-gluster/glustolibs/gluster/gluster_init.py b/glustolibs-gluster/glustolibs/gluster/gluster_init.py
index d45a186d0..5d08acd0e 100644
--- a/glustolibs-gluster/glustolibs/gluster/gluster_init.py
+++ b/glustolibs-gluster/glustolibs/gluster/gluster_init.py
@@ -167,3 +167,58 @@ def env_setup_servers(servers):
         return False
 
     return True
+
+
+def get_glusterd_pids(nodes):
+    """
+    Checks if glusterd process is running and
+    return the process id's in dictionary format
+
+    Args:
+        nodes ( str|list ) : Node/Nodes of the cluster
+
+    Returns:
+        tuple : Tuple containing two elements (ret, glusterd_pids).
+        The first element 'ret' is of type 'bool', True if and only if
+        glusterd is running on all the nodes in the list and each
+        node contains only one instance of glusterd running.
+        False otherwise.
+
+        The second element 'glusterd_pids' is of type dictionary and maps
+        each node to its list of glusterd PIDs (['-1'] when none is found).
+    """
+    glusterd_pids = {}
+    _rc = True
+    if isinstance(nodes, str):
+        nodes = [nodes]
+
+    cmd = "pidof glusterd"
+    g.log.info("Executing cmd: %s on node %s" % (cmd, nodes))
+    results = g.run_parallel(nodes, cmd)
+    for node in results:
+        ret, out, _ = results[node]
+        if ret != 0:
+            g.log.error("Not able to get glusterd process "
+                        "from node %s" % node)
+            _rc = False
+            glusterd_pids[node] = ['-1']
+            continue
+
+        # pidof prints all matching PIDs on ONE line, space separated, so
+        # split on whitespace; splitting on "\n" never sees a second PID.
+        pids = out.split()
+        if not pids:
+            g.log.error("NO glusterd process found "
+                        "on node %s" % node)
+            _rc = False
+            glusterd_pids[node] = ['-1']
+        elif len(pids) == 1:
+            g.log.info("glusterd process with pid %s found on %s", pids, node)
+            glusterd_pids[node] = pids
+        else:
+            g.log.error("More than One glusterd process "
+                        "found on node %s" % node)
+            _rc = False
+            glusterd_pids[node] = pids
+
+    return _rc, glusterd_pids
diff --git a/glustolibs-gluster/glustolibs/gluster/heal_libs.py b/glustolibs-gluster/glustolibs/gluster/heal_libs.py
index 5ccb4b2b4..3cd867c36 100644
--- a/glustolibs-gluster/glustolibs/gluster/heal_libs.py
+++ b/glustolibs-gluster/glustolibs/gluster/heal_libs.py
@@ -305,3 +305,152 @@ def wait_for_self_heal_daemons_to_be_online(mnode, volname, timeout=300):
         g.log.info("All self-heal-daemons of the volume '%s' are online ",
                    volname)
     return True
+
+
+def get_self_heal_daemon_pid(nodes):
+    """
+    Collect the self heal daemon (glustershd) process id from each node.
+
+    Args:
+        nodes ( str|list ) : Node/Nodes of the cluster
+
+    Returns:
+        tuple : Tuple containing two elements (ret, glustershd_pids).
+        'ret' is a bool, True only when the lookup succeeds on every
+        node and finds exactly one glustershd process. False otherwise.
+
+        'glustershd_pids' is a dictionary keyed by node. Its value is
+        the list of pid strings from pgrep when a single process is
+        found and [-1] in every other case.
+    """
+    if isinstance(nodes, str):
+        nodes = [nodes]
+    cmd = "pgrep -f glustershd"
+    g.log.info("Executing cmd: %s on node %s" % (cmd, nodes))
+    results = g.run_parallel(nodes, cmd)
+    all_ok = True
+    glustershd_pids = {}
+    for node in results:
+        ret, out, err = results[node]
+        # stdout is only parsed when the command itself succeeded
+        pid_lines = out.strip().split("\n") if ret == 0 else []
+
+        # Pick the failure reason, if any; None means one pid was found
+        if ret != 0:
+            failure = ("Not able to get self heal daemon process "
+                       "from node %s" % node)
+        elif len(pid_lines) != 1:
+            failure = ("More than One self heal daemon process "
+                       "found on node %s" % node)
+        elif not pid_lines[0]:
+            failure = ("NO self heal daemon process found "
+                       "on node %s" % node)
+        else:
+            failure = None
+
+        if failure is None:
+            g.log.info("Single Self Heal Daemon process with "
+                       "pid %s found on %s", pid_lines, node)
+            glustershd_pids[node] = pid_lines
+        else:
+            g.log.error(failure)
+            all_ok = False
+            glustershd_pids[node] = [-1]
+
+    return all_ok, glustershd_pids
+
+
+def do_bricks_exist_in_shd_volfile(mnode, volname, brick_list):
+    """
+    Checks whether the given brick list is present in glustershd
+    server volume file
+
+    Args:
+        mnode (str)         : Node on which commands will be executed.
+        volname (str)       : Name of the volume.
+        brick_list ( list ) : brick list of a volume which needs to
+                              compare in glustershd server volume file
+
+    Returns:
+        bool : True if the bricks parsed from the glustershd server volume
+               file match brick_list. False otherwise or on read failure.
+    """
+    GLUSTERSHD = "/var/lib/glusterd/glustershd/glustershd-server.vol"
+    brick_list_server_vol = []
+    volume_clients = "volume " + volname + "-client-"
+    host = brick = None
+    parse = False
+
+    # Establish connection to mnode
+    conn = g.rpyc_get_connection(mnode)
+    if conn is None:
+        g.log.info("Not able to establish connection to node %s" % mnode)
+        return False
+    fd = None
+    try:
+        fd = conn.builtins.open(GLUSTERSHD)
+        for each_line in fd:
+            each_line = each_line.strip()
+            if volume_clients in each_line:
+                parse = True
+            elif parse and "end-volume" in each_line:
+                # only sections of this volume's clients contribute a brick
+                parse = False
+                brick_list_server_vol.append("%s:%s" % (host, brick))
+            elif parse and "option remote-subvolume" in each_line:
+                brick = each_line.split(" ")[2]
+            elif parse and "option remote-host" in each_line:
+                host = each_line.split(" ")[2]
+    except IOError as e:
+        g.log.info("I/O error ({0}): {1}".format(e.errno, e.strerror))
+        return False
+    finally:
+        if fd is not None:
+            fd.close()
+
+    g.log.info("Brick List from volume info : %s" % brick_list)
+    g.log.info("Brick List from volume server "
+               "file : %s" % brick_list_server_vol)
+    return set(brick_list) == set(brick_list_server_vol)
+
+
+def is_shd_daemonized(nodes, timeout=120):
+    """
+    wait for the glustershd process to release parent process.
+
+    Polls get_self_heal_daemon_pid() every 3 seconds until every node
+    reports exactly one self heal daemon process or the timeout expires.
+
+    Args:
+        nodes ( str|list ) : Node/Nodes of the cluster
+
+    Kwargs:
+        timeout (int): timeout value in seconds to wait for a single
+        self heal daemon process on every node.
+
+    Returns:
+        bool : True if glustershd releases its parent.
+               False Otherwise
+
+    """
+    poll_interval = 3
+    counter = 0
+    if isinstance(nodes, str):
+        nodes = [nodes]
+    while counter < timeout:
+        ret, _ = get_self_heal_daemon_pid(nodes)
+        if ret:
+            g.log.info("Single self heal daemon process on all nodes %s",
+                       nodes)
+            return True
+        g.log.info("Retry after 3 sec to get self heal "
+                   "daemon process....")
+        time.sleep(poll_interval)
+        counter = counter + poll_interval
+
+    # Adjacent literals are joined as-is, so "than" needs its trailing
+    # space; %.1f because the minutes value is a float (90s -> 1.5).
+    g.log.error("Either No self heal daemon process found or more than "
+                "One self heal daemon process found even "
+                "after %.1f minutes", (timeout/60.0))
+    return False
diff --git a/tests/functional/afr/__init__.py b/tests/functional/afr/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/functional/afr/test_self_heal_daemon_process.py b/tests/functional/afr/test_self_heal_daemon_process.py
new file mode 100644
index 000000000..536b76b72
--- /dev/null
+++ b/tests/functional/afr/test_self_heal_daemon_process.py
@@ -0,0 +1,299 @@
+#  Copyright (C) 2016-2017  Red Hat, Inc. <http://www.redhat.com>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License along
+#  with this program; if not, write to the Free Software Foundation, Inc.,
+#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+""" Description:
+        Test Cases in this module tests the self heal daemon process.
+"""
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.volume_libs import (
+    expand_volume, shrink_volume, log_volume_info_and_status,
+    wait_for_volume_process_to_be_online)
+from glustolibs.gluster.rebalance_ops import (rebalance_start,
+                                              wait_for_rebalance_to_complete,
+                                              rebalance_status)
+from glustolibs.gluster.brick_libs import get_all_bricks
+from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
+                                          do_bricks_exist_in_shd_volfile,
+                                          is_shd_daemonized)
+
+
+@runs_on([['replicated', 'distributed-replicated', 'dispersed',
+           'distributed-dispersed'], ['glusterfs', 'nfs', 'cifs']])
+class SelfHealDaemonProcessTests(GlusterBaseClass):
+    """
+    SelfHealDaemonProcessTests contains tests which verify the
+    self-heal daemon process of the nodes
+    """
+    @classmethod
+    def setUpClass(cls):
+        """
+        setup volume, mount volume and initialize necessary variables
+        which are used in tests
+        """
+
+        # calling GlusterBaseClass setUpClass
+        GlusterBaseClass.setUpClass.im_func(cls)
+
+        # Setup Volume and Mount Volume
+        g.log.info("Starting to Setup Volume and Mount Volume")
+        ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+        # Verify glustershd process releases its parent process
+        ret = is_shd_daemonized(cls.servers)
+        if not ret:
+            raise ExecutionError("Self Heal Daemon process was still"
+                                 " holding parent process.")
+        g.log.info("Self Heal Daemon processes are online")
+
+        cls.GLUSTERSHD = "/var/lib/glusterd/glustershd/glustershd-server.vol"
+
+    def setUp(self):
+        """
+        setUp method for every test
+        """
+
+        # calling GlusterBaseClass setUp
+        GlusterBaseClass.setUp.im_func(self)
+
+    def tearDown(self):
+        """
+        tearDown for every test
+        """
+
+        # Calling GlusterBaseClass tearDown
+        GlusterBaseClass.tearDown.im_func(self)
+
+    @classmethod
+    def tearDownClass(cls):
+        """
+        Clean up the volume and umount volume from client
+        """
+
+        # stopping the volume
+        g.log.info("Starting to Unmount Volume and Cleanup Volume")
+        ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
+        g.log.info("Successful in Unmount Volume and Cleanup Volume")
+
+        # calling GlusterBaseClass tearDownClass
+        GlusterBaseClass.tearDownClass.im_func(cls)
+
+    def test_glustershd_with_add_remove_brick(self):
+        """
+        Test script to verify glustershd process with adding and
+        removing bricks
+
+        * check glustershd process - only 1 glustershd process should
+          be running
+        * bricks must be present in glustershd-server.vol file for
+          the replicated involved volumes
+        * Add bricks
+        * check glustershd process - only 1 glustershd process should
+          be running and it should be different from the previous one
+        * bricks which are added must be present in glustershd-server.vol
+        * remove bricks
+        * check glustershd process - only 1 glustershd process should
+          be running and it should be different from the previous one
+        * bricks which are removed should not be present
+          in glustershd-server.vol file
+
+        """
+
+        nodes = self.volume['servers']
+
+        # check the self-heal daemon process
+        g.log.info("Starting to get self-heal daemon process on "
+                   "nodes %s" % nodes)
+        ret, pids = get_self_heal_daemon_pid(nodes)
+        self.assertTrue(ret, ("Either No self heal daemon process found or "
+                              "more than One self heal daemon process "
+                              "found : %s" % pids))
+        g.log.info("Successful in getting Single self heal daemon process"
+                   " on all nodes %s", nodes)
+        glustershd_pids = pids
+
+        # get the bricks for the volume
+        g.log.info("Fetching bricks for the volume : %s" % self.volname)
+        bricks_list = get_all_bricks(self.mnode, self.volname)
+        g.log.info("Brick List : %s" % bricks_list)
+
+        # validate the bricks present in volume info with
+        # glustershd server volume file
+        g.log.info("Starting parsing file %s on "
+                   "node %s" % (self.GLUSTERSHD, self.mnode))
+        ret = do_bricks_exist_in_shd_volfile(self.mnode, self.volname,
+                                             bricks_list)
+        self.assertTrue(ret, ("Brick List from volume info is different "
+                              "from glustershd server volume file. "
+                              "Please check log file for details"))
+        g.log.info("Successfully parsed %s file" % self.GLUSTERSHD)
+
+        # expanding volume
+        g.log.info("Start adding bricks to volume %s" % self.volname)
+        ret = expand_volume(self.mnode, self.volname, self.servers,
+                            self.all_servers_info)
+        self.assertTrue(ret, ("Failed to add bricks to "
+                              "volume %s " % self.volname))
+        g.log.info("Add brick successfull")
+
+        # Log Volume Info and Status after expanding the volume
+        g.log.info("Logging volume info and Status after expanding volume")
+        ret = log_volume_info_and_status(self.mnode, self.volname)
+        self.assertTrue(ret, ("Logging volume info and status failed "
+                              "on volume %s" % self.volname))
+        g.log.info("Successful in logging volume info and status "
+                   "of volume %s", self.volname)
+
+        # Verify volume's all process are online for 60 sec
+        g.log.info("Verifying volume's all process are online")
+        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
+                                                   60)
+        self.assertTrue(ret, ("Volume %s : All process are not "
+                              "online" % self.volname))
+        g.log.info("Successfully Verified volume %s processes are online",
+                   self.volname)
+
+        # Start Rebalance
+        g.log.info("Starting Rebalance on the volume")
+        ret, out, err = rebalance_start(self.mnode, self.volname)
+        self.assertEqual(ret, 0, ("Failed to start rebalance on "
+                                  "the volume %s with error %s" %
+                                  (self.volname, err)))
+        g.log.info("Successfully started rebalance on the "
+                   "volume %s", self.volname)
+
+        # Log Rebalance status
+        g.log.info("Log Rebalance status")
+        _, _, _ = rebalance_status(self.mnode, self.volname)
+
+        # Wait for rebalance to complete
+        g.log.info("Waiting for rebalance to complete")
+        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
+        self.assertTrue(ret, ("Rebalance is not yet complete "
+                              "on the volume %s" % self.volname))
+        g.log.info("Rebalance is successfully complete on "
+                   "the volume %s", self.volname)
+
+        # Check Rebalance status after rebalance is complete
+        g.log.info("Checking Rebalance status")
+        ret, _, _ = rebalance_status(self.mnode, self.volname)
+        self.assertEqual(ret, 0, ("Failed to get rebalance status for "
+                                  "the volume %s" % self.volname))
+        g.log.info("Successfully got rebalance status of the "
+                   "volume %s", self.volname)
+
+        # Check the self-heal daemon process after adding bricks
+        g.log.info("Starting to get self-heal daemon process on "
+                   "nodes %s" % nodes)
+        ret, pids = get_self_heal_daemon_pid(nodes)
+        self.assertTrue(ret, ("Either No self heal daemon process found or "
+                              "more than One self heal daemon process found"))
+        g.log.info("Successfull in getting self-heal daemon process "
+                   "on nodes %s" % nodes)
+
+        glustershd_pids_after_expanding = pids
+        g.log.info("Self Heal Daemon Process ID's afetr expanding "
+                   "volume: %s" % glustershd_pids_after_expanding)
+
+        self.assertNotEqual(glustershd_pids,
+                            glustershd_pids_after_expanding,
+                            "Self Daemon process is same before and"
+                            " after adding bricks")
+        g.log.info("Self Heal Daemon Process is different before and "
+                   "after adding bricks")
+
+        # get the bricks for the volume after expanding
+        bricks_list_after_expanding = get_all_bricks(self.mnode, self.volname)
+        g.log.info("Brick List after expanding "
+                   "volume: %s" % bricks_list_after_expanding)
+
+        # validate the bricks present in volume info
+        # with glustershd server volume file after adding bricks
+        g.log.info("Starting parsing file %s" % self.GLUSTERSHD)
+        ret = do_bricks_exist_in_shd_volfile(self.mnode, self.volname,
+                                             bricks_list_after_expanding)
+
+        self.assertTrue(ret, ("Brick List from volume info is different "
+                              "from glustershd server volume file after "
+                              "expanding bricks. Please check log file "
+                              "for details"))
+        g.log.info("Successfully parsed %s file" % self.GLUSTERSHD)
+
+        # shrink the volume
+        g.log.info("Starting volume shrink")
+        ret = shrink_volume(self.mnode, self.volname)
+        self.assertTrue(ret, ("Failed to shrink the volume on "
+                              "volume %s" % self.volname))
+        g.log.info("Shrinking volume is successful on "
+                   "volume %s", self.volname)
+
+        # Log Volume Info and Status after shrinking the volume
+        g.log.info("Logging volume info and Status after shrinking volume")
+        ret = log_volume_info_and_status(self.mnode, self.volname)
+        self.assertTrue(ret, ("Logging volume info and status failed on "
+                              "volume %s" % self.volname))
+        g.log.info("Successful in logging volume info and status "
+                   "of volume %s", self.volname)
+
+        # get the bricks after shrinking the volume
+        bricks_list_after_shrinking = get_all_bricks(self.mnode, self.volname)
+        g.log.info("Brick List after shrinking "
+                   "volume: %s" % bricks_list_after_shrinking)
+
+        self.assertEqual(len(bricks_list_after_shrinking), len(bricks_list),
+                         "Brick Count is mismatched after "
+                         "shrinking the volume %s" % self.volname)
+        g.log.info("Brick Count matched before before expanding "
+                   "and after shrinking volume")
+
+        # Verify glustershd process releases its parent process
+        ret = is_shd_daemonized(nodes)
+        self.assertTrue(ret, ("Either No self heal daemon process found or "
+                              "more than One self heal daemon process found"))
+
+        # check the self-heal daemon process after removing bricks
+        g.log.info("Starting to get self-heal daemon process "
+                   "on nodes %s" % nodes)
+        ret, pids = get_self_heal_daemon_pid(nodes)
+        # on failure pids hold [-1], which would trivially differ below
+        self.assertTrue(ret, ("Either No self heal daemon process found or "
+                              "more than One self heal daemon process "
+                              "found : %s" % pids))
+        glustershd_pids_after_shrinking = pids
+        self.assertNotEqual(glustershd_pids_after_expanding,
+                            glustershd_pids_after_shrinking,
+                            "Self Heal Daemon process is same "
+                            "after adding bricks and shrinking volume")
+        g.log.info("Self Heal Daemon Process is different after adding bricks "
+                   "and shrinking volume")
+
+        # validate bricks present in volume info
+        # with glustershd server volume file after removing bricks
+        g.log.info("Starting parsing file %s" % self.GLUSTERSHD)
+        ret = do_bricks_exist_in_shd_volfile(self.mnode, self.volname,
+                                             bricks_list_after_shrinking)
+        self.assertTrue(ret, ("Brick List from volume info is different "
+                              "from glustershd server volume file after "
+                              "removing bricks. Please check log file "
+                              "for details"))
+        g.log.info("Successfully parsed %s file" % self.GLUSTERSHD)
-- 
cgit