From e328562b44ba9643231bc206652e56b331684536 Mon Sep 17 00:00:00 2001 From: nchilaka Date: Fri, 28 Feb 2020 17:52:04 +0530 Subject: [Test]: check peer probe behavior when glusterd is down BZ#1257394 - Provide meaningful errors on peer probe and peer detach Test Steps: 1 check the current peer status 2 detach one of the valid nodes which is already part of cluster 3 stop glusterd on that node 4 try to attach above node to cluster, which must fail with Transport End point error 5 Recheck the test using hostname, expected to see same result 6 start glusterd on that node 7 halt/reboot the node 8 try to peer probe the halted node, which must fail again. 9 The only error accepted is as below "peer probe: failed: Probe returned with Transport endpoint is not connected" 10 Check peer status and make sure no other nodes in peer reject state Change-Id: Ic0a083d5cb150275e927723d960e89fe1a5528fb Signed-off-by: nchilaka --- .../glusterd/test_probe_glusterd_down.py | 160 +++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 tests/functional/glusterd/test_probe_glusterd_down.py (limited to 'tests/functional') diff --git a/tests/functional/glusterd/test_probe_glusterd_down.py b/tests/functional/glusterd/test_probe_glusterd_down.py new file mode 100644 index 000000000..3705904a9 --- /dev/null +++ b/tests/functional/glusterd/test_probe_glusterd_down.py @@ -0,0 +1,160 @@ +# Copyright (C) 2020 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from time import sleep + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.peer_ops import peer_probe +from glustolibs.gluster.lib_utils import is_core_file_created +from glustolibs.gluster.peer_ops import peer_detach, is_peer_connected +from glustolibs.gluster.gluster_init import (stop_glusterd, start_glusterd, + wait_for_glusterd_to_start) +from glustolibs.misc.misc_libs import are_nodes_online + + +class PeerProbeWhenGlusterdDown(GlusterBaseClass): + + def test_peer_probe_when_glusterd_down(self): + # pylint: disable=too-many-statements + ''' + Test script to verify the behavior when we try to peer + probe a valid node whose glusterd is down + Also post validate to make sure no core files are created + under "/", /var/log/core and /tmp directory + + Ref: BZ#1257394 Provide meaningful error on peer probe and peer detach + Test Steps: + 1 check the current peer status + 2 detach one of the valid nodes which is already part of cluster + 3 stop glusterd on that node + 4 try to attach above node to cluster, which must fail with + Transport End point error + 5 Recheck the test using hostname, expected to see same result + 6 start glusterd on that node + 7 halt/reboot the node + 8 try to peer probe the halted node, which must fail again. + 9 The only error accepted is + "peer probe: failed: Probe returned with Transport endpoint is not + connected" + 10 Check peer status and make sure no other nodes in peer reject state + ''' + + ret, test_timestamp, _ = g.run_local('date +%s') + test_timestamp = test_timestamp.strip() + + # detach one of the nodes which is part of the cluster + g.log.info("detaching server %s ", self.servers[1]) + ret, _, err = peer_detach(self.mnode, self.servers[1]) + msg = 'peer detach: failed: %s is not part of cluster\n' \ + % self.servers[1] + if ret: + self.assertEqual(err, msg, "Failed to detach %s " + % (self.servers[1])) + + # bring down glusterd of the server which has been detached + g.log.info("Stopping glusterd on %s ", self.servers[1]) + ret = stop_glusterd(self.servers[1]) + self.assertTrue(ret, "Fail to stop glusterd on %s " % self.servers[1]) + + # trying to peer probe the node whose glusterd was stopped using its IP + g.log.info("Peer probing %s when glusterd down ", self.servers[1]) + ret, _, err = peer_probe(self.mnode, self.servers[1]) + self.assertNotEqual(ret, 0, "Peer probe should not pass when " + "glusterd is down") + self.assertEqual(err, "peer probe: failed: Probe returned with " + "Transport endpoint is not connected\n") + + # trying to peer probe the same node with hostname + g.log.info("Peer probing node %s using hostname with glusterd down ", + self.servers[1]) + hostname = g.run(self.servers[1], "hostname") + ret, _, err = peer_probe(self.mnode, hostname[1].strip()) + self.assertNotEqual(ret, 0, "Peer probe should not pass when " + "glusterd is down") + self.assertEqual(err, "peer probe: failed: Probe returned with" + " Transport endpoint is not connected\n") + + # start glusterd again for the next set of test steps + g.log.info("starting glusterd on %s ", self.servers[1]) + ret = start_glusterd(self.servers[1]) + self.assertTrue(ret, "glusterd couldn't start successfully on %s" + % self.servers[1]) + + # reboot a server and then trying to peer probe at the time of reboot + g.log.info("Rebooting %s and checking peer probe", self.servers[1]) + reboot = g.run_async(self.servers[1], "reboot") + + # Mandatory sleep for 3 seconds to make sure node is in halted state + sleep(3) + + # Peer probing the node using IP when it is still not online + g.log.info("Peer probing node %s which has been issued a reboot ", + self.servers[1]) + ret, _, err = peer_probe(self.mnode, self.servers[1]) + self.assertNotEqual(ret, 0, "Peer probe passed when it was expected to" + " fail") + self.assertEqual(err, "peer probe: failed: Probe returned with " + "Transport endpoint is not connected\n") + + # Peer probing the node using hostname when it is still not online + g.log.info("Peer probing node %s using hostname which is still " + "not online ", + self.servers[1]) + ret, _, err = peer_probe(self.mnode, hostname[1].strip()) + self.assertNotEqual(ret, 0, "Peer probe should not pass when node " + "has not come online") + self.assertEqual(err, "peer probe: failed: Probe returned with " + "Transport endpoint is not connected\n") + + ret, _, _ = reboot.async_communicate() + self.assertEqual(ret, 255, "reboot failed") + + # Validate if rebooted node is online or not + count = 0 + while count < 40: + sleep(15) + ret, _ = are_nodes_online(self.servers[1]) + if ret: + g.log.info("Node %s is online", self.servers[1]) + break + count += 1 + self.assertTrue(ret, "Node in test not yet online") + + # check if glusterd is running post reboot + ret = wait_for_glusterd_to_start(self.servers[1], + glusterd_start_wait_timeout=120) + self.assertTrue(ret, "Glusterd service is not running post reboot") + + # peer probe the node must pass + g.log.info("peer probing node %s", self.servers[1]) + ret, _, err = peer_probe(self.mnode, self.servers[1]) + self.assertEqual(ret, 0, "Peer probe has failed unexpectedly with " + "%s " % err) + + # checking if core file created in "/", "/tmp" and "/var/log/core" + ret = is_core_file_created(self.servers, test_timestamp) + self.assertTrue(ret, "core file found") + + def tearDown(self): + g.log.info("Peering any nodes which are not part of cluster as " + "part of cleanup") + for server in self.servers: + if not is_peer_connected(self.mnode, server): + ret, _, err = peer_probe(self.mnode, server) + if ret: + raise ExecutionError("Peer probe failed with %s " % err) -- cgit