summaryrefslogtreecommitdiffstats
path: root/tests/functional/glusterd/test_probe_glusterd_down.py
blob: 3705904a900f7f237fbf675bd58103ed37429119 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#  Copyright (C) 2020  Red Hat, Inc. <http://www.redhat.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from time import sleep

from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.peer_ops import peer_probe
from glustolibs.gluster.lib_utils import is_core_file_created
from glustolibs.gluster.peer_ops import peer_detach, is_peer_connected
from glustolibs.gluster.gluster_init import (stop_glusterd, start_glusterd,
                                             wait_for_glusterd_to_start)
from glustolibs.misc.misc_libs import are_nodes_online


class PeerProbeWhenGlusterdDown(GlusterBaseClass):

    def test_peer_probe_when_glusterd_down(self):
        # pylint: disable=too-many-statements
        '''
        Test script to verify the behavior when we try to peer
        probe a valid node whose glusterd is down
        Also post validate to make sure no core files are created
        under "/", /var/log/core and /tmp  directory

        Ref: BZ#1257394 Provide meaningful error on peer probe and peer detach
        Test Steps:
        1 check the current peer status
        2 detach one of the valid nodes which is already part of cluster
        3 stop glusterd on that node
        4 try to attach above node to cluster, which must fail with
          Transport End point error
        5 Recheck the test using hostname, expected to see same result
        6 start glusterd on that node
        7 halt/reboot the node
        8 try to peer probe the halted node, which must fail again.
        9 The only error accepted is
          "peer probe: failed: Probe returned with Transport endpoint is not
          connected"
        10 Check peer status and make sure no other nodes in peer reject state
        '''

        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # detach one of the nodes which is part of the cluster
        g.log.info("detaching server %s ", self.servers[1])
        ret, _, err = peer_detach(self.mnode, self.servers[1])
        msg = 'peer detach: failed: %s is not part of cluster\n' \
              % self.servers[1]
        if ret:
            self.assertEqual(err, msg, "Failed to detach %s "
                             % (self.servers[1]))

        # bring down glusterd of the server which has been detached
        g.log.info("Stopping glusterd on %s ", self.servers[1])
        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Fail to stop glusterd on %s " % self.servers[1])

        # trying to peer probe the node whose glusterd was stopped using its IP
        g.log.info("Peer probing %s when glusterd down ", self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0, "Peer probe should not pass when "
                                    "glusterd is down")
        self.assertEqual(err, "peer probe: failed: Probe returned with "
                              "Transport endpoint is not connected\n")

        # trying to peer probe the same node with hostname
        g.log.info("Peer probing node %s using hostname with glusterd down ",
                   self.servers[1])
        hostname = g.run(self.servers[1], "hostname")
        ret, _, err = peer_probe(self.mnode, hostname[1].strip())
        self.assertNotEqual(ret, 0, "Peer probe should not pass when "
                                    "glusterd is down")
        self.assertEqual(err, "peer probe: failed: Probe returned with"
                              " Transport endpoint is not connected\n")

        # start glusterd again for the next set of test steps
        g.log.info("starting glusterd on %s ", self.servers[1])
        ret = start_glusterd(self.servers[1])
        self.assertTrue(ret, "glusterd couldn't start successfully on %s"
                        % self.servers[1])

        # reboot a server and then trying to peer probe at the time of reboot
        g.log.info("Rebooting %s and checking peer probe", self.servers[1])
        reboot = g.run_async(self.servers[1], "reboot")

        # Mandatory sleep for 3 seconds to make sure node is in halted state
        sleep(3)

        # Peer probing the node using IP when it is still not online
        g.log.info("Peer probing node %s which has been issued a reboot ",
                   self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0, "Peer probe passed when it was expected to"
                                    " fail")
        self.assertEqual(err, "peer probe: failed: Probe returned with "
                              "Transport endpoint is not connected\n")

        # Peer probing the node using hostname when it is still not online
        g.log.info("Peer probing node %s using hostname which is still "
                   "not online ",
                   self.servers[1])
        ret, _, err = peer_probe(self.mnode, hostname[1].strip())
        self.assertNotEqual(ret, 0, "Peer probe should not pass when node "
                                    "has not come online")
        self.assertEqual(err, "peer probe: failed: Probe returned with "
                              "Transport endpoint is not connected\n")

        ret, _, _ = reboot.async_communicate()
        self.assertEqual(ret, 255, "reboot failed")

        # Validate if rebooted node is online or not
        count = 0
        while count < 40:
            sleep(15)
            ret, _ = are_nodes_online(self.servers[1])
            if ret:
                g.log.info("Node %s is online", self.servers[1])
                break
            count += 1
        self.assertTrue(ret, "Node in test not yet online")

        # check if glusterd is running post reboot
        ret = wait_for_glusterd_to_start(self.servers[1],
                                         glusterd_start_wait_timeout=120)
        self.assertTrue(ret, "Glusterd service is not running post reboot")

        # peer probe the node must pass
        g.log.info("peer probing node %s", self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertEqual(ret, 0, "Peer probe has failed unexpectedly with "
                                 "%s " % err)

        # checking if core file created in "/", "/tmp" and "/var/log/core"
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "core file found")

    def tearDown(self):
        g.log.info("Peering any nodes which are not part of cluster as "
                   "part of cleanup")
        for server in self.servers:
            if not is_peer_connected(self.mnode, server):
                ret, _, err = peer_probe(self.mnode, server)
                if ret:
                    raise ExecutionError("Peer probe failed with %s " % err)