path: root/tests/functional/glusterd/test_replace_brick_quorum_not_met.py
#  Copyright (C) 2018  Red Hat, Inc. <http://www.redhat.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# pylint: disable=too-many-statements
""" Description:
    Test that replace brick fails when server quorum is not met
"""

import random
from time import sleep
from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.lib_utils import form_bricks_list
from glustolibs.gluster.volume_ops import (set_volume_options, volume_start,
                                           volume_create, get_volume_status,
                                           volume_reset)
from glustolibs.gluster.gluster_init import (stop_glusterd,
                                             is_glusterd_running,
                                             start_glusterd)
from glustolibs.gluster.brick_libs import (are_bricks_offline,
                                           are_bricks_online, get_all_bricks)
from glustolibs.gluster.brick_ops import replace_brick


@runs_on([['distributed-replicated'], ['glusterfs']])
class TestReplaceBrickWhenQuorumNotMet(GlusterBaseClass):
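    """Verify that a replace brick (commit force) operation is rejected
    while server-side quorum is not met.
    """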

    def tearDown(self):
        """
        tearDown for every test
        """
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Glusterd not started on some of "
                                     "the servers")
        # Check peer status from every node (retry for up to 160 seconds)
        count = 0
        while count < 80:
            ret = self.validate_peers_are_connected()
            if ret:
                break
            sleep(2)
            count += 1

        if not ret:
            raise ExecutionError("Servers are not in peer probed state")

        # Reset quorum ratio to default
        g.log.info("Resetting quorum ratio")
        ret, _, _ = volume_reset(self.mnode, 'all')
        if ret:
            raise ExecutionError("Failed to reset quorum ratio")
        g.log.info("Successfully reset quorum ratio")

        # Stopping the volume and cleaning it up
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to cleanup the volume %s"
                                 % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Removing the old brick directory manually: if the (unexpected)
        # replace brick operation went through, the old brick is no longer
        # part of the volume and cleanup_volume will not remove it. getattr
        # guards against the test failing before the flag is set.
        if not getattr(self, 'replace_brick_failed', True):
            node, brick_path = self.random_brick.split(':')
            cmd = "rm -rf " + brick_path
            ret, _, _ = g.run(node, cmd)
            if ret:
                raise ExecutionError("Failed to delete the brick directory")

        # Calling GlusterBaseClass tearDown
        GlusterBaseClass.tearDown(self)

    def test_replace_brick_quorum(self):

        '''
        -> Create volume
        -> Set quorum type
        -> Set quorum ratio to 95%
        -> Start the volume
        -> Stop glusterd on one node
        -> Quorum is now in the not-met condition
        -> Check whether all bricks went offline
        -> Perform replace brick operation (it should fail)
        -> Start glusterd on the same node that was stopped
        -> Check whether all bricks came back online
        -> Verify in vol info that the old brick was not replaced with
           the new brick
        '''

        # Forming brick list: 6 bricks for creating the volume, the 7th
        # brick for performing the replace brick operation
        brick_list = form_bricks_list(self.mnode, self.volname, 7,
                                      self.servers, self.all_servers_info)

        # Create Volume
        ret, _, _ = volume_create(self.mnode, self.volname,
                                  brick_list[0:6], replica_count=3)
        self.assertEqual(ret, 0, "Failed to create volume %s" % self.volname)
        g.log.info("Volume created successfully %s", self.volname)

        # Enabling server quorum
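        # (with server-quorum-type set to 'server', glusterd brings the
        # volume's bricks down whenever server-side quorum is lost)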
        ret = set_volume_options(self.mnode, self.volname,
                                 {'cluster.server-quorum-type': 'server'})
        self.assertTrue(ret, "Failed to set server quorum on volume %s"
                        % self.volname)
        g.log.info("Able to set server quorum successfully on volume %s",
                   self.volname)

        # Setting Quorum ratio in percentage
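        # (with 6 servers, losing one leaves 5/6 of them, roughly 83%,
        # below the 95% ratio, so stopping glusterd on any single node
        # breaks quorum)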
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.server-quorum-ratio': '95%'})
        self.assertTrue(ret, "Failed to set server quorum ratio on %s"
                        % self.servers)
        g.log.info("Able to set server quorum ratio successfully on %s",
                   self.servers)

        # Start the volume
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start volume %s" % self.volname)
        g.log.info("Volume started successfully %s", self.volname)

        # Stop glusterd on one of the nodes (excluding the mnode, which
        # must stay up to run the gluster CLI commands)
        random_server = random.choice(self.servers[1:])
        ret = stop_glusterd(random_server)
        self.assertTrue(ret, "Failed to stop glusterd for %s"
                        % random_server)
        g.log.info("Glusterd stopped successfully on server %s",
                   random_server)

        # Checking whether glusterd is running or not
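        # is_glusterd_running returns 1 when glusterd is not running on
        # the node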
        ret = is_glusterd_running(random_server)
        self.assertEqual(ret, 1, "Glusterd is still running on node %s "
                                 "where it was stopped" % random_server)
        g.log.info("Glusterd is not running on the server %s",
                   random_server)

        # Verifying node count in volume status after glusterd is stopped
        # on one of the servers. It's not possible to check the brick
        # status in volume status immediately after glusterd stops, so
        # poll until the stopped server drops out of the output
        count = 0
        while count < 100:
            vol_status = get_volume_status(self.mnode, self.volname)
            servers_count = len(vol_status[self.volname].keys())
            if servers_count == len(self.servers) - 1:
                break
            sleep(2)
            count += 1
        self.assertEqual(servers_count, len(self.servers) - 1,
                         "Stopped server %s is still listed in volume "
                         "status of %s" % (random_server, self.volname))

        # Creating the offline brick list from volume status; each node
        # also carries a 'Self-heal Daemon' entry, which is filtered out
        offline_bricks = []
        vol_status = get_volume_status(self.mnode, self.volname)
        for node in vol_status[self.volname]:
            for brick_path in vol_status[self.volname][node]:
                if brick_path != 'Self-heal Daemon':
                    offline_bricks.append(':'.join([node, brick_path]))

        # Checking whether bricks went offline with quorum ratio at 95%
        ret = are_bricks_offline(self.mnode, self.volname, offline_bricks)
        self.assertTrue(ret, "Bricks are online when quorum is not met "
                             "for %s" % self.volname)
        g.log.info("Bricks are offline when quorum is not met "
                   "for %s", self.volname)

        # Getting random brick from offline brick list
        self.random_brick = random.choice(offline_bricks)

        # Performing replace brick commit force when quorum not met
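        # This flag is read back in tearDown: if the (unexpected) replace
        # succeeds, the old brick's directory has to be removed manually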
        self.replace_brick_failed = False
        ret, _, _ = replace_brick(self.mnode, self.volname, self.random_brick,
                                  brick_list[6])
        self.assertNotEqual(ret, 0, "Replace brick should fail when quorum "
                                    "is not met, but it succeeded on %s"
                                    % self.volname)
        g.log.info("Replace brick failed as expected while quorum is not "
                   "met on %s", self.volname)
        self.replace_brick_failed = True

        # Start glusterd again on the node where it was stopped
        ret = start_glusterd(random_server)
        self.assertTrue(ret, "Failed to start glusterd on server %s"
                        % random_server)
        g.log.info("Glusterd started successfully on server %s",
                   random_server)

        # Verifying node count in volume status after glusterd is started
        # again on the stopped server. It's not possible to check the
        # brick status in volume status immediately after glusterd starts,
        # so poll until the server shows up in the output again
        count = 0
        while count < 100:
            vol_status = get_volume_status(self.mnode, self.volname)
            servers_count = len(vol_status[self.volname].keys())
            if servers_count == len(self.servers):
                break
            sleep(2)
            count += 1
        self.assertEqual(servers_count, len(self.servers),
                         "Restarted server %s is not listed in volume "
                         "status of %s" % (random_server, self.volname))

        # Checking whether all bricks came back online (they can take a
        # few seconds to start after glusterd comes up, hence the retries)
        count = 0
        while count < 100:
            ret = are_bricks_online(self.mnode, self.volname,
                                    brick_list[0:6])
            if ret:
                break
            sleep(2)
            count += 1
        self.assertTrue(ret, "All bricks are not online for %s"
                        % self.volname)
        g.log.info("All bricks are online for volume %s", self.volname)

        # Comparing brick lists from before and after the replace brick
        # operation
        after_brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertListEqual(after_brick_list, brick_list[0:6],
                             "Bricks are not the same before and after "
                             "performing the replace brick operation for "
                             "volume %s" % self.volname)
        g.log.info("Bricks are the same before and after performing the "
                   "replace brick operation for volume %s", self.volname)