tests/functional/afr/test_heal_split_brain_command.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264

#  Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from glusto.core import Glusto as g

from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                           bring_bricks_online, get_all_bricks)
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.glusterdir import mkdir
from glustolibs.gluster.glusterfile import get_fattr
from glustolibs.gluster.heal_libs import is_volume_in_split_brain
from glustolibs.gluster.heal_ops import heal_info, heal_info_split_brain
from glustolibs.gluster.volume_ops import set_volume_options


# pylint: disable=too-many-locals, too-many-statements
@runs_on([['arbiter', 'replicated'], ['glusterfs']])
class TestSplitBrain(GlusterBaseClass):
    """
    Drive files of an arbiter/replicated volume into data, metadata and
    entry (gfid) split-brain and validate what the mount, `heal info` and
    `heal info split-brain` report about them.
    """

    def setUp(self):
        """
        Setup the volume and mount it on a single client.

        Raises:
            ExecutionError: if the volume can't be setup and mounted.
        """
        self.get_super_method(self, 'setUp')()

        # A single mount is enough for all the tests
        self.mounts = [self.mounts[0]]

        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
            raise ExecutionError('Failed to setup and mount '
                                 '{}'.format(self.volname))

    def tearDown(self):
        """
        Unmount and cleanup the volume, then run the base class tearDown.

        Raises:
            ExecutionError: if unmount or cleanup of the volume fails.
        """
        if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
            raise ExecutionError('Not able to unmount and cleanup '
                                 '{}'.format(self.volname))
        self.get_super_method(self, 'tearDown')()

    def _run_cmd_and_validate(self, client, cmd, paths):
        """
        Run `cmd` once for every path in `paths` on `client` and assert that
        each run exits with 0.

        Args:
            client: host on which the command is run via `g.run`
            cmd (str): command template holding a single `%s` which is
                       substituted with the path
            paths (list): paths to substitute into `cmd`, one run per path
        """
        for path in paths:
            ret, _, _ = g.run(client, cmd % path)
            self.assertEqual(
                ret, 0, 'Unable to perform `{}` from `{}` on `{}`'.format(
                    cmd, path, client))

    @staticmethod
    def _transform_gfids(gfids):
        """
        Returns list of `gfids` joined by `-` at required places

        Example of one element:
        Input:   0xd4653ea0289548eb81b35c91ffb73eff
        Returns: d4653ea0-2895-48eb-81b3-5c91ffb73eff
        """
        # Slicing starts at index 2 which drops the leading `0x`; the last
        # slice is open ended (`None`) and takes the rest of the string
        split_pos = [10, 14, 18, 22]
        bounds = list(zip([2] + split_pos, split_pos + [None]))
        return ['-'.join(gfid[start:stop] for start, stop in bounds)
                for gfid in gfids]

    def _collect_gfids(self, bricks, files):
        """
        Returns `trusted.gfid` xattr of every file in `files` as read from
        every brick in `bricks`, asserting that each xattr could be fetched.

        Args:
            bricks (list): bricks in `host:path` form
            files (tuple): paths relative to the brick root

        Returns:
            list: gfids ordered brick by brick and, within a brick, in the
                  order of `files`
        """
        gfids = []
        for brick in bricks:
            host, path = brick.split(':')
            for each_file in files:
                fattr = get_fattr(host, path + '/{}'.format(each_file),
                                  'trusted.gfid')
                self.assertIsNotNone(
                    fattr, 'Unable to get `gfid` for {}'.format(each_file))
                gfids.append(fattr)
        return gfids

    def test_split_brain_from_heal_command(self):
        """
        Description: Simulate and validate data, metadata and entry split brain

        Steps:
        - Create and mount a replicated volume and disable quorum, self-heal
          daemon
        - Create ~10 files from the mount point and simulate data, metadata
          split-brain for 2 files each
        - Create a dir with some files and simulate entry/gfid split brain
        - Validate volume successfully recognizing split-brain
        - Validate a lookup on split-brain files fails with EIO error on mount
        - Validate `heal info` and `heal info split-brain` command shows only
          the files that are in split-brain
        - Validate new files and dirs can be created from the mount
        """
        io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c '
        client, m_point = (self.mounts[0].client_system,
                           self.mounts[0].mountpoint)
        arbiter = 'arbiter' in self.volume_type

        # Disable self-heal daemon and set `quorum-type` option to `none`
        ret = set_volume_options(self.mnode, self.volname, {
            'self-heal-daemon': 'off',
            'cluster.quorum-type': 'none'
        })
        self.assertTrue(
            ret, 'Not able to disable `quorum-type` and '
            '`self-heal` daemon volume options')

        # Create required dirs from the mount
        fqpath = '{}/dir'.format(m_point)
        file_io = ('cd %s; for i in {1..6}; do ' + io_cmd +
                   ' 2M > file$i; done;')
        file_cmd = 'cd %s; touch file{7..10}'
        ret = mkdir(client, fqpath)
        self.assertTrue(ret, 'Unable to create a directory from mount point')

        # Create empty files and data files
        for cmd in (file_io, file_cmd):
            self._run_cmd_and_validate(client, cmd, [m_point, fqpath])

        all_bricks = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(
            all_bricks, 'Unable to get list of bricks '
            'associated with the volume')

        # Data will be appended to the files `file1, file2` resulting in data
        # split brain
        data_split_cmd = ';'.join(io_cmd + '2M >> ' + each_file
                                  for each_file in ('file1', 'file2'))

        # File permissions will be changed for `file4, file5` to result in
        # metadata split brain
        meta_split_cmd = ';'.join('chmod 0555 ' + each_file
                                  for each_file in ('file4', 'file5'))

        # Files will be deleted and created with data to result in data,
        # metadata split brain on files and entry(gfid) split brain on dir
        entry_split_cmd = ';'.join('rm -f ' + each_file + ' && ' + io_cmd +
                                   ' 2M > ' + each_file
                                   for each_file in ('dir/file1', 'dir/file2'))

        # Need to always select arbiter(3rd) brick if volume is arbiter type or
        # any two bricks for replicated volume. Each brick is paired with its
        # successor and the last brick wraps around to the first one.
        for bricks in zip(all_bricks, all_bricks[1:] + [all_bricks[0]]):

            # Skip iteration if volume type is arbiter and `bricks` doesn't
            # contain arbiter brick
            if arbiter and (all_bricks[-1] not in bricks):
                continue

            # Same list is handed over to both offline and online helpers
            brick_pair = list(bricks)

            # Bring bricks offline
            ret = bring_bricks_offline(self.volname, brick_pair)
            self.assertTrue(ret, 'Unable to bring {} offline'.format(bricks))

            # Run cmd to bring files into split brain
            for cmd, msg in ((data_split_cmd, 'data'),
                             (meta_split_cmd, 'meta'), (entry_split_cmd,
                                                        'entry')):
                ret, _, _ = g.run(client, 'cd {}; {}'.format(m_point, cmd))
                self.assertEqual(
                    ret, 0, 'Unable to run cmd for bringing files '
                    'into {} split brain'.format(msg))

            # Bring offline bricks online
            ret = bring_bricks_online(
                self.mnode,
                self.volname,
                brick_pair,
                bring_bricks_online_methods='volume_start_force')
            self.assertTrue(ret, 'Unable to bring {} online'.format(bricks))

        # Validate volume is in split-brain
        self.assertTrue(is_volume_in_split_brain(self.mnode, self.volname),
                        'Volume should be in split-brain')

        # Validate `head` lookup on split brain files fails with EIO
        for each_file in ('file1', 'file2', 'file4', 'file5', 'dir/file1',
                          'dir/file2'):
            ret, _, err = g.run(client,
                                'cd {}; head {}'.format(m_point, each_file))
            self.assertNotEqual(
                ret, 0, 'Lookup on split-brain file {} should '
                'fail'.format(each_file))
            self.assertIn(
                'Input/output error', err,
                'File {} should result in EIO error'.format(each_file))

        # Validate presence of split-brain files and absence of other files in
        # `heal info` and `heal info split-brain` commands
        ret, info, _ = heal_info(self.mnode, self.volname)
        self.assertEqual(ret, 0, 'Unable to query for `heal info`')
        ret, info_spb, _ = heal_info_split_brain(self.mnode, self.volname)
        self.assertEqual(ret, 0, 'Unable to query for `heal info split-brain`')

        # Collect gfids of files in data and metadata split-brain, these are
        # read from the first brick only
        common_gfids = self._collect_gfids(
            [all_bricks[0]], ('file1', 'file2', 'file4', 'file5', 'dir'))

        # GFID for files under an entry split brain dir differs from its
        # peers, so read them from every brick (arbiter brick excluded)
        uniq_gfids = self._collect_gfids(
            all_bricks[:-1] if arbiter else all_bricks,
            ('dir/file1', 'dir/file2'))

        # Transform GFIDs to match against o/p of `heal info` and `split-brain`
        common_gfids = self._transform_gfids(common_gfids)
        uniq_gfids = self._transform_gfids(uniq_gfids)

        # Just enough validation by counting occurrences asserting success
        # NOTE(review): the trailing ` -`/space in the patterns presumably
        # keeps them from matching the `/dir/file*` lines - confirm against
        # actual `heal info` output
        common_files = ['/file1 -', '/file2 -', '/file4', '/file5', '/dir ']
        uniq_files = ['/dir/file1', '/dir/file2']

        # Common files should occur 3 times each in `heal info` and
        # `heal info split-brain` or 2 times for arbiter
        occur = 2 if arbiter else 3
        for each_file, gfid in zip(common_files, common_gfids):

            # Check against `heal info` cmd
            self.assertEqual(
                info.count(gfid) + info.count(each_file), occur,
                'File {} with gfid {} should exist in `heal info` '
                'command'.format(each_file[:6], gfid))

            # Check against `heal info split-brain` cmd
            self.assertEqual(
                info_spb.count(gfid) + info_spb.count(each_file[:6].rstrip()),
                occur, 'File {} with gfid {} should exist in `heal info '
                'split-brain` command'.format(each_file[:6], gfid))

        # Entry split files will be listed only in `heal info` cmd
        for index, each_file in enumerate(uniq_files):

            # Collect file and its associated gfids. `uniq_gfids` alternates
            # between gfid of `dir/file1` and `dir/file2` brick by brick, so
            # a stride of 2 starting at `index` picks the path of the file
            # followed by the gfids of that same file only
            entries = (uniq_files + uniq_gfids)[index::2]
            count = sum(info.count(entry) for entry in entries)
            self.assertEqual(
                count, occur, 'Not able to find existence of '
                'entry split brain file {} in `heal info`'.format(each_file))

        # Assert no other file is counted as in split-brain
        for cmd, rout, exp_str in (('heal info', info, 'entries: 7'),
                                   ('heal info split-brain', info_spb,
                                    'split-brain: 5')):
            self.assertEqual(
                rout.count(exp_str), occur, 'Each node should '
                'list only {} entries in {} command'.format(exp_str[-1], cmd))

        # Validate new files and dir can be created from mount
        fqpath = '{}/temp'.format(m_point)
        ret = mkdir(client, fqpath)
        self.assertTrue(
            ret, 'Unable to create a dir from mount post split-brain of files')
        for cmd in (file_io, file_cmd):
            self._run_cmd_and_validate(client, cmd, [fqpath])

        g.log.info('Pass: Validated data, metadata and entry split brain')