#  Copyright (C) 2020  Red Hat, Inc. <http://www.redhat.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# pylint: disable=too-many-statements, too-many-locals
from glusto.core import Glusto as g

from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                           bring_bricks_online,
                                           wait_for_bricks_to_be_online)
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.gluster.volume_ops import set_volume_options
from glustolibs.io.utils import validate_io_procs
from glustolibs.gluster.heal_libs import is_volume_in_split_brain
from glustolibs.gluster.volume_libs import get_subvols


@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
class TestSplitBrain(GlusterBaseClass):

    @classmethod
    def setUpClass(cls):

        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        # Upload io scripts for running IO on mounts
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        ret = upload_scripts(cls.clients, cls.script_upload_path)
        if not ret:
            raise ExecutionError("Failed to upload IO scripts "
                                 "to clients %s" % cls.clients)

        # Setup Volume and Mount Volume
        ret = cls.setup_volume_and_mount_volume(cls.mounts, True)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")

    @classmethod
    def tearDownClass(cls):
        """
        Cleanup Volume
        """
        ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
        if not ret:
            raise ExecutionError("Failed to create volume")

        cls.get_super_method(cls, 'tearDownClass')()

    def _bring_bricks_online(self):
        """
        Bring bricks online and monitor heal completion
        """
        # Bring bricks online
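        # The 'volume_start_force' method issues a forced volume start, which
        # starts any brick processes that are not currently running.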
        ret = bring_bricks_online(
            self.mnode,
            self.volname,
            self.bricks_to_bring_offline,
            bring_bricks_online_methods=['volume_start_force'])
        self.assertTrue(ret, 'Failed to bring bricks online')

        # Wait for the bricks to be online
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks of volume {} did not come "
                             "online".format(self.volname))

    def test_split_brain(self):

        """
        Description: Create split-brain on files and check if IO's fail
        - Disable self-heal and cluster-quorum-type
        - Get the bricks from the volume
        - Write IO and validate IO
        - Bring 1st set of brick offline(1 Data brick and arbiter brick)
        - Write IO and validate IO
        - Bring 2nd set of bricks offline(1 Data brick and arbiter brick)
        - Write IO and validate IO
        - Check volume is in split-brain
        - Write IO and validate IO - should fail
        - Enable self-heal and cluster-quorum-type
        - Write IO and validate IO - should fail
        """
        # Disable self-heal and cluster-quorum-type
        options = {"self-heal-daemon": "off",
                   "cluster.quorum-type": "none"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))

        # Get the bricks from the volume
        sub_vols = get_subvols(self.mnode, self.volname)
        self.bricks_to_bring_offline = list(sub_vols['volume_subvols'][0])
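
        # In an arbiter subvolume the arbiter brick is the last brick of the
        # replica set, so the index pairs (0, -1) and (1, -1) used below pick
        # one data brick together with the arbiter brick each time.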

        # Write I/O from the mount point and validate that it succeeds
        write_cmd = ("/usr/bin/env python %s create_files -f 1 "
                     "--base-file-name test_file --fixed-file-size 1k %s" %
                     (self.script_upload_path,
                      self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, write_cmd)
        self.assertEqual(ret, 0, "Failed to create files on "
                         "{}".format(self.mounts[0].mountpoint))

        # Bring the bricks offline in two sets (one data brick plus the
        # arbiter brick each time) and write I/O while each set is down
        for bricks in ((0, -1), (1, -1)):
            down_bricks = [self.bricks_to_bring_offline[index]
                           for index in bricks]
            ret = bring_bricks_offline(self.volname, down_bricks)
            self.assertTrue(ret, 'Failed to bring bricks {} offline'.
                            format(down_bricks))
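
            # Write I/O while this set of bricks is offline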
            proc = g.run_async(self.mounts[0].client_system, write_cmd)

            # Validate I/O
            self.assertTrue(
                validate_io_procs([proc], self.mounts),
                "IO failed on some of the clients"
            )
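
            # This write lands only on the data brick that is still up;
            # repeating the step with the other data brick offline leaves the
            # two data copies disagreeing, i.e. a data split-brain.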

            # Bring bricks online
            self._bring_bricks_online()

        # Check volume is in split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertTrue(ret, "unable to create split-brain scenario")
        g.log.info("Successfully created split brain scenario")

        # Write I/O, which should now fail
        proc2 = g.run_async(self.mounts[0].client_system, write_cmd)

        # Validate that the I/O failed
        self.assertFalse(
            validate_io_procs([proc2], self.mounts),
            "Unexpected: I/O succeeded on a volume in split-brain"
        )
        g.log.info("Expected: I/O failed due to split-brain")

        # Enable self-heal and cluster-quorum-type
        options = {"self-heal-daemon": "on",
                   "cluster.quorum-type": "auto"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))

        # Write I/O, which should still fail
        proc3 = g.run_async(self.mounts[0].client_system, write_cmd)

        # Validate that the I/O failed
        self.assertFalse(
            validate_io_procs([proc3], self.mounts),
            "Unexpected: I/O succeeded on a volume in split-brain"
        )
        g.log.info("Expected: I/O failed due to split-brain")