summaryrefslogtreecommitdiffstats
path: root/tests/functional/dht/test_remove_brick_no_commit_followed_by_rebalance.py
blob: dc80a35446aff74171d53dea92eb1ef9d13535e8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#  Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from time import sleep
from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.io.utils import collect_mounts_arequal, validate_io_procs
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.gluster.volume_libs import (form_bricks_list_to_remove_brick,
                                            expand_volume)
from glustolibs.gluster.brick_ops import remove_brick
from glustolibs.gluster.rebalance_ops import (rebalance_start,
                                              wait_for_rebalance_to_complete)


@runs_on([['distributed', 'distributed-replicated',
           'distributed-dispersed', 'distributed-arbiter'],
          ['glusterfs']])
class TestRemoveBrickNoCommitFollowedByRebalance(GlusterBaseClass):
    @classmethod
    def setUpClass(cls):
        cls.get_super_method(cls, 'setUpClass')()

        # Upload io scripts for running IO on mounts
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        ret = upload_scripts(cls.clients, cls.script_upload_path)
        if not ret:
            raise ExecutionError("Failed to upload IO scripts "
                                 "to clients %s" % cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

    def setUp(self):
        """
        Setup and mount volume or raise ExecutionError
        """
        # Calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        # Setup Volume
        ret = self.setup_volume_and_mount_volume([self.mounts[0]])
        if not ret:
            raise ExecutionError("Failed to Setup and Mount Volume")

    def tearDown(self):

        # Unmount and cleanup original volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]])
        if not ret:
            raise ExecutionError("Failed to umount the vol & cleanup Volume")
        g.log.info("Successful in umounting the volume and Cleanup")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def test_remove_brick_no_commit_followed_by_rebalance(self):
        """
        Description: Tests to check that there is no data loss when
                     remove-brick operation is stopped and then new bricks
                     are added to the volume.
         Steps :
         1) Create a volume.
         2) Mount the volume using FUSE.
         3) Create files and dirs on the mount-point.
         4) Calculate the arequal-checksum on the mount-point
         5) Start remove-brick operation on the volume.
         6) While migration is in progress, stop the remove-brick
            operation.
         7) Add-bricks to the volume and trigger rebalance.
         8) Wait for rebalance to complete.
         9) Calculate the arequal-checksum on the mount-point.
         """
        # Start IO on mounts
        m_point = self.mounts[0].mountpoint
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dir-length 10 --dir-depth 2 --max-num-of-dirs 1 "
               "--num-of-files 50 --file-type empty-file %s" % (
                   self.script_upload_path, m_point))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd, user=self.mounts[0].user)
        g.log.info("IO on %s:%s is started successfully",
                   self.mounts[0].client_system, m_point)

        # Validate IO
        self.assertTrue(
            validate_io_procs([proc], self.mounts[0]),
            "IO failed on some of the clients"
        )

        # Calculate arequal-checksum before starting remove-brick
        ret, arequal_before = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Form bricks list for volume shrink
        remove_brick_list = form_bricks_list_to_remove_brick(
            self.mnode, self.volname, subvol_name=1)
        self.assertIsNotNone(remove_brick_list, ("Volume %s: Failed to "
                                                 "form bricks list for "
                                                 "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Shrink volume by removing bricks
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list, "start")
        self.assertEqual(ret, 0, ("Volume %s shrink failed ",
                                  self.volname))
        g.log.info("Volume %s shrink started ", self.volname)

        # Log remove-brick status
        ret, out, _ = remove_brick(self.mnode, self.volname,
                                   remove_brick_list, "status")
        self.assertEqual(ret, 0, ("Remove-brick status failed on %s ",
                                  self.volname))

        # Check if migration is in progress
        if r'in progress' in out:
            # Stop remove-brick process
            g.log.info("Stop removing bricks from volume")
            ret, out, _ = remove_brick(self.mnode, self.volname,
                                       remove_brick_list, "stop")
            self.assertEqual(ret, 0, "Failed to stop remove-brick process")
            g.log.info("Stopped remove-brick process successfully")
        else:
            g.log.error("Migration for remove-brick is complete")

        # Sleep for 30 secs so that any running remove-brick process stops
        sleep(30)

        # Add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Add-brick failed", self.volname))
        g.log.info("Volume %s: Add-brick successful", self.volname)

        # Tigger rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance",
                                  self.volname))
        g.log.info("Volume %s: Rebalance started ", self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, "Rebalance has not completed")
        g.log.info("Rebalance has completed successfully")

        # Calculate arequal-checksum on mount-point
        ret, arequal_after = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Check if there is any data loss
        self.assertEqual(set(arequal_before), set(arequal_after),
                         ("There is data loss"))
        g.log.info("The checksum before and after rebalance is same."
                   " There is no data loss.")