summaryrefslogtreecommitdiffstats
path: root/tests/functional/disperse/test_ec_io_hang_clientside_heal.py
blob: 974170930ba9aac491e80301e60fbc784d14a4da (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#  Copyright (C) 2018  Red Hat, Inc. <http://www.redhat.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

"""
ec_io_hang_during_clientside_heal:
    Disable server side heal.
    Perform IO on mount point and kill some bricks and bring them up.
    Check that the heal should complete via client side heal and it
    should not hang any IO.
"""
from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
from glustolibs.gluster.brick_libs import (
    bring_bricks_offline,
    bring_bricks_online,
    are_bricks_online,
    get_all_bricks)
from glustolibs.gluster.volume_libs import (
    log_volume_info_and_status)
from glustolibs.gluster.heal_ops import (
    disable_heal)
from glustolibs.gluster.heal_libs import (
    monitor_heal_completion)
from glustolibs.gluster.heal_ops import get_heal_info
from glustolibs.gluster.glusterfile import set_file_permissions


def ec_check_heal_comp(self):
    g.log.info("Get the pending heal info for the volume %s",
               self.volname)
    heal_info = get_heal_info(self.mnode, self.volname)
    g.log.info("Successfully got heal info for the volume %s",
               self.volname)
    g.log.info("Heal Entries %s : %s", self.volname, heal_info)

    # Monitor heal completion
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')


@runs_on([['dispersed'], ['glusterfs']])
class EcClientHealHangTest(GlusterBaseClass):
    # Method to setup the environment for test case
    def setUp(self):
        # Setup Volume and Mount Volume
        g.log.info("Starting to Setup Volume and Mount Volume")
        ret = self.setup_volume_and_mount_volume(mounts=self.mounts,
                                                 volume_create_force=True)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

    # Test Case
    def test_heal_client_io_hang(self):
        mountpoint = self.mounts[0].mountpoint

        # disable server side heal
        ret = disable_heal(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to disable server side heal"))
        g.log.info("Successfully disabled server side heal")

        # Log Volume Info and Status after disabling client side heal
        g.log.info("Logging volume info and status")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed "
                              "on volume %s", self.volname))

        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the bricks list")

        # Create files
        cmd = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;"
               "do touch file$i; done" % mountpoint)

        ret, _, err = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished creating files while all the bricks are UP')

        # Bring bricks offline
        ret = bring_bricks_offline(self.volname, bricks_list[0:1])
        self.assertTrue(ret, "Failed to bring down the bricks")
        g.log.info("Successfully brought the bricks down")

        # Start pumping IO from client
        cmd = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;"
               "do dd if=/dev/urandom of=file$i bs=1M "
               "count=5;done" % mountpoint)

        ret, _, err = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished writing on files while a brick is DOWN')

        # Bring bricks online
        ret = bring_bricks_online(self.mnode, self.volname, bricks_list[0:1])
        self.assertTrue(ret, "Failed to bring up the bricks")
        g.log.info("Successfully brought the bricks up")

        # Verifying all bricks online
        ret = are_bricks_online(self.mnode, self.volname, bricks_list)
        self.assertTrue(ret, "All bricks are not online")

        # Start client side heal by reading/writing files and directories
        appendcmd = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;"
                     "do dd if=/dev/urandom of=file$i bs=1M "
                     "count=1 oflag=append conv=notrunc;done" % mountpoint)

        readcmd = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;"
                   "do dd if=file$i of=/dev/null bs=1M "
                   "count=5;done" % mountpoint)
        ret = set_file_permissions(self.mounts[0].client_system,
                                   "%s/test" % mountpoint, 777)
        self.assertTrue(ret, "Failed to set permission for directory")
        g.log.info("Successfully set permissions for directory")

        ret, _, err = g.run(self.mounts[0].client_system, appendcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished append on files after bringing bricks online')

        ret, _, err = g.run(self.mounts[0].client_system, readcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished read on files after bringing bricks online')

        # check the heal info and completion
        ec_check_heal_comp(self)

        # Log Volume Info and Status after bringing the brick up
        g.log.info("Logging volume info and status")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed "
                              "on volume %s", self.volname))
        g.log.info("Successful in logging volume info and status "
                   "of volume %s", self.volname)

    # Method to cleanup test setup
    def tearDown(self):
        # Stopping the volume
        g.log.info("Starting to Unmount Volume and Cleanup Volume")
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
        g.log.info("Successful in Unmount Volume and Cleanup Volume")