path: root/tests/functional/resource_leak/test_verify_gluster_memleak_with_management_encryption.py
#  Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
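
""" Description:
    Verify that gluster processes (glusterd/glusterfsd) do not leak
    memory or hit OOM kills when management encryption is enabled,
    with and without brick multiplexing.
"""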


from datetime import datetime, timedelta
from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.lib_utils import get_usable_size_per_disk
from glustolibs.gluster.volume_libs import (get_subvols, bulk_volume_creation,
                                            volume_stop, volume_start,
                                            set_volume_options)
from glustolibs.io.memory_and_cpu_utils import (
    wait_for_logging_processes_to_stop)
from glustolibs.gluster.brick_libs import get_all_bricks
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.io.utils import validate_io_procs
from glustolibs.gluster.brickmux_ops import (enable_brick_mux,
                                             disable_brick_mux,
                                             is_brick_mux_enabled)
from glustolibs.gluster.mount_ops import mount_volume, umount_volume


@runs_on([['distributed-replicated'], ['glusterfs']])
class TestMemLeakAfterMgmntEncrypEnabled(GlusterBaseClass):

    def setUp(self):
        """
        Setup and mount volume or raise ExecutionError
        """
        self.get_super_method(self, 'setUp')()
        self.test_id = self.id()
        # Setup Volume
        self.volume['dist_count'] = 2
        self.volume['replica_count'] = 3

        ret = self.setup_volume_and_mount_volume([self.mounts[0]])
        if not ret:
            raise ExecutionError("Failed to Setup and Mount Volume")

        # Disable I/O encryption
        self._disable_io_encryption()

    def tearDown(self):
        # Disable brick_mux
        if is_brick_mux_enabled(self.mnode):
            ret = disable_brick_mux(self.mnode)
            self.assertTrue(ret, "Failed to disable brick multiplexing")
            g.log.info("Disabled brick multiplexing")

        # Unmount and cleanup original volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]])
        if not ret:
            raise ExecutionError("Failed to umount the vol & cleanup Volume")
        g.log.info("Successful in umounting the volume and Cleanup")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def _run_io(self):
        """ Run IO and fill vol upto ~88%"""
        bricks = get_all_bricks(self.mnode, self.volname)
        usable_size = int(get_usable_size_per_disk(bricks[0]) * 0.88)

        self.procs = []
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        for counter, _ in enumerate(subvols, start=1):
            filename = "{}/test_file_{}".format(self.mounts[0].mountpoint,
                                                counter)
            proc = g.run_async(self.mounts[0].client_system,
                               "fallocate -l {}G {}".format(usable_size,
                                                            filename))
            self.procs.append(proc)

    def _perform_gluster_v_heal_for_12_hrs(self):
        """ Run 'gluster v heal info' in a loop for 12 hours """
        end_time = datetime.now() + timedelta(hours=12)
        cmd = "gluster volume heal %s info" % self.volname
        while datetime.now() < end_time:
            ret, _, _ = g.run(self.mnode, cmd)
            self.assertEqual(ret, 0, "Failed to execute heal info cmd")
        g.log.info("Successfully ran for 12 hours. Checking for "
                   "memory leaks")

    def _verify_memory_leak(self):
        """ Verify memory leak is found """

        ret = self.check_for_memory_leaks_and_oom_kills_on_servers(
            self.test_id)
        self.assertFalse(ret,
                         "Memory leak and OOM kills check failed on servers")

        ret = self.check_for_memory_leaks_and_oom_kills_on_clients(
            self.test_id)
        self.assertFalse(ret,
                         "Memory leak and OOM kills check failed on clients")

    def _disable_io_encryption(self):
        """ Disables IO encryption """
        # UnMount Volume
        g.log.info("Starting to Unmount Volume %s", self.volname)
        ret, _, _ = umount_volume(self.mounts[0].client_system,
                                  self.mounts[0].mountpoint,
                                  mtype=self.mount_type)
        self.assertEqual(ret, 0, "Failed to Unmount volume")

        # Stop Volume
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to Stop volume")

        # Disable server and client SSL usage
        options = {"server.ssl": "off",
                   "client.ssl": "off"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, "Failed to set volume options")

        # Start Volume
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to Start volume")

        # Mount Volume
        ret, _, _ = mount_volume(self.volname, mtype=self.mount_type,
                                 mpoint=self.mounts[0].mountpoint,
                                 mserver=self.mnode,
                                 mclient=self.mounts[0].client_system)
        self.assertEqual(ret, 0, "Failed to mount the volume back")

    def test_mem_leak_on_gluster_procs_with_management_encryption(self):
        """
        Steps:
        1) Enable management encryption on the cluster (assumed to be
           pre-configured; see the note below).
        2) Create a 2x3 volume.
        3) Mount the volume using FUSE on a client node.
        4) Start IO on the mount (run IO till the volume is ~88% full).
        5) Simultaneously start collecting the memory usage of the
           'glusterfsd' processes.
        6) Issue "gluster v heal <volname> info" continuously in a loop.
        """
        # Run IO
        self._run_io()

        # Start monitoring resource usage on servers and clients
        # default interval = 60 sec; count = 780 => ~13 hrs of logging,
        # enough to cover the 12 hr heal-info loop with some buffer
        monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
            self.test_id, count=780)
        self.assertIsNotNone(monitor_proc_dict,
                             "Failed to start monitoring on servers and "
                             "clients")

        ret = validate_io_procs(self.procs, self.mounts)
        self.assertTrue(ret, "IO Failed")

        self._perform_gluster_v_heal_for_12_hrs()

        # Wait for monitoring processes to complete
        ret = wait_for_logging_processes_to_stop(monitor_proc_dict,
                                                 cluster=True)
        self.assertTrue(ret, "ERROR: Failed to stop monitoring processes")

        # Check if there are any memory leaks and OOM killers
        self._verify_memory_leak()
        g.log.info("No memory leaks/OOM kills found on serves and clients")

    def test_mem_leak_on_gluster_procs_with_brick_multiplex(self):
        """
        Steps:
        1) Enable cluster.brick-multiplex
        2) Enable SSL on the management layer (assumed to be pre-enabled;
           see the note below)
        3) Create volumes in bulk
        4) Mount a volume and start I/O
        5) Monitor the memory consumption of the glusterd process
        """

        # Enable cluster.brick-multiplex
        ret = enable_brick_mux(self.mnode)
        self.assertTrue(ret, "Failed to enable brick-multiplex")

        # Verify the operation
        ret = is_brick_mux_enabled(self.mnode)
        self.assertTrue(ret, "Brick multiplex is not enabled")

        # Create a few volumes
        self.volume['replica_count'] = 3
        ret = bulk_volume_creation(self.mnode, 20, self.all_servers_info,
                                   self.volume, is_force=True)
        self.assertTrue(ret, "Failed to create volumes in bulk")

        # Run IO
        self._run_io()

        # Start memory usage logging (default interval = 60 sec;
        # count = 60 => ~1 hr)
        monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
            self.test_id, count=60)
        self.assertIsNotNone(monitor_proc_dict,
                             "Failed to start monitoring on servers and "
                             "clients")

        ret = validate_io_procs(self.procs, self.mounts)
        self.assertTrue(ret, "IO Failed")

        # Wait for monitoring processes to complete
        ret = wait_for_logging_processes_to_stop(monitor_proc_dict,
                                                 cluster=True)
        self.assertTrue(ret, "ERROR: Failed to stop monitoring processes")

        # Check if there are any memory leaks and OOM killers
        self._verify_memory_leak()
        g.log.info("No memory leaks/OOM kills found on serves and clients")

        # Disable Brick multiplex
        ret = disable_brick_mux(self.mnode)
        self.assertTrue(ret, "Failed to disable brick multiplexing")