From 12171f154d5c6d2ee5fd23b7439770f0b51d39fb Mon Sep 17 00:00:00 2001 From: Sahina Bose Date: Tue, 27 May 2014 12:21:29 +0530 Subject: nagios-common: Enhanced geo rep status parsing Enhanced the gluster cli - geo rep status- parsing to consider other node states like stopped and to combine it with volume topology. Change-Id: I21876339f82a0137c41f7ec7ccc0672815590e88 Bug-Url: https://bugzilla.redhat.com/1101218 Signed-off-by: Sahina Bose Reviewed-on: http://review.gluster.org/7888 Reviewed-by: Ramesh N Reviewed-by: Aravinda VK --- glusternagios/glustercli.py | 102 +++++++++++++++++++++++++++++--------------- tests/test_glustercli.py | 98 +++++++++++++++++++++++++++++++++--------- 2 files changed, 145 insertions(+), 55 deletions(-) diff --git a/glusternagios/glustercli.py b/glusternagios/glustercli.py index 77e514e..4029905 100755 --- a/glusternagios/glustercli.py +++ b/glusternagios/glustercli.py @@ -100,6 +100,7 @@ class GeoRepStatus: NOT_STARTED = "NOT_STARTED" FAULTY = "FAULTY" PARTIAL_FAULTY = "PARTIAL_FAULTY" + STOPPED = "STOPPED" class TransportType: @@ -494,44 +495,70 @@ def _parseVolumeSelfHealSplitBrainInfo(out): def _parseVolumeGeoRepStatus(volumeName, out): - detail = "" - status = GeoRepStatus.OK # https://bugzilla.redhat.com/show_bug.cgi?id=1090910 - opened for xml # output. 
For now parsing below string output format # MASTER NODE MASTER VOL MASTER BRICK # SLAVE STATUS CHECKPOINT STATUS CRAWL STATUS slaves = {} - all_status = ['ACTIVE', 'PASSIVE', 'FAULTY', 'NOT STARTED', 'INITIALISING'] - for line in out: - if any(gstatus in line.upper() for gstatus in all_status): - nodeline = line.split() - slave = nodeline[3] - if slaves.get(slave) is None: - slaves[slave] = {'nodecount': 0, - 'faultcount': 0, - 'notstarted': 0} - slaves[slave]['nodecount'] += 1 - if GeoRepStatus.FAULTY in line.upper() \ - or "NOT STARTED" in line.upper(): - node = nodeline[0] - if GeoRepStatus.FAULTY == nodeline[4].upper(): - slaves[slave]['faultcount'] += 1 - tempstatus = GeoRepStatus.FAULTY - else: - slaves[slave]['notstarted'] += 1 - tempstatus = GeoRepStatus.NOT_STARTED - detail += ("%s - %s - %s;" % (slave, - node, - tempstatus)) + other_status = ['ACTIVE', 'INITIALIZING...'] + for line in out[3:]: + tempstatus = None + nodeline = line.split() + node = nodeline[0] + brick = nodeline[2] + slave = nodeline[3] + if slaves.get(slave) is None: + slaves[slave] = {'nodecount': 0, + 'faulty': 0, + 'notstarted': 0, + 'stopped': 0, + 'passive': 0, + 'detail': '', + 'status': GeoRepStatus.OK + } + slaves[slave]['nodecount'] += 1 + if GeoRepStatus.FAULTY in line.upper(): + slaves[slave]['faulty'] += 1 + tempstatus = GeoRepStatus.FAULTY + elif "NOT STARTED" in line.upper(): + slaves[slave]['notstarted'] += 1 + tempstatus = GeoRepStatus.NOT_STARTED + elif "PASSIVE" in line.upper(): + slaves[slave]['passive'] += 1 + tempstatus = "PASSIVE" + elif GeoRepStatus.STOPPED in line.upper(): + slaves[slave]['stopped'] += 1 + tempstatus = GeoRepStatus.STOPPED + elif not any(gstatus in line.upper() for gstatus in other_status): + tempstatus = nodeline[4] + + if tempstatus: + slaves[slave]['detail'] += ("%s:%s - %s;" % (node, + brick, + tempstatus)) + volumes = volumeInfo(volumeName) + replicaCount = volumes[volumeName]["brickCount"] + if "REPLICATE" in 
volumes[volumeName]["volumeType"]: + replicaCount = volumes[volumeName]["replicaCount"] + for slave, count_dict in slaves.iteritems(): - if count_dict['nodecount'] == count_dict['faultcount']: - status = GeoRepStatus.FAULTY - break - elif count_dict['faultcount'] > 0: - status = GeoRepStatus.PARTIAL_FAULTY - elif count_dict['notstarted'] > 0 and status == GeoRepStatus.OK: - status = GeoRepStatus.NOT_STARTED - return {volumeName: {'status': status, 'detail': detail}} + if count_dict['faulty'] > 0: + # georep cli status does not give the node name in the same way as + # gluster volume info - there's no way to compare and get the + # subvolume. So if faulty + passive > number of primary bricks + # (nodecount / replicaCount), mark the slave as FAULTY + if (count_dict['faulty'] + count_dict['passive'] + > count_dict['nodecount']/replicaCount): + slaves[slave]['status'] = GeoRepStatus.FAULTY + else: + slaves[slave]['status'] = GeoRepStatus.PARTIAL_FAULTY + elif (count_dict['notstarted'] > 0 and + slaves[slave]['status'] == GeoRepStatus.OK): + slaves[slave]['status'] = GeoRepStatus.NOT_STARTED + elif (count_dict['stopped'] > 0 and + slaves[slave]['status'] == GeoRepStatus.OK): + slaves[slave]['status'] = GeoRepStatus.STOPPED + return {volumeName: {'slaves': slaves}} def volumeGeoRepStatus(volumeName, remoteServer=None): @@ -539,8 +566,15 @@ def volumeGeoRepStatus(volumeName, remoteServer=None): Arguments: * VolumeName Returns: - {VOLUMENAME: {'status': GEOREPSTATUS, - 'detail': detailed message}} + {VOLUMENAME: {'slaves': {SLAVENAME: { + 'nodecount': COUNT, + 'faulty': COUNT, + 'notstarted': COUNT, + 'stopped': COUNT, + 'passive': COUNT, + 'detail': detailed message, + 'status': GEOREPSTATUS} + }}} """ command = _getGlusterVolCmd() + ["geo-replication", volumeName, "status"] if remoteServer: diff --git a/tests/test_glustercli.py b/tests/test_glustercli.py index ebaeac4..e10a11f 100644 --- a/tests/test_glustercli.py +++ b/tests/test_glustercli.py @@ -1135,38 +1135,80 @@ class GlusterCliTests(TestCaseBase):
@mock.patch('glusternagios.utils.execCmd') @mock.patch('glusternagios.glustercli._getGlusterVolCmd') - def test_getVolumeGeoRepStatus(self, mock_glusterVolCmd, + @mock.patch('glusternagios.glustercli.volumeInfo') + def test_getVolumeGeoRepStatus(self, + mock_volumeInfo, + mock_glusterVolCmd, mock_execCmd,): mock_glusterVolCmd.return_value = ["gluster", "volume"] mock_execCmd.return_value = (0, self.__getGlusterGeoRepStatusResult(), None) + mock_volumeInfo.return_value = {'test-vol': + {'volumeType': 'REPLICATE', + 'replicaCount': 2, + 'brickCount': 2 + } + } expectedOut = {'test-vol': - {'status': gcli.GeoRepStatus.FAULTY, - 'detail': "10.70.43.68::slave-vol - " - "rhs3.novalocal - FAULTY;" - "10.70.43.68::slave-vol - " - "rhs3-2.novalocal - FAULTY;"}} + {'slaves': {'10.70.43.68::slave-vol': + {'faulty': 2, + 'nodecount': 2, + 'notstarted': 0, + 'stopped': 0, + 'passive': 0, + 'detail': 'rhs3.novalocal:' + '/bricks/b3 - FAULTY;' + 'rhs3-2.novalocal:' + '/bricks/b3 - FAULTY;', + 'status': gcli.GeoRepStatus.FAULTY} + }}} status = gcli.volumeGeoRepStatus("test-vol") print(status) self.assertEquals(status, expectedOut) @mock.patch('glusternagios.utils.execCmd') @mock.patch('glusternagios.glustercli._getGlusterVolCmd') - def test_getVolumeGeoRepStatusMuliSlave(self, mock_glusterVolCmd, - mock_execCmd,): + @mock.patch('glusternagios.glustercli.volumeInfo') + def test_getVolumeGeoRepStatusMultiSlave(self, + mock_volumeInfo, + mock_glusterVolCmd, + mock_execCmd,): mock_glusterVolCmd.return_value = ["gluster", "volume"] mock_execCmd.return_value = (0, self.__getGlusterGeoRepStatusResult2(), None) + mock_volumeInfo.return_value = {'test-vol': + {'volumeType': 'REPLICATE', + 'replicaCount': 2, + 'brickCount': 2 + } + } expectedOut = {'test-vol': - {'status': gcli.GeoRepStatus.PARTIAL_FAULTY, - 'detail': "10.70.43.68::slave-vol - " - "rhs3-2.novalocal - FAULTY;" - "10.70.43.68::slave-vol2 - " - "rhs3.novalocal - NOT_STARTED;" - "10.70.43.68::slave-vol2 - " - "rhs3-2.novalocal 
- NOT_STARTED;"}} + {'slaves': + {'10.70.43.68::slave-vol': + {'faulty': 1, + 'nodecount': 2, + 'notstarted': 0, + 'stopped': 0, + 'passive': 1, + 'detail': 'rhs3.novalocal:/bricks/b3 ' + '- PASSIVE;' + 'rhs3-2.novalocal:/bricks/b3 ' + '- FAULTY;', + 'status': gcli.GeoRepStatus.FAULTY}, + '10.70.43.68::slave-vol2': + {'faulty': 0, + 'nodecount': 2, + 'notstarted': 2, + 'stopped': 0, + 'passive': 0, + 'detail': 'rhs3.novalocal:/bricks/b3 ' + '- NOT_STARTED;' + 'rhs3-2.novalocal:/bricks/b3 ' + '- NOT_STARTED;', + 'status': gcli.GeoRepStatus.NOT_STARTED} + }}} status = gcli.volumeGeoRepStatus("test-vol") print(status) self.assertEquals(status, expectedOut) @@ -1174,9 +1216,20 @@ class GlusterCliTests(TestCaseBase): self.__getGlusterGeoRepStatusResult3(), None) expectedOut = {'test-vol': - {'status': gcli.GeoRepStatus.NOT_STARTED, - 'detail': "10.70.43.68::slave-vol - " - "rhs3-2.novalocal - NOT_STARTED;"}} + {'slaves': + {'10.70.43.68::slave-vol': + {'faulty': 0, + 'nodecount': 2, + 'notstarted': 1, + 'stopped': 0, + 'passive': 1, + 'detail': 'rhs3.novalocal:/bricks/b3 ' + '- PASSIVE;' + 'rhs3-2.novalocal:' + '/bricks/b3 ' + '- NOT_STARTED;', + 'status': gcli.GeoRepStatus.NOT_STARTED + }}}} status = gcli.volumeGeoRepStatus("test-vol") print(status) self.assertEquals(status, expectedOut) @@ -1214,7 +1267,8 @@ class GlusterCliTests(TestCaseBase): "/dir.7/file.4"] def __getGlusterGeoRepStatusResult(self): - return ["MASTER NODE MASTER VOL MASTER BRICK " + return [" ", + "MASTER NODE MASTER VOL MASTER BRICK " "SLAVE STATUS CHECKPOINT STATUS " "CRAWL STATUS", "--------------------------------------------------------" @@ -1228,7 +1282,8 @@ class GlusterCliTests(TestCaseBase): "N/A N/A "] def __getGlusterGeoRepStatusResult2(self): - return ["MASTER NODE MASTER VOL MASTER BRICK " + return [" ", + "MASTER NODE MASTER VOL MASTER BRICK " "SLAVE STATUS CHECKPOINT STATUS " "CRAWL STATUS", "--------------------------------------------------------" @@ -1248,7 +1303,8 @@ class 
GlusterCliTests(TestCaseBase): "N/A N/A "] def __getGlusterGeoRepStatusResult3(self): - return ["MASTER NODE MASTER VOL MASTER BRICK " + return [" ", + "MASTER NODE MASTER VOL MASTER BRICK " "SLAVE STATUS CHECKPOINT STATUS " "CRAWL STATUS", "--------------------------------------------------------" -- cgit