From eb3ce65fef8751d38f785ae6f69f4dbb31f8e0b4 Mon Sep 17 00:00:00 2001 From: Sahina Bose Date: Wed, 30 Sep 2015 18:19:28 +0530 Subject: nagios-plugins: Added heal info monitoring Added a plugin to monitor heal info for a volume Corrected pep8 errors Changed default monitoring interval to 1 min for heal info monitoring Bug-Url: https://bugzilla.redhat.com/1267586 BUG: 1267586 Change-Id: I1c51d4d0f2129a7bb19f44b63382fcfc7384dc40 Signed-off-by: Sahina Bose Reviewed-on: http://review.gluster.org/12262 Reviewed-by: Ramesh N --- config/gluster-commands.cfg | 5 +++++ config/gluster-templates.cfg | 6 ++++++ plugins/check_vol_server.py | 15 +++++++++++---- plugins/config_generator.py | 15 +++++++++++++++ tests/test_config_generator.py | 6 +++++- 5 files changed, 42 insertions(+), 5 deletions(-) diff --git a/config/gluster-commands.cfg b/config/gluster-commands.cfg index b4df742..44a3cbf 100644 --- a/config/gluster-commands.cfg +++ b/config/gluster-commands.cfg @@ -96,6 +96,11 @@ define command { command_line $USER1$/gluster/check_vol_server.py $ARG1$ $ARG2$ -o self-heal } +define command { + command_name check_vol_heal_info + command_line $USER1$/gluster/check_vol_server.py $ARG1$ $ARG2$ -o heal-info +} + define command { command_name check_vol_georep_status command_line $USER1$/gluster/check_vol_server.py $ARG1$ $ARG2$ -o geo-rep diff --git a/config/gluster-templates.cfg b/config/gluster-templates.cfg index b9e38da..bcfa003 100644 --- a/config/gluster-templates.cfg +++ b/config/gluster-templates.cfg @@ -77,6 +77,12 @@ define service { register 0 } +define service { + name gluster-heal-service-with-graph + use gluster-service-with-graph + check_interval 1 +} + define service { name gluster-service-without-graph use gluster-service diff --git a/plugins/check_vol_server.py b/plugins/check_vol_server.py index 0e108d0..2301e61 100755 --- a/plugins/check_vol_server.py +++ b/plugins/check_vol_server.py @@ -58,6 +58,10 @@ def _getVolSelfHealStatusNRPECommand(volume): return ("check_vol_status -a %s %s" % (volume, 'self-heal')) +def _getVolSelfHealInfoNRPECommand(volume): + return ("check_vol_status -a %s %s" % (volume, 'heal-info')) + + def _getVolGeoRepStatusNRPECommand(volume): return ("check_vol_status -a %s %s" % (volume, 'geo-rep')) @@ -223,10 +227,10 @@ def _getVolumeQuotaStatusOutput(hostgroup, volume): def _getQuorumStatusOutput(hostgroup): # get current volume quorum status table = json.loads(livestatus.readLiveStatusAsJSON("GET services\n" - "Columns: state plugin_output\n" - "Filter: description = " - "Cluster - Quorum Status\n" - "Filter: host_name = %s\n" % hostgroup)) + "Columns: state plugin_output\n" + "Filter: description = " + "Cluster - Quorum Status\n" + "Filter: host_name = %s\n" % hostgroup)) servicestatus = utils.PluginStatusCode.UNKNOWN pluginoutput = '' for row in table: @@ -294,6 +298,8 @@ def showVolumeOutput(args): command = _getVolSelfHealStatusNRPECommand(args.volume) elif args.option == 'geo-rep': command = _getVolGeoRepStatusNRPECommand(args.volume) + elif args.option == 'heal-info': + command = _getVolSelfHealInfoNRPECommand(args.volume) elif args.option == 'quorum': return _getQuorumStatusOutput(args.hostgroup) @@ -331,6 +337,7 @@ def parse_input(): 'status', 'quota', 'self-heal', + 'heal-info', 'geo-rep', 'quorum']) parser.add_argument('-t', '--timeout', diff --git a/plugins/config_generator.py b/plugins/config_generator.py index 64b093a..746cefa 100644 --- a/plugins/config_generator.py +++ b/plugins/config_generator.py @@ -121,6 +121,18 @@ class GlusterNagiosConfManager: volumeService['check_command'] = checkCommand return volumeService + def __createVolumeHealInfoService(self, volume, clusterName): + volumeService = {} + volumeService['host_name'] = clusterName + volumeService['use'] = 'gluster-heal-service-with-graph' + serviceDesc = 'Volume Heal info - %s' % (volume['name']) + volumeService['service_description'] = serviceDesc + volumeService[VOL_NAME] = volume['name'] + checkCommand = 'check_vol_heal_info!%s!%s' % \ + (clusterName, volume['name']) + volumeService['check_command'] = checkCommand + return volumeService + def __createVolumeGeoRepStatusService(self, volume, clusterName): volumeService = {} volumeService['host_name'] = clusterName @@ -174,6 +186,9 @@ class GlusterNagiosConfManager: volumeService = self.__createVolumeHealStatusService( volume, clusterName) volumeServices.append(volumeService) + volumeService = self.__createVolumeHealInfoService( + volume, clusterName) + volumeServices.append(volumeService) if volume.get('geo-rep') == "on": volumeService = self.__createVolumeGeoRepStatusService( volume, clusterName) diff --git a/tests/test_config_generator.py b/tests/test_config_generator.py index 91fce1e..c4a54c6 100644 --- a/tests/test_config_generator.py +++ b/tests/test_config_generator.py @@ -106,7 +106,11 @@ class TestGlusterNagiosConfManager(TestCaseBase): service = self._findServiceInList(serviceList, serviceDesc) self.assertNotEqual(service, None, "Volume Split-brain service is not created") - serviceCount += 1 + serviceDesc = 'Volume Heal info - %s' % (volume['name']) + service = self._findServiceInList(serviceList, serviceDesc) + self.assertNotEqual(service, None, + "Volume Heal info service is not created") + serviceCount += 2 return serviceCount def _findServiceInList(self, serviceList, serviceDescription): -- cgit