From 26b98e7239222704bb7438bcc47793fc6be60f2c Mon Sep 17 00:00:00 2001 From: Shubhendu Tripathi Date: Fri, 21 Mar 2014 13:53:46 +0530 Subject: nagios-server-addons: Test case for host event handler Added unit test cases for host event handler with minor code fixes Change-Id: Id9516303aaa1e4f14e781a06d4f73158bfcdebf4 Signed-off-by: Shubhendu Tripathi Reviewed-on: https://code.engineering.redhat.com/gerrit/21669 Reviewed-by: Darshan Narayana Murthy Reviewed-by: Sahina Bose --- plugins/Makefile.am | 3 +- plugins/check_remote_host.py | 119 ----------------------- plugins/check_remote_host.py.in | 108 +++++++++++++++++++++ plugins/gluster_host_service_handler.py | 145 ----------------------------- plugins/gluster_host_service_handler.py.in | 140 ++++++++++++++++++++++++++++ 5 files changed, 250 insertions(+), 265 deletions(-) delete mode 100755 plugins/check_remote_host.py create mode 100755 plugins/check_remote_host.py.in delete mode 100755 plugins/gluster_host_service_handler.py create mode 100755 plugins/gluster_host_service_handler.py.in (limited to 'plugins') diff --git a/plugins/Makefile.am b/plugins/Makefile.am index b7917ec..ef199f0 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -2,10 +2,11 @@ dist_glusternagiosplugins_PYTHON = \ constants.py \ check_cluster_vol_usage.py \ check_remote_host.py \ + check_vol_utilization_server.py \ gluster_host_service_handler.py \ livestatus.py \ notify_ovirt_engine_handler.py \ - check_vol_utilization_server.py \ + server_utils.py \ $(NULL) EXTRA_DIST = \ diff --git a/plugins/check_remote_host.py b/plugins/check_remote_host.py deleted file mode 100755 index 6f540df..0000000 --- a/plugins/check_remote_host.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/python -# -# check_remote_host.py -- nagios plugin uses Mklivestatus to get the overall -# status -# of a host. The services considered by default for the status of the host -# are - -# 1. LV/Inode Service status -# 2. CPU Utilization -# 3. Memory Utilization -# 4. 
Network Utilization -# 5. Swap Utilization -# -# Copyright (C) 2014 Red Hat Inc -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA -# - -import os -import sys -import getopt -#import socket -import json - -import livestatus - -STATUS_OK = 0 -STATUS_WARNING = 1 -STATUS_CRITICAL = 2 -STATUS_UNKNOWN = 3 -_commandStatusStrs = {STATUS_OK: 'OK', STATUS_WARNING: 'WARNING', - STATUS_CRITICAL: 'CRITICAL', STATUS_UNKNOWN: 'UNKNOWN'} - - -# Load the host monitoring services list -def loadSrvcList(): - srvc_list = [] - with open("/etc/nagios/gluster/host-monitoring-services.in") as data_file: - srvc_list = json.load(data_file)['serviceList'] - return srvc_list - - -# Method to execute livestatus -def checkLiveStatus(hostAddr, srvc): - cmd = "GET services\nColumns: state\nFilter: " \ - "description = %s\n" \ - "Filter: host_address = %s" % (srvc, hostAddr) - - table = livestatus.readLiveStatus(cmd) - - if len(table) > 0 and len(table[0]) > 0: - return int(table[0][0]) - else: - return STATUS_UNKNOWN - - -# Method to show the usage -def showUsage(): - usage = "Usage: %s -H \n" % os.path.basename(sys.argv[0]) - sys.stderr.write(usage) - - -# Main method -if __name__ == "__main__": - try: - opts, args = getopt.getopt(sys.argv[1:], "hH:", ["help", "host="]) - except getopt.GetoptError as e: - print (str(e)) - showUsage() - 
sys.exit(STATUS_CRITICAL) - - hostAddr = '' - if len(opts) == 0: - showUsage() - sys.exit(STATUS_CRITICAL) - else: - for opt, arg in opts: - if opt in ("-h", "--help"): - showUsage() - sys.exit() - elif opt in ("-H", "--host"): - hostAddr = arg - else: - showUsage() - sys.exit(STATUS_CRITICAL) - - # Load the services list - srvc_list = loadSrvcList() - - # Calculate the consolidated status for the host based on above - # status of individual services - finalStatus = STATUS_OK - criticalSrvcs = [] - for srvc in srvc_list: - srvc_status = checkLiveStatus(hostAddr, srvc) - finalStatus = finalStatus | srvc_status - if srvc_status == STATUS_CRITICAL: - criticalSrvcs.append(str(srvc)) - - # Return the status - if finalStatus == STATUS_CRITICAL: - print "Host Status %s - Service(s) %s in CRITICAL state" % \ - (_commandStatusStrs[STATUS_WARNING], criticalSrvcs) - sys.exit(STATUS_WARNING) - - print "Host Status %s - Services in good health" % \ - _commandStatusStrs[STATUS_OK] - sys.exit(STATUS_OK) diff --git a/plugins/check_remote_host.py.in b/plugins/check_remote_host.py.in new file mode 100755 index 0000000..0ef101e --- /dev/null +++ b/plugins/check_remote_host.py.in @@ -0,0 +1,108 @@ +#!/usr/bin/python +# +# check_remote_host.py -- nagios plugin uses Mklivestatus to get the overall +# status +# of a host. The services considered by default for the status of the host +# are - +# 1. LV/Inode Service status +# 2. CPU Utilization +# 3. Memory Utilization +# 4. Network Utilization +# 5. Swap Utilization +# +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA
#

import os
import sys
import getopt
import json

import livestatus
from glusternagios import utils


# Method to execute livestatus
def checkLiveStatus(hostAddr, srvc):
    """Return the state of service `srvc` on host `hostAddr`.

    Sends a "GET services" query through livestatus.readLiveStatus() and
    converts the first column of the first returned row to int.  When the
    result has no row or no column, utils.PluginStatusCode.UNKNOWN is
    returned instead.
    """
    cmd = "GET services\nColumns: state\nFilter: " \
          "description = %s\n" \
          "Filter: host_address = %s" % (srvc, hostAddr)

    table = livestatus.readLiveStatus(cmd)

    if len(table) > 0 and len(table[0]) > 0:
        return int(table[0][0])
    else:
        return utils.PluginStatusCode.UNKNOWN


def _getHostMonitoringSrvcList():
    """Load the 'serviceList' entry from the host monitoring services file.

    The "@hostmonitoringserviceslist@" token is presumably replaced with the
    real path when the .py is generated from this .py.in -- confirm against
    the build scripts.
    """
    with open("@hostmonitoringserviceslist@") as data_file:
        return json.load(data_file)['serviceList']


# Method to show the usage
def showUsage():
    """Write the command line synopsis to stderr."""
    usage = "Usage: %s -H <host_address>\n" % os.path.basename(sys.argv[0])
    sys.stderr.write(usage)


# Main method
if __name__ == "__main__":
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hH:", ["help", "host="])
    except getopt.GetoptError as e:
        print(str(e))
        showUsage()
        sys.exit(utils.PluginStatusCode.CRITICAL)

    hostAddr = ''
    if not opts:
        showUsage()
        sys.exit(utils.PluginStatusCode.CRITICAL)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            showUsage()
            sys.exit()
        elif opt in ("-H", "--host"):
            hostAddr = arg
        else:
            showUsage()
            sys.exit(utils.PluginStatusCode.CRITICAL)

    # Collect every monitored service that is currently CRITICAL.
    #
    # The decision is taken on this list rather than on a bitwise OR of the
    # individual states: OR-ing WARNING (1) with CRITICAL (2) yields 3, which
    # is not equal to CRITICAL, so a host with one WARNING and one CRITICAL
    # service used to be reported as healthy.
    criticalSrvcs = []
    for srvc in _getHostMonitoringSrvcList():
        srvc_status = checkLiveStatus(hostAddr, srvc)
        if srvc_status == utils.PluginStatusCode.CRITICAL:
            criticalSrvcs.append(str(srvc))

    # A CRITICAL service degrades the host to WARNING; anything else is OK
    if criticalSrvcs:
        print("Host Status %s - Service(s) %s in CRITICAL state" %
              (utils.PluginStatus.WARNING, criticalSrvcs))
        sys.exit(utils.PluginStatusCode.WARNING)

    print("Host Status %s - Services in good health" %
          utils.PluginStatus.OK)
    sys.exit(utils.PluginStatusCode.OK)
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA -# - -import os -import sys -import datetime -import getopt - -import livestatus - -STATUS_OK = "OK" -STATUS_WARNING = "WARNING" -STATUS_CRITICAL = "CRITICAL" -STATUS_UNKNOWN = "UNKNOWN" -SRVC_STATE_TYPE_SOFT = "SOFT" -SRVC_STATE_TYPE_HARD = "HARD" -statusCodes = {STATUS_OK: 0, STATUS_WARNING: 1, STATUS_CRITICAL: 2, - STATUS_UNKNOWN: 3} -NAGIOS_COMMAND_FILE = "/var/spool/nagios/cmd/nagios.cmd" -SRVC_LIST = ['Disk Utilization', 'Cpu Utilization', 'Memory Utilization', - 'Swap Utilization', 'Network Utilization'] - - -# Shows the usage of the script -def showUsage(): - usage = "Usage: %s -s " \ - "-t " \ - " -a " \ - "-l " \ - " -n \n" % os.path.basename(sys.argv[0]) - sys.stderr.write(usage) - - -# Method to change the host status -def update_host_state(hostAddr, srvcName, statusCode): - now = datetime.datetime.now() - if statusCode == statusCodes[STATUS_WARNING]: - cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;" \ - "Host Status WARNING - " \ - "Service(s) ['%s'] in CRITICAL state\n" \ - % (now, hostAddr, statusCode, srvcName) - else: - cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status OK - " \ - "Services in good health\n" % (now, hostAddr, statusCode) - - f = open(NAGIOS_COMMAND_FILE, "w") - f.write(cmdStr) - f.close() - - -# Method to execute livestatus -def checkLiveStatus(hostAddr, srvc): - cmd = "GET services\nColumns: state\nFilter: " \ - "description = %s\nFilter: host_address = %s" % (srvc, hostAddr) - - table = livestatus.readLiveStatus(cmd) - - if len(table) > 0 and len(table[0]) > 0: - return int(table[0][0]) - else: - return statusCodes[STATUS_UNKNOWN] - - -# Method to change the host state to UP based on other service type status -def check_and_update_host_state_to_up(hostAddr, srvcName): - finalState = 0 - for item in SRVC_LIST: 
- if item != srvcName: - finalState = finalState | checkLiveStatus(hostAddr, item) - - if finalState == statusCodes[STATUS_OK]: - update_host_state(hostAddr, srvcName, statusCodes[STATUS_OK]) - - -# Main method -if __name__ == "__main__": - try: - opts, args = getopt.getopt(sys.argv[1:], "hs:t:a:l:n:", - ["help", "state=", "type=", - "attempts=", "location=", "name="]) - except getopt.GetoptError as e: - print (str(e)) - showUsage() - sys.exit(STATUS_CRITICAL) - - srvcState = '' - srvcStateType = '' - attempts = '' - hostAddr = '' - srvcName = '' - if len(opts) == 0: - showUsage() - else: - for opt, arg in opts: - if opt in ('-h', '--help'): - showUsage() - sys.exit() - elif opt in ('-s', '--state'): - srvcState = arg - elif opt in ('-t', '--type'): - srvcStateType = arg - elif opt in ('-a', '--attempts'): - attempts = arg - elif opt in ('-l', '--location'): - hostAddr = arg - elif opt in ('-n', '--name'): - srvcName = arg - else: - showUsage() - sys.exit() - - # Swicth over the service state values and do the needful - if srvcState == STATUS_CRITICAL: - if srvcStateType == SRVC_STATE_TYPE_SOFT: - if int(attempts) == 3: - print "Updating the host status to warning " \ - "(3rd SOFT critical state)..." - update_host_state(hostAddr, srvcName, - statusCodes[STATUS_WARNING]) - elif srvcStateType == SRVC_STATE_TYPE_HARD: - print "Updating the host status to warning..." 
- update_host_state(hostAddr, srvcName, statusCodes[STATUS_WARNING]) - elif srvcState == STATUS_OK: - check_and_update_host_state_to_up(hostAddr, srvcName) - - sys.exit(0) diff --git a/plugins/gluster_host_service_handler.py.in b/plugins/gluster_host_service_handler.py.in new file mode 100755 index 0000000..2d5bff0 --- /dev/null +++ b/plugins/gluster_host_service_handler.py.in @@ -0,0 +1,140 @@ +#!/usr/bin/python +# +# gluster_host_service_handler.py -- Event handler which checks the +# status of defined services and accordingly changes the host status +# +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA
#

import os
import sys
import datetime
import argparse
import json
import time

import livestatus
from glusternagios import utils

SRVC_STATE_TYPE_SOFT = "SOFT"
SRVC_STATE_TYPE_HARD = "HARD"


def _writeNagiosCommand(cmdStr):
    """Write the external command line `cmdStr` to the nagios command file.

    The "@nagioscommandfilepath@" token is presumably replaced with the real
    path when the .py is generated from this .py.in -- confirm against the
    build scripts.
    """
    with open("@nagioscommandfilepath@", "w") as f:
        f.write(cmdStr)


# Method to change the host status
def update_host_state(hostAddr, srvcName, statusCode):
    """Submit a passive host check result for `hostAddr`.

    For utils.PluginStatusCode.WARNING the plugin output names `srvcName`
    as the service in CRITICAL state; for any other `statusCode` the output
    is the "Services in good health" message.
    """
    # External commands are stamped with the submission time in time_t
    # (epoch seconds) form, not with a formatted date string.
    now = int(time.time())
    if statusCode == utils.PluginStatusCode.WARNING:
        output = "Host Status WARNING - " \
                 "Service(s) ['%s'] in CRITICAL state" % srvcName
    else:
        output = "Host Status OK - Services in good health"

    cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;%s\n" \
             % (now, hostAddr, statusCode, output)

    _writeNagiosCommand(cmdStr)


# Method to execute livestatus
def checkLiveStatus(hostAddr, srvc):
    """Return the state of service `srvc` on host `hostAddr`.

    Sends a "GET services" query through livestatus.readLiveStatus() and
    converts the first column of the first returned row to int.  When the
    result has no row or no column, utils.PluginStatusCode.UNKNOWN is
    returned instead.
    """
    cmd = "GET services\nColumns: state\nFilter: " \
          "description = %s\nFilter: host_address = %s" % (srvc, hostAddr)

    table = livestatus.readLiveStatus(cmd)

    if len(table) > 0 and len(table[0]) > 0:
        return int(table[0][0])
    else:
        return utils.PluginStatusCode.UNKNOWN


def _getHostMonitoringSrvcList():
    """Load the 'serviceList' entry from the host monitoring services file."""
    with open("@hostmonitoringserviceslist@") as data_file:
        return json.load(data_file)['serviceList']


# Method to change the host state to UP based on other service type status
def check_and_update_host_state_to_up(hostAddr, srvcName):
    """Mark the host OK when every other monitored service is OK.

    `srvcName` (the service that just recovered) is skipped; the states of
    the remaining services are OR-ed together, so the result equals OK only
    when each of them is OK.
    """
    finalState = utils.PluginStatusCode.OK
    for item in _getHostMonitoringSrvcList():
        if item != srvcName:
            finalState = finalState | checkLiveStatus(hostAddr, item)

    if finalState == utils.PluginStatusCode.OK:
        update_host_state(hostAddr, srvcName, utils.PluginStatusCode.OK)


# Main method
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        usage='%(prog)s -s <State (CRITICAL/WARNING/OK/UNKNOWN)> '
        '-t <State Type (SOFT/HARD)> -a <No of attempts> '
        '-l <Host Address> -n <Service Name>')
    parser.add_argument(
        "-s",
        "--state",
        action="store",
        required=True,
        type=str,
        help="Current State of the service (CRITICAL/WARNING/OK/UNKNOWN)")
    # The comma after "-t" matters: adjacent string literals are joined, so
    # without it the flag was registered as the single option "-t--statetype"
    # and "--statetype" was never accepted.
    parser.add_argument(
        "-t",
        "--statetype",
        action="store",
        required=True,
        type=str,
        help="State Type of the service (SOFT/HARD)")
    parser.add_argument(
        "-a",
        "--attempts",
        action="store",
        required=True,
        type=int,
        help="No of attempts")
    parser.add_argument(
        "-l",
        "--location",
        action="store",
        required=True,
        type=str,
        help="Address of the host")
    parser.add_argument(
        "-n",
        "--name",
        action="store",
        required=True,
        type=str,
        help="Service Name")

    args = parser.parse_args()

    # Switch over the service state values and update state
    if args.state == utils.PluginStatus.CRITICAL \
            and args.statetype == SRVC_STATE_TYPE_HARD:
        print("Updating the host status to warning...")
        update_host_state(args.location,
                          args.name,
                          utils.PluginStatusCode.WARNING)
    # args.state is the state *name*, so it is compared with PluginStatus
    # (the names) and not with PluginStatusCode (the integer codes).
    elif args.state == utils.PluginStatus.OK:
        check_and_update_host_state_to_up(args.location, args.name)

    sys.exit(utils.PluginStatusCode.OK)