diff options
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | config/Makefile.am | 17 | ||||
-rw-r--r-- | config/gluster-commands.cfg | 35 | ||||
-rw-r--r-- | config/gluster-host-groups.cfg | 15 | ||||
-rw-r--r-- | config/gluster-host-services.cfg | 46 | ||||
-rw-r--r-- | config/gluster-templates.cfg | 32 | ||||
-rw-r--r-- | config/node1.cfg | 8 | ||||
-rw-r--r-- | configure.ac | 3 | ||||
-rw-r--r-- | nagios-server-addons.spec.in | 126 | ||||
-rw-r--r-- | plugins/Makefile.am | 6 | ||||
-rw-r--r-- | plugins/__init__.py | 0 | ||||
-rwxr-xr-x | plugins/check_remote_host.py | 199 | ||||
-rwxr-xr-x | plugins/gluster_host_service_handler.py | 154 | ||||
-rw-r--r-- | templates/Makefile.am | 12 | ||||
-rw-r--r-- | templates/check_cpu_multicore.php | 102 | ||||
-rw-r--r-- | templates/check_disk_and_inode.php | 69 | ||||
-rw-r--r-- | templates/check_interfaces.php | 53 | ||||
-rw-r--r-- | templates/check_memory.php | 59 | ||||
-rw-r--r-- | templates/check_swap_usage.php | 41 | ||||
-rw-r--r-- | tests/Makefile.am | 1 | ||||
-rw-r--r-- | tests/test_check_remote_host.py | 67 |
21 files changed, 1045 insertions, 2 deletions
diff --git a/Makefile.am b/Makefile.am index 8c4235f..cd3d363 100644 --- a/Makefile.am +++ b/Makefile.am @@ -20,7 +20,9 @@ # keep sorted SUBDIRS = \ + config \ plugins \ + templates \ $(NULL) # The tests should be always last as they need the rest of the source to be diff --git a/config/Makefile.am b/config/Makefile.am new file mode 100644 index 0000000..7c3c8be --- /dev/null +++ b/config/Makefile.am @@ -0,0 +1,17 @@ +glusternagiosconfdir = $(sysconfdir)/nagios/gluster +glusternagiosconf_DATA = \ + gluster-commands.cfg \ + gluster-host-groups.cfg \ + gluster-host-services.cfg \ + gluster-templates.cfg \ + $(NULL) + +glusternagiosdefaultconfdir = $(sysconfdir)/nagios/gluster/default +glusternagiosdefaultconf_DATA = \ + node1.cfg \ + $(NULL) + +EXTRA_DIST = \ + $(glusternagiosconf_DATA) \ + $(glusternagiosdefaultconf_DATA) \ + $(NULL) diff --git a/config/gluster-commands.cfg b/config/gluster-commands.cfg new file mode 100644 index 0000000..5c335c7 --- /dev/null +++ b/config/gluster-commands.cfg @@ -0,0 +1,35 @@ +### gluster nagios template ### +define command { + command_name check_disk_and_inode + command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c check_disk_and_inode +} + +define command { + command_name check_cpu_multicore + command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c check_cpu_multicore +} + +define command { + command_name check_memory + command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c check_memory +} + +define command { + command_name check_swap_usage + command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c check_swap_usage +} + +define command { + command_name check_interfaces + command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c check_interfaces +} + +define command { + command_name check_remote_host + command_line $USER1$/check_remote_host.py -H $HOSTADDRESS$ +} + +define command { + command_name host_service_handler + command_line $USER1$/gluster_host_service_handler.py -s $SERVICESTATE$ -t $SERVICESTATETYPE$ -a $SERVICEATTEMPT$ -l $HOSTADDRESS$ -n 
$SERVICEDESC$ +} diff --git a/config/gluster-host-groups.cfg b/config/gluster-host-groups.cfg new file mode 100644 index 0000000..c2f5ed2 --- /dev/null +++ b/config/gluster-host-groups.cfg @@ -0,0 +1,15 @@ +define hostgroup{ + hostgroup_name gluster_hosts + alias gluster_hosts +} + +define hostgroup{ + hostgroup_name gluster_clusters + alias gluster_clusters +} + +define hostgroup{ + hostgroup_name gluster_volumes + alias gluster_volumes +} + diff --git a/config/gluster-host-services.cfg b/config/gluster-host-services.cfg new file mode 100644 index 0000000..31f67b5 --- /dev/null +++ b/config/gluster-host-services.cfg @@ -0,0 +1,46 @@ +#gluster nagios template + +define service{ + use gluster-service-with-graph + hostgroup_name gluster_hosts + service_description Memory Utilization + normal_check_interval 1 + event_handler host_service_handler + check_command check_nrpe!check_memory +} + +define service{ + use gluster-service-with-graph + hostgroup_name gluster_hosts + service_description Swap Utilization + normal_check_interval 1 + event_handler host_service_handler + check_command check_nrpe!check_swap_usage +} + +define service{ + use gluster-service-with-graph + hostgroup_name gluster_hosts + normal_check_interval 1 + service_description Disk Utilization + event_handler host_service_handler + check_command check_nrpe!check_disk_and_inode +} + +define service{ + use gluster-service-with-graph + hostgroup_name gluster_hosts + service_description Cpu Utilization + normal_check_interval 1 + check_command check_nrpe!check_cpu_multicore +} + +define service{ + use gluster-service-with-graph + hostgroup_name gluster_hosts + service_description Network Utilization + normal_check_interval 1 + event_handler host_service_handler + check_command check_nrpe!check_interfaces +} + diff --git a/config/gluster-templates.cfg b/config/gluster-templates.cfg new file mode 100644 index 0000000..3ee66d4 --- /dev/null +++ b/config/gluster-templates.cfg @@ -0,0 +1,32 @@ +define host { + 
name gluster-host + use linux-server + check_command check_remote_host + register 0 +} + +define host { + name gluster-cluster + use linux-server + register 0 +} + +define host{ + name gluster-volume + use linux-server + register 0 +} + +define service { + name gluster-service-with-graph + use generic-service + action_url /pnp4nagios/index.php/graph?host=$HOSTNAME$&srv=$SERVICEDESC$' class='tips' rel='/pnp4nagios/index.php/popup?host=$HOSTNAME$&srv=$SERVICEDESC$ + register 0 +} + +define service { + name gluster-service-without-graph + use generic-service + register 0 +} + diff --git a/config/node1.cfg b/config/node1.cfg new file mode 100644 index 0000000..d9669cb --- /dev/null +++ b/config/node1.cfg @@ -0,0 +1,8 @@ +define host{ + use gluster-host + host_name node1 + alias web-node01 + address localhost + hostgroups gluster_hosts +} + diff --git a/configure.ac b/configure.ac index 5cb7d16..b400b9c 100644 --- a/configure.ac +++ b/configure.ac @@ -82,6 +82,7 @@ fi AX_PYTHON_MODULE([argparse], [fatal]) AX_PYTHON_MODULE([ethtool], [fatal]) AX_PYTHON_MODULE([glusternagios], [fatal]) +AX_PYTHON_MODULE([mock], [fatal]) AX_PYTHON_MODULE([netaddr], [fatal]) AX_PYTHON_MODULE([pthreading], [fatal]) AX_PYTHON_MODULE([pyinotify], [fatal]) @@ -91,7 +92,9 @@ AX_PYTHON_MODULE([selinux], [fatal]) AC_CONFIG_FILES([ Makefile nagios-server-addons.spec + config/Makefile plugins/Makefile + templates/Makefile tests/Makefile tests/run_tests_local.sh tests/run_tests.sh diff --git a/nagios-server-addons.spec.in b/nagios-server-addons.spec.in index 3450ba2..0266936 100644 --- a/nagios-server-addons.spec.in +++ b/nagios-server-addons.spec.in @@ -3,7 +3,11 @@ %global _for_fedora_koji_builds 0 %if ( 0%{?fedora} && 0%{?fedora} > 16 ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) -%global _with_systemd true +%global _with_systemd true +%endif + +%if ( 0%{?fedora} && 0%{?fedora} > 17 ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) +%global _with_firewalld true %endif # From 
https://fedoraproject.org/wiki/Packaging:Python#Macros @@ -12,6 +16,22 @@ %{!?python_sitearch: %global python_sitearch %(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")} %endif +%if ( 0%{?_with_systemd:1} ) +%define _init_enable() /bin/systemctl enable %1.service ; +%define _init_disable() /bin/systemctl disable %1.service ; +%define _init_restart() /bin/systemctl restart %1.service ; +%define _init_try_restart() /bin/systemctl try-restart %1.service ; +%define _init_stop() /bin/systemctl stop %1.service ; +%define _init_install() install -D -p -m 0644 %1 %{buildroot}%{_unitdir}/%2.service ; +%else +%define _init_enable() /sbin/chkconfig --add %1 ; +%define _init_disable() /sbin/chkconfig --del %1 ; +%define _init_restart() /sbin/service %1 restart &>/dev/null ; +%define _init_try_restart() /sbin/service %1 condrestart &>/dev/null ; +%define _init_stop() /sbin/service %1 stop &>/dev/null ; +%define _init_install() install -D -p -m 0755 %1 %{buildroot}%{_sysconfdir}/init.d/%2 ; +%endif + Summary: Gluster node management add-ons for Nagios Name: @PACKAGE_NAME@ Version: @PACKAGE_VERSION@ @@ -24,15 +44,35 @@ Source0: %{name}-%{version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-root BuildRequires: pyflakes BuildRequires: python-pep8 +BuildRequires: python-mock BuildRequires: python-nose BuildRequires: python-devel +%if ( 0%{?_with_systemd:1} ) +BuildRequires: systemd-units +Requires(post): systemd-units +%else +Requires(post): /sbin/service +Requires(post): /sbin/chkconfig +%endif +%if ( 0%{?_with_firewalld:1} ) +Requires(post): firewalld +%else +Requires(post): iptables +%endif +Requires: check-mk-livestatus Requires: gluster-nagios-common +Requires: httpd +Requires: nagios +Requires: nagios-plugins-nrpe +Requires: php +Requires: pnp4nagios Requires: python-argparse Requires: python-ethtool Requires: python-netaddr Requires: python-pthreading Requires: python-pyinotify Requires: python-selinux +Requires: rrdtool-perl 
%description Nagios plugin, scripts, configuration files etc for gluster nodes. @@ -43,6 +83,7 @@ Group: Development/Tools Requires: %{name} = %{version}-%{release} Requires: pyflakes Requires: python-pep8 +Requires: python-mock Requires: python-nose Requires: python-devel @@ -66,9 +107,90 @@ make install DESTDIR=%{buildroot} %clean rm -rf %{buildroot} +%post +NagiosCFGFile="/etc/nagios/nagios.cfg" +sed -i '/etc\/nagios\/objects\/localhost.cfg/d' $NagiosCFGFile + +if grep -q "#process_performance_data=0" $NagiosCFGFile; then + sed -i -e 's/#process_performance_data=0/process_performance_data=1/g' $NagiosCFGFile +elif grep -q "process_performance_data=0" $NagiosCFGFile ; then + sed -i -e 's/process_performance_data=0/process_performance_data=1/g' $NagiosCFGFile +fi + +if grep -q "#enable_environment_macros=0" $NagiosCFGFile; then + sed -i -e 's/#enable_environment_macros=0/enable_environment_macros=1/g' $NagiosCFGFile +elif grep -q "process_performance_data=0" $NagiosCFGFile ; then + sed -i -e 's/process_performance_data=0/process_performance_data=1/g' $NagiosCFGFile +fi + +if ! grep -q "#rhs performance monitoring" $NagiosCFGFile; then +cat >> $NagiosCFGFile <<EOF +#rhs performance monitoring + +# Definitions specific to gluster +cfg_dir=/etc/nagios/gluster + +service_perfdata_command=process-service-perfdata +host_perfdata_command=process-host-perfdata +broker_module=/usr/lib64/check_mk/livestatus.o /var/spool/nagios/cmd/live +EOF +fi + +CommandFile="/etc/nagios/objects/commands.cfg" +if [ -f $CommandFile ]; then +sed -i -e "/# 'process-host-perfdata' command definition/,+5d" $CommandFile +sed -i -e "/# 'process-service-perfdata' command definition/,+5d" $CommandFile + +if ! grep -q "check_nrpe" $CommandFile; then +cat >> $CommandFile <<EOF +define command{ + command_name check_nrpe + command_line \$USER1\$/check_nrpe -H \$HOSTADDRESS\$ -c \$ARG1\$ +} +EOF +fi + +if ! 
grep -q "gluster nagios template" $CommandFile; then +cat >> $CommandFile <<EOF + +### gluster nagios template ### +define command { + command_name process-service-perfdata + command_line /usr/bin/perl /usr/libexec/pnp4nagios/process_perfdata.pl +} + +define command { + command_name process-host-perfdata + command_line /usr/bin/perl /usr/libexec/pnp4nagios/process_perfdata.pl -d HOSTPERFDATA +} + +EOF +fi +fi + +%if ( 0%{?_with_firewalld:1} ) +/bin/firewall-cmd --permanent --add-port=80/tcp +/bin/firewall-cmd --add-port=80/tcp +%else +/sbin/iptables -I INPUT 1 -p tcp --dport 80 -j ACCEPT +/sbin/service iptables save +%_init_restart iptables +%endif + +%_init_enable nagios +%_init_enable httpd +%_init_restart nagios +%_init_restart httpd + %files %defattr(-,root,root,-) -%{_libdir}/nagios/* +%{_datadir}/nagios/html/pnp4nagios/templates.dist/check_cpu_multicore.php +%{_datadir}/nagios/html/pnp4nagios/templates.dist/check_disk_and_inode.php +%{_datadir}/nagios/html/pnp4nagios/templates.dist/check_interfaces.php +%{_datadir}/nagios/html/pnp4nagios/templates.dist/check_memory.php +%{_datadir}/nagios/html/pnp4nagios/templates.dist/check_swap_usage.php +%{_libdir}/nagios/plugins/gluster/* +%{_sysconfdir}/nagios/gluster/* %files tests %defattr(-,root,root,-) diff --git a/plugins/Makefile.am b/plugins/Makefile.am index c12520c..329af89 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -1,2 +1,8 @@ dist_glusternagiosplugins_PYTHON = \ + check_remote_host.py \ + gluster_host_service_handler.py \ + $(NULL) + +EXTRA_DIST = \ + __init__.py \ $(NULL) diff --git a/plugins/__init__.py b/plugins/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/plugins/__init__.py diff --git a/plugins/check_remote_host.py b/plugins/check_remote_host.py new file mode 100755 index 0000000..7350e27 --- /dev/null +++ b/plugins/check_remote_host.py @@ -0,0 +1,199 @@ +#!/usr/bin/python +# +# check_remote_host.py -- nagios plugin uses Mklivestatus to get the overall +# 
status +# of a host. The entities considered for the status of the host are - +# 1. Host is reachable +# 2. LV/Inode Service status +# 3. CPU Utilization +# 4. Memory Utilization +# 5. Network Utilization +# 6. Swap Utilization +# +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA +# + +import os +import sys +import shlex +import subprocess +import socket +import getopt + +STATUS_OK = 0 +STATUS_WARNING = 1 +STATUS_CRITICAL = 2 +STATUS_UNKNOWN = 3 +_checkPingCommand = "/usr/lib64/nagios/plugins/check_ping" +_commandStatusStrs = {STATUS_OK: 'OK', STATUS_WARNING: 'WARNING', + STATUS_CRITICAL: 'CRITICAL', STATUS_UNKNOWN: 'UNKNOWN'} +_socketPath = '/var/spool/nagios/cmd/live' + + +# Class for exception definition +class checkPingCmdExecFailedException(Exception): + message = "check_ping command failed" + + def __init__(self, rc=0, out=(), err=()): + self.rc = rc + self.out = out + self.err = err + + def __str__(self): + o = '\n'.join(self.out) + e = '\n'.join(self.err) + if o and e: + m = o + '\n' + e + else: + m = o or e + + s = self.message + if m: + s += '\nerror: ' + m + if self.rc: + s += '\nreturn code: %s' % self.rc + return s + + +# Method to execute a command +def execCmd(command): + proc = subprocess.Popen(command, + close_fds=True, + stdout=subprocess.PIPE, + 
stderr=subprocess.PIPE) + (out, err) = proc.communicate() + return (proc.returncode, out, err) + + +# Method to check the ing status of the host +def getPingStatus(hostAddr): + cmd = "%s -H %s" % (_checkPingCommand, hostAddr) + cmd += " -w 3000.0,80% -c 5000.0,100%" + + try: + (rc, out, err) = execCmd(shlex.split(cmd)) + except (OSError, ValueError) as e: + raise checkPingCmdExecFailedException(err=[str(e)]) + + if rc != 0: + raise checkPingCmdExecFailedException(rc, [out], [err]) + + return rc + + +# Method to execute livestatus +def checkLiveStatus(hostAddr, srvc): + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.connect(_socketPath) + + # Write command to socket + cmd = "GET services\nColumns: state\nFilter: " + "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr) + s.send(cmd) + + # Close socket + s.shutdown(socket.SHUT_WR) + + # Read the answer + answer = s.recv(1000000) + + # Parse the answer into a table + table = [line.split(';') for line in answer.split('\n')[:-1]] + + if len(table) > 0 and len(table[0]) > 0: + return int(table[0][0]) + else: + return STATUS_UNKNOWN + + +# Method to show the usage +def showUsage(): + usage = "Usage: %s -H <Host Address>\n" % os.path.basename(sys.argv[0]) + sys.stderr.write(usage) + + +# Main method +if __name__ == "__main__": + try: + opts, args = getopt.getopt(sys.argv[1:], "hH:", ["help", "host="]) + except getopt.GetoptError as e: + print (str(e)) + showUsage() + sys.exit(STATUS_CRITICAL) + + hostAddr = '' + if len(opts) == 0: + showUsage() + sys.exit(STATUS_CRITICAL) + else: + for opt, arg in opts: + if opt in ("-h", "--help"): + showUsage() + sys.exit() + elif opt in ("-H", "--host"): + hostAddr = arg + else: + showUsage() + sys.exit(STATUS_CRITICAL) + + # Check ping status of the node, if its not reachable exit + try: + pingStatus = getPingStatus(hostAddr) + except (checkPingCmdExecFailedException) as e: + print "Host Status %s - Host not reachable" % \ + (_commandStatusStrs[STATUS_UNKNOWN]) 
+ sys.exit(_commandStatusStrs[STATUS_UNKNOWN]) + + if pingStatus != STATUS_OK: + print "Host Status %s - Host not reachable" % \ + (_commandStatusStrs[STATUS_UNKNOWN]) + sys.exit(pingStatus) + + # Check the various performance statuses for the host + diskPerfStatus = checkLiveStatus(hostAddr, 'Disk Utilization') + cpuPerfStatus = checkLiveStatus(hostAddr, 'Cpu Utilization') + memPerfStatus = checkLiveStatus(hostAddr, 'Memory Utilization') + swapPerfStatus = checkLiveStatus(hostAddr, 'Swap Utilization') + nwPerfStatus = checkLiveStatus(hostAddr, 'Network Utilization') + + # Calculate the consolidated status for the host based on above status + # details + finalStatus = pingStatus | diskPerfStatus | cpuPerfStatus | \ + memPerfStatus | swapPerfStatus | nwPerfStatus + + # Get the list of ciritical services + criticalSrvcs = [] + if diskPerfStatus == STATUS_CRITICAL: + criticalSrvcs.append('Disk Utilization') + if cpuPerfStatus == STATUS_CRITICAL: + criticalSrvcs.append('Cpu Utilization') + if memPerfStatus == STATUS_CRITICAL: + criticalSrvcs.append('Memory Utilization') + if swapPerfStatus == STATUS_CRITICAL: + criticalSrvcs.append('Swap Utilization') + if nwPerfStatus == STATUS_CRITICAL: + criticalSrvcs.append('Network Utilization') + + # Return the status + if finalStatus == STATUS_CRITICAL: + print "Host Status %s - Service(s) %s in CRITICAL state" % \ + (_commandStatusStrs[STATUS_WARNING], criticalSrvcs) + sys.exit(STATUS_WARNING) + + print "Host Status %s - Services in good health" % \ + _commandStatusStrs[STATUS_OK] + sys.exit(STATUS_OK) diff --git a/plugins/gluster_host_service_handler.py b/plugins/gluster_host_service_handler.py new file mode 100755 index 0000000..283ac69 --- /dev/null +++ b/plugins/gluster_host_service_handler.py @@ -0,0 +1,154 @@ +#!/usr/bin/python +# +# gluster_host_service_handler.py -- Event handler which checks the +# status of defined services and accordingly changes the host status +# +# Copyright (C) 2014 Red Hat Inc +# +# This program 
is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA +# + +import os +import sys +import datetime +import socket +import getopt + +STATUS_OK = "OK" +STATUS_WARNING = "WARNING" +STATUS_CRITICAL = "CRITICAL" +STATUS_UNKNOWN = "UNKNOWN" +SRVC_STATE_TYPE_SOFT = "SOFT" +SRVC_STATE_TYPE_HARD = "HARD" +statusCodes = {STATUS_OK: 0, STATUS_WARNING: 1, STATUS_CRITICAL: 2, + STATUS_UNKNOWN: 3} +NAGIOS_COMMAND_FILE = "/var/spool/nagios/cmd/nagios.cmd" +SRVC_LIST = ['Disk Utilization', 'Cpu Utilization', 'Memory Utilization', + 'Swap Utilization', 'Network Utilization'] +_socketPath = '/var/spool/nagios/cmd/live' + + +# Shows the usage of the script +def showUsage(): + usage = "Usage: %s -s <Service State (OK/WARNING/CRITICAL/UNKNOWN)> " + "-t <Service State Type (SOFT/HARD)> -a <No of Service attempts> " + "-l <Host Address> -n <Service Name>\n" % os.path.basename(sys.argv[0]) + sys.stderr.write(usage) + + +# Method to change the host status +def update_host_state(hostAddr, srvcName, statusCode): + now = datetime.datetime.now() + if statusCode == statusCodes[STATUS_WARNING]: + cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status WARNING - " + "Service(s) ['%s'] in CRITICAL state\n" % (now, hostAddr, statusCode, + srvcName) + else: + cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status OK - " + "Services in good health\n" % (now, 
hostAddr, statusCode) + + f = open(NAGIOS_COMMAND_FILE, "w") + f.write(cmdStr) + f.close() + + +# Method to execute livestatus +def checkLiveStatus(hostAddr, srvc): + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.connect(_socketPath) + + # Write command to socket + cmd = "GET services\nColumns: state\nFilter: " + "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr) + s.send(cmd) + + # Close socket + s.shutdown(socket.SHUT_WR) + + # Read the answer + answer = s.recv(1000) + + # Parse the answer into a table + table = [line.split(';') for line in answer.split('\n')[:-1]] + + if len(table) > 0 and len(table[0]) > 0: + return int(table[0][0]) + else: + return statusCodes[STATUS_UNKNOWN] + + +# Method to change the host state to UP based on other service type status +def check_and_update_host_state_to_up(hostAddr, srvcName): + finalState = 0 + for item in SRVC_LIST: + if item != srvcName: + finalState = finalState | checkLiveStatus(hostAddr, item) + + if finalState == statusCodes[STATUS_OK]: + update_host_state(hostAddr, srvcName, statusCodes[STATUS_OK]) + + +# Main method +if __name__ == "__main__": + try: + opts, args = getopt.getopt(sys.argv[1:], "hs:t:a:l:n:", + ["help", "state=", "type=", + "attempts=", "location=", "name="]) + except getopt.GetoptError as e: + print (str(e)) + showUsage() + sys.exit(STATUS_CRITICAL) + + srvcState = '' + srvcStateType = '' + attempts = '' + hostAddr = '' + srvcName = '' + if len(opts) == 0: + showUsage() + else: + for opt, arg in opts: + if opt in ('-h', '--help'): + showUsage() + sys.exit() + elif opt in ('-s', '--state'): + srvcState = arg + elif opt in ('-t', '--type'): + srvcStateType = arg + elif opt in ('-a', '--attempts'): + attempts = arg + elif opt in ('-l', '--location'): + hostAddr = arg + elif opt in ('-n', '--name'): + srvcName = arg + else: + showUsage() + sys.exit() + + # Swicth over the service state values and do the needful + if srvcState == STATUS_CRITICAL: + if srvcStateType == 
SRVC_STATE_TYPE_SOFT: + if int(attempts) == 3: + print "Updating the host status to warning " + "(3rd SOFT critical state)..." + update_host_state(hostAddr, srvcName, + statusCodes[STATUS_WARNING]) + elif srvcStateType == SRVC_STATE_TYPE_HARD: + print "Updating the host status to warning..." + update_host_state(hostAddr, srvcName, statusCodes[STATUS_WARNING]) + elif srvcState == STATUS_OK: + check_and_update_host_state_to_up(hostAddr, srvcName) + + sys.exit(0) diff --git a/templates/Makefile.am b/templates/Makefile.am new file mode 100644 index 0000000..ccd3006 --- /dev/null +++ b/templates/Makefile.am @@ -0,0 +1,12 @@ +pnp4nagiostemplatedir = $(datarootdir)/nagios/html/pnp4nagios/templates.dist +pnp4nagiostemplate_DATA = \ + check_cpu_multicore.php \ + check_disk_and_inode.php \ + check_interfaces.php \ + check_memory.php \ + check_swap_usage.php \ + $(NULL) + +EXTRA_DIST = \ + $(pnp4nagiostemplate_DATA) \ + $(NULL) diff --git a/templates/check_cpu_multicore.php b/templates/check_cpu_multicore.php new file mode 100644 index 0000000..0cb3942 --- /dev/null +++ b/templates/check_cpu_multicore.php @@ -0,0 +1,102 @@ +<?php +############################################################################ +# Author: Nishanth Thomas e-mail: nthomas@redhat.com # +############################################################################ +# PNP4Nagios Template: check_cpu_multicore.php (this file) # +# For Nagios Plugin: check_cpu_multicore.py # +# # +# This will plot graphs for: # +# A graph for average CPU utilization across all cores # +# Separate graphs for CPU utilization for each cores # +# # +############################################################################ +# +# check_interfaces -- template to generate RRD graph +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, 
or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# + +# Graph Total CPU usage (average across all cpu cores) +$def[1]=""; $opt[1]=""; $ds_name[1]=""; +$opt[1] = "--vertical-label \"% Usage\" -r --lower-limit 0 --upper-limit 100 --title \"CPU for $hostname / $servicedesc\" --slope-mode -u 100 -N"; +$ds_name[1] = "CPU Utilization - Average across all cores"; + +$def[1] = "DEF:total_cpu_in=$RRDFILE[1]:$DS[2]:AVERAGE " ; +$def[1] .= "DEF:system_cpu_in=$RRDFILE[1]:$DS[3]:AVERAGE " ; +$def[1] .= "DEF:user_cpu_in=$RRDFILE[1]:$DS[4]:AVERAGE " ; + +$def[1] .= "CDEF:user_cpu_out=user_cpu_in "; +$def[1] .= "LINE1:user_cpu_out#0000FF:\"User\t\t\" "; +$def[1] .= rrd::gprint("user_cpu_out", array("LAST", "AVERAGE", "MAX"), "%6.2lf%%"); + +$def[1] .= rrd::cdef("system_cpu_out", "system_cpu_in"); +$def[1] .= "LINE1:system_cpu_out#008000:\"System\t\" "; +$def[1] .= rrd::gprint("system_cpu_in", array("LAST", "AVERAGE", "MAX"), "%6.2lf%%"); + +$def[1] .= rrd::cdef("total_cpu_out", "total_cpu_in"); +$def[1] .= "LINE1:total_cpu_out#800080:\"Total\t\t\" "; +$def[1] .= rrd::gprint("total_cpu_in", array("LAST", "AVERAGE", "MAX"), "%6.2lf%%"); + +if ($WARN[2] != ""){ + $def[1] .= "LINE2:$WARN[2]#FFA500:\"Warning\\n\" "; +} +if ($CRIT[2] != ""){ + $def[1] .= "LINE2:$CRIT[2]#FF0000:\"Critical\\n\" "; +} + +# Graph Per-Core CPU usage +$def_n=2; +$index = 6; +$no_cpu=$ACT[1]; + +if($no_cpu>1) + +for( $cpu_n=0; $cpu_n<$no_cpu; $cpu_n++) { + $def[$def_n]=''; + $ds_name[$def_n] = "CPU Utlilization for core: $cpu_n"; + $opt[$def_n] = 
"--vertical-label \"% Usage\" --lower-limit 0 --upper-limit 100 --title \"CPU for $hostname / $servicedesc\" --slope-mode -N"; + + $index_of_threshold_val = $index; + $def[$def_n] = "DEF:total_cpu_in=$RRDFILE[$index]:$DS[$index]:AVERAGE " ; + $index += 1; + $def[$def_n] .= "DEF:system_cpu_in=$RRDFILE[$index]:$DS[$index]:AVERAGE " ; + $index += 1; + $def[$def_n] .= "DEF:user_cpu_in=$RRDFILE[$index]:$DS[$index]:AVERAGE " ; + $index += 1; + $def[$def_n] .= "DEF:idle_cpu_in=$RRDFILE[$index]:$DS[$index]:AVERAGE " ; + $index += 1; + + $def[$def_n] .= "CDEF:user_cpu_out=user_cpu_in "; + $def[$def_n] .= "LINE1:user_cpu_out#0000FF:\"User\t\t\" "; + $def[$def_n] .= rrd::gprint("user_cpu_out", array("LAST", "AVERAGE", "MAX"), "%6.2lf%%"); + + $def[$def_n] .= rrd::cdef("system_cpu_out", "system_cpu_in"); + $def[$def_n] .= "LINE1:system_cpu_out#008000:\"System\t\" "; + $def[$def_n] .= rrd::gprint("system_cpu_in", array("LAST", "AVERAGE", "MAX"), "%6.2lf%%"); + + $def[$def_n] .= rrd::cdef("total_cpu_out", "total_cpu_in"); + $def[$def_n] .= "LINE1:total_cpu_out#800079:\"Total\t\t\" "; + $def[$def_n] .= rrd::gprint("total_cpu_in", array("LAST", "AVERAGE", "MAX"), "%6.2lf%%"); + + if ($WARN[$index_of_threshold_val] != ""){ + $def[$def_n] .= "LINE2:$WARN[$index_of_threshold_val]#FFFF00:\"Warning\\n\" "; + } + if ($CRIT[$index_of_threshold_val] != ""){ + $def[$def_n] .= "LINE2:$CRIT[$index_of_threshold_val]#FF0000:\"Critical\\n\" "; + } + $def_n++; +} +?> diff --git a/templates/check_disk_and_inode.php b/templates/check_disk_and_inode.php new file mode 100644 index 0000000..3185544 --- /dev/null +++ b/templates/check_disk_and_inode.php @@ -0,0 +1,69 @@ +<?php +# +# check_interfaces -- template to generate RRD graph +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any 
later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# + +# +# set graph labels +$i = 0; +$k = 0; +foreach ($this->DS as $KEY=>$VAL) { + if ($i == 0) { + $VAL['NAME'] = str_replace("_","/",$VAL['NAME']); + $ds_name[$KEY] = "Disk Utilization"; + $name[$KEY] = "Disk Utilization for mount: " . $VAL['NAME']; + + # set graph labels + $opt[$KEY] = "--vertical-label \"% Usage\" --lower-limit 0 --upper-limit 100 --title \"$name[$KEY]\" "; + # Graph Definitions + $def[$KEY] = rrd::def( "var1", $VAL['RRDFILE'], $VAL['DS'], "AVERAGE" ); + + # disk graph rendering + if ($VAL['ACT'] >= $VAL['CRIT']) { + $def[$KEY] .= rrd::line1( "var1", "#008000", "Disk Usage" ); + } elseif ($VAL['ACT'] >= $VAL['WARN']) { + $def[$KEY] .= rrd::line1( "var1", "#008000", "Disk Usage" ); + }else { + $def[$KEY] .= rrd::line1( "var1", "#008000", "Disk Usage" ); + } + $def[$KEY] .= rrd::gprint ("var1", array("LAST","MAX","AVERAGE"), "%3.4lf %S%%"); + $i = 1; + $k = $KEY; + } + else { + # inode graph rendering + $def[$k] .= rrd::def( "var2", $VAL['RRDFILE'], $VAL['DS'], "AVERAGE" ); + if ($VAL['ACT'] >= $VAL['CRIT']) { + $def[$k] .= rrd::line1( "var2", "#0000FF", "Inode Usage" ); + } elseif ($VAL['ACT'] >= $VAL['WARN']) { + $def[$k] .= rrd::line1( "var2", "#0000FF", "Inode Usage" ); + }else { + $def[$k] .= rrd::line1( "var2", "#0000FF", "Inode Usage" ); + } + $def[$k] .= rrd::gprint ("var2", array("LAST","MAX","AVERAGE"), "%3.4lf %S%%"); + $i = 0; + + # create warning line and legend + $def[$k] .= rrd::line2( $VAL['WARN'], "#FFA500", "Warning\\n"); + + # create critical line and 
legend + $def[$k] .= rrd::line2( $VAL['CRIT'], "#FF0000", "Critical\\n"); + + } +} +?> diff --git a/templates/check_interfaces.php b/templates/check_interfaces.php new file mode 100644 index 0000000..96226e9 --- /dev/null +++ b/templates/check_interfaces.php @@ -0,0 +1,53 @@ +<?php +# +# check_interfaces -- template to generate RRD graph +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+#
+# Each interface is expected to contribute $VALUE_COUNT data sources.
+$VALUE_COUNT = 4;
+# NOTE(review): asort() keeps the keys, so keyed lookups below ignore the sort.
+$name = $NAME;
+asort($name);
+$c = count($name);
+if ($c % $VALUE_COUNT != 0) {
+    exit;
+}
+# Stop above unless the source count is a whole multiple of $VALUE_COUNT.
+$interface_count = $c / $VALUE_COUNT;
+# One graph per interface, stored in slot $index+1 of $opt/$ds_name/$def.
+for ($i = 0; $i < $interface_count; $i++) {
+    $index = ($i * $VALUE_COUNT) + 1;
+    # First series: entry $index+2; "interface.data_type" is split on the dot.
+    list ($interface, $data_type) = explode (".", $name[$index+2]);
+    $interface = str_replace(";","",$interface);
+    $opt[$index+1] = "--vertical-label \"Speed in Gbps\" -X 0 -l 0 -u 1 --title \"Network Interface Load for $hostname / $interface\" ";
+    # NOTE(review): values are divided by 125000 but labelled Gbps - confirm units.
+    $ds_name[$index+1] = "$interface:: Receiving and ";
+    $def[$index+1] = rrd::def("value1", $RRDFILE[$index+2], $DS[$index+2], "AVERAGE");
+    $def[$index+1] .= rrd::cdef ("value2","value1,125000,/");
+    $def[$index+1] .= rrd::line1("value2", "#008000", $data_type);
+    $def[$index+1] .= rrd::gprint ("value2", array("LAST", "AVERAGE", "MAX"), "%10.4lf Gbps");
+    # Second series: entry $index+3, appended to the same graph definition.
+    list ($interface, $data_type) = explode (".", $name[$index+3]);
+    $interface = str_replace(";","",$interface);
+    $ds_name[$index+1] .= "Transmission speed";
+    $def[$index+1] .= rrd::def ("value3", $RRDFILE[$index+3], $DS[$index+3], "AVERAGE");
+    $def[$index+1] .= rrd::cdef ("value4","value3,125000,/");
+    $def[$index+1] .= rrd::line1 ("value4", "#0000ff", $data_type) ;
+    $def[$index+1] .= rrd::gprint ("value4", array("LAST", "AVERAGE", "MAX"), "%10.4lf Gbps");
+ }
+?>
diff --git a/templates/check_memory.php b/templates/check_memory.php
new file mode 100644
index 0000000..d620b66
--- /dev/null
+++ b/templates/check_memory.php
@@ -0,0 +1,59 @@
+<?php
+#
+# check_memory -- template to generate RRD graph
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+$def[1]=""; $opt[1]=""; $ds_name[1]="";
+$upper_limit = round($MAX[1] / (1048576),2);
+$opt[1] = "--vertical-label \"$UNIT[1](Total:$upper_limit GB)\" -l 0 -u $upper_limit -r --title \"Memory usage for $hostname / $servicedesc\" --slope-mode -N";
+$ds_name[1] = "Memory Usage";
+
+# Every series reads $RRDFILE[1]. The raw *_in values are divided by 1048576
+# (CDEF *_out); the scaled *_out value is both drawn and printed as GB.
+$def[1] = "DEF:total_mem_in=$RRDFILE[1]:$DS[1]:AVERAGE " ;
+$def[1] .= "DEF:used_mem_in=$RRDFILE[1]:$DS[2]:AVERAGE " ;
+$def[1] .= "DEF:buffer_mem_in=$RRDFILE[1]:$DS[3]:AVERAGE " ;
+$def[1] .= "DEF:cached_mem_in=$RRDFILE[1]:$DS[4]:AVERAGE " ;
+# Used memory: line plus LAST/MAX/AVERAGE legend.
+$def[1] .= rrd::cdef("used_mem_out","used_mem_in,1048576,/");
+$def[1] .= "LINE1:used_mem_out#0000ff:\"Used \" ";
+$def[1] .= "GPRINT:used_mem_out:LAST:\"%8.2lf GB LAST \" ";
+$def[1] .= "GPRINT:used_mem_out:MAX:\"%8.2lf GB MAX \" ";
+$def[1] .= "GPRINT:used_mem_out" . ':AVERAGE:"%8.2lf GB AVERAGE \j" ';
+# Buffer memory: same layout as the used-memory series.
+$def[1] .= rrd::cdef("buffer_mem_out","buffer_mem_in,1048576,/");
+$def[1] .= "LINE1:buffer_mem_out#800080:\"Buffer \" ";
+$def[1] .= "GPRINT:buffer_mem_out:LAST:\"%8.2lf GB LAST \" ";
+$def[1] .= "GPRINT:buffer_mem_out:MAX:\"%8.2lf GB MAX \" ";
+$def[1] .= "GPRINT:buffer_mem_out" . ':AVERAGE:"%8.2lf GB AVERAGE \j" ';
+# Cached memory: same layout as the used-memory series.
+$def[1] .= rrd::cdef("cached_mem_out","cached_mem_in,1048576,/");
+$def[1] .= "LINE1:cached_mem_out#008000:\"Cached \" ";
+$def[1] .= "GPRINT:cached_mem_out:LAST:\"%8.2lf GB LAST \" ";
+$def[1] .= "GPRINT:cached_mem_out:MAX:\"%8.2lf GB MAX \" ";
+$def[1] .= "GPRINT:cached_mem_out" . ':AVERAGE:"%8.2lf GB AVERAGE \j" ';
+# Threshold lines are drawn only when set, scaled like the series above.
+if ($WARN[1] != ""){
+    $WAR = $WARN[1] / 1048576 ;
+    $def[1] .= "LINE2:$WAR#FFA500:\"Warning \\n\" ";
+}
+if ($CRIT[1] != "") {
+    $CRT = $CRIT[1] / 1048576 ;
+    $def[1] .= "LINE2:$CRT#FF0000:\"Critical \\n\" ";
+ }
+
+?>
\ No newline at end of file
diff --git a/templates/check_swap_usage.php b/templates/check_swap_usage.php
new file mode 100644
index 0000000..4d1c0ff
--- /dev/null
+++ b/templates/check_swap_usage.php
@@ -0,0 +1,41 @@
+<?php
+#
+# check_swap_usage -- template to generate RRD graph
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+$upper_limit = round($MAX[1] / (1048576),2);
+$opt[1] = "-X 0 --vertical-label \"$UNIT[1](Total:$upper_limit GB)\" -l 0 -r -u $upper_limit --title \"Swap usage $hostname / $servicedesc\" ";
+$ds_name[1] = "Swap Usage";
+$def[1] = "DEF:used_swap_in=$RRDFILE[1]:$DS[1]:AVERAGE " ;
+# The raw series is divided by 1048576; the scaled value is drawn and printed as GB.
+$def[1] .= rrd::cdef("used_swap_out","used_swap_in,1048576,/");
+$def[1] .= "AREA:used_swap_out#3D1AA8:\"Used \" ";
+$def[1] .= "GPRINT:used_swap_out:LAST:\"%3.2lf GB LAST \" ";
+$def[1] .= "GPRINT:used_swap_out:MAX:\"%3.2lf GB MAX \" ";
+$def[1] .= "GPRINT:used_swap_out" . ':AVERAGE:"%3.2lf GB AVERAGE \j" ';
+# Threshold lines are drawn only when set, scaled like the series above.
+if ($WARN[1] != ""){
+    $WAR = $WARN[1] / 1048576 ;
+    $def[1] .= "LINE2:$WAR#FFA500:\"Warning \\n\" ";
+}
+if ($CRIT[1] != "") {
+    $CRT = $CRIT[1] / 1048576 ;
+    $def[1] .= "LINE2:$CRT#FF0000:\"Critical \\n\" ";
+}
+
+
+?>
\ No newline at end of file
diff --git a/tests/Makefile.am b/tests/Makefile.am
index cc557cc..1a5887a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -19,6 +19,7 @@
 #
 
 test_modules = \
+	test_check_remote_host.py \
 	$(NULL)
 
 dist_nagiosserveraddonstests_DATA = \
diff --git a/tests/test_check_remote_host.py b/tests/test_check_remote_host.py
new file mode 100644
index 0000000..c5c602d
--- /dev/null
+++ b/tests/test_check_remote_host.py
@@ -0,0 +1,67 @@
+#
+# Copyright 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+# Refer to the README and COPYING files for full details of the license
+#
+
+import mock
+
+from testrunner import PluginsTestCase as TestCaseBase
+from plugins.check_remote_host import *
+
+
+class TestHello(TestCaseBase):
+    # execCmd(): NOTE(review) only the Popen mock is used; execCmd() never runs
+    @mock.patch('check_remote_host.subprocess.Popen')
+    def testExecCmd(self, mock_popen):
+        reference = subprocess.Popen('any command', close_fds=True,
+                                     stdout=subprocess.PIPE,
+                                     stderr=subprocess.PIPE)
+        out = "sample output"
+        err = ""
+        reference.communicate.return_value = (out, err)
+        self.assertTrue(reference.communicate, "communicate called")
+
+    # getPingStatus(); NOTE(review): patch target module differs from import path
+    @mock.patch('check_remote_host.execCmd')
+    def testGetPingStatus(self, mock_execCmd):
+        rc = 0
+        out = "sample output"
+        err = ""
+        mock_execCmd.return_value = (rc, out, err)
+        getPingStatus('dummy host')
+        mock_execCmd.assert_called_with([
+            '/usr/lib64/nagios/plugins/check_ping', '-H', 'dummy', 'host',
+            '-w', '3000.0,80%', '-c', '5000.0,100%'])
+        self.assertRaises(OSError, execCmd,
+                          ['/usr/lib64/nagios/plugins/check_ping', '-H',
+                           'dummy', 'host', '-w', '3000.0,80%', '-c',
+                           '5000.0,100%'])
+
+    # checkLiveStatus(): verifies the socket path, the query sent and the result
+    @mock.patch('check_remote_host.socket.socket')
+    def testCheckLiveStatus(self, mock_socket):
+        reference = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        self.assertTrue(mock_socket.called, "called")
+        reference.recv.return_value = "0\n"
+        checkLiveStatus("dummy host", "dummy srvc")
+        reference.connect.assert_called_with('/var/spool/nagios/cmd/live')
+        reference.send.assert_called_with("GET services\nColumns: state\n"
+                                          "Filter: description = dummy srvc\n"
+                                          "Filter: host_address = "
+                                          "dummy host\n")
+        self.assertEqual(0, checkLiveStatus("dummy host", "dummy srvc"))
|