diff options
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | config/Makefile.am | 14 | ||||
-rw-r--r-- | config/glusternagios.conf.in | 17 | ||||
-rw-r--r-- | config/nagios_server.conf | 13 | ||||
-rw-r--r-- | configure.ac | 4 | ||||
-rw-r--r-- | gluster-nagios-addons.spec.in | 3 | ||||
-rw-r--r-- | plugins/Makefile.am | 2 | ||||
-rwxr-xr-x | plugins/check_gluster_syslog.py | 117 | ||||
-rwxr-xr-x | plugins/check_vol_status.py | 44 | ||||
-rw-r--r-- | plugins/nscautils.py.in | 53 | ||||
-rw-r--r-- | tests/Makefile.am | 1 | ||||
-rw-r--r-- | tests/test_check_gluster_syslog.py | 46 |
12 files changed, 280 insertions, 35 deletions
diff --git a/Makefile.am b/Makefile.am index 5ffc3af..fd78c86 100644 --- a/Makefile.am +++ b/Makefile.am @@ -20,6 +20,7 @@ # keep sorted SUBDIRS = \ + config \ plugins \ $(NULL) diff --git a/config/Makefile.am b/config/Makefile.am new file mode 100644 index 0000000..428ac43 --- /dev/null +++ b/config/Makefile.am @@ -0,0 +1,14 @@ +rsyslogconfdir = $(sysconfdir)/rsyslog.d +rsyslogconf_DATA = \ + glusternagios.conf \ + $(NULL) + +nagiosconfdir = $(sysconfdir)/nagios +nagiosconf_DATA = \ + nagios_server.conf \ + $(NULL) + +EXTRA_DIST = \ + $(nagiosconf_DATA) \ + $(rsyslogconf_DATA) \ + $(NULL) diff --git a/config/glusternagios.conf.in b/config/glusternagios.conf.in new file mode 100644 index 0000000..004b375 --- /dev/null +++ b/config/glusternagios.conf.in @@ -0,0 +1,17 @@ +##### glusternagios.conf ##### + +$ModLoad omprog +$actionomprogbinary @glusternagiospluginsdir@/check_gluster_syslog.py + +# +## Pass logs to omprog if app-name is 'gluster' +# +$template GLFS_NAG_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase%/%app-name:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n" + +if $app-name contains 'glusterfsd' then :omprog:;GLFS_NAG_Template + + +# +## discard logs where app-name is 'gluster' as we processed already +# +#if $app-name contains 'gluster' then ~ diff --git a/config/nagios_server.conf b/config/nagios_server.conf new file mode 100644 index 0000000..56c8995 --- /dev/null +++ b/config/nagios_server.conf @@ -0,0 +1,13 @@ +# NAGIOS SERVER +# The nagios server IP address or FQDN to which the NSCA command +# needs to be sent +[NAGIOS-SERVER] +nagios_server= + + +# CLUSTER NAME +# The host name of the logical cluster configured in Nagios under which +# the gluster volume services reside +[NAGIOS-DEFINTIONS] +cluster_name= + diff --git a/configure.ac b/configure.ac index db4147e..dffb3ea 100644 --- a/configure.ac +++ b/configure.ac @@ -53,6 +53,7 @@ AC_SUBST([nagiospluginsdir], ['${libdir}/nagios/plugins']) AC_SUBST([glusternagiospluginsdir], ['${nagiospluginsdir}/gluster']) AC_SUBST([glusternagioscommonpylibdir], ['${pyexecdir}/glusternagios']) AC_SUBST([glusternagiosaddonstestsdir], ['${datarootdir}/${PACKAGE_NAME}/tests']) +AC_SUBST([nagiosconf], ['/etc/nagios']) # Checking for pyflakes AC_PATH_PROG([PYFLAKES], [pyflakes]) @@ -94,7 +95,10 @@ AX_PYTHON_MODULE([selinux], [fatal]) AC_CONFIG_FILES([ Makefile gluster-nagios-addons.spec + config/Makefile + config/glusternagios.conf plugins/Makefile + plugins/nscautils.py plugins/volcap/Makefile tests/Makefile tests/run_tests_local.sh diff --git a/gluster-nagios-addons.spec.in b/gluster-nagios-addons.spec.in index 390d6e5..969bded 100644 --- a/gluster-nagios-addons.spec.in +++ b/gluster-nagios-addons.spec.in @@ -139,6 +139,7 @@ command[discoverlogicalcomponents]=/usr/lib64/nagios/plugins/gluster/discoverlog EOF %_init_enable nrpe %_init_restart crond +%_init_restart rsyslog %preun @@ -155,6 +156,8 @@ sed -i '/check_vol_quota_status/d' %{_sysconfdir}/nagios/nrpe.cfg %defattr(-,root,root,-) %attr(0755, -, -) %{_libdir}/nagios/plugins/gluster/* %{_sysconfdir}/cron.d/gluster-sysstat.crontab +%{_sysconfdir}/rsyslog.d/glusternagios.conf +%{_sysconfdir}/nagios/nagios_server.conf %files tests %defattr(-,root,root,-) diff --git a/plugins/Makefile.am b/plugins/Makefile.am index c74cc3e..9bba2d4 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -9,6 +9,7 @@ crond_DATA = \ dist_glusternagiosplugins_PYTHON = \ check_disk_and_inode.py \ + check_gluster_syslog.py \ check_vol_utilization.py \ check_vol_status.py \ check_volume_status.py \ @@ -19,6 +20,7 @@ dist_glusternagiosplugins_PYTHON = \ __init__.py \ memory.py \ network.py \ + nscautils.py \ sadf.py \ swap.py \ $(NULL) diff --git a/plugins/check_gluster_syslog.py b/plugins/check_gluster_syslog.py new file mode 100755 index 0000000..a52667c --- /dev/null +++ b/plugins/check_gluster_syslog.py @@ -0,0 +1,117 @@ +#! /usr/bin/python +# check_gluster_syslog.py +# Script to act on syslog messages related to gluster +# and send output to Nagios via nsca +# +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +# +import re +import sys +import select + +import nscautils +from glusternagios import utils + +# skeleton config parameters +__pollPeriod = 0.75 # the number of seconds between polling for new messages +__maxAtOnce = 1024 # max no of messages that are processed within one batch + + +def findVolName(pattern): + # pattern is of the form <graphid>-<volume name>-<translator name> + return pattern[pattern.find('-') + 1:pattern.rfind('-')] + + +def getStatusCode(alertlevel): + if alertlevel == 'ALERT': + return utils.PluginStatusCode.CRITICAL + else: + return utils.PluginStatusCode.WARNING + + +def processQuotaMsg(msg, alertlevel): + quotapat = re.compile(r'\b\d*-[a-zA-Z0-9_-]*-quota\b') + matches = quotapat.search(msg) + if matches: + volname = findVolName(matches.group()) + # Now get the actual msg + alertMsg = "QUOTA: " + msg[msg.rfind(matches.group()) + + len(matches.group()) + 1:] + serviceName = nscautils.vol_service_name(volname, "Quota") + nscautils.send_to_nsca(nscautils.getNagiosClusterName(), + serviceName, + getStatusCode(alertlevel), + alertMsg) + + +def processMsg(msg): + 'Check if msg is indeed from gluster app' + custom_logvars = msg[:msg.find(' ')] + level = custom_logvars.split('/')[2] + appname = custom_logvars.split('/')[3] + if appname != 'GLUSTERFSD': + return + # For gluster messages, need to check the source of message + logsource = msg[msg.rfind('['):msg.rfind(']')] + if logsource.find('quota') > -1: + processQuotaMsg(msg, level) + + +def onReceive(msgs): + """This is the entry point where actual work needs to be done. It receives + a list with all messages pulled from rsyslog. The list is of variable + length, but contains all messages that are currently available. It is + suggested NOT to use any further buffering, as we do not know when the + next message will arrive. It may be in a nanosecond from now, but it + may also be in three hours... + """ + for msg in msgs: + processMsg(msg) + + +""" +------------------------------------------------------- +This is plumbing that DOES NOT need to be CHANGED +------------------------------------------------------- +Implementor's note: Python seems to very agressively +buffer stdouot. The end result was that rsyslog does not +receive the script's messages in a timely manner (sometimes +even never, probably due to races). To prevent this, we +flush stdout after we have done processing. This is especially +important once we get to the point where the plugin does +two-way conversations with rsyslog. Do NOT change this! +See also: https://github.com/rsyslog/rsyslog/issues/22 +""" +if __name__ == '__main__': + keepRunning = 1 + while keepRunning == 1: + while keepRunning and sys.stdin in \ + select.select([sys.stdin], [], [], __pollPeriod)[0]: + msgs = [] + while keepRunning and sys.stdin in \ + select.select([sys.stdin], [], [], 0)[0]: + line = sys.stdin.readline() + if line: + msgs.append(line) + else: # an empty line means stdin has been closed + keepRunning = 0 + if len(msgs) >= __maxAtOnce: + break + if len(msgs) > 0: + onReceive(msgs) + sys.stdout.flush() # important,Python buffers far too much + sys.exit(0) diff --git a/plugins/check_vol_status.py b/plugins/check_vol_status.py index 9e526da..33e26ee 100755 --- a/plugins/check_vol_status.py +++ b/plugins/check_vol_status.py @@ -1,9 +1,11 @@ #!/usr/bin/python + import re -import commands import argparse +import commands import xml.etree.ElementTree as ET from glusternagios import utils +import nscautils def parseXml(xmldoc, searchStr): @@ -25,37 +27,6 @@ def getVolumeStatus(vol_status_out): return vol_status -def getNagiosServerIP(): - nagiosIP = "" - nscaConfig = open("/etc/nagios/nagios_server.cfg", "r+") - for line in nscaConfig.readlines(): - if "nagios_server" in line: - #print line.rstrip() - line = line.rstrip() - nagiosIP = line.rpartition('=')[2] - #print nagiosIP - return nagiosIP - - -def send_to_nsca(hostName, serviceName, exitStatus, resultString): - #print hostName - #print serviceName - #print exitStatus - #print resultString - f = open('out.txt', 'w') - print >> f, '%s\t%s\t%s\t%s' % (hostName, - serviceName, - exitStatus, - resultString) - f.close() - nagiosIP = getNagiosServerIP() - command_send_nsca = "send_nsca -H " + nagiosIP + \ - " -c /etc/nagios/send_nsca.cfg < out.txt" - #print command_send_nsca - commands.getoutput(command_send_nsca) - #print nsca_stat - - def showBrickStatus(vol_status_out): ipPat = re.compile("^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$") xmlElemList = [] @@ -83,7 +54,10 @@ def showBrickStatus(vol_status_out): else: exitStatus = utils.PluginStatusCode.CRITICAL resultString = "Brick Status: CRITICAL" - send_to_nsca(brickIP, brickName, exitStatus, resultString) + nscautils.send_to_nsca(brickIP, + brickName, + exitStatus, + resultString) def showVolumeStatus(vol_status_out, volName, clusterName): @@ -94,7 +68,7 @@ def showVolumeStatus(vol_status_out, volName, clusterName): #brick_list = [] resultString = "" exitStatus = utils.PluginStatusCode.OK - serviceName = "Volume-%s-Status" % volName + serviceName = nscautils.vol_service_name(volName) ipPat = re.compile("^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$") vol_status = getVolumeStatus(vol_status_out) if vol_status == "Started": @@ -132,7 +106,7 @@ def showVolumeStatus(vol_status_out, volName, clusterName): brick_online) exitStatus = utils.PluginStatusCode.OK - send_to_nsca(clusterName, serviceName, exitStatus, resultString) + nscautils.send_to_nsca(clusterName, serviceName, exitStatus, resultString) def parse_input(): diff --git a/plugins/nscautils.py.in b/plugins/nscautils.py.in new file mode 100644 index 0000000..289d0d1 --- /dev/null +++ b/plugins/nscautils.py.in @@ -0,0 +1,53 @@ +# nscautils.py --utility methods to interact with Nagios NSCA +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +# + +import ConfigParser + +from glusternagios import utils + +__NAGIOSSERVER_CONF = "@nagiosconf@/nagios_server.conf" +__NSCA_CONF_PATH = "@nagiosconf@/send_nsca.cfg" +__NSCA_CMD_PATH = utils.CommandPath("nsca", "/usr/sbin/send_nsca") + + +def getNagiosServerIP(): + config = ConfigParser.ConfigParser() + config.read(__NAGIOSSERVER_CONF) + return config.get('NAGIOS-SERVER', 'nagios_server') + + +def getNagiosClusterName(): + config = ConfigParser.ConfigParser() + config.read(__NAGIOSSERVER_CONF) + return config.get('NAGIOS-DEFINTIONS', 'cluster_name') + + +def send_to_nsca(hostName, serviceName, exitStatus, resultString): + cmddata = '%s\t%s\t%s\t%s' % (hostName, + serviceName, + exitStatus, + resultString) + nagiosIP = getNagiosServerIP() + command_send_nsca = [__NSCA_CMD_PATH, '-H', nagiosIP, + '-c', __NSCA_CONF_PATH] + ret, out, err = utils.execCmd(command_send_nsca, data=cmddata) + return ret + + +def vol_service_name(volName, statusType=None): + return "Volume Status %s - %s" % (statusType, volName) diff --git a/tests/Makefile.am b/tests/Makefile.am index e8ab026..a540f11 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -19,6 +19,7 @@ # test_modules = \ + test_check_gluster_syslog.py \ test_check_volume_status.py \ test_cpu.py \ test_cpu_dataFile.py \ diff --git a/tests/test_check_gluster_syslog.py b/tests/test_check_gluster_syslog.py new file mode 100644 index 0000000..a6dce45 --- /dev/null +++ b/tests/test_check_gluster_syslog.py @@ -0,0 +1,46 @@ +# +# Copyright 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# Refer to the README and COPYING files for full details of the license +# + +import mock + +from testrunner import PluginsTestCase as TestCaseBase +from plugins import check_gluster_syslog +from glusternagios import utils + + +class TestGlusterSyslog(TestCaseBase): + + # Method to test volume perf data when no matching host method + @mock.patch('plugins.nscautils.getNagiosClusterName') + @mock.patch('plugins.nscautils.send_to_nsca') + def test_checkProcessMsg(self, mock_send_to_nsca, + mock_getNagiosClusterName): + mock_getNagiosClusterName.return_value = "test-cluster" + message = ("-/USER/CRIT/GLUSTERFSD [2014-04-06T21:45:33.378443+05:30] " + "glusterfsd: [2014-04-06 15:46:59.390038] " + "A [quota.c:3670:quota_log_usage] 0-test-vol-quota:" + "Usage is above soft limit: 300.0KB used by /test/") + check_gluster_syslog.processMsg(message) + mock_send_to_nsca.assert_called_with("test-cluster", + "Volume Status Quota - test-vol", + utils.PluginStatusCode.WARNING, + "QUOTA: Usage is " + "above soft limit: " + "300.0KB used by /test/") |