summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile.am1
-rw-r--r--config/Makefile.am14
-rw-r--r--config/glusternagios.conf.in17
-rw-r--r--config/nagios_server.conf13
-rw-r--r--configure.ac4
-rw-r--r--gluster-nagios-addons.spec.in3
-rw-r--r--plugins/Makefile.am2
-rwxr-xr-xplugins/check_gluster_syslog.py117
-rwxr-xr-xplugins/check_vol_status.py44
-rw-r--r--plugins/nscautils.py.in53
-rw-r--r--tests/Makefile.am1
-rw-r--r--tests/test_check_gluster_syslog.py46
12 files changed, 280 insertions, 35 deletions
diff --git a/Makefile.am b/Makefile.am
index 5ffc3af..fd78c86 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -20,6 +20,7 @@
# keep sorted
SUBDIRS = \
+ config \
plugins \
$(NULL)
diff --git a/config/Makefile.am b/config/Makefile.am
new file mode 100644
index 0000000..428ac43
--- /dev/null
+++ b/config/Makefile.am
@@ -0,0 +1,14 @@
+rsyslogconfdir = $(sysconfdir)/rsyslog.d
+rsyslogconf_DATA = \
+ glusternagios.conf \
+ $(NULL)
+
+nagiosconfdir = $(sysconfdir)/nagios
+nagiosconf_DATA = \
+ nagios_server.conf \
+ $(NULL)
+
+EXTRA_DIST = \
+ $(nagiosconf_DATA) \
+ $(rsyslogconf_DATA) \
+ $(NULL)
diff --git a/config/glusternagios.conf.in b/config/glusternagios.conf.in
new file mode 100644
index 0000000..004b375
--- /dev/null
+++ b/config/glusternagios.conf.in
@@ -0,0 +1,17 @@
+##### glusternagios.conf #####
+
+$ModLoad omprog
+$actionomprogbinary @glusternagiospluginsdir@/check_gluster_syslog.py
+
+#
+## Pass logs to omprog if app-name contains 'glusterfsd'
+#
+$template GLFS_NAG_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase%/%app-name:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
+
+if $app-name contains 'glusterfsd' then :omprog:;GLFS_NAG_Template
+
+
+#
+## discard logs where app-name is 'gluster' as we processed already
+#
+#if $app-name contains 'gluster' then ~
diff --git a/config/nagios_server.conf b/config/nagios_server.conf
new file mode 100644
index 0000000..56c8995
--- /dev/null
+++ b/config/nagios_server.conf
@@ -0,0 +1,13 @@
+# NAGIOS SERVER
+# The nagios server IP address or FQDN to which the NSCA command
+# needs to be sent
+[NAGIOS-SERVER]
+nagios_server=
+
+
+# CLUSTER NAME
+# The host name of the logical cluster configured in Nagios under which
+# the gluster volume services reside
+[NAGIOS-DEFINTIONS]
+cluster_name=
+
diff --git a/configure.ac b/configure.ac
index db4147e..dffb3ea 100644
--- a/configure.ac
+++ b/configure.ac
@@ -53,6 +53,7 @@ AC_SUBST([nagiospluginsdir], ['${libdir}/nagios/plugins'])
AC_SUBST([glusternagiospluginsdir], ['${nagiospluginsdir}/gluster'])
AC_SUBST([glusternagioscommonpylibdir], ['${pyexecdir}/glusternagios'])
AC_SUBST([glusternagiosaddonstestsdir], ['${datarootdir}/${PACKAGE_NAME}/tests'])
+AC_SUBST([nagiosconf], ['/etc/nagios'])
# Checking for pyflakes
AC_PATH_PROG([PYFLAKES], [pyflakes])
@@ -94,7 +95,10 @@ AX_PYTHON_MODULE([selinux], [fatal])
AC_CONFIG_FILES([
Makefile
gluster-nagios-addons.spec
+ config/Makefile
+ config/glusternagios.conf
plugins/Makefile
+ plugins/nscautils.py
plugins/volcap/Makefile
tests/Makefile
tests/run_tests_local.sh
diff --git a/gluster-nagios-addons.spec.in b/gluster-nagios-addons.spec.in
index 390d6e5..969bded 100644
--- a/gluster-nagios-addons.spec.in
+++ b/gluster-nagios-addons.spec.in
@@ -139,6 +139,7 @@ command[discoverlogicalcomponents]=/usr/lib64/nagios/plugins/gluster/discoverlog
EOF
%_init_enable nrpe
%_init_restart crond
+%_init_restart rsyslog
%preun
@@ -155,6 +156,8 @@ sed -i '/check_vol_quota_status/d' %{_sysconfdir}/nagios/nrpe.cfg
%defattr(-,root,root,-)
%attr(0755, -, -) %{_libdir}/nagios/plugins/gluster/*
%{_sysconfdir}/cron.d/gluster-sysstat.crontab
+%{_sysconfdir}/rsyslog.d/glusternagios.conf
+%{_sysconfdir}/nagios/nagios_server.conf
%files tests
%defattr(-,root,root,-)
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
index c74cc3e..9bba2d4 100644
--- a/plugins/Makefile.am
+++ b/plugins/Makefile.am
@@ -9,6 +9,7 @@ crond_DATA = \
dist_glusternagiosplugins_PYTHON = \
check_disk_and_inode.py \
+ check_gluster_syslog.py \
check_vol_utilization.py \
check_vol_status.py \
check_volume_status.py \
@@ -19,6 +20,7 @@ dist_glusternagiosplugins_PYTHON = \
__init__.py \
memory.py \
network.py \
+ nscautils.py \
sadf.py \
swap.py \
$(NULL)
diff --git a/plugins/check_gluster_syslog.py b/plugins/check_gluster_syslog.py
new file mode 100755
index 0000000..a52667c
--- /dev/null
+++ b/plugins/check_gluster_syslog.py
@@ -0,0 +1,117 @@
+#! /usr/bin/python
+# check_gluster_syslog.py
+# Script to act on syslog messages related to gluster
+# and send output to Nagios via nsca
+#
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+import re
+import sys
+import select
+
+import nscautils
+from glusternagios import utils
+
+# skeleton config parameters
+__pollPeriod = 0.75 # the number of seconds between polling for new messages
+__maxAtOnce = 1024 # max no of messages that are processed within one batch
+
+
+def findVolName(pattern):
+    """Extract the volume name from a translator identifier.
+
+    The identifier has the form <graphid>-<volume name>-<translator name>.
+    Everything between the first and the last '-' is returned, so a volume
+    name that itself contains '-' is kept intact.
+    """
+    firstDash = pattern.find('-')
+    lastDash = pattern.rfind('-')
+    return pattern[firstDash + 1:lastDash]
+
+
+def getStatusCode(alertlevel):
+    """Map a syslog severity name to a Nagios plugin status code.
+
+    Only 'ALERT' is reported as CRITICAL; every other level is reported
+    as WARNING.
+    """
+    if alertlevel != 'ALERT':
+        return utils.PluginStatusCode.WARNING
+    return utils.PluginStatusCode.CRITICAL
+
+
+def processQuotaMsg(msg, alertlevel):
+    """Report a quota log message to Nagios through NSCA.
+
+    msg        -- the complete log line
+    alertlevel -- severity name, translated with getStatusCode()
+
+    Nothing is sent when the line carries no
+    <graphid>-<volume name>-quota identifier.
+    """
+    found = re.search(r'\b\d*-[a-zA-Z0-9_-]*-quota\b', msg)
+    if not found:
+        return
+    translatorId = found.group()
+    volname = findVolName(translatorId)
+    # The alert text starts one character after the last occurrence of the
+    # translator identifier (the character skipped is the separator that
+    # follows the identifier).
+    textStart = msg.rfind(translatorId) + len(translatorId) + 1
+    alertMsg = "QUOTA: " + msg[textStart:]
+    serviceName = nscautils.vol_service_name(volname, "Quota")
+    nscautils.send_to_nsca(nscautils.getNagiosClusterName(),
+                           serviceName,
+                           getStatusCode(alertlevel),
+                           alertMsg)
+
+
+def processMsg(msg):
+    """Dispatch one line received from rsyslog to the matching handler.
+
+    The text before the first space is expected to hold four
+    '/'-separated fields; the third is the severity and the fourth the
+    application name. Lines that do not carry all four fields, or whose
+    application name is not 'GLUSTERFSD', are ignored. Quota messages are
+    handed to processQuotaMsg().
+    """
+    custom_logvars = msg[:msg.find(' ')]
+    fields = custom_logvars.split('/')
+    # A malformed or empty line must be skipped rather than raise
+    # IndexError: nothing above this function catches exceptions, so one
+    # bad line would otherwise terminate the plugin.
+    if len(fields) < 4:
+        return
+    level, appname = fields[2], fields[3]
+    if appname != 'GLUSTERFSD':
+        return
+    # For gluster messages, need to check the source of message: the text
+    # between the last '[' and the last ']' of the line.
+    logsource = msg[msg.rfind('['):msg.rfind(']')]
+    if 'quota' in logsource:
+        processQuotaMsg(msg, level)
+
+
+def onReceive(msgs):
+    """Handle one batch of messages pulled from rsyslog.
+
+    msgs is a list of variable length holding every message that is
+    currently available. Each entry is passed to processMsg() in order.
+    Do not add further buffering here: the next message may arrive in a
+    nanosecond or in three hours.
+    """
+    for logLine in msgs:
+        processMsg(logLine)
+
+
+"""
+-------------------------------------------------------
+This is plumbing that DOES NOT need to be CHANGED
+-------------------------------------------------------
+Implementor's note: Python seems to very aggressively
+buffer stdout. The end result was that rsyslog does not
+receive the script's messages in a timely manner (sometimes
+even never, probably due to races). To prevent this, we
+flush stdout after we have done processing. This is especially
+important once we get to the point where the plugin does
+two-way conversations with rsyslog. Do NOT change this!
+See also: https://github.com/rsyslog/rsyslog/issues/22
+"""
+# Entry point: read lines from stdin, group them into batches and hand
+# each batch to onReceive() until stdin is closed.
+if __name__ == '__main__':
+ # keepRunning is cleared once readline() returns an empty string
+ keepRunning = 1
+ while keepRunning == 1:
+ # wait up to __pollPeriod seconds for stdin to become readable
+ while keepRunning and sys.stdin in \
+ select.select([sys.stdin], [], [], __pollPeriod)[0]:
+ msgs = []
+ # a timeout of 0 makes select() return immediately, so this loop only
+ # collects lines while stdin is reported readable
+ while keepRunning and sys.stdin in \
+ select.select([sys.stdin], [], [], 0)[0]:
+ line = sys.stdin.readline()
+ if line:
+ msgs.append(line)
+ else: # an empty line means stdin has been closed
+ keepRunning = 0
+ # stop collecting once the batch holds __maxAtOnce messages
+ if len(msgs) >= __maxAtOnce:
+ break
+ # only call the handler when at least one message was collected
+ if len(msgs) > 0:
+ onReceive(msgs)
+ sys.stdout.flush() # important,Python buffers far too much
+ sys.exit(0)
diff --git a/plugins/check_vol_status.py b/plugins/check_vol_status.py
index 9e526da..33e26ee 100755
--- a/plugins/check_vol_status.py
+++ b/plugins/check_vol_status.py
@@ -1,9 +1,11 @@
#!/usr/bin/python
+
import re
-import commands
import argparse
+import commands
import xml.etree.ElementTree as ET
from glusternagios import utils
+import nscautils
def parseXml(xmldoc, searchStr):
@@ -25,37 +27,6 @@ def getVolumeStatus(vol_status_out):
return vol_status
-def getNagiosServerIP():
- nagiosIP = ""
- nscaConfig = open("/etc/nagios/nagios_server.cfg", "r+")
- for line in nscaConfig.readlines():
- if "nagios_server" in line:
- #print line.rstrip()
- line = line.rstrip()
- nagiosIP = line.rpartition('=')[2]
- #print nagiosIP
- return nagiosIP
-
-
-def send_to_nsca(hostName, serviceName, exitStatus, resultString):
- #print hostName
- #print serviceName
- #print exitStatus
- #print resultString
- f = open('out.txt', 'w')
- print >> f, '%s\t%s\t%s\t%s' % (hostName,
- serviceName,
- exitStatus,
- resultString)
- f.close()
- nagiosIP = getNagiosServerIP()
- command_send_nsca = "send_nsca -H " + nagiosIP + \
- " -c /etc/nagios/send_nsca.cfg < out.txt"
- #print command_send_nsca
- commands.getoutput(command_send_nsca)
- #print nsca_stat
-
-
def showBrickStatus(vol_status_out):
ipPat = re.compile("^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
xmlElemList = []
@@ -83,7 +54,10 @@ def showBrickStatus(vol_status_out):
else:
exitStatus = utils.PluginStatusCode.CRITICAL
resultString = "Brick Status: CRITICAL"
- send_to_nsca(brickIP, brickName, exitStatus, resultString)
+ nscautils.send_to_nsca(brickIP,
+ brickName,
+ exitStatus,
+ resultString)
def showVolumeStatus(vol_status_out, volName, clusterName):
@@ -94,7 +68,7 @@ def showVolumeStatus(vol_status_out, volName, clusterName):
#brick_list = []
resultString = ""
exitStatus = utils.PluginStatusCode.OK
- serviceName = "Volume-%s-Status" % volName
+ serviceName = nscautils.vol_service_name(volName)
ipPat = re.compile("^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
vol_status = getVolumeStatus(vol_status_out)
if vol_status == "Started":
@@ -132,7 +106,7 @@ def showVolumeStatus(vol_status_out, volName, clusterName):
brick_online)
exitStatus = utils.PluginStatusCode.OK
- send_to_nsca(clusterName, serviceName, exitStatus, resultString)
+ nscautils.send_to_nsca(clusterName, serviceName, exitStatus, resultString)
def parse_input():
diff --git a/plugins/nscautils.py.in b/plugins/nscautils.py.in
new file mode 100644
index 0000000..289d0d1
--- /dev/null
+++ b/plugins/nscautils.py.in
@@ -0,0 +1,53 @@
+# nscautils.py --utility methods to interact with Nagios NSCA
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+import ConfigParser
+
+from glusternagios import utils
+
+# @nagiosconf@ is a placeholder that is substituted when nscautils.py is
+# generated from nscautils.py.in at configure time.
+# Configuration file holding the Nagios server address and cluster name.
+__NAGIOSSERVER_CONF = "@nagiosconf@/nagios_server.conf"
+# Configuration file handed to send_nsca through its -c option.
+__NSCA_CONF_PATH = "@nagiosconf@/send_nsca.cfg"
+# NOTE(review): presumably resolves the send_nsca executable, with
+# /usr/sbin/send_nsca as a candidate path -- confirm in utils.CommandPath.
+__NSCA_CMD_PATH = utils.CommandPath("nsca", "/usr/sbin/send_nsca")
+
+
+def getNagiosServerIP():
+    """Return the 'nagios_server' option of the [NAGIOS-SERVER] section.
+
+    The server configuration file is parsed again on every call.
+    """
+    serverConf = ConfigParser.ConfigParser()
+    serverConf.read(__NAGIOSSERVER_CONF)
+    return serverConf.get('NAGIOS-SERVER', 'nagios_server')
+
+
+def getNagiosClusterName():
+    """Return the 'cluster_name' option of the [NAGIOS-DEFINTIONS] section.
+
+    The server configuration file is parsed again on every call.
+    """
+    serverConf = ConfigParser.ConfigParser()
+    serverConf.read(__NAGIOSSERVER_CONF)
+    return serverConf.get('NAGIOS-DEFINTIONS', 'cluster_name')
+
+
+def send_to_nsca(hostName, serviceName, exitStatus, resultString):
+    """Submit one check result to the Nagios server with send_nsca.
+
+    The four arguments are joined with tabs into a single record that is
+    passed to the command as input data. Returns the first element of the
+    tuple returned by utils.execCmd().
+    """
+    record = (hostName, serviceName, exitStatus, resultString)
+    cmddata = '%s\t%s\t%s\t%s' % record
+    nagiosIP = getNagiosServerIP()
+    sendCommand = [__NSCA_CMD_PATH,
+                   '-H', nagiosIP,
+                   '-c', __NSCA_CONF_PATH]
+    returnCode, _out, _err = utils.execCmd(sendCommand, data=cmddata)
+    return returnCode
+
+
+def vol_service_name(volName, statusType=None):
+    """Build the Nagios service name used for a volume.
+
+    volName    -- name of the gluster volume
+    statusType -- optional qualifier such as "Quota"
+
+    Returns "Volume Status <statusType> - <volName>", or
+    "Volume Status - <volName>" when no statusType is given, so that the
+    default value never shows up as the literal text "None" in the name.
+    """
+    if statusType is None:
+        return "Volume Status - %s" % volName
+    return "Volume Status %s - %s" % (statusType, volName)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index e8ab026..a540f11 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -19,6 +19,7 @@
#
test_modules = \
+ test_check_gluster_syslog.py \
test_check_volume_status.py \
test_cpu.py \
test_cpu_dataFile.py \
diff --git a/tests/test_check_gluster_syslog.py b/tests/test_check_gluster_syslog.py
new file mode 100644
index 0000000..a6dce45
--- /dev/null
+++ b/tests/test_check_gluster_syslog.py
@@ -0,0 +1,46 @@
+#
+# Copyright 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+# Refer to the README and COPYING files for full details of the license
+#
+
+import mock
+
+from testrunner import PluginsTestCase as TestCaseBase
+from plugins import check_gluster_syslog
+from glusternagios import utils
+
+
+class TestGlusterSyslog(TestCaseBase):
+
+    # processMsg() on a quota log line must submit one result for the
+    # volume's quota service under the configured cluster name
+    @mock.patch('plugins.nscautils.getNagiosClusterName')
+    @mock.patch('plugins.nscautils.send_to_nsca')
+    def test_checkProcessMsg(self, mock_send_to_nsca,
+                             mock_getNagiosClusterName):
+        clusterName = "test-cluster"
+        mock_getNagiosClusterName.return_value = clusterName
+        logLine = ("-/USER/CRIT/GLUSTERFSD [2014-04-06T21:45:33.378443+05:30] "
+                   "glusterfsd: [2014-04-06 15:46:59.390038] "
+                   "A [quota.c:3670:quota_log_usage] 0-test-vol-quota:"
+                   "Usage is above soft limit: 300.0KB used by /test/")
+        expectedAlert = ("QUOTA: Usage is "
+                         "above soft limit: "
+                         "300.0KB used by /test/")
+        check_gluster_syslog.processMsg(logLine)
+        mock_send_to_nsca.assert_called_with(clusterName,
+                                             "Volume Status Quota - test-vol",
+                                             utils.PluginStatusCode.WARNING,
+                                             expectedAlert)