summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--configure.ac1
-rw-r--r--gluster-nagios-addons.spec.in2
-rw-r--r--plugins/Makefile.am8
-rw-r--r--plugins/__init__.py0
-rwxr-xr-xplugins/check_disk_and_inode.py195
-rwxr-xr-xplugins/check_remote_host.py199
-rwxr-xr-xplugins/gluster_host_service_handler.py154
-rwxr-xr-xplugins/sadf.py328
-rw-r--r--tests/Makefile.am2
-rw-r--r--tests/test_check_remote_host.py67
-rw-r--r--tests/test_sadf.py190
11 files changed, 1146 insertions, 0 deletions
diff --git a/configure.ac b/configure.ac
index 6b6f1d2..059f6a6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -82,6 +82,7 @@ fi
AX_PYTHON_MODULE([argparse], [fatal])
AX_PYTHON_MODULE([ethtool], [fatal])
AX_PYTHON_MODULE([glusternagios], [fatal])
+AX_PYTHON_MODULE([mock], [fatal])
AX_PYTHON_MODULE([netaddr], [fatal])
AX_PYTHON_MODULE([pthreading], [fatal])
AX_PYTHON_MODULE([pyinotify], [fatal])
diff --git a/gluster-nagios-addons.spec.in b/gluster-nagios-addons.spec.in
index 3450ba2..b350e80 100644
--- a/gluster-nagios-addons.spec.in
+++ b/gluster-nagios-addons.spec.in
@@ -24,6 +24,7 @@ Source0: %{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-root
BuildRequires: pyflakes
BuildRequires: python-pep8
+BuildRequires: python-mock
BuildRequires: python-nose
BuildRequires: python-devel
Requires: gluster-nagios-common
@@ -43,6 +44,7 @@ Group: Development/Tools
Requires: %{name} = %{version}-%{release}
Requires: pyflakes
Requires: python-pep8
+Requires: python-mock
Requires: python-nose
Requires: python-devel
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
index c12520c..12ebab7 100644
--- a/plugins/Makefile.am
+++ b/plugins/Makefile.am
@@ -1,2 +1,10 @@
dist_glusternagiosplugins_PYTHON = \
+ check_disk_and_inode.py \
+ check_remote_host.py \
+ gluster_host_service_handler.py \
+ sadf.py \
+ $(NULL)
+
+EXTRA_DIST = \
+ __init__.py \
$(NULL)
diff --git a/plugins/__init__.py b/plugins/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/plugins/__init__.py
diff --git a/plugins/check_disk_and_inode.py b/plugins/check_disk_and_inode.py
new file mode 100755
index 0000000..052df3a
--- /dev/null
+++ b/plugins/check_disk_and_inode.py
@@ -0,0 +1,195 @@
+#!/usr/bin/python
+# check_disk_and_inode.py -- nagios plugin to check disk and inode usage
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+
+import re
+import sys
+import commands
+from optparse import OptionParser
+
+
+def getUsageAndFree(command, lvm):
+ status = commands.getstatusoutput(command)[1].split()
+ path = status[-1]
+ usagePer = status[-2]
+ availSpace = status[-3]
+ usedSpace = status[-4]
+ device = status[-6].split("-")[-1]
+ dmatch = re.compile('[0-9]+').match(usagePer)
+ if (dmatch):
+ usage = eval(dmatch.group(0))
+ return (float(usage), float(100 - usage), usedSpace,
+ availSpace, device, path)
+ else:
+ return None, None, None, None, None, None
+
+
+def getDisk(path, readable=False, lvm=False):
+ if readable:
+ return getUsageAndFree("df -m %s" % path, lvm)
+ else:
+ return getUsageAndFree("df -kh %s" % path, lvm)
+
+
+def getInode(path, readable=False, lvm=False):
+ return getUsageAndFree("df -i %s" % path, lvm)
+
+
+def appendStatus(lst, level, typ, device, mpath, usage):
+ if 2 == level:
+ level = "crit"
+ elif 1 == level:
+ level = "warn"
+ else:
+ level = "ok"
+ lst.append("%s:%s:%s;%s;%s" % (level, device, mpath, usage))
+
+
+def getMounts(searchQuery=None, excludeList=[]):
+ mountPaths = []
+ f = open("/etc/mtab")
+ for i in f.readlines():
+ if searchQuery and i.startswith(searchQuery):
+ if not excludeList:
+ mountPaths.append(i.split()[0])
+ else:
+ device = i.split()
+ if not device[0] in options.exclude and\
+ not device[1] in options.exclude:
+ mountPaths.append(device[0])
+ f.close()
+ return mountPaths
+
+
+def parse_input():
+ parser = OptionParser()
+ parser.add_option('-w', '--warning', action='store', type='int',
+ dest='warn', help='Warning count in %', default=80)
+ parser.add_option('-c', '--critical', action='store', type='int',
+ dest='crit', help='Critical count in %', default=90)
+ parser.add_option('-u', '--usage', action="store_true", dest='usage',
+ help='Output disk and inode usage', default=False)
+ parser.add_option('-l', '--lvm', action="store_true",
+ dest='lvm', help='List lvm mounts', default=False)
+ parser.add_option('-a', '--all', action="store_true",
+ dest='all', help='List all mounts', default=False)
+ parser.add_option('-n', '--ignore', action="store_true",
+ dest='ignore', help='Ignore errors', default=False)
+ parser.add_option('-i', '--include', action='append', type='string',
+ dest='mountPath', help='Mount path', default=[])
+ parser.add_option('-x', '--exclude', action="append", type='string',
+ dest='exclude', help='Exclude disk')
+ return parser.parse_args()
+
+
+# Script entry point: collects disk and inode usage for the selected mounts
+# and prints one status line followed by "|" and the performance data.
+if __name__ == '__main__':
+ # disk: performance-data entries; warnList/critList: messages for
+ # mounts over a threshold; diskList: "device:mount" of healthy mounts;
+ # mounts: mount paths already reported; level: worst level seen so far.
+ disk = []
+ warnList = []
+ critList = []
+ diskList = []
+ mounts = []
+ level = -1
+ (options, args) = parse_input()
+
+ # More than two positional arguments: "<warn> <crit> <path>..." when the
+ # first two are numeric, otherwise every argument is a mount path and
+ # the thresholds are 80/90.
+ # NOTE(review): one or two positional arguments are not used by either
+ # branch -- confirm that this is intended.
+ if len(args) > 2:
+ if args[0].isdigit() and args[1].isdigit():
+ warn = int(args[0])
+ crit = int(args[1])
+ options.mountPath = args[2:]
+ else:
+ warn = 80
+ crit = 90
+ options.mountPath = args
+ else:
+ crit = options.crit
+ warn = options.warn
+
+ # Prefix that getMounts() matches against the lines of /etc/mtab.
+ if options.lvm:
+ searchQuery = "/dev/mapper"
+ elif options.all:
+ searchQuery = None
+ else:
+ searchQuery = "/"
+
+ # Add the mounts found in /etc/mtab when no path was given or when
+ # --lvm / --all was requested.
+ if not options.mountPath or options.lvm or options.all:
+ options.mountPath += getMounts(searchQuery, options.exclude)
+
+ #if not options.mountPath:
+ # parser.print_help()
+ # sys.exit(1)
+
+ for path in options.mountPath:
+ diskUsage, diskFree, used, avail, dev, mpath = getDisk(path,
+ options.usage,
+ options.lvm)
+ inodeUsage, inodeFree, iused, iavail, idev, ipath = getInode(
+ path,
+ options.usage,
+ options.lvm)
+ # Skip a mount path that has already been reported.
+ if mpath in mounts:
+ continue
+ # No usable figures for this path: skip it with --ignore, otherwise
+ # exit with status 3 (nothing is printed in that case).
+ if not used or not iused:
+ if options.ignore:
+ continue
+ else:
+ sys.exit(3)
+
+ mounts.append(mpath)
+ # With --usage the performance data carries used/total amounts scaled
+ # by 1000 and thresholds derived from the totals; without it, the
+ # usage percentages with the thresholds as given.
+ if options.usage:
+ total = (float(used) + float(avail)) / 1000
+ itot = (float(iused) + float(iavail)) / 1000
+ disk.append("%s=%.1f;%.1f;%.1f;0;%.1f %s=%.1f;%.1f;%.1f;0;%.1f" % (
+ mpath, float(used)/1000, warn*total/100, crit*total/100, total,
+ ipath, float(iused)/1000, warn*itot/100, crit*itot/100, itot))
+ else:
+ disk.append("%s=%.2f;%s;%s;0;100 %s=%.2f;%s;%s;0;100" % (
+ mpath, diskUsage, warn, crit, ipath, inodeUsage, warn, crit))
+
+ # Classify the mount; when both disk and inode usage are over a
+ # threshold only the disk message is recorded.
+ if diskUsage >= crit or inodeUsage >= crit:
+ if diskUsage >= crit:
+ critList.append("crit:disk:%s;%s;%s" % (dev, mpath, diskUsage))
+ else:
+ critList.append("crit:inode:%s;%s;%s" % (idev, ipath,
+ inodeUsage))
+ if not level > 1:
+ level = 2
+ elif (diskUsage >= warn and diskUsage < crit) or (
+ inodeUsage >= warn and inodeUsage < crit):
+ if diskUsage >= warn:
+ warnList.append("warn:disk:%s;%s;%s" % (dev, mpath, diskUsage))
+ else:
+ warnList.append("warn:inode:%s;%s;%s" % (idev, ipath,
+ inodeUsage))
+ if not level > 0:
+ level = 1
+ else:
+ diskList.append("%s:%s" % (dev, mpath))
+
+ # Critical messages come first; when there are none at all, the healthy
+ # mounts are listed instead.
+ msg = " ".join(critList + warnList)
+ if not msg:
+ msg += " disks:mounts:(" + ",".join(diskList) + ")"
+
+ # Exit status 2 for critical and 1 for warning; the OK branch falls off
+ # the end of the script.
+ if 2 == level:
+ print "CRITICAL : %s | %s" % (msg, " ".join(disk))
+ sys.exit(2)
+ elif 1 == level:
+ print "WARNING : %s | %s" % (msg, " ".join(disk))
+ sys.exit(1)
+ else:
+ print "OK : %s | %s" % (msg, " ".join(disk))
diff --git a/plugins/check_remote_host.py b/plugins/check_remote_host.py
new file mode 100755
index 0000000..7350e27
--- /dev/null
+++ b/plugins/check_remote_host.py
@@ -0,0 +1,199 @@
+#!/usr/bin/python
+#
+# check_remote_host.py -- nagios plugin uses Mklivestatus to get the overall
+# status
+# of a host. The entities considered for the status of the host are -
+# 1. Host is reachable
+# 2. LV/Inode Service status
+# 3. CPU Utilization
+# 4. Memory Utilization
+# 5. Network Utilization
+# 6. Swap Utilization
+#
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA
+#
+
+import os
+import sys
+import shlex
+import subprocess
+import socket
+import getopt
+
+# Numeric status codes used as return values and exit statuses below.
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_CRITICAL = 2
+STATUS_UNKNOWN = 3
+# Path of the check_ping plugin executed by getPingStatus().
+_checkPingCommand = "/usr/lib64/nagios/plugins/check_ping"
+# Printable names of the status codes above.
+_commandStatusStrs = {STATUS_OK: 'OK', STATUS_WARNING: 'WARNING',
+ STATUS_CRITICAL: 'CRITICAL', STATUS_UNKNOWN: 'UNKNOWN'}
+# UNIX socket that checkLiveStatus() connects to.
+_socketPath = '/var/spool/nagios/cmd/live'
+
+
+# Class for exception definition
+class checkPingCmdExecFailedException(Exception):
+ message = "check_ping command failed"
+
+ def __init__(self, rc=0, out=(), err=()):
+ self.rc = rc
+ self.out = out
+ self.err = err
+
+ def __str__(self):
+ o = '\n'.join(self.out)
+ e = '\n'.join(self.err)
+ if o and e:
+ m = o + '\n' + e
+ else:
+ m = o or e
+
+ s = self.message
+ if m:
+ s += '\nerror: ' + m
+ if self.rc:
+ s += '\nreturn code: %s' % self.rc
+ return s
+
+
+# Method to execute a command
+def execCmd(command):
+ proc = subprocess.Popen(command,
+ close_fds=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ (out, err) = proc.communicate()
+ return (proc.returncode, out, err)
+
+
+# Method to check the ing status of the host
+def getPingStatus(hostAddr):
+ cmd = "%s -H %s" % (_checkPingCommand, hostAddr)
+ cmd += " -w 3000.0,80% -c 5000.0,100%"
+
+ try:
+ (rc, out, err) = execCmd(shlex.split(cmd))
+ except (OSError, ValueError) as e:
+ raise checkPingCmdExecFailedException(err=[str(e)])
+
+ if rc != 0:
+ raise checkPingCmdExecFailedException(rc, [out], [err])
+
+ return rc
+
+
+# Method to execute livestatus
+def checkLiveStatus(hostAddr, srvc):
+ s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ s.connect(_socketPath)
+
+ # Write command to socket
+ cmd = "GET services\nColumns: state\nFilter: "
+ "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr)
+ s.send(cmd)
+
+ # Close socket
+ s.shutdown(socket.SHUT_WR)
+
+ # Read the answer
+ answer = s.recv(1000000)
+
+ # Parse the answer into a table
+ table = [line.split(';') for line in answer.split('\n')[:-1]]
+
+ if len(table) > 0 and len(table[0]) > 0:
+ return int(table[0][0])
+ else:
+ return STATUS_UNKNOWN
+
+
+# Method to show the usage
+def showUsage():
+ usage = "Usage: %s -H <Host Address>\n" % os.path.basename(sys.argv[0])
+ sys.stderr.write(usage)
+
+
+# Main method
+if __name__ == "__main__":
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "hH:", ["help", "host="])
+ except getopt.GetoptError as e:
+ print (str(e))
+ showUsage()
+ sys.exit(STATUS_CRITICAL)
+
+ hostAddr = ''
+ if len(opts) == 0:
+ showUsage()
+ sys.exit(STATUS_CRITICAL)
+ else:
+ for opt, arg in opts:
+ if opt in ("-h", "--help"):
+ showUsage()
+ sys.exit()
+ elif opt in ("-H", "--host"):
+ hostAddr = arg
+ else:
+ showUsage()
+ sys.exit(STATUS_CRITICAL)
+
+ # Check ping status of the node, if its not reachable exit
+ try:
+ pingStatus = getPingStatus(hostAddr)
+ except (checkPingCmdExecFailedException) as e:
+ print "Host Status %s - Host not reachable" % \
+ (_commandStatusStrs[STATUS_UNKNOWN])
+ sys.exit(_commandStatusStrs[STATUS_UNKNOWN])
+
+ if pingStatus != STATUS_OK:
+ print "Host Status %s - Host not reachable" % \
+ (_commandStatusStrs[STATUS_UNKNOWN])
+ sys.exit(pingStatus)
+
+ # Check the various performance statuses for the host
+ diskPerfStatus = checkLiveStatus(hostAddr, 'Disk Utilization')
+ cpuPerfStatus = checkLiveStatus(hostAddr, 'Cpu Utilization')
+ memPerfStatus = checkLiveStatus(hostAddr, 'Memory Utilization')
+ swapPerfStatus = checkLiveStatus(hostAddr, 'Swap Utilization')
+ nwPerfStatus = checkLiveStatus(hostAddr, 'Network Utilization')
+
+ # Calculate the consolidated status for the host based on above status
+ # details
+ finalStatus = pingStatus | diskPerfStatus | cpuPerfStatus | \
+ memPerfStatus | swapPerfStatus | nwPerfStatus
+
+ # Get the list of ciritical services
+ criticalSrvcs = []
+ if diskPerfStatus == STATUS_CRITICAL:
+ criticalSrvcs.append('Disk Utilization')
+ if cpuPerfStatus == STATUS_CRITICAL:
+ criticalSrvcs.append('Cpu Utilization')
+ if memPerfStatus == STATUS_CRITICAL:
+ criticalSrvcs.append('Memory Utilization')
+ if swapPerfStatus == STATUS_CRITICAL:
+ criticalSrvcs.append('Swap Utilization')
+ if nwPerfStatus == STATUS_CRITICAL:
+ criticalSrvcs.append('Network Utilization')
+
+ # Return the status
+ if finalStatus == STATUS_CRITICAL:
+ print "Host Status %s - Service(s) %s in CRITICAL state" % \
+ (_commandStatusStrs[STATUS_WARNING], criticalSrvcs)
+ sys.exit(STATUS_WARNING)
+
+ print "Host Status %s - Services in good health" % \
+ _commandStatusStrs[STATUS_OK]
+ sys.exit(STATUS_OK)
diff --git a/plugins/gluster_host_service_handler.py b/plugins/gluster_host_service_handler.py
new file mode 100755
index 0000000..283ac69
--- /dev/null
+++ b/plugins/gluster_host_service_handler.py
@@ -0,0 +1,154 @@
+#!/usr/bin/python
+#
+# gluster_host_service_handler.py -- Event handler which checks the
+# status of defined services and accordingly changes the host status
+#
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA
+#
+
+import os
+import sys
+import datetime
+import socket
+import getopt
+
+# Service state names, compared with the value given to -s/--state.
+STATUS_OK = "OK"
+STATUS_WARNING = "WARNING"
+STATUS_CRITICAL = "CRITICAL"
+STATUS_UNKNOWN = "UNKNOWN"
+# Service state types, compared with the value given to -t/--type.
+SRVC_STATE_TYPE_SOFT = "SOFT"
+SRVC_STATE_TYPE_HARD = "HARD"
+# Numeric code of every state name above.
+statusCodes = {STATUS_OK: 0, STATUS_WARNING: 1, STATUS_CRITICAL: 2,
+ STATUS_UNKNOWN: 3}
+# File that update_host_state() writes its command line to.
+NAGIOS_COMMAND_FILE = "/var/spool/nagios/cmd/nagios.cmd"
+# Services whose states are combined by check_and_update_host_state_to_up().
+SRVC_LIST = ['Disk Utilization', 'Cpu Utilization', 'Memory Utilization',
+ 'Swap Utilization', 'Network Utilization']
+# UNIX socket that checkLiveStatus() connects to.
+_socketPath = '/var/spool/nagios/cmd/live'
+
+
+# Shows the usage of the script
+def showUsage():
+ usage = "Usage: %s -s <Service State (OK/WARNING/CRITICAL/UNKNOWN)> "
+ "-t <Service State Type (SOFT/HARD)> -a <No of Service attempts> "
+ "-l <Host Address> -n <Service Name>\n" % os.path.basename(sys.argv[0])
+ sys.stderr.write(usage)
+
+
+# Method to change the host status
+def update_host_state(hostAddr, srvcName, statusCode):
+ now = datetime.datetime.now()
+ if statusCode == statusCodes[STATUS_WARNING]:
+ cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status WARNING - "
+ "Service(s) ['%s'] in CRITICAL state\n" % (now, hostAddr, statusCode,
+ srvcName)
+ else:
+ cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status OK - "
+ "Services in good health\n" % (now, hostAddr, statusCode)
+
+ f = open(NAGIOS_COMMAND_FILE, "w")
+ f.write(cmdStr)
+ f.close()
+
+
+# Method to execute livestatus
+def checkLiveStatus(hostAddr, srvc):
+ s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ s.connect(_socketPath)
+
+ # Write command to socket
+ cmd = "GET services\nColumns: state\nFilter: "
+ "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr)
+ s.send(cmd)
+
+ # Close socket
+ s.shutdown(socket.SHUT_WR)
+
+ # Read the answer
+ answer = s.recv(1000)
+
+ # Parse the answer into a table
+ table = [line.split(';') for line in answer.split('\n')[:-1]]
+
+ if len(table) > 0 and len(table[0]) > 0:
+ return int(table[0][0])
+ else:
+ return statusCodes[STATUS_UNKNOWN]
+
+
+# Method to change the host state to UP based on other service type status
+def check_and_update_host_state_to_up(hostAddr, srvcName):
+ finalState = 0
+ for item in SRVC_LIST:
+ if item != srvcName:
+ finalState = finalState | checkLiveStatus(hostAddr, item)
+
+ if finalState == statusCodes[STATUS_OK]:
+ update_host_state(hostAddr, srvcName, statusCodes[STATUS_OK])
+
+
+# Main method
+if __name__ == "__main__":
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "hs:t:a:l:n:",
+ ["help", "state=", "type=",
+ "attempts=", "location=", "name="])
+ except getopt.GetoptError as e:
+ print (str(e))
+ showUsage()
+ sys.exit(STATUS_CRITICAL)
+
+ srvcState = ''
+ srvcStateType = ''
+ attempts = ''
+ hostAddr = ''
+ srvcName = ''
+ if len(opts) == 0:
+ showUsage()
+ else:
+ for opt, arg in opts:
+ if opt in ('-h', '--help'):
+ showUsage()
+ sys.exit()
+ elif opt in ('-s', '--state'):
+ srvcState = arg
+ elif opt in ('-t', '--type'):
+ srvcStateType = arg
+ elif opt in ('-a', '--attempts'):
+ attempts = arg
+ elif opt in ('-l', '--location'):
+ hostAddr = arg
+ elif opt in ('-n', '--name'):
+ srvcName = arg
+ else:
+ showUsage()
+ sys.exit()
+
+ # Swicth over the service state values and do the needful
+ if srvcState == STATUS_CRITICAL:
+ if srvcStateType == SRVC_STATE_TYPE_SOFT:
+ if int(attempts) == 3:
+ print "Updating the host status to warning "
+ "(3rd SOFT critical state)..."
+ update_host_state(hostAddr, srvcName,
+ statusCodes[STATUS_WARNING])
+ elif srvcStateType == SRVC_STATE_TYPE_HARD:
+ print "Updating the host status to warning..."
+ update_host_state(hostAddr, srvcName, statusCodes[STATUS_WARNING])
+ elif srvcState == STATUS_OK:
+ check_and_update_host_state_to_up(hostAddr, srvcName)
+
+ sys.exit(0)
diff --git a/plugins/sadf.py b/plugins/sadf.py
new file mode 100755
index 0000000..0bafb4a
--- /dev/null
+++ b/plugins/sadf.py
@@ -0,0 +1,328 @@
+#!/usr/bin/python
+# sadf.py -- nagios plugin uses sadf output for perf data
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+import sys
+import shlex
+import subprocess
+import datetime
+import argparse
+import xml.etree.ElementTree as ET
+from collections import defaultdict
+# Length of the time window that _sadfExecCmd() requests from sadf.
+_twoMinutes = datetime.timedelta(minutes=2)
+# sadf command lines for the individual statistics; _sadfExecCmd() appends
+# the time window and parses the output as XML.
+_sadfCpuCommand = "sadf -x -- -P ALL"
+_sadfMemoryCommand = "sadf -x -- -r"
+_sadfNetworkCommand = "sadf -x -- -n DEV"
+_sadfSwapSpaceCommand = "sadf -x -- -S"
+
+
+class sadfCmdExecFailedException(Exception):
+ message = "sadf command failed"
+
+ def __init__(self, rc=0, out=(), err=()):
+ self.rc = rc
+ self.out = out
+ self.err = err
+
+ def __str__(self):
+ o = '\n'.join(self.out)
+ e = '\n'.join(self.err)
+ if o and e:
+ m = o + '\n' + e
+ else:
+ m = o or e
+
+ s = self.message
+ if m:
+ s += '\nerror: ' + m
+ if self.rc:
+ s += '\nreturn code: %s' % self.rc
+ return s
+
+
+def execCmd(command):
+ proc = subprocess.Popen(command,
+ close_fds=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ (out, err) = proc.communicate()
+ return (proc.returncode, out, err)
+
+
+def etree_to_dict(t):
+ d = {t.tag: {} if t.attrib else None}
+ children = list(t)
+ if children:
+ dd = defaultdict(list)
+ for dc in map(etree_to_dict, children):
+ for k, v in dc.iteritems():
+ dd[k].append(v)
+ x = {}
+ for k, v in dd.iteritems():
+ x[k] = v[0] if len(v) == 1 else v
+ d = {t.tag: x}
+ if t.attrib:
+ d[t.tag].update((k, v) for k, v in t.attrib.iteritems())
+ if t.text:
+ text = t.text.strip()
+ if children or t.attrib:
+ if text:
+ d[t.tag]['#text'] = text
+ else:
+ d[t.tag] = text
+ return d
+
+
+def _sadfExecCmd(sadfCmd):
+ now = datetime.datetime.now()
+ start = (now - _twoMinutes).strftime("%H:%M:%S")
+ end = now.strftime("%H:%M:%S")
+ cmd = sadfCmd + " -s %s -e %s" % (start, end)
+
+ try:
+ (rc, out, err) = execCmd(shlex.split(cmd))
+ except (OSError, ValueError) as e:
+ raise sadfCmdExecFailedException(err=[str(e)])
+
+ if rc != 0:
+ raise sadfCmdExecFailedException(rc, [out], [err])
+
+ root = ET.fromstring(out)
+ d = etree_to_dict(root)
+ return d['sysstat']['host']['statistics']['timestamp']
+
+
+def _getLatestStat(stats):
+ if not stats:
+ return {}
+ if not isinstance(stats, list):
+ return stats
+ lstat = stats[0]
+ latestTime = datetime.datetime.strptime(lstat['time'],
+ "%H:%M:%S")
+ for s in stats[1:]:
+ thisTime = datetime.datetime.strptime(s['time'],
+ "%H:%M:%S")
+ if latestTime < thisTime:
+ lstat = s
+ latestTime = thisTime
+
+ return lstat
+
+
+def getLatestSadfCpuStat():
+ return _getLatestStat(_sadfExecCmd(_sadfCpuCommand))
+
+
+def getLatestSadfMemStat():
+ return _getLatestStat(_sadfExecCmd(_sadfMemoryCommand))
+
+
+def getLatestSadfNetStat():
+ return _getLatestStat(_sadfExecCmd(_sadfNetworkCommand))
+
+
+def getLatestSadfSwapStat():
+ return _getLatestStat(_sadfExecCmd(_sadfSwapSpaceCommand))
+
+
+def showCpuStat(warnLevel, critLevel):
+ s = getLatestSadfCpuStat()
+ if not s:
+ sys.stdout.write("CPU UNKNOWN\n")
+ sys.exit(3)
+ perfLines = []
+ idleCpu = 0
+ for cpu in s['cpu-load']['cpu']:
+ if cpu['number'] == 'all':
+ idleCpu = cpu['idle']
+ perfLines.append(
+ ("cpu_%s_total=%s%%;%s;%s cpu_%s_system=%s%% "
+ "cpu_%s_user=%s%% cpu_%s_idle=%s%%" % (
+ cpu['number'], 100-float(cpu['idle']),
+ warnLevel, critLevel,
+ cpu['number'], cpu['system'],
+ cpu['number'], cpu['user'],
+ cpu['number'], cpu['idle'])))
+ if len(s['cpu-load']['cpu'])-1 == 1:
+ break
+ totalCpuUsage = 100 - float(idleCpu)
+ if totalCpuUsage > critLevel:
+ sys.stdout.write(
+ ("CPU Status CRITICAL: Total CPU:%s%% Idle CPU:%s%% "
+ "| num_of_cpu=%s %s\n" % (totalCpuUsage, idleCpu,
+ len(s['cpu-load']['cpu'])-1,
+ " ".join(perfLines))))
+ elif totalCpuUsage > warnLevel:
+ sys.stdout.write(
+ ("CPU Status WARNING: Total CPU:%s%% Idle CPU:%s%% "
+ "| num_of_cpu=%s %s\n" % (totalCpuUsage, idleCpu,
+ len(s['cpu-load']['cpu'])-1,
+ " ".join(perfLines))))
+ else:
+ sys.stdout.write(
+ ("CPU Status OK: Total CPU:%s%% Idle CPU:%s%% "
+ "| num_of_cpu=%s %s\n" % (totalCpuUsage, idleCpu,
+ len(s['cpu-load']['cpu'])-1,
+ " ".join(perfLines))))
+
+ sys.exit(0)
+
+
+def showSwapStat(warning, critical):
+ s = getLatestSadfSwapStat()
+ if not s:
+ sys.stdout.write("IFACE UNKNOWN\n")
+ sys.exit(3)
+ totalSwap = int(s['memory']['swpfree']) + int(s['memory']['swpused'])
+ crit_value = (totalSwap * critical) / 100
+ war_value = (totalSwap * warning) / 100
+ if int(s['memory']['swpused']) >= crit_value:
+ sys.stdout.write("CRITICAL")
+ eStat = 2
+ elif int(s['memory']['swpused']) >= war_value:
+ sys.stdout.write("WARNING")
+ eStat = 1
+ else:
+ sys.stdout.write("OK")
+ eStat = 0
+ sys.stdout.write("- %.2f%% used(%skB out of %skB)|Used=%skB;%s;"
+ "%s;0;%s\n" % (float(s['memory']['swpused-percent']),
+ s['memory']['swpused'],
+ totalSwap,
+ s['memory']['swpused'],
+ war_value,
+ crit_value,
+ totalSwap))
+ sys.exit(eStat)
+
+
+def showMemStat(warning, critical):
+ s = getLatestSadfMemStat()
+ if not s:
+ sys.stdout.write("IFACE UNKNOWN\n")
+ sys.exit(3)
+ totalMem = int(s['memory']['memfree']) + int(s['memory']['memused'])
+ crit_value = (totalMem * critical) / 100
+ war_value = (totalMem * warning) / 100
+ if int(s['memory']['memused']) >= crit_value:
+ sys.stdout.write("CRITICAL")
+ eStat = 2
+ elif int(s['memory']['memused']) >= war_value:
+ sys.stdout.write("WARNING")
+ eStat = 1
+ else:
+ sys.stdout.write("OK")
+ eStat = 0
+ sys.stdout.write("- %.2f%% used(%skB out of %skB)|Total=%skB;%s;%s;0;%s"
+ " Used=%skB Buffered=%skB"
+ " Cached=%skB\n" % (float(s['memory']['memused-percent']),
+ s['memory']['memused'],
+ totalMem,
+ totalMem,
+ war_value,
+ crit_value,
+ totalMem,
+ s['memory']['memused'],
+ s['memory']['buffers'],
+ s['memory']['cached']))
+ sys.exit(eStat)
+
+
+def showNetStat(iface_list=None, list_type=None):
+ s = getLatestSadfNetStat()
+ if not s:
+ sys.stdout.write("IFACE UNKNOWN\n")
+ sys.exit(3)
+
+ devNames = []
+ perfLines = []
+ for dev in s['network']['net-dev']:
+ if list_type == "exclude":
+ if dev['iface'] in iface_list:
+ continue
+ elif list_type == "include":
+ if dev['iface'] not in iface_list:
+ continue
+ devNames.append(dev['iface'])
+ perfLines.append("%s.rxpck=%s %s.txpck=%s %s.rxkB=%s %s.txkB=%s"
+ % (dev['iface'], dev['rxpck'],
+ dev['iface'], dev['txpck'],
+ dev['iface'], dev['rxkB'],
+ dev['iface'], dev['txkB']))
+
+ sys.stdout.write("IFACE OK: %s |%s\n" % (", ".join(devNames),
+ " ".join(perfLines)))
+ sys.exit(0)
+
+
+def parse_input():
+ parser = argparse.ArgumentParser(usage='%(prog)s [-h] (\
+\n-m -w <warning> -c <critical> |\n-s -w <warning> -c <critical>\
+ |\n-cp -w <warning> -c <critical> |\n-n [-e <exclude>\
+ | -i <include>])')
+ group1 = parser.add_mutually_exclusive_group(required=True)
+ group1.add_argument('-m', '--memory', action='store_true',
+ help="Gives details related to memory")
+ group1.add_argument('-s', '--swap', action='store_true',
+ help="Gives details related to swap")
+ group1.add_argument('-cp', '--cpu', action='store_true',
+ help="Gives details related to cpu")
+ group1.add_argument('-n', '--network', action='store_true',
+ help="Gives details related to network")
+ parser.add_argument("-w", "--warning", action="store", type=int,
+ help="Warning threshold in percentage")
+ parser.add_argument("-c", "--critical", action="store", type=int,
+ help="Critical threshold in percentage")
+ group2 = parser.add_mutually_exclusive_group()
+ group2.add_argument("-e", "--exclude", action="append",
+ help="Parameters to be excluded")
+ group2.add_argument("-i", "--include", action="append",
+ help="Parameters to be included")
+ args = parser.parse_args()
+ if args.memory or args.swap or args.cpu:
+ if not args.critical or not args.warning:
+ print "UNKNOWN:Missing critical/warning threshold value."
+ sys.exit(3)
+ if args.exclude or args.include:
+ print "UNKNOWN:Exclude/Include is not valid for the given option."
+ sys.exit(3)
+ if args.critical <= args.warning:
+ print "UNKNOWN:Critical must be greater than Warning."
+ sys.exit(3)
+ else:
+ if args.critical or args.warning:
+ print "UNKNOWN:Warning/Critical is not valid for the given option."
+ sys.exit(3)
+ return args
+
+
+if __name__ == '__main__':
+ args = parse_input()
+ if args.memory:
+ showMemStat(args.warning, args.critical)
+ if args.swap:
+ showSwapStat(args.warning, args.critical)
+ if args.cpu:
+ showCpuStat(args.warning, args.critical)
+ if args.network:
+ if args.exclude:
+ showNetStat(args.exclude, "exclude")
+ if args.include:
+ showNetStat(args.include, "include")
+ showNetStat()
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 65843f4..e06a2ba 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -19,6 +19,8 @@
#
test_modules = \
+ test_check_remote_host.py \
+ test_sadf.py \
$(NULL)
dist_glusternagiosaddonstests_DATA = \
diff --git a/tests/test_check_remote_host.py b/tests/test_check_remote_host.py
new file mode 100644
index 0000000..c5c602d
--- /dev/null
+++ b/tests/test_check_remote_host.py
@@ -0,0 +1,67 @@
+#
+# Copyright 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+# Refer to the README and COPYING files for full details of the license
+#
+
+import mock
+
+from testrunner import PluginsTestCase as TestCaseBase
+from plugins.check_remote_host import *
+
+
+class TestHello(TestCaseBase):
+ # Method to test the execCmd() method
+ @mock.patch('check_remote_host.subprocess.Popen')
+ def testExecCmd(self, mock_popen):
+ reference = subprocess.Popen('any command', close_fds=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out = "sample output"
+ err = ""
+ reference.communicate.return_value = (out, err)
+ self.assertTrue(reference.communicate, "communicate called")
+
+ # Method to test the getPingStatus() method
+ @mock.patch('check_remote_host.execCmd')
+ def testGetPingStatus(self, mock_execCmd):
+ rc = 0
+ out = "sample output"
+ err = ""
+ mock_execCmd.return_value = (rc, out, err)
+ getPingStatus('dummy host')
+ mock_execCmd.assert_called_with([
+ '/usr/lib64/nagios/plugins/check_ping', '-H', 'dummy', 'host',
+ '-w', '3000.0,80%', '-c', '5000.0,100%'])
+ self.assertRaises(OSError, execCmd,
+ ['/usr/lib64/nagios/plugins/check_ping', '-H',
+ 'dummy', 'host', '-w', '3000.0,80%', '-c',
+ '5000.0,100%'])
+
+ # Method to test the checkLiveStatus() method
+ @mock.patch('check_remote_host.socket.socket')
+ def testCheckLiveStatus(self, mock_socket):
+ reference = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ self.assertTrue(mock_socket, "called")
+ reference.recv.return_value = "0\n"
+ checkLiveStatus("dummy host", "dummy srvc")
+ reference.connect.assert_called_with('/var/spool/nagios/cmd/live')
+ reference.send.assert_called_with("GET services\nColumns: state\n"
+ "Filter: description = dummy srvc\n"
+ "Filter: host_address = "
+ "dummy host\n")
+ self.assertEquals(0, checkLiveStatus("dummy host", "dummy srvc"))
diff --git a/tests/test_sadf.py b/tests/test_sadf.py
new file mode 100644
index 0000000..ced037e
--- /dev/null
+++ b/tests/test_sadf.py
@@ -0,0 +1,190 @@
+#
+# Copyright 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+# Refer to the README and COPYING files for full details of the license
+#
+
+import xml.etree.cElementTree as etree
+
+from testrunner import PluginsTestCase as TestCaseBase
+from plugins import sadf
+
+
+class sadfTests(TestCaseBase):
+    """Tests for sadf.etree_to_dict() using sysstat-style XML fixtures."""
+
+    def _etree_to_dict_arg_test(self):
+        """Attributes and child elements end up as keys of one dict."""
+        out = """<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE sysstat PUBLIC "DTD v2.15 sysstat //EN"
+"http://pagesperso-orange.fr/sebastien.godard/sysstat-2.15.dtd">
+<sysstat>
+<sysdata-version>2.15</sysdata-version>
+<host nodename="dhcp-0-171.blr.redhat.com">
+<sysname>Linux</sysname>
+<release>3.11.3-201.fc19.x86_64</release>
+<machine>x86_64</machine>
+<number-of-cpus>4</number-of-cpus>
+<file-date>2014-03-07</file-date>
+<statistics>
+<timestamp date="2014-03-07" time="05:00:01" utc="1" interval="59">
+<memory per="second" unit="kB">
+<memfree>6821428</memfree>
+<memused>1049448</memused>
+<memused-percent>13.33</memused-percent>
+<buffers>49416</buffers>
+<cached>536932</cached>
+<commit>2127484</commit>
+<commit-percent>7.38</commit-percent>
+<active>361428</active>
+<inactive>487048</inactive>
+<dirty>1256</dirty>
+</memory>
+</timestamp>
+</statistics>
+<restarts>
+<boot date="2014-03-07" time="04:58:08" utc="1"/>
+</restarts>
+</host>
+</sysstat>
+"""
+        tree = etree.fromstring(out)
+        expected_dict = {
+            'sysstat': {
+                'host': {
+                    'sysname': 'Linux',
+                    'statistics': {
+                        'timestamp': {
+                            'date': '2014-03-07',
+                            'utc': '1',
+                            'interval': '59',
+                            'time': '05:00:01',
+                            'memory': {
+                                'memused-percent': '13.33',
+                                'cached': '536932',
+                                'unit': 'kB',
+                                'per': 'second',
+                                'memfree': '6821428',
+                                'inactive': '487048',
+                                'commit-percent': '7.38',
+                                'active': '361428',
+                                'commit': '2127484',
+                                'memused': '1049448',
+                                'buffers': '49416',
+                                'dirty': '1256'}}},
+                    'nodename': 'dhcp-0-171.blr.redhat.com',
+                    'file-date': '2014-03-07',
+                    'number-of-cpus': '4',
+                    'restarts': {
+                        'boot': {
+                            'date': '2014-03-07',
+                            'utc': '1',
+                            'time': '04:58:08'}},
+                    'machine': 'x86_64',
+                    'release': '3.11.3-201.fc19.x86_64'},
+                'sysdata-version': '2.15'}}
+
+        actual_dict = sadf.etree_to_dict(tree)
+        self.assertEqual(actual_dict, expected_dict)
+
+    def _etree_to_dict_string_test(self):
+        """Text next to child elements is expected under the '#text' key."""
+        out = """<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE sysstat PUBLIC "DTD v2.15 sysstat //EN"
+"http://pagesperso-orange.fr/sebastien.godard/sysstat-2.15.dtd">
+<sysstat>
+<sysdata-version>2.15</sysdata-version>
+<host nodename="dhcp-0-171.blr.redhat.com">
+<sysname>Linux</sysname>
+<release>3.11.3-201.fc19.x86_64</release>
+<machine>x86_64</machine>
+<number-of-cpus>4</number-of-cpus>
+<file-date>2014-03-07</file-date>
+<statistics>
+<timestamp date="2014-03-07" time="05:00:01" utc="1" interval="59">
+<memory per="second" unit="kB">
+Test string
+<memfree>6821428</memfree>
+<memused>1049448</memused>
+<memused-percent>13.33</memused-percent>
+<buffers>49416</buffers>
+<cached>536932</cached>
+<commit>2127484</commit>
+<commit-percent>7.38</commit-percent>
+<active>361428</active>
+<inactive>487048</inactive>
+<dirty>1256</dirty>
+</memory>
+</timestamp>
+</statistics>
+<restarts>
+<boot date="2014-03-07" time="04:58:08" utc="1"/>
+</restarts>
+</host>
+</sysstat>
+"""
+        tree = etree.fromstring(out)
+        expected_dict = {
+            'sysstat': {
+                'host': {
+                    'sysname': 'Linux',
+                    'statistics': {
+                        'timestamp': {
+                            'date': '2014-03-07',
+                            'utc': '1',
+                            'interval': '59',
+                            'time': '05:00:01',
+                            'memory': {
+                                '#text': 'Test string',
+                                'memused-percent': '13.33',
+                                'cached': '536932',
+                                'unit': 'kB',
+                                'per': 'second',
+                                'memfree': '6821428',
+                                'inactive': '487048',
+                                'commit-percent': '7.38',
+                                'active': '361428',
+                                'commit': '2127484',
+                                'memused': '1049448',
+                                'buffers': '49416',
+                                'dirty': '1256'}}},
+                    'nodename': 'dhcp-0-171.blr.redhat.com',
+                    'file-date': '2014-03-07',
+                    'number-of-cpus': '4',
+                    'restarts': {
+                        'boot': {
+                            'date': '2014-03-07',
+                            'utc': '1',
+                            'time': '04:58:08'}},
+                    'machine': 'x86_64',
+                    'release': '3.11.3-201.fc19.x86_64'},
+                'sysdata-version': '2.15'}}
+
+        actual_dict = sadf.etree_to_dict(tree)
+        self.assertEqual(actual_dict, expected_dict)
+
+    def _etree_to_dict_empty_test(self):
+        """Elements without any content are expected to map to None."""
+        out = """<?xml version="1.0" encoding="UTF-8"?>
+<sysstat>
+<buffers></buffers>
+<cached></cached>
+<commit>2127484</commit>
+<commit-percent>7.38</commit-percent>
+<active>361428</active>
+<inactive>487048</inactive>
+</sysstat>
+"""
+        tree = etree.fromstring(out)
+        expected_dict = {
+            'sysstat': {
+                'cached': None,
+                'inactive': '487048',
+                'commit-percent': '7.38',
+                'active': '361428',
+                'commit': '2127484',
+                'buffers': None}}
+
+        actual_dict = sadf.etree_to_dict(tree)
+        self.assertEqual(actual_dict, expected_dict)
+
+    def test_etree_to_dict_test(self):
+        # Single collected entry point: runs the three helper scenarios in
+        # order, so the first failing helper stops the remaining ones.
+        self._etree_to_dict_arg_test()
+        self._etree_to_dict_string_test()
+        self._etree_to_dict_empty_test()