diff options
-rw-r--r-- | configure.ac | 1 | ||||
-rw-r--r-- | gluster-nagios-addons.spec.in | 2 | ||||
-rw-r--r-- | plugins/Makefile.am | 8 | ||||
-rw-r--r-- | plugins/__init__.py | 0 | ||||
-rwxr-xr-x | plugins/check_disk_and_inode.py | 195 | ||||
-rwxr-xr-x | plugins/check_remote_host.py | 199 | ||||
-rwxr-xr-x | plugins/gluster_host_service_handler.py | 154 | ||||
-rwxr-xr-x | plugins/sadf.py | 328 | ||||
-rw-r--r-- | tests/Makefile.am | 2 | ||||
-rw-r--r-- | tests/test_check_remote_host.py | 67 | ||||
-rw-r--r-- | tests/test_sadf.py | 190 |
11 files changed, 1146 insertions, 0 deletions
diff --git a/configure.ac b/configure.ac index 6b6f1d2..059f6a6 100644 --- a/configure.ac +++ b/configure.ac @@ -82,6 +82,7 @@ fi AX_PYTHON_MODULE([argparse], [fatal]) AX_PYTHON_MODULE([ethtool], [fatal]) AX_PYTHON_MODULE([glusternagios], [fatal]) +AX_PYTHON_MODULE([mock], [fatal]) AX_PYTHON_MODULE([netaddr], [fatal]) AX_PYTHON_MODULE([pthreading], [fatal]) AX_PYTHON_MODULE([pyinotify], [fatal]) diff --git a/gluster-nagios-addons.spec.in b/gluster-nagios-addons.spec.in index 3450ba2..b350e80 100644 --- a/gluster-nagios-addons.spec.in +++ b/gluster-nagios-addons.spec.in @@ -24,6 +24,7 @@ Source0: %{name}-%{version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-root BuildRequires: pyflakes BuildRequires: python-pep8 +BuildRequires: python-mock BuildRequires: python-nose BuildRequires: python-devel Requires: gluster-nagios-common @@ -43,6 +44,7 @@ Group: Development/Tools Requires: %{name} = %{version}-%{release} Requires: pyflakes Requires: python-pep8 +Requires: python-mock Requires: python-nose Requires: python-devel diff --git a/plugins/Makefile.am b/plugins/Makefile.am index c12520c..12ebab7 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -1,2 +1,10 @@ dist_glusternagiosplugins_PYTHON = \ + check_disk_and_inode.py \ + check_remote_host.py \ + gluster_host_service_handler.py \ + sadf.py \ + $(NULL) + +EXTRA_DIST = \ + __init__.py \ $(NULL) diff --git a/plugins/__init__.py b/plugins/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/plugins/__init__.py diff --git a/plugins/check_disk_and_inode.py b/plugins/check_disk_and_inode.py new file mode 100755 index 0000000..052df3a --- /dev/null +++ b/plugins/check_disk_and_inode.py @@ -0,0 +1,195 @@ +#!/usr/bin/python +# check_disk_and_inode.py -- nagios plugin uses df output for disk and inode perf data +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software 
Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +# + + +import re +import sys +import commands +from optparse import OptionParser + + +def getUsageAndFree(command, lvm): + status = commands.getstatusoutput(command)[1].split() + path = status[-1] + usagePer = status[-2] + availSpace = status[-3] + usedSpace = status[-4] + device = status[-6].split("-")[-1] + dmatch = re.compile('[0-9]+').match(usagePer) + if (dmatch): + usage = eval(dmatch.group(0)) + return (float(usage), float(100 - usage), usedSpace, + availSpace, device, path) + else: + return None, None, None, None, None, None + + +def getDisk(path, readable=False, lvm=False): + if readable: + return getUsageAndFree("df -m %s" % path, lvm) + else: + return getUsageAndFree("df -kh %s" % path, lvm) + + +def getInode(path, readable=False, lvm=False): + return getUsageAndFree("df -i %s" % path, lvm) + + +def appendStatus(lst, level, typ, device, mpath, usage): + if 2 == level: + level = "crit" + elif 1 == level: + level = "warn" + else: + level = "ok" + lst.append("%s:%s:%s;%s;%s" % (level, device, mpath, usage)) + + +def getMounts(searchQuery=None, excludeList=[]): + mountPaths = [] + f = open("/etc/mtab") + for i in f.readlines(): + if searchQuery and i.startswith(searchQuery): + if not excludeList: + mountPaths.append(i.split()[0]) + else: + device = i.split() + if not device[0] in options.exclude and\ + not device[1] in options.exclude: + mountPaths.append(device[0]) + f.close() + return mountPaths + + +def 
parse_input(): + parser = OptionParser() + parser.add_option('-w', '--warning', action='store', type='int', + dest='warn', help='Warning count in %', default=80) + parser.add_option('-c', '--critical', action='store', type='int', + dest='crit', help='Critical count in %', default=90) + parser.add_option('-u', '--usage', action="store_true", dest='usage', + help='Output disk and inode usage', default=False) + parser.add_option('-l', '--lvm', action="store_true", + dest='lvm', help='List lvm mounts', default=False) + parser.add_option('-a', '--all', action="store_true", + dest='all', help='List all mounts', default=False) + parser.add_option('-n', '--ignore', action="store_true", + dest='ignore', help='Ignore errors', default=False) + parser.add_option('-i', '--include', action='append', type='string', + dest='mountPath', help='Mount path', default=[]) + parser.add_option('-x', '--exclude', action="append", type='string', + dest='exclude', help='Exclude disk') + return parser.parse_args() + + +if __name__ == '__main__': + disk = [] + warnList = [] + critList = [] + diskList = [] + mounts = [] + level = -1 + (options, args) = parse_input() + + if len(args) > 2: + if args[0].isdigit() and args[1].isdigit(): + warn = int(args[0]) + crit = int(args[1]) + options.mountPath = args[2:] + else: + warn = 80 + crit = 90 + options.mountPath = args + else: + crit = options.crit + warn = options.warn + + if options.lvm: + searchQuery = "/dev/mapper" + elif options.all: + searchQuery = None + else: + searchQuery = "/" + + if not options.mountPath or options.lvm or options.all: + options.mountPath += getMounts(searchQuery, options.exclude) + + #if not options.mountPath: + # parser.print_help() + # sys.exit(1) + + for path in options.mountPath: + diskUsage, diskFree, used, avail, dev, mpath = getDisk(path, + options.usage, + options.lvm) + inodeUsage, inodeFree, iused, iavail, idev, ipath = getInode( + path, + options.usage, + options.lvm) + if mpath in mounts: + continue + if not 
used or not iused: + if options.ignore: + continue + else: + sys.exit(3) + + mounts.append(mpath) + if options.usage: + total = (float(used) + float(avail)) / 1000 + itot = (float(iused) + float(iavail)) / 1000 + disk.append("%s=%.1f;%.1f;%.1f;0;%.1f %s=%.1f;%.1f;%.1f;0;%.1f" % ( + mpath, float(used)/1000, warn*total/100, crit*total/100, total, + ipath, float(iused)/1000, warn*itot/100, crit*itot/100, itot)) + else: + disk.append("%s=%.2f;%s;%s;0;100 %s=%.2f;%s;%s;0;100" % ( + mpath, diskUsage, warn, crit, ipath, inodeUsage, warn, crit)) + + if diskUsage >= crit or inodeUsage >= crit: + if diskUsage >= crit: + critList.append("crit:disk:%s;%s;%s" % (dev, mpath, diskUsage)) + else: + critList.append("crit:inode:%s;%s;%s" % (idev, ipath, + inodeUsage)) + if not level > 1: + level = 2 + elif (diskUsage >= warn and diskUsage < crit) or ( + inodeUsage >= warn and inodeUsage < crit): + if diskUsage >= warn: + warnList.append("warn:disk:%s;%s;%s" % (dev, mpath, diskUsage)) + else: + warnList.append("warn:inode:%s;%s;%s" % (idev, ipath, + inodeUsage)) + if not level > 0: + level = 1 + else: + diskList.append("%s:%s" % (dev, mpath)) + + msg = " ".join(critList + warnList) + if not msg: + msg += " disks:mounts:(" + ",".join(diskList) + ")" + + if 2 == level: + print "CRITICAL : %s | %s" % (msg, " ".join(disk)) + sys.exit(2) + elif 1 == level: + print "WARNING : %s | %s" % (msg, " ".join(disk)) + sys.exit(1) + else: + print "OK : %s | %s" % (msg, " ".join(disk)) diff --git a/plugins/check_remote_host.py b/plugins/check_remote_host.py new file mode 100755 index 0000000..7350e27 --- /dev/null +++ b/plugins/check_remote_host.py @@ -0,0 +1,199 @@ +#!/usr/bin/python +# +# check_remote_host.py -- nagios plugin uses Mklivestatus to get the overall +# status +# of a host. The entities considered for the status of the host are - +# 1. Host is reachable +# 2. LV/Inode Service status +# 3. CPU Utilization +# 4. Memory Utilization +# 5. Network Utilization +# 6. 
Swap Utilization +# +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA +# + +import os +import sys +import shlex +import subprocess +import socket +import getopt + +STATUS_OK = 0 +STATUS_WARNING = 1 +STATUS_CRITICAL = 2 +STATUS_UNKNOWN = 3 +_checkPingCommand = "/usr/lib64/nagios/plugins/check_ping" +_commandStatusStrs = {STATUS_OK: 'OK', STATUS_WARNING: 'WARNING', + STATUS_CRITICAL: 'CRITICAL', STATUS_UNKNOWN: 'UNKNOWN'} +_socketPath = '/var/spool/nagios/cmd/live' + + +# Class for exception definition +class checkPingCmdExecFailedException(Exception): + message = "check_ping command failed" + + def __init__(self, rc=0, out=(), err=()): + self.rc = rc + self.out = out + self.err = err + + def __str__(self): + o = '\n'.join(self.out) + e = '\n'.join(self.err) + if o and e: + m = o + '\n' + e + else: + m = o or e + + s = self.message + if m: + s += '\nerror: ' + m + if self.rc: + s += '\nreturn code: %s' % self.rc + return s + + +# Method to execute a command +def execCmd(command): + proc = subprocess.Popen(command, + close_fds=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (out, err) = proc.communicate() + return (proc.returncode, out, err) + + +# Method to check the ping status of the host +def getPingStatus(hostAddr): + cmd = "%s -H %s" % (_checkPingCommand, 
hostAddr) + cmd += " -w 3000.0,80% -c 5000.0,100%" + + try: + (rc, out, err) = execCmd(shlex.split(cmd)) + except (OSError, ValueError) as e: + raise checkPingCmdExecFailedException(err=[str(e)]) + + if rc != 0: + raise checkPingCmdExecFailedException(rc, [out], [err]) + + return rc + + +# Method to execute livestatus +def checkLiveStatus(hostAddr, srvc): + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.connect(_socketPath) + + # Write command to socket + cmd = "GET services\nColumns: state\nFilter: " + "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr) + s.send(cmd) + + # Close socket + s.shutdown(socket.SHUT_WR) + + # Read the answer + answer = s.recv(1000000) + + # Parse the answer into a table + table = [line.split(';') for line in answer.split('\n')[:-1]] + + if len(table) > 0 and len(table[0]) > 0: + return int(table[0][0]) + else: + return STATUS_UNKNOWN + + +# Method to show the usage +def showUsage(): + usage = "Usage: %s -H <Host Address>\n" % os.path.basename(sys.argv[0]) + sys.stderr.write(usage) + + +# Main method +if __name__ == "__main__": + try: + opts, args = getopt.getopt(sys.argv[1:], "hH:", ["help", "host="]) + except getopt.GetoptError as e: + print (str(e)) + showUsage() + sys.exit(STATUS_CRITICAL) + + hostAddr = '' + if len(opts) == 0: + showUsage() + sys.exit(STATUS_CRITICAL) + else: + for opt, arg in opts: + if opt in ("-h", "--help"): + showUsage() + sys.exit() + elif opt in ("-H", "--host"): + hostAddr = arg + else: + showUsage() + sys.exit(STATUS_CRITICAL) + + # Check ping status of the node, if its not reachable exit + try: + pingStatus = getPingStatus(hostAddr) + except (checkPingCmdExecFailedException) as e: + print "Host Status %s - Host not reachable" % \ + (_commandStatusStrs[STATUS_UNKNOWN]) + sys.exit(_commandStatusStrs[STATUS_UNKNOWN]) + + if pingStatus != STATUS_OK: + print "Host Status %s - Host not reachable" % \ + (_commandStatusStrs[STATUS_UNKNOWN]) + sys.exit(pingStatus) + + # Check the various 
performance statuses for the host + diskPerfStatus = checkLiveStatus(hostAddr, 'Disk Utilization') + cpuPerfStatus = checkLiveStatus(hostAddr, 'Cpu Utilization') + memPerfStatus = checkLiveStatus(hostAddr, 'Memory Utilization') + swapPerfStatus = checkLiveStatus(hostAddr, 'Swap Utilization') + nwPerfStatus = checkLiveStatus(hostAddr, 'Network Utilization') + + # Calculate the consolidated status for the host based on above status + # details + finalStatus = pingStatus | diskPerfStatus | cpuPerfStatus | \ + memPerfStatus | swapPerfStatus | nwPerfStatus + + # Get the list of critical services + criticalSrvcs = [] + if diskPerfStatus == STATUS_CRITICAL: + criticalSrvcs.append('Disk Utilization') + if cpuPerfStatus == STATUS_CRITICAL: + criticalSrvcs.append('Cpu Utilization') + if memPerfStatus == STATUS_CRITICAL: + criticalSrvcs.append('Memory Utilization') + if swapPerfStatus == STATUS_CRITICAL: + criticalSrvcs.append('Swap Utilization') + if nwPerfStatus == STATUS_CRITICAL: + criticalSrvcs.append('Network Utilization') + + # Return the status + if finalStatus == STATUS_CRITICAL: + print "Host Status %s - Service(s) %s in CRITICAL state" % \ + (_commandStatusStrs[STATUS_WARNING], criticalSrvcs) + sys.exit(STATUS_WARNING) + + print "Host Status %s - Services in good health" % \ + _commandStatusStrs[STATUS_OK] + sys.exit(STATUS_OK) diff --git a/plugins/gluster_host_service_handler.py b/plugins/gluster_host_service_handler.py new file mode 100755 index 0000000..283ac69 --- /dev/null +++ b/plugins/gluster_host_service_handler.py @@ -0,0 +1,154 @@ +#!/usr/bin/python +# +# gluster_host_service_handler.py -- Event handler which checks the +# status of defined services and accordingly changes the host status +# +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your 
option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA +# + +import os +import sys +import datetime +import socket +import getopt + +STATUS_OK = "OK" +STATUS_WARNING = "WARNING" +STATUS_CRITICAL = "CRITICAL" +STATUS_UNKNOWN = "UNKNOWN" +SRVC_STATE_TYPE_SOFT = "SOFT" +SRVC_STATE_TYPE_HARD = "HARD" +statusCodes = {STATUS_OK: 0, STATUS_WARNING: 1, STATUS_CRITICAL: 2, + STATUS_UNKNOWN: 3} +NAGIOS_COMMAND_FILE = "/var/spool/nagios/cmd/nagios.cmd" +SRVC_LIST = ['Disk Utilization', 'Cpu Utilization', 'Memory Utilization', + 'Swap Utilization', 'Network Utilization'] +_socketPath = '/var/spool/nagios/cmd/live' + + +# Shows the usage of the script +def showUsage(): + usage = "Usage: %s -s <Service State (OK/WARNING/CRITICAL/UNKNOWN)> " + "-t <Service State Type (SOFT/HARD)> -a <No of Service attempts> " + "-l <Host Address> -n <Service Name>\n" % os.path.basename(sys.argv[0]) + sys.stderr.write(usage) + + +# Method to change the host status +def update_host_state(hostAddr, srvcName, statusCode): + now = datetime.datetime.now() + if statusCode == statusCodes[STATUS_WARNING]: + cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status WARNING - " + "Service(s) ['%s'] in CRITICAL state\n" % (now, hostAddr, statusCode, + srvcName) + else: + cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status OK - " + "Services in good health\n" % (now, hostAddr, statusCode) + + f = open(NAGIOS_COMMAND_FILE, "w") + f.write(cmdStr) + f.close() + + +# Method to execute livestatus +def checkLiveStatus(hostAddr, srvc): + s = socket.socket(socket.AF_UNIX, 
socket.SOCK_STREAM) + s.connect(_socketPath) + + # Write command to socket + cmd = "GET services\nColumns: state\nFilter: " + "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr) + s.send(cmd) + + # Close socket + s.shutdown(socket.SHUT_WR) + + # Read the answer + answer = s.recv(1000) + + # Parse the answer into a table + table = [line.split(';') for line in answer.split('\n')[:-1]] + + if len(table) > 0 and len(table[0]) > 0: + return int(table[0][0]) + else: + return statusCodes[STATUS_UNKNOWN] + + +# Method to change the host state to UP based on other service type status +def check_and_update_host_state_to_up(hostAddr, srvcName): + finalState = 0 + for item in SRVC_LIST: + if item != srvcName: + finalState = finalState | checkLiveStatus(hostAddr, item) + + if finalState == statusCodes[STATUS_OK]: + update_host_state(hostAddr, srvcName, statusCodes[STATUS_OK]) + + +# Main method +if __name__ == "__main__": + try: + opts, args = getopt.getopt(sys.argv[1:], "hs:t:a:l:n:", + ["help", "state=", "type=", + "attempts=", "location=", "name="]) + except getopt.GetoptError as e: + print (str(e)) + showUsage() + sys.exit(STATUS_CRITICAL) + + srvcState = '' + srvcStateType = '' + attempts = '' + hostAddr = '' + srvcName = '' + if len(opts) == 0: + showUsage() + else: + for opt, arg in opts: + if opt in ('-h', '--help'): + showUsage() + sys.exit() + elif opt in ('-s', '--state'): + srvcState = arg + elif opt in ('-t', '--type'): + srvcStateType = arg + elif opt in ('-a', '--attempts'): + attempts = arg + elif opt in ('-l', '--location'): + hostAddr = arg + elif opt in ('-n', '--name'): + srvcName = arg + else: + showUsage() + sys.exit() + + # Switch over the service state values and do the needful + if srvcState == STATUS_CRITICAL: + if srvcStateType == SRVC_STATE_TYPE_SOFT: + if int(attempts) == 3: + print "Updating the host status to warning " + "(3rd SOFT critical state)..." 
+ update_host_state(hostAddr, srvcName, + statusCodes[STATUS_WARNING]) + elif srvcStateType == SRVC_STATE_TYPE_HARD: + print "Updating the host status to warning..." + update_host_state(hostAddr, srvcName, statusCodes[STATUS_WARNING]) + elif srvcState == STATUS_OK: + check_and_update_host_state_to_up(hostAddr, srvcName) + + sys.exit(0) diff --git a/plugins/sadf.py b/plugins/sadf.py new file mode 100755 index 0000000..0bafb4a --- /dev/null +++ b/plugins/sadf.py @@ -0,0 +1,328 @@ +#!/usr/bin/python +# sadf.py -- nagios plugin uses sadf output for perf data +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +# + +import sys +import shlex +import subprocess +import datetime +import argparse +import xml.etree.ElementTree as ET +from collections import defaultdict +_twoMinutes = datetime.timedelta(minutes=2) +_sadfCpuCommand = "sadf -x -- -P ALL" +_sadfMemoryCommand = "sadf -x -- -r" +_sadfNetworkCommand = "sadf -x -- -n DEV" +_sadfSwapSpaceCommand = "sadf -x -- -S" + + +class sadfCmdExecFailedException(Exception): + message = "sadf command failed" + + def __init__(self, rc=0, out=(), err=()): + self.rc = rc + self.out = out + self.err = err + + def __str__(self): + o = '\n'.join(self.out) + e = '\n'.join(self.err) + if o and e: + m = o + '\n' + e + else: + m = o or e + + s = self.message + if m: + s += '\nerror: ' + m + if self.rc: + s += '\nreturn code: %s' % self.rc + return s + + +def execCmd(command): + proc = subprocess.Popen(command, + close_fds=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (out, err) = proc.communicate() + return (proc.returncode, out, err) + + +def etree_to_dict(t): + d = {t.tag: {} if t.attrib else None} + children = list(t) + if children: + dd = defaultdict(list) + for dc in map(etree_to_dict, children): + for k, v in dc.iteritems(): + dd[k].append(v) + x = {} + for k, v in dd.iteritems(): + x[k] = v[0] if len(v) == 1 else v + d = {t.tag: x} + if t.attrib: + d[t.tag].update((k, v) for k, v in t.attrib.iteritems()) + if t.text: + text = t.text.strip() + if children or t.attrib: + if text: + d[t.tag]['#text'] = text + else: + d[t.tag] = text + return d + + +def _sadfExecCmd(sadfCmd): + now = datetime.datetime.now() + start = (now - _twoMinutes).strftime("%H:%M:%S") + end = now.strftime("%H:%M:%S") + cmd = sadfCmd + " -s %s -e %s" % (start, end) + + try: + (rc, out, err) = execCmd(shlex.split(cmd)) + except (OSError, 
ValueError) as e: + raise sadfCmdExecFailedException(err=[str(e)]) + + if rc != 0: + raise sadfCmdExecFailedException(rc, [out], [err]) + + root = ET.fromstring(out) + d = etree_to_dict(root) + return d['sysstat']['host']['statistics']['timestamp'] + + +def _getLatestStat(stats): + if not stats: + return {} + if not isinstance(stats, list): + return stats + lstat = stats[0] + latestTime = datetime.datetime.strptime(lstat['time'], + "%H:%M:%S") + for s in stats[1:]: + thisTime = datetime.datetime.strptime(s['time'], + "%H:%M:%S") + if latestTime < thisTime: + lstat = s + latestTime = thisTime + + return lstat + + +def getLatestSadfCpuStat(): + return _getLatestStat(_sadfExecCmd(_sadfCpuCommand)) + + +def getLatestSadfMemStat(): + return _getLatestStat(_sadfExecCmd(_sadfMemoryCommand)) + + +def getLatestSadfNetStat(): + return _getLatestStat(_sadfExecCmd(_sadfNetworkCommand)) + + +def getLatestSadfSwapStat(): + return _getLatestStat(_sadfExecCmd(_sadfSwapSpaceCommand)) + + +def showCpuStat(warnLevel, critLevel): + s = getLatestSadfCpuStat() + if not s: + sys.stdout.write("CPU UNKNOWN\n") + sys.exit(3) + perfLines = [] + idleCpu = 0 + for cpu in s['cpu-load']['cpu']: + if cpu['number'] == 'all': + idleCpu = cpu['idle'] + perfLines.append( + ("cpu_%s_total=%s%%;%s;%s cpu_%s_system=%s%% " + "cpu_%s_user=%s%% cpu_%s_idle=%s%%" % ( + cpu['number'], 100-float(cpu['idle']), + warnLevel, critLevel, + cpu['number'], cpu['system'], + cpu['number'], cpu['user'], + cpu['number'], cpu['idle']))) + if len(s['cpu-load']['cpu'])-1 == 1: + break + totalCpuUsage = 100 - float(idleCpu) + if totalCpuUsage > critLevel: + sys.stdout.write( + ("CPU Status CRITICAL: Total CPU:%s%% Idle CPU:%s%% " + "| num_of_cpu=%s %s\n" % (totalCpuUsage, idleCpu, + len(s['cpu-load']['cpu'])-1, + " ".join(perfLines)))) + elif totalCpuUsage > warnLevel: + sys.stdout.write( + ("CPU Status WARNING: Total CPU:%s%% Idle CPU:%s%% " + "| num_of_cpu=%s %s\n" % (totalCpuUsage, idleCpu, + len(s['cpu-load']['cpu'])-1, 
+ " ".join(perfLines)))) + else: + sys.stdout.write( + ("CPU Status OK: Total CPU:%s%% Idle CPU:%s%% " + "| num_of_cpu=%s %s\n" % (totalCpuUsage, idleCpu, + len(s['cpu-load']['cpu'])-1, + " ".join(perfLines)))) + + sys.exit(0) + + +def showSwapStat(warning, critical): + s = getLatestSadfSwapStat() + if not s: + sys.stdout.write("IFACE UNKNOWN\n") + sys.exit(3) + totalSwap = int(s['memory']['swpfree']) + int(s['memory']['swpused']) + crit_value = (totalSwap * critical) / 100 + war_value = (totalSwap * warning) / 100 + if int(s['memory']['swpused']) >= crit_value: + sys.stdout.write("CRITICAL") + eStat = 2 + elif int(s['memory']['swpused']) >= war_value: + sys.stdout.write("WARNING") + eStat = 1 + else: + sys.stdout.write("OK") + eStat = 0 + sys.stdout.write("- %.2f%% used(%skB out of %skB)|Used=%skB;%s;" + "%s;0;%s\n" % (float(s['memory']['swpused-percent']), + s['memory']['swpused'], + totalSwap, + s['memory']['swpused'], + war_value, + crit_value, + totalSwap)) + sys.exit(eStat) + + +def showMemStat(warning, critical): + s = getLatestSadfMemStat() + if not s: + sys.stdout.write("IFACE UNKNOWN\n") + sys.exit(3) + totalMem = int(s['memory']['memfree']) + int(s['memory']['memused']) + crit_value = (totalMem * critical) / 100 + war_value = (totalMem * warning) / 100 + if int(s['memory']['memused']) >= crit_value: + sys.stdout.write("CRITICAL") + eStat = 2 + elif int(s['memory']['memused']) >= war_value: + sys.stdout.write("WARNING") + eStat = 1 + else: + sys.stdout.write("OK") + eStat = 0 + sys.stdout.write("- %.2f%% used(%skB out of %skB)|Total=%skB;%s;%s;0;%s" + " Used=%skB Buffered=%skB" + " Cached=%skB\n" % (float(s['memory']['memused-percent']), + s['memory']['memused'], + totalMem, + totalMem, + war_value, + crit_value, + totalMem, + s['memory']['memused'], + s['memory']['buffers'], + s['memory']['cached'])) + sys.exit(eStat) + + +def showNetStat(iface_list=None, list_type=None): + s = getLatestSadfNetStat() + if not s: + sys.stdout.write("IFACE UNKNOWN\n") + 
sys.exit(3) + + devNames = [] + perfLines = [] + for dev in s['network']['net-dev']: + if list_type == "exclude": + if dev['iface'] in iface_list: + continue + elif list_type == "include": + if dev['iface'] not in iface_list: + continue + devNames.append(dev['iface']) + perfLines.append("%s.rxpck=%s %s.txpck=%s %s.rxkB=%s %s.txkB=%s" + % (dev['iface'], dev['rxpck'], + dev['iface'], dev['txpck'], + dev['iface'], dev['rxkB'], + dev['iface'], dev['txkB'])) + + sys.stdout.write("IFACE OK: %s |%s\n" % (", ".join(devNames), + " ".join(perfLines))) + sys.exit(0) + + +def parse_input(): + parser = argparse.ArgumentParser(usage='%(prog)s [-h] (\ +\n-m -w <warning> -c <critical> |\n-s -w <warning> -c <critical>\ + |\n-cp -w <warning> -c <critical> |\n-n [-e <exclude>\ + | -i <include>])') + group1 = parser.add_mutually_exclusive_group(required=True) + group1.add_argument('-m', '--memory', action='store_true', + help="Gives details related to memory") + group1.add_argument('-s', '--swap', action='store_true', + help="Gives details related to swap") + group1.add_argument('-cp', '--cpu', action='store_true', + help="Gives details related to cpu") + group1.add_argument('-n', '--network', action='store_true', + help="Gives details related to network") + parser.add_argument("-w", "--warning", action="store", type=int, + help="Warning threshold in percentage") + parser.add_argument("-c", "--critical", action="store", type=int, + help="Critical threshold in percentage") + group2 = parser.add_mutually_exclusive_group() + group2.add_argument("-e", "--exclude", action="append", + help="Parameters to be excluded") + group2.add_argument("-i", "--include", action="append", + help="Parameters to be included") + args = parser.parse_args() + if args.memory or args.swap or args.cpu: + if not args.critical or not args.warning: + print "UNKNOWN:Missing critical/warning threshold value." 
+ sys.exit(3) + if args.exclude or args.include: + print "UNKNOWN:Exclude/Include is not valid for the given option." + sys.exit(3) + if args.critical <= args.warning: + print "UNKNOWN:Critical must be greater than Warning." + sys.exit(3) + else: + if args.critical or args.warning: + print "UNKNOWN:Warning/Critical is not valid for the given option." + sys.exit(3) + return args + + +if __name__ == '__main__': + args = parse_input() + if args.memory: + showMemStat(args.warning, args.critical) + if args.swap: + showSwapStat(args.warning, args.critical) + if args.cpu: + showCpuStat(args.warning, args.critical) + if args.network: + if args.exclude: + showNetStat(args.exclude, "exclude") + if args.include: + showNetStat(args.include, "include") + showNetStat() diff --git a/tests/Makefile.am b/tests/Makefile.am index 65843f4..e06a2ba 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -19,6 +19,8 @@ # test_modules = \ + test_check_remote_host.py \ + test_sadf.py \ $(NULL) dist_glusternagiosaddonstests_DATA = \ diff --git a/tests/test_check_remote_host.py b/tests/test_check_remote_host.py new file mode 100644 index 0000000..c5c602d --- /dev/null +++ b/tests/test_check_remote_host.py @@ -0,0 +1,67 @@ +# +# Copyright 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# Refer to the README and COPYING files for full details of the license +# + +import mock + +from testrunner import PluginsTestCase as TestCaseBase +from plugins.check_remote_host import * + + +class TestHello(TestCaseBase): + # Method to test the execCmd() method + @mock.patch('check_remote_host.subprocess.Popen') + def testExecCmd(self, mock_popen): + reference = subprocess.Popen('any command', close_fds=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out = "sample output" + err = "" + reference.communicate.return_value = (out, err) + self.assertTrue(reference.communicate, "communicate called") + + # Method to test the getPingStatus() method + @mock.patch('check_remote_host.execCmd') + def testGetPingStatus(self, mock_execCmd): + rc = 0 + out = "sample output" + err = "" + mock_execCmd.return_value = (rc, out, err) + getPingStatus('dummy host') + mock_execCmd.assert_called_with([ + '/usr/lib64/nagios/plugins/check_ping', '-H', 'dummy', 'host', + '-w', '3000.0,80%', '-c', '5000.0,100%']) + self.assertRaises(OSError, execCmd, + ['/usr/lib64/nagios/plugins/check_ping', '-H', + 'dummy', 'host', '-w', '3000.0,80%', '-c', + '5000.0,100%']) + + # Method to test the checkLiveStatus() method + @mock.patch('check_remote_host.socket.socket') + def testCheckLiveStatus(self, mock_socket): + reference = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + self.assertTrue(mock_socket, "called") + reference.recv.return_value = "0\n" + checkLiveStatus("dummy host", "dummy srvc") + reference.connect.assert_called_with('/var/spool/nagios/cmd/live') + reference.send.assert_called_with("GET services\nColumns: state\n" + "Filter: description = dummy srvc\n" + "Filter: host_address = " + "dummy host\n") + self.assertEquals(0, checkLiveStatus("dummy host", 
"dummy srvc")) diff --git a/tests/test_sadf.py b/tests/test_sadf.py new file mode 100644 index 0000000..ced037e --- /dev/null +++ b/tests/test_sadf.py @@ -0,0 +1,190 @@ +# +# Copyright 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# Refer to the README and COPYING files for full details of the license +# + +import xml.etree.cElementTree as etree + +from testrunner import PluginsTestCase as TestCaseBase +from plugins import sadf + + +class sadfTests(TestCaseBase): + + def _etree_to_dict_arg_test(self): + out = """<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE sysstat PUBLIC "DTD v2.15 sysstat //EN" +"http://pagesperso-orange.fr/sebastien.godard/sysstat-2.15.dtd"> +<sysstat> +<sysdata-version>2.15</sysdata-version> +<host nodename="dhcp-0-171.blr.redhat.com"> +<sysname>Linux</sysname> +<release>3.11.3-201.fc19.x86_64</release> +<machine>x86_64</machine> +<number-of-cpus>4</number-of-cpus> +<file-date>2014-03-07</file-date> +<statistics> +<timestamp date="2014-03-07" time="05:00:01" utc="1" interval="59"> +<memory per="second" unit="kB"> +<memfree>6821428</memfree> +<memused>1049448</memused> +<memused-percent>13.33</memused-percent> +<buffers>49416</buffers> +<cached>536932</cached> +<commit>2127484</commit> +<commit-percent>7.38</commit-percent> +<active>361428</active> 
+<inactive>487048</inactive> +<dirty>1256</dirty> +</memory> +</timestamp> +</statistics> +<restarts> +<boot date="2014-03-07" time="04:58:08" utc="1"/> +</restarts> +</host> +</sysstat> +""" + tree = etree.fromstring(out) + expected_dict = \ + {'sysstat': {'host': + {'sysname': 'Linux', + 'statistics': {'timestamp': + {'date': '2014-03-07', + 'utc': '1', 'interval': '59', + 'time': '05:00:01', + 'memory': + {'memused-percent': '13.33', + 'cached': '536932', + 'unit': 'kB', + 'per': 'second', + 'memfree': '6821428', + 'inactive': '487048', + 'commit-percent': '7.38', + 'active': '361428', + 'commit': '2127484', + 'memused': '1049448', + 'buffers': '49416', + 'dirty': '1256'}}}, + 'nodename': 'dhcp-0-171.blr.redhat.com', + 'file-date': '2014-03-07', + 'number-of-cpus': '4', + 'restarts': {'boot': + {'date': '2014-03-07', 'utc': '1', + 'time': '04:58:08'}}, + 'machine': 'x86_64', + 'release': '3.11.3-201.fc19.x86_64'}, + 'sysdata-version': '2.15'}} + + actual_dict = sadf.etree_to_dict(tree) + self.assertEquals(actual_dict, expected_dict) + + def _etree_to_dict_string_test(self): + out = """<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE sysstat PUBLIC "DTD v2.15 sysstat //EN" +"http://pagesperso-orange.fr/sebastien.godard/sysstat-2.15.dtd"> +<sysstat> +<sysdata-version>2.15</sysdata-version> +<host nodename="dhcp-0-171.blr.redhat.com"> +<sysname>Linux</sysname> +<release>3.11.3-201.fc19.x86_64</release> +<machine>x86_64</machine> +<number-of-cpus>4</number-of-cpus> +<file-date>2014-03-07</file-date> +<statistics> +<timestamp date="2014-03-07" time="05:00:01" utc="1" interval="59"> +<memory per="second" unit="kB"> +Test string +<memfree>6821428</memfree> +<memused>1049448</memused> +<memused-percent>13.33</memused-percent> +<buffers>49416</buffers> +<cached>536932</cached> +<commit>2127484</commit> +<commit-percent>7.38</commit-percent> +<active>361428</active> +<inactive>487048</inactive> +<dirty>1256</dirty> +</memory> +</timestamp> +</statistics> +<restarts> 
+<boot date="2014-03-07" time="04:58:08" utc="1"/> +</restarts> +</host> +</sysstat> +""" + tree = etree.fromstring(out) + expected_dict = \ + {'sysstat': {'host': + {'sysname': 'Linux', + 'statistics': {'timestamp': + {'date': '2014-03-07', + 'utc': '1', 'interval': '59', + 'time': '05:00:01', 'memory': + {'#text': 'Test string', + 'memused-percent': '13.33', + 'cached': '536932', 'unit': 'kB', + 'per': 'second', + 'memfree': '6821428', + 'inactive': '487048', + 'commit-percent': '7.38', + 'active': '361428', + 'commit': '2127484', + 'memused': '1049448', + 'buffers': '49416', + 'dirty': '1256'}}}, + 'nodename': 'dhcp-0-171.blr.redhat.com', + 'file-date': '2014-03-07', 'number-of-cpus': '4', + 'restarts': {'boot': {'date': '2014-03-07', + 'utc': '1', + 'time': '04:58:08'}}, + 'machine': 'x86_64', + 'release': '3.11.3-201.fc19.x86_64'}, + 'sysdata-version': '2.15'}} + actual_dict = sadf.etree_to_dict(tree) + #print actual_dict + #exit(0) + self.assertEquals(actual_dict, expected_dict) + + def _etree_to_dict_empty_test(self): + out = """<?xml version="1.0" encoding="UTF-8"?> +<sysstat> +<buffers></buffers> +<cached></cached> +<commit>2127484</commit> +<commit-percent>7.38</commit-percent> +<active>361428</active> +<inactive>487048</inactive> +</sysstat> +""" + tree = etree.fromstring(out) + expected_dict = \ + {'sysstat': {'cached': None, + 'inactive': '487048', + 'commit-percent': '7.38', + 'active': '361428', + 'commit': '2127484', + 'buffers': None}} + actual_dict = sadf.etree_to_dict(tree) + self.assertEquals(actual_dict, expected_dict) + + def test_etree_to_dict_test(self): + self._etree_to_dict_arg_test() + self._etree_to_dict_string_test() + self._etree_to_dict_empty_test() |