summaryrefslogtreecommitdiffstats
path: root/plugins
diff options
context:
space:
mode:
authorShubhendu Tripathi <shtripat@redhat.com>2014-03-14 12:56:53 +0530
committerBala.FA <barumuga@redhat.com>2014-04-29 10:21:36 +0530
commit795fc58ef57127f257ea06c8c77ff8fba0662a4e (patch)
treeb3fd7e1ccfb670961257a51249f66d413f998e04 /plugins
parentad004f45088ee81723b2a2420b3bce01f0bd845b (diff)
nagios-server-addons: Check remote host plugin
Plugins to ckeck the remote host status based on various services. Change-Id: I1e260829901aa8dd831f0ca1d58609addb9bcf1b Signed-off-by: Shubhendu Tripathi <shtripat@redhat.com> Reviewed-on: https://cuckoo.blr.redhat.com:8443/9 Reviewed-by: Sahina Bose <sabose@redhat.com> Tested-by: Sahina Bose <sabose@redhat.com>
Diffstat (limited to 'plugins')
-rwxr-xr-xplugins/check_remote_host.py144
-rwxr-xr-xplugins/gluster_host_service_handler.py45
2 files changed, 50 insertions, 139 deletions
diff --git a/plugins/check_remote_host.py b/plugins/check_remote_host.py
index 7350e27..31ff9dd 100755
--- a/plugins/check_remote_host.py
+++ b/plugins/check_remote_host.py
@@ -2,13 +2,13 @@
#
# check_remote_host.py -- nagios plugin uses Mklivestatus to get the overall
# status
-# of a host. The entities considered for the status of the host are -
-# 1. Host is reachable
-# 2. LV/Inode Service status
-# 3. CPU Utilization
-# 4. Memory Utilization
-# 5. Network Utilization
-# 6. Swap Utilization
+# of a host. The services considered by default for the status of the host
+# are -
+# 1. LV/Inode Service status
+# 2. CPU Utilization
+# 3. Memory Utilization
+# 4. Network Utilization
+# 5. Swap Utilization
#
# Copyright (C) 2014 Red Hat Inc
#
@@ -29,90 +29,35 @@
import os
import sys
-import shlex
-import subprocess
-import socket
import getopt
+#import socket
+import json
+
+import livestatus
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_CRITICAL = 2
STATUS_UNKNOWN = 3
-_checkPingCommand = "/usr/lib64/nagios/plugins/check_ping"
_commandStatusStrs = {STATUS_OK: 'OK', STATUS_WARNING: 'WARNING',
STATUS_CRITICAL: 'CRITICAL', STATUS_UNKNOWN: 'UNKNOWN'}
-_socketPath = '/var/spool/nagios/cmd/live'
-
-
-# Class for exception definition
-class checkPingCmdExecFailedException(Exception):
- message = "check_ping command failed"
-
- def __init__(self, rc=0, out=(), err=()):
- self.rc = rc
- self.out = out
- self.err = err
-
- def __str__(self):
- o = '\n'.join(self.out)
- e = '\n'.join(self.err)
- if o and e:
- m = o + '\n' + e
- else:
- m = o or e
- s = self.message
- if m:
- s += '\nerror: ' + m
- if self.rc:
- s += '\nreturn code: %s' % self.rc
- return s
-
-# Method to execute a command
-def execCmd(command):
- proc = subprocess.Popen(command,
- close_fds=True,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- (out, err) = proc.communicate()
- return (proc.returncode, out, err)
-
-
-# Method to check the ing status of the host
-def getPingStatus(hostAddr):
- cmd = "%s -H %s" % (_checkPingCommand, hostAddr)
- cmd += " -w 3000.0,80% -c 5000.0,100%"
-
- try:
- (rc, out, err) = execCmd(shlex.split(cmd))
- except (OSError, ValueError) as e:
- raise checkPingCmdExecFailedException(err=[str(e)])
-
- if rc != 0:
- raise checkPingCmdExecFailedException(rc, [out], [err])
-
- return rc
+# Load the host monitoring services list
+def loadSrvcList():
+ srvc_list = []
+ with open("/etc/nagios/gluster/host-monitoring-services.in") as data_file:
+ srvc_list = json.load(data_file)['serviceList']
+ return srvc_list
# Method to execute livestatus
def checkLiveStatus(hostAddr, srvc):
- s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
- s.connect(_socketPath)
-
- # Write command to socket
- cmd = "GET services\nColumns: state\nFilter: "
- "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr)
- s.send(cmd)
+ cmd = "GET services\nColumns: state\nFilter: " \
+ "description = %s\n" \
+ "Filter: host_address = %s" % (srvc, hostAddr)
- # Close socket
- s.shutdown(socket.SHUT_WR)
-
- # Read the answer
- answer = s.recv(1000000)
-
- # Parse the answer into a table
- table = [line.split(';') for line in answer.split('\n')[:-1]]
+ table = livestatus.readLiveStatus(cmd)
if len(table) > 0 and len(table[0]) > 0:
return int(table[0][0])
@@ -150,43 +95,18 @@ if __name__ == "__main__":
showUsage()
sys.exit(STATUS_CRITICAL)
- # Check ping status of the node, if its not reachable exit
- try:
- pingStatus = getPingStatus(hostAddr)
- except (checkPingCmdExecFailedException) as e:
- print "Host Status %s - Host not reachable" % \
- (_commandStatusStrs[STATUS_UNKNOWN])
- sys.exit(_commandStatusStrs[STATUS_UNKNOWN])
-
- if pingStatus != STATUS_OK:
- print "Host Status %s - Host not reachable" % \
- (_commandStatusStrs[STATUS_UNKNOWN])
- sys.exit(pingStatus)
-
- # Check the various performance statuses for the host
- diskPerfStatus = checkLiveStatus(hostAddr, 'Disk Utilization')
- cpuPerfStatus = checkLiveStatus(hostAddr, 'Cpu Utilization')
- memPerfStatus = checkLiveStatus(hostAddr, 'Memory Utilization')
- swapPerfStatus = checkLiveStatus(hostAddr, 'Swap Utilization')
- nwPerfStatus = checkLiveStatus(hostAddr, 'Network Utilization')
-
- # Calculate the consolidated status for the host based on above status
- # details
- finalStatus = pingStatus | diskPerfStatus | cpuPerfStatus | \
- memPerfStatus | swapPerfStatus | nwPerfStatus
-
- # Get the list of ciritical services
+ # Load the services list
+ srvc_list = loadSrvcList()
+
+ # Calculate the consolidated status for the host based on above
+ # status of individual services
+ finalStatus = STATUS_OK
criticalSrvcs = []
- if diskPerfStatus == STATUS_CRITICAL:
- criticalSrvcs.append('Disk Utilization')
- if cpuPerfStatus == STATUS_CRITICAL:
- criticalSrvcs.append('Cpu Utilization')
- if memPerfStatus == STATUS_CRITICAL:
- criticalSrvcs.append('Memory Utilization')
- if swapPerfStatus == STATUS_CRITICAL:
- criticalSrvcs.append('Swap Utilization')
- if nwPerfStatus == STATUS_CRITICAL:
- criticalSrvcs.append('Network Utilization')
+ for srvc in srvc_list:
+ srvc_status = checkLiveStatus(hostAddr, srvc)
+ finalStatus = finalStatus | srvc_status
+ if srvc_status == STATUS_CRITICAL:
+ criticalSrvcs.append(srvc)
# Return the status
if finalStatus == STATUS_CRITICAL:
diff --git a/plugins/gluster_host_service_handler.py b/plugins/gluster_host_service_handler.py
index 283ac69..2a62108 100755
--- a/plugins/gluster_host_service_handler.py
+++ b/plugins/gluster_host_service_handler.py
@@ -23,9 +23,10 @@
import os
import sys
import datetime
-import socket
import getopt
+import livestatus
+
STATUS_OK = "OK"
STATUS_WARNING = "WARNING"
STATUS_CRITICAL = "CRITICAL"
@@ -37,14 +38,15 @@ statusCodes = {STATUS_OK: 0, STATUS_WARNING: 1, STATUS_CRITICAL: 2,
NAGIOS_COMMAND_FILE = "/var/spool/nagios/cmd/nagios.cmd"
SRVC_LIST = ['Disk Utilization', 'Cpu Utilization', 'Memory Utilization',
'Swap Utilization', 'Network Utilization']
-_socketPath = '/var/spool/nagios/cmd/live'
# Shows the usage of the script
def showUsage():
- usage = "Usage: %s -s <Service State (OK/WARNING/CRITICAL/UNKNOWN)> "
- "-t <Service State Type (SOFT/HARD)> -a <No of Service attempts> "
- "-l <Host Address> -n <Service Name>\n" % os.path.basename(sys.argv[0])
+ usage = "Usage: %s -s <Service State (OK/WARNING/CRITICAL/UNKNOWN)> " \
+ "-t <Service State Type (SOFT/HARD)>" \
+ " -a <No of Service attempts> " \
+ "-l <Host Address>" \
+ " -n <Service Name>\n" % os.path.basename(sys.argv[0])
sys.stderr.write(usage)
@@ -52,12 +54,13 @@ def showUsage():
def update_host_state(hostAddr, srvcName, statusCode):
now = datetime.datetime.now()
if statusCode == statusCodes[STATUS_WARNING]:
- cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status WARNING - "
- "Service(s) ['%s'] in CRITICAL state\n" % (now, hostAddr, statusCode,
- srvcName)
+ cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;" \
+ "Host Status WARNING - " \
+ "Service(s) ['%s'] in CRITICAL state\n" \
+ % (now, hostAddr, statusCode, srvcName)
else:
- cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status OK - "
- "Services in good health\n" % (now, hostAddr, statusCode)
+ cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status OK - " \
+ "Services in good health\n" % (now, hostAddr, statusCode)
f = open(NAGIOS_COMMAND_FILE, "w")
f.write(cmdStr)
@@ -66,22 +69,10 @@ def update_host_state(hostAddr, srvcName, statusCode):
# Method to execute livestatus
def checkLiveStatus(hostAddr, srvc):
- s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
- s.connect(_socketPath)
-
- # Write command to socket
- cmd = "GET services\nColumns: state\nFilter: "
- "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr)
- s.send(cmd)
-
- # Close socket
- s.shutdown(socket.SHUT_WR)
-
- # Read the answer
- answer = s.recv(1000)
+ cmd = "GET services\nColumns: state\nFilter: " \
+ "description = %s\nFilter: host_address = %s" % (srvc, hostAddr)
- # Parse the answer into a table
- table = [line.split(';') for line in answer.split('\n')[:-1]]
+ table = livestatus.readLiveStatus(cmd)
if len(table) > 0 and len(table[0]) > 0:
return int(table[0][0])
@@ -141,8 +132,8 @@ if __name__ == "__main__":
if srvcState == STATUS_CRITICAL:
if srvcStateType == SRVC_STATE_TYPE_SOFT:
if int(attempts) == 3:
- print "Updating the host status to warning "
- "(3rd SOFT critical state)..."
+ print "Updating the host status to warning " \
+ "(3rd SOFT critical state)..."
update_host_state(hostAddr, srvcName,
statusCodes[STATUS_WARNING])
elif srvcStateType == SRVC_STATE_TYPE_HARD: