diff options
author | Timothy Asir <tim.gluster@gmail.com> | 2014-04-01 17:31:44 +0530 |
---|---|---|
committer | Bala.FA <barumuga@redhat.com> | 2014-04-29 10:14:33 +0530 |
commit | 0682f3b48c06a5bf53f305dd77250cdb796693a5 (patch) | |
tree | 3c1705697a0f9846ae119f328f839d4bc7460906 /plugins | |
parent | 666983202013dd3c1e236eb458b75b3a95d74782 (diff) |
Add gluster process monitoring plugin
This helps monitor gluster-related processes such as glusterd,
glusterfsd, the gluster self-heal daemon and nfs, and also helps to
monitor other running services.
Change-Id: I8c6f252270a847e7ff007c358125a9310525f201
Signed-off-by: Timothy Asir <tjeyasin@redhat.com>
Diffstat (limited to 'plugins')
-rw-r--r-- | plugins/Makefile.am | 2 | ||||
-rwxr-xr-x | plugins/check_proc_status.py | 232 | ||||
-rw-r--r-- | plugins/gluster-proc.crontab | 1 |
3 files changed, 235 insertions, 0 deletions
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
index 9bba2d4..b1592de 100644
--- a/plugins/Makefile.am
+++ b/plugins/Makefile.am
@@ -5,6 +5,7 @@ SUBDIRS = \
 cronddir = $(sysconfdir)/cron.d
 crond_DATA = \
 	gluster-sysstat.crontab \
+	gluster-proc.crontab \
 	$(NULL)
 
 dist_glusternagiosplugins_PYTHON = \
@@ -13,6 +14,7 @@ dist_glusternagiosplugins_PYTHON = \
 	check_vol_utilization.py \
 	check_vol_status.py \
 	check_volume_status.py \
+	check_proc_status.py \
 	cpu.py \
 	discoverpeers.py \
 	discoverlogicalcomponents.py \
diff --git a/plugins/check_proc_status.py b/plugins/check_proc_status.py
new file mode 100755
index 0000000..80a41c8
--- /dev/null
+++ b/plugins/check_proc_status.py
@@ -0,0 +1,232 @@
+#!/usr/bin/python
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+import sys
+import errno
+import socket
+import psutil
+import nscautils
+import glusternagios
+
+from glusternagios import utils
+from glusternagios import glustercli
+
+
+_checkProc = utils.CommandPath('check_proc',
+                               '/usr/lib64/nagios/plugins/check_procs')
+
+_glusterVolPath = "/var/lib/glusterd/vols"
+_checkNfsCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", "nfs"]
+_checkShdCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
+                "glustershd"]
+# check_procs compares -C with the exact command name and reports OK when
+# no threshold is given, so the Samba daemon must be looked up as "smbd"
+# with "-c 1:" for a missing process to be noticed.
+_checkSmbCmd = [_checkProc.cmd, "-c", "1:", "-C", "smbd"]
+_checkQuotaCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
+                  "quotad"]
+_checkBrickCmd = [_checkProc.cmd, "-C", "glusterfsd"]
+_checkGlusterdCmd = [_checkProc.cmd, "-c", "1:", "-w", "1:1", "-C", "glusterd"]
+_nfsService = "Glusterfs NFS Daemon"
+_shdService = "Glusterfs Self-Heal Daemon"
+_smbService = "CIFS"
+_brickService = "Brick Status - "
+_glusterdService = "Gluster Management Daemon"
+_quotadService = "Gluster Quota Daemon"
+
+
+def _onlineVolumes(volInfo):
+    """Yield (name, info) for every volume that is not marked OFFLINE.
+
+    Every check goes through this filter so that a stopped volume never
+    raises an alarm.
+    """
+    for volumeName, volumeInfo in volInfo.iteritems():
+        if volumeInfo.get('volumeStatus') != glustercli.VolumeStatus.OFFLINE:
+            yield volumeName, volumeInfo
+
+
+def _brickStatus(volumeName, brickName):
+    """Return (status, msg) for one local brick, judged by its pid file.
+
+    OK when the pid stored under the volume's run directory belongs to a
+    live process, CRITICAL when the pid file is missing or the process is
+    gone, UNKNOWN when the pid file cannot be read or parsed.
+    """
+    pidFile = brickName.replace(":/", "-").replace("/", "-") + ".pid"
+    pidPath = "%s/%s/run/%s" % (_glusterVolPath, volumeName, pidFile)
+    try:
+        with open(pidPath) as f:
+            pid = int(f.read().strip())
+    except IOError as e:
+        if e.errno != errno.ENOENT:
+            return (utils.PluginStatusCode.UNKNOWN,
+                    "UNKNOWN: Brick %s: %s" % (brickName, str(e)))
+        pid = None
+    except ValueError as e:
+        # an empty or garbled pid file must not abort the other checks
+        return (utils.PluginStatusCode.UNKNOWN,
+                "UNKNOWN: Brick %s: %s" % (brickName, str(e)))
+
+    if pid is not None and psutil.pid_exists(pid):
+        return utils.PluginStatusCode.OK, "OK: Brick %s" % brickName
+    return (utils.PluginStatusCode.CRITICAL,
+            "CRITICAL: Brick %s is down" % brickName)
+
+
+def sendBrickStatus(hostName, volInfo):
+    """Report every brick of an online volume hosted on this machine.
+
+    hostName -- host name the results are reported under
+    volInfo  -- dict of volume name -> volume details, as returned by
+                glustercli.volumeInfo()
+    """
+    hostUuid = glustercli.hostUUIDGet()
+    for volumeName, volumeInfo in _onlineVolumes(volInfo):
+        for brick in volumeInfo['bricksInfo']:
+            if brick.get('hostUuid') != hostUuid:
+                continue
+            status, msg = _brickStatus(volumeName, brick['name'])
+            nscautils.send_to_nsca(hostName, _brickService + brick['name'],
+                                   status, msg)
+
+
+def _sendDaemonStatus(hostName, service, checkCmd, isNeeded,
+                      criticalMsg, okMsg):
+    """Run a check_procs command and report the outcome for `service`.
+
+    When the process check passes, its result is forwarded unchanged.
+    Otherwise isNeeded() decides: a daemon some volume relies on is
+    CRITICAL (criticalMsg), one that no volume relies on is OK (okMsg).
+    """
+    status, msg, error = utils.execCmd(checkCmd)
+    if status != utils.PluginStatusCode.OK:
+        if isNeeded():
+            status, msg = utils.PluginStatusCode.CRITICAL, criticalMsg
+        else:
+            status, msg = utils.PluginStatusCode.OK, okMsg
+    nscautils.send_to_nsca(hostName, service, status, msg)
+
+
+def sendNfsStatus(hostName, volInfo):
+    """Report the gluster NFS daemon; it is needed unless nfs.disable is set.
+
+    hostName -- host name the result is reported under
+    volInfo  -- dict of volume name -> volume details
+    """
+    def nfsInUse():
+        return any(
+            v.get('options', {}).get('nfs.disable', 'off') == 'off'
+            for _, v in _onlineVolumes(volInfo))
+
+    _sendDaemonStatus(hostName, _nfsService, _checkNfsCmd, nfsInUse,
+                      "CRITICAL: Process glusterfs-nfs is not running",
+                      "OK: No gluster volume uses nfs")
+
+
+def sendSmbStatus(hostName, volInfo):
+    """Report the Samba daemon; it is needed by volumes exported over SMB.
+
+    hostName -- host name the result is reported under
+    volInfo  -- dict of volume name -> volume details
+    """
+    def smbInUse():
+        # a volume counts as exported unless user.cifs or user.smb is set
+        # to 'disable'; only such volumes make a missing smbd an error
+        for _, v in _onlineVolumes(volInfo):
+            options = v.get('options', {})
+            if 'disable' not in (options.get('user.cifs'),
+                                 options.get('user.smb')):
+                return True
+        return False
+
+    _sendDaemonStatus(hostName, _smbService, _checkSmbCmd, smbInUse,
+                      "CRITICAL: Process smb is not running",
+                      "OK: No gluster volume uses smb")
+
+
+def sendQuotadStatus(hostName, volInfo):
+    """Report the quota daemon; it is needed once features.quota is 'on'.
+
+    hostName -- host name the result is reported under
+    volInfo  -- dict of volume name -> volume details
+    """
+    def quotaInUse():
+        return any(
+            v.get('options', {}).get('features.quota', '') == 'on'
+            for _, v in _onlineVolumes(volInfo))
+
+    _sendDaemonStatus(hostName, _quotadService, _checkQuotaCmd, quotaInUse,
+                      "CRITICAL: Process quotad is not running",
+                      "OK: Quota not enabled")
+
+
+def sendShdStatus(hostName, volInfo):
+    """Report the self-heal daemon; replicated local bricks need it.
+
+    hostName -- host name the result is reported under
+    volInfo  -- dict of volume name -> volume details
+    """
+    def shdInUse():
+        # decided volume by volume, so that a single replicated volume
+        # with a brick on this host is enough to raise the alarm
+        hostUuid = glustercli.hostUUIDGet()
+        for _, v in _onlineVolumes(volInfo):
+            onThisHost = any(brick.get('hostUuid') == hostUuid
+                             for brick in v['bricksInfo'])
+            if onThisHost and int(v['replicaCount']) > 1:
+                return True
+        return False
+
+    _sendDaemonStatus(hostName, _shdService, _checkShdCmd, shdInUse,
+                      "CRITICAL: Gluster Self Heal Daemon not running",
+                      "OK: Process Gluster Self Heal Daemon")
+
+
+def main():
+    """Check glusterd, then report every dependent service through NSCA."""
+    hostName = socket.getfqdn()
+    if hostName in ("localhost.localdomain", "localhost"):
+        # not fatal: results are still sent under the unqualified name
+        sys.stderr.write("failed to find localhost fqdn\n")
+
+    ### service check ###
+    status, msg, error = utils.execCmd(_checkGlusterdCmd)
+    nscautils.send_to_nsca(hostName, _glusterdService, status, msg)
+
+    # Get the volume status only if glusterfs is running to avoid
+    # unusual delay
+    if status != utils.PluginStatusCode.OK:
+        sys.exit(status)
+
+    try:
+        volInfo = glustercli.volumeInfo()
+    except glusternagios.glustercli.GlusterCmdFailedException:
+        sys.exit(utils.PluginStatusCode.UNKNOWN)
+
+    sendNfsStatus(hostName, volInfo)
+    sendSmbStatus(hostName, volInfo)
+    sendShdStatus(hostName, volInfo)
+    sendQuotadStatus(hostName, volInfo)
+    sendBrickStatus(hostName, volInfo)
+
+    sys.exit(utils.PluginStatusCode.OK)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/plugins/gluster-proc.crontab b/plugins/gluster-proc.crontab
new file mode 100644
index 0000000..7344ea2
--- /dev/null
+++ b/plugins/gluster-proc.crontab
@@ -0,0 +1 @@
+*/1 * * * * root /usr/lib64/nagios/gluster/plugins/check_proc_status.py