Diffstat (limited to 'plugins')
-rw-r--r--  plugins/Makefile.am            2
-rwxr-xr-x  plugins/check_proc_status.py   195
-rw-r--r--  plugins/gluster-proc.crontab   1
3 files changed, 198 insertions, 0 deletions
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
index 9bba2d4..b1592de 100644
--- a/plugins/Makefile.am
+++ b/plugins/Makefile.am
@@ -5,6 +5,7 @@ SUBDIRS = \
 cronddir = $(sysconfdir)/cron.d
 crond_DATA = \
 	gluster-sysstat.crontab \
+	gluster-proc.crontab \
 	$(NULL)
 
 dist_glusternagiosplugins_PYTHON = \
@@ -13,6 +14,7 @@ dist_glusternagiosplugins_PYTHON = \
 	check_vol_utilization.py \
 	check_vol_status.py \
 	check_volume_status.py \
+	check_proc_status.py \
 	cpu.py \
 	discoverpeers.py \
 	discoverlogicalcomponents.py \
diff --git a/plugins/check_proc_status.py b/plugins/check_proc_status.py
new file mode 100755
index 0000000..80a41c8
--- /dev/null
+++ b/plugins/check_proc_status.py
@@ -0,0 +1,195 @@
+#!/usr/bin/python
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+import sys
+import errno
+import socket
+import psutil
+import nscautils
+import glusternagios
+
+from glusternagios import utils
+from glusternagios import glustercli
+
+
+_checkProc = utils.CommandPath('check_proc',
+                               '/usr/lib64/nagios/plugins/check_procs')
+
+_glusterVolPath = "/var/lib/glusterd/vols"
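+# check_procs flags used below: -C matches the process command name,
+# -a matches a command-line argument, -c 1: is CRITICAL unless at least
+# one matching process is found, -w 1:1 is WARNING unless exactly one is.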
+_checkNfsCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", "nfs"]
+_checkShdCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
+                "glustershd"]
+_checkSmbCmd = [_checkProc.cmd, "-C", "smb"]
+_checkQuotaCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
+                  "quotad"]
+_checkBrickCmd = [_checkProc.cmd, "-C", "glusterfsd"]
+_checkGlusterdCmd = [_checkProc.cmd, "-c", "1:", "-w", "1:1", "-C", "glusterd"]
+_nfsService = "Glusterfs NFS Daemon"
+_shdService = "Glusterfs Self-Heal Daemon"
+_smbService = "CIFS"
+_brickService = "Brick Status - "
+_glusterdService = "Gluster Management Daemon"
+_quotadService = "Gluster Quota Daemon"
+
+
+def sendBrickStatus(hostName, volInfo):
+    hostUuid = glustercli.hostUUIDGet()
+    status = None
+    for volumeName, volumeInfo in volInfo.iteritems():
+        if volumeInfo['volumeStatus'] == glustercli.VolumeStatus.OFFLINE:
+            continue
+        for brick in volumeInfo['bricksInfo']:
+            if brick.get('hostUuid') != hostUuid:
+                continue
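+            # glusterd stores each brick's pid under
+            # /var/lib/glusterd/vols/<volume>/run/, in a file named after
+            # the brick path with ':/' and '/' replaced by '-'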
+            brickService = "Brick Status - %s" % brick['name']
+            pidFile = brick['name'].replace(
+                ":/", "-").replace("/", "-") + ".pid"
+            try:
+                with open("%s/%s/run/%s" % (
+                        _glusterVolPath, volumeName, pidFile)) as f:
+                    if psutil.pid_exists(int(f.read().strip())):
+                        status = utils.PluginStatusCode.OK
+                    else:
+                        status = utils.PluginStatusCode.CRITICAL
+            except IOError as e:
+                if e.errno == errno.ENOENT:
+                    status = utils.PluginStatusCode.CRITICAL
+                else:
+                    status = utils.PluginStatusCode.UNKNOWN
+                    msg = "UNKNOWN: Brick %s: %s" % (brick['name'], str(e))
+            finally:
+                if status == utils.PluginStatusCode.OK:
+                    msg = "OK: Brick %s" % brick['name']
+                elif status != utils.PluginStatusCode.UNKNOWN:
+                    msg = "CRITICAL: Brick %s is down" % brick['name']
+                nscautils.send_to_nsca(hostName, brickService, status, msg)
+
+
+def sendNfsStatus(hostName, volInfo):
+    # if nfs is already running, we need not check any further
+    status, msg, error = utils.execCmd(_checkNfsCmd)
+    if status == utils.PluginStatusCode.OK:
+        nscautils.send_to_nsca(hostName, _nfsService, status, msg)
+        return
+
+    # if nfs is not running and any volume uses nfs,
+    # the user must be alerted
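+    # nfs.disable defaults to 'off', i.e. a volume is served over nfs
+    # unless it was explicitly disabled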
+    for k, v in volInfo.iteritems():
+        nfsStatus = v.get('options', {}).get('nfs.disable', 'off')
+        if nfsStatus == 'off':
+            msg = "CRITICAL: Process glusterfs-nfs is not running"
+            status = utils.PluginStatusCode.CRITICAL
+            break
+    else:
+        msg = "OK: No gluster volume uses nfs"
+        status = utils.PluginStatusCode.OK
+    nscautils.send_to_nsca(hostName, _nfsService, status, msg)
+
+
+def sendSmbStatus(hostName, volInfo):
+    status, msg, error = utils.execCmd(_checkSmbCmd)
+    if status == utils.PluginStatusCode.OK:
+        nscautils.send_to_nsca(hostName, _smbService, status, msg)
+        return
+
+    # if smb is not running and any volume uses smb,
+    # the user must be alerted
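+    # user.cifs and user.smb are treated as 'enable' when unset, so a
+    # volume counts as exported over smb unless one of them disables it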
+    for k, v in volInfo.iteritems():
+        cifsStatus = v.get('options', {}).get('user.cifs', 'enable')
+        smbStatus = v.get('options', {}).get('user.smb', 'enable')
+        if cifsStatus == 'enable' and smbStatus == 'enable':
+            msg = "CRITICAL: Process smb is not running"
+            status = utils.PluginStatusCode.CRITICAL
+            break
+    else:
+        msg = "OK: No gluster volume uses smb"
+        status = utils.PluginStatusCode.OK
+    nscautils.send_to_nsca(hostName, _smbService, status, msg)
+
+
+def sendQuotadStatus(hostName, volInfo):
+    # if quotad is already running, we need not check any further
+    status, msg, error = utils.execCmd(_checkQuotaCmd)
+    if status == utils.PluginStatusCode.OK:
+        nscautils.send_to_nsca(hostName, _quotadService, status, msg)
+        return
+
+    # if quotad is not running and quota is enabled on any volume,
+    # the quotad process should be running on the host
+    for k, v in volInfo.iteritems():
+        quotadStatus = v.get('options', {}).get('features.quota', '')
+        if quotadStatus == 'on':
+            msg = "CRITICAL: Process quotad is not running"
+            status = utils.PluginStatusCode.CRITICAL
+            break
+    else:
+        msg = "OK: Quota not enabled"
+        status = utils.PluginStatusCode.OK
+    nscautils.send_to_nsca(hostName, _quotadService, status, msg)
+
+
+def sendShdStatus(hostName, volInfo):
+    status, msg, error = utils.execCmd(_checkShdCmd)
+    if status == utils.PluginStatusCode.OK:
+        nscautils.send_to_nsca(hostName, _shdService, status, msg)
+        return
+
+    hostUuid = glustercli.hostUUIDGet()
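+    # the self-heal daemon is expected only on nodes that host bricks
+    # of replicated volumes (replicaCount > 1)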
+    for volumeName, volumeInfo in volInfo.iteritems():
+        if volumeInfo['volumeStatus'] == glustercli.VolumeStatus.OFFLINE:
+            continue
+        for brick in volumeInfo['bricksInfo']:
+            if brick['hostUuid'] == hostUuid and \
+                    int(volumeInfo['replicaCount']) > 1:
+                status = utils.PluginStatusCode.CRITICAL
+                msg = "CRITICAL: Gluster Self Heal Daemon not running"
+                nscautils.send_to_nsca(hostName, _shdService, status, msg)
+                return
+    msg = "OK: Process Gluster Self Heal Daemon"
+    status = utils.PluginStatusCode.OK
+    nscautils.send_to_nsca(hostName, _shdService, status, msg)
+
+
+if __name__ == '__main__':
+    hostName = socket.getfqdn()
+    if hostName == "localhost.localdomain" or hostName == "localhost":
+        sys.stderr.write("failed to find localhost fqdn\n")
+
+    ### service check ###
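+    # each check result is pushed to the Nagios server as a passive
+    # check via NSCA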
+    status, msg, error = utils.execCmd(_checkGlusterdCmd)
+    nscautils.send_to_nsca(hostName, _glusterdService, status, msg)
+
+    # Get the volume status only if glusterd is running, to avoid
+    # an unusual delay
+    if status != utils.PluginStatusCode.OK:
+        sys.exit(status)
+
+    try:
+        volInfo = glustercli.volumeInfo()
+    except glusternagios.glustercli.GlusterCmdFailedException:
+        sys.exit(utils.PluginStatusCode.UNKNOWN)
+
+    sendNfsStatus(hostName, volInfo)
+    sendSmbStatus(hostName, volInfo)
+    sendShdStatus(hostName, volInfo)
+    sendQuotadStatus(hostName, volInfo)
+    sendBrickStatus(hostName, volInfo)
+
+    sys.exit(utils.PluginStatusCode.OK)
diff --git a/plugins/gluster-proc.crontab b/plugins/gluster-proc.crontab
new file mode 100644
index 0000000..7344ea2
--- /dev/null
+++ b/plugins/gluster-proc.crontab
@@ -0,0 +1 @@
+*/1 * * * * root /usr/lib64/nagios/gluster/plugins/check_proc_status.py