summaryrefslogtreecommitdiffstats
path: root/plugins
diff options
context:
space:
mode:
authorTimothy Asir <tim.gluster@gmail.com>2014-04-01 17:31:44 +0530
committerBala.FA <barumuga@redhat.com>2014-04-29 10:14:33 +0530
commit0682f3b48c06a5bf53f305dd77250cdb796693a5 (patch)
tree3c1705697a0f9846ae119f328f839d4bc7460906 /plugins
parent666983202013dd3c1e236eb458b75b3a95d74782 (diff)
Add gluster process monitoring plugin
This helps monitor gluster-related processes such as glusterd, glusterfsd, the gluster self-heal daemon and nfs, and also helps monitor other running services. Change-Id: I8c6f252270a847e7ff007c358125a9310525f201 Signed-off-by: Timothy Asir <tjeyasin@redhat.com>
Diffstat (limited to 'plugins')
-rw-r--r--plugins/Makefile.am2
-rwxr-xr-xplugins/check_proc_status.py195
-rw-r--r--plugins/gluster-proc.crontab1
3 files changed, 198 insertions, 0 deletions
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
index 9bba2d4..b1592de 100644
--- a/plugins/Makefile.am
+++ b/plugins/Makefile.am
@@ -5,6 +5,7 @@ SUBDIRS = \
cronddir = $(sysconfdir)/cron.d
crond_DATA = \
gluster-sysstat.crontab \
+ gluster-proc.crontab \
$(NULL)
dist_glusternagiosplugins_PYTHON = \
@@ -13,6 +14,7 @@ dist_glusternagiosplugins_PYTHON = \
check_vol_utilization.py \
check_vol_status.py \
check_volume_status.py \
+ check_proc_status.py \
cpu.py \
discoverpeers.py \
discoverlogicalcomponents.py \
diff --git a/plugins/check_proc_status.py b/plugins/check_proc_status.py
new file mode 100755
index 0000000..80a41c8
--- /dev/null
+++ b/plugins/check_proc_status.py
@@ -0,0 +1,195 @@
+#!/usr/bin/python
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+import sys
+import errno
+import socket
+import psutil
+import nscautils
+import glusternagios
+
+from glusternagios import utils
+from glusternagios import glustercli
+
+
+_checkProc = utils.CommandPath('check_proc',
+ '/usr/lib64/nagios/plugins/check_procs')
+
+_glusterVolPath = "/var/lib/glusterd/vols"
+_checkNfsCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", "nfs"]
+_checkShdCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
+ "glustershd"]
+_checkSmbCmd = [_checkProc.cmd, "-C", "smb"]
+_checkQuotaCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
+ "quotad"]
+_checkBrickCmd = [_checkProc.cmd, "-C", "glusterfsd"]
+_checkGlusterdCmd = [_checkProc.cmd, "-c", "1:", "-w", "1:1", "-C", "glusterd"]
+_nfsService = "Glusterfs NFS Daemon"
+_shdService = "Glusterfs Self-Heal Daemon"
+_smbService = "CIFS"
+_brickService = "Brick Status - "
+_glusterdService = "Gluster Management Daemon"
+_quotadService = "Gluster Quota Daemon"
+
+
+def sendBrickStatus(hostName, volInfo):
+ hostUuid = glustercli.hostUUIDGet()
+ status = None
+ for volumeName, volumeInfo in volInfo.iteritems():
+ if volumeInfo['volumeStatus'] == glustercli.VolumeStatus.OFFLINE:
+ continue
+ for brick in volumeInfo['bricksInfo']:
+ if brick.get('hostUuid') != hostUuid:
+ continue
+ brickService = "Brick Status - %s" % brick['name']
+ pidFile = brick['name'].replace(
+ ":/", "-").replace("/", "-") + ".pid"
+ try:
+ with open("%s/%s/run/%s" % (
+ _glusterVolPath, volumeName, pidFile)) as f:
+ if psutil.pid_exists(int(f.read().strip())):
+ status = utils.PluginStatusCode.OK
+ else:
+ status = utils.PluginStatusCode.CRITICAL
+ except IOError, e:
+ if e.errno == errno.ENOENT:
+ status = utils.PluginStatusCode.CRITICAL
+ else:
+ status = utils.PluginStatusCode.UNKNOWN
+ msg = "UNKNOWN: Brick %s: %s" % (brick['name'], str(e))
+ finally:
+ if status == utils.PluginStatusCode.OK:
+ msg = "OK: Brick %s" % brick['name']
+ elif status != utils.PluginStatusCode.UNKNOWN:
+ msg = "CRITICAL: Brick %s is down" % brick['name']
+ nscautils.send_to_nsca(hostName, brickService, status, msg)
+
+
+def sendNfsStatus(hostName, volInfo):
+ # if nfs is already running we need not to check further
+ status, msg, error = utils.execCmd(_checkNfsCmd)
+ if status == utils.PluginStatusCode.OK:
+ nscautils.send_to_nsca(hostName, _nfsService, status, msg)
+ return
+
+ # if nfs is not running and any of the volume uses nfs
+ # then its required to alert the user
+ for k, v in volInfo.iteritems():
+ nfsStatus = v.get('options', {}).get('nfs.disable', 'off')
+ if nfsStatus == 'off':
+ msg = "CRITICAL: Process glusterfs-nfs is not running"
+ status = utils.PluginStatusCode.CRITICAL
+ break
+ else:
+ msg = "OK: No gluster volume uses nfs"
+ status = utils.PluginStatusCode.OK
+ nscautils.send_to_nsca(hostName, _nfsService, status, msg)
+
+
+def sendSmbStatus(hostName, volInfo):
+ status, msg, error = utils.execCmd(_checkSmbCmd)
+ if status == utils.PluginStatusCode.OK:
+ nscautils.send_to_nsca(hostName, _smbService, status, msg)
+ return
+
+ # if smb is not running and any of the volume uses smb
+ # then its required to alert the use
+ for k, v in volInfo.iteritems():
+ cifsStatus = v.get('options', {}).get('user.cifs', '')
+ smbStatus = v.get('options', {}).get('user.smb', '')
+ if cifsStatus == 'disable' or smbStatus == 'disable':
+ msg = "CRITICAL: Process smb is not running"
+ status = utils.PluginStatusCode.CRITICAL
+ break
+ else:
+ msg = "OK: No gluster volume uses smb"
+ status = utils.PluginStatusCode.OK
+ nscautils.send_to_nsca(hostName, _smbService, status, msg)
+
+
+def sendQuotadStatus(hostName, volInfo):
+ # if quota is already running we need not to check further
+ status, msg, error = utils.execCmd(_checkQuotaCmd)
+ if status == utils.PluginStatusCode.OK:
+ nscautils.send_to_nsca(hostName, _quotadService, status, msg)
+ return
+
+ # if quota is not running and any of the volume uses quota
+ # then the quotad process should be running in the host
+ for k, v in volInfo.iteritems():
+ quotadStatus = v.get('options', {}).get('features.quota', '')
+ if quotadStatus == 'on':
+ msg = "CRITICAL: Process quotad is not running"
+ utils.PluginStatusCode.CRITICAL
+ break
+ else:
+ msg = "OK: Quota not enabled"
+ status = utils.PluginStatusCode.OK
+ nscautils.send_to_nsca(hostName, _quotadService, status, msg)
+
+
+def sendShdStatus(hostName, volInfo):
+ status, msg, error = utils.execCmd(_checkShdCmd)
+ if status == utils.PluginStatusCode.OK:
+ nscautils.send_to_nsca(hostName, _shdService, status, msg)
+ return
+
+ hostUuid = glustercli.hostUUIDGet()
+ for volumeName, volumeInfo in volInfo.iteritems():
+ if volumeInfo['volumeStatus'] == glustercli.VolumeStatus.OFFLINE:
+ continue
+ for brick in volumeInfo['bricksInfo']:
+ if brick['hostUuid'] == hostUuid and \
+ int(volumeInfo['replicaCount']) > 1:
+ status = utils.PluginStatusCode.CRITICAL
+ msg = "CRITICAL: Gluster Self Heal Daemon not running"
+ break
+ else:
+ msg = "OK: Process Gluster Self Heal Daemon"
+ status = utils.PluginStatusCode.OK
+ nscautils.send_to_nsca(hostName, _shdService, status, msg)
+
+
+if __name__ == '__main__':
+ #Get the volume status
+ #status = 0
+ hostName = socket.getfqdn()
+ if hostName == "localhost.localdomain" or hostName == "localhost":
+ sys.stderr.write("failed to find localhost fqdn")
+
+ ### service check ###
+ status, msg, error = utils.execCmd(_checkGlusterdCmd)
+ nscautils.send_to_nsca(hostName, _glusterdService, status, msg)
+
+ # Get the volume status only if glusterfs is running to avoid
+ # unusual delay
+ if status != utils.PluginStatusCode.OK:
+ sys.exit(status)
+
+ try:
+ volInfo = glustercli.volumeInfo()
+ except glusternagios.glustercli.GlusterCmdFailedException as e:
+ sys.exit(utils.PluginStatusCode.UNKNOWN)
+
+ sendNfsStatus(hostName, volInfo)
+ sendSmbStatus(hostName, volInfo)
+ sendShdStatus(hostName, volInfo)
+ sendQuotadStatus(hostName, volInfo)
+ sendBrickStatus(hostName, volInfo)
+
+ sys.exit(utils.PluginStatusCode.OK)
diff --git a/plugins/gluster-proc.crontab b/plugins/gluster-proc.crontab
new file mode 100644
index 0000000..7344ea2
--- /dev/null
+++ b/plugins/gluster-proc.crontab
@@ -0,0 +1 @@
+*/1 * * * * root /usr/lib64/nagios/gluster/plugins/check_proc_status.py