summary | refs | log | tree | commit | diff | stats
path: root/plugins
diff options
context:
space:
mode:
author: Shubhendu Tripathi <shtripat@redhat.com> 2014-05-06 14:39:42 +0530
committer: Sahina Bose <sabose@redhat.com> 2014-05-20 00:20:50 -0700
commit: 0c6651331d3a8fc867a1799b18baed4c0789ba69 (patch)
tree: c09a84fd2f806b1a5da4e10ac2884f39cd3ba917 /plugins
parent: 25a9105f9583cf165526db0c24c67bd12c4336fe (diff)
nagios-server-addons: NRPE command with timeout
Introduced a utility method that returns an NRPE base command with a timeout that can be set externally. Currently, if a plugin internally executes NRPE to get details from a node, there is no mechanism for setting the timeout to more than 10 seconds. This method builds the NRPE command with the timeout (if one is passed). This is required for some NRPE calls in which gluster commands are executed and might take more time. All plugins that execute NRPE internally can provide an optional command-line argument for the timeout, and the same value can be used to form the proper NRPE call. Change-Id: Id97624df743664a320a585acc4a85cfcf64d0a07 Signed-off-by: Shubhendu Tripathi <shtripat@redhat.com> Reviewed-on: http://review.gluster.org/7682 Reviewed-by: Sahina Bose <sabose@redhat.com> Tested-by: Sahina Bose <sabose@redhat.com>
Diffstat (limited to 'plugins')
-rwxr-xr-x plugins/check_vol_server.py 37
-rwxr-xr-x plugins/discovery.py 86
-rwxr-xr-x plugins/server_utils.py 43
3 files changed, 100 insertions, 66 deletions
diff --git a/plugins/check_vol_server.py b/plugins/check_vol_server.py
index 1c5863a..b512317 100755
--- a/plugins/check_vol_server.py
+++ b/plugins/check_vol_server.py
@@ -1,6 +1,5 @@
#!/usr/bin/python
import sys
-import commands
import json
import random
import argparse
@@ -8,7 +7,7 @@ import livestatus
import os
from glusternagios import utils
-from constants import NRPE_PATH
+import server_utils
def _getListHosts(args):
@@ -50,17 +49,8 @@ def _getVolGeoRepStatusNRPECommand(args):
return ("check_vol_status -a %s %s" % (args.volume, 'geo-rep'))
-def _getNRPEBaseCmd(host):
- return NRPE_PATH + " -H " + host + " -c "
-
-
-def execNRPECommand(command):
- status, output = commands.getstatusoutput(command)
- return os.WEXITSTATUS(status), output
-
-
def _getVolumeStatusOutput(args):
- status, output = _executeRandomHost(_getVolStatusNRPECommand(args))
+ status, output = _executeRandomHost(_getVolStatusNRPECommand(args), args)
if status == utils.PluginStatusCode.OK:
#Following query will return the output in format [[2,0]]
@@ -101,16 +91,23 @@ def _getVolumeQuotaStatusOutput(args):
statusoutput.find("QUOTA: OK") > -1):
# if ok, don't poll
return servicestatus, statusoutput
- return _executeRandomHost(_getVolQuotaStatusNRPECommand(args))
+ return _executeRandomHost(_getVolQuotaStatusNRPECommand(args), args)
-def _executeRandomHost(command):
+def execNRPECommand(command):
+ status, output, err = utils.execCmd(command.split(), raw=True)
+ return os.WEXITSTATUS(status), output
+
+
+def _executeRandomHost(command, args):
list_hosts = _getListHosts(args)
host = random.choice(list_hosts)
#Get the address of the host
host_address = _getHostAddress(host)
- status, output = execNRPECommand(_getNRPEBaseCmd(host_address) + command)
+ status, output = execNRPECommand(server_utils.getNRPEBaseCommand(
+ host_address,
+ timeout=args.timeout) + command)
if status != utils.PluginStatusCode.UNKNOWN:
return status, output
@@ -119,8 +116,9 @@ def _executeRandomHost(command):
#in the host group and send the command until
#the command is successful
for host in list_hosts:
- status, output = execNRPECommand(_getNRPEBaseCmd(_getHostAddress(host))
- + command)
+ status, output = execNRPECommand(server_utils.getNRPEBaseCommand(
+ host,
+ timeout=args.timeout) + command)
if status != utils.PluginStatusCode.UNKNOWN:
return status, output
return status, output
@@ -139,7 +137,7 @@ def showVolumeOutput(args):
elif args.option == 'geo-rep':
command = _getVolGeoRepStatusNRPECommand(args)
- return _executeRandomHost(command)
+ return _executeRandomHost(command, args)
def parse_input():
@@ -174,6 +172,9 @@ def parse_input():
'quota',
'self-heal',
'geo-rep'])
+ parser.add_argument('-t', '--timeout',
+ action='store',
+ help='NRPE timeout')
args = parser.parse_args()
if args.critical <= args.warning:
print "UNKNOWN:Critical must be greater than Warning."
diff --git a/plugins/discovery.py b/plugins/discovery.py
index 3e3b0a5..08b2229 100755
--- a/plugins/discovery.py
+++ b/plugins/discovery.py
@@ -17,7 +17,6 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#
import argparse
-import json
import datetime
import os
import shutil
@@ -29,7 +28,6 @@ from config_generator import GlusterNagiosConfManager
import server_utils
import submit_external_command
from constants import DEFAULT_AUTO_CONFIG_DIR
-from constants import NRPE_PATH
from config_generator import CHANGE_MODE_ADD
@@ -37,49 +35,25 @@ from config_generator import CHANGE_MODE_REMOVE
from config_generator import CHANGE_MODE_UPDATE
-nrpeCmdPath = utils.CommandPath("nrpe", NRPE_PATH, )
-
-
-def execNRPECommand(host, command, arguments=None, jsonOutput=True):
- nrpeCmd = [nrpeCmdPath.cmd, "-H", host, "-c", command]
- if arguments:
- nrpeCmd.append('-a')
- nrpeCmd.extend(arguments)
- (returncode, outputStr, err) = utils.execCmd(nrpeCmd, raw=True)
- if returncode == 0:
- if jsonOutput:
- try:
- #convert to dictionary
- resultDict = json.loads(outputStr)
- except Exception as e:
- e.args += (outputStr,)
- raise
- return resultDict
- else:
- return outputStr
- else:
- print "Failed to execute NRPE command '%s' in host '%s' " \
- "\nError : %s" \
- "Make sure NPRE server in host '%s' is configured to accept " \
- "requests from Nagios server" % (command, host, outputStr, host)
- sys.exit(utils.PluginStatusCode.CRITICAL)
-
-
#Discovers volumes info one by one.
#First it fetches the volumes list and then it fetches the bricks
#details of volume one by one. Its an work around for size limitation issue
#in NRPE.
-def discoverVolumes(hostip):
+def discoverVolumes(hostip, timeout):
resultDict = {'volumes': []}
- volumeList = execNRPECommand(hostip, "discover_volume_list")
+ volumeList = server_utils.execNRPECommand(hostip,
+ "discover_volume_list",
+ timeout=timeout)
for volumeName in volumeList.keys():
- volumeDetail = execNRPECommand(hostip, "discover_volume_info",
- [volumeName])
+ volumeDetail = server_utils.execNRPECommand(hostip,
+ "discover_volume_info",
+ arguments=[volumeName],
+ timeout=timeout)
resultDict['volumes'].append(volumeDetail.get(volumeName))
return resultDict
-def discoverCluster(hostip, cluster):
+def discoverCluster(hostip, cluster, timeout):
"""
This method helps to discover the nodes, volumes and bricks in the given
gluster. It uses NRPE commands to contact the gluster nodes.
@@ -106,15 +80,20 @@ def discoverCluster(hostip, cluster):
clusterdata = {}
#Discover the logical components
- componentlist = discoverVolumes(hostip)
+ componentlist = discoverVolumes(hostip, timeout)
#Discover the peers
- hostlist = execNRPECommand(hostip, "discoverpeers")
+ hostlist = server_utils.execNRPECommand(hostip,
+ "discoverpeers",
+ timeout=timeout)
#Add the ip address of the root node given by the user to the peer list
hostlist[0]['hostip'] = hostip
for host in hostlist:
#Get host names for all the connected hosts
if host['status'] == HostStatus.CONNECTED:
- hostDetails = execNRPECommand(host['hostip'], "discoverhostparams")
+ hostDetails = server_utils.execNRPECommand(
+ host['hostip'],
+ "discoverhostparams",
+ timeout=timeout)
host.update(hostDetails)
#Get the list of bricks for this host and add to dictionary
host['bricks'] = []
@@ -307,6 +286,9 @@ def parse_input():
' output files will be written')
parser.add_argument('-f', '--force', action='store_true', dest='force',
help="Force sync the Cluster configuration")
+ parser.add_argument('-t', '--timeout', action='store', dest='timeout',
+ type=str,
+ help="No of secs NRPE should timeout getting details")
args = parser.parse_args()
return args
@@ -348,7 +330,7 @@ def formatTextForMail(text):
#Configure the gluster node to send passive check results through NSCA
-def configureNodes(clusterDelta, nagiosServerAddress, mode):
+def configureNodes(clusterDelta, nagiosServerAddress, mode, timeout):
for host in clusterDelta['_hosts']:
#Only when a new node is added or whole cluster is added freshly.
if (clusterDelta.get('changeMode') == CHANGE_MODE_ADD or
@@ -367,10 +349,13 @@ def configureNodes(clusterDelta, nagiosServerAddress, mode):
#Configure the nodes. clusterName, Nagios server address and
#host_name is passed as an argument to nrpe command
#'configure_gluster_node'
- execNRPECommand(
+ server_utils.execNRPECommand(
host['address'], 'configure_gluster_node',
- [clusterDelta['hostgroup_name'], nagiosServerAddress,
- host['host_name']], False)
+ arguments=[clusterDelta['hostgroup_name'],
+ nagiosServerAddress,
+ host['host_name']],
+ timeout=timeout,
+ json_output=False)
return nagiosServerAddress
@@ -390,9 +375,16 @@ def updateNagiosAddressInAutoConfig(clusterHostConfig, nagiosServerAddress):
#Write the cluster configurations. If force mode is used then it will clean
#the config directory before writing the changes.
-def writeDelta(clusterDelta, configManager, force, nagiosServerAddress, mode):
- nagiosServerAddress = configureNodes(clusterDelta, nagiosServerAddress,
- mode)
+def writeDelta(clusterDelta,
+ configManager,
+ force,
+ nagiosServerAddress,
+ mode,
+ timeout):
+ nagiosServerAddress = configureNodes(clusterDelta,
+ nagiosServerAddress,
+ mode,
+ timeout)
#Find the cluster host using host group name
clusterHostConfig = findHostInList(clusterDelta['_hosts'],
clusterDelta['hostgroup_name'])
@@ -463,7 +455,7 @@ def getAllNonConnectedHosts(hostList):
if __name__ == '__main__':
args = parse_input()
- clusterdata = discoverCluster(args.hostip, args.cluster)
+ clusterdata = discoverCluster(args.hostip, args.cluster, args.timeout)
configManager = getConfigManager(args)
clusterDelta = configManager.generateNagiosConfig(clusterdata)
if args.force:
@@ -484,7 +476,7 @@ if __name__ == '__main__':
"Are you sure, you want to commit the changes?", "Yes")
if confirmation:
writeDelta(clusterDelta, configManager, args.force,
- args.nagiosServerIP, args.mode)
+ args.nagiosServerIP, args.mode, args.timeout)
print "Cluster configurations synced successfully from host %s" % \
(args.hostip)
#If Nagios is running then try to restart. Otherwise don't do
diff --git a/plugins/server_utils.py b/plugins/server_utils.py
index b3767bd..46a8cf4 100755
--- a/plugins/server_utils.py
+++ b/plugins/server_utils.py
@@ -1,5 +1,5 @@
#!/usr/bin/python
-# discovery.py Nagios plugin to discover Gluster entities using NRPE
+# server_utils.py Utility methods used by nagios-server-addons module
# Copyright (C) 2014 Red Hat Inc
#
# This program is free software; you can redistribute it and/or
@@ -16,14 +16,18 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#
+import sys
+import json
import datetime
from pynag import Model
from glusternagios import utils
import submit_external_command
+from constants import NRPE_PATH
serviceCmdPath = utils.CommandPath("service", "/sbin/service", )
+nrpeCmdPath = utils.CommandPath("check_nrpe", NRPE_PATH, )
def restartNagios():
@@ -76,3 +80,40 @@ def getHostGroup(name):
return hostgroup[0]
else:
return None
+
+
+def getNRPEBaseCommand(host, timeout=None):
+ command = NRPE_PATH + " -H " + host
+ if timeout is not None:
+ command += " -t %s" % timeout
+ command += " -c "
+ return command
+
+
+def execNRPECommand(host,
+ command,
+ arguments=None,
+ timeout=None,
+ json_output=True):
+ nrpeCmd = getNRPEBaseCommand(host, timeout).split()
+ nrpeCmd.append(command)
+ if arguments:
+ nrpeCmd.append('-a')
+ nrpeCmd.extend(arguments)
+ (returncode, outputStr, err) = utils.execCmd(nrpeCmd, raw=True)
+ if returncode == 0:
+ if json_output:
+ try:
+ resultDict = json.loads(outputStr)
+ except Exception as e:
+ e.args += (outputStr,)
+ raise
+ return resultDict
+ else:
+ return outputStr
+ else:
+ print "Failed to execute NRPE command '%s' in host '%s' " \
+ "\nError : %s" \
+ "Make sure NPRE server in host '%s' is configured to accept " \
+ "requests from Nagios server" % (command, host, outputStr, host)
+ sys.exit(utils.PluginStatusCode.CRITICAL)