diff options
-rwxr-xr-x | plugins/check_vol_server.py | 37 | ||||
-rwxr-xr-x | plugins/discovery.py | 86 | ||||
-rwxr-xr-x | plugins/server_utils.py | 43 | ||||
-rw-r--r-- | tests/test_discovery.py | 10 |
4 files changed, 106 insertions, 70 deletions
diff --git a/plugins/check_vol_server.py b/plugins/check_vol_server.py index 1c5863a..b512317 100755 --- a/plugins/check_vol_server.py +++ b/plugins/check_vol_server.py @@ -1,6 +1,5 @@ #!/usr/bin/python import sys -import commands import json import random import argparse @@ -8,7 +7,7 @@ import livestatus import os from glusternagios import utils -from constants import NRPE_PATH +import server_utils def _getListHosts(args): @@ -50,17 +49,8 @@ def _getVolGeoRepStatusNRPECommand(args): return ("check_vol_status -a %s %s" % (args.volume, 'geo-rep')) -def _getNRPEBaseCmd(host): - return NRPE_PATH + " -H " + host + " -c " - - -def execNRPECommand(command): - status, output = commands.getstatusoutput(command) - return os.WEXITSTATUS(status), output - - def _getVolumeStatusOutput(args): - status, output = _executeRandomHost(_getVolStatusNRPECommand(args)) + status, output = _executeRandomHost(_getVolStatusNRPECommand(args), args) if status == utils.PluginStatusCode.OK: #Following query will return the output in format [[2,0]] @@ -101,16 +91,23 @@ def _getVolumeQuotaStatusOutput(args): statusoutput.find("QUOTA: OK") > -1): # if ok, don't poll return servicestatus, statusoutput - return _executeRandomHost(_getVolQuotaStatusNRPECommand(args)) + return _executeRandomHost(_getVolQuotaStatusNRPECommand(args), args) -def _executeRandomHost(command): +def execNRPECommand(command): + status, output, err = utils.execCmd(command.split(), raw=True) + return os.WEXITSTATUS(status), output + + +def _executeRandomHost(command, args): list_hosts = _getListHosts(args) host = random.choice(list_hosts) #Get the address of the host host_address = _getHostAddress(host) - status, output = execNRPECommand(_getNRPEBaseCmd(host_address) + command) + status, output = execNRPECommand(server_utils.getNRPEBaseCommand( + host_address, + timeout=args.timeout) + command) if status != utils.PluginStatusCode.UNKNOWN: return status, output @@ -119,8 +116,9 @@ def _executeRandomHost(command): #in the host group and send the command until #the command is successful for host in list_hosts: - status, output = execNRPECommand(_getNRPEBaseCmd(_getHostAddress(host)) - + command) + status, output = execNRPECommand(server_utils.getNRPEBaseCommand( + host, + timeout=args.timeout) + command) if status != utils.PluginStatusCode.UNKNOWN: return status, output return status, output @@ -139,7 +137,7 @@ def showVolumeOutput(args): elif args.option == 'geo-rep': command = _getVolGeoRepStatusNRPECommand(args) - return _executeRandomHost(command) + return _executeRandomHost(command, args) def parse_input(): @@ -174,6 +172,9 @@ def parse_input(): 'quota', 'self-heal', 'geo-rep']) + parser.add_argument('-t', '--timeout', + action='store', + help='NRPE timeout') args = parser.parse_args() if args.critical <= args.warning: print "UNKNOWN:Critical must be greater than Warning." diff --git a/plugins/discovery.py b/plugins/discovery.py index 3e3b0a5..08b2229 100755 --- a/plugins/discovery.py +++ b/plugins/discovery.py @@ -17,7 +17,6 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA # import argparse -import json import datetime import os import shutil @@ -29,7 +28,6 @@ from config_generator import GlusterNagiosConfManager import server_utils import submit_external_command from constants import DEFAULT_AUTO_CONFIG_DIR -from constants import NRPE_PATH from config_generator import CHANGE_MODE_ADD @@ -37,49 +35,25 @@ from config_generator import CHANGE_MODE_REMOVE from config_generator import CHANGE_MODE_UPDATE -nrpeCmdPath = utils.CommandPath("nrpe", NRPE_PATH, ) - - -def execNRPECommand(host, command, arguments=None, jsonOutput=True): - nrpeCmd = [nrpeCmdPath.cmd, "-H", host, "-c", command] - if arguments: - nrpeCmd.append('-a') - nrpeCmd.extend(arguments) - (returncode, outputStr, err) = utils.execCmd(nrpeCmd, raw=True) - if returncode == 0: - if jsonOutput: - try: - #convert to dictionary - resultDict = json.loads(outputStr) - except Exception as e: - e.args += (outputStr,) - raise - return resultDict - else: - return outputStr - else: - print "Failed to execute NRPE command '%s' in host '%s' " \ - "\nError : %s" \ - "Make sure NPRE server in host '%s' is configured to accept " \ - "requests from Nagios server" % (command, host, outputStr, host) - sys.exit(utils.PluginStatusCode.CRITICAL) - - #Discovers volumes info one by one. #First it fetches the volumes list and then it fetches the bricks #details of volume one by one. Its an work around for size limitation issue #in NRPE. -def discoverVolumes(hostip): +def discoverVolumes(hostip, timeout): resultDict = {'volumes': []} - volumeList = execNRPECommand(hostip, "discover_volume_list") + volumeList = server_utils.execNRPECommand(hostip, + "discover_volume_list", + timeout=timeout) for volumeName in volumeList.keys(): - volumeDetail = execNRPECommand(hostip, "discover_volume_info", - [volumeName]) + volumeDetail = server_utils.execNRPECommand(hostip, + "discover_volume_info", + arguments=[volumeName], + timeout=timeout) resultDict['volumes'].append(volumeDetail.get(volumeName)) return resultDict -def discoverCluster(hostip, cluster): +def discoverCluster(hostip, cluster, timeout): """ This method helps to discover the nodes, volumes and bricks in the given gluster. It uses NRPE commands to contact the gluster nodes. @@ -106,15 +80,20 @@ def discoverCluster(hostip, cluster): clusterdata = {} #Discover the logical components - componentlist = discoverVolumes(hostip) + componentlist = discoverVolumes(hostip, timeout) #Discover the peers - hostlist = execNRPECommand(hostip, "discoverpeers") + hostlist = server_utils.execNRPECommand(hostip, + "discoverpeers", + timeout=timeout) #Add the ip address of the root node given by the user to the peer list hostlist[0]['hostip'] = hostip for host in hostlist: #Get host names for all the connected hosts if host['status'] == HostStatus.CONNECTED: - hostDetails = execNRPECommand(host['hostip'], "discoverhostparams") + hostDetails = server_utils.execNRPECommand( + host['hostip'], + "discoverhostparams", + timeout=timeout) host.update(hostDetails) #Get the list of bricks for this host and add to dictionary host['bricks'] = [] @@ -307,6 +286,9 @@ def parse_input(): ' output files will be written') parser.add_argument('-f', '--force', action='store_true', dest='force', help="Force sync the Cluster configuration") + parser.add_argument('-t', '--timeout', action='store', dest='timeout', + type=str, + help="No of secs NRPE should timeout getting details") args = parser.parse_args() return args @@ -348,7 +330,7 @@ def formatTextForMail(text): #Configure the gluster node to send passive check results through NSCA -def configureNodes(clusterDelta, nagiosServerAddress, mode): +def configureNodes(clusterDelta, nagiosServerAddress, mode, timeout): for host in clusterDelta['_hosts']: #Only when a new node is added or whole cluster is added freshly. if (clusterDelta.get('changeMode') == CHANGE_MODE_ADD or @@ -367,10 +349,13 @@ def configureNodes(clusterDelta, nagiosServerAddress, mode): #Configure the nodes. clusterName, Nagios server address and #host_name is passed as an argument to nrpe command #'configure_gluster_node' - execNRPECommand( + server_utils.execNRPECommand( host['address'], 'configure_gluster_node', - [clusterDelta['hostgroup_name'], nagiosServerAddress, - host['host_name']], False) + arguments=[clusterDelta['hostgroup_name'], + nagiosServerAddress, + host['host_name']], + timeout=timeout, + json_output=False) return nagiosServerAddress @@ -390,9 +375,16 @@ def updateNagiosAddressInAutoConfig(clusterHostConfig, nagiosServerAddress): #Write the cluster configurations. If force mode is used then it will clean #the config directory before writing the changes. -def writeDelta(clusterDelta, configManager, force, nagiosServerAddress, mode): - nagiosServerAddress = configureNodes(clusterDelta, nagiosServerAddress, - mode) +def writeDelta(clusterDelta, + configManager, + force, + nagiosServerAddress, + mode, + timeout): + nagiosServerAddress = configureNodes(clusterDelta, + nagiosServerAddress, + mode, + timeout) #Find the cluster host using host group name clusterHostConfig = findHostInList(clusterDelta['_hosts'], clusterDelta['hostgroup_name']) @@ -463,7 +455,7 @@ def getAllNonConnectedHosts(hostList): if __name__ == '__main__': args = parse_input() - clusterdata = discoverCluster(args.hostip, args.cluster) + clusterdata = discoverCluster(args.hostip, args.cluster, args.timeout) configManager = getConfigManager(args) clusterDelta = configManager.generateNagiosConfig(clusterdata) if args.force: @@ -484,7 +476,7 @@ if __name__ == '__main__': "Are you sure, you want to commit the changes?", "Yes") if confirmation: writeDelta(clusterDelta, configManager, args.force, - args.nagiosServerIP, args.mode) + args.nagiosServerIP, args.mode, args.timeout) print "Cluster configurations synced successfully from host %s" % \ (args.hostip) #If Nagios is running then try to restart. Otherwise don't do diff --git a/plugins/server_utils.py b/plugins/server_utils.py index b3767bd..46a8cf4 100755 --- a/plugins/server_utils.py +++ b/plugins/server_utils.py @@ -1,5 +1,5 @@ #!/usr/bin/python -# discovery.py Nagios plugin to discover Gluster entities using NRPE +# server_utils.py Utility methods used by nagios-server-addons module # Copyright (C) 2014 Red Hat Inc # # This program is free software; you can redistribute it and/or @@ -16,14 +16,18 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA # +import sys +import json import datetime from pynag import Model from glusternagios import utils import submit_external_command +from constants import NRPE_PATH serviceCmdPath = utils.CommandPath("service", "/sbin/service", ) +nrpeCmdPath = utils.CommandPath("check_nrpe", NRPE_PATH, ) def restartNagios(): @@ -76,3 +80,40 @@ def getHostGroup(name): return hostgroup[0] else: return None + + +def getNRPEBaseCommand(host, timeout=None): + command = NRPE_PATH + " -H " + host + if timeout is not None: + command += " -t %s" % timeout + command += " -c " + return command + + +def execNRPECommand(host, + command, + arguments=None, + timeout=None, + json_output=True): + nrpeCmd = getNRPEBaseCommand(host, timeout).split() + nrpeCmd.append(command) + if arguments: + nrpeCmd.append('-a') + nrpeCmd.extend(arguments) + (returncode, outputStr, err) = utils.execCmd(nrpeCmd, raw=True) + if returncode == 0: + if json_output: + try: + resultDict = json.loads(outputStr) + except Exception as e: + e.args += (outputStr,) + raise + return resultDict + else: + return outputStr + else: + print "Failed to execute NRPE command '%s' in host '%s' " \ + "\nError : %s" \ + "Make sure NPRE server in host '%s' is configured to accept " \ + "requests from Nagios server" % (command, host, outputStr, host) + sys.exit(utils.PluginStatusCode.CRITICAL) diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 5e03732..a3d1347 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -16,14 +16,14 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA # -from plugins import discovery +from plugins import discovery, server_utils from glusternagios.glustercli import HostStatus from testrunner import PluginsTestCase as TestCaseBase class TestDiscovery(TestCaseBase): def _mockExecNRPECommand(self, host, command, arguments=None, - jsonOutput=True): + timeout=None): if command == "discover_volume_list": return self._getVolumeNames() elif command == "discover_volume_info": @@ -96,8 +96,10 @@ class TestDiscovery(TestCaseBase): # Method to test the discoverCluster() method def testDiscoverCluster(self): - discovery.execNRPECommand = self._mockExecNRPECommand + server_utils.execNRPECommand = self._mockExecNRPECommand clusterName = "test-cluster" host = "172.16.53.1" - clusterdata = discovery.discoverCluster(host, clusterName) + clusterdata = discovery.discoverCluster(host, + clusterName, + timeout=None) self._verifyClusterData(clusterdata, clusterName, host) |