├── 3ware_raid ├── 3ware_disks_snmp.py └── 3ware_raid_snmp.py ├── LICENSE ├── README.md ├── adaptec_raid └── adaptec_raid.py ├── backuppc ├── backuppc_good_age.py └── hash-to-json.pl ├── barracuda_waf ├── barracuda_waf_attacks.py ├── barracuda_waf_ha.py └── barracuda_waf_system_load.py ├── chef_age └── chef_age.py ├── chef_nodes ├── README.md ├── chef_nodes.sh └── knife_status.rb ├── cisco_ace ├── cisco_ace_cpu.py └── cisco_ace_peer_status.py ├── generic_local ├── README.md └── generic_local.py ├── haproxy ├── haproxy.py └── haproxychecks.py ├── lsi_megariad └── lsi_megaraid.py ├── mod_status ├── README └── mod_status-dump.py ├── mysql_rep └── mysql_rep.sh ├── sonicwall ├── sonicwall_cpu.py └── sonicwall_mem.py ├── sun_hardware └── sun_hw_win.bat ├── tcp_check └── tcp_check.py └── xen_license ├── README.md └── check_xen_license.py /3ware_raid/3ware_disks_snmp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Very quick hack to monitor 3ware RAID cards via SNMP with check_mk 4 | # Tested using Windows 3ware SNMP plugin (3wSnmp.msi from http://kb.lsi.com) 5 | # Hereward Cooper - Sep 2012 6 | 7 | # MIB: TW-RAID-MIB 8 | # .1.3.6.1.4.1.1458.100.22.1.10.X = twRaidDriveStatus.X 9 | # 255 = OK 10 | 11 | 12 | def inventory_3ware_disks_snmp(checkname, info): 13 | inventory = [] 14 | # If 'info' isn't empty, add it to the inventory 15 | if info != []: 16 | inventory.append( (None, None) ) 17 | return inventory 18 | 19 | 20 | def check_3ware_disks_snmp(item, params, info): 21 | 22 | output = "" 23 | retval = 0 24 | 25 | # Check the results and return appropriately 26 | for count, rawcheck in enumerate(info): 27 | check = rawcheck[0] 28 | if check == '255': 29 | output += "Disk %s OK, " % (count) 30 | else: 31 | output += "Disk %s Error (code: %s), " % (count, check) 32 | retval = 2 33 | 34 | if retval == 0: 35 | output = "OK - " + output[:-2] 36 | elif retval == 2: 37 | output = "CRITICAL - " + output[:-2] 38 | return (retval, output) 39 | 40 | 41 | check_info["3ware_disks_snmp"] = (check_3ware_disks_snmp, "3ware RAID Disks", 0, inventory_3ware_disks_snmp) 42 | snmp_info["3ware_disks_snmp"] = ( ".1.3.6.1.4.1.1458.100.22.1", ["10"] ) 43 | -------------------------------------------------------------------------------- /3ware_raid/3ware_raid_snmp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Very quick hack to monitor 3ware RAID cards via SNMP with check_mk 4 | # Tested using Windows 3ware SNMP plugin (3wSnmp.msi from http://kb.lsi.com) 5 | # Hereward Cooper - Sep 2012 6 | 7 | # MIB: TW-RAID-MIB 8 | # .1.3.6.1.4.1.1458.100.22.1.10.X = twRaidDriveStatus.X 9 | # 255 = OK 10 | 11 | 12 | def inventory_3ware_raid_snmp(checkname, info): 13 | inventory = [] 14 | # If 'info' isn't empty, add it to the inventory 15 | if info != []: 16 | inventory.append( (None, None) ) 17 | return inventory 18 | 19 | 20 | def check_3ware_raid_snmp(item, params, info): 21 | 22 | output = "" 23 | retval = 0 24 | 25 | # Check the results and return appropriately 26 | for count, rawcheck in enumerate(info): 27 | check = rawcheck[0] 28 | if check == '0': 29 | output += "Array %s OK, " % (count) 30 | else: 31 | output += "Array %s Error (code: %s), " % (count, check) 32 | retval = 2 33 | 34 | if retval == 0: 35 | output = "OK - " + output[:-2] 36 | elif retval == 2: 37 | output = "CRITICAL - " + output[:-2] 38 | return (retval, output) 39 | 40 | 41 | check_info["3ware_raid_snmp"] = (check_3ware_raid_snmp, "3ware RAID Arrays", 0, inventory_3ware_raid_snmp) 42 | snmp_info["3ware_raid_snmp"] = ( ".1.3.6.1.4.1.1458.100.23.1", ["7"] ) 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Hereward Cooper 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A bunch of check_mk plugins 2 | =========================== 3 | Just read the scripts, the comments explain most things. 4 | 5 | SNMP checks 6 | ----------- 7 | These are installed on the check_mk server, (e.g. /usr/share/check_mk/checks), and if the 8 | specific SNMP OID for that check returns a result it will be added to the inventory. I've had 9 | to write these mainly for proprietary piece of equipment (physical firewalls, raid cards etc). 10 | **Note:** remove the file extension when placing them in the 'checks' directory. 11 | 12 | Local checks 13 | ------------ 14 | These are installed on the client server, (e.g. /usr/lib/check_mk_agent/local). They have 15 | no server-side part, and handle all the logic needed to perform the check themselves. 16 | -------------------------------------------------------------------------------- /adaptec_raid/adaptec_raid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # check-aacraid.py modified version of check from http://exchange.nagios.org/ 3 | # 4 | # Additions: 5 | # - switch between check_mk and nrpe modes 6 | # - customize the command (so it works on Linux or Windows) 7 | 8 | # Original by: Oliver Hookins, Paul De Audney and Barney Desmond. 9 | # This version by Hereward Cooper 10 | 11 | # Change mode between 'check_mk' and 'nrpe' to reflect monitoring setup 12 | mode = "check_mk" 13 | #mode = "nrpe" 14 | 15 | #command = "/usr/bin/sudo /usr/StorMan/arcconf" 16 | command = '\"E:\Adaptec\Adaptec Storage Manager\\arcconf\"' 17 | 18 | import sys, os, re, string 19 | 20 | c_status_re = re.compile('^\s*Controller Status\s*:\s*(.*)$') 21 | l_status_re = re.compile('^\s*Status of logical device\s*:\s*(.*)$') 22 | l_device_re = re.compile('^Logical device number ([0-9]+).*$') 23 | c_defunct_re = re.compile('^\s*Defunct disk drive count\s:\s*([0-9]+).*$') 24 | c_degraded_re = re.compile('^\s*Logical devices/Failed/Degraded\s*:\s*([0-9]+)/([0-9]+)/([0-9]+).*$') 25 | b_status_re = re.compile('^\s*Status\s*:\s*(.*)$') 26 | b_temp_re = re.compile('^\s*Over temperature\s*:\s*(.*)$') 27 | b_capacity_re = re.compile('\s*Capacity remaining\s*:\s*([0-9]+)\s*percent.*$') 28 | b_time_re = re.compile('\s*Time remaining \(at current draw\)\s*:\s*([0-9]+) days, ([0-9]+) hours, ([0-9]+) minutes.*$') 29 | 30 | cstatus = lstatus = ldevice = cdefunct = cdegraded = bstatus = btemp = bcapacity = btime = "" 31 | lnum = "" 32 | check_status = 0 33 | result = "" 34 | 35 | # Get logical drive status 36 | for line in os.popen4(command + " GETCONFIG 1 LD")[1].readlines(): 37 | # Match the regexs 38 | ldevice = l_device_re.match(line) 39 | if ldevice: 40 | lnum = ldevice.group(1) 41 | continue 42 | 43 | lstatus = l_status_re.match(line) 44 | if lstatus: 45 | if lstatus.group(1) != "Optimal": 46 | check_status = 2 47 | result += "Logical Device " + lnum + " " + lstatus.group(1) + "," 48 | 49 | # Get general card status 50 | for line in os.popen4(command + " GETCONFIG 1 AD")[1].readlines(): 51 | # Match the regexs 52 | cstatus = c_status_re.match(line) 53 | if cstatus: 54 | if cstatus.group(1) != "Optimal": 55 | check_status = 2 56 | result += "Controller " + cstatus.group(1) + "," 57 | continue 58 | 59 | cdefunct = c_defunct_re.match(line) 60 | if cdefunct: 61 | if int(cdefunct.group(1)) > 0: 62 | check_status = 2 63 | result += "Defunct drives " + cdefunct_group(1) + "," 64 | continue 65 | 66 | cdegraded = c_degraded_re.match(line) 67 | if cdegraded: 68 | if int(cdegraded.group(2)) > 0: 69 | check_status = 2 70 | result += "Failed drives " + cdegraded.group(2) + "," 71 | if int(cdegraded.group(3)) > 0: 72 | check_status = 2 73 | result += "Degraded drives " + cdegraded.group(3) + "," 74 | continue 75 | 76 | bstatus = b_status_re.match(line) 77 | if bstatus: 78 | if bstatus.group(1) == "Not Installed": 79 | continue 80 | 81 | if bstatus.group(1) == "Charging": 82 | if check_status < 2: 83 | check_status = 1 84 | elif bstatus.group(1) != "Optimal": 85 | check_status = 2 86 | result += "Battery Status " + bstatus.group(1) + "," 87 | continue 88 | 89 | btemp = b_temp_re.match(line) 90 | if btemp: 91 | if btemp.group(1) != "No": 92 | check_status = 2 93 | result += "Battery Overtemp " + btemp.group(1) + "," 94 | continue 95 | 96 | bcapacity = b_capacity_re.match(line) 97 | if bcapacity: 98 | result += "Battery Capacity " + bcapacity.group(1) + "%," 99 | if bcapacity.group(1) < 50: 100 | if check_status < 2: 101 | check_status = 1 102 | if bcapacity.group(1) < 25: 103 | check_status = 2 104 | continue 105 | 106 | btime = b_time_re.match(line) 107 | if btime: 108 | timemins = int(btime.group(1)) * 1440 + int(btime.group(2)) * 60 + int(btime.group(3)) 109 | if timemins < 1440: 110 | if check_status < 2: 111 | check_status = 1 112 | if timemins < 720: 113 | check_status = 2 114 | result += "Battery Time " 115 | if timemins < 60: 116 | result += str(timemins) + "mins," 117 | else: 118 | result += str(timemins/60) + "hours," 119 | 120 | if result == "": 121 | result = "No output from arcconf!" 122 | check_status = 3 123 | 124 | # strip the trailing "," from the result string. 125 | result = result.rstrip(",") 126 | 127 | if mode == "check_mk": 128 | print check_status, "Adaptec_RAID -", result 129 | elif mode == "nrpe": 130 | print result 131 | 132 | # Delete log once we've finished 133 | try: 134 | cwd = os.getcwd() 135 | fullpath = os.path.join(cwd,'UcliEvt.log') 136 | os.unlink(fullpath) 137 | except: 138 | pass 139 | 140 | if mode == "nrpe": 141 | sys.exit(check_status) 142 | -------------------------------------------------------------------------------- /backuppc/backuppc_good_age.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ''' 4 | Check there's recent good backups for each host in backuppc. 5 | Requires: 6 | backuppc's cli tools (BackupPC_serverMesg) 7 | python's shutil module 8 | hash-to-json script.pl (in this repo, installed at /usr/local/bin) 9 | ''' 10 | 11 | import json 12 | import sh 13 | import time 14 | import os 15 | 16 | os.setuid(114) # Switch to backuppc user 17 | 18 | # Thresholds, in hours. 19 | WARN = 36 20 | CRIT = 72 21 | 22 | now = time.time() 23 | 24 | # Use backuppc's cli tools to get the data we want, then convert the perl hash output 25 | # into json. It's pretty hacky. 26 | srv_msg=sh.Command('/usr/share/backuppc/bin/BackupPC_serverMesg') 27 | hash_to_json=sh.Command('/usr/local/bin/hash-to-json.pl') 28 | j=json.loads(hash_to_json(sh.sed(srv_msg('status hosts'),'s/Got reply: //')).stdout) 29 | 30 | for host, info in j.items(): 31 | # Skip backuppc's housekeeping jobs. 32 | if host in (' admin ', ' admin1 ', ' trashClean '): 33 | continue 34 | 35 | d = now - info["lastGoodBackupTime"] 36 | d_nice = round(d/60/60, 2) 37 | 38 | if d < (60*60*WARN): 39 | print( 40 | f"0 Backup_age_{host} - {host}'s last good backup is {d_nice}h old." 41 | ) 42 | elif (60*60*WARN) < d < (60*60*CRIT): 43 | print( 44 | f"1 Backup_age_{host} - {host}'s last good backup is {d_nice}h old." 45 | ) 46 | else: 47 | print( 48 | f"2 Backup_age_{host} - {host}'s last good backup is {d_nice}h old." 49 | ) 50 | -------------------------------------------------------------------------------- /backuppc/hash-to-json.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # A cli tool I wanted to use outputted a perl hash as a string. 3 | # This reads it in from stdin, and returns json instead. 4 | use JSON; 5 | my @lines = ; 6 | 7 | my %Status; # TODO: Unhard-code this. 8 | eval $lines[0]; #TODO: handle multiple lines? 9 | 10 | my $json = encode_json \%Status; 11 | print $json; 12 | -------------------------------------------------------------------------------- /barracuda_waf/barracuda_waf_attacks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Monitor the number of attacks a Barracuda WAF is fighting off. 4 | # Herward Cooper - 2012 5 | 6 | # OID .1.3.6.1.4.1.20632.8 is meant to return the number of attacks in the last hour. 7 | # However due to a bug (or wrong documentation) it actually returns the total number 8 | # of attacks. We therefore use check_mk's internal 'get_counter' function to track this 9 | # value, and turn it into a rate which we can monitor. 10 | 11 | barracuda_waf_attacks_default_values = (3, 20) 12 | 13 | def inventory_barracuda_waf_attacks(checkname, info): 14 | inventory=[] 15 | status = int(info[0][0]) 16 | inventory.append( (None, None, "barracuda_waf_attacks_default_values") ) 17 | return inventory 18 | 19 | 20 | def check_barracuda_waf_attacks(item, params, info): 21 | this_time = float(time.time()) 22 | state = int(info[0][0]) 23 | attacks_timedif, attacks_rate = get_counter("barracuda_waf_attacks", this_time, state) 24 | warn, crit = params 25 | perfdata = [ ( "attacks", attacks_rate, warn, crit ) ] 26 | if attacks_rate > crit: 27 | return (2, "CRITICAL - %s attacks per second" % attacks_rate, perfdata) 28 | elif attacks_rate > warn: 29 | return (1, "WARNING - %s attacks per second" % attacks_rate, perfdata) 30 | else: 31 | return (0, "OK - %s attacks per second" % attacks_rate, perfdata) 32 | 33 | check_info["barracuda_waf_attacks"] = (check_barracuda_waf_attacks, "Barracuda WAF Attacks", 1, inventory_barracuda_waf_attacks) 34 | 35 | snmp_info["barracuda_waf_attacks"] = ( ".1.3.6.1.4.1.20632.8", ["4"] ) -------------------------------------------------------------------------------- /barracuda_waf/barracuda_waf_ha.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Monitor the HA status of a pair of Barracuda WAFs via SNMP 4 | # Herward Cooper - 2012 5 | 6 | # When run against each of the members of the pair, a response of 7 | # 'Primary:Active' or 'Backup:Standby' is considered healthy. 8 | 9 | def inventory_barracuda_waf_ha(checkname, info): 10 | inventory=[] 11 | status = info[0][0] 12 | if status: 13 | inventory.append( (None, None) ) 14 | return inventory 15 | 16 | 17 | def check_barracuda_waf_ha(item, params, info): 18 | state = info[0][0] 19 | if state == "Primary:Active" or state == "Backup:Standby": 20 | return (0, "OK - Currently %s" % state) 21 | else: 22 | return (2, "CRITICAL - Currently %s" % state) 23 | return (3, "UNKNOWN - unhandled problem") 24 | 25 | check_info["barracuda_waf_ha"] = (check_barracuda_waf_ha, "Barracuda WAF HA Status", 0, inventory_barracuda_waf_ha) 26 | 27 | snmp_info["barracuda_waf_ha"] = ( ".1.3.6.1.4.1.20632.8", [ "14" ] ) -------------------------------------------------------------------------------- /barracuda_waf/barracuda_waf_system_load.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Monitor the system load of a Barracuda WAFs via SNMP 4 | # Herward Cooper - 2012 5 | 6 | # OID .1.3.6.1.4.1.20632.8.8 7 | 8 | barracuda_waf_system_load_default_values = (50, 75) 9 | 10 | def inventory_barracuda_waf_system_load(checkname, info): 11 | inventory=[] 12 | status = int(info[0][0]) 13 | inventory.append( (None, None, "barracuda_waf_system_load_default_values") ) 14 | return inventory 15 | 16 | 17 | def check_barracuda_waf_system_load(item, params, info): 18 | warn, crit = params 19 | state = int(info[0][0]) 20 | perfdata = [ ( "load", state, warn, crit ) ] 21 | if state > crit: 22 | return (2, "CRITICAL - Load %s percent" % state, perfdata) 23 | elif state > warn: 24 | return (1, "WARNING - Load %s percent" % state, perfdata) 25 | else: 26 | return (0, "OK - Load %s percent" % state, perfdata) 27 | 28 | check_info["barracuda_waf_system_load"] = (check_barracuda_waf_system_load, "Barracuda WAF System Load", 1, inventory_barracuda_waf_system_load) 29 | 30 | snmp_info["barracuda_waf_system_load"] = ( ".1.3.6.1.4.1.20632.8", ["8"] ) -------------------------------------------------------------------------------- /chef_age/chef_age.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ''' 4 | Some hosts run chef via cron like this: 5 | /usr/bin/chef-solo && touch /run/chef.last_success 6 | This check monitors the age of the last_success file. 7 | ''' 8 | 9 | import os 10 | import time 11 | 12 | # Thresholds, in hours. 13 | WARN = 3 14 | CRIT = 24 15 | 16 | try: 17 | s = os.stat('/run/chef.last_success') 18 | d = time.time() - s.st_mtime 19 | d_nice = round(d/60/60, 2) 20 | 21 | if d < (60*60*WARN): 22 | print(f"0 Chef_success_age - chef successfully ran {d_nice} hours ago.") 23 | elif (60*60*WARN) < d < (60*60*CRIT): 24 | print(f"1 Chef_success_age - chef hasn't run successfully for {d_nice} hours.") 25 | else: 26 | print(f"2 Chef_success_age - chef hasn't run successfully for {d_nice} hours.") 27 | except: 28 | print(f"3 Chef_success_age - UNKNOWN failure in calculating chef age.") -------------------------------------------------------------------------------- /chef_nodes/README.md: -------------------------------------------------------------------------------- 1 | This is a local check_mk script for monitoring the last time chef nodes (i.e. clients) 2 | checked-in with a chef server. A shell script wrapper executes a `knife` script to produce 3 | data about each chef node. Alarm thresholds are configured in `knife_status.rb`. 4 | 5 | Place `chef_nodes.sh` in your check_mk local directory on the machine with knife setup, and amend the paths. 6 | Place `knife_status.rb` somewhere convient e.g. `$KNIFE_DIR/scripts/` 7 | 8 | Example output: 9 | 10 | ``` 11 | ~$ telnet localhost 6556 12 | [...] 13 | <<>> 14 | 0 chef_node-mailserver age=871 OK - [base,cron-delvalidate,check-mk-agent,devenv,ssh_authorized_keys::internal,qemu-guest] 15 | 0 chef_node-webserver1 age=32 OK - [base,cron-delvalidate,check-mk-agent,devenv,ssh_authorized_keys::internal,qemu-guest] 16 | 0 chef_node-webserver2 age=841 OK - [base,cron-delvalidate,check-mk-agent,devenv,ssh_authorized_keys::internal,qemu-guest] 17 | 0 chef_node-proxyserver age=271 OK - [base,cron-delvalidate,check-mk-agent,devenv,ssh_authorized_keys::internal,qemu-guest] 18 | 0 chef_node-smtpserver age=3271 CRITICAL - [base,cron-delvalidate,check-mk-agent,devenv,ssh_authorized_keys::internal,qemu-guest] 19 | [...] 20 | ``` 21 | -------------------------------------------------------------------------------- /chef_nodes/chef_nodes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export HOME=/home/hcooper # needed to keep knife happy 3 | cd /home/hcooper/chef-repo # your kniife directory 4 | /usr/bin/knife exec scripts/knife_status.rb 5 | -------------------------------------------------------------------------------- /chef_nodes/knife_status.rb: -------------------------------------------------------------------------------- 1 | # This is run from your knife folder, e.g. "knife exec scripts/knife_status.rb" 2 | stats=Array.new 3 | now = Time.now.to_i 4 | criticaloffset = 7200 5 | warningoffset = 3600 6 | 7 | nodes.all do |thisnode| 8 | checkintime=Time.at(thisnode['ohai_time']).to_i 9 | recipes = thisnode.run_list.expand(thisnode.chef_environment).recipes.join(",") 10 | 11 | if checkintime + criticaloffset < now then 12 | print "2 chef_node-%s age=%s CRITICAL - [%s]\n" % [thisnode.name, now - checkintime, recipes] 13 | 14 | elsif checkintime + warningoffset < now then 15 | print "1 chef_node-%s age=%s WARNING - [%s]\n" % [thisnode.name, now - checkintime, recipes] 16 | 17 | else 18 | print "0 chef_node-%s age=%s OK - [%s]\n" % [thisnode.name, now - checkintime, recipes] 19 | 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /cisco_ace/cisco_ace_cpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Monitoring the CPU usage of a Cisco ACE load balancer 4 | # Herward Cooper - 2012 5 | 6 | # Uses OID .1.3.6.1.4.1.9.9.109.1.1.1.1.8.1 7 | 8 | cisco_ace_cpu_default_values = (35, 40) 9 | 10 | def inventory_cisco_ace_cpu(checkname, info): 11 | inventory=[] 12 | status = int(info[0][0]) 13 | if status < 11: 14 | inventory.append( (None, None, "cisco_ace_cpu_default_values") ) 15 | return inventory 16 | 17 | 18 | def check_cisco_ace_cpu(item, params, info): 19 | warn, crit = params 20 | state = int(info[0][0]) 21 | perfdata = [ ( "cpu", state, warn, crit ) ] 22 | if state > crit: 23 | return (2, "CRITICAL - CPU is %s percent" % state, perfdata) 24 | elif state > warn: 25 | return (1, "WARNING - CPU is %s percent" % state, perfdata) 26 | else: 27 | return (0, "OK - CPU is %s percent" % state, perfdata) 28 | 29 | check_info["cisco_ace_cpu"] = (check_cisco_ace_cpu, "Cisco ACE CPU 5min Avg", 1, inventory_cisco_ace_cpu) 30 | 31 | snmp_info["cisco_ace_cpu"] = ( ".1.3.6.1.4.1.9.9.109.1.1.1.1.8", [ "1" ] ) -------------------------------------------------------------------------------- /cisco_ace/cisco_ace_peer_status.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Monitoring the HA status of a pair of Cisco ACE load balancers. 4 | # Herward Cooper - 2012 5 | 6 | # We take the response 'OK - Peer is compatible' to mean HA is healthy 7 | # Used OID .1.3.6.1.4.1.9.9.650.1.1.2.1.1.1 8 | 9 | def inventory_cisco_ace_peer_status(checkname, info): 10 | inventory=[] 11 | status = int(info[0][0]) 12 | if status < 11: 13 | inventory.append( (None, None) ) 14 | return inventory 15 | 16 | 17 | def check_cisco_ace_peer_status(item, params, info): 18 | state = int(info[0][0]) 19 | if state == 8: 20 | return (0, "OK - Peer is compatible") 21 | else: 22 | return (2, "CRITICAL - Peer is not compatible!") 23 | return (3, "UNKNOWN - unhandled problem") 24 | 25 | check_info["cisco_ace_peer_status"] = (check_cisco_ace_peer_status, "Cisco ACE Peer Status", 0, inventory_cisco_ace_peer_status) 26 | 27 | snmp_info["cisco_ace_peer_status"] = ( ".1.3.6.1.4.1.9.9.650.1.1.2.1.1", [ "1" ] ) -------------------------------------------------------------------------------- /generic_local/README.md: -------------------------------------------------------------------------------- 1 | # generic_local check_mk plugin 2 | 3 | ## Overview 4 | The generic_local plugin for check_mk provides an easy way to monitor the status 5 | of a service without having to script a custom plugin from scratch. 6 | 7 | Originally started to monitoring any given values reported by "SHOW STATUS" in 8 | MySQL, is was updated to be generic and allow monitoring of any service 9 | which can present it's stats in a two column format. 10 | 11 | For more information on check_mk local plugins, and the format check_mk expects 12 | have a look at the author's [website](http://mathias-kettner.de/checkmk_localchecks.html). 13 | 14 | ## Installation 15 | The plugin is designed to be a *local* plugin, which doesn't require any 16 | configuration on the server side. This means changes to thresholds are done 17 | locally too. Just place the script (set as executable) in the check_mk_agent local 18 | directory (/usr/lib/check_mk_agent/local on Debian). 19 | 20 | ## Testing 21 | As soon as the script is in place it's results should be included in the output 22 | of check_mk. Test this simply with "*telnet localhost 6556*". 23 | 24 | To makes the checks live, on the nagios server run "*check_mk -I hostname*", 25 | followed by a nagios reload "*check_mk -O*". 26 | 27 | ## Example Configurations 28 | ### Varnish 29 | 30 | ``` 31 | status_command="/usr/bin/varnishstat -1" 32 | prefix = "Varnish_" 33 | checks = [ 34 | ( "backend_fail", 10, 100), 35 | ( "client_conn", 40, 100) 36 | ] 37 | ``` 38 | 39 | ####Example output: 40 | 41 | ``` 42 | 2 Varnish_client_conn - CRITICAL: Varnish_client_conn 4630 43 | 1 Varnish_backend_fail - WARNING: Varnish_backend_fail 18 44 | ``` 45 | 46 | ### MySQL 47 | 48 | ``` 49 | status_command="/usr/bin/mysql -e 'SHOW STATUS'" 50 | prefix = "MySQL_" 51 | checks = [ 52 | ( "Qcache_lowmem_prunes", 10, 100), 53 | ( "Max_used_connections", 25, 30), 54 | ( "Threads_connected", 5, 10), 55 | ( "Open_files", 512, 1024), 56 | ( "Open_tables", 256, 512), 57 | ( "Slow_queries", 10, 100) 58 | ] 59 | ``` 60 | 61 | ####Example output: 62 | 63 | ``` 64 | 0 MySQL_Max_used_connections - OK: MySQL_Max_used_connections 20 65 | 0 MySQL_Open_files - OK: MySQL_Open_files 445 66 | 0 MySQL_Open_tables - OK: MySQL_Open_tables 256 67 | 0 MySQL_Qcache_lowmem_prunes - OK: MySQL_Qcache_lowmem_prunes 0 68 | 0 MySQL_Slow_queries - OK: MySQL_Slow_queries 0 69 | 0 MySQL_Threads_connected - OK: MySQL_Threads_connected 1 70 | ``` 71 | -------------------------------------------------------------------------------- /generic_local/generic_local.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This is a local plugin for check_mk, suitable for monitoring 4 | # any service variables which are presented in a list. Currently 5 | # sucessfully tested with MySQL and Varnish. 6 | 7 | # Hereward Cooper 8 | # https://github.com/hcooper/check_mk-plugins 9 | 10 | #------------------------------------------------------------ 11 | # CONFIGURATION 12 | #------------------------------------------------------------ 13 | 14 | # What's the command to run to get our raw data? 15 | #status_command="/usr/bin/varnishstat -1" 16 | status_command="mysql -e 'show status'" 17 | #status_command="mysqladmin status | sed 's/ /\\n/g' | sed 's/ /_/g' | sed 's/:_/ /g'" 18 | 19 | 20 | # What's a human-friendly prefix to name our checks? 21 | #prefix = "Varnish_" 22 | prefix = "MySQL_" 23 | 24 | # What variables shall we actually check for? Format: 25 | # ("variable name", warning threshold, critical threshold) 26 | # Set the warn and crit to None and it will never alert, only get graphed 27 | checks = [ 28 | # ( "Queries_per_second_avg", 1, 5), 29 | ( "Qcache_lowmem_prunes", 10, 100), 30 | ( "Max_used_connections", 40, 60), 31 | ( "Threads_connected", None, None), 32 | ( "Open_files", 512, 1024), 33 | ( "Open_tables", 256, 512), 34 | ( "Slow_queries", 10, 100) 35 | ] 36 | 37 | #------------------------------------------------------------ 38 | # PREPARE FOR BATTLE 39 | #------------------------------------------------------------ 40 | 41 | import sys 42 | import os 43 | 44 | # Run the command to retrieve the raw data 45 | status = os.popen(status_command).read() 46 | 47 | 48 | #------------------------------------------------------------ 49 | # DEBUG 50 | #------------------------------------------------------------ 51 | 52 | # To save commenting and uncommenting each time, just call this debug function 53 | def debug(): 54 | print "-----------------------" 55 | print "DEBUG: Parsed Variables" 56 | print "-----------------------" 57 | for line in status.split('\n'): 58 | try: 59 | sys.stdout.write("VAR: " + line.split()[0]) 60 | except: 61 | continue 62 | try: 63 | print " - " + line.split()[1] 64 | except: 65 | continue 66 | 67 | print "----------------" 68 | print "DEBUG: My Checks" 69 | print "----------------" 70 | for check,warn,crit in checks: 71 | print check, warn, crit 72 | 73 | #------------------------------------------------------------ 74 | # THE MAGIC 75 | #------------------------------------------------------------ 76 | 77 | # Function to output the check result in check_mk format 78 | def output(state,chkname,text,value,warn,crit): 79 | if warn == None and crit == None: 80 | # Don't print the warn/crit values if they are set to None 81 | print "%s %s %s=%s %s - %s %s" % (state,chkname,chkname,value,text,chkname,value) 82 | else: 83 | print "%s %s %s=%s;%s;%s %s - %s %s" % (state,chkname,chkname,value,warn,crit,text,chkname,value) 84 | 85 | # Read through each line in the output of SHOW STATUS 86 | def run_checks(): 87 | for line in status.splitlines(): 88 | 89 | # Sometimes we have a mare reading the first and last lines. Skip if needed. 90 | try: 91 | var = line.split()[0] 92 | except: 93 | continue 94 | 95 | # Some variables don't actually have a value set. Handle this (skip for now). 96 | try: 97 | var_value = float(line.split()[1]) 98 | except: 99 | continue 100 | 101 | # Read through each of our configured variables to check 102 | for check,warn,crit in checks: 103 | 104 | # Is the variable on the list to check? 105 | if var == check: 106 | 107 | # Merge the prefix and variable name now so we don't have to keep doing it. 108 | chkname = prefix + var 109 | 110 | # If the crit and warn are None, we're not alerting, just graphing 111 | if warn == None and crit == None: 112 | output(0,chkname,"OK",var_value,warn,crit) 113 | 114 | # Otherwise do normal alerting 115 | elif var_value > crit: 116 | output(2,chkname,"CRITICAL",var_value,warn,crit) 117 | elif var_value > warn: 118 | output(1,chkname,"WARNING",var_value,warn,crit) 119 | else: 120 | output(0,chkname,"OK",var_value,warn,crit) 121 | 122 | # Actually do some work! 123 | run_checks() 124 | #debug() 125 | -------------------------------------------------------------------------------- /haproxy/haproxy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # A check_mk plugin to monitor the status of an HAProxy server. 4 | # Hereward Cooper 5 | 6 | # Requirements: 7 | # - HAproxy socket 8 | # (config: "stats socket /var/run/haproxy.socket") 9 | 10 | import os 11 | import re 12 | import sys 13 | from io import StringIO 14 | import socket 15 | from time import time 16 | 17 | __version__ = "0.2" 18 | __author__ = "Hereward Cooper " 19 | __website__ = "http://github.com/hcooper/haproxy-tools/" 20 | 21 | 22 | def build_array(rawstats): 23 | """ Convert the raw stats into nested arrays. Much nicer to use. 24 | This functions creates an array, with each element being a dictonary of checks for each server 25 | e.g. servers = [ {pxname: app1, rate: 15...}, {pxname: app2, rate: 7...} ] """ 26 | 27 | stats=[] 28 | 29 | for line in rawstats.split('\n'): 30 | 31 | if re.match(r'^\s*$', line): # skip empty lines 32 | continue 33 | 34 | values = line.split(',') 35 | 36 | if re.match(r'^#', line): # first line contains the header names 37 | titles = values 38 | titles[0] = titles[0][2:] # remove the '# ' from the first element 39 | continue 40 | 41 | stats.append(dict(zip(titles,values))) # create the dict containing our results 42 | 43 | return stats 44 | 45 | 46 | def run_checks(servers): 47 | """ Interate through each server, and then each defined check, and compare the values to 48 | the critical/warning levels, then alert if need be. """ 49 | 50 | for server in servers: 51 | server['fullname'] = server['pxname'] + "/" + server['svname'] # Combine 'svname' and 'pxname' to get a unique name 52 | 53 | # Define some variables before use 54 | result="" # The complete set of results for each server's checks 55 | allperf="" # The complete set of all performance data for a server 56 | alert_warn=False # Flag set if check makes a server WARN 57 | alert_crit=False # " " " " " " " " CRIT 58 | 59 | for check,warn,crit in checks: 60 | output="" 61 | perfdata="" 62 | 63 | # If the value we're looking for isn't present, skip it. (e.g. FRONTEND doesn't have chkfail) 64 | if not server[check]: 65 | continue 66 | 67 | # Special check for the "status" field as it's not a numeric value 68 | if check == "status": 69 | if server['status'] == "UP": 70 | output += "status UP" 71 | elif server['status'] == "DOWN": 72 | output += "status DOWN" 73 | alert_crit = True 74 | elif server['status'] == "OPEN": 75 | output += "status OPEN" 76 | # Add more status options here 77 | 78 | # Generic check for the other fields which are numeric 79 | # Make sure int() is used when needed! 80 | else: 81 | if int(server[check]) >= int(warn) and int(server[check]) < int(crit): 82 | output += check + " WARN " + server[check] + ", " 83 | alert_warn = True 84 | if int(server[check]) >= int(crit): 85 | output += check + " CRIT " + server[check] + ", " 86 | alert_crit = True 87 | #if server[check] < warn: # Disabled so OK doesn't give out stats 88 | #output += "| " + check + " OK " + server[check] 89 | 90 | perfdata += check + "=" + server[check] + ";" + warn + ";" + crit 91 | 92 | # Build the output performance data, putting | in the right places 93 | if allperf == "": 94 | allperf = perfdata 95 | elif perfdata != "": 96 | allperf += "|" + perfdata 97 | 98 | # Build the check output 99 | result += output 100 | 101 | # If any of our checks have set the crit/warn flags, act on it 102 | if alert_crit: 103 | print ("2 HAProxy_%s %s CRITICAL - [%s]" % (server['fullname'], allperf, result)) 104 | elif alert_warn: 105 | print ("1 HAProxy_%s %s WARNING - [%s]" % (server['fullname'], allperf, result)) 106 | else: 107 | print ("0 HAProxy_%s %s OK - [%s]" % (server['fullname'], allperf, result)) 108 | 109 | 110 | class HAProxyStats(object): 111 | """ Used for communicating with HAProxy through its local UNIX socket interface""" 112 | 113 | def __init__(self, socket_name=None): 114 | self.socket_name = socket_name 115 | 116 | def getstats(self, timeout=200): 117 | """ Executes a HAProxy command by sending a message to a HAProxy's local 118 | UNIX socket and waiting up to 'timeout' milliseconds for the response """ 119 | 120 | buff = StringIO() 121 | end = time() + timeout 122 | 123 | client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) 124 | 125 | try: 126 | client.connect(self.socket_name) 127 | client.send(('show stat' + '\n').encode()) 128 | 129 | while time() <= end: 130 | databyte = client.recv(4096) 131 | data = databyte.decode() 132 | if data: 133 | buff.write(data) 134 | else: 135 | return build_array(buff.getvalue()) 136 | except: 137 | print ("Failed to retrieve stats") 138 | sys.exit(1) 139 | finally: 140 | client.close() 141 | 142 | 143 | 144 | if __name__ == "__main__": 145 | 146 | socketfile = "/var/run/haproxy.socket" 147 | 148 | if not os.path.exists(socketfile): 149 | print ("Socket does not exist") 150 | sys.exit(1) 151 | 152 | statssocket = HAProxyStats(socketfile) 153 | stats = statssocket.getstats() 154 | 155 | from haproxychecks import checks 156 | 157 | run_checks(stats) 158 | -------------------------------------------------------------------------------- /haproxy/haproxychecks.py: -------------------------------------------------------------------------------- 1 | # Example checks for the HAProxy plugin 2 | checks = [ 3 | #fieldname, warning, critical 4 | ['scur', '250', '500'], 5 | ['chkfail', '15', '25'], 6 | ['status', '', ''] # status stays at the end, just for formatting purposes 7 | ] 8 | 9 | if __name__ == "__main__": 10 | print ("This file is not meant to be called directly") 11 | -------------------------------------------------------------------------------- /lsi_megariad/lsi_megaraid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Quick hack to monitoring LSI MegaRaid via SNMP with check_mk 4 | # Tested using Windows LSI tools 5 | # Hereward Cooper - Sep 2012 6 | 7 | # Currently checks 4 values, with a base OID of 1.3.6.1.4.1.3582.4.1.4.1.2.1 8 | # .19 = vdDegradedCount 9 | # .20 = vdOfflineCount 10 | # .24 = pdDiskFailedCount 11 | # .23 = pdDiskPredFailureCount 12 | 13 | def inventory_lsi_megaraid(checkname, info): 14 | inventory = [] 15 | # If 'info' isn't empty, add it to the inventory 16 | if info != []: 17 | inventory.append( (None, None) ) 18 | return inventory 19 | 20 | 21 | def check_lsi_megaraid(item, params, info): 22 | 23 | # The 'nice' names for our checks 24 | checks = ['vdDegradedCount', 'vdOfflineCount', 'pdDiskFailedCount', 'pdDiskPredFailureCount'] 25 | 26 | # Make a dictonary of the check name and the result 27 | results = dict(zip(checks, info[0])) 28 | 29 | # Check the results and return appropriately 30 | for check in results: 31 | if results[check] == '0': 32 | continue 33 | else: 34 | return (2, "CRITICAL - %s: %s" % (check, results[check])) 35 | 36 | # If we haven't returned with an error so far, return OK now 37 | return (0, "OK - No reported errors") 38 | 39 | 40 | check_info["lsi_megaraid"] = (check_lsi_megaraid, "LSI MegaRAID", 0, inventory_lsi_megaraid) 41 | snmp_info["lsi_megaraid"] = ( ".1.3.6.1.4.1.3582.4.1.4.1.2.1", ["19", "20", "24", "23"] ) 42 | -------------------------------------------------------------------------------- /mod_status/README: -------------------------------------------------------------------------------- 1 | This script is designed to dump apache's mod_status output into a format which the 2 | generic_local check can work with: 3 | 4 | https://github.com/hcooper/check_mk-plugins/tree/master/generic_local 5 | -------------------------------------------------------------------------------- /mod_status/mod_status-dump.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ Fetch Apache stats via mod_status 4 | By Hereward Cooper 5 | (modified from Zabbix code by Paulson McIntyre) 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | """ 20 | 21 | import urllib 22 | from optparse import OptionParser 23 | import os 24 | from tempfile import mkstemp 25 | import StringIO 26 | import csv 27 | 28 | 29 | def fetchURL(url, user = None, passwd = None): 30 | """ Return the data from a URL """ 31 | if user and passwd: 32 | parts = url.split('://') 33 | url = parts[0] + "://" + user + ":" + passwd + "@" + parts[1] 34 | 35 | conn = urllib.urlopen(url) 36 | try: 37 | data = conn.read() 38 | finally: 39 | conn.close() 40 | return data 41 | 42 | def clean(string, chars): 43 | for i in chars: 44 | string = string.replace(i, '') 45 | return string 46 | 47 | def parse(data): 48 | """ Parse the CSV file into a dict of data 49 | """ 50 | mapping = { 51 | "_":"Waiting for Connection", 52 | "S":"Starting up", 53 | "R":"Reading Request", 54 | "W":"Sending Reply", 55 | "K":"Keepalive (read)", 56 | "D":"DNS Lookup", 57 | "C":"Closing connection", 58 | "L":"Logging", 59 | "G":"Gracefully finishing", 60 | "I":"Idle cleanup of worker", 61 | ".":"Open slot with no current process", 62 | } 63 | # Clean out certian chars 64 | replace = '() ' 65 | csvobj = csv.reader(StringIO.StringIO(data), delimiter = ":", skipinitialspace = True) 66 | ret = {} 67 | for (key, val) in csvobj: 68 | if key == 'Scoreboard': 69 | sb = { 70 | "Waiting for Connection":0, 71 | "Starting up":0, 72 | "Reading Request":0, 73 | "Sending Reply":0, 74 | "Keepalive (read)":0, 75 | "DNS Lookup":0, 76 | "Closing connection":0, 77 | "Logging":0, 78 | "Gracefully finishing":0, 79 | "Idle cleanup of worker":0, 80 | "Open slot with no current process":0, 81 | } 82 | for i in val: 83 | sb[mapping[i]] += 1 84 | ret[key] = sb 85 | else: 86 | ret[key] = val 87 | ret2 = {} 88 | for (key, val) in ret.items(): 89 | if key == "Scoreboard": 90 | for (key, val) in val.items(): 91 | ret2[clean(key, replace)] = val 92 | else: 93 | ret2[clean(key, replace)] = val 94 | 95 | return ret2 96 | 97 | if __name__ == "__main__": 98 | parser = OptionParser( 99 | usage = "%prog [-o ]", 100 | version = "%prog $Revision$", 101 | prog = "mod_status-dump", 102 | description = """This program gathers data from Apache's built-in status page 103 | and dumps them into a simple list. 104 | """, 105 | ) 106 | parser.add_option( 107 | "-l", 108 | "--url", 109 | action = "store", 110 | type = "string", 111 | dest = "url", 112 | default = None, 113 | help = "Override the automatically generated URL with one of your own", 114 | ) 115 | parser.add_option( 116 | "-o", 117 | "--host", 118 | action = "store", 119 | type = "string", 120 | dest = "host", 121 | default = "localhost", 122 | help = "Host to connect to. [default: %default]", 123 | ) 124 | parser.add_option( 125 | "-p", 126 | "--port", 127 | action = "store", 128 | type = "int", 129 | dest = "port", 130 | default = 80, 131 | help = "Port to connect on. [default: %default]", 132 | ) 133 | parser.add_option( 134 | "-r", 135 | "--proto", 136 | action = "store", 137 | type = "string", 138 | dest = "proto", 139 | default = "http", 140 | help = "Protocol to connect on. Can be http or https. [default: %default]", 141 | ) 142 | parser.add_option( 143 | "-u", 144 | "--user", 145 | action = "store", 146 | type = "string", 147 | dest = "user", 148 | default = None, 149 | help = "HTTP authentication user to use when connection. [default: None]", 150 | ) 151 | parser.add_option( 152 | "-a", 153 | "--passwd", 154 | action = "store", 155 | type = "string", 156 | dest = "passwd", 157 | default = None, 158 | help = "HTTP authentication password to use when connecting. [default: None]", 159 | ) 160 | (opts, args) = parser.parse_args() 161 | 162 | if opts.url and (opts.port != 80 or opts.proto != "http"): 163 | parser.error("Can't specify -u with -p or -r") 164 | 165 | if not opts.url: 166 | opts.url = "%s://%s:%s/server-status?auto" % (opts.proto, opts.host, opts.port) 167 | 168 | data = fetchURL(opts.url, user = opts.user, passwd = opts.passwd) 169 | 170 | 171 | try: 172 | data = parse(data = data) 173 | except csv.Error: 174 | parser.error("Error parsing returned data") 175 | 176 | 177 | try: 178 | for key, val in data.items(): 179 | print "%s %s" % (key, val) 180 | except: 181 | parser.error("Error printing values") -------------------------------------------------------------------------------- /mysql_rep/mysql_rep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # A mysql replication plugin for the check_mk nagios system. 4 | # Place me in /usr/lib/check_mk_agent/local on the client 5 | # Hereward Cooper - 16/06/11 6 | 7 | MYSQL_USER="user" 8 | MYSQL_PASS="Pa$$word" 9 | 10 | # Anything below DELAY_WARNING is fine. Anything between 11 | # DELAY_WARNING and DELAY_CRITICAL is throw up a warning. Anything 12 | # above DELAY_CRITICAL will trigger a critical alert. 13 | DELAY_WARNING=60 14 | DELAY_CRITICAL=360 15 | 16 | # Graph details 17 | MIN=0 18 | MAX=400 19 | 20 | 21 | MYSQL_STATUS=`mysql -u$MYSQL_USER -p$MYSQL_PASS -e "SHOW SLAVE STATUS\G" | egrep 'Slave_.*_Running|Seconds_Behind_Master' | sed 's/^ *//'` 22 | 23 | echo "$MYSQL_STATUS" | sed -n '1p' | grep -q Yes && IO=1 || IO=0 24 | echo "$MYSQL_STATUS" | sed -n '2p' | grep -q Yes && SQL=1 || SQL=0 25 | DELAY=`echo "$MYSQL_STATUS" | sed -n '3p' | cut -d " " -f 2` 26 | 27 | ## Check Slave_IO_Running status 28 | if [ $IO = "1" ]; then 29 | echo "0 MySQL_Rep_IO - OK - Replication IO Running" 30 | else 31 | echo "2 MySQL_Rep_IO - CRITICAL - Repication IO Stopped" 32 | fi 33 | 34 | ## Check Slave_SQL_Running status 35 | if [ $SQL = "1" ]; then 36 | echo "0 MySQL_Rep_SQL - OK - Replication SQL Running" 37 | else 38 | echo "2 MySQL_Rep_SQL - CRITICAL - Replication SQL Stopped" 39 | fi 40 | 41 | ## Check Seconds_Behind_Master value 42 | if [ $DELAY = "NULL" ]; then 43 | echo "2 MySQL_Rep_Delay delay=$DELAY;$DELAY_WARNING;$DELAY_CRITICAL;$MIN;$MAX CRITICAL - Replication delay NULL" 44 | elif [ $DELAY -lt $DELAY_WARNING ]; then 45 | echo "0 MySQL_Rep_Delay delay=$DELAY;$DELAY_WARNING;$DELAY_CRITICAL;$MIN;$MAX OK - Replication delay $DELAY seconds" 46 | elif [ $DELAY -lt $DELAY_CRITICAL ]; then 47 | echo "1 MySQL_Rep_Delay delay=$DELAY;$DELAY_WARNING;$DELAY_CRITICAL;$MIN;$MAX WARNING - Replication delay $DELAY seconds" 48 | elif [ $DELAY -ge $DELAY_CRITICAL ]; then 49 | echo "2 MySQL_Rep_Delay delay=$DELAY;$DELAY_WARNING;$DELAY_CRITICAL;$MIN;$MAX CRITICAL - Replication delay $DELAY seconds" 50 | fi 51 | -------------------------------------------------------------------------------- /sonicwall/sonicwall_cpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Monitoring the CPU usage of a Sonicwall 4 | # Herward Cooper - 2012 5 | 6 | # Uses OID .1.3.6.1.4.1.8741.1.3.1.3.0 7 | 8 | sonicwall_cpu_default_values = (35, 40) 9 | 10 | def inventory_sonicwall_cpu(checkname, info): 11 | inventory=[] 12 | inventory.append( (None, None, "sonicwall_cpu_default_values") ) 13 | return inventory 14 | 15 | 16 | def check_sonicwall_cpu(item, params, info): 17 | warn, crit = params 18 | state = int(info[0][0]) 19 | perfdata = [ ( "cpu", state, warn, crit ) ] 20 | if state > crit: 21 | return (2, "CRITICAL - CPU is %s percent" % state, perfdata) 22 | elif state > warn: 23 | return (1, "WARNING - CPU is %s percent" % state, perfdata) 24 | else: 25 | return (0, "OK - CPU is %s percent" % state, perfdata) 26 | 27 | check_info["sonicwall_cpu"] = (check_sonicwall_cpu, "Sonicwall CPU", 1, inventory_sonicwall_cpu) 28 | 29 | snmp_info["sonicwall_cpu"] = ( ".1.3.6.1.4.1.8741.1.3.1.3", [ "0" ] ) 30 | -------------------------------------------------------------------------------- /sonicwall/sonicwall_mem.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Monitoring the Mem usage of a Sonicwall 4 | # Herward Cooper - 2012 5 | 6 | # Uses OID 1.3.6.1.4.1.8741.1.3.1.4.0 7 | 8 | sonicwall_mem_default_values = (35, 40) 9 | 10 | def inventory_sonicwall_mem(checkname, info): 11 | inventory=[] 12 | inventory.append( (None, None, "sonicwall_mem_default_values") ) 13 | return inventory 14 | 15 | 16 | def check_sonicwall_mem(item, params, info): 17 | warn, crit = params 18 | state = int(info[0][0]) 19 | perfdata = [ ( "cpu", state, warn, crit ) ] 20 | if state > crit: 21 | return (2, "CRITICAL - Mem is %s percent" % state, perfdata) 22 | elif state > warn: 23 | return (1, "WARNING - Mem is %s percent" % state, perfdata) 24 | else: 25 | return (0, "OK - Mem is %s percent" % state, perfdata) 26 | 27 | check_info["sonicwall_mem"] = (check_sonicwall_mem, "Sonicwall Mem", 1, inventory_sonicwall_mem) 28 | 29 | snmp_info["sonicwall_mem"] = ( ".1.3.6.1.4.1.8741.1.3.1.4", [ "0" ] ) 30 | -------------------------------------------------------------------------------- /sun_hardware/sun_hw_win.bat: -------------------------------------------------------------------------------- 1 | GOTO EndComment 2 | This script was written to get the status of Sun Hardware via IPMI in Window Server 2003. 3 | The output is returned in a format which check_mk can understand. It's a bit of a hack 4 | but it works! 5 | Hereward Cooper - Sometime 2011 6 | :EndComment 7 | @echo off 8 | set COUNT= 9 | 10 | :: Retrieve every "ON" alarm from sunoem 11 | C:\ipmiutil.exe sunoem sbled get 2> nul | find /C "ON" > C:\nrpe_nt\sun.txt 12 | 13 | :: Count the alarms, and if there are more than 2 print an alert, otherwise print an OK. 14 | :: (this vaule of 2 will depend on specific IPMI/Sun configuration, but on the server here 15 | :: there are two 'alarms' which are set to ON when in a fault-free state). 16 | for /F %%A in (C:\nrpe_nt\sun.txt) do set COUNT=%%A 17 | IF %COUNT% EQU 2 (echo 0 Sun_Hardware - OK: 0 Alarms Detected) ELSE (echo 2 Sun_Hardware - CRITICAL: Alarms Detected) 18 | -------------------------------------------------------------------------------- /tcp_check/tcp_check.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # check_mk script to check TCP connections to a remote host. 3 | # first written to simply check transit down a leased line 4 | # Herward Cooper - 2011 5 | 6 | from socket import * 7 | 8 | # Out list of checks, in the form: ['ip', port] 9 | checks = [ 10 | ['11.22.33.44', 8443], 11 | ['12.13.14.15', 6000], 12 | ['127.0.0.1', 80], 13 | ] 14 | 15 | # What to prefix all the check names with 16 | prefix = "TCP_Check_" 17 | 18 | # How long to wait for each check? 19 | timeout = 5 20 | 21 | setdefaulttimeout(timeout) 22 | 23 | for ip,port in checks: 24 | 25 | # Define a new socket 26 | s = socket(AF_INET, SOCK_STREAM) 27 | 28 | # Form the check name 29 | chkname = prefix + ip + ":" + str(port) 30 | 31 | # Open the connection 32 | result = s.connect_ex((ip,port)) 33 | 34 | # Report the result 35 | if ( result == 0 ): 36 | print "0 %s - OK - %s:%d Reachable" % (chkname,ip,port) 37 | else: 38 | print "2 %s - CRITICAL - %s:%d Unreachable" % (chkname,ip,port) 39 | 40 | # Close the connection 41 | s.close() 42 | -------------------------------------------------------------------------------- /xen_license/README.md: -------------------------------------------------------------------------------- 1 | XenServer License Checker 2 | ========================= 3 | 4 | A port of Nick Anderson's NRPE plugin for checking 5 | the validity of XenServer licenses, which works with 6 | check_mk. 7 | 8 | Original code: 9 | https://github.com/nickanderson/nagios-plugins-check_xs-license 10 | -------------------------------------------------------------------------------- /xen_license/check_xen_license.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2010, Nick Anderson 3 | # All rights reserved. 4 | 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # 13 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 14 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 | # ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY 17 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | import sys 25 | import time 26 | import getpass 27 | import optparse 28 | 29 | from datetime import date 30 | from string import Template 31 | 32 | import XenAPI 33 | 34 | __version__ = "1.2" 35 | __author__ = "Nick Anderson " 36 | __website__ = "http://github.com/nickanderson/check_citrix_xenserver_license" 37 | 38 | def main(session, options): 39 | """ 40 | Get number of days until license expires 41 | Return 0 if license not expired, 2 if expired 42 | """ 43 | hosts = session.xenapi.host.get_all() 44 | host = hosts[0] 45 | record = session.xenapi.host.get_record(host) 46 | # Get the date part of the string 47 | expires = time.strptime(record["license_params"]["expiry"][0:8], 48 | "%Y%m%d")[0:3] 49 | # create date object for finding difference in days 50 | expires_on = date(int(expires[0]), int(expires[1]), int(expires[2])) 51 | expire_days = (expires_on - date.today()).days 52 | display = Template('$status') 53 | 54 | if int(expire_days) > int(options.warning_days): 55 | print display.substitute(status='0 XenServer_License - OK Expiring in %s days'%expire_days) 56 | session.xenapi.session.logout() 57 | sys.exit(0) 58 | 59 | # If number of days until expire is less than warning 60 | elif int(expire_days) <= int(options.warning_days): 61 | if int(expire_days) <= int(options.critical_days): 62 | print display.substitute(status='2 XenServer_License - CRITICAL Expiring in %s days'%expire_days) 63 | session.xenapi.session.logout() 64 | sys.exit(2) 65 | else: 66 | print display.substitute(status='1 XenServer_License - WARNING Expiring in %s days'%expire_days) 67 | session.xenapi.session.logout() 68 | sys.exit(1) 69 | else: 70 | print display.substitute(status='3 XenServer_License - UNKOWN') 71 | session.xenapi.session.logout() 72 | sys.exit(3) 73 | 74 | if __name__ == "__main__": 75 | # define options 76 | op = optparse.OptionParser("usage: %prog [options]", 77 | version = "%%prog v%s\nAuthor: %s\nWebsite: %s" % 78 | (__version__, __author__, __website__)) 79 | 80 | og_sess = optparse.OptionGroup(op, "Session Options") 81 | og_sess.add_option('--server', 82 | dest='server', 83 | help="xenserver host (default: %default)") 84 | og_sess.add_option('--username', 85 | dest='username', 86 | help="xenserver username (defaut: %default)") 87 | og_sess.add_option('--password', 88 | dest="password", 89 | help="xenserver password") 90 | op.add_option_group(og_sess) 91 | 92 | og_nag = optparse.OptionGroup(op, "Nagios Options") 93 | og_nag.add_option('-c', 94 | dest='critical_days', 95 | help="critical threshold for days (default: %default)") 96 | og_nag.add_option('-w', 97 | dest='warning_days', 98 | help="warn threshold for days (default: %default)") 99 | op.add_option_group(og_nag) 100 | 101 | op.set_defaults(server='localhost', 102 | username = 'root', 103 | password = '', 104 | warning_days = 30, 105 | critical_days = 10) 106 | 107 | # parse and validate 108 | (options, args) = op.parse_args() 109 | if options.password == '': 110 | options.password = getpass.getpass("password: ") 111 | 112 | # First acquire a valid session by logging in: 113 | session = XenAPI.Session("https://"+options.server) 114 | try: 115 | session.xenapi.login_with_password(options.username, options.password) 116 | except XenAPI.Failure, e: 117 | if e.details[0]=='HOST_IS_SLAVE': 118 | session=XenAPI.Session('https://'+e.details[1]) 119 | session.login_with_password(options.username,options.password) 120 | except: 121 | print '3 XenServer_License - UNKOWN, can\'t connect' 122 | sys.exit(3) 123 | 124 | main(session, options) 125 | --------------------------------------------------------------------------------