├── 3ware_raid
    ├── 3ware_disks_snmp.py
    └── 3ware_raid_snmp.py
├── LICENSE
├── README.md
├── adaptec_raid
    └── adaptec_raid.py
├── backuppc
    ├── backuppc_good_age.py
    └── hash-to-json.pl
├── barracuda_waf
    ├── barracuda_waf_attacks.py
    ├── barracuda_waf_ha.py
    └── barracuda_waf_system_load.py
├── chef_age
    └── chef_age.py
├── chef_nodes
    ├── README.md
    ├── chef_nodes.sh
    └── knife_status.rb
├── cisco_ace
    ├── cisco_ace_cpu.py
    └── cisco_ace_peer_status.py
├── generic_local
    ├── README.md
    └── generic_local.py
├── haproxy
    ├── haproxy.py
    └── haproxychecks.py
├── lsi_megariad
    └── lsi_megaraid.py
├── mod_status
    ├── README
    └── mod_status-dump.py
├── mysql_rep
    └── mysql_rep.sh
├── sonicwall
    ├── sonicwall_cpu.py
    └── sonicwall_mem.py
├── sun_hardware
    └── sun_hw_win.bat
├── tcp_check
    └── tcp_check.py
└── xen_license
    ├── README.md
    └── check_xen_license.py


/3ware_raid/3ware_disks_snmp.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Very quick hack to monitor 3ware RAID cards via SNMP with check_mk
 4 | # Tested using Windows 3ware SNMP plugin (3wSnmp.msi from http://kb.lsi.com)
 5 | # Hereward Cooper <coops@fawk.eu> - Sep 2012
 6 | 
 7 | # MIB: TW-RAID-MIB
 8 | # .1.3.6.1.4.1.1458.100.22.1.10.X = twRaidDriveStatus.X
 9 | # 255 = OK
10 | 
11 | 
def inventory_3ware_disks_snmp(checkname, info):
    """Return a single item-less inventory entry when the SNMP walk has data.

    checkname is part of the inventory signature but is not used here.
    info is the list of SNMP rows; an empty list produces an empty inventory.
    """
    # One (item, params) pair of (None, None) stands for all disks together.
    return [(None, None)] if info != [] else []
18 | 
19 | 
def check_3ware_disks_snmp(item, params, info):
    """Summarise the status of every disk row returned by the SNMP walk.

    The first column of each row is the drive status code as a string;
    '255' is the healthy value. Disks are numbered by their position in
    info. item and params are unused.

    Returns (state, text): state is 0 when every disk is healthy, else 2.
    """
    reports = []
    state = 0

    for index, row in enumerate(info):
        code = row[0]
        if code == '255':
            reports.append("Disk %s OK" % (index))
        else:
            reports.append("Disk %s Error (code: %s)" % (index, code))
            state = 2

    prefix = "CRITICAL - " if state == 2 else "OK - "
    return (state, prefix + ", ".join(reports))
39 | 
40 | 
# Register the check with check_mk. The tuple presumably follows the legacy
# check_info layout (check function, service description, has-perfdata flag,
# inventory function) -- confirm against the check_mk documentation.
check_info["3ware_disks_snmp"] = (
    check_3ware_disks_snmp,
    "3ware RAID Disks",
    0,
    inventory_3ware_disks_snmp,
)
# Base OID plus the single sub-OID ("10") fetched beneath it.
snmp_info["3ware_disks_snmp"] = (".1.3.6.1.4.1.1458.100.22.1", ["10"])
43 | 


--------------------------------------------------------------------------------
/3ware_raid/3ware_raid_snmp.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Very quick hack to monitor 3ware RAID cards via SNMP with check_mk
 4 | # Tested using Windows 3ware SNMP plugin (3wSnmp.msi from http://kb.lsi.com)
 5 | # Hereward Cooper <coops@fawk.eu> - Sep 2012
 6 | 
 7 | # MIB: TW-RAID-MIB
 8 | # .1.3.6.1.4.1.1458.100.23.1.7.X = status of RAID array X
 9 | # 0 = OK
10 | 
11 | 
def inventory_3ware_raid_snmp(checkname, info):
    """Return a single item-less inventory entry when the SNMP walk has data.

    checkname is part of the inventory signature but is not used here.
    """
    if info == []:
        # Nothing came back for the array OID: nothing to monitor.
        return []
    return [(None, None)]
18 | 
19 | 
def check_3ware_raid_snmp(item, params, info):
    """Summarise the status of every array row returned by the SNMP walk.

    The first column of each row is the array status code as a string;
    '0' is the healthy value. Arrays are numbered by their position in
    info. item and params are unused.

    Returns (state, text): state is 0 when every array is healthy, else 2.
    """
    codes = [row[0] for row in info]

    parts = []
    for position, code in enumerate(codes):
        if code == '0':
            parts.append("Array %s OK" % (position))
        else:
            parts.append("Array %s Error (code: %s)" % (position, code))

    # Any non-'0' code makes the whole service critical.
    failed = any(code != '0' for code in codes)
    if failed:
        return (2, "CRITICAL - " + ", ".join(parts))
    return (0, "OK - " + ", ".join(parts))
39 | 
40 | 
# Register the check with check_mk. The tuple presumably follows the legacy
# check_info layout (check function, service description, has-perfdata flag,
# inventory function) -- confirm against the check_mk documentation.
check_info["3ware_raid_snmp"] = (
    check_3ware_raid_snmp,
    "3ware RAID Arrays",
    0,
    inventory_3ware_raid_snmp,
)
# Base OID plus the single sub-OID ("7") fetched beneath it.
snmp_info["3ware_raid_snmp"] = (".1.3.6.1.4.1.1458.100.23.1", ["7"])
43 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Hereward Cooper
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | A bunch of check_mk plugins
 2 | ===========================
 3 | Just read the scripts, the comments explain most things.
 4 | 
 5 | SNMP checks
 6 | -----------
 7 | These are installed on the check_mk server, (e.g. /usr/share/check_mk/checks), and if the
 8 | specific SNMP OID for that check returns a result it will be added to the inventory. I've had
 9 | to write these mainly for proprietary pieces of equipment (physical firewalls, raid cards etc).
10 | **Note:** remove the file extension when placing them in the 'checks' directory.
11 | 
12 | Local checks
13 | ------------
14 | These are installed on the client server, (e.g. /usr/lib/check_mk_agent/local). They have
15 | no server-side part, and handle all the logic needed to perform the check themselves.
16 | 


--------------------------------------------------------------------------------
/adaptec_raid/adaptec_raid.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # check-aacraid.py modified version of check from http://exchange.nagios.org/
  3 | #
  4 | # Additions:
  5 | #  - switch between check_mk and nrpe modes
  6 | #  - customize the command (so it works on Linux or Windows)
  7 | 
  8 | # Original by: Oliver Hookins, Paul De Audney and Barney Desmond.
  9 | # This version by Hereward Cooper <coops@fawk.eu>
 10 | 
 11 | # Change mode between 'check_mk' and 'nrpe' to reflect monitoring setup
 12 | mode = "check_mk"
 13 | #mode = "nrpe"
 14 | 
 15 | #command = "/usr/bin/sudo /usr/StorMan/arcconf"
 16 | command = '\"E:\Adaptec\Adaptec Storage Manager\\arcconf\"'
 17 | 
 18 | import sys, os, re, string
 19 | 
 20 | c_status_re = re.compile('^\s*Controller Status\s*:\s*(.*)$')
 21 | l_status_re = re.compile('^\s*Status of logical device\s*:\s*(.*)$')
 22 | l_device_re = re.compile('^Logical device number ([0-9]+).*$')
 23 | c_defunct_re = re.compile('^\s*Defunct disk drive count\s:\s*([0-9]+).*$')
 24 | c_degraded_re = re.compile('^\s*Logical devices/Failed/Degraded\s*:\s*([0-9]+)/([0-9]+)/([0-9]+).*$')
 25 | b_status_re = re.compile('^\s*Status\s*:\s*(.*)$')
 26 | b_temp_re = re.compile('^\s*Over temperature\s*:\s*(.*)$')
 27 | b_capacity_re = re.compile('\s*Capacity remaining\s*:\s*([0-9]+)\s*percent.*$')
 28 | b_time_re = re.compile('\s*Time remaining \(at current draw\)\s*:\s*([0-9]+) days, ([0-9]+) hours, ([0-9]+) minutes.*$')
 29 | 
 30 | cstatus = lstatus = ldevice = cdefunct = cdegraded = bstatus = btemp = bcapacity = btime = ""
 31 | lnum = ""
 32 | check_status = 0
 33 | result = ""
 34 | 
 35 | # Get logical drive status
 36 | for line in os.popen4(command + " GETCONFIG 1 LD")[1].readlines():
 37 |         # Match the regexs
 38 |         ldevice = l_device_re.match(line)
 39 |         if ldevice:
 40 |                 lnum = ldevice.group(1)
 41 |                 continue
 42 | 
 43 |         lstatus = l_status_re.match(line)
 44 |         if lstatus:
 45 |                 if lstatus.group(1) != "Optimal":
 46 |                         check_status = 2
 47 |                 result += "Logical Device " + lnum + " " + lstatus.group(1) + ","
 48 | 
 49 | # Get general card status
 50 | for line in os.popen4(command + " GETCONFIG 1 AD")[1].readlines():
 51 |         # Match the regexs
 52 |         cstatus = c_status_re.match(line)
 53 |         if cstatus:
 54 |                 if cstatus.group(1) != "Optimal":
 55 |                         check_status = 2
 56 |                 result += "Controller " + cstatus.group(1) + ","
 57 |                 continue
 58 | 
 59 |         cdefunct = c_defunct_re.match(line)
 60 |         if cdefunct:
 61 |                 if int(cdefunct.group(1)) > 0:
 62 |                         check_status = 2
 63 |                         result += "Defunct drives " + cdefunct_group(1) + ","
 64 |                 continue
 65 | 
 66 |         cdegraded = c_degraded_re.match(line)
 67 |         if cdegraded:
 68 |                 if int(cdegraded.group(2)) > 0:
 69 |                         check_status = 2
 70 |                         result += "Failed drives " + cdegraded.group(2) + ","
 71 |                 if int(cdegraded.group(3)) > 0:
 72 |                         check_status = 2
 73 |                         result += "Degraded drives " + cdegraded.group(3) + ","
 74 |                 continue
 75 | 
 76 |         bstatus = b_status_re.match(line)
 77 |         if bstatus:
 78 |                 if bstatus.group(1) == "Not Installed":
 79 |                         continue
 80 | 
 81 |                 if bstatus.group(1) == "Charging":
 82 |                         if check_status < 2:
 83 |                                 check_status = 1
 84 |                 elif bstatus.group(1) != "Optimal":
 85 |                         check_status = 2
 86 |                 result += "Battery Status " + bstatus.group(1) + ","
 87 |                 continue
 88 | 
 89 |         btemp = b_temp_re.match(line)
 90 |         if btemp:
 91 |                 if btemp.group(1) != "No":
 92 |                         check_status = 2
 93 |                         result += "Battery Overtemp " + btemp.group(1) + ","
 94 |                 continue
 95 | 
 96 |         bcapacity = b_capacity_re.match(line)
 97 |         if bcapacity:
 98 |                 result += "Battery Capacity " + bcapacity.group(1) + "%,"
 99 |                 if bcapacity.group(1) < 50:
100 |                         if check_status < 2:
101 |                                 check_status = 1
102 |                 if bcapacity.group(1) < 25:
103 |                         check_status = 2
104 |                 continue
105 | 
106 |         btime = b_time_re.match(line)
107 |         if btime:
108 |                 timemins = int(btime.group(1)) * 1440 + int(btime.group(2)) * 60 + int(btime.group(3))
109 |                 if timemins < 1440:
110 |                         if check_status < 2:
111 |                                 check_status = 1
112 |                 if timemins < 720:
113 |                         check_status = 2
114 |                 result += "Battery Time "
115 |                 if timemins < 60:
116 |                         result += str(timemins) + "mins,"
117 |                 else:
118 |                         result += str(timemins/60) + "hours,"
119 | 
120 | if result == "":
121 |         result = "No output from arcconf!"
122 |         check_status = 3
123 | 
124 | # strip the trailing "," from the result string.
125 | result = result.rstrip(",")
126 | 
127 | if mode == "check_mk":
128 |         print check_status, "Adaptec_RAID -", result
129 | elif mode == "nrpe":
130 |         print result
131 | 
132 | # Delete log once we've finished
133 | try:
134 |         cwd = os.getcwd()
135 |         fullpath = os.path.join(cwd,'UcliEvt.log')
136 |         os.unlink(fullpath)
137 | except:
138 |         pass
139 | 
140 | if mode == "nrpe":
141 |         sys.exit(check_status)
142 | 


--------------------------------------------------------------------------------
/backuppc/backuppc_good_age.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | '''
 4 | Check there's recent good backups for each host in backuppc.
 5 | Requires:
 6 |   backuppc's cli tools (BackupPC_serverMesg)
 7 |   python's sh module
 8 |   hash-to-json script.pl (in this repo, installed at /usr/local/bin)
 9 | '''
10 | 
11 | import json
12 | import sh
13 | import time
14 | import os
15 | 
os.setuid(114) # Switch to backuppc user (UID is hard-coded for this host)

# Thresholds, in hours.
WARN = 36
CRIT = 72

now = time.time()

# Use backuppc's cli tools to get the data we want, then convert the perl hash output
# into json. It's pretty hacky.
srv_msg = sh.Command('/usr/share/backuppc/bin/BackupPC_serverMesg')
hash_to_json = sh.Command('/usr/local/bin/hash-to-json.pl')
raw_status = sh.sed(srv_msg('status hosts'), 's/Got reply: //')
j = json.loads(hash_to_json(raw_status).stdout)

for host, info in j.items():
    # Skip backuppc's housekeeping jobs.
    if host in (' admin ', ' admin1 ', ' trashClean '):
        continue

    # A host without a usable timestamp must not abort the whole check;
    # report it as UNKNOWN and carry on with the remaining hosts.
    try:
        d = now - float(info["lastGoodBackupTime"])
    except (KeyError, TypeError, ValueError):
        print(f"3 Backup_age_{host} - {host} has no usable last good backup time.")
        continue
    d_nice = round(d/60/60, 2)

    # An age exactly on a threshold counts as the worse state; previously an
    # age equal to WARN matched neither range and was reported as CRITICAL.
    if d < (60*60*WARN):
        state = 0
    elif d < (60*60*CRIT):
        state = 1
    else:
        state = 2

    print(
        f"{state} Backup_age_{host} - {host}'s last good backup is {d_nice}h old."
    )
50 | 


--------------------------------------------------------------------------------
/backuppc/hash-to-json.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # A cli tool I wanted to use outputted a perl hash as a string.
 3 | # This reads it in from stdin, and returns json instead.
 4 | use JSON;
 5 | my @lines = <STDIN>;
 6 | 
 7 | my %Status;  # TODO: Unhard-code this.
 8 | eval $lines[0]; #TODO: handle multiple lines?
 9 | 
10 | my $json = encode_json \%Status;
11 | print $json;
12 | 


--------------------------------------------------------------------------------
/barracuda_waf/barracuda_waf_attacks.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Monitor the number of attacks a Barracuda WAF is fighting off.
 4 | # Hereward Cooper <coops@fawk.eu> - 2012
 5 | 
 6 | # OID .1.3.6.1.4.1.20632.8 is meant to return the number of attacks in the last hour.
 7 | # However due to a bug (or wrong documentation) it actually returns the total number
 8 | # of attacks. We therefore use check_mk's internal 'get_counter' function to track this
 9 | # value, and turn it into a rate which we can monitor.
10 | 
# Default (warn, crit) thresholds, in attacks per second.
barracuda_waf_attacks_default_values = (3, 20)


def inventory_barracuda_waf_attacks(checkname, info):
    """Inventory the check when the attack counter OID returns an integer.

    checkname is unused. info is the list of SNMP rows; the first column of
    the first row must parse as an integer. An empty or non-numeric result
    yields an empty inventory instead of raising.
    """
    try:
        int(info[0][0])
    except (IndexError, TypeError, ValueError):
        return []
    # The parameters are the NAME of the defaults variable, as a string.
    return [(None, None, "barracuda_waf_attacks_default_values")]
18 | 
19 | 
def check_barracuda_waf_attacks(item, params, info):
    """Turn the cumulative attack counter into a rate and grade it.

    params is a (warn, crit) pair compared against the rate returned by
    get_counter. Returns (state, text, perfdata).
    """
    now = float(time.time())
    total_attacks = int(info[0][0])
    # get_counter yields (time difference, rate); only the rate is used.
    _timedif, rate = get_counter("barracuda_waf_attacks", now, total_attacks)
    warn, crit = params
    perfdata = [("attacks", rate, warn, crit)]

    if rate > crit:
        state, label = 2, "CRITICAL"
    elif rate > warn:
        state, label = 1, "WARNING"
    else:
        state, label = 0, "OK"
    return (state, "%s - %s attacks per second" % (label, rate), perfdata)
32 | 
# Register the check with check_mk. The tuple presumably follows the legacy
# check_info layout (check function, service description, has-perfdata flag,
# inventory function) -- confirm against the check_mk documentation.
check_info["barracuda_waf_attacks"] = (
    check_barracuda_waf_attacks,
    "Barracuda WAF Attacks",
    1,
    inventory_barracuda_waf_attacks,
)

# Base OID plus the single sub-OID ("4") fetched beneath it.
snmp_info["barracuda_waf_attacks"] = (".1.3.6.1.4.1.20632.8", ["4"])


--------------------------------------------------------------------------------
/barracuda_waf/barracuda_waf_ha.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Monitor the HA status of a pair of Barracuda WAFs via SNMP
 4 | # Hereward Cooper <coops@fawk.eu> - 2012
 5 | 
 6 | # When run against each of the members of the pair, a response of
 7 | # 'Primary:Active' or 'Backup:Standby' is considered healthy.
 8 | 
 9 | def inventory_barracuda_waf_ha(checkname, info):
10 |     inventory=[]
11 |     status = info[0][0]
12 |     if status:
13 |         inventory.append( (None, None) )
14 |     return inventory
15 | 
16 | 
def check_barracuda_waf_ha(item, params, info):
    """Grade the HA state string reported by the device.

    'Primary:Active' and 'Backup:Standby' are healthy; any other value is
    critical. A missing SNMP value returns UNKNOWN; that return was
    previously unreachable and empty info raised IndexError. item and
    params are unused. Returns (state, text).
    """
    if not info or not info[0]:
        return (3, "UNKNOWN - unhandled problem")

    state = info[0][0]
    if state in ("Primary:Active", "Backup:Standby"):
        return (0, "OK - Currently %s" % state)
    return (2, "CRITICAL - Currently %s" % state)
24 | 
# Register the check with check_mk. The tuple presumably follows the legacy
# check_info layout (check function, service description, has-perfdata flag,
# inventory function) -- confirm against the check_mk documentation.
check_info["barracuda_waf_ha"] = (
    check_barracuda_waf_ha,
    "Barracuda WAF HA Status",
    0,
    inventory_barracuda_waf_ha,
)

# Base OID plus the single sub-OID ("14") fetched beneath it.
snmp_info["barracuda_waf_ha"] = (".1.3.6.1.4.1.20632.8", ["14"])


--------------------------------------------------------------------------------
/barracuda_waf/barracuda_waf_system_load.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Monitor the system load of a Barracuda WAF via SNMP
 4 | # Hereward Cooper <coops@fawk.eu> - 2012
 5 | 
 6 | # OID .1.3.6.1.4.1.20632.8.8
 7 | 
 8 | barracuda_waf_system_load_default_values = (50, 75)
 9 | 
10 | def inventory_barracuda_waf_system_load(checkname, info):
11 |     inventory=[]
12 |     status = int(info[0][0])
13 |     inventory.append( (None, None, "barracuda_waf_system_load_default_values") )
14 |     return inventory
15 | 
16 | 
def check_barracuda_waf_system_load(item, params, info):
    """Grade the reported system load against the (warn, crit) thresholds.

    The load is the first column of the first SNMP row, parsed as an
    integer. Returns (state, text, perfdata); a value equal to a threshold
    does not trip it.
    """
    warn, crit = params
    load = int(info[0][0])
    perfdata = [("load", load, warn, crit)]

    if load > crit:
        state, label = 2, "CRITICAL"
    elif load > warn:
        state, label = 1, "WARNING"
    else:
        state, label = 0, "OK"
    return (state, "%s - Load %s percent" % (label, load), perfdata)
27 | 
# Register the check with check_mk. The tuple presumably follows the legacy
# check_info layout (check function, service description, has-perfdata flag,
# inventory function) -- confirm against the check_mk documentation.
check_info["barracuda_waf_system_load"] = (
    check_barracuda_waf_system_load,
    "Barracuda WAF System Load",
    1,
    inventory_barracuda_waf_system_load,
)

# Base OID plus the single sub-OID ("8") fetched beneath it.
snmp_info["barracuda_waf_system_load"] = (".1.3.6.1.4.1.20632.8", ["8"])


--------------------------------------------------------------------------------
/chef_age/chef_age.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | '''
 4 | Some hosts run chef via cron like this:
 5 |   /usr/bin/chef-solo  && touch /run/chef.last_success
 6 | This check monitors the age of the last_success file.
 7 | '''
 8 | 
 9 | import os
10 | import time
11 | 
# Thresholds, in hours.
WARN = 3
CRIT = 24

try:
    s = os.stat('/run/chef.last_success')
except OSError:
    # Marker file missing or unreadable: the age cannot be calculated.
    print("3 Chef_success_age - UNKNOWN failure in calculating chef age.")
else:
    d = time.time() - s.st_mtime
    d_nice = round(d/60/60, 2)

    # An age exactly on a threshold counts as the worse state; previously an
    # age equal to WARN matched neither range and was reported as CRITICAL.
    if d < (60*60*WARN):
        print(f"0 Chef_success_age - chef successfully ran {d_nice} hours ago.")
    elif d < (60*60*CRIT):
        print(f"1 Chef_success_age - chef hasn't run successfully for {d_nice} hours.")
    else:
        print(f"2 Chef_success_age - chef hasn't run successfully for {d_nice} hours.")


--------------------------------------------------------------------------------
/chef_nodes/README.md:
--------------------------------------------------------------------------------
 1 | This is a local check_mk script for monitoring the last time chef nodes (i.e. clients)
 2 | checked-in with a chef server. A shell script wrapper executes a `knife` script to produce
 3 | data about each chef node. Alarm thresholds are configured in `knife_status.rb`.
 4 | 
 5 | Place `chef_nodes.sh` in your check_mk local directory on the machine with knife setup, and amend the paths.
 6 | Place `knife_status.rb` somewhere convenient e.g. `$KNIFE_DIR/scripts/`
 7 | 
 8 | Example output:
 9 | 
10 | ```
11 | ~$ telnet localhost 6556
12 | [...]
13 | <<<local>>>
14 | 0 chef_node-mailserver age=871 OK - [base,cron-delvalidate,check-mk-agent,devenv,ssh_authorized_keys::internal,qemu-guest]
15 | 0 chef_node-webserver1 age=32 OK - [base,cron-delvalidate,check-mk-agent,devenv,ssh_authorized_keys::internal,qemu-guest]
16 | 0 chef_node-webserver2 age=841 OK - [base,cron-delvalidate,check-mk-agent,devenv,ssh_authorized_keys::internal,qemu-guest]
17 | 0 chef_node-proxyserver age=271 OK - [base,cron-delvalidate,check-mk-agent,devenv,ssh_authorized_keys::internal,qemu-guest]
18 | 2 chef_node-smtpserver age=3271 CRITICAL - [base,cron-delvalidate,check-mk-agent,devenv,ssh_authorized_keys::internal,qemu-guest]
19 | [...]
20 | ```
21 | 


--------------------------------------------------------------------------------
/chef_nodes/chef_nodes.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
export HOME=/home/hcooper   # needed to keep knife happy
# Your knife directory. Stop if it is missing rather than running knife
# from whatever directory the agent happened to start in.
cd /home/hcooper/chef-repo || exit 1
/usr/bin/knife exec scripts/knife_status.rb
5 | 


--------------------------------------------------------------------------------
/chef_nodes/knife_status.rb:
--------------------------------------------------------------------------------
 1 | # This is run from your knife folder, e.g. "knife exec scripts/knife_status.rb"
 2 | stats=Array.new
 3 | now = Time.now.to_i
 4 | criticaloffset = 7200
 5 | warningoffset = 3600
 6 | 
 7 | nodes.all do |thisnode|
 8 |   checkintime=Time.at(thisnode['ohai_time']).to_i
 9 |   recipes = thisnode.run_list.expand(thisnode.chef_environment).recipes.join(",")
10 | 
11 |   if checkintime + criticaloffset < now then
12 |     print "2 chef_node-%s age=%s CRITICAL - [%s]\n" % [thisnode.name, now - checkintime, recipes]
13 | 
14 |   elsif checkintime + warningoffset < now then
15 |     print "1 chef_node-%s age=%s WARNING - [%s]\n" % [thisnode.name, now - checkintime, recipes]
16 | 
17 |   else
18 |     print "0 chef_node-%s age=%s OK - [%s]\n" % [thisnode.name, now - checkintime, recipes]
19 | 
20 |   end
21 | end
22 | 


--------------------------------------------------------------------------------
/cisco_ace/cisco_ace_cpu.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Monitoring the CPU usage of a Cisco ACE load balancer
 4 | # Hereward Cooper <coops@fawk.eu> - 2012
 5 | 
 6 | # Uses OID .1.3.6.1.4.1.9.9.109.1.1.1.1.8.1
 7 | 
 8 | cisco_ace_cpu_default_values = (35, 40)
 9 | 
10 | def inventory_cisco_ace_cpu(checkname, info):
11 |     inventory=[]
12 |     status = int(info[0][0])
13 |     if status < 11:
14 |         inventory.append( (None, None, "cisco_ace_cpu_default_values") )
15 |     return inventory
16 | 
17 | 
def check_cisco_ace_cpu(item, params, info):
    """Grade the reported CPU percentage against the (warn, crit) thresholds.

    The value is the first column of the first SNMP row, parsed as an
    integer. Returns (state, text, perfdata); a value equal to a threshold
    does not trip it.
    """
    warn, crit = params
    cpu = int(info[0][0])
    perfdata = [("cpu", cpu, warn, crit)]

    if cpu > crit:
        state, label = 2, "CRITICAL"
    elif cpu > warn:
        state, label = 1, "WARNING"
    else:
        state, label = 0, "OK"
    return (state, "%s - CPU is %s percent" % (label, cpu), perfdata)
28 | 
# Register the check with check_mk. The tuple presumably follows the legacy
# check_info layout (check function, service description, has-perfdata flag,
# inventory function) -- confirm against the check_mk documentation.
check_info["cisco_ace_cpu"] = (
    check_cisco_ace_cpu,
    "Cisco ACE CPU 5min Avg",
    1,
    inventory_cisco_ace_cpu,
)

# Base OID plus the single sub-OID ("1") fetched beneath it.
snmp_info["cisco_ace_cpu"] = (".1.3.6.1.4.1.9.9.109.1.1.1.1.8", ["1"])


--------------------------------------------------------------------------------
/cisco_ace/cisco_ace_peer_status.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Monitoring the HA status of a pair of Cisco ACE load balancers.
 4 | # Hereward Cooper <coops@fawk.eu> - 2012
 5 | 
 6 | # We take the response 'OK - Peer is compatible' to mean HA is healthy
 7 | # Used OID .1.3.6.1.4.1.9.9.650.1.1.2.1.1.1
 8 | 
 9 | def inventory_cisco_ace_peer_status(checkname, info):
10 |     inventory=[]
11 |     status = int(info[0][0])
12 |     if status < 11:
13 |         inventory.append( (None, None) )
14 |     return inventory
15 | 
16 | 
def check_cisco_ace_peer_status(item, params, info):
    """Grade the HA peer status code reported by the device.

    Code 8 is treated as 'peer is compatible' (healthy); any other integer
    is critical. A missing or non-numeric value returns UNKNOWN; that
    return was previously unreachable and such input raised instead. item
    and params are unused. Returns (state, text).
    """
    try:
        state = int(info[0][0])
    except (IndexError, TypeError, ValueError):
        return (3, "UNKNOWN - unhandled problem")

    if state == 8:
        return (0, "OK - Peer is compatible")
    return (2, "CRITICAL - Peer is not compatible!")
24 | 
# Register the check with check_mk. The tuple presumably follows the legacy
# check_info layout (check function, service description, has-perfdata flag,
# inventory function) -- confirm against the check_mk documentation.
check_info["cisco_ace_peer_status"] = (
    check_cisco_ace_peer_status,
    "Cisco ACE Peer Status",
    0,
    inventory_cisco_ace_peer_status,
)

# Base OID plus the single sub-OID ("1") fetched beneath it.
snmp_info["cisco_ace_peer_status"] = (".1.3.6.1.4.1.9.9.650.1.1.2.1.1", ["1"])


--------------------------------------------------------------------------------
/generic_local/README.md:
--------------------------------------------------------------------------------
 1 | # generic_local check_mk plugin
 2 | 
 3 | ## Overview
 4 | The generic_local plugin for check_mk provides an easy way to monitor the status
 5 | of a service without having to script a custom plugin from scratch.
 6 | 
 7 | Originally written to monitor any given values reported by "SHOW STATUS" in
 8 | MySQL, it was updated to be generic and allow monitoring of any service
 9 | which can present its stats in a two-column format.
10 | 
11 | For more information on check_mk local plugins, and the format check_mk expects
12 | have a look at the author's [website](http://mathias-kettner.de/checkmk_localchecks.html).
13 | 
14 | ## Installation
15 | The plugin is designed to be a *local* plugin, which doesn't require any
16 | configuration on the server side. This means changes to thresholds are done
17 | locally too. Just place the script (set as executable) in the check_mk_agent local
18 | directory (/usr/lib/check_mk_agent/local on Debian).
19 | 
20 | ## Testing
21 | As soon as the script is in place its results should be included in the output
22 | of check_mk. Test this simply with "*telnet localhost 6556*".
23 | 
24 | To make the checks live, on the nagios server run "*check_mk -I hostname*",
25 | followed by a nagios reload "*check_mk -O*".
26 | 
27 | ## Example Configurations
28 | ### Varnish
29 | 
30 | ```
31 | status_command="/usr/bin/varnishstat -1"
32 | prefix = "Varnish_"
33 | checks = [
34 |        ( "backend_fail", 10, 100),
35 |        ( "client_conn", 40, 100)
36 | ]
37 | ```
38 | 
39 | #### Example output:
40 | 
41 | ```
42 | 2 Varnish_client_conn - CRITICAL: Varnish_client_conn 4630
43 | 1 Varnish_backend_fail - WARNING: Varnish_backend_fail 18
44 | ```
45 | 
46 | ### MySQL
47 | 
48 | ```
49 | status_command="/usr/bin/mysql -e 'SHOW STATUS'"
50 | prefix = "MySQL_"
51 | checks = [
52 |         ( "Qcache_lowmem_prunes", 10, 100),
53 |         ( "Max_used_connections", 25, 30),
54 |         ( "Threads_connected", 5, 10),
55 |         ( "Open_files", 512, 1024),
56 |         ( "Open_tables", 256, 512),
57 |         ( "Slow_queries", 10, 100)
58 | ]
59 | ```
60 | 
61 | #### Example output:
62 | 
63 | ```
64 | 0 MySQL_Max_used_connections - OK: MySQL_Max_used_connections 20
65 | 0 MySQL_Open_files - OK: MySQL_Open_files 445
66 | 0 MySQL_Open_tables - OK: MySQL_Open_tables 256
67 | 0 MySQL_Qcache_lowmem_prunes - OK: MySQL_Qcache_lowmem_prunes 0
68 | 0 MySQL_Slow_queries - OK: MySQL_Slow_queries 0
69 | 0 MySQL_Threads_connected - OK: MySQL_Threads_connected 1
70 | ```
71 | 


--------------------------------------------------------------------------------
/generic_local/generic_local.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # This is a local plugin for check_mk, suitable for monitoring
  4 | # any service variables which are presented in a list. Currently
  5 | # successfully tested with MySQL and Varnish.
  6 | 
  7 | # Hereward Cooper <coops@fawk.eu>
  8 | # https://github.com/hcooper/check_mk-plugins
  9 | 
 10 | #------------------------------------------------------------
 11 | # CONFIGURATION
 12 | #------------------------------------------------------------
 13 | 
# What's the command to run to get our raw data?
# Its output is read one line at a time; the first whitespace-separated
# field of a line is the variable name and the second its value.
#status_command="/usr/bin/varnishstat -1"
status_command="mysql -e 'show status'"
#status_command="mysqladmin status | sed 's/  /\\n/g' | sed 's/ /_/g' | sed 's/:_/ /g'"


# What's a human-friendly prefix to name our checks?
# It is prepended to the variable name to form the check name.
#prefix = "Varnish_"
prefix = "MySQL_"

# What variables shall we actually check for? Format:
# ("variable name", warning threshold, critical threshold)
# Set the warn and crit to None and it will never alert, only get graphed
checks = [
#   ( "Queries_per_second_avg", 1, 5),
    ( "Qcache_lowmem_prunes", 10, 100),
    ( "Max_used_connections", 40, 60),
    ( "Threads_connected", None, None),
    ( "Open_files", 512, 1024),
    ( "Open_tables", 256, 512),
    ( "Slow_queries", 10, 100)
]

#------------------------------------------------------------
# PREPARE FOR BATTLE
#------------------------------------------------------------

import sys
import os

# Run the command to retrieve the raw data
# The whole output is kept as one string and split into lines later.
status = os.popen(status_command).read()
 46 | 
 47 | 
 48 | #------------------------------------------------------------
 49 | # DEBUG
 50 | #------------------------------------------------------------
 51 | 
 52 | # To save commenting and uncommenting each time, just call this debug function
 53 | def debug():
 54 |         print "-----------------------"
 55 |         print "DEBUG: Parsed Variables"
 56 |         print "-----------------------"
 57 |         for line in status.split('\n'):
 58 |                 try:
 59 |                         sys.stdout.write("VAR: " + line.split()[0])
 60 |                 except:
 61 |                         continue
 62 |                 try:
 63 |                     print " - " + line.split()[1]
 64 |                 except:
 65 |                     continue
 66 | 
 67 |         print "----------------"
 68 |         print "DEBUG: My Checks"
 69 |         print "----------------"
 70 |         for check,warn,crit in checks:
 71 |             print check, warn, crit
 72 | 
 73 | #------------------------------------------------------------
 74 | # THE MAGIC
 75 | #------------------------------------------------------------
 76 | 
 77 | # Function to output the check result in check_mk format
 78 | def output(state,chkname,text,value,warn,crit):
 79 |     if warn == None and crit == None:
 80 |         # Don't print the warn/crit values if they are set to None
 81 |         print "%s %s %s=%s %s - %s %s" % (state,chkname,chkname,value,text,chkname,value)
 82 |     else:
 83 |         print "%s %s %s=%s;%s;%s %s - %s %s" % (state,chkname,chkname,value,warn,crit,text,chkname,value)
 84 | 
 85 | # Read through each line in the output of SHOW STATUS
 86 | def run_checks():
 87 |         for line in status.splitlines():
 88 | 
 89 |                 # Sometimes we have a mare reading the first and last lines. Skip if needed.
 90 |                 try:
 91 |                     var = line.split()[0]
 92 |                 except:
 93 |                     continue
 94 |         
 95 |                 # Some variables don't actually have a value set. Handle this (skip for now).
 96 |                 try:
 97 |                     var_value = float(line.split()[1])
 98 |                 except:
 99 |                     continue
100 |         
101 |                 # Read through each of our configured variables to check
102 |                 for check,warn,crit in checks:
103 | 
104 |                     # Is the variable on the list to check?
105 |                     if var == check:
106 | 
107 |                         # Merge the prefix and variable name now so we don't have to keep doing it.
108 |                         chkname = prefix + var
109 | 
110 |                         # If the crit and warn are None, we're not alerting, just graphing
111 |                         if warn == None and crit == None:
112 |                             output(0,chkname,"OK",var_value,warn,crit)
113 | 
114 |                         # Otherwise do normal alerting
115 |                         elif var_value > crit:
116 |                             output(2,chkname,"CRITICAL",var_value,warn,crit)
117 |                         elif var_value > warn:
118 |                             output(1,chkname,"WARNING",var_value,warn,crit)
119 |                         else:
120 |                             output(0,chkname,"OK",var_value,warn,crit)
121 | 
# Actually do some work! This runs as soon as the script is executed and
# prints the result lines for the configured checks.
run_checks()
# Uncomment to dump the parsed variables and the configured checks as well.
#debug()
125 | 


--------------------------------------------------------------------------------
/haproxy/haproxy.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | # A check_mk plugin to monitor the status of an HAProxy server.
  4 | # Hereward Cooper <coops@fawk.eu>
  5 | 
  6 | # Requirements:
  7 | # - HAproxy socket
  8 | #   (config: "stats socket /var/run/haproxy.socket")
  9 | 
 10 | import os
 11 | import re
 12 | import sys
 13 | from io import StringIO
 14 | import socket
 15 | from time import time
 16 | 
 17 | __version__ = "0.2"
 18 | __author__ = "Hereward Cooper <coops@fawk.eu>"
 19 | __website__ = "http://github.com/hcooper/haproxy-tools/"
 20 | 
 21 | 
 22 | def build_array(rawstats):
 23 |     """ Convert the raw stats into nested arrays. Much nicer to use.
 24 |     This functions creates an array, with each element being a dictonary of checks for each server
 25 |     e.g. servers = [ {pxname: app1, rate: 15...}, {pxname: app2, rate: 7...} ] """
 26 | 
 27 |     stats=[]
 28 | 
 29 |     for line in rawstats.split('\n'):
 30 | 
 31 |         if re.match(r'^\s*$', line):  # skip empty lines
 32 |             continue
 33 | 
 34 |         values = line.split(',')
 35 | 
 36 |         if re.match(r'^#', line):  # first line contains the header names
 37 |             titles = values
 38 |             titles[0] = titles[0][2:]  # remove the '# ' from the first element
 39 |             continue
 40 | 
 41 |         stats.append(dict(zip(titles,values)))  # create the dict containing our results
 42 | 
 43 |     return stats
 44 | 
 45 | 
 46 | def run_checks(servers):
 47 |     """ Interate through each server, and then each defined check, and compare the values to
 48 |     the critical/warning levels, then alert if need be. """
 49 | 
 50 |     for server in servers:
 51 |         server['fullname'] = server['pxname'] + "/" + server['svname']  # Combine 'svname' and 'pxname' to get a unique name
 52 | 
 53 |         # Define some variables before use
 54 |         result=""           # The complete set of results for each server's checks
 55 |         allperf=""          # The complete set of all performance data for a server
 56 |         alert_warn=False    # Flag set if  check makes a server WARN
 57 |         alert_crit=False    #   "   "   "   "   "   "   "   "   CRIT
 58 | 
 59 |         for check,warn,crit in checks:
 60 |             output=""
 61 |             perfdata=""
 62 | 
 63 |             # If the value we're looking for isn't present, skip it. (e.g. FRONTEND doesn't have chkfail)
 64 |             if not server[check]:
 65 |                 continue
 66 | 
 67 |             # Special check for the "status" field as it's not a numeric value
 68 |             if check == "status":
 69 |                 if server['status'] == "UP":
 70 |                     output += "status UP"
 71 |                 elif server['status'] == "DOWN":
 72 |                     output += "status DOWN"
 73 |                     alert_crit = True
 74 |                 elif server['status'] == "OPEN":
 75 |                     output += "status OPEN"
 76 |                 # Add more status options here
 77 |     
 78 |             # Generic check for the other fields which are numeric
 79 |             # Make sure int() is used when needed!
 80 |             else:
 81 |                 if int(server[check]) >= int(warn) and int(server[check]) < int(crit):
 82 |                     output += check + " WARN " + server[check] + ", "
 83 |                     alert_warn = True
 84 |                 if int(server[check]) >= int(crit):
 85 |                     output += check + " CRIT " + server[check] + ", "
 86 |                     alert_crit = True
 87 |                 #if server[check] < warn:          # Disabled so OK doesn't give out stats 
 88 |                     #output += "| " + check + " OK " + server[check]
 89 | 
 90 |                 perfdata += check + "=" + server[check] + ";" + warn + ";" + crit
 91 | 
 92 |             # Build the output performance data, putting | in the right places
 93 |             if allperf == "":
 94 |                 allperf = perfdata
 95 |             elif perfdata != "":
 96 |                 allperf += "|" + perfdata
 97 | 
 98 |             # Build the check output
 99 |             result += output
100 | 
101 |         # If any of our checks have set the crit/warn flags, act on it
102 |         if alert_crit:
103 |             print ("2 HAProxy_%s %s CRITICAL - [%s]" % (server['fullname'], allperf, result))
104 |         elif alert_warn:
105 |             print ("1 HAProxy_%s %s WARNING - [%s]" % (server['fullname'], allperf, result))
106 |         else:
107 |             print ("0 HAProxy_%s %s OK - [%s]" % (server['fullname'], allperf, result))
108 | 
109 | 
class HAProxyStats(object):
    """ Used for communicating with HAProxy through its local UNIX socket interface"""

    def __init__(self, socket_name=None):
        # Filesystem path of the HAProxy stats socket
        self.socket_name = socket_name

    def getstats(self, timeout=200):
        """ Send 'show stat' to HAProxy's local UNIX socket and wait up to
        'timeout' milliseconds for the complete response.

        Returns the parsed statistics as produced by build_array().
        On any failure (connect error, timeout, unparsable data) prints
        "Failed to retrieve stats" and exits with status 1. """

        seconds = timeout / 1000.0  # the public unit is milliseconds
        deadline = time() + seconds
        chunks = []

        client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)

        try:
            client.settimeout(seconds)
            client.connect(self.socket_name)
            client.sendall(b'show stat\n')

            while True:
                remaining = deadline - time()
                if remaining <= 0:
                    raise socket.timeout("no complete response within %s ms" % timeout)
                # Never block in recv() past the overall deadline
                client.settimeout(remaining)

                chunk = client.recv(4096)
                if not chunk:
                    # Peer closed the connection: the response is complete.
                    # Decode once so multi-byte characters split across
                    # chunks are handled correctly.
                    return build_array(b''.join(chunks).decode())
                chunks.append(chunk)
        except Exception:
            print ("Failed to retrieve stats")
            sys.exit(1)
        finally:
            client.close()
141 | 
142 | 
143 | 
# Script entry point: read the stats from the local HAProxy socket and print
# one check result line per proxy/server entry.
if __name__ == "__main__":

    # Path of the HAProxy stats socket (see the requirements in the file header)
    socketfile = "/var/run/haproxy.socket"

    # Bail out early with a readable message if the socket is not there
    if not os.path.exists(socketfile):
            print ("Socket does not exist")
            sys.exit(1)

    statssocket = HAProxyStats(socketfile)
    stats = statssocket.getstats()

    # Imported here on purpose: this binds the module-level name 'checks'
    # that run_checks() iterates over, so it must happen before the call below.
    from haproxychecks import checks

    run_checks(stats)
158 | 


--------------------------------------------------------------------------------
/haproxy/haproxychecks.py:
--------------------------------------------------------------------------------
# Example checks for the HAProxy plugin.
# Each entry is [fieldname, warning, critical]. The thresholds are written as
# strings because haproxy.py converts them with int() for the comparison and
# concatenates them as-is into the performance data.
checks = [
    #fieldname, warning, critical
    ['scur', '250', '500'],
    ['chkfail', '15', '25'],
    ['status', '', ''] # status stays at the end, just for formatting purposes
]

# This module only provides the 'checks' list for haproxy.py to import.
if __name__ == "__main__":
        print ("This file is not meant to be called directly")
11 | 


--------------------------------------------------------------------------------
/lsi_megariad/lsi_megaraid.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Quick hack to monitoring LSI MegaRaid via SNMP with check_mk
 4 | # Tested using Windows LSI tools
 5 | # Hereward Cooper <coops@fawk.eu> - Sep 2012
 6 | 
 7 | # Currently checks 4 values, with a base OID of 1.3.6.1.4.1.3582.4.1.4.1.2.1
 8 | # .19 = vdDegradedCount
 9 | # .20 = vdOfflineCount
10 | # .24 = pdDiskFailedCount
11 | # .23 = pdDiskPredFailureCount
12 | 
def inventory_lsi_megaraid(checkname, info):
    """Return the inventory for this check.

    A single item-less, parameter-less entry is reported whenever 'info'
    is anything other than an empty list; otherwise nothing is inventoried.
    """
    if info == []:
        return []
    return [(None, None)]
19 | 
20 | 
def check_lsi_megaraid(item, params, info):
    """Check the four MegaRAID error counters.

    info[0] is expected to hold the counter values (as strings) in the same
    order as the OID list registered in snmp_info. Every counter that is not
    '0' is reported, in that fixed order.

    Returns a (state, text) tuple: state 0 when all counters are '0',
    state 2 listing every non-zero counter, state 3 when no data arrived.
    """

    # The 'nice' names for our checks, in OID order
    checks = ['vdDegradedCount', 'vdOfflineCount', 'pdDiskFailedCount', 'pdDiskPredFailureCount']

    if not info:
        return (3, "UNKNOWN - No data received")

    # Pair each name with its value; keep the list order so the message is
    # deterministic and mentions all failing counters, not just one.
    problems = ["%s: %s" % (check, value)
                for check, value in zip(checks, info[0])
                if value != '0']

    if problems:
        return (2, "CRITICAL - " + ", ".join(problems))

    # If nothing was flagged, return OK
    return (0, "OK - No reported errors")
38 | 
39 | 
# Register the check and its SNMP data source.
# NOTE(review): check_info and snmp_info are not defined in this file;
# presumably the check_mk framework provides them when it loads the plugin.
# The OID suffixes are listed in the same order as the counter names used in
# check_lsi_megaraid (19, 20, 24, 23 - see the header comment).
check_info["lsi_megaraid"] = (check_lsi_megaraid, "LSI MegaRAID", 0, inventory_lsi_megaraid)
snmp_info["lsi_megaraid"] = ( ".1.3.6.1.4.1.3582.4.1.4.1.2.1", ["19", "20", "24", "23"] )
42 | 


--------------------------------------------------------------------------------
/mod_status/README:
--------------------------------------------------------------------------------
1 | This script is designed to dump apache's mod_status output into a format which the
2 | generic_local check can work with:
3 | 
4 | https://github.com/hcooper/check_mk-plugins/tree/master/generic_local
5 | 


--------------------------------------------------------------------------------
/mod_status/mod_status-dump.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """ Fetch Apache stats via mod_status
  4 | By Hereward Cooper
  5 | (modified from Zabbix code by Paulson McIntyre)
  6 | 
  7 | This program is free software: you can redistribute it and/or modify
  8 | it under the terms of the GNU General Public License as published by
  9 | the Free Software Foundation, either version 3 of the License, or
 10 | (at your option) any later version.
 11 | 
 12 | This program is distributed in the hope that it will be useful,
 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 | GNU General Public License for more details.
 16 | 
 17 | You should have received a copy of the GNU General Public License
 18 | along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | """
 20 | 
 21 | import urllib
 22 | from optparse import OptionParser
 23 | import os
 24 | from tempfile import mkstemp
 25 | import StringIO
 26 | import csv
 27 | 
 28 | 
 29 | def fetchURL(url, user = None, passwd = None):
 30 |     """ Return the data from a URL """
 31 |     if user and passwd:
 32 |         parts = url.split('://')
 33 |         url = parts[0] + "://" + user + ":" + passwd + "@" + parts[1]
 34 |     
 35 |     conn = urllib.urlopen(url)
 36 |     try:
 37 |         data = conn.read()
 38 |     finally:
 39 |         conn.close()
 40 |     return data
 41 | 
 42 | def clean(string, chars):
 43 |     for i in chars:
 44 |         string = string.replace(i, '')
 45 |     return string
 46 | 
 47 | def parse(data):
 48 |     """ Parse the CSV file into a dict of data
 49 |     """
 50 |     mapping = {
 51 |         "_":"Waiting for Connection",
 52 |         "S":"Starting up",
 53 |         "R":"Reading Request",
 54 |         "W":"Sending Reply",
 55 |         "K":"Keepalive (read)",
 56 |         "D":"DNS Lookup",
 57 |         "C":"Closing connection",
 58 |         "L":"Logging",
 59 |         "G":"Gracefully finishing",
 60 |         "I":"Idle cleanup of worker",
 61 |         ".":"Open slot with no current process",
 62 |         }
 63 |     # Clean out certian chars
 64 |     replace = '() '
 65 |     csvobj = csv.reader(StringIO.StringIO(data), delimiter = ":", skipinitialspace = True)
 66 |     ret = {}
 67 |     for (key, val) in csvobj:
 68 |         if key == 'Scoreboard':
 69 |             sb = {
 70 |                 "Waiting for Connection":0,
 71 |                 "Starting up":0,
 72 |                 "Reading Request":0,
 73 |                 "Sending Reply":0,
 74 |                 "Keepalive (read)":0,
 75 |                 "DNS Lookup":0,
 76 |                 "Closing connection":0,
 77 |                 "Logging":0,
 78 |                 "Gracefully finishing":0,
 79 |                 "Idle cleanup of worker":0,
 80 |                 "Open slot with no current process":0,
 81 |                 }
 82 |             for i in val:
 83 |                 sb[mapping[i]] += 1
 84 |             ret[key] = sb
 85 |         else:
 86 |             ret[key] = val
 87 |     ret2 = {}
 88 |     for (key, val) in ret.items():
 89 |         if key == "Scoreboard":
 90 |             for (key, val) in val.items():
 91 |                 ret2[clean(key, replace)] = val
 92 |         else:
 93 |             ret2[clean(key, replace)] = val
 94 |             
 95 |     return ret2
 96 | 
 97 | if __name__ == "__main__":
 98 |     parser = OptionParser(
 99 |                         usage = "%prog [-o <Apache hostname or IP>]",
100 |                         version = "%prog $Revision$",
101 |                         prog = "mod_status-dump",
102 |                         description = """This program gathers data from Apache's built-in status page
103 |                         and dumps them into a simple list.
104 |                         """,
105 |                         )
106 |     parser.add_option(
107 |                       "-l",
108 |                       "--url",
109 |                       action = "store",
110 |                       type = "string",
111 |                       dest = "url",
112 |                       default = None,
113 |                       help = "Override the automatically generated URL with one of your own",
114 |                       )
115 |     parser.add_option(
116 |                       "-o",
117 |                       "--host",
118 |                       action = "store",
119 |                       type = "string",
120 |                       dest = "host",
121 |                       default = "localhost",
122 |                       help = "Host to connect to. [default: %default]",
123 |                       )
124 |     parser.add_option(
125 |                       "-p",
126 |                       "--port",
127 |                       action = "store",
128 |                       type = "int",
129 |                       dest = "port",
130 |                       default = 80,
131 |                       help = "Port to connect on. [default: %default]",
132 |                       )
133 |     parser.add_option(
134 |                       "-r",
135 |                       "--proto",
136 |                       action = "store",
137 |                       type = "string",
138 |                       dest = "proto",
139 |                       default = "http",
140 |                       help = "Protocol to connect on. Can be http or https. [default: %default]",
141 |                       )
142 |     parser.add_option(
143 |                       "-u",
144 |                       "--user",
145 |                       action = "store",
146 |                       type = "string",
147 |                       dest = "user",
148 |                       default = None,
149 |                       help = "HTTP authentication user to use when connection. [default: None]",
150 |                       )
151 |     parser.add_option(
152 |                       "-a",
153 |                       "--passwd",
154 |                       action = "store",
155 |                       type = "string",
156 |                       dest = "passwd",
157 |                       default = None,
158 |                       help = "HTTP authentication password to use when connecting. [default: None]",
159 |                       )
160 |     (opts, args) = parser.parse_args()
161 | 
162 |     if opts.url and (opts.port != 80 or opts.proto != "http"):
163 |         parser.error("Can't specify -u with  -p or -r")
164 | 
165 |     if not opts.url:
166 |         opts.url = "%s://%s:%s/server-status?auto" % (opts.proto, opts.host, opts.port)
167 | 
168 |     data = fetchURL(opts.url, user = opts.user, passwd = opts.passwd)
169 | 
170 | 
171 |     try:
172 |         data = parse(data = data)
173 |     except csv.Error:
174 |         parser.error("Error parsing returned data")
175 | 
176 | 
177 |     try:
178 |         for key, val in data.items():
179 |             print "%s %s" % (key, val)
180 |     except:
181 |         parser.error("Error printing values")


--------------------------------------------------------------------------------
/mysql_rep/mysql_rep.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # A mysql replication plugin for the check_mk nagios system.
 4 | # Place me in /usr/lib/check_mk_agent/local on the client
 5 | # Hereward Cooper <coops@fawk.eu> - 16/06/11
 6 | 
 7 | MYSQL_USER="user"
 8 | MYSQL_PASS="Pa$$word"
 9 | 
10 | # Anything below DELAY_WARNING is fine. Anything between
11 | # DELAY_WARNING and DELAY_CRITICAL is throw up a warning. Anything
12 | # above DELAY_CRITICAL will trigger a critical alert.
13 | DELAY_WARNING=60
14 | DELAY_CRITICAL=360
15 | 
16 | # Graph details
17 | MIN=0
18 | MAX=400
19 | 
20 | 
21 | MYSQL_STATUS=`mysql -u$MYSQL_USER -p$MYSQL_PASS -e "SHOW SLAVE STATUS\G" | egrep 'Slave_.*_Running|Seconds_Behind_Master' | sed 's/^ *//'`
22 | 
23 | echo "$MYSQL_STATUS" | sed -n '1p' | grep -q Yes && IO=1 || IO=0
24 | echo "$MYSQL_STATUS" | sed -n '2p' | grep -q Yes && SQL=1 || SQL=0
25 | DELAY=`echo "$MYSQL_STATUS" | sed -n '3p' | cut -d " " -f 2`
26 | 
27 | ## Check Slave_IO_Running status
28 | if [ $IO = "1" ]; then
29 |         echo "0 MySQL_Rep_IO - OK - Replication IO Running"
30 | else
31 |         echo "2 MySQL_Rep_IO - CRITICAL - Repication IO Stopped"
32 | fi
33 | 
34 | ## Check Slave_SQL_Running status
35 | if [ $SQL = "1" ]; then
36 |         echo "0 MySQL_Rep_SQL - OK - Replication SQL Running"
37 | else
38 |         echo "2 MySQL_Rep_SQL - CRITICAL - Replication SQL Stopped"
39 | fi
40 | 
41 | ## Check Seconds_Behind_Master value
42 | if [ $DELAY = "NULL" ]; then
43 |         echo "2 MySQL_Rep_Delay delay=$DELAY;$DELAY_WARNING;$DELAY_CRITICAL;$MIN;$MAX CRITICAL - Replication delay NULL"
44 | elif [ $DELAY -lt $DELAY_WARNING ]; then
45 |         echo "0 MySQL_Rep_Delay delay=$DELAY;$DELAY_WARNING;$DELAY_CRITICAL;$MIN;$MAX OK - Replication delay $DELAY seconds"
46 | elif [ $DELAY -lt $DELAY_CRITICAL ]; then
47 |         echo "1 MySQL_Rep_Delay delay=$DELAY;$DELAY_WARNING;$DELAY_CRITICAL;$MIN;$MAX WARNING - Replication delay $DELAY seconds"
48 | elif [ $DELAY -ge $DELAY_CRITICAL ]; then
49 |         echo "2 MySQL_Rep_Delay delay=$DELAY;$DELAY_WARNING;$DELAY_CRITICAL;$MIN;$MAX CRITICAL - Replication delay $DELAY seconds"
50 | fi
51 | 


--------------------------------------------------------------------------------
/sonicwall/sonicwall_cpu.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Monitoring the CPU usage of a Sonicwall
 4 | # Herward Cooper <coops@fawk.eu> - 2012
 5 | 
 6 | # Uses OID .1.3.6.1.4.1.8741.1.3.1.3.0
 7 | 
# Default (warning, critical) CPU thresholds in percent. The inventory
# function hands this variable's name to the check as its parameters.
sonicwall_cpu_default_values = (35, 40)
 9 | 
def inventory_sonicwall_cpu(checkname, info):
    """Return the inventory: always one item-less entry that refers to the
    default thresholds by variable name."""
    return [(None, None, "sonicwall_cpu_default_values")]
14 | 
15 | 
def check_sonicwall_cpu(item, params, info):
    """Compare the CPU usage in info[0][0] against the (warn, crit) params.

    Returns (state, text, perfdata): state 2 above crit, 1 above warn,
    0 otherwise. perfdata carries the value and both thresholds as "cpu".
    """
    warn, crit = params
    usage = int(info[0][0])

    if usage > crit:
        status, label = 2, "CRITICAL"
    elif usage > warn:
        status, label = 1, "WARNING"
    else:
        status, label = 0, "OK"

    return (status, "%s - CPU is %s percent" % (label, usage), [("cpu", usage, warn, crit)])
26 | 
# Register the check function, its service description and its inventory
# function.
# NOTE(review): check_info and snmp_info are not defined in this file;
# presumably the check_mk framework provides them - confirm. The third tuple
# element (1) presumably marks that the check returns performance data.
check_info["sonicwall_cpu"] = (check_sonicwall_cpu, "Sonicwall CPU", 1, inventory_sonicwall_cpu)

# Base OID plus the single sub-OID to fetch (see the header comment)
snmp_info["sonicwall_cpu"] = ( ".1.3.6.1.4.1.8741.1.3.1.3", [ "0" ] )
30 | 


--------------------------------------------------------------------------------
/sonicwall/sonicwall_mem.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Monitoring the Mem usage of a Sonicwall
 4 | # Herward Cooper <coops@fawk.eu> - 2012
 5 | 
 6 | # Uses OID 1.3.6.1.4.1.8741.1.3.1.4.0
 7 | 
# Default (warning, critical) memory thresholds in percent. The inventory
# function hands this variable's name to the check as its parameters.
sonicwall_mem_default_values = (35, 40)
 9 | 
def inventory_sonicwall_mem(checkname, info):
    """Return the inventory: always one item-less entry that refers to the
    default thresholds by variable name."""
    return [(None, None, "sonicwall_mem_default_values")]
14 | 
15 | 
def check_sonicwall_mem(item, params, info):
    """Compare the memory usage in info[0][0] against the (warn, crit) params.

    Returns (state, text, perfdata): state 2 above crit, 1 above warn,
    0 otherwise.
    """
    warn, crit = params
    state = int(info[0][0])
    # The metric is memory usage, so label it "mem" (it used to be recorded
    # under the CPU check's name).
    perfdata = [ ( "mem", state, warn, crit ) ]
    if state > crit:
        return (2, "CRITICAL - Mem is %s percent" % state, perfdata)
    elif state > warn:
        return (1, "WARNING - Mem is %s percent" % state, perfdata)
    else:
        return (0, "OK - Mem is %s percent" % state, perfdata)
26 | 
# Register the check function, its service description and its inventory
# function.
# NOTE(review): check_info and snmp_info are not defined in this file;
# presumably the check_mk framework provides them - confirm. The third tuple
# element (1) presumably marks that the check returns performance data.
check_info["sonicwall_mem"] = (check_sonicwall_mem, "Sonicwall Mem", 1, inventory_sonicwall_mem)

# Base OID plus the single sub-OID to fetch (see the header comment)
snmp_info["sonicwall_mem"] = ( ".1.3.6.1.4.1.8741.1.3.1.4", [ "0" ] )
30 | 


--------------------------------------------------------------------------------
/sun_hardware/sun_hw_win.bat:
--------------------------------------------------------------------------------
 1 | GOTO EndComment
 2 | This script was written to get the status of Sun Hardware via IPMI in Window Server 2003.
 3 | The output is returned in a format which check_mk can understand. It's a bit of a hack
 4 | but it works!
 5 | Hereward Cooper <coops@fawk.eu> - Sometime 2011
 6 | :EndComment
 7 | @echo off
 8 | set COUNT=
 9 | 
10 | :: Retrieve every "ON" alarm from sunoem
11 | C:\ipmiutil.exe sunoem sbled get 2> nul | find /C "ON" > C:\nrpe_nt\sun.txt
12 | 
13 | :: Count the alarms, and if there are more than 2 print an alert, otherwise print an OK.
14 | :: (this vaule of 2 will depend on specific IPMI/Sun configuration, but on the server here
15 | :: there are two 'alarms' which are set to ON when in a fault-free state).
16 | for /F %%A in (C:\nrpe_nt\sun.txt) do set COUNT=%%A
17 | IF %COUNT% EQU 2 (echo 0 Sun_Hardware - OK: 0 Alarms Detected) ELSE (echo 2 Sun_Hardware - CRITICAL: Alarms Detected)
18 | 


--------------------------------------------------------------------------------
/tcp_check/tcp_check.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # check_mk script to check TCP connections to a remote host.
 3 | # first written to simply check transit down a leased line
 4 | # Herward Cooper <coops@fawk.eu> - 2011
 5 | 
 6 | from socket import *
 7 | 
# Our list of checks, in the form: ['ip', port]
# One TCP connection attempt is made, and one result line printed, per entry.
checks = [
        ['11.22.33.44', 8443],
        ['12.13.14.15', 6000],
        ['127.0.0.1', 80],
]

# What to prefix all the check names with
prefix = "TCP_Check_"

# How long to wait for each check, in seconds
timeout = 5

# Applies to every socket created afterwards, so each connection attempt
# below gives up after 'timeout' seconds.
setdefaulttimeout(timeout)
22 | 
# Try one TCP connection per configured target and print the result in
# check_mk local-check format.
for ip, port in checks:

    # Form the check name
    chkname = prefix + ip + ":" + str(port)

    # Define a new socket
    s = socket(AF_INET, SOCK_STREAM)

    try:
        # Open the connection; 0 means success, anything else is an errno
        result = s.connect_ex((ip, port))
    except error:
        # connect_ex only turns connect() failures into a return value;
        # other problems (e.g. an unresolvable name) still raise
        # socket.error, which is treated as unreachable too.
        result = -1
    finally:
        # Close the socket whatever happened
        s.close()

    # Report the result
    if result == 0:
        print("0 %s - OK - %s:%d Reachable" % (chkname, ip, port))
    else:
        print("2 %s - CRITICAL - %s:%d Unreachable" % (chkname, ip, port))
42 | 


--------------------------------------------------------------------------------
/xen_license/README.md:
--------------------------------------------------------------------------------
 1 | XenServer License Checker
 2 | =========================
 3 | 
 4 | A port of Nick Anderson's NRPE plugin for checking
 5 | the validity of XenServer licenses, which works with
 6 | check_mk.
 7 | 
 8 | Original code:
 9 | https://github.com/nickanderson/nagios-plugins-check_xs-license
10 | 


--------------------------------------------------------------------------------
/xen_license/check_xen_license.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # Copyright (c) 2010, Nick Anderson <nick@cmdln.org>
  3 | # All rights reserved.
  4 |  
  5 | # Redistribution and use in source and binary forms, with or without
  6 | # modification, are permitted provided that the following conditions are met:
  7 | # * Redistributions of source code must retain the above copyright
  8 | # notice, this list of conditions and the following disclaimer.
  9 | # * Redistributions in binary form must reproduce the above copyright
 10 | # notice, this list of conditions and the following disclaimer in the
 11 | # documentation and/or other materials provided with the distribution.
 12 | #
 13 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 14 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 15 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 16 | # ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
 17 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 18 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 19 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 20 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 21 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 22 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 23 | 
 24 | import sys
 25 | import time
 26 | import getpass
 27 | import optparse
 28 | 
 29 | from datetime import date
 30 | from string import Template
 31 | 
 32 | import XenAPI
 33 | 
 34 | __version__ = "1.2"
 35 | __author__ = "Nick Anderson <nick@cmdln.org>"
 36 | __website__ = "http://github.com/nickanderson/check_citrix_xenserver_license"
 37 | 
 38 | def main(session, options):
 39 |     """
 40 |     Get number of days until license expires 
 41 |     Return 0 if license not expired, 2 if expired
 42 |     """
 43 |     hosts = session.xenapi.host.get_all()
 44 |     host = hosts[0]
 45 |     record = session.xenapi.host.get_record(host)
 46 |     # Get the date part of the string
 47 |     expires = time.strptime(record["license_params"]["expiry"][0:8], 
 48 | 		    "%Y%m%d")[0:3]
 49 |     # create date object for finding difference in days
 50 |     expires_on = date(int(expires[0]), int(expires[1]), int(expires[2]))
 51 |     expire_days = (expires_on - date.today()).days
 52 |     display = Template('$status')
 53 | 
 54 |     if int(expire_days) > int(options.warning_days):
 55 |         print display.substitute(status='0 XenServer_License - OK Expiring in %s days'%expire_days) 
 56 |         session.xenapi.session.logout()
 57 |         sys.exit(0)
 58 | 
 59 |     # If number of days until expire is less than warning
 60 |     elif int(expire_days) <= int(options.warning_days):
 61 |         if int(expire_days) <= int(options.critical_days):
 62 |             print display.substitute(status='2 XenServer_License - CRITICAL Expiring in %s days'%expire_days) 
 63 |             session.xenapi.session.logout()
 64 |             sys.exit(2)
 65 |         else:
 66 |             print display.substitute(status='1 XenServer_License - WARNING Expiring in %s days'%expire_days) 
 67 |             session.xenapi.session.logout()
 68 |             sys.exit(1)
 69 |     else:
 70 |         print display.substitute(status='3 XenServer_License - UNKOWN')
 71 |         session.xenapi.session.logout()
 72 |         sys.exit(3)
 73 | 
 74 | if __name__ == "__main__":
 75 |     # define options
 76 |     op = optparse.OptionParser("usage: %prog [options]", 
 77 |             version = "%%prog v%s\nAuthor: %s\nWebsite: %s" % 
 78 |             (__version__, __author__, __website__))
 79 | 
 80 |     og_sess = optparse.OptionGroup(op, "Session Options")
 81 |     og_sess.add_option('--server',
 82 |             dest='server',
 83 |             help="xenserver host (default: %default)")
 84 |     og_sess.add_option('--username',
 85 |             dest='username',
 86 |             help="xenserver username (defaut: %default)")
 87 |     og_sess.add_option('--password',
 88 |             dest="password",
 89 |             help="xenserver password")
 90 |     op.add_option_group(og_sess)
 91 | 
 92 |     og_nag = optparse.OptionGroup(op, "Nagios Options")
 93 |     og_nag.add_option('-c',
 94 |             dest='critical_days',
 95 |             help="critical threshold for days (default: %default)")
 96 |     og_nag.add_option('-w',
 97 |             dest='warning_days',
 98 |             help="warn threshold for days (default: %default)")
 99 |     op.add_option_group(og_nag)
100 | 
101 |     op.set_defaults(server='localhost',
102 |             username = 'root',
103 |             password = '',
104 |             warning_days = 30,
105 |             critical_days = 10)
106 | 
107 |     # parse and validate
108 |     (options, args) = op.parse_args()
109 |     if options.password == '':
110 |         options.password = getpass.getpass("password: ")
111 | 
112 |     # First acquire a valid session by logging in:
113 |     session = XenAPI.Session("https://"+options.server)
114 |     try:
115 |         session.xenapi.login_with_password(options.username, options.password)
116 |     except XenAPI.Failure, e:
117 |         if e.details[0]=='HOST_IS_SLAVE':
118 |             session=XenAPI.Session('https://'+e.details[1])
119 |             session.login_with_password(options.username,options.password)
120 |     except:
121 |         print '3 XenServer_License - UNKOWN, can\'t connect'
122 |         sys.exit(3)
123 | 
124 |     main(session, options)
125 | 


--------------------------------------------------------------------------------