├── linux-diskstats-parser ├── cacti │ ├── README.markdown │ ├── cacti_graph_template_io_device_busy.xml │ ├── cacti_graph_template_io_device_volume.xml │ ├── cacti_graph_template_io_device_operations.xml │ └── cacti_data_template_nrpe_disk_stats.xml ├── README.markdown └── diskstatsparse.rb ├── puppet ├── COPYING ├── check_puppetdb_nodes.rb └── check_puppet.rb ├── cacti-lighttpd-fastcgi-statistics ├── lighttpd_fastcgi_statistics.xml ├── query_lighttpd_statistics.rb └── sample.txt ├── cacti-tomcat-statistics └── query_tomcat_statistics.rb ├── nagios-checks ├── check_lighttpd_load.rb ├── check_dir.rb └── check_cert.rb └── activemq ├── activemq-cacti-plugin.rb ├── check_activemq.rb └── check_activemq_queue.rb /linux-diskstats-parser/cacti/README.markdown: -------------------------------------------------------------------------------- 1 | Cacti templates to query disk stats and create 3 graphs for each disk. 2 | 3 | When importing you will need to adjust the path to your nrpe binary 4 | in the data input method 5 | -------------------------------------------------------------------------------- /puppet/COPYING: -------------------------------------------------------------------------------- 1 | Copyright 2009 R.I.Pienaar 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | 15 | -------------------------------------------------------------------------------- /linux-diskstats-parser/README.markdown: -------------------------------------------------------------------------------- 1 | Synopsis 2 | ======== 3 | Simple script to parse /proc/diskstats and pull out stats for a certain 4 | partition 5 | 6 | 7 | Installation 8 | ------------ 9 | This script is intended to run from SNMP using the exec directive, 10 | sample config: 11 | 12 | exec .1.3.6.1.4.1.xxxxxx.1 sdaStats /usr/local/bin/diskstatsparse.rb --device sda 13 | 14 | Replace the xxx above with your registered OID number. 15 | 16 | You can also use this from NRPE or something similar to feed data to Cacti that way: 17 | 18 | command[cacti_sdb_stats]=/usr/local/bin/diskstatsparse.rb --device sdb --mode cacti 19 | 20 | In this mode it will output a series of named fields in Cacti standard format. 21 | 22 | Usage 23 | ----- 24 | diskstatsparse.rb --device DEVICE 25 | 26 | --device DEVICE The device to retrieve stats for, example "sda" 27 | 28 | --mode MODE The output mode to use, snmp or cacti 29 | 30 | --help Shows this help page 31 | 32 | 33 | Author 34 | ------ 35 | R.I.Pienaar 36 | 37 | -------------------------------------------------------------------------------- /cacti-lighttpd-fastcgi-statistics/lighttpd_fastcgi_statistics.xml: -------------------------------------------------------------------------------- 1 | 2 | Get lighttpd FastCGI stats 3 | /usr/local/bin/query_lighttpd_statistics.rb 4 | |host_hostname| 5 | index 6 | query 7 | get 8 | : 9 | 10 | 11 | 12 | cgi 13 | input 14 | cgi 15 | 16 | 17 | backendname 18 | input 19 | cgi 20 | 21 | 22 | died 23 | output 24 | died 25 | 26 | 27 | disabled 28 | output 29 | disabled 30 | 31 | 32 | load 33 | output 34 | load 35 | 36 | 37 | overloaded 38 | output 39 | overloaded 40 | 41 | 42 | processes 43 | output 44 | processes 45 | 46 | 47 | connected 48 | output 49 | connected 50 | 51 | 52 | 53 | 
-------------------------------------------------------------------------------- /cacti-tomcat-statistics/query_tomcat_statistics.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | # Simple script to fetch memory and connector stats from tomcat manager 4 | 5 | require 'net/http' 6 | require 'rexml/document' 7 | require 'optparse' 8 | require 'uri' 9 | require 'pp' 10 | 11 | @options = {:user => nil, 12 | :password => nil, 13 | :url => "http://localhost/manager/status/", 14 | :connector => "http-8080"} 15 | 16 | opt = OptionParser.new 17 | 18 | opt.on("--user [USER]", "-u", "Connect as user") do |val| 19 | @options[:user] = val 20 | end 21 | 22 | opt.on("--password [PASSWORD]", "-p", "Passwod to connect with") do |val| 23 | @options[:password] = val 24 | end 25 | 26 | opt.on("--url [URL]", "-U", "Tomcat manager stats url") do |val| 27 | @options[:url] = val 28 | end 29 | 30 | opt.on("--connector [CONNECTOR]", "Connector to monitor") do |val| 31 | @options[:connector] = val 32 | end 33 | 34 | opt.parse! 
# Fetches the Tomcat manager status page in its XML form.
#
# address  - full URL of the manager status page
# user     - optional basic-auth user name
# password - optional basic-auth password; credentials are only sent when
#            both user and password are given
#
# Returns the response body as a String.  Connection errors raised by
# Net::HTTP are not rescued here.
def get_url(address, user=nil, password=nil)
  url = URI.parse(address)

  # "http://host:8080" parses to an empty path, but the request target must
  # start with "/".
  path = url.path.to_s.empty? ? "/" : url.path

  # Keep any query string the caller supplied and add the XML switch to it.
  query = [url.query, "XML=true"].compact.join("&")

  req = Net::HTTP::Get.new("#{path}?#{query}")
  req.basic_auth user, password if user && password

  res = Net::HTTP.start(url.host, url.port, :use_ssl => url.scheme == "https") do |http|
    http.request(req)
  end

  res.body
end
# Retrieves the machine readable ("?auto") variant of a status page.
#
# url - full URL of the status page, "?auto" is appended to its path
#
# Returns the response body as a String.
#
# Any failure - a non 200 response or an error while connecting - prints a
# one line reason and exits with status 3 (UNKNOWN) so that the check never
# dies with a stack trace and Ruby's default exit status of 1.
def get(url)
  uri = URI.parse(url)
  http = Net::HTTP.new(uri.host, uri.port)

  response = http.get("#{uri.path}?auto")

  unless response.code == "200"
    puts "Failed to retrieve #{url}: #{response.code}"
    exit 3
  end

  response.body
rescue StandardError => e
  # SystemExit is not a StandardError, so the "exit 3" above passes through
  puts "Failed to retrieve #{url}: #{e}"
  exit 3
end
'--crit', '-c', GetoptLong::REQUIRED_ARGUMENT], 37 | [ '--help', '-h', GetoptLong::NO_ARGUMENT] 38 | ) 39 | 40 | def showhelp 41 | begin 42 | require 'rdoc/ri/ri_paths' 43 | require 'rdoc/usage' 44 | RDoc::usage 45 | rescue LoadError => e 46 | puts("Install RDoc::usage or view the comments in the top of the script to get detailed help.") 47 | end 48 | end 49 | 50 | dirname = "" 51 | regex = "" 52 | warn = "" 53 | crit = "" 54 | 55 | opts.each { |opt, arg| 56 | case opt 57 | when '--help' 58 | showhelp 59 | exit 60 | when '--directory' 61 | dirname = arg 62 | when '--regex' 63 | regex = arg 64 | when '--warn' 65 | warn = arg.to_i 66 | when '--crit' 67 | crit = arg.to_i 68 | end 69 | } 70 | 71 | if dirname == "" || warn == "" || crit == "" 72 | showhelp 73 | exit 74 | end 75 | 76 | if warn > crit 77 | puts("UNKNOWN: Parameters does not make sense, warn >= crit") 78 | exit(3) 79 | end 80 | 81 | fcount = 0 82 | 83 | if FileTest.directory?(dirname) 84 | Dir.entries(dirname).each do |path| 85 | next if path =~ /^(\.|\.\.)$/ 86 | 87 | if regex == "" 88 | fcount = fcount + 1 89 | else 90 | if File.basename(path) =~ /#{regex}/ 91 | fcount = fcount + 1 92 | end 93 | end 94 | end 95 | else 96 | puts("UNKNOWN: #{dirname} does not exist or is not a directory") 97 | exit(3) 98 | end 99 | 100 | if fcount >= crit 101 | puts("CRITICAL: #{fcount} files found in #{dirname} expected <= #{crit}") 102 | exit(2) 103 | elsif fcount >= warn 104 | puts("WARNING: #{fcount} files found in #{dirname} expected <= #{warn}") 105 | exit(1) 106 | else 107 | puts("OK: #{fcount} files found in #{dirname}") 108 | exit(0) 109 | end 110 | -------------------------------------------------------------------------------- /linux-diskstats-parser/diskstatsparse.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | # == Synopsis 3 | # Simple script to parse /proc/diskstats and pull out stats for a certain partition 4 | # 5 | # == Installation 6 | # This 
# script is intended to run from SNMP using the exec directive, sample config:
#
#    exec .1.3.6.1.4.1.xxxxxx.1 sdaStats /usr/local/bin/diskstatsparse.rb --device sda
#
# Replace the xxx above with your registered OID number.
#
# You can also use this from NRPE or something similar to feed data to Cacti that way:
#
#    command[cacti_sdb_stats]=/usr/local/bin/diskstatsparse.rb --device sdb --mode cacti
#
# In this mode it will output a series of named fields in Cacti standard format.
#
# == Usage
#    diskstatsparse.rb --device DEVICE
#
#    --device DEVICE
#        The device to retrieve stats for, example "sda"
#
#    --mode MODE
#        The mode to operate in, either snmp or cacti.  snmp is default
#
#    --help
#        Shows this help page
#
# == Author
# R.I.Pienaar

require 'getoptlong'

opts = GetoptLong.new(
  [ '--device', '-d', GetoptLong::REQUIRED_ARGUMENT],
  [ '--mode', '-m', GetoptLong::REQUIRED_ARGUMENT],
  [ '--help', '-h', GetoptLong::NO_ARGUMENT]
)

# Prints the usage text through RDoc::usage; when that library cannot be
# loaded a pointer to the comments at the top of the script is printed instead.
def showhelp
  begin
    require 'rdoc/ri/ri_paths'
    require 'rdoc/usage'
    RDoc::usage
  rescue LoadError
    puts("Install RDoc::usage or view the comments in the top of the script to get detailed help.")
  end
end

device = ""
mode = "snmp"

opts.each do |opt, arg|
  case opt
  when '--help'
    showhelp
    exit
  when '--device'
    device = arg
  when '--mode'
    mode = arg
  end
end

if device == ""
  showhelp
  exit
end

begin
  # block form so the handle on /proc/diskstats is closed again
  line = File.open("/proc/diskstats", 'r') do |file|
    file.select { |l| l =~ /\s#{device}\s/ }
  end

  if (line.size > 0)
    if mode == "snmp"
      # one column per output line
      puts(line[0].split)
    elsif mode == "cacti"
      # names for the counters that follow the major, minor and device name columns
      stats = ["reads", "merged_reads", "sectors_read", "read_time", "writes", "writes_merged",
               "sectors_written", "write_time", "io_in_progress", "io_time", "weighted_io_time"]
      result = []

      # Take exactly as many columns as there are names.  Kernels that report
      # more than these 11 counters per device would otherwise produce
      # fields without a name.
      line[0].split[3, stats.size].each_with_index do |item, idx|
        result << "#{stats[idx]}:#{item}"
      end

      puts result.join(" ")
    else
      puts "Unknown mode #{mode} should be 'snmp' or 'cacti'"
      # a bad mode is a failure, do not report success to the caller
      exit(2)
    end
  else
    raise("Could not find stats for device #{device}")
  end
rescue StandardError => e
  # StandardError only, so the exit above is not reported as a parse failure
  puts("Failed to parse /proc/diskstats: #{e}")
  exit(2)
end
# Retrieves the stat and builds a hash of hashes representing it
#
# The outer hash is keyed by backend name.  Each inner hash starts from the
# template below and the values of all instances of a backend are added up
# per stat.
#
# NOTE(review): :processes is part of the template but parse_line never
# yields a :processes stat, so it always stays 0 - confirm whether it should
# count the instances of a backend.
def parse(host)
  backends = {}

  new_backend = {:cgi => "", :connected => 0, :died => 0, :disabled => 0,
                 :load => 0, :overloaded => 0, :processes => 0}

  get(host) do |stat|
    backend = stat[:backend]

    unless backends.include?(backend)
      backends[backend] = new_backend.clone
      backends[backend][:cgi] = backend
    end

    backends[backend][stat[:stat]] = backends[backend].fetch(stat[:stat], 0) + stat[:value]
  end

  backends
end

# Parses a backend line returning a hash of its bits
#
# line - one counter line, e.g. "fastcgi.backend.fastcgi1.0.load: 3"
#
# Returns a hash with :backend (String), :instance_number (Integer),
# :stat (Symbol) and :value (Integer).  Raises a RuntimeError for any line
# that is not a per-backend counter.
def parse_line(line)
  # \A and \z anchor the whole string; trailing whitespace is allowed so
  # lines that still carry the "\r" of a CRLF line ending are accepted
  found = /\Afastcgi\.backend\.(.+)\.(\d+)\.(connected|died|disabled|load|overloaded): (\d+)\s*\z/.match(line.to_s)

  raise "Unparsable line: #{line}" unless found

  {:backend         => found[1],
   :instance_number => found[2].to_i,
   :stat            => found[3].to_sym,
   :value           => found[4].to_i}
end

# Retrieves a url from a remote host
#
# Without a block the body is returned as an array of lines.  With a block
# every line that parse_line understands is yielded as a hash; other lines,
# like "fastcgi.active-requests: 0", are skipped.
#
# A response other than 200 prints the reason and exits with status 1.
def get(host)
  url = "http://#{host}#{COUNTER_PATH}"
  uri = URI.parse(url)
  http = Net::HTTP.new(uri.host, uri.port)

  response = http.get(uri.path)

  unless response.code == "200"
    puts "Failed to retrieve #{url}: #{response.code}"
    exit 1
  end

  lines = response.body.split(/\n/)

  return lines unless block_given?

  lines.each do |line|
    begin
      stat = parse_line(line)
    rescue RuntimeError
      # not a per-backend counter
      next
    end

    # yield outside of the rescue so errors raised by the caller's block
    # are not silently discarded
    yield(stat)
  end
end
# Decodes the XML map of a statistics reply into a Hash.
#
# amqmap - XML text whose root element holds one child per entry; the first
#          child element of an entry is the key, the second one the value
#
# Returns a Hash with String keys.  Values are converted by the tag name of
# the value element: "string" to String, names matching int or long to
# Integer and "double" to Float.  An empty value element gives "", 0 or 0.0,
# an entry without a value element gives nil.
#
# Raises a RuntimeError when there is no root element or when a value has an
# unknown type.
def amqxmldecode(amqmap)
  map = Hash.new

  root = REXML::Document.new(amqmap).root
  raise("No XML map found in the reply") unless root

  root.each_element do |entry|
    key, value = entry.elements.to_a

    # entries without a named key cannot be stored
    next unless key && key.text

    name = key.text

    if value.nil?
      map[name] = nil
      next
    end

    # Convert by the value element itself.  Looking only at elements that
    # have text would skip an empty value and store the key name instead.
    case value.name
    when "string"
      map[name] = value.text.to_s

    when /int|long/
      map[name] = value.text.to_i

    when "double"
      map[name] = value.text.to_f

    else
      raise("Unknown data type #{value.name}")
    end
  end

  map
end

# dont spew any stuff to stderr
#
# No-op callbacks, an instance is passed as :logger when the connection is
# opened.
class EventLogger
  def on_miscerr(params=nil); end
  def on_connectfail(params=nil); end
end
@options[:user], :passcode => @options[:password]} 124 | end 125 | 126 | conn = Stomp::Connection.open(connection) 127 | 128 | conn.subscribe("/topic/nagios.statresults.#{hostname}", { "transformation" => "jms-map-xml"}) 129 | 130 | if @options[:mode] == :broker 131 | conn.publish("/queue/ActiveMQ.Statistics.Broker", "", {"reply-to" => "/topic/nagios.statresults.#{hostname}"}) 132 | else 133 | conn.publish("/queue/ActiveMQ.Statistics.Destination.#{@options[:mode]}", "", {"reply-to" => "/topic/nagios.statresults.#{hostname}"}) 134 | end 135 | 136 | s = conn.receive.body 137 | conn.disconnect 138 | 139 | map = amqxmldecode(s) 140 | 141 | map.each_pair do |k, v| 142 | next if k.match(/\+/) 143 | 144 | print("#{k}:#{v} ") 145 | end 146 | 147 | puts 148 | end 149 | rescue Exception => e 150 | puts("Failed to get stats: #{e}") 151 | end 152 | 153 | -------------------------------------------------------------------------------- /nagios-checks/check_cert.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | # == Synopsis 3 | # Simple nagios plugin to check expiry times for certificates 4 | # and CRLs 5 | # 6 | # == Usage 7 | # Check a certificate: 8 | # check_cert --cert /path/to/cert --warn WARN --crit CRIT 9 | # 10 | # Check a crl: 11 | # check_cert --crl /path/to/crl --warn WARN --crit CRIT 12 | # 13 | # --warn WARN 14 | # Seconds before expiry to raise a warning 15 | # 16 | # --crit CRIT 17 | # Seconds before expiry to raise a critical 18 | # 19 | # --help 20 | # Show this help 21 | # 22 | # == Author 23 | # R.I.Pienaar 24 | 25 | 26 | require 'getoptlong' 27 | require 'date' 28 | 29 | opts = GetoptLong.new( 30 | [ '--cert', GetoptLong::REQUIRED_ARGUMENT], 31 | [ '--crl', GetoptLong::REQUIRED_ARGUMENT], 32 | [ '--warn', '-w', GetoptLong::REQUIRED_ARGUMENT], 33 | [ '--crit', '-c', GetoptLong::REQUIRED_ARGUMENT], 34 | [ '--help', '-h', GetoptLong::NO_ARGUMENT] 35 | ) 36 | 37 | def showhelp 38 | begin 39 | require 
# Time.parse keeps the time of day and the zone of the expiry date
require 'time'

# Takes a period of time in seconds and returns it in human-readable form (down to minutes)
#
# Units are joined with ", " and the minutes are attached with " and ",
# e.g. "1 days, 1 hours and 1 mins".  Anything shorter than a minute is
# returned as "less than a minute" rather than an empty string.
def time_period_to_s(time_period)
  intervals = [ [:years, 31556926], [:weeks, 604800], [:days, 86400], [:hours, 3600], [:mins, 60] ]
  out_str = ''

  intervals.each do |name, length|
    next if time_period < length

    count, time_period = time_period.divmod(length)

    unless out_str.empty?
      out_str += (name == :mins ? " and " : ", ")
    end

    out_str += "#{count} #{name}"
  end

  out_str.empty? ? "less than a minute" : out_str
end

# Prints the state of a file based on the seconds left until enddate and exits
#
# file    - name used in the message
# enddate - expiry date as printed by openssl
# warn    - seconds before expiry to raise a warning
# crit    - seconds before expiry to raise a critical
#
# Exits 2 when already expired or less than crit seconds are left, 1 when
# less than warn seconds are left and 0 otherwise.
def alert_age(file, enddate, warn, crit)
  seconds = Time.parse(enddate).to_i - Time.now.to_i

  if seconds < 0
    # there is no sensible "expires in" text for a date in the past
    puts("CRITICAL: #{file} expired #{time_period_to_s(-seconds)} ago")
    exit(2)
  elsif seconds < crit
    puts("CRITICAL: #{file} expires in #{time_period_to_s(seconds)}")
    exit(2)
  elsif seconds < warn
    puts("WARN: #{file} expires in #{time_period_to_s(seconds)}")
    exit(1)
  else
    puts("OK: #{file} expires in #{time_period_to_s(seconds)}")
    exit(0)
  end
end

# Checks the notAfter date of a certificate, exits 3 when the file does not
# exist or the date cannot be found in the openssl output
def check_cert(cert, warn, crit)
  unless File.exist?(cert)
    puts("UNKNOWN: Certificate #{cert} doesn't exist")
    exit(3)
  end

  # argument list instead of a command string, the path never passes
  # through a shell
  enddate = IO.popen(["openssl", "x509", "-in", cert, "-noout", "-enddate"]) { |io| io.read }

  if enddate =~ /notAfter=(.+)/
    alert_age(cert, $1, warn, crit)
  else
    puts("UNKNOWN: Certifcate end date could not be parsed")
    exit(3)
  end
end

# Checks the nextUpdate date of a CRL, exits 3 when the file does not exist
# or the date cannot be found in the openssl output
def check_crl(crl, warn, crit)
  unless File.exist?(crl)
    puts("UNKNOWN: CRL #{crl} doesn't exist")
    exit(3)
  end

  # argument list instead of a command string, the path never passes
  # through a shell
  enddate = IO.popen(["openssl", "crl", "-in", crl, "-noout", "-nextupdate"]) { |io| io.read }

  if enddate =~ /nextUpdate=(.+)/
    alert_age(crl, $1, warn, crit)
  else
    puts("UNKNOWN: CRL next update date could not be parsed")
    exit(3)
  end
end
# destination queue or topic and wait for it to return.  If the reply
# is not received in a specified time alerts are raised
#
# Connection issues also raise alerts.
#
# The password supplied on the command line can either be a string
# for the password or a path to a file that has the password on
# the first line.
#
# If you have multiple nagios instances monitoring the same
# infrastructure you should not use queues but topics and
# each nagios instance should use a unique topic.
#
# We could use temp topics but unfortunately these fail in
# certain middleware topologies.
#
# You can specify --host multiple times but port, user, password
# etc should be the same for all the hosts in that case
#
# R.I.Pienaar
# Apache 2.0 License

require 'rubygems'
require 'stomp'
require 'timeout'
require 'optparse'

options = {:user => "nagios",
           :password => nil,
           :host => nil,
           :port => 61613,
           :destination => "/topic/nagios.monitor",
           :warning => 2,
           :critical => 5}

opt = OptionParser.new

opt.on("--user USER", "-u", "User to connect as") do |v|
  options[:user] = v
end

# -P and not -p: both --password and --port used to claim -p.  The later
# definition, --port, was the one that took effect, so -p stays with --port.
opt.on("--password PASSWORD", "-P", "Password to connect with") do |v|
  if v.start_with?("/") && File.exist?(v)
    # first line of the file, an empty file gives an empty password
    options[:password] = File.open(v) { |f| f.gets.to_s.chomp }
  else
    options[:password] = v
  end
end

opt.on("--destination DEST", "-d", "The topic of queue to use for monitoring") do |v|
  options[:destination] = v
end

opt.on("--warning WARN", "-w", "Warning threshold for turn around time") do |v|
  options[:warning] = v.to_i
end

opt.on("--critical CRIT", "-c", "Critical threshold for turn around time") do |v|
  options[:critical] = v.to_i
end

opt.on("--host HOST", "-h", "Host to connect to") do |v|
  if options[:host]
    options[:host] << v
  else
    options[:host] = [v]
  end
end

opt.on("--port PORT", "-p", "Port to connect to") do |v|
  options[:port] = v.to_i
end

opt.parse!

if options[:host].nil?
  puts "CRITICAL: No host to monitor supplied"
  exit 2
end

starttime = Time.now

message = nil
status = 3

# dont spew any stuff to stderr
class EventLogger
  def on_miscerr(params=nil); end
  def on_connectfail(params=nil); end
end

begin
  # the whole round trip has to finish within the critical threshold
  Timeout::timeout(options[:critical]) do
    connection = {:hosts => [], :logger => EventLogger.new}

    options[:host].each do |host|
      connection[:hosts] << {:host => host, :port => options[:port], :login => options[:user], :passcode => options[:password]}
    end

    conn = Stomp::Connection.open(connection)

    conn.subscribe(options[:destination])

    # random payload so that the reply can be matched to this run
    msg = ""
    10.times { msg += rand(100).to_s }

    conn.publish(options[:destination], msg)

    body = conn.receive.body

    if msg == body
      status = 0
    else
      message = "CRITICAL: sent #{msg} but received #{body} possible corruption of miss configuration"
      status = 2
    end
  end
rescue Timeout::Error
  # the message for this case is set by the timing check below
  status = 2
rescue StandardError => e
  message = "CRITICAL: Unexpected error during test: #{e}"
  status = 2
end

testtime = (Time.now - starttime).to_f

if testtime >= options[:critical]
  message = "CRITICAL: Test took %.2f to complete expected < %d" % [ testtime, options[:critical] ]
  status = 2
elsif testtime >= options[:warning] && status == 0
  # Only a test that was fine so far becomes a warning; an earlier critical
  # result keeps its status and message.
  message = "WARNING: Test took %.2f to complete expected < %d" % [ testtime, options[:warning] ]
  status = 1
end

statusses = {0 => "OK", 1 => "WARNING", 2 => "CRITICAL", 3 => "UNKNOWN"}

# exactly one line of output, performance data after the pipe
message ||= "%s: Test completed in %.2f seconds" % [ statusses[status], testtime ]

puts "%s|seconds=%f" % [ message, testtime ]

exit status
#!/usr/bin/ruby

# A plugin for Nagios that connects to an ActiveMQ instance with the
# ActiveMQ Statistics Plugin[1] enabled to monitor the size of a queue
#
# Report stats for the queue foo.bar with thresholds
#
#    check_activemq_queue.rb --host HOST --queue foo.bar --queue-warn 10 --queue-crit 20
#
# See --help for full arguments like setting credentials and which
# broker to connect to
#
# Defaults:
#
#     host:       n/a
#     port:       6163
#     user:       nagios
#     queue warn: 100
#     queue crit: 500
#     mem warn    50
#     mem crit    75
#
# R.I.Pienaar
# Apache 2.0 License
#
# [1] http://activemq.apache.org/statisticsplugin.html
require 'rexml/document'
require 'rubygems'
require 'optparse'
require 'timeout'
require 'stomp'

include REXML

@options = {:user => "nagios",
            :password => nil,
            :host => nil,
            :port => 6163,
            :queue_warn => 100,
            :queue_crit => 500,
            :memory_percent_warn => 50,
            :memory_percent_crit => 75,
            :queue => nil}

opt = OptionParser.new

opt.on("--user USER", "Connect as user") do |f|
  @options[:user] = f
end

opt.on("--password PASSWORD", "Connection password") do |f|
  @options[:password] = f
end

opt.on("--host HOST", "Host to connect to") do |f|
  if @options[:host]
    @options[:host] << f
  else
    @options[:host] = [f]
  end
end

opt.on("--port PORT", Integer, "Port to connect to") do |f|
  @options[:port] = f
end

opt.on("--queue QUEUE.NAME", "What queue to monitor") do |f|
  @options[:queue] = f
end

opt.on("--queue-crit CRIT", Integer, "Critical queue size") do |f|
  @options[:queue_crit] = f
end

opt.on("--queue-warn WARN", Integer, "Warning queue size") do |f|
  @options[:queue_warn] = f
end

opt.on("--mem-crit CRIT", Integer, "Critical percentage memory used") do |f|
  @options[:memory_percent_crit] = f
end

opt.on("--mem-warn WARN", Integer, "Warning percentage memory used") do |f|
  @options[:memory_percent_warn] = f
end
opt.parse!

if @options[:queue].nil?
  puts "Please specify a queue name with --queue"
  exit(3)
end

# there is no default host, without this the check only failed later with
# an error about calling each on nil
if @options[:host].nil?
  puts "Please specify a host to connect to with --host"
  exit(3)
end

# Decodes the XML map of a statistics reply into a Hash.
#
# The first child element of every entry is the key, the second one the
# value.  Keys are Symbols, values are converted by the tag name of the
# value element: "string" to String, names matching int or long to Integer
# and "double" to Float.  An empty value element gives "", 0 or 0.0, an
# entry without a value element gives nil.
#
# Raises a RuntimeError when there is no root element or when a value has an
# unknown type.
def amqxmldecode(amqmap)
  map = Hash.new

  root = REXML::Document.new(amqmap).root
  raise("No XML map found in the reply") unless root

  root.each_element do |entry|
    key, value = entry.elements.to_a

    # entries without a named key cannot be stored
    next unless key && key.text

    name = key.text.to_sym

    if value.nil?
      map[name] = nil
      next
    end

    # Convert by the value element itself.  Looking only at elements that
    # have text would skip an empty value and store the key name instead.
    case value.name
    when "string"
      map[name] = value.text.to_s

    when /int|long/
      map[name] = value.text.to_i

    when "double"
      map[name] = value.text.to_f

    else
      raise("Unknown data type #{value.name}")
    end
  end

  map
end

output = ["ActiveMQ"]
statuses = [0]
perfdata = []

# dont spew any stuff to stderr
class EventLogger
  def on_miscerr(params=nil); end
  def on_connectfail(params=nil); end
end

begin
  Timeout::timeout(2) do
    hostname = `hostname`.chomp

    connection = {:hosts => [], :logger => EventLogger.new}

    @options[:host].each do |host|
      connection[:hosts] << {:host => host, :port => @options[:port], :login => @options[:user], :passcode => @options[:password]}
    end

    conn = Stomp::Connection.open(connection)

    # the reply to the statistics request is delivered to this topic
    conn.subscribe("/topic/nagios.statresults.#{hostname}", { "transformation" => "jms-map-xml"})

    conn.publish("/queue/ActiveMQ.Statistics.Destination.#{@options[:queue]}", "", {"reply-to" => "/topic/nagios.statresults.#{hostname}"})

    s = conn.receive.body
    conn.disconnect

    map = amqxmldecode(s)

    perfdata << "size=#{map[:size]}"
    perfdata << "memory_pct=#{map[:memoryPercentUsage]}"

    if map[:size] >= @options[:queue_crit]
      output << "CRIT: #{@options[:queue]} has #{map[:size]} messages"
      statuses << 2
    elsif map[:size] >= @options[:queue_warn]
      output << "WARN: #{@options[:queue]} has #{map[:size]} messages"
      statuses << 1
    else
      output << "#{@options[:queue]} has #{map[:size]} messages"
      statuses << 0
    end

    if map[:memoryPercentUsage] >= @options[:memory_percent_crit]
      output << "CRIT: #{map[:memoryPercentUsage]} % memory used"
      statuses << 2
    elsif map[:memoryPercentUsage] >= @options[:memory_percent_warn]
      output << "WARN: #{map[:memoryPercentUsage]} % memory used"
      statuses << 1
    else
      output << "#{map[:memoryPercentUsage]} % memory used"
      statuses << 0
    end
  end
rescue StandardError => e
  # the timeout, connection problems and missing stats all end up here
  output = ["UNKNOWN: Failed to get ActiveMQ stats: #{e}"]
  statuses = [3]
end

puts "%s|%s" % [output.join(" "), perfdata.join(" ")]

# the worst of the collected states decides the exit status
exit(statuses.max)
fastcgi.backend.fastcgi1.1.overloaded: 0 12 | fastcgi.backend.fastcgi1.2.connected: 0 13 | fastcgi.backend.fastcgi1.2.died: 0 14 | fastcgi.backend.fastcgi1.2.disabled: 0 15 | fastcgi.backend.fastcgi1.2.load: 0 16 | fastcgi.backend.fastcgi1.2.overloaded: 0 17 | fastcgi.backend.fastcgi1.3.connected: 0 18 | fastcgi.backend.fastcgi1.3.died: 0 19 | fastcgi.backend.fastcgi1.3.disabled: 0 20 | fastcgi.backend.fastcgi1.3.load: 0 21 | fastcgi.backend.fastcgi1.3.overloaded: 0 22 | fastcgi.backend.fastcgi1.4.connected: 0 23 | fastcgi.backend.fastcgi1.4.died: 0 24 | fastcgi.backend.fastcgi1.4.disabled: 0 25 | fastcgi.backend.fastcgi1.4.load: 0 26 | fastcgi.backend.fastcgi1.4.overloaded: 0 27 | fastcgi.backend.fastcgi1.5.connected: 0 28 | fastcgi.backend.fastcgi1.5.died: 0 29 | fastcgi.backend.fastcgi1.5.disabled: 0 30 | fastcgi.backend.fastcgi1.5.load: 0 31 | fastcgi.backend.fastcgi1.5.overloaded: 0 32 | fastcgi.backend.fastcgi1.load: 0 33 | fastcgi.backend.fastcgi2.0.connected: 0 34 | fastcgi.backend.fastcgi2.0.died: 0 35 | fastcgi.backend.fastcgi2.0.disabled: 0 36 | fastcgi.backend.fastcgi2.0.load: 0 37 | fastcgi.backend.fastcgi2.0.overloaded: 0 38 | fastcgi.backend.fastcgi2.1.connected: 0 39 | fastcgi.backend.fastcgi2.1.died: 0 40 | fastcgi.backend.fastcgi2.1.disabled: 0 41 | fastcgi.backend.fastcgi2.1.load: 0 42 | fastcgi.backend.fastcgi2.1.overloaded: 0 43 | fastcgi.backend.fastcgi2.2.connected: 0 44 | fastcgi.backend.fastcgi2.2.died: 0 45 | fastcgi.backend.fastcgi2.2.disabled: 0 46 | fastcgi.backend.fastcgi2.2.load: 0 47 | fastcgi.backend.fastcgi2.2.overloaded: 0 48 | fastcgi.backend.fastcgi2.3.connected: 0 49 | fastcgi.backend.fastcgi2.3.died: 0 50 | fastcgi.backend.fastcgi2.3.disabled: 0 51 | fastcgi.backend.fastcgi2.3.load: 0 52 | fastcgi.backend.fastcgi2.3.overloaded: 0 53 | fastcgi.backend.fastcgi2.load: 0 54 | fastcgi.backend.fastcgi3.0.connected: 0 55 | fastcgi.backend.fastcgi3.0.died: 0 56 | fastcgi.backend.fastcgi3.0.disabled: 0 57 | 
fastcgi.backend.fastcgi3.0.load: 0 58 | fastcgi.backend.fastcgi3.0.overloaded: 0 59 | fastcgi.backend.fastcgi3.load: 0 60 | fastcgi.backend.fastcgi4.0.connected: 18 61 | fastcgi.backend.fastcgi4.0.died: 0 62 | fastcgi.backend.fastcgi4.0.disabled: 0 63 | fastcgi.backend.fastcgi4.0.load: 0 64 | fastcgi.backend.fastcgi4.0.overloaded: 0 65 | fastcgi.backend.fastcgi4.1.connected: 17 66 | fastcgi.backend.fastcgi4.1.died: 0 67 | fastcgi.backend.fastcgi4.1.disabled: 0 68 | fastcgi.backend.fastcgi4.1.load: 0 69 | fastcgi.backend.fastcgi4.1.overloaded: 0 70 | fastcgi.backend.fastcgi4.10.connected: 18 71 | fastcgi.backend.fastcgi4.10.died: 0 72 | fastcgi.backend.fastcgi4.10.disabled: 0 73 | fastcgi.backend.fastcgi4.10.load: 0 74 | fastcgi.backend.fastcgi4.10.overloaded: 0 75 | fastcgi.backend.fastcgi4.11.connected: 18 76 | fastcgi.backend.fastcgi4.11.died: 0 77 | fastcgi.backend.fastcgi4.11.disabled: 0 78 | fastcgi.backend.fastcgi4.11.load: 0 79 | fastcgi.backend.fastcgi4.11.overloaded: 0 80 | fastcgi.backend.fastcgi4.12.connected: 18 81 | fastcgi.backend.fastcgi4.12.died: 0 82 | fastcgi.backend.fastcgi4.12.disabled: 0 83 | fastcgi.backend.fastcgi4.12.load: 0 84 | fastcgi.backend.fastcgi4.12.overloaded: 0 85 | fastcgi.backend.fastcgi4.13.connected: 18 86 | fastcgi.backend.fastcgi4.13.died: 0 87 | fastcgi.backend.fastcgi4.13.disabled: 0 88 | fastcgi.backend.fastcgi4.13.load: 0 89 | fastcgi.backend.fastcgi4.13.overloaded: 0 90 | fastcgi.backend.fastcgi4.14.connected: 19 91 | fastcgi.backend.fastcgi4.14.died: 0 92 | fastcgi.backend.fastcgi4.14.disabled: 0 93 | fastcgi.backend.fastcgi4.14.load: 0 94 | fastcgi.backend.fastcgi4.14.overloaded: 0 95 | fastcgi.backend.fastcgi4.2.connected: 17 96 | fastcgi.backend.fastcgi4.2.died: 0 97 | fastcgi.backend.fastcgi4.2.disabled: 0 98 | fastcgi.backend.fastcgi4.2.load: 0 99 | fastcgi.backend.fastcgi4.2.overloaded: 0 100 | fastcgi.backend.fastcgi4.3.connected: 17 101 | fastcgi.backend.fastcgi4.3.died: 0 102 | 
fastcgi.backend.fastcgi4.3.disabled: 0 103 | fastcgi.backend.fastcgi4.3.load: 0 104 | fastcgi.backend.fastcgi4.3.overloaded: 0 105 | fastcgi.backend.fastcgi4.4.connected: 17 106 | fastcgi.backend.fastcgi4.4.died: 0 107 | fastcgi.backend.fastcgi4.4.disabled: 0 108 | fastcgi.backend.fastcgi4.4.load: 0 109 | fastcgi.backend.fastcgi4.4.overloaded: 0 110 | fastcgi.backend.fastcgi4.5.connected: 17 111 | fastcgi.backend.fastcgi4.5.died: 0 112 | fastcgi.backend.fastcgi4.5.disabled: 0 113 | fastcgi.backend.fastcgi4.5.load: 0 114 | fastcgi.backend.fastcgi4.5.overloaded: 0 115 | fastcgi.backend.fastcgi4.6.connected: 18 116 | fastcgi.backend.fastcgi4.6.died: 0 117 | fastcgi.backend.fastcgi4.6.disabled: 0 118 | fastcgi.backend.fastcgi4.6.load: 0 119 | fastcgi.backend.fastcgi4.6.overloaded: 0 120 | fastcgi.backend.fastcgi4.7.connected: 18 121 | fastcgi.backend.fastcgi4.7.died: 0 122 | fastcgi.backend.fastcgi4.7.disabled: 0 123 | fastcgi.backend.fastcgi4.7.load: 0 124 | fastcgi.backend.fastcgi4.7.overloaded: 0 125 | fastcgi.backend.fastcgi4.8.connected: 18 126 | fastcgi.backend.fastcgi4.8.died: 0 127 | fastcgi.backend.fastcgi4.8.disabled: 0 128 | fastcgi.backend.fastcgi4.8.load: 0 129 | fastcgi.backend.fastcgi4.8.overloaded: 0 130 | fastcgi.backend.fastcgi4.9.connected: 18 131 | fastcgi.backend.fastcgi4.9.died: 0 132 | fastcgi.backend.fastcgi4.9.disabled: 0 133 | fastcgi.backend.fastcgi4.9.load: 0 134 | fastcgi.backend.fastcgi4.9.overloaded: 0 135 | fastcgi.backend.fastcgi4.load: 1 136 | fastcgi.backend.fastcgi5.0.connected: 0 137 | fastcgi.backend.fastcgi5.0.died: 0 138 | fastcgi.backend.fastcgi5.0.disabled: 0 139 | fastcgi.backend.fastcgi5.0.load: 0 140 | fastcgi.backend.fastcgi5.0.overloaded: 0 141 | fastcgi.backend.fastcgi5.load: 0 142 | fastcgi.backend.fastcgi6.0.connected: 0 143 | fastcgi.backend.fastcgi6.0.died: 0 144 | fastcgi.backend.fastcgi6.0.disabled: 0 145 | fastcgi.backend.fastcgi6.0.load: 0 146 | fastcgi.backend.fastcgi6.0.overloaded: 0 147 | 
fastcgi.backend.fastcgi6.1.connected: 0 148 | fastcgi.backend.fastcgi6.1.died: 0 149 | fastcgi.backend.fastcgi6.1.disabled: 0 150 | fastcgi.backend.fastcgi6.1.load: 0 151 | fastcgi.backend.fastcgi6.1.overloaded: 0 152 | fastcgi.backend.fastcgi6.2.connected: 0 153 | fastcgi.backend.fastcgi6.2.died: 0 154 | fastcgi.backend.fastcgi6.2.disabled: 0 155 | fastcgi.backend.fastcgi6.2.load: 0 156 | fastcgi.backend.fastcgi6.2.overloaded: 0 157 | fastcgi.backend.fastcgi6.3.connected: 0 158 | fastcgi.backend.fastcgi6.3.died: 0 159 | fastcgi.backend.fastcgi6.3.disabled: 0 160 | fastcgi.backend.fastcgi6.3.load: 0 161 | fastcgi.backend.fastcgi6.3.overloaded: 0 162 | fastcgi.backend.fastcgi6.load: 0 163 | fastcgi.backend.fastcgi7.0.connected: 0 164 | fastcgi.backend.fastcgi7.0.died: 0 165 | fastcgi.backend.fastcgi7.0.disabled: 0 166 | fastcgi.backend.fastcgi7.0.load: 0 167 | fastcgi.backend.fastcgi7.0.overloaded: 0 168 | fastcgi.backend.fastcgi7.load: 0 169 | fastcgi.requests: 266 170 | -------------------------------------------------------------------------------- /linux-diskstats-parser/cacti/cacti_graph_template_io_device_volume.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | IO Device Volume 4 | 5 | on 6 | |host_description| - /dev/sdb volume 7 | 8 | 1 9 | 10 | 120 11 | 12 | 500 13 | 14 | on 15 | 16 | on 17 | 18 | 2 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | on 27 | 28 | on 29 | 30 | 100 31 | 32 | 0 33 | 34 | 1000 35 | 36 | 37 | 38 | 39 | 40 | sectors / sec 41 | 42 | 43 | 44 | 0 45 | 00004D 46 | FF 47 | 5 48 | 1 49 | 0 50 | 51 | hash_060019e9c43831e54eca8069317a2ce8c6f751 52 | Read 53 | 54 | 1 55 | 56 | 57 | 0 58 | 0 59 | FF 60 | 9 61 | 4 62 | 0 63 | 64 | hash_060019e9c43831e54eca8069317a2ce8c6f751 65 | Current: 66 | 67 | 2 68 | 69 | 70 | 0 71 | 0 72 | FF 73 | 9 74 | 1 75 | 0 76 | 77 | hash_060019e9c43831e54eca8069317a2ce8c6f751 78 | Average: 79 | 80 | 3 81 | 82 | 83 | 0 84 | 0 85 | FF 86 | 9 87 | 3 88 | 0 89 | 90 | 
hash_060019e9c43831e54eca8069317a2ce8c6f751 91 | Maximum: 92 | on 93 | 4 94 | 95 | 96 | 0 97 | 00CF00 98 | FF 99 | 5 100 | 1 101 | 0 102 | 103 | hash_060019e9c43831e54eca8069317a2ce8c6f751 104 | Write 105 | 106 | 5 107 | 108 | 109 | 0 110 | 0 111 | FF 112 | 9 113 | 4 114 | 0 115 | 116 | hash_060019e9c43831e54eca8069317a2ce8c6f751 117 | Current: 118 | 119 | 6 120 | 121 | 122 | 0 123 | 0 124 | FF 125 | 9 126 | 1 127 | 0 128 | 129 | hash_060019e9c43831e54eca8069317a2ce8c6f751 130 | Average: 131 | 132 | 7 133 | 134 | 135 | 0 136 | 0 137 | FF 138 | 9 139 | 3 140 | 0 141 | 142 | hash_060019e9c43831e54eca8069317a2ce8c6f751 143 | Maximum: 144 | on 145 | 8 146 | 147 | 148 | 149 | 150 | Sectors Read 151 | 152 | task_item_id 153 | hash_0000191f690c09f183477930f9d295d82d4155|hash_000019275ad941f106b41986c4f2a56199dfde|hash_000019d9ec871c5d67f25facd9fbfe9a0c9f90|hash_000019460f5a45bced6e1e34998e0865006d43 154 | 155 | 156 | Sectors Written 157 | 158 | task_item_id 159 | hash_0000196c7cbc4f54da8f41da712b2827bdfc0b|hash_000019160e8dd2566bbcc8e530258804b400bc|hash_0000197d6d6fef8fcecbb48af1192055911619|hash_000019df5552c3724e9eeae07cd82de11ec75e 160 | 161 | 162 | 163 | 164 | Normal 165 | %8.2lf%s 166 | 167 | -------------------------------------------------------------------------------- /linux-diskstats-parser/cacti/cacti_graph_template_io_device_operations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | IO Device Operations 4 | 5 | on 6 | |host_description| - /dev/sdb operations 7 | 8 | 1 9 | 10 | 120 11 | 12 | 500 13 | 14 | on 15 | 16 | on 17 | 18 | 2 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | on 27 | 28 | on 29 | 30 | 100 31 | 32 | 0 33 | 34 | 1000 35 | 36 | 37 | 38 | 39 | 40 | operations / sec 41 | 42 | 43 | 44 | 0 45 | 00004D 46 | FF 47 | 5 48 | 1 49 | 0 50 | 51 | hash_060019e9c43831e54eca8069317a2ce8c6f751 52 | Reads 53 | 54 | 1 55 | 56 | 57 | 0 58 | 0 59 | FF 60 | 9 61 | 4 62 | 0 63 | 64 | 
#!/usr/bin/ruby

# Nagios check that queries PuppetDB and alerts either on the age of the
# node catalog timestamps (--check-age) or on the number of active nodes
# (--check-nodes).
#
# Exit codes: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN

require 'rubygems'
require 'time'
require 'pp'
require 'optparse'

# Small client for the PuppetDB HTTP API built on Puppet's own HTTP pool.
class PuppetDB
  require 'puppet'
  require 'puppet/util/puppetdb'
  require 'puppet/util/run_mode'
  require 'puppet/network/http_pool'
  require 'json'
  require 'uri'

  attr_reader :server, :port

  # mode   - Puppet run mode used to initialise the Puppet settings
  # server - PuppetDB host, defaults to Puppet::Util::Puppetdb.server
  # port   - PuppetDB port, defaults to Puppet::Util::Puppetdb.port
  # ssl    - passed on to the HTTP pool when the connection is created
  def initialize(mode = :master, server=nil, port=nil, ssl=true)
    configure(mode)

    @server = server || Puppet::Util::Puppetdb.server
    @port = port || Puppet::Util::Puppetdb.port
    @ssl = ssl

    reset!
  end

  # Initialises the global Puppet settings for the given run mode.
  #
  # Does nothing when the settings were already initialised for the same
  # mode and raises RuntimeError when they were initialised for another one.
  def configure(mode = :master)
    if Puppet.settings.app_defaults_initialized?
      unless mode == Puppet.run_mode.name
        raise "Puppet is already configured for %s mode, can't reconfigure for %s mode" % [Puppet.run_mode.name, mode]
      end

      return
    end

    Puppet.settings.preferred_run_mode = mode
    Puppet.settings.initialize_global_settings
    Puppet.settings.initialize_app_defaults(Puppet::Settings.app_defaults_for_run_mode(Puppet.run_mode))
  end

  # Memoised HTTP connection to the configured server and port.
  def connection
    @connection ||= Puppet::Network::HttpPool.http_instance(@server, @port, @ssl)
  end

  # Performs a GET for the given path and returns the parsed JSON body.
  def query(query)
    headers = { "Accept" => "application/json" }
    resp = connection.get(query, headers)
    JSON.parse(resp.body)
  end

  # Nodes without a "deactivated" value.
  def active_nodes
    nodes.reject{|n| n["deactivated"]}
  end

  # Nodes with a "deactivated" value.
  def deactivated_nodes
    nodes.select{|n| n["deactivated"]}
  end

  # Memoised result of the /v3/nodes query.
  def nodes
    @nodes ||= query("/v3/nodes")
  end

  # Drops the cached node list and connection so the next call fetches again.
  def reset!
    @nodes = nil
    @connection = nil
  end

  def to_s
    "PuppetDB at %s:%s" % [@server, @port]
  end
end

# Shared behaviour of the checks: threshold validation, access to the active
# nodes ordered by catalog timestamp and Nagios style reporting.
#
# Subclasses implement #check and return a Hash with :status, :message and
# :stats keys.
class NodeChecker
  NAGIOS_OK = 0
  NAGIOS_WARN = 1
  NAGIOS_CRIT = 2
  NAGIOS_UNKNOWN = 3

  # puppetdb - object responding to active_nodes and reset!
  # config   - Hash holding at least :critical and :warning
  #
  # Raises RuntimeError when validate! rejects the configuration.
  def initialize(puppetdb, config)
    @puppetdb = puppetdb
    @config = config

    validate!
  end

  def validate!
    raise("Please specify a critical threshold") unless @config[:critical]
    raise("Please specify a warning threshold") unless @config[:warning]
  end

  # Prints "message | perfdata" and exits with the status of the check.
  def report_and_exit(check)
    # performance data items are separated by spaces, not by commas
    perfdata = check[:stats].map{|k,v| "%s=%.2f" % [k,v]}.join(" ")

    puts "%s | %s" % [check[:message], perfdata]
    exit check[:status]
  end

  # Active nodes ordered newest catalog first, memoised until the next check.
  def nodes
    @nodes ||= @puppetdb.active_nodes.sort_by{|n| Time.parse(n["catalog_timestamp"])}.reverse
  end

  # Nodes whose catalog timestamp is at least the given number of seconds old.
  def older_than(seconds)
    nodes.select do |node|
      seconds <= (Time.now - Time.parse(node["catalog_timestamp"])).to_i
    end
  end

  def newest
    nodes.first
  end

  def oldest
    nodes.last
  end

  def newest_age
    Time.now - Time.parse(newest["catalog_timestamp"])
  end

  def oldest_age
    Time.now - Time.parse(oldest["catalog_timestamp"])
  end

  # Values reported as performance data.
  def stats
    # with no nodes there is no oldest or newest entry to take an age from
    return({:oldest => 0, :newest => 0, :count => 0}) if nodes.empty?

    {:oldest => oldest_age,
     :newest => newest_age,
     :count => nodes.size}
  end

  private

  # Forgets everything cached from an earlier check.
  def refresh!
    @puppetdb.reset!
    @nodes = nil
  end

  # Builds the Hash every check returns.
  def result(status, message)
    {:status => status, :message => message, :stats => stats}
  end
end

# Alerts on the number of active nodes.
#
# When critical >= warning the thresholds are upper bounds, otherwise they
# are lower bounds. A count equal to a threshold is a violation for both
# levels, which is what the "expected < N" / "expected > N" messages say.
class NodeCountChecker < NodeChecker
  def check
    refresh!

    count = nodes.size
    critical = @config[:critical]
    warning = @config[:warning]

    if critical >= warning
      if count >= critical
        result(NAGIOS_CRIT, "CRITICAL: %d nodes in population but expected < %d" % [count, critical])
      elsif count >= warning
        result(NAGIOS_WARN, "WARNING: %d nodes in population but expected < %d" % [count, warning])
      else
        result(NAGIOS_OK, "OK: %d nodes in population" % count)
      end
    else
      if count <= critical
        result(NAGIOS_CRIT, "CRITICAL: %d nodes in population but expected > %d" % [count, critical])
      elsif count <= warning
        result(NAGIOS_WARN, "WARNING: %d nodes in population but expected > %d" % [count, warning])
      else
        result(NAGIOS_OK, "OK: %d nodes in population" % count)
      end
    end
  end
end

# Alerts when the oldest catalog timestamp among the active nodes is at least
# the warning or critical number of seconds in the past.
class AgeChecker < NodeChecker
  def validate!
    super
    raise("Critical threshold is smaller than warning threshold") if @config[:critical] < @config[:warning]
  end

  def check
    refresh!

    return result(NAGIOS_UNKNOWN, "Could not find any nodes") if nodes.empty?

    if oldest_age >= @config[:critical]
      result(NAGIOS_CRIT, "CRITICAL: %d nodes not seen in %d seconds" % [older_than(@config[:critical]).size, @config[:critical]])

    elsif oldest_age >= @config[:warning]
      result(NAGIOS_WARN, "WARNING: %d nodes not seen in %d seconds" % [older_than(@config[:warning]).size, @config[:warning]])

    else
      result(NAGIOS_OK, "OK: %d nodes checking in sooner than %d seconds" % [nodes.size, @config[:warning]])
    end
  end
end

@config = {:mode => nil, :critical => nil, :warning => nil, :port => nil, :host => nil, :ssl => true}

opt = OptionParser.new

opt.on("--check-age", "--age", "Checks for nodes that have not checked in") do
  @config[:mode] = :age
end

opt.on("--check-nodes", "--nodes", "Checks for the amount of active nodes") do
  @config[:mode] = :node_count
end

opt.on("--critical THRESHOLD", Integer, "Critical threshold") do |v|
  @config[:critical] = v
end

opt.on("--warning THRESHOLD", Integer, "Warning threshold") do |v|
  @config[:warning] = v
end

opt.on("--host HOST", "Hostname where PuppetDB runs") do |v|
  @config[:host] = v
end

opt.on("--port PORT", Integer, "Port where PuppetDB runs") do |v|
  @config[:port] = v
end

opt.on("--[no-]ssl", "Use SSL to connect to PuppetDB") do |v|
  @config[:ssl] = v
end

# Any failure below is reported as UNKNOWN. An uncaught Ruby exception or
# abort would end the process with status 1, which Nagios reads as WARNING.
begin
  opt.parse!

  checker_class = case @config[:mode]
                  when :age then AgeChecker
                  when :node_count then NodeCountChecker
                  end

  unless checker_class
    puts "UNKNOWN: A mode like --check-age is needed"
    exit NodeChecker::NAGIOS_UNKNOWN
  end

  puppetdb = PuppetDB.new(:master, @config[:host], @config[:port], @config[:ssl])
  checker = checker_class.new(puppetdb, @config)

  checker.report_and_exit(checker.check)
rescue StandardError => e
  puts "UNKNOWN: %s" % e
  exit NodeChecker::NAGIOS_UNKNOWN
end
# Nagios check for the Puppet agent. It reads the agent's lock files, the
# state file and last_run_summary.yaml and alerts either on the time since
# the last run or, with --check-failures, on the number of failed resources.
#
# Exit codes: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN
require 'optparse'
require 'yaml'

statedir_puppet_3 = "/var/lib/puppet/state"
statedir_puppet_4 = "/opt/puppetlabs/puppet/cache/state"
publicdir_puppet_8 = "/opt/puppetlabs/puppet/public"
statedir_candidates = [ publicdir_puppet_8, statedir_puppet_4, statedir_puppet_3 ]

# When none of the known directories exists fall back to the first candidate
# instead of nil, so that --state-dir can still be parsed and a missing
# summary file is reported as UNKNOWN further down.
# NOTE(review): the lock files are looked up in the same directory as the
# summary file - confirm that this holds for the Puppet 8 public directory.
statedir = statedir_candidates.find { |dir| File.directory?(dir) } || statedir_candidates.first

agent_lockfile = statedir + "/agent_catalog_run.lock"
agent_disabled_lockfile = statedir + "/agent_disabled.lock"
statefile = statedir + "/state.yaml"
summaryfile = statedir + "/last_run_summary.yaml"
enabled = true
lastrun = 0
lastrun_time = 0
failcount_resources = 0
failcount_events = 0
warn = 0
crit = 0
total_failure = false
enabled_only = false
failures = false
disable_perfdata = false
disabled_message = "reason not specified"

opt = OptionParser.new

opt.on("--critical [CRIT]", "-c", Integer, "Critical threshold, time or failed resources") do |f|
  crit = f.to_i
end

opt.on("--warn [WARN]", "-w", Integer, "Warning threshold, time or failed resources") do |f|
  warn = f.to_i
end

opt.on("--check-failures", "-f", "Check for failed resources instead of time since run") do |f|
  failures = true
end

opt.on("--only-enabled", "-e", "Only alert if Puppet is enabled") do |f|
  enabled_only = true
end

opt.on("--state-dir [FILE]", "Location of the state directory containing lock and state files, default #{statedir}, will change location of the files") do |f|
  statedir = f
  agent_lockfile = statedir + "/agent_catalog_run.lock"
  agent_disabled_lockfile = statedir + "/agent_disabled.lock"
  statefile = statedir + "/state.yaml"
  summaryfile = statedir + "/last_run_summary.yaml"
end

opt.on("--agent-lock-file [FILE]", "-l", "Location of the agent run lock file, default #{agent_lockfile}") do |f|
  agent_lockfile = f
end

opt.on("--agent-disabled-lock-file [FILE]", "-d", "Location of the agent disabled lock file, default #{agent_disabled_lockfile}") do |f|
  agent_disabled_lockfile = f
end

opt.on("--state-file [FILE]", "-t", "Location of the state file, default #{statefile}") do |f|
  statefile = f
end

opt.on("--summary-file [FILE]", "-s", "Location of the summary file, default #{summaryfile}") do |f|
  summaryfile = f
end

opt.on("--disable-perfdata", "-x", "Disable performance data output") do |f|
  disable_perfdata = f
end

opt.parse!

if warn == 0 || crit == 0
  puts "Please specify a warning and critical level"
  exit 3
end

# A zero length run lock file counts as disabled. A lock file with content
# belongs to a run in progress, which this check does not act on.
enabled = false if File.exist?(agent_lockfile) && File::Stat.new(agent_lockfile).zero?

if File.exist?(agent_disabled_lockfile)
  enabled = false

  # File.read closes the file again. Take the last quoted value before the
  # closing brace; content that does not look like that is shown as is and
  # an empty file keeps the default message.
  lock_content = File.read(agent_disabled_lockfile)
  reason = lock_content[/.*\"(.*)\"\}/, 1] || lock_content.strip
  disabled_message = reason unless reason.empty?
end

# age of the state file, used when the summary has no usable last_run
lastrun = File.stat(statefile).mtime.to_i if File.exist?(statefile)

unless File.readable?(summaryfile)
  puts "UNKNOWN: Summary file not found or not readable. Check #{summaryfile}"
  exit 3
end

begin
  summary = YAML.load_file(summaryfile)
  # keep the state file time when the summary does not carry last_run
  lastrun = summary["time"]["last_run"] || lastrun
  lastrun_time = (summary["time"]["total"] || 0).round(2)

  # machines that outright failed to run like on missing dependencies
  # are treated as huge failures. The yaml file will be valid but
  # it won't have anything but last_run in it
  if summary.include?("events")
    # and unless there are failures, the events hash just won't have the failure count
    failcount_resources = summary["resources"]["failed"] || 0
    failcount_events = summary["events"]["failure"] || 0
  else
    failcount_resources = 99
    failcount_events = 99
    total_failure = true
  end
rescue
  # best effort: a summary that cannot be read in full counts as no failures
  failcount_resources = 0
  failcount_events = 0
end

time_since_last_run = Time.now.to_i - lastrun

time_since_last_run_string = "#{time_since_last_run} seconds ago"
if time_since_last_run >= 3600
  time_since_last_run_string = "#{time_since_last_run / 60 / 60} hours ago at #{Time.at(lastrun).utc.strftime('%R:%S')} UTC"
elsif time_since_last_run >= 60
  time_since_last_run_string = "#{time_since_last_run / 60} minutes ago"
end

if disable_perfdata
  perfdata_time = ""
else
  perfdata_time = "|time_since_last_run=#{time_since_last_run}s;#{warn};#{crit};0 failed_resources=#{failcount_resources};;;0 failed_events=#{failcount_events};;;0 last_run_duration=#{lastrun_time};;;0"
end

# The checks below are shared by both modes; only the threshold comparison
# in the middle differs between time based and failure based alerting.
if enabled_only && !enabled
  puts "OK: Puppet is currently disabled, not alerting. Last run #{time_since_last_run_string} with #{failcount_resources} failed resources #{failcount_events} failed events. Disabled with reason: #{disabled_message}#{perfdata_time}"
  exit 0
end

if total_failure
  puts "CRITICAL: FAILED - Puppet failed to run. Missing dependencies? Catalog compilation failed? Last run #{time_since_last_run_string}#{perfdata_time}"
  exit 2
end

if failures
  if failcount_resources >= crit
    puts "CRITICAL: Puppet last ran had #{failcount_resources} failed resources #{failcount_events} failed events, expected < #{crit}#{perfdata_time}"
    exit 2
  elsif failcount_resources >= warn
    puts "WARNING: Puppet last ran had #{failcount_resources} failed resources #{failcount_events} failed events, expected < #{warn}#{perfdata_time}"
    exit 1
  end
else
  if time_since_last_run >= crit
    puts "CRITICAL: last run #{time_since_last_run_string}, expected < #{crit}s#{perfdata_time}"
    exit 2
  elsif time_since_last_run >= warn
    puts "WARNING: last run #{time_since_last_run_string}, expected < #{warn}s#{perfdata_time}"
    exit 1
  end
end

# within thresholds: a disabled agent is still worth a warning
if enabled
  puts "OK: last run #{time_since_last_run_string} with #{failcount_resources} failed resources #{failcount_events} failed events and currently enabled#{perfdata_time}"
  exit 0
else
  puts "WARNING: last run #{time_since_last_run_string} with #{failcount_resources} failed resources #{failcount_events} failed events and currently disabled with reason: #{disabled_message}#{perfdata_time}"
  exit 1
end
hash_0700197ef895278567c0ae7a6131b7d0ac7a68 57 | 58 | 59 | 60 | reads 61 | 62 | 0 63 | 64 | 0 65 | 66 | 3 67 | 68 | 600 69 | 70 | hash_070019d3735c2855f6b53219758e40a92871f6 71 | 72 | 73 | 74 | read_time 75 | 76 | 0 77 | 78 | 0 79 | 80 | 3 81 | 82 | 600 83 | 84 | hash_070019b219c673101e79400ccb1733e2232145 85 | 86 | 87 | 88 | sectors_read 89 | 90 | 0 91 | 92 | 0 93 | 94 | 3 95 | 96 | 600 97 | 98 | hash_070019a617f69cc7640e9862b0b4c88d0351a7 99 | 100 | 101 | 102 | sectors_written 103 | 104 | 0 105 | 106 | 0 107 | 108 | 3 109 | 110 | 600 111 | 112 | hash_070019c166d6445ef9a27b4e7bde4e93e98837 113 | 114 | 115 | 116 | weighted_io_time 117 | 118 | 0 119 | 120 | 0 121 | 122 | 3 123 | 124 | 600 125 | 126 | hash_070019df7b6117a5c17d3d21051cc8189a9d93 127 | 128 | 129 | 130 | writes 131 | 132 | 0 133 | 134 | 0 135 | 136 | 3 137 | 138 | 600 139 | 140 | hash_0700193760d3892a46ee6cc133906e6d36aa4e 141 | 142 | 143 | 144 | writes_merged 145 | 146 | 0 147 | 148 | 0 149 | 150 | 3 151 | 152 | 600 153 | 154 | hash_070019664cf50c465cd7e6b2ee6015840761ef 155 | 156 | 157 | 158 | write_time 159 | 160 | 0 161 | 162 | 0 163 | 164 | 3 165 | 166 | 600 167 | 168 | hash_070019df2feb71e2fca3791b66e1fde1de7bb8 169 | 170 | 171 | 172 | 173 | hash_070019c864e1bac39589c0f80fa15896327312 174 | on 175 | 176 | 177 | 178 | hash_07001915691c45bb650bcc86ac02a15ff42e62 179 | 180 | 181 | 182 | 183 | 184 | 185 | NRPE Disk Stats 186 | 1 187 | /usr/lib/nagios/plugins/check_nrpe -H <hostname> -c cacti_diskstats_<disk> 188 | 189 | 190 | Hostname 191 | 192 | 193 | 194 | hostname 195 | in 196 | hostname 197 | 198 | 199 | Disk Drive 200 | 201 | 202 | 203 | 204 | in 205 | disk 206 | 207 | 208 | Read Operations 209 | on 210 | 211 | 212 | 213 | out 214 | reads 215 | 216 | 217 | Merged Reads 218 | on 219 | 220 | 221 | 222 | out 223 | merged_reads 224 | 225 | 226 | Sectors Read 227 | on 228 | 229 | 230 | 231 | out 232 | sectors_read 233 | 234 | 235 | Read Time 236 | on 237 | 238 | 239 | 240 | out 241 | read_time 242 | 
243 | 244 | Write Operations 245 | on 246 | 247 | 248 | 249 | out 250 | writes 251 | 252 | 253 | Merged Writes 254 | on 255 | 256 | 257 | 258 | out 259 | writes_merged 260 | 261 | 262 | Sectors Written 263 | on 264 | 265 | 266 | 267 | out 268 | sectors_written 269 | 270 | 271 | Write Time 272 | on 273 | 274 | 275 | 276 | out 277 | write_time 278 | 279 | 280 | IO Operations in Progress 281 | on 282 | 283 | 284 | 285 | out 286 | io_in_progress 287 | 288 | 289 | IO Time 290 | on 291 | 292 | 293 | 294 | out 295 | io_time 296 | 297 | 298 | Weighted IO Time 299 | on 300 | 301 | 302 | 303 | out 304 | weighted_io_time 305 | 306 | 307 | 308 | 309 | Daily (5 Minute Average) 310 | 0.5 311 | 1 312 | 600 313 | 86400 314 | 1|3 315 | 316 | 317 | Weekly (30 Minute Average) 318 | 0.5 319 | 6 320 | 700 321 | 604800 322 | 1|3 323 | 324 | 325 | Monthly (2 Hour Average) 326 | 0.5 327 | 24 328 | 775 329 | 2678400 330 | 1|3 331 | 332 | 333 | Yearly (1 Day Average) 334 | 0.5 335 | 288 336 | 797 337 | 33053184 338 | 1|3 339 | 340 | 341 | Hourly (1 Minute Average) 342 | 0.5 343 | 1 344 | 500 345 | 14400 346 | 1|3 347 | 348 | --------------------------------------------------------------------------------