├── network ├── tcpconn_breakdown │ ├── README │ └── ganglia_tcp_connections.pl ├── connections_list │ ├── README │ └── connections_list.pl ├── ib_perf │ ├── README │ └── ib_perf.py ├── webpage_loadtime │ ├── README │ └── webpage_loadtime.py ├── lvs_active_inactive_connections │ ├── README │ └── lvs_active_inactive_connections.sh ├── lvs_connections │ ├── README │ └── lvs_connections.sh ├── lvs_connections_enhanced │ ├── README │ └── lvs_connections_enhanced.sh ├── aoe │ └── ganglia_coraid_err.pl ├── ethtool_interface_stats │ └── ganglia_ethtool_stats.pl └── network_gmetric.sh ├── database ├── oracle_connections │ ├── oracle_connections.sh │ └── README ├── mongodb │ ├── README │ ├── LICENSE │ └── ganglia_mongodb.py ├── mysql_replication │ ├── README │ └── mysql_replication.py ├── mysql_threads │ ├── README │ └── mysql_threads.sh ├── cassandra │ ├── README │ └── cassandra_gmetric.pl ├── redis │ ├── README.markdown │ └── redis_gmetric.rb ├── mysql_stats │ └── ganglia_mysql_cluster.pl └── postgres │ └── ganglia_postgres.rb ├── ldap └── openldap │ ├── README.md │ └── ganglia_ldap.py ├── system ├── filehandles_inuse_2.4_kernel │ ├── filehandles_inuse_2.4_kernel.sh │ └── README ├── gpu_usage │ ├── README │ └── gpu_usage.sh ├── logged_in_users │ ├── README │ └── logged_in_users.sh ├── per_user_stats │ ├── README │ └── per_user_stats.pl ├── io_user_load │ ├── README │ └── io_user_load.sh ├── resource_usage_rank │ ├── README │ └── resource_usage_rank.sh ├── per_user_stats_ustat │ └── README └── process_mem_usage │ └── ganglia_proc_mem_usage.sh ├── netapp ├── netapp_iops_report.json ├── netapp_disk_report.json ├── netapp_network_report.json └── README.md ├── nfs ├── nfs_client_calls │ ├── README │ └── nfs_client_calls.sh ├── nfs_stats │ ├── README │ └── nfs_stats.pl └── nfs_stats_vvuksan │ └── ganglia_nfs_stats.pl ├── health ├── ipmi_temp │ ├── README │ └── ipmi_temp.sh ├── lm_sensors │ ├── README │ └── lm_sensors.sh └── hplog_fanspeed │ ├── README │ └── hplog_fanspeed.pl ├── hpc ├── sge_jobs │ ├── README │ ├── sge_jobs.sh │ └── jobqueue_report.php ├── pbs_jobs │ ├── README │ └── pbs_jobs.sh ├── slurm_jobs │ ├── README │ └── slurm_jobs.sh └── cray_nodestat │ ├── README │ └── cray_nodestat.sh ├── power └── apc_stats │ ├── README │ └── apc_stats.pl ├── disk ├── diskusage │ ├── README │ └── diskusage.pl ├── diskio.py │ ├── README │ └── diskio.py ├── disk_wait_gmetric.sh └── disk_gmetric.sh ├── mail ├── qmail │ ├── README │ └── qmail.php └── powermta │ └── ganglia_powermta.php ├── http ├── apache_error │ ├── README │ └── apache_error.pl ├── apache_semaphores │ └── ganglia_apache_semaphores.sh ├── lighttpd │ └── gmetric_lighttpd.sh ├── haproxy │ └── gmetric_haproxy.sh ├── nginx │ └── nginx_stats.sh ├── phpfpm │ └── phpfpm_stats.sh └── varnish │ └── ganglia-varnish-stats.pl ├── backup └── netbackup_jobs │ ├── README │ └── netbackup_jobs.sh ├── memcached ├── memcached.sh │ ├── README │ └── memcached.sh └── memcached.pl │ └── ganglia_memcached.pl ├── README ├── pdns └── README ├── arista ├── README ├── ganglia_arista_transceiver.py └── ganglia_arista_interfaces.py ├── ping └── ganglia_ping.py ├── bgp └── ganglia_bird.py ├── dns └── ganglia_bind_stats.pl ├── apache └── ganglia_apache.pl ├── ARCHIVE └── mysql │ └── mysql_gmetric.sh └── zfs └── gmetric-zpool-status.py /network/tcpconn_breakdown/README: -------------------------------------------------------------------------------- 1 | Shows breakdown of TCP connections as reported by ss 2 | 
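For illustration, a minimal shell sketch of the same idea follows; the gmetric path and the tcp_<state> metric names are assumptions made here, and the bundled ganglia_tcp_connections.pl remains the actual implementation.

    #!/bin/sh
    # Sketch: count TCP sockets per state with ss and push one gmetric per state.
    # Paths and metric names below are assumptions, not taken from the bundled script.
    GMETRIC=/usr/bin/gmetric
    ss -tan | awk 'NR>1 {print $1}' | sort | uniq -c | \
    while read count state; do
        $GMETRIC -t uint16 -n "tcp_${state}" -v "$count" -u connections
    done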
-------------------------------------------------------------------------------- /database/oracle_connections/oracle_connections.sh: --------------------------------------------------------------------------------
1 | #!/bin/sh
2 | CLIENT="/usr/bin/gmetric"
3 | VALUE=`/bin/ps -ef |/bin/grep -c LOCAL`
4 | $CLIENT -t uint16 -n ORACLE_Connections -v $VALUE
5 |
6 |
-------------------------------------------------------------------------------- /ldap/openldap/README.md: --------------------------------------------------------------------------------
1 | This script allows you to collect OpenLDAP metrics. It has been contributed
2 | by the engineering team at Etsy. To use it:
3 |
4 | Add the following to slapd.conf
5 |
6 | database monitor
7 |
8 | access to dn="cn=monitor"
9 | by * read
10 |
11 | Then run the ganglia_ldap.py script periodically.
12 |
-------------------------------------------------------------------------------- /system/filehandles_inuse_2.4_kernel/filehandles_inuse_2.4_kernel.sh: --------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | CLIENT="/usr/bin/gmetric"
4 |
5 | FDS=(`< /proc/sys/fs/file-nr`)
6 |
7 | system_reserved=${FDS[0]}
8 | current_used=${FDS[1]}
9 | fd_limit=${FDS[2]}
10 |
11 | #echo $system_reserved
12 | #echo $current_used
13 | #echo $fd_limit
14 |
15 | exec $CLIENT -t uint16 -n fd_inuse -v $current_used
16 |
-------------------------------------------------------------------------------- /system/gpu_usage/README: --------------------------------------------------------------------------------
1 | Gmetric script pulled from http://ganglia.info/gmetric
2 |
3 | Author: Kris Howard and Vincenzo Annaloro
4 |
5 | Description:
6 |
7 | Script to query GPUs for usage and temperature.
8 | Also checks how many processes are using the GPUs.
9 |
10 | Language: Shell
11 |
12 | Category: Statistics :: Cluster
13 |
14 | Dependencies: awk, nvidia-smi, lsof
15 |
-------------------------------------------------------------------------------- /database/mongodb/README: --------------------------------------------------------------------------------
1 | A simple python script that shells out key metrics from mongodb to gmetric.
2 | Right now it tracks the following:
3 |
4 | * Operations per second
5 | * Memory usage
6 | * Btree statistics
7 | * Master/Slave status
8 | * Current connections
9 | * Database sizes (if pymongo is available)
10 |
11 | It is intended to be run every minute as a cron.
12 |
13 | Thanks!
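As an illustration of the cron setup mentioned above, an entry along these lines would run the collector every minute; the interpreter and install path are assumptions, only the ganglia_mongodb.py name comes from this repository.

    # m h dom mon dow  command
    * * * * * /usr/bin/python /usr/local/bin/ganglia_mongodb.py >/dev/null 2>&1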
14 | -------------------------------------------------------------------------------- /netapp/netapp_iops_report.json: -------------------------------------------------------------------------------- 1 | { 2 | "report_name" : "netapp_iops_report", 3 | "report_type" : "standard", 4 | "title" : "Netapp IOPS", 5 | "vertical_label" : "ops/sec", 6 | "series" : [ 7 | { "metric": "netapp_cifs_ops", "color": "3333bb", "label": "CIFS", "line_width": "2", "type": "stack" }, 8 | { "metric": "netapp_nfs_ops", "color": "ffea00", "label": "NFS", "line_width": "2", "type": "stack" } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /netapp/netapp_disk_report.json: -------------------------------------------------------------------------------- 1 | { 2 | "report_name" : "netapp_disk_report", 3 | "report_type" : "standard", 4 | "title" : "Netapp Disk Report", 5 | "vertical_label" : "bytes/sec", 6 | "series" : [ 7 | { "metric": "netapp_diskio_readbytes", "color": "3333bb", "label": "Disk Read", "line_width": "2", "type": "stack" }, 8 | { "metric": "netapp_diskio_writebytes", "color": "dd0000", "label": "Disk Write", "line_width": "2", "type": "stack" } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /netapp/netapp_network_report.json: -------------------------------------------------------------------------------- 1 | { 2 | "report_name" : "netapp_network_report", 3 | "report_type" : "standard", 4 | "title" : "Netapp Network Report", 5 | "vertical_label" : "bytes/sec", 6 | "series" : [ 7 | { "metric": "netapp_net_sent_bytes", "color": "3333bb", "label": "Net Sent", "line_width": "2", "type": "stack" }, 8 | { "metric": "netapp_net_rcvd_bytes", "color": "dd0000", "label": "Net Recv", "line_width": "2", "type": "stack" } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /nfs/nfs_client_calls/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Karl Kopper 8 | 9 | Description: 10 | 11 | Linux NFS client GETATTR, READ and WRITE calls. 12 | 13 | Language: BASH 14 | 15 | Category: Linux::NFS 16 | 17 | Dependencies: None 18 | -------------------------------------------------------------------------------- /health/ipmi_temp/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 
6 | 7 | Author: Dave Love 8 | 9 | Description: 10 | 11 | Temperature data via IPMI 12 | 13 | Language: shell 14 | 15 | Category: Health Monitoring::Temperature 16 | 17 | Dependencies: bash, ipmitool 18 | -------------------------------------------------------------------------------- /hpc/sge_jobs/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Jesse Becker 8 | 9 | Description: 10 | 11 | Reports number of jobs running, queued, and in error states. 12 | 13 | Language: Shell 14 | 15 | Category: Statistics :: Cluster 16 | 17 | Dependencies: awk 18 | -------------------------------------------------------------------------------- /power/apc_stats/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Greg Wimpey 8 | 9 | Description: 10 | 11 | Collects statistics from an APC UPS monitored by apcupsd 12 | 13 | Language: Perl 14 | 15 | Category: Statistics::Power 16 | 17 | Dependencies: apcupsd 18 | -------------------------------------------------------------------------------- /system/logged_in_users/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Miles Davis 8 | 9 | Description: 10 | 11 | Reports the number of users logged in (from 'who -q') 12 | 13 | Language: bash 14 | 15 | Category: Resource :: Users 16 | 17 | Dependencies: none 18 | -------------------------------------------------------------------------------- /disk/diskusage/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Ryan Sweet 8 | 9 | Description: 10 | 11 | report disk percent used for each local filesystem 12 | 13 | Language: perl 14 | 15 | Category: Disk::Usage 16 | 17 | Dependencies: df, perl>5.0 18 | -------------------------------------------------------------------------------- /mail/qmail/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 
6 | 7 | Author: Pablo Godel 8 | 9 | Description: 10 | 11 | Ganglia gmetric script to display local/remote queue message count. 12 | 13 | Language: PHP 14 | 15 | Category: Network::Email 16 | 17 | Dependencies: php, qmail 18 | -------------------------------------------------------------------------------- /network/connections_list/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Mirko Mariotti 8 | 9 | Description: 10 | 11 | List of connections established from or to a given port 12 | 13 | Language: Perl 14 | 15 | Category: Network :: usage 16 | 17 | Dependencies: none 18 | -------------------------------------------------------------------------------- /network/ib_perf/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Hristo Iliev 8 | 9 | Description: 10 | 11 | Measures average InfiniBand network performance 12 | 13 | Language: Python 14 | 15 | Category: Network :: InfiniBand 16 | 17 | Dependencies: OFED binaries 18 | -------------------------------------------------------------------------------- /health/lm_sensors/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Mike Snitzer 8 | 9 | Description: 10 | 11 | lm_sensors bash script to report cpu temp and fan speed 12 | 13 | Language: Bash 14 | 15 | Category: Health Monitoring::CPU 16 | 17 | Dependencies: lm_sensors 18 | -------------------------------------------------------------------------------- /system/per_user_stats/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Ryan Sweet 8 | 9 | Description: 10 | 11 | reports per user stats for mem/cpu/mum_procs from ps output 12 | 13 | Language: perl 14 | 15 | Category: Statistics::PerUser 16 | 17 | Dependencies: perl, ps 18 | -------------------------------------------------------------------------------- /health/hplog_fanspeed/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 
6 | 7 | Author: Jordi Prats Catala (CESCA) 8 | 9 | Description: 10 | 11 | Measures fan speed using HP's tools 12 | 13 | Language: Perl 14 | 15 | Category: Health Monitoring::Fan speed 16 | 17 | Dependencies: /sbin/hplog 18 | -------------------------------------------------------------------------------- /http/apache_error/README: -------------------------------------------------------------------------------- 1 | For additional information about the repository, please see the 2 | README in the repository top-level. 3 | 4 | Author: Nicolas Marchildon 5 | 6 | Description: 7 | 8 | Reports the number of web server error response codes 9 | 10 | This script can be called by Apache by setting up a special logger: 11 | 12 | LogFormat "%>s" status_only 13 | CustomLog "|/path/to/apache-logs-to-ganglia.pl -d 10" status_only 14 | 15 | Language: Perl 16 | 17 | Category: Services::HTTP 18 | 19 | Dependencies: Getopt::Long 20 | -------------------------------------------------------------------------------- /system/filehandles_inuse_2.4_kernel/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Lester Vecsey 8 | 9 | Description: 10 | 11 | file descriptors in use 12 | 13 | Language: Bourne Shell 14 | 15 | Category: Filesystem :: fd's in use 16 | 17 | Dependencies: Linux 2.4.x /proc 18 | -------------------------------------------------------------------------------- /database/oracle_connections/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Craig Simpson 8 | 9 | Description: 10 | 11 | Reports number of Oracle Client Connections 12 | 13 | Language: sh 14 | 15 | Category: Network :: Oracle Connections 16 | 17 | Dependencies: ps netstat grep 18 | -------------------------------------------------------------------------------- /database/mysql_replication/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: C Viven Rajendra 8 | 9 | Description: 10 | 11 | gmetric to monitor mysql replication delay between master and slave 12 | 13 | Language: Python 14 | 15 | Category: MYSQL replication 16 | 17 | Dependencies: None 18 | -------------------------------------------------------------------------------- /database/mysql_threads/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 
6 | 7 | Author: Steve Traylen 8 | 9 | Description: 10 | 11 | Publishes the number of mysql threads ( active connections). 12 | 13 | Language: bash 14 | 15 | Category: Database :: MySQL :: Connections 16 | 17 | Dependencies: mysql 18 | -------------------------------------------------------------------------------- /network/webpage_loadtime/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: C Viven Rajendra 8 | 9 | Description: 10 | 11 | gmetric to monitor how long it take for a web page to load textually 12 | 13 | Language: Python 14 | 15 | Category: Network::Latency 16 | 17 | Dependencies: urllib2 18 | -------------------------------------------------------------------------------- /nfs/nfs_stats/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Greg Wimpey 8 | 9 | Description: 10 | 11 | Linux NFS client and/or server GETATTR, READ, and WRITE calls (extends K. Kopper's script) 12 | 13 | Language: Perl 14 | 15 | Category: Linux :: NFS 16 | 17 | Dependencies: none 18 | -------------------------------------------------------------------------------- /system/logged_in_users/logged_in_users.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Report number of users logged in. Sorry it's not more exciting. :) 4 | # 5 | # Miles Davis 6 | # 7 | CLIENT="/usr/bin/gmetric" 8 | 9 | # Last line in output of "who -q" is in the form "^# users=N$", so just 10 | # grab the last line & split on the equals sign. This works on Linux, IRIX, 11 | # Solaris, & probably most other un*xes. 12 | USERS=`who -q | tail -1 | cut -d = -f 2` 13 | 14 | #echo $USERS 15 | 16 | $CLIENT -t uint16 -n users -v $USERS 17 | 18 | exit $? -------------------------------------------------------------------------------- /hpc/pbs_jobs/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Matt Cuttler 8 | 9 | Description: 10 | 11 | Displays the number of running PBS batch jobs on a given compute node 12 | 13 | Language: Bourne Shell 14 | 15 | Category: Statistics :: Cluster 16 | 17 | Dependencies: PBS or OpenPBS 18 | -------------------------------------------------------------------------------- /hpc/slurm_jobs/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. 
For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Jesse Becker 8 | Edited By: Kris Howard 9 | 10 | Description: 11 | 12 | Reports number of jobs running, queued, and in error states. 13 | 14 | Language: Shell 15 | 16 | Category: Statistics :: Cluster 17 | 18 | Dependencies: awk 19 | -------------------------------------------------------------------------------- /network/lvs_active_inactive_connections/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Frank Zwart 8 | 9 | Description: 10 | 11 | Dynamically monitor available LVS VIP\'s active & inactive connections 12 | 13 | Language: Bash 14 | 15 | Category: network::LVS 16 | 17 | Dependencies: none 18 | -------------------------------------------------------------------------------- /system/io_user_load/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Andrei Chevel 8 | 9 | Description: 10 | 11 | The script measures a range of values (eth0 in and out, scsi in and out, etc.) 12 | 13 | Language: bash 14 | 15 | Category: I/O usage + user load 16 | 17 | Dependencies: none 18 | -------------------------------------------------------------------------------- /system/resource_usage_rank/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Sumanth J.V 8 | 9 | Description: 10 | 11 | Displays the top 5 resource hungry processes currently running. 12 | 13 | Language: BASH 14 | 15 | Category: Resource::Monitoring 16 | 17 | Dependencies: sed, awk, seq, top 18 | -------------------------------------------------------------------------------- /network/lvs_connections/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Lorn Kay 8 | 9 | Description: 10 | 11 | Monitor IPVS (LVS) Real Server connections 12 | 13 | Language: BASH 14 | 15 | Category: Network::LVS 16 | 17 | Dependencies: Working LVS Load Balancer. 
See: www.linuxvirtualserver.org 18 | -------------------------------------------------------------------------------- /disk/diskio.py/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Jason J. W. Williams 8 | 9 | Description: 10 | 11 | Measures Disk IO (Reads & Writes) & Sends That Back To gmetad 12 | 13 | Language: Python 14 | 15 | Category: Disk :: Performance 16 | 17 | Dependencies: python, PCP (Performance CoPilot) 18 | -------------------------------------------------------------------------------- /system/per_user_stats_ustat/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Alexander Sudakov 8 | 9 | Description: 10 | 11 | reports per user stats for "cpu", "mem" , "rss" , "num_procs", "time" using USTAT 12 | 13 | Language: Perl 14 | 15 | Category: Statistics::PerUser 16 | 17 | Dependencies: none 18 | -------------------------------------------------------------------------------- /backup/netbackup_jobs/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Jordi Prats Catala (CESCA) 8 | 9 | Description: 10 | 11 | Measures the number of active and queued jobs on NetBackup 12 | 13 | Language: Bash 14 | 15 | Category: Backup Monitoring::NetBackup 16 | 17 | Dependencies: Netbackup, grep, wc 18 | -------------------------------------------------------------------------------- /memcached/memcached.sh/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Ben Hartshorne 8 | 9 | Description: 10 | 11 | Measures MemCache: num objects, bypets used, num connections, hit percentage 12 | 13 | Language: bash 14 | 15 | Category: Application::memcached 16 | 17 | Dependencies: memcached 1.1.13+ 18 | -------------------------------------------------------------------------------- /network/lvs_connections_enhanced/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 
6 | 7 | Author: Jordi Prats Catala (CESCA) 8 | 9 | Description: 10 | 11 | Monitor IPVS (LVS) Real Server connections (extends Lorn Kay's script) 12 | 13 | Language: bash 14 | 15 | Category: Network::LVS 16 | 17 | Dependencies: LVS Load Balancer 18 | -------------------------------------------------------------------------------- /hpc/cray_nodestat/README: -------------------------------------------------------------------------------- 1 | Gmetric script pulled from http://ganglia.info/gmetric 2 | 3 | If you are the author of the script, and have an updated version, please fork the repo and 4 | submit a pull request. For additional information about the repository, please see the 5 | README in the repository top-level. 6 | 7 | Author: Kris Howard and Fabio Verzelloni 8 | 9 | Description: 10 | 11 | Reports number of nodes up, down, available, and in use as reported by apstat. 12 | 13 | Language: Shell 14 | 15 | Category: Statistics :: Cluster 16 | 17 | Dependencies: awk, apstat 18 | -------------------------------------------------------------------------------- /hpc/pbs_jobs/pbs_jobs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Simple script which displays the number of running PBS batch 4 | # jobs on a given compute node (usually something boring like 5 | # zero, one or two for for SMP compute nodes). 6 | # 7 | # Contributed by Matt Cuttler 8 | 9 | 10 | GMETRIC="/usr/bin/gmetric" 11 | NODE=`/bin/hostname -s` 12 | 13 | # Might have to change path to reflect your PBS install.. 14 | QSTAT=`/usr/local/PBS/bin/qstat -n | grep -i $NODE | wc -l` 15 | 16 | $GMETRIC --name PBSJOBS --type uint16 --units jobs --value $QSTAT 17 | 18 | -------------------------------------------------------------------------------- /backup/netbackup_jobs/netbackup_jobs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### Author: Jordi Prats Catala - CESCA - 2007 4 | ### License to use, modify, and distribute under the GPL 5 | ### http://www.gnu.org/licenses/gpl.txt 6 | 7 | ACTIVE=`/usr/openv/netbackup/bin/admincmd/bpdbjobs -noheader | grep Active | wc -l` 8 | QUEUE=`/usr/openv/netbackup/bin/admincmd/bpdbjobs -noheader | grep -i Queued | wc -l` 9 | 10 | VALUEACTIVE=${ACTIVE## * } 11 | /usr/bin/gmetric -t uint16 -n NB_active_jobs -v$VALUEACTIVE -u '#' 12 | 13 | VALUEQUEUE=${QUEUE## * } 14 | /usr/bin/gmetric -t uint16 -n NB_queued_jobs -v$VALUEQUEUE -u '#' 15 | -------------------------------------------------------------------------------- /database/cassandra/README: -------------------------------------------------------------------------------- 1 | Author: Scott Dworkis 2 | 3 | Description: 4 | 5 | uses cassandra nodetool to sample deltas for counters and 6 | instantaneous values for cfstats and tpstats. note i have only tested 7 | with a special csv mode of gmetric... you can bypass this mode and use 8 | vanilla gmetric with --nocsv, but beware it will generate over 100 9 | forks on a trivial cassandra schema. the patch for the csv enabled 10 | gmetric is here: 11 | 12 | http://bugzilla.ganglia.info/cgi-bin/bugzilla/show_bug.cgi?id=273 13 | 14 | Language: perl 15 | 16 | Category: Database 17 | 18 | Dependencies: cassandra 19 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | This is the official repository for hosting all user-contributed gmetric scripts. 
2 | 3 | To have your scripts added here, please fork the repository, create separate sub-directories for each script 4 | and submit a pull request. 5 | 6 | If you have any questions, you could reach us at: 7 | 8 | ganglia-developers@lists.sourceforge.net 9 | 10 | (subscription required: http://lists.sourceforge.net/lists/listinfo/ganglia-developers) 11 | 12 | Alternatively, you could join our IRC channel on irc.freenode.net #ganglia and 13 | ping one of the developers. 14 | 15 | Thank you for your contribution! 16 | 17 | -- Ganglia Development Team 18 | -------------------------------------------------------------------------------- /pdns/README: -------------------------------------------------------------------------------- 1 | Ganglia gmetric wrapper to submit statistics about powerdns, aka pdns 2 | Specifically, this reports on statistics from the pdns_recursor process 3 | 4 | Copyright Ben Hartshorne, Wikimedia Foundation, Inc, 2012 5 | Released under the GPL v2 or later. Full text of this license is at 6 | http://www.gnu.org/licenses/gpl.txt 7 | 8 | This script should be called by cron once every minute. 9 | 10 | You may have to tweak some variables: 11 | * location of the gmond.conf file 12 | * whether your version of gmetric supports the group flag (>3.1 IIRC) 13 | * the location of the rec_control binary (for talking to the pdns_recursor process) 14 | 15 | Feedback to ben@hartshorne.net welcome, or find me in #ganglia in IRC on Freenode. 16 | 17 | -------------------------------------------------------------------------------- /hpc/slurm_jobs/slurm_jobs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | squeue | awk ' 4 | BEGIN { pending=running=error=0; } 5 | ($5 ~ /^PD/) { pending++; } 6 | ($5 ~ /[rRt]/) { running++; } 7 | ($5 ~ /E/ ) { error++; } 8 | END { 9 | cmd="/usr/bin/gmetric --name slurmq_pending --value "pending" --type uint16"; 10 | system(cmd); 11 | cmd="/usr/bin/gmetric --name slurmq_running --value "running" --type uint16"; 12 | system(cmd); 13 | cmd="/usr/bin/gmetric --name slurmq_error --value "error" --type uint16"; 14 | system(cmd); 15 | #print "Pending="pending" Running="running" Errors="error; 16 | }' 17 | 18 | 19 | exit 20 | 21 | -------------------------------------------------------------------------------- /system/resource_usage_rank/resource_usage_rank.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Author Sumanth J.V (sumanth@cse.unl.edu) 3 | # Date 17-June-2002 4 | 5 | # Ensure that top, seq, sed , awk are in ur path 6 | # Also check if this is the right location of gmetric 7 | GMETRIC=/usr/bin/gmetric 8 | 9 | list=(` 10 | top -b -n 1 |\ 11 | sed -n -e "11,15p" |\ 12 | awk '{print $12, $2, $9, $10, $11}' 13 | `) 14 | 15 | for i in `seq 0 5 24` 16 | do 17 | let id=id+1 18 | 19 | val="Proc:${list[${i}]}_User:${list[$((${i}+1))]}\ 20 | _CPU:${list[$((${i}+2))]}_Mem:${list[$((${i}+3))]}\ 21 | _Time:${list[$((${i}+4))]}" 22 | 23 | $GMETRIC --name "Resource_Usage_Rank ${id}" \ 24 | --value $val --type string --units ' ' 25 | done 26 | 27 | exit 0 28 | -------------------------------------------------------------------------------- /http/apache_semaphores/ganglia_apache_semaphores.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ############################################################## 4 | # You will need util-linux package for this script to work 5 | # On Centos/RHEL type yum 
install util-linux 6 | # Debian/Ubuntu apt-get install util-linux 7 | # 8 | # This script collects the number of Apache semaphores. Trying 9 | # to avoid this problem 10 | # 11 | # http://rackerhacker.com/2007/08/24/apache-no-space-left-on-device-couldnt-create-accept-lock/ 12 | ############################################################## 13 | NUM_SEMAPHORES=`ipcs -s | egrep "apache|www-data" | wc -l` 14 | 15 | GMETRIC_BIN="/usr/bin/gmetric" 16 | 17 | if [ "x$NUM_SEMAPHORES" != "x" ]; then 18 | $GMETRIC_BIN -d 180 -t uint16 -n apache_semaphores -v $NUM_SEMAPHORES 19 | else 20 | echo "Nothing to report." 21 | fi 22 | -------------------------------------------------------------------------------- /health/hplog_fanspeed/hplog_fanspeed.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | ### Author: Jordi Prats Catala - CESCA - 2007 4 | ### License to use, modify, and distribute under the GPL 5 | ### http://www.gnu.org/licenses/gpl.txt 6 | 7 | @HPLOGRES=`/sbin/hplog -f`; 8 | 9 | shift @HPLOGRES; 10 | pop @HPLOGRES; 11 | 12 | for $line (@HPLOGRES) 13 | { 14 | $line=~s/\(\s*(\d+)\)/ $1 /; 15 | @values=split(/\s+/,$line); 16 | @values=reverse @values; 17 | 18 | my $speed=shift @values; 19 | $speed=~s/\W+//ig; 20 | 21 | shift @values; # desc. speed 22 | shift @values; # redundant 23 | shift @values; # status 24 | 25 | pop @values; #null 26 | pop @values; #ID 27 | 28 | my $description=""; 29 | 30 | $description.=$_ for (reverse @values); 31 | 32 | #some cleaning 33 | $description=~s/\W//g; 34 | 35 | system("/usr/bin/gmetric --name ".$description." --value ".$speed." --type uint16 --units rpm"); 36 | } 37 | -------------------------------------------------------------------------------- /database/redis/README.markdown: -------------------------------------------------------------------------------- 1 | Redis gmetric 2 | ============= 3 | 4 | Sends redis metric using gmetric. 5 | Requires ganglia > 3.1.X where --slope=positive will create a COUNTER data source type. 6 | 7 | Usage 8 | ----- 9 | ```sh 10 | redis_gmetric.rb [-h [-p ] [test] 11 | ``` 12 | 13 | If 'test' is there (or any other string), just print the commands, do not execute. 
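For example, invocations might look like the following; the host, port, and install path shown are assumptions, not defaults shipped with the script:

```sh
# Dry run: only print the gmetric commands
./redis_gmetric.rb -h 127.0.0.1 -p 6379 test

# Typical crontab entry to submit metrics every minute
* * * * * /usr/local/bin/redis_gmetric.rb -h 127.0.0.1 -p 6379
```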
14 | 15 | Commands send 16 | -------------- 17 | ```bash 18 | /usr/bin/gmetric -c /etc/ganglia/gmond.conf --name=redis_clients --type=int32 --units=clients --value=2 --slope=both --dmax=600 19 | /usr/bin/gmetric -c /etc/ganglia/gmond.conf --name=redis_used_memory --type=int32 --units=bytes --value=20434336 --slope=both --dmax=600 20 | /usr/bin/gmetric -c /etc/ganglia/gmond.conf --name=redis_connections --type=int32 --units=Conn/s --value=178842 --slope=positive --dmax=600 21 | /usr/bin/gmetric -c /etc/ganglia/gmond.conf --name=redis_commands --type=int32 --units=Cmds/s --value=79281489 --slope=positive --dmax=600 22 | ``` 23 | -------------------------------------------------------------------------------- /network/connections_list/connections_list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $gmetric="/usr/local/ganglia/bin/gmetric"; 4 | $iface="eth0"; 5 | $port="22"; 6 | $metricname="active_ssh"; 7 | 8 | 9 | @connlist=`netstat -atn`; 10 | $esta="none"; 11 | 12 | foreach $i(@connlist) 13 | { 14 | if ($i=~/ESTABLISHED/) 15 | { 16 | $_=$i; 17 | ($arg1,$arg2,$arg3,$from,$to,$state)=split(" "); 18 | $_=$from; 19 | ($fromaddr,$fromport)=split(":"); 20 | $fromaddr=~s/\n//g; 21 | $fromport=~s/\n//g; 22 | $_=$to; 23 | ($toaddr,$toport)=split(":"); 24 | $toaddr=~s/\n//g; 25 | $toport=~s/\n//g; 26 | if ($toport eq $port ) 27 | { 28 | $esta=$esta."To:$toaddr "; 29 | $esta=~s/none//; 30 | } 31 | if ($fromport eq $port ) 32 | { 33 | $esta=$esta."From:$toaddr "; 34 | $esta=~s/none//; 35 | } 36 | } 37 | } 38 | #print "$gmetric -n$metricname -v\"$esta\" -tstring -i$iface"; 39 | `$gmetric -n$metricname -v\"$esta\" -tstring -i$iface`; 40 | -------------------------------------------------------------------------------- /system/process_mem_usage/ganglia_proc_mem_usage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ########################################################################### 4 | # Author: Vladimir Vuksan http://vuksan.com/blog/ 5 | # 6 | # This shell script collects total memory usage for a set of processes 7 | # matching a name e.g. you want to keep track of full memory usage of 8 | # all Apache processes 9 | ########################################################################### 10 | if [ $# -ne 2 ]; then 11 | echo "You need to supply process name and name of metric e.g." 12 | echo " $0 httpd apache_mem_usage" 13 | echo "Exiting ...." 14 | exit 1 15 | fi 16 | 17 | PROCESS_NAME="${1}" 18 | METRIC_NAME="${2}" 19 | GMETRIC_BIN="/usr/bin/gmetric -d 120 " 20 | 21 | MEM_USAGE=`ps -ylC ${PROCESS_NAME} --sort:rss | awk '{ SUM += $8 } END { print SUM*1024 }'` 22 | 23 | if [ "x$MEM_USAGE" != "x" ]; then 24 | $GMETRIC_BIN -t float -n $METRIC_NAME -v $MEM_USAGE -u Bytes 25 | else 26 | echo "Nothing to report. 
Check process name"
27 | fi
28 |
-------------------------------------------------------------------------------- /disk/diskusage/diskusage.pl: --------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # contributed by ryan sweet
3 | my $gmetric="gmetric";
4 | my @df = `df -kl | grep -v "Filesystem"`; # RS: get info from df, leave out first line
5 |
6 | my $calcpercentused;
7 | foreach (@df) # RS: for each line of df output
8 | {
9 | my @line = split(/\s+/, $_); # RS: split the line on whitespace
10 | my @reversed = reverse @line; # RS: reverse the order of @line - this is because IRIX df outputs different items than linux
11 | my $size = $reversed[4]; # RS: the filesystem size is the fifth element in the reversed list
12 | my $used = $reversed[3];
13 | # RS: calculated percent used (df gets it wrong sometimes) is (100(used))/size
14 | $used = $used * 100;
15 | $calcpercentused = int($used/$size);
16 | my $fsname=$line[5]; # RS: get the mount point
17 | $fsname =~ s/\//_/; # RS: replace / with _
18 | if ($fsname eq "_") { $fsname="_root"; }
19 | # RS: send the data to gmond using gmetric
20 | system("$gmetric --name=disk_percent_used$fsname --value=$calcpercentused --type=uint8 --units=percent_used");
21 | }
22 |
-------------------------------------------------------------------------------- /network/lvs_connections/lvs_connections.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # watch_lvs
4 | #
5 | #
6 | # Simple script to report the number of established connections to
7 | # each real server in an LVS cluster. See www.linuxvirtualserver.org.
8 | #
9 | # You must set the list of "servers" and the LVS "serviceport" before
10 | # using this script.
11 | #
12 | servers="127.0.0.1 10.0.0.2 10.0.0.3 10.0.0.4 10.0.0.5 10.0.0.6"
13 |
14 | # Load balanced service in this example is telnet.
15 | serviceport="23"
16 |
17 | for server in $servers
18 | do
19 |
20 | totalconnections=`/sbin/ipvsadm -L -c -n | grep "$server:$serviceport" |grep ESTABLISHED | wc -l`
21 |
22 | # Pull out last octet of host IP for Ganglia report.
23 | host=`/bin/echo $server | /bin/cut -d"." -f 4-`
24 |
25 | # Using a hack to set the hostname in the loopback case.
26 | if [ "$host" = "1" ]; then
27 | host="localhost"
28 | fi
29 |
30 | /usr/bin/gmetric --name host_${host}_port_${serviceport} --value $totalconnections --type int16 --units Connections
31 |
32 | done
33 |
-------------------------------------------------------------------------------- /database/mysql_threads/mysql_threads.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Source a conf file read only by root to get the mysql USER
4 | # we should use.
5 | if [ ! -f /etc/mysql-threads-gmetric.conf ] ; then
6 | echo "/etc/mysql-threads-gmetric.conf does not exist"
7 | exit 1
8 | fi
9 |
10 | . /etc/mysql-threads-gmetric.conf
11 |
12 | # Check there is a gmond.conf file.
13 | if [ ! -f /etc/gmond.conf ] ; then
14 | echo "/etc/gmond.conf does not exist"
15 | exit 1
16 | fi
17 |
18 | # Work out what multicast channel we are on (rather assumes there is only one space).
19 | MCAST=`grep '^mcast_channel' /etc/gmond.conf | cut -d' ' -f 2` 20 | PORT=`grep '^mcast_port' /etc/gmond.conf | cut -d' ' -f 2` 21 | TTL=`grep '^mcast_ttl' /etc/gmond.conf | cut -d' ' -f 2` 22 | 23 | [ -z "$MCAST" ] && MCAST="239.2.11.70" 24 | [ -z "$PORT" ] && PORT=8649 25 | [ -z "$TTL" ] && TTL=1 26 | 27 | 28 | STRING=`mysqladmin -u $USER status` 29 | THREADS=`echo $STRING | sed 's/.*Threads: \([0-9]*\) .*/\1/'` 30 | 31 | 32 | gmetric -tuint32 -c$MCAST -p$PORT -l$TTL -x180 -d300 -nmysql_threads -v$THREADS 33 | 34 | 35 | -------------------------------------------------------------------------------- /arista/README: -------------------------------------------------------------------------------- 1 | Included are two scripts to collect metrics off of Arista switches. 2 | Arista runs Fedora Core 14 i686. You will need to build Ganglia for it 3 | then install. 4 | 5 | 1. ganglia_arista_interfaces.py - provides interface statistics such as 6 | number of bytes in/out, errors, collisions etc. 7 | 2. ganglia_arista_transceivers.py - provides transceiver stats such as 8 | voltage, optical power, temperature 9 | 10 | **30-Apr-2014 - Update to eAPI** 11 | Updated ganglia_arista_interfaces.py to use eAPI which is the support method 12 | for interfacing with EOS programmically. eAPI must be configured on the 13 | node. To configure eAPI, enter the following commands in config 14 | mode: 15 | 16 | ``` 17 | eos#configure 18 | eos(config)#username eapi secret password 19 | eos(config)#management api http-commands 20 | eos(config-mgmt-api-http-cmds)#no shutdown 21 | eos(config-mgmt-api-http-cmds)# 22 | ``` 23 | 24 | In addition the script can now be run either on-box or off-box. Use 25 | python ganglia_arisa_interfaces.py --help to see command line options for 26 | using the updated script with eAPI. 27 | 28 | -------------------------------------------------------------------------------- /database/mongodb/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010 Etsy 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /netapp/README.md: -------------------------------------------------------------------------------- 1 | Netapp metrics gathering script 2 | =============================== 3 | 4 | Principle of operation 5 | =============================== 6 | 7 | This script uses snmpwalk to fetch OID of interest from a Netapp server then 8 | injects those metrics into Ganglia. This script will use Ganglia's Spoof 9 | functionality to create "Netapp" hosts in Ganglia. 10 | 11 | 12 | Install 13 | ======= 14 | 15 | To use modify the ganglia_netapp.php script. You will need to change following 16 | variables in the script. 17 | 18 | $servers= array("serv1","serv2"); 19 | 20 | This is a list of all the Netapp servers you have 21 | 22 | $community = "public"; 23 | 24 | This is your SNMP community string. 25 | 26 | You should run this script quite often ie. I use a shell script like this which I run in a screen session (or you can use runit) 27 | 28 | ``while [ 1 ]; do php ganglia_netapp.php; sleep 15; done`` 29 | 30 | This will run the script every 15 seconds 31 | 32 | Graph reports 33 | =============================== 34 | 35 | If you want report graphs like e.g. network traffic drop the *.json files in this 36 | directory in $GANGLIA_WEB_HOME/graph.d e.g. /var/www/html/ganglia/graph.d 37 | -------------------------------------------------------------------------------- /network/lvs_connections_enhanced/lvs_connections_enhanced.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # watch_lvs 4 | # 5 | # 6 | # Simple script to report the number of established connections to 7 | # each real server in an LVS cluster. See www.linuxvirtualserver.org. 8 | # 9 | # You must set the list of "servers" and the LVS "serviceport" and 10 | # "servicesports" before using this script. 11 | # 12 | # 13 | # Original: Lorn Kay 14 | # Modified: Jordi Prats Catala - CESCA - 2007 15 | # 16 | 17 | servers="192.168.11.1 192.168.11.2 192.168.11.121 192.168.11.122" 18 | servicesports="8080 80 81 8089 2641" 19 | 20 | for serviceport in $servicesports 21 | do 22 | for server in $servers 23 | do 24 | 25 | totalconnections=`/sbin/ipvsadm -L -c -n | grep "$server:$serviceport" |grep ESTABLISHED | wc -l` 26 | 27 | # Pull out last octet of host IP for Ganglia report. 28 | host=`/bin/echo $server | /bin/cut -d"." -f 4-` 29 | 30 | # Using a hack to set the hostname in the loopback case. 31 | if [ "$host" = "1" ]; then 32 | host="localhost" 33 | fi 34 | 35 | /usr/bin/gmetric --name host_${server}_port_${serviceport} --value $totalconnections --type uint16 --units Connections 36 | 37 | done 38 | 39 | done 40 | -------------------------------------------------------------------------------- /http/lighttpd/gmetric_lighttpd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to get lighttpd stats 3 | # Created by @hdanniel 4 | # http://hdanniel.com/sl 5 | 6 | # This script needs the mod_status module enabled in lighttpd.conf 7 | # http://redmine.lighttpd.net/projects/lighttpd/wiki/Docs:ModStatus 8 | 9 | # lighttpd.conf 10 | # server.modules = ( ..., "mod_status", ... 
) 11 | # 12 | # $HTTP["remoteip"] == "127.0.0.1" { 13 | # status.status-url = "/server-status" 14 | # } 15 | 16 | # This script must run as root 17 | 18 | # Paths 19 | WGET="/usr/bin/wget" 20 | GMETRIC="/usr/bin/gmetric" 21 | 22 | STATUS_URL="http://127.0.0.1/server-status" 23 | METRICS_TEMPFILE="/tmp/lighttpdmetrics" 24 | ERRORS_TEMPFILE="/tmp/lighttpderrors" 25 | 26 | USER="" 27 | PASSWORD="" 28 | 29 | $WGET --user=$USER --password=$PASSWORD -q -O - $STATUS_URL?auto > $METRICS_TEMPFILE 30 | if [ -s $METRICS_TEMPFILE ]; then 31 | echo "OK" 32 | $GMETRIC --name lighttpd_busy_servers --value `grep BusyServers $METRICS_TEMPFILE | cut -d " " -f 2` --type float --units "Busy Servers" 33 | $GMETRIC --name lighttpd_idle_servers --value `grep IdleServers $METRICS_TEMPFILE | cut -d " " -f 2` --type float --units "Idle Servers" 34 | else 35 | echo "Can't connect to $STATUS_URL" > $ERRORS_TEMPFILE 36 | fi 37 | rm -f $METRICS_TEMPFILE 38 | 39 | 40 | -------------------------------------------------------------------------------- /health/lm_sensors/lm_sensors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # author: Mike Snitzer 3 | # desc: used to make lm_sensors metrics available to ganglia 4 | 5 | # /etc/sysconfig/ganglia is used to specify INTERFACE 6 | CONFIG=/etc/sysconfig/ganglia 7 | [ -f $CONFIG ] && . $CONFIG 8 | 9 | #default to eth0 10 | if [ -z "$MCAST_IF" ]; then 11 | MCAST_IF=eth0 12 | fi 13 | 14 | GMETRIC_BIN=/usr/bin/gmetric 15 | # establish a base commandline 16 | GMETRIC="$GMETRIC_BIN -i $MCAST_IF" 17 | 18 | SENSORS=/usr/bin/sensors 19 | 20 | # load the lm_sensors modules 21 | module=`/sbin/lsmod | awk '{print $1}' | grep i2c-piix4` 22 | if [ -z "$module" ]; then 23 | /sbin/modprobe i2c-piix4 24 | # lm87 is for supermicro P3TDLE, replace when appropriate 25 | /sbin/modprobe lm87 26 | fi 27 | 28 | # send cpu temps if gmond is running 29 | `/sbin/service gmond status > /dev/null` 30 | if [ $? 
-eq 0 ]; then
31 | # send cpu temperatures
32 | let count=0
33 | for temp in `${SENSORS} | grep emp | cut -b 13-16`; do
34 | $GMETRIC -t float -n "cpu${count}_temp" -u "C" -v $temp
35 | let count+=1
36 | done
37 |
38 | # send cpu fan speed
39 | let count=0
40 | for fan in `${SENSORS} | grep fan | cut -b 9-14`; do
41 | $GMETRIC -t uint32 -n "cpu${count}_fan" -u "RPM" -v $fan
42 | let count+=1
43 | done
44 | fi
-------------------------------------------------------------------------------- /network/lvs_active_inactive_connections/lvs_active_inactive_connections.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Frank Zwart frank.kanariepietje.nl
3 | # Script detects VIPs in use and collects active/inactive connections
4 |
5 |
6 | cat /proc/net/ip_vs > /tmp/ganglia_ipvs_tmp
7 | echo "TCP" >> /tmp/ganglia_ipvs_tmp
8 |
9 | ACTIVE_CONNECTIONS=0
10 | INACTIVE_CONNECTIONS=0
11 |
12 | while read VAL1 VAL2 VAL3 VAL4 VAL5 VAL6 VAL7; do
13 |
14 | if [ "${VAL1}" = "TCP" ] ; then
15 | if [[ ${PREVIOUS_WAS_REALSERVER} = "YES" ]] && [[ ${SERVICE} != "" ]];then
16 | H1=`echo ${SERVICE} | cut -b1-4`
17 | H2=`echo ${SERVICE} | cut -b5-8`
18 | H3=`echo ${SERVICE} | cut -b10-13`
19 | SERVICE=`printf "VIP_%d.%d.%d.%d_port_%d" 0x${H1%??} 0x${H1#??} 0x${H2%??} 0x${H2#??} 0x${H3}`
20 | /usr/bin/gmetric --type uint32 --units ActiveConnections --name ${SERVICE}-Active --value ${ACTIVE_CONNECTIONS}
21 | /usr/bin/gmetric --type uint32 --units InactiveConnections --name ${SERVICE}-Inactive --value ${INACTIVE_CONNECTIONS}
22 | ACTIVE_CONNECTIONS=0
23 | INACTIVE_CONNECTIONS=0
24 | fi
25 | SERVICE=${VAL2}
26 | PROTOCOL=${VAL1}
27 | elif [ "${VAL3}" = "Route" ]; then
28 | ACTIVE_CONNECTIONS=`expr ${ACTIVE_CONNECTIONS} + ${VAL5}`
29 | INACTIVE_CONNECTIONS=`expr ${INACTIVE_CONNECTIONS} + ${VAL6}`
30 | PREVIOUS_WAS_REALSERVER="YES"
31 | fi
32 | done < /tmp/ganglia_ipvs_tmp
-------------------------------------------------------------------------------- /system/io_user_load/io_user_load.sh: --------------------------------------------------------------------------------
1 | #!/usr/local/bin/bash
2 | TOP=/usr/bin/top
3 | AWK=/bin/awk
4 | GMETRIC=/usr/bin/gmetric
5 |
6 | $TOP -ibn 1 | $AWK /COMMAND/,/++++++++++/ | head -2 | tail -1 > /tmp/t$$
7 | $GMETRIC --name UserName --value `$AWK '{print($2)}' /tmp/t$$` --type string --units 'name'
8 | $GMETRIC --name UserProg --value `$AWK '{print($12)}' /tmp/t$$` --type string --units 'name'
9 | $GMETRIC --name UserCPU --value `$AWK '{print($9)}' /tmp/t$$` --type float --units '%'
10 | $GMETRIC --name UserTime --value `$AWK '{print($11)}' /tmp/t$$` --type string --units 'min:sec'
11 |
12 | $GMETRIC --name eth0_out --value ` grep eth0 /proc/net/dev | awk -F\: '{print($2)}' | awk '{print($9)}' ` --type uint32 --units 'bytes'
13 | $GMETRIC --name eth1_out --value ` grep eth1 /proc/net/dev | awk -F\: '{print($2)}' | awk '{print($9)}' ` --type uint32 --units 'bytes'
14 |
15 |
16 | $GMETRIC --name eth0_in --value ` grep eth0 /proc/net/dev | awk -F\: '{print($2)}' | awk '{print($1)}' ` --type uint32 --units 'bytes'
17 | $GMETRIC --name eth1_in --value ` grep eth1 /proc/net/dev | awk -F\: '{print($2)}' | awk '{print($1)}' ` --type uint32 --units 'bytes'
18 |
19 | $GMETRIC --name SCSI0read --value ` grep 'Total transfers' /proc/scsi/aic7xxx/0 | awk -F\( '{print($2)}' | awk '{print($1)}' ` --type uint32 --units 'qnty'
20 | $GMETRIC --name SCSI0writ --value ` grep 'Total transfers'
/proc/scsi/aic7xxx/0 | awk -F\( '{print($2)}' | awk '{print($4)}' ` --type uint32 --units 'qnty' 21 | -------------------------------------------------------------------------------- /hpc/sge_jobs/sge_jobs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #adjust for the local environment 4 | source /usr/local/sge/default/common/settings.sh 5 | 6 | qstat | awk ' 7 | BEGIN { pending=running=error=0; } 8 | /^[ 1-9][0-9]/ && ($5 ~ /^qw/) { pending++; } 9 | /^[ 1-9][0-9]/ && ($5 ~ /[rRt]/) { running++; } 10 | /^[ 1-9][0-9]/ && ($5 ~ /E/ ) { error++; } 11 | END { 12 | cmd="/usr/bin/gmetric --name sge_pending --value "pending" --type uint16"; 13 | system(cmd); 14 | cmd="/usr/bin/gmetric --name sge_running --value "running" --type uint16"; 15 | system(cmd); 16 | cmd="/usr/bin/gmetric --name sge_error --value "error" --type uint16"; 17 | system(cmd); 18 | #print "Pending="pending" Running="running" Errors="error; 19 | }' 20 | 21 | 22 | exit 23 | 24 | ####################################################################### 25 | 26 | 27 | QP=`grep ' qw ' /tmp/qstat.$$ | wc -l` 28 | if [ $QP -ge 3 ]; then 29 | QP=$(($QP-2)) 30 | fi 31 | /usr/bin/gmetric --name sge_pending --value $QP --type uint16 32 | 33 | QP=`grep ' [rRt] ' /tmp/qstat.$$ | wc -l` 34 | if [ $QP -ge 3 ]; then 35 | QP=$(($QP-2)) 36 | fi 37 | /usr/bin/gmetric --name sge_running --value $QP --type uint16 38 | 39 | QP=`grep ' E ' /tmp/qstat.$$ | wc -l` 40 | if [ $QP -ge 3 ]; then 41 | QP=$(($QP-2)) 42 | fi 43 | /usr/bin/gmetric --name sge_error --value $QP --type uint16 44 | 45 | -------------------------------------------------------------------------------- /hpc/cray_nodestat/cray_nodestat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Script to collect stats about a Cray XT or XE system. 
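# Column mapping (taken from the awk command below, applied to the summary line of `apstat -n`):
#   $2 = total nodes, $3 = up, $4 = in use, $6 = available, $7 = down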
4 | # Reports up, down, avail, and in use 5 | 6 | /usr/bin/apstat -n | tail -1 | awk '{system("/usr/bin/gmetric -nnode_total -v" $2 " -tuint16 -u"$2"")} \ 7 | {system("/usr/bin/gmetric -nnode_avail -v" $6 " -tuint16 -u"$6"")} \ 8 | {system("/usr/bin/gmetric -nnode_up -v" $3 " -tuint16 -u"$3"")} \ 9 | {system("/usr/bin/gmetric -nnode_down -v" $7 " -tuint16 -u"$7"")} \ 10 | {system("/usr/bin/gmetric -nnode_use -v" $4 " -tuint16 -u"$4"")}' 11 | 12 | ######################################################################### 13 | # Previous Iteration 14 | ######################################################################## 15 | 16 | #NODE_TOTAL=$(/usr/bin/apstat -n | tail -1 | awk '{print $2}') 17 | #NODE_AVAIL=$(/usr/bin/apstat -n | tail -1 | awk '{print $6}') 18 | #NODE_UP=$(/usr/bin/apstat -n | tail -1 | awk '{print $3}') 19 | #NODE_DOWN=$(/usr/bin/apstat -n | tail -1 | awk '{print $7}') 20 | #NODE_USE=$(/usr/bin/apstat -n | tail -1 | awk '{print $4}') 21 | 22 | #$APPS/system/ganglia-3.1.7/bin/gmetric -n node_total -v $NODE_TOTAL -t string -u nodes 23 | #$APPS/system/ganglia-3.1.7/bin/gmetric -n node_avail -v $NODE_AVAIL -t float -u nodes 24 | #$APPS/system/ganglia-3.1.7/bin/gmetric -n node_up -v $NODE_UP -t float -u nodes 25 | #$APPS/system/ganglia-3.1.7/bin/gmetric -n node_down -v $NODE_DOWN -t float -u nodes 26 | #$APPS/system/ganglia-3.1.7/bin/gmetric -n node_use -v $NODE_USE -t float -u nodes 27 | 28 | -------------------------------------------------------------------------------- /database/mysql_stats/ganglia_mysql_cluster.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -W 2 | 3 | ############################################################################## 4 | # This script queries NDB MGM for data and index usages on the particular 5 | # node. Run out of cron on NDB nodes 6 | ############################################################################## 7 | 8 | # NEED TO MODIFY FOLLOWING 9 | # Adjust this variables appropriately. Feel free to add any options to gmetric_command 10 | # necessary for running gmetric in your environment to gmetric_options e.g. -c /etc/gmond.conf 11 | my $gmetric_exec = "/usr/bin/gmetric"; 12 | my $gmetric_options = ""; 13 | 14 | # DON"T TOUCH BELOW UNLESS YOU KNOW WHAT YOU ARE DOING 15 | if ( ! -x $gmetric_exec ) { 16 | die("Gmetric binary is not executable. Exiting..."); 17 | } 18 | 19 | my $gmetric_command = "echo " . $gmetric_exec . " " . $gmetric_options; 20 | my $debug = 0; 21 | 22 | my $node_id = 1; 23 | 24 | $stats_command = 'ndb_mgm -e "$node_id REPORT MemoryUsage"'; 25 | 26 | ##################################################### 27 | # Get the new stats 28 | ##################################################### 29 | open(NEWSTATUS, $stats_command . " |"); 30 | 31 | while() { 32 | if (/^Node (.*): (.*) usage is (\S+)(\%)/) { 33 | my $name = lc($2); 34 | my $value = $3; 35 | 36 | if ( $debug == 0 ) { 37 | system($gmetric_command . " -g mysql_cluster -u 'pct' -tfloat -n ndb_" . $name . "_usage -v " . $value); 38 | } else { 39 | print "$name is $value\n"; 40 | } 41 | 42 | } 43 | } 44 | close(NEWSTATUS); 45 | -------------------------------------------------------------------------------- /mail/qmail/qmail.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/php 2 | __toString(); 28 | 29 | if ( $qdir[0] != '.' 
) 30 | { 31 | foreach( new DirectoryIterator($path.DIRECTORY_SEPARATOR.$qdir) as $file ) { 32 | $fname = $file->__toString(); 33 | if ( $fname[0] != '.') $total++; 34 | } 35 | 36 | } 37 | 38 | } 39 | 40 | return $total; 41 | 42 | } 43 | 44 | public function countRemote() 45 | { 46 | $path = $this->queue_path.DIRECTORY_SEPARATOR.'remote'; 47 | $remote = $this->countQueue( $path ); 48 | 49 | return $remote; 50 | } 51 | 52 | public function countLocal() 53 | { 54 | 55 | $path = $this->queue_path.DIRECTORY_SEPARATOR.'local'; 56 | $local = $this->countQueue( $path ); 57 | 58 | return $local; 59 | } 60 | 61 | } 62 | 63 | $q = new Qmail(); 64 | 65 | $r = $q->countRemote(); 66 | $l = $q->countLocal(); 67 | 68 | exec( "/usr/bin/gmetric --name qmail_remote_queue --value $r --type int16 --units Messages" ); 69 | exec( "/usr/bin/gmetric --name qmail_local_queue --value $l --type int16 --units Messages" ); -------------------------------------------------------------------------------- /http/haproxy/gmetric_haproxy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to get haproxy stats 3 | # Created by @hdanniel 4 | # http://hdanniel.com/sl 5 | 6 | # This script needs the stats socket option in haproxy.cfg 7 | 8 | # haproxy.cfg 9 | # global 10 | # ... 11 | # stats socket /path/to/haproxy.sock 12 | # ... 13 | 14 | # Other requirements 15 | # * must run as root 16 | # * socat installed 17 | # * only one backend 18 | 19 | # Also you can use Net-SNMP perl plugin for HAProxy to get stats through SNMP 20 | # http://haproxy.1wt.eu/download/contrib/netsnmp-perl/haproxy.pl 21 | 22 | GMETRIC="/usr/bin/gmetric" 23 | SOCKET_PATH="/var/run/haproxy.sock" 24 | METRICS_TEMPFILE="/tmp/haproxymetrics" 25 | ERRORS_TEMPFILE="/tmp/haproxyerrors" 26 | 27 | declare -a SERVER 28 | 29 | echo "show stat -1 7 -1" | socat unix-connect:$SOCKET_PATH stdio | grep -v svname > $METRICS_TEMPFILE 30 | if [ -s $METRICS_TEMPFILE ]; then 31 | echo "OK" 32 | cat $METRICS_TEMPFILE | while read line; 33 | do 34 | PX=($(echo "${line}"|cut -d "," -f 1,5,34|tr ',' ' ')) 35 | pxname=$(echo ${PX[0]} | tr '[:upper:]' '[:lower:]') 36 | SERVER=($(echo "${line}"|cut -d "," -f 2,5,34|tr ',' ' ')) 37 | svname=$(echo ${SERVER[0]} | tr '[:upper:]' '[:lower:]') 38 | scur=${SERVER[1]} 39 | rate=${SERVER[2]} 40 | $GMETRIC --name "ha_"$pxname"_"$svname"_current_sessions" --value $scur --type float --units "Current Sessions" 41 | $GMETRIC --name "ha_"$pxname"_"$svname"_session_rate" --value $rate --type float --units "Session Rate" 42 | done 43 | else 44 | echo "Can't connect to $SOCKET_PATH" > $ERRORS_TEMPFILE 45 | fi 46 | rm -f $METRICS_TEMPFILE 47 | -------------------------------------------------------------------------------- /system/gpu_usage/gpu_usage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Script to collect stats about NVidia GPUs 4 | # Written for 2 GPUs per machine 5 | # Reports cpu usage, mem usage, temp, fan speed, and number of processes using the GPUs 6 | 7 | # Variables 8 | # Set the variables we are collecting 9 | # 10 | GMETRIC="/usr/bin/gmetric" 11 | 12 | # GPU Usage for card 0 and 1 13 | GPU_USAGE_0=$(nvidia-smi -q -g 0 |egrep "GPU.[^0].*\:" |awk {' print $3 '} |sed s/%//g) 14 | GPU_USAGE_1=$(nvidia-smi -q -g 1 |egrep "GPU.[^0].*\:" |awk {' print $3 '} |sed s/%//g) 15 | MEM_USAGE_0=$(nvidia-smi -q -g 0 |egrep "Mem" |awk {' print $3 '} |sed s/%//g) 16 | MEM_USAGE_1=$(nvidia-smi -q -g 1 |egrep "Mem" |awk {' 
print $3 '} |sed s/%//g) 17 | TEMP_0=$(nvidia-smi -q -g 0 |egrep "Temp" |awk {' print $3 '} |sed s/%//g) 18 | TEMP_1=$(nvidia-smi -q -g 1 |egrep "Temp" |awk {' print $3 '} |sed s/%//g) 19 | FAN_0=$(nvidia-smi -q -g 0 |egrep "Fan" |awk {' print $4 '} |sed s/%//g) 20 | FAN_1=$(nvidia-smi -q -g 1 |egrep "Fan" |awk {' print $4 '} |sed s/%//g) 21 | NPROC_0=$(/usr/sbin/lsof /dev/nvidia0|awk {' print $2 '} | grep -v PID|sort -u |wc -l) 22 | NPROC_1=$(/usr/sbin/lsof /dev/nvidia1|awk {' print $2 '} | grep -v PID|sort -u |wc -l) 23 | 24 | 25 | $GMETRIC -n GPU_USAGE_0 -v $GPU_USAGE_0 -t uint16 -u '%' 26 | $GMETRIC -n GPU_USAGE_1 -v $GPU_USAGE_1 -t uint16 -u '%' 27 | $GMETRIC -n MEM_USAGE_0 -v $GPU_USAGE_0 -t uint16 -u '%' 28 | $GMETRIC -n MEM_USAGE_1 -v $GPU_USAGE_1 -t uint16 -u '%' 29 | $GMETRIC -n TEMP_GPU_0 -v $TEMP_0 -t uint16 -u Celcius 30 | $GMETRIC -n TEMP_GPU_1 -v $TEMP_1 -t uint16 -u Celcius 31 | $GMETRIC -n FAN_GPU_0 -v $FAN_0 -t uint16 -u '%' 32 | $GMETRIC -n FAN_GPU_1 -v $FAN_1 -t uint16 -u '%' 33 | $GMETRIC -n Num_Procs_GPU_0 -v $NPROC_0 -t uint16 -u Procs 34 | $GMETRIC -n Num_Procs_GPU_1 -v $NPROC_1 -t uint16 -u Procs 35 | -------------------------------------------------------------------------------- /network/webpage_loadtime/webpage_loadtime.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | ############################################################### 3 | # gmetric to monitor how long it take for a web page to load textually 4 | # put this in a cronjob for every minute or 5 mins or however often 5 | # you desire it to run, change the url to want to monitor below 6 | ############################################################### 7 | # REQs: python, gmetric 8 | # DATE: 01 July 2008 9 | # C Viven Rajendra, vivenrajendra@gmail.com 10 | ############################################################### 11 | import time, os, urllib2 12 | import urllib2, gzip, StringIO 13 | 14 | ######################################### 15 | # change this to the appropriate values 16 | url_to_monitor = "http://www.cse.iitb.ac.in" 17 | ## do not change below two default values unless you have done it on your machine 18 | mcast_channel = '239.2.11.71' 19 | mcast_port = 8649 20 | ########################################## 21 | 22 | 23 | def get(uri): 24 | try: 25 | request = urllib2.Request(uri) 26 | request.add_header("Accept-encoding", "gzip") 27 | usock = urllib2.urlopen(request) 28 | data = usock.read() 29 | if usock.headers.get('content-encoding', None) == 'gzip': 30 | data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read() 31 | return data 32 | except Exception, e: 33 | print e # your error handling here 34 | 35 | def wget_time(urli): 36 | start_t = time.time() 37 | get(urli) 38 | end_t = time.time() 39 | total_delay = end_t - start_t 40 | gangliaMetric = "/usr/bin/gmetric --name=wget.index.page --value=" + str(total_delay) + " --type=double --units=seconds --mcast_channel='" + mcast_channel +"' --mcast_port=" + str(mcast_port)" 41 | res = os.system(gangliaMetric) 42 | 43 | 44 | if __name__ == "__main__": 45 | wget_time(url_to_monitor) 46 | -------------------------------------------------------------------------------- /ping/ganglia_ping.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ############################################################################### 4 | # Get ping metrics. Supply an IP address or a name 5 | # 6 | # Uses ping command. 
Make sure you can run ping as the user. 7 | # 8 | # AUTHOR: Vladimir Vuksan 9 | ############################################################################### 10 | import subprocess 11 | import sys 12 | import re 13 | import os 14 | 15 | # Variables that can be set 16 | ping_binary = "/bin/ping" 17 | ganglia_metric_group = "gw_ping" 18 | gmetric_bin = "/usr/bin/gmetric" 19 | number_of_pings = "5" 20 | 21 | if (len(sys.argv) < 2): 22 | print "\nSupply name of host to ping....\n" 23 | exit(1) 24 | 25 | host = sys.argv[1] 26 | 27 | ping = subprocess.Popen( 28 | [ ping_binary, "-W", "1", "-c", number_of_pings , host], 29 | stdout = subprocess.PIPE, 30 | stderr = subprocess.PIPE 31 | ) 32 | 33 | out, error = ping.communicate() 34 | 35 | rtt_re = re.compile("(.*)min(.*) = (?P\d+.\d+)/(?P\d+.\d+)/(?P\d+.\d+)/(?P\d+.\d+) ms$") 36 | pkts_re = re.compile("(.*), (?P.*)% packet loss") 37 | 38 | gmetric_bin = gmetric_bin + " -g " + ganglia_metric_group + " -t float" 39 | 40 | for line in out.split('\n'): 41 | regMatch = rtt_re.match(line) 42 | if regMatch: 43 | linebits = regMatch.groupdict() 44 | for key in linebits: 45 | try: 46 | dur = float(linebits[key]) / 1000; 47 | except Exception: 48 | dur = 2 49 | 50 | os.system(gmetric_bin + " -u sec -n ping_time_" + host.replace(".","_") + "_" + key + " -v " + str(dur)) 51 | 52 | 53 | regMatch2 = pkts_re.match(line) 54 | if regMatch2: 55 | linebits2 = regMatch2.groupdict() 56 | os.system(gmetric_bin + " -u sec -n ping_pktloss_" + host.replace(".","_") + " -v " + str(linebits2['packet_loss'])) 57 | -------------------------------------------------------------------------------- /database/redis/redis_gmetric.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | # 4 | # Author:: Gilles Devaux () 5 | # Copyright:: Copyright (c) 2011 Formspring.me 6 | # License:: Apache License, Version 2.0 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | 21 | host = ARGV[0] || '127.0.0.1' 22 | port = ARGV[1] || 6379 23 | @test = ARGV[2] == 'test' 24 | 25 | def gmetric(group, name, units, value, slope='both') 26 | cmd = "/usr/bin/gmetric -c /etc/ganglia/gmond.conf --name=#{group}_#{name} --type=#{type} --units=#{units} --value=#{value} --slope=#{slope} --dmax=600" 27 | #ganglia 3.2 28 | #cmd = "/usr/bin/gmetric -c /etc/ganglia/gmond.conf --group=#{group} --name=redis_#{name} --type=#{type} --units=#{units} --value=#{value} --slope=#{slope} --dmax=600" 29 | @test ? puts(cmd) : `#{cmd}` 30 | end 31 | 32 | output = {} 33 | IO.popen("redis-cli -h #{host} -p #{port} info").each do |line| 34 | next if line.empty? 35 | s = line.split(':') 36 | output[s[0]] = s[1].chomp 37 | end 38 | exit if output.empty? 
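# Usage note: arguments are positional and optional -- redis_gmetric.rb [host] [port] [test].
# host defaults to 127.0.0.1 and port to 6379; passing "test" as the third argument prints
# the gmetric commands instead of executing them.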
39 | 40 | all_metrics = {'used_memory' => ['used_memory', 'bytes', 'both'], 41 | 'connected_clients' => ['clients', 'clients', 'both'], 42 | 'total_commands_processed' => ['commands', 'Cmds/s', 'positive'], 43 | 'total_connections_received' => ['connections', 'Conn/s', 'positive']} 44 | 45 | all_metrics.each do |name, params| 46 | gmetric('redis', params[0], params[1], output[name], params[2]) 47 | end 48 | -------------------------------------------------------------------------------- /power/apc_stats/apc_stats.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # 3 | # Grab APC Uninterruptible Power Supply (UPS) stats and report to ganglia 4 | # Requires that apcupsd and associated utilities (i.e., apcaccess) are 5 | # installed. This script has been tested on a RedHat Linux 7.3 system 6 | # running on an APC SmartUPS5000 power supply connected via serial port. 7 | # You may find apcupsd at: http://www.apcupsd.com 8 | # 9 | # This script creates 3 metrics: 10 | # ups_load: Load on UPS as percentage of capacity 11 | # ups_batt_chg: Battery charge as percentage of capacity 12 | # ups_time_left: UPS runtime left in minutes 13 | # 14 | # a typical /etc/cron.d line for this script would be: 15 | # 16 | # * * * * * root /usr/local/bin/apcups_metric > /dev/null 2>&1 17 | # 18 | # Author: Greg Wimpey, Colorado School of Mines 26 May 2004 19 | # Email: gwimpey mines edu 20 | # 21 | # This script may be freely copied, distributed, or modified 22 | # as long as authorship and copyright information is maintained. 23 | # Copyright 2004 Colorado School of Mines 24 | # 25 | # 26 | 27 | $apcaccess='/sbin/apcaccess'; # location of apcaccess command 28 | $statusarg='status'; # argument for apcaccess 29 | $gmetric='/usr/bin/gmetric'; # ganglia gmetric command 30 | 31 | # initialize metrics 32 | $loadpct=0.0; 33 | $bcharge=0.0; 34 | $timeleft=0.0; 35 | 36 | ( -x $apcaccess ) || die "Can't execute $apcaccess\n"; 37 | 38 | open APC,"$apcaccess $statusarg |" || 39 | die "Can't open pipe from $apcaccess $statusarg\n"; 40 | while () { 41 | @field = split ':'; 42 | if ($field[0] =~ /LOADPCT/) { 43 | ($loadpct,$junk) = split ' ',$field[1]; 44 | } 45 | elsif ($field[0] =~ /BCHARGE/) { 46 | ($bcharge,$junk) = split ' ',$field[1]; 47 | } 48 | elsif ($field[0] =~ /TIMELEFT/) { 49 | ($timeleft,$junk) = split ' ',$field[1]; 50 | } 51 | } 52 | close APC; 53 | 54 | # send metrics to ganglia as floats 55 | system("$gmetric -nups_load -v$loadpct -tfloat -u%"); 56 | system("$gmetric -nups_batt_chg -v$bcharge -tfloat -u%"); 57 | system("$gmetric -nups_time_left -v$timeleft -tfloat -umins"); 58 | 59 | exit 0; 60 | -------------------------------------------------------------------------------- /network/tcpconn_breakdown/ganglia_tcp_connections.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use File::Temp; 7 | my $ft = File::Temp->new( 8 | UNLINK => 0, 9 | TEMPLATE => '/tmp/ganglia_tcp.XXXXXXXXXX', 10 | ); 11 | 12 | my $temp_file = $ft->filename; 13 | 14 | $ENV{'PATH'} = '/bin:/usr/bin:/sbin'; 15 | my $ss_command="ss -an | grep -v ^State"; 16 | my $gmetric_command = "/usr/bin/gmetric -g tcpconn -t uint32 -d 86400 "; 17 | 18 | # Dump ss command results into a temp file. We want to do it as quickly as 19 | # possible to avoid blocking any sockets 20 | system("$ss_command > " . 
$temp_file); 21 | 22 | # Let's get breakdown of types of TCP connections 23 | open(STATUS, "-|", "/bin/cat " . $temp_file . "| cut -d ' ' -f 1 | sort | uniq -c | sort -rn"); 24 | 25 | my $total = 0; 26 | my $synrecv = 0; 27 | 28 | while() { 29 | my ($value, $metric) = split; 30 | $metric =~ s/-//g; 31 | $metric = lc($metric); 32 | # 33 | if ( $metric eq "synrecv" ) { 34 | $synrecv = $value; 35 | } 36 | my $full_metric_name = "tcpconn_" . $metric; 37 | system($gmetric_command . " -u conn -n " . $full_metric_name . " -v " . $value); 38 | $total += $value; 39 | } 40 | 41 | close(STATUS); 42 | 43 | ################################################################################ 44 | # Calculate SYN-RECV percentage 45 | ################################################################################ 46 | my $syn_percentage = 100 * ($synrecv / $total); 47 | system($gmetric_command . " -n tcpconn_synrecv_percentage -u pct -T 'Pct of connections in Syn-Recv' -v " . $syn_percentage); 48 | 49 | ################################################################################ 50 | # Find out how many Unique IPS 51 | ################################################################################ 52 | my $uniq_ips = `/bin/cat $temp_file | awk '{ print \$5 }' | cut -f1 -d: | sort | uniq | wc -l`; 53 | 54 | if ( $uniq_ips >= 0 ) { 55 | system($gmetric_command . " -n tcpconn_uniq_ips -u ips -T 'Unique IPs connecting' -v " . $uniq_ips); 56 | } 57 | 58 | 59 | unlink($temp_file); 60 | -------------------------------------------------------------------------------- /http/nginx/nginx_stats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Usage: nginx_stats.sh http://127.0.0.1/server-status 4 | 5 | # Point it to the URL you have your nginx status enabled (see 6 | # http://nginx.org/en/docs/http/ngx_http_stub_status_module.html) 7 | # 8 | # Recommendation: set up a background process with sleep or watch 9 | # that runs this script every N seconds 10 | 11 | RM=/bin/rm 12 | MKTEMP=/bin/mktemp 13 | WGET=/usr/bin/wget 14 | GMETRIC=/usr/bin/gmetric 15 | GREP=/bin/grep 16 | AWK=/bin/awk 17 | 18 | 19 | STATUS_URL=$1 20 | 21 | TMPFILE=`$MKTEMP` 22 | http_proxy= $WGET -q -O $TMPFILE $STATUS_URL 23 | if [ $? 
-eq 0 ]; then 24 | 25 | ACTIVE_CONNECTIONS=`$GREP '^Active' $TMPFILE | $AWK '{print $3}'` 26 | ACCEPTED_CONNECTIONS=`$GREP '^ ' $TMPFILE | $AWK '{print $1}'` 27 | HANDLED_CONNECTIONS=`$GREP '^ ' $TMPFILE | $AWK '{print $2}'` 28 | REQUESTS=`$GREP '^ ' $TMPFILE | $AWK '{print $3}'` 29 | READING_CONNECTIONS=`$GREP '^Read' $TMPFILE | $AWK '{ print $2 }'` 30 | WRITING_CONNECTIONS=`$GREP '^Read' $TMPFILE | $AWK '{ print $4 }'` 31 | WAITING_CONNECTIONS=`$GREP '^Read' $TMPFILE | $AWK '{ print $6 }'` 32 | 33 | $GMETRIC -t uint32 -n nginx_active -x 60 -u connections -g nginx -D "Total number of active connections" -s both -v $ACTIVE_CONNECTIONS 34 | $GMETRIC -t uint32 -n nginx_accepts -x 60 -u connections -g nginx -D "Total number of accepted connections" -s positive -v $ACCEPTED_CONNECTIONS 35 | $GMETRIC -t uint32 -n nginx_handled -x 60 -u connections -g nginx -D "Total number of handled connections" -s positive -v $HANDLED_CONNECTIONS 36 | $GMETRIC -t uint32 -n nginx_requests -x 60 -u requests -g nginx -D "Total number of requests" -s positive -v $REQUESTS 37 | $GMETRIC -t uint32 -n nginx_reading -x 60 -u connections -g nginx -D "Current connections in the reading state" -s both -v $READING_CONNECTIONS 38 | $GMETRIC -t uint32 -n nginx_writing -x 60 -u connections -g nginx -D "Current connections in the writing state" -s both -v $WRITING_CONNECTIONS 39 | $GMETRIC -t uint32 -n nginx_waiting -x 60 -u connections -g nginx -D "Current connections in the waiting state" -s both -v $WAITING_CONNECTIONS 40 | 41 | fi 42 | $RM -f $TMPFILE 43 | -------------------------------------------------------------------------------- /arista/ganglia_arista_transceiver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ############################################################################### 4 | # You need Ganglia installed on Arista or alternatively install the 5 | # gmetric.py from Ganglia_Contrib repo 6 | # 7 | # Don't run this script too often as it takes quite a bit of time 8 | # 9 | # AUTHOR: Vladimir Vuksan 10 | ############################################################################### 11 | import sys 12 | import re 13 | import os 14 | import datetime 15 | import time 16 | import subprocess 17 | 18 | metric_suffix = "transceiver" 19 | gmetric_cmd = "/usr/bin/gmetric -d 4000 -g transceiver " 20 | 21 | transceiver_re=re.compile('(?P\w+)(\s+)(?P[0-9.]+)(\s+)(?P[0-9.]+)(\s+)(?P[0-9.]+)(\s+)(?P[0-9\-.]+)(\s+)(?P[0-9\-.]+)(\s+)(?P.*)$') 22 | 23 | try: 24 | output = subprocess.check_output(["/usr/bin/Cli", "-c", "show interfaces transceiver"]) 25 | now = time.time() 26 | # Parse output 27 | for line in output.split('\n'): 28 | 29 | # First check whether it is showing the absolute value of routes 30 | regMatch = transceiver_re.match(line) 31 | if regMatch: 32 | linebits = regMatch.groupdict() 33 | port = linebits['port'].lower() 34 | os.system( gmetric_cmd + " -t float -n " + port + "_" + metric_suffix + "_temp -u C -v " + linebits['temp']) 35 | os.system( gmetric_cmd + " -t float -n " + port + "_" + metric_suffix + "_voltage -u V -v " + linebits['voltage']) 36 | os.system( gmetric_cmd + " -t float -n " + port + "_" + metric_suffix + "_bias_current -u mA -v " + linebits['bias_current']) 37 | os.system( gmetric_cmd + " -t float -n " + port + "_" + metric_suffix + "_optical_tx_power -u dBm -v " + linebits['optical_tx_power']) 38 | os.system( gmetric_cmd + " -t float -n " + port + "_" + metric_suffix + "_optical_rx_power -u dBm -v " + 
linebits['optical_rx_power']) 39 | #time_diff = now - time.mktime(datetime.datetime.strptime(linebits['last_update'], "%Y-%m-%d %H:%M:%S").timetuple()) 40 | #os.system( gmetric_cmd + " -t float -n " + port + "_" + metric_suffix + "_last_update -u sec -v " + str(time_diff)) 41 | 42 | except OSError, e: 43 | print e 44 | -------------------------------------------------------------------------------- /mail/powermta/ganglia_powermta.php: -------------------------------------------------------------------------------- 1 | data->status->traffic->lastMin->out->msg; 20 | $msgs_in = $stats_array->data->status->traffic->lastMin->in->msg; 21 | $traffic_out = $stats_array->data->status->traffic->lastMin->out->kb * 1000; 22 | $traffic_in = $stats_array->data->status->traffic->lastMin->in->kb * 1000; 23 | 24 | 25 | print "msgs_out: " . $msgs_out . "\n"; 26 | print "msgs_in: " . $msgs_in . "\n"; 27 | 28 | send_to_ganglia("pmta_msgs_out", $msgs_out, "uint32", "msgs/min"); 29 | send_to_ganglia("pmta_msgs_in", $msgs_in, "uint32", "msgs/min"); 30 | send_to_ganglia("pmta_traffic_out", $traffic_out, "double", "Bytes/min"); 31 | send_to_ganglia("pmta_traffic_in", $traffic_in, "double", "Bytes/min"); 32 | 33 | $conn_in = $stats_array->data->status->conn->smtpIn->cur; 34 | $conn_out = $stats_array->data->status->conn->smtpOut->cur; 35 | 36 | print "Connections in: " . $conn_in. "\n"; 37 | print "Connections out: " . $conn_out. "\n"; 38 | 39 | send_to_ganglia("pmta_conn_out", $conn_out, "uint32", "conn"); 40 | send_to_ganglia("pmta_conn_in", $conn_in, "uint32", "conn"); 41 | 42 | $queue_smtp_rcp = $stats_array->data->status->queue->smtp->rcp; 43 | $queue_smtp_dom = $stats_array->data->status->queue->smtp->dom; 44 | $queue_smtp_bytes = $stats_array->data->status->queue->smtp->kb * 1000; 45 | 46 | print "Queue SMTP recipients: " . $queue_smtp_rcp . "\n"; 47 | print "Queue SMTP domains: " . $queue_smtp_dom . "\n"; 48 | print "Queue SMTP kB: " . $queue_smtp_bytes . "\n"; 49 | 50 | send_to_ganglia("pmta_queue_rcpt", $queue_smtp_rcp, "uint32", "rcpts"); 51 | send_to_ganglia("pmta_queue_dom", $queue_smtp_dom, "uint32", "domains"); 52 | send_to_ganglia("pmta_queue_size", $queue_smtp_bytes, "uint32", "Bytes"); 53 | 54 | 55 | ?> 56 | -------------------------------------------------------------------------------- /nfs/nfs_client_calls/nfs_client_calls.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Linux NFS Client statistics 4 | # 5 | # Report number of NFS client read, write and getattr calls since we were last called. 6 | # 7 | # (Use utility "nfsstat -c" to look at the same thing). 
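# The call counters in /proc/net/rpc/nfs are cumulative, so each run saves the current
# values under /tmp and reports only the delta since the previous run; schedule it at a
# fixed interval (for example once a minute from cron) so the deltas stay comparable.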
8 | # 9 | # Note: Uses temp files in /tmp 10 | # 11 | 12 | # GETATTR 13 | if [ -f /tmp/nfsclientgetattr ]; then 14 | thisnfsgetattr=`cat /proc/net/rpc/nfs | tail -1 | awk '{printf "%s\n",$4}'` 15 | lastnfsgetattr=`cat /tmp/nfsclientgetattr` 16 | let "deltagetattr = thisnfsgetattr - lastnfsgetattr" 17 | # echo "delta getattr $deltagetattr" 18 | /usr/bin/gmetric -nnfsgetattr -v$deltagetattr -tuint16 -ucalls 19 | fi 20 | 21 | # READ 22 | if [ -f /tmp/nfsclientread ]; then 23 | thisnfsread=`cat /proc/net/rpc/nfs | tail -1 | awk '{printf "%s\n",$9}'` 24 | lastnfsread=`cat /tmp/nfsclientread` 25 | let "deltaread = thisnfsread - lastnfsread" 26 | # echo "delta read $deltaread" 27 | /usr/bin/gmetric -nnfsread -v$deltaread -tuint16 -ucalls 28 | fi 29 | 30 | # WRITE 31 | if [ -f /tmp/nfsclientwrite ]; then 32 | thisnfswrite=`cat /proc/net/rpc/nfs | tail -1 | awk '{printf "%s\n",$10} 33 | '` 34 | lastnfswrite=`cat /tmp/nfsclientwrite` 35 | let "deltawrite = thisnfswrite - lastnfswrite" 36 | # echo "delta write $deltawrite" 37 | /usr/bin/gmetric -nnfswrite -v$deltawrite -tuint16 -ucalls 38 | fi 39 | 40 | # NFS Quality Assurance RATIO (nfsqaratio) 41 | # If this value shrinks too much then perhaps an application 42 | # program change introduced excessive GETATTR calls into production. 43 | if [ "$deltagetattr" -ne 0 ];then 44 | let "nfsqaratio = (deltaread + deltawrite) / deltagetattr" 45 | /usr/bin/gmetric -nnfsqaratio -v$nfsqaratio -tuint16 -ucalls 46 | fi 47 | 48 | 49 | # Update the old values on disk for the next time around. (We ignore 50 | # the fact that they have probably already changed while we made this 51 | # calculation). 52 | cat /proc/net/rpc/nfs | tail -1 | awk '{printf "%s\n",$9}' > /tmp/nfsclientread 53 | cat /proc/net/rpc/nfs | tail -1 | awk '{printf "%s\n",$10}' > /tmp/nfsclientwrite 54 | cat /proc/net/rpc/nfs | tail -1 | awk '{printf "%s\n",$4}' > /tmp/nfsclientgetattr -------------------------------------------------------------------------------- /database/mysql_replication/mysql_replication.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | ############################################################### 3 | # gmetric to monitor mysql replication delay between master and slave 4 | # put this in a cronjob on the slave for every minute or 5 mins or however 5 | # often you desire it to run, be sure to check the parameters 6 | # given below 7 | ############################################################### 8 | # REQs: python, gmetric 9 | # DATE: 01 July 2008 10 | # C Viven Rajendra, vivenrajendra@gmail.com 11 | ############################################################### 12 | import commands, os 13 | 14 | ######################################### 15 | # change these to the appropriate values 16 | username = "root" 17 | use_passwd = True # change this to False if you do not use a password to connect 18 | password = "db" 19 | ## do not change below two default values unless you have done it on your machine 20 | mcast_channel = '239.2.11.71' 21 | mcast_port = 8649 22 | ########################################## 23 | 24 | 25 | 26 | # mysql -u root -pdb -e 'show slave status\G' | grep 'Seconds_Behind_Master' 27 | 28 | ## do not touch anything below unless you are sure of what you are doing ########## 29 | 30 | if __name__ == "__main__": 31 | gmetricCommand_usingPassword = "mysql -u " + username + " -p" + password +" -e 'show slave status\G' | grep 'Seconds_Behind_Master'" 32 | gmetricCommand_withoutPassword = "mysql -u " + 
username + " -e 'show slave status\G' | grep 'Seconds_Behind_Master'" 33 | s = None 34 | o = None 35 | if use_passwd: 36 | s,o = commands.getstatusoutput(gmetricCommand_usingPassword) 37 | else: 38 | s,o = commands.getstatusoutput(gmetricCommand_withoutPassword) 39 | print "status", s 40 | print "output", o 41 | if o == "" or s!=0 or s==256: 42 | print "Error : Probabaly, this is not a slave." 43 | elif s==0: 44 | o = o.split() 45 | print o[0] 46 | print o[1] 47 | if o[1] == "NULL": 48 | print "Error : Probabaly, slave cannot connect to master or try 'mysql>start slave'." 49 | else: 50 | gangliaMetric = "/usr/bin/gmetric --name=mysql_SecondsBehindMaster --value=" + str(o[1]) + " --type=uint8 --units=seconds --mcast_channel='" + mcast_channel +"' --mcast_port=" + str(mcast_port) 51 | print gangliaMetric 52 | res = os.system(gangliaMetric) 53 | -------------------------------------------------------------------------------- /health/ipmi_temp/ipmi_temp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Sending temperature data to Ganglia via ipmitool sensor readings. 4 | # Any args are passed as extra args to gmetric. 5 | 6 | # Dave Love / , 2008-07, public domain 7 | 8 | # Can be run from cron, for instance: 9 | # # The multicast channel is currently different on each cluster, 10 | # # due to Streamline. This is for Ganglia 2 config. 11 | # */5 * * * * root /usr/local/sbin/gmetric-temp -c $(awk '/^mcast_channel / {print $2}' /etc/gmond.conf) 12 | 13 | # Avoid sending at the same time as all other nodes (modulo lack of 14 | # synchronization of cron on each host and the slowness of ipmitool, 15 | # which perhaps makes this irrelevant). 16 | sleep $(($RANDOM / 1000)) 17 | 18 | # Sample output from `ipmitool sdr type Temperature': 19 | # X4100: 20 | # sys.tempfail | 03h | ok | 23.0 | Predictive Failure Deasserted 21 | # mb.t_amb | 05h | ok | 7.0 | 31 degrees C 22 | # fp.t_amb | 14h | ok | 12.0 | 25 degrees C 23 | # pdb.t_amb | 1Bh | ok | 19.0 | 27 degrees C 24 | # io.t_amb | 22h | ok | 15.0 | 26 degrees C 25 | # p0.t_core | 29h | ok | 3.0 | 44 degrees C 26 | # p1.t_core | 32h | ok | 3.1 | 43 degrees C 27 | # X2200: 28 | # CPU 0 Temp | 90h | ok | 3.1 | 44 degrees C 29 | # CPU 1 Temp | 91h | ok | 3.2 | 48 degrees C 30 | # Ambient Temp0 | 92h | ok | 7.6 | 33 degrees C 31 | # Ambient Temp1 | 97h | ok | 7.6 | 44 degrees C 32 | # Supermicro: 33 | # CPU 1 | 00h | ok | 7.1 | 45 degrees C 34 | # CPU 2 | 01h | ok | 7.1 | 47 degrees C 35 | # System | 02h | ok | 7.1 | 33 degrees C 36 | 37 | ipmitool sdr type Temperature | 38 | 39 | # filter out non-readings, e.g. 40 | # CPU 1 | 00h | ns | 7.1 | No Reading 41 | grep 'degrees C' | 42 | 43 | # Initially collapsing multiple spaces helps the matching. 44 | # Then pick out the sensor name and value, separating them with |. 45 | # Temperatures always seem to be integer, but allow them to be float. 46 | sed -e 's/ */ /g' \ 47 | -e "s/\([^|][^|]*\) |.* \([0-9.][0-9.]*\) degrees C$/\1|\2/" | 48 | 49 | while IFS='|' read name value; do 50 | # Ganglia (at least the ancient version we have) doesn't like 51 | # spaces in names -- substitute underscores. 
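# ${name// /_} below is bash pattern substitution: every space in the sensor name
# is replaced with an underscore before the metric is sent.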
52 | gmetric -n ${name// /_} -v $value -t float -u Celsius "$@" 53 | done 54 | -------------------------------------------------------------------------------- /system/per_user_stats/per_user_stats.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # 3 | # a simple script to report some per user stats to ganglia 4 | # contributed by Ryan Sweet 5 | # 6 | my $gmetric="gmetric"; 7 | my $users,@ps; 8 | 9 | # RS: get ps aux output and skip the first line 10 | # RS: ps has different behaviour on IRIX vs Linux 11 | my $uname=`uname`; 12 | if ( $uname =~ /Linux/ ) 13 | { 14 | @ps=`ps aux| grep -v USER`; 15 | }else{ 16 | # RS: pcpu is repeated because this ps doesn't give %mem stats 17 | @ps=`ps -eo user,pid,pcpu,pcpu,vsz,rss,tty,state,stime,time,comm`; 18 | } 19 | 20 | 21 | # RS: iterate over each line of the ps output 22 | foreach my $line (@ps) 23 | { 24 | # RS: eat any leading whitespace 25 | $line =~ s/^\s+//; 26 | 27 | # RS: split the line on whitespace, assigning vars 28 | my ($user,$pid,$cpu,$mem,$vsz,$rss,$tty,$stat,$start,$time,$command,@args) = split(/\s+/, $line); 29 | 30 | # RS: populate the hash %users with references to the cumulative cpu,memz,time vars 31 | $users->{$user}{cpu}+=$cpu; 32 | $users->{$user}{mem}+=$mem; 33 | $users->{$user}{vsz}+=$vsz; 34 | # RS: calculate the time in seconds rather than min:sec 35 | my ($min,$sec)=split(/:/,$time); 36 | $sec+=($min*60); 37 | $users->{$user}{time}+=$time; 38 | $users->{$user}{procs}+=1; # total number of procs per user 39 | 40 | } 41 | 42 | # RS: for each user that was found, send the stats to gmond 43 | foreach my $user (keys %$users) 44 | { 45 | # cpu total 46 | system("gmetric --name=cpu_percent_$user --value=$users->{$user}{cpu} --type=float --units=\%cpu"); 47 | 48 | # mem total (only reported on linux) 49 | if ( $uname =~ /Linux/ ) 50 | { 51 | system("gmetric --name=mem_percent_$user --value=$users->{$user}{mem} --type=float --units=\%mem"); 52 | } 53 | 54 | # vsz total 55 | system("gmetric --name=mem_vsz_kb_$user --value=$users->{$user}{vsz} --type=float --units=kilobytes"); 56 | 57 | # cputime total 58 | system("gmetric --name=cpu_total_time_sec_$user --value=$users->{$user}{time} --type=float --units=seconds"); 59 | 60 | # processes total 61 | system("gmetric --name=procs_total_$user --value=$users->{$user}{procs} --type=float --units=processes"); 62 | 63 | 64 | } 65 | -------------------------------------------------------------------------------- /http/phpfpm/phpfpm_stats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Usage: phpfpm_stats.sh http://127.0.0.1/phpfpm-status 4 | # 5 | # Point it to the URL you have your php-fpm status enabled (status_path 6 | # setting inside php-fpm configuration file) 7 | # 8 | # Recommendation: set up a background process with sleep or watch 9 | # that runs this script every N seconds 10 | 11 | RM=/bin/rm 12 | MKTEMP=/bin/mktemp 13 | WGET=/usr/bin/wget 14 | GMETRIC=/usr/bin/gmetric 15 | GREP=/bin/grep 16 | AWK=/bin/awk 17 | 18 | STATUS_URL=$1 19 | 20 | TMPFILE=`$MKTEMP` 21 | http_proxy= $WGET -q -O $TMPFILE $STATUS_URL 22 | if [ $? 
-eq 0 ]; then 23 | 24 | ACCEPTED_CONNECTIONS=`$GREP '^accepted conn:' $TMPFILE | $AWK '{print $3}'` 25 | LISTEN_QUEUE=`$GREP '^listen queue:' $TMPFILE | $AWK '{print $3}'` 26 | MAX_LISTEN_QUEUE=`$GREP '^max listen queue:' $TMPFILE | $AWK '{print $4}'` 27 | IDLE_PROCESSES=`$GREP '^idle processes:' $TMPFILE | $AWK '{print $3}'` 28 | ACTIVE_PROCESSES=`$GREP '^active processes:' $TMPFILE | $AWK '{print $3}'` 29 | TOTAL_PROCESSES=`$GREP '^total processes:' $TMPFILE | $AWK '{print $3}'` 30 | MAX_ACTIVE_PROCESSES=`$GREP '^max active processes:' $TMPFILE | $AWK '{print $4}'` 31 | MAX_CHILDREN_REACHED=`$GREP '^max children reached:' $TMPFILE | $AWK '{print $4}'` 32 | 33 | $GMETRIC -t uint32 -n phpfpm_accepted_connections -x 60 -u connections -g phpfpm \ 34 | -D "Total number of accepted connections" -s positive -v $ACCEPTED_CONNECTIONS 35 | $GMETRIC -t uint32 -n phpfpm_listen_queue -x 60 -u connections -g phpfpm \ 36 | -D "Current number of queued requests" -s both -v $LISTEN_QUEUE 37 | $GMETRIC -t uint32 -n phpfpm_max_listen_queue -x 60 -u connections -g phpfpm \ 38 | -D "Maximum reached number of queued requests" -s both -v $MAX_LISTEN_QUEUE 39 | $GMETRIC -t uint32 -n phpfpm_idle -x 60 -u processes -g phpfpm \ 40 | -D "Current number of idle workers" -s both -v $IDLE_PROCESSES 41 | $GMETRIC -t uint32 -n phpfpm_active -x 60 -u processes -g phpfpm \ 42 | -D "Current number of active workers" -s both -v $ACTIVE_PROCESSES 43 | $GMETRIC -t uint32 -n phpfpm_total_processes -x 60 -u processes -g phpfpm \ 44 | -D "Current number of workers" -s both -v $TOTAL_PROCESSES 45 | $GMETRIC -t uint32 -n phpfpm_max_active -x 60 -u processes -g phpfpm \ 46 | -D "Maximum reached number of active workers" -s both -v $MAX_ACTIVE_PROCESSES 47 | $GMETRIC -t uint32 -n phpfpm_max_children_reached -x 60 -u times -g phpfpm \ 48 | -D "Number of times that max children were reached" -s both -v $MAX_CHILDREN_REACHED 49 | fi 50 | $RM -f $TMPFILE 51 | -------------------------------------------------------------------------------- /disk/diskio.py/diskio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | ############################################################### 3 | # gmetric For Disk IO 4 | ############################################################### 5 | # REQs: pminfo, gmetric 6 | # DATE: 21 December 2004 7 | # (C)2004 DigiTar, All Rights Reserved 8 | ############################################################### 9 | 10 | import os, re, time 11 | 12 | ### Set Sampling Interval (in secs) 13 | interval = 1 14 | 15 | ### Set PCP Config Parameters 16 | cmdPminfo = "/usr/bin/pminfo -f " 17 | reDiskIO = re.compile(r'"(\w+)"] value (\d+)\n') # RegEx To Compute Value 18 | 19 | ### Set Ganglia Config Parameters 20 | gangliaMetricType = "uint32" 21 | gangliaMcastPort = "8649" 22 | ### NOTE: To add a new PCP disk metric, add the appropriate entry to each dictionary item of gangliaMetrics 23 | ### Each "vertical" column of the dictionary is a different metric entry group. 
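### For example, reading the dictionary below by column: index 0 means the PCP metric
### "disk.dev.read" is reported to Ganglia as "diskio_readbytes" in Kbytes/s with type uint32.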
24 | gangliaMetrics = { "pcpmetric": ["disk.dev.read", "disk.dev.write", "disk.dev.blkread", "disk.dev.blkwrite"], \ 25 | "name": ["diskio_readbytes", "diskio_writebytes", "diskio_readblks", "diskio_writeblks"], \ 26 | "unit": ["Kbytes/s", "Kbytes/s", "Blocks/s", "Blocks/s"], \ 27 | "type": ["uint32", "uint32", "uint32", "uint32"]} 28 | cmdGmetric = "/usr/bin/gmetric" 29 | 30 | ### Zero Sample Lists 31 | ### NOTE: Make sure each sample array has as many (device) sub-arrays as there are pcpmetrics being sampled 32 | ### NOTE: Sub-arrays are artificially sized at 4 disk devices...if you have more disk devices than 4, increase this size. 33 | lastSample = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] 34 | currSample = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] 35 | 36 | ### Read PCP Metrics 37 | while(1): 38 | # Interate Through Each PCP Disk IO Metric Desired 39 | for x in range(0, len(gangliaMetrics["pcpmetric"])): 40 | pminfoInput, pminfoOutput = os.popen2(cmdPminfo + gangliaMetrics["pcpmetric"][x], 't') 41 | deviceLines = pminfoOutput.readlines() 42 | pminfoInput.close() 43 | pminfoOutput.close() 44 | 45 | # Output Metric Data For Each Device Returned By The PCP Metric 46 | deviceIndex = 2 # Skip the first two lines in the buffer 47 | while(deviceIndex < len(deviceLines)): 48 | result = reDiskIO.search(deviceLines[deviceIndex]) 49 | if(result): 50 | currSample[x][deviceIndex] = int(result.group(2)) 51 | cmdExec = cmdGmetric + " --name=" + gangliaMetrics["name"][x] + "_" + \ 52 | result.group(1) + " --value=" + str((currSample[x][deviceIndex] - lastSample[x][deviceIndex])) + \ 53 | " --type=" + gangliaMetrics["type"][x] + " --units=\"" + gangliaMetrics["unit"][x] + "\"" + \ 54 | " --mcast_port=" + gangliaMcastPort 55 | gmetricResult = os.system(cmdExec) 56 | lastSample[x][deviceIndex] = currSample[x][deviceIndex] 57 | deviceIndex = deviceIndex + 1 58 | time.sleep(interval) -------------------------------------------------------------------------------- /database/cassandra/cassandra_gmetric.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use Getopt::Long; 4 | use Data::Dumper; 5 | use strict; 6 | 7 | my(%opts); 8 | my(@opts)=('period=i', 9 | 'nocsv|N', # warning, this could fork hundreds of times 10 | 'debug|d', 11 | 'nodetool|n=s', 12 | ); 13 | 14 | $opts{period} ||= 60; 15 | $opts{nodetool} ||= '/usr/bin/nodetool'; 16 | my($t)=time; 17 | 18 | my $gmetric="/usr/bin/gmetric"; 19 | 20 | die unless GetOptions(\%opts,@opts); 21 | my($s)={}; 22 | 23 | for my $i (0..1){ 24 | my($cfstats) = scalar(`$opts{nodetool} -h localhost cfstats`); 25 | for my $ks ($cfstats =~ /Keyspace: (.*?)----------------/smog){ 26 | my($ksname) = $ks =~ /^(.*?)$/smo; 27 | #print "$ksname ---\n"; 28 | my(@ksv) = $ks =~ /^\t([^\t].*?): (.*?)$/smog; 29 | while(scalar(@ksv)){ 30 | my($n, $v) = (shift(@ksv), shift(@ksv)); 31 | $v =~ s/ ms.//; 32 | next if($v eq 'NaN'); 33 | $n =~ s/([^a-zA-Z0-9-]+)/_/smog; 34 | $n =~ s/(_+)/_/smog; 35 | $n =~ s/^(_)//smo; 36 | $n =~ s/(_)$//smo; 37 | $s->{"ksstats__${ksname}__$n"}[$i] = $v; 38 | #print "ksstats__${ksname}__$n,$v,float,,,,,,\n" 39 | } 40 | for my $cf ($ks =~ /Column Family: (.*?)\n\n/smog){ 41 | my($cfname) = $cf =~ /^(.*?)$/smo; 42 | my(@cfv) = $cf =~ /^\t\t([^\t].*?): (.*?)$/smog; 43 | while(scalar(@cfv)){ 44 | my($n, $v) = (shift(@cfv), shift(@cfv)); 45 | $v =~ s/ ms.//; 46 | next if($v eq 'NaN'); 47 | next if($v eq 'disabled'); 48 | $n =~ s/([^a-zA-Z0-9-]+)/_/smog; 49 | $n =~ 
s/(_+)/_/smog; 50 | $n =~ s/^(_)//smo; 51 | $n =~ s/(_)$//smo; 52 | $s->{"cfstats__${ksname}__${cfname}__$n"}[$i] = $v; 53 | #print "cfstats__${ksname}__${cfname}__$n,$v,float,,,,,,\n" 54 | } 55 | } 56 | } 57 | 58 | my($tpstats) = scalar(`$opts{nodetool} -h localhost tpstats`); 59 | my(@tp) = $tpstats =~ /^([^ ]*)[ ]*?([^ ]*)[ ]*?([^ ]*)[ ]*?([^ ]*)$/smog; 60 | while(scalar(@tp)){ 61 | my($n, $a, $p, $c) = (shift(@tp), shift(@tp), shift(@tp), shift(@tp)); 62 | #print "tpstats__${n}__active,$a,float,,,,,,\n"; 63 | #print "tpstats__${n}__pending,$p,float,,,,,,\n"; 64 | #print "tpstats__${n}__completed,$c,float,,,,,,\n"; 65 | $s->{"tpstats__${n}__active"}[$i] = $a; 66 | $s->{"tpstats__${n}__pending"}[$i] = $p; 67 | $s->{"tpstats__${n}__completed"}[$i] = $c; 68 | } 69 | sleep $opts{period} unless($i); 70 | #print "-----\n"; 71 | } 72 | 73 | my($f); 74 | if(! $opts{nocsv}){ 75 | if($opts{debug}){ 76 | $f=*STDOUT; 77 | }else{ 78 | open($f,"|$gmetric --csv"); 79 | } 80 | } 81 | 82 | while(my($k, $v) = each(%$s)){ 83 | my($d); 84 | if(($k =~ /count$/i) || 85 | ($k =~ /__completed/)){ 86 | next if(($v->[0] eq undef) || ($v->[1] eq undef)); 87 | $d = ($v->[1] - $v->[0]) / $opts{period}; 88 | }else{ 89 | next if($v->[1] eq undef); 90 | $d = $v->[1]; 91 | } 92 | if($opts{nocsv}){ 93 | `$gmetric --type=float --name=$k --value=$d -d 600`; 94 | }else{ 95 | print $f "$k,$d,float,,,,600,,\n"; 96 | } 97 | } 98 | 99 | close($f); 100 | 101 | -------------------------------------------------------------------------------- /bgp/ganglia_bird.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ############################################################################### 4 | # This script collects stats for Bird Internet Routing daemon 5 | # http://bird.network.cz/ 6 | # 7 | # Script needs to run as root or you need a figure out a way to talk to 8 | # bird over socked at /var/run/bird.ctl 9 | # 10 | # AUTHOR: Vladimir Vuksan 11 | ############################################################################### 12 | import sys 13 | import socket 14 | import re 15 | import os 16 | import pickle 17 | import time 18 | 19 | if len(sys.argv) > 1: 20 | provider_name = sys.argv[1] 21 | else: 22 | print "You need to specify provider name to match show protocols all ebgp_provider_name" 23 | sys.exit() 24 | 25 | metric_prefix = "bird_" + provider_name; 26 | gmetric_cmd = "/usr/bin/gmetric -d 240 -g bird "; 27 | old_stats_file = "/var/tmp/bird_stats_" + provider_name + ".pkl" 28 | 29 | # In the output these are the metrics we really care about 30 | interesting_bits = ["received", "rejected", "filtered", "ignored", "accepted"] 31 | 32 | # Regex matches 33 | routes_re=re.compile('(\s+)(Routes:)(\s+)(?P[^ ]+) imported, (?P[^ ]+) exported, (?P[^ ]+) preferred') 34 | updates_re=re.compile('(\s+)(?PImport|Export) (?Pupdates|withdraws):(\s+)(?P[0-9-]+)(\s+)(?P[0-9-]+)(\s+)(?P[0-9-]+)(\s+)(?P[0-9-]+)(\s+)(?P[0-9-]+)') 35 | 36 | old_stats = dict() 37 | ############################################################################## 38 | # Read in old stats if the file is present 39 | ############################################################################## 40 | if os.path.isfile(old_stats_file): 41 | pkl_file = open(old_stats_file, 'rb') 42 | old_stats = pickle.load(pkl_file) 43 | pkl_file.close() 44 | old_time = os.stat(old_stats_file).st_mtime 45 | 46 | s = socket.socket(socket.AF_UNIX) 47 | s.connect("/var/run/bird.ctl") 48 | # We don't care about the banner 49 | 
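# (One recv drains the greeting; the reply to the command sent below is likewise read with a
# single 16384-byte recv, so the script assumes the whole "show protocols all" output fits
# in one read.)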
s.recv(1024) 50 | # Send command to bird 51 | s.send('show protocols all ebgp_' + provider_name + '\n') 52 | data = s.recv(16384) 53 | lines = data.splitlines() 54 | s.close() 55 | 56 | new_time = time.time() 57 | 58 | # Initialize dictionary 59 | new_stats = dict() 60 | 61 | # Parse output 62 | for line in lines: 63 | 64 | # First check whether it is showing the absolute value of routes 65 | regMatch = routes_re.match(line) 66 | if regMatch: 67 | linebits = regMatch.groupdict() 68 | os.system( gmetric_cmd + " -t uint32 -n " + metric_prefix + "_routes_imported -u routes -v " + linebits['imported']) 69 | os.system( gmetric_cmd + " -t uint32 -n " + metric_prefix + "_routes_exported -u routes -v " + linebits['exported']) 70 | os.system( gmetric_cmd + " -t uint32 -n " + metric_prefix + "_routes_preferred -u routes -v " + linebits['preferred']) 71 | 72 | # Import/Export action counters 73 | regMatch = updates_re.match(line) 74 | if regMatch: 75 | linebits = regMatch.groupdict() 76 | key = linebits['type'].lower() + "_" + linebits['action'].lower() 77 | if not key in new_stats: 78 | new_stats[key] = dict() 79 | for bit in interesting_bits: 80 | # Check that it's a number 81 | if linebits[bit].isdigit(): 82 | new_stats[key][bit] = linebits[bit] 83 | else: 84 | new_stats[key][bit] = 0 85 | 86 | output = open(old_stats_file, 'wb') 87 | pickle.dump(new_stats, output) 88 | output.close() 89 | 90 | # Get time difference between last poll and new poll 91 | 92 | # Make sure we have old stats. Otherwise we can't calculate diffs 93 | if len(old_stats) > 0: 94 | time_diff = new_time - old_time 95 | for key in new_stats: 96 | for subkey in new_stats[key]: 97 | diff = (int(new_stats[key][subkey]) - int(old_stats[key][subkey])) / time_diff 98 | if ( diff >= 0 ): 99 | os.system( gmetric_cmd + " -t float -n bird_" + provider_name + "_" + key + "_" + subkey + " -u updates/sec -v " + str(diff)) 100 | -------------------------------------------------------------------------------- /ldap/openldap/ganglia_ldap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import ldap 4 | import pickle 5 | import os 6 | import sys 7 | import time 8 | 9 | 10 | def load_stats(stats_file): 11 | old_stats = None 12 | if os.path.exists(stats_file): 13 | try: 14 | fh = open(stats_file, 'r') 15 | old_stats = pickle.load(fh) 16 | fh.close() 17 | except EOFError: 18 | old_stats = None 19 | return old_stats 20 | 21 | def save_stats(old_stats, stats_file): 22 | fh = open(stats_file, 'w') 23 | pickle.dump(old_stats, fh) 24 | fh.close() 25 | 26 | def main(): 27 | ldap_server = 'localhost' 28 | ldap_port = '389' 29 | stats_file = '/tmp/gmetric-ldap.txt' 30 | gmetric = '/usr/bin/gmetric' 31 | old_stats = load_stats(stats_file) 32 | 33 | stats = {'Total_Connections': {'base': 'cn=Bytes,cn=Statistics,cn=Monitor', 'attr': 'monitorCounter'}, 34 | 'Bytes_Sent': {'base': 'cn=Operations,cn=Monitor', 'attr': 'monitorOpInitiated'}, 35 | 'Initiated_Operations': {'base': 'cn=Operations,cn=Monitor', 'attr': 'monitorOpCompleted'}, 36 | 'Completed_Operations': {'base': 'cn=Referrals,cn=Statistics,cn=Monitor', 'attr': 'monitorCounter'}, 37 | 'Referrals_Sent': {'base': 'cn=Entries,cn=Statistics,cn=Monitor', 'attr': 'monitorCounter'}, 38 | 'Entries_Sent': {'base': 'cn=Bind,cn=Operations,cn=Monitor', 'attr': 'monitorOpInitiated'}, 39 | 'Bind_Operations': {'base': 'cn=Bind,cn=Operations,cn=Monitor', 'attr': 'monitorOpCompleted'}, 40 | 'Unbind_Operations': {'base': 
'cn=Unbind,cn=Operations,cn=Monitor', 'attr': 'monitorOpCompleted'}, 41 | 'Add_Operations': {'base': 'cn=Add,cn=Operations,cn=Monitor', 'attr': 'monitorOpInitiated'}, 42 | 'Delete_Operations': {'base': 'cn=Delete,cn=Operations,cn=Monitor', 'attr': 'monitorOpCompleted'}, 43 | 'Modify_Operations': {'base': 'cn=Modify,cn=Operations,cn=Monitor', 'attr': 'monitorOpCompleted'}, 44 | 'Compare_Operations': {'base': 'cn=Compare,cn=Operations,cn=Monitor', 'attr': 'monitorOpCompleted'}, 45 | 'Search_Operations': {'base': 'cn=Search,cn=Operations,cn=Monitor', 'attr': 'monitorOpCompleted'}, 46 | 'Write_Waiters': {'base': 'cn=Write,cn=Waiters,cn=Monitor', 'attr': 'monitorCounter'}, 47 | 'Read_Waiters': {'base': 'cn=Read,cn=Waiters,cn=Monitor', 'attr': 'monitorCounter'} 48 | } 49 | 50 | # Poll ldap, update the stats 51 | conn = ldap.initialize('ldap://%s' % ldap_server) 52 | conn.start_tls_s() 53 | conn.simple_bind_s() 54 | for key in stats.keys(): 55 | attr = stats[key]['attr'] 56 | num = conn.search(stats[key]['base'], 57 | ldap.SCOPE_BASE, 58 | 'objectClass=*', 59 | [attr]) 60 | try: 61 | result_type, result_data = conn.result(num, 0) 62 | stats[key]['value'] = int(result_data[0][1][attr][0]) 63 | except: 64 | print sys.exc_info() 65 | stats[key]['value'] = -1 66 | # Add the timestamp to the stats, so we know when they were gathered 67 | stats['Timestamp'] = int(time.time()) 68 | 69 | # Save these stats for the future 70 | save_stats(stats, stats_file) 71 | 72 | # If old_stats didn't return something earlier, quit now. We'll pick things 73 | # up on the next run when we have something to compare the current values 74 | # to. 75 | if not old_stats: 76 | sys.exit() 77 | 78 | # Now we do calculations to get the values per second, and send them to 79 | # gmetric 80 | timediff = stats['Timestamp'] - old_stats['Timestamp'] 81 | for key in stats.keys(): 82 | if key == 'Timestamp': 83 | continue 84 | rate = (stats[key]['value'] - old_stats[key]['value']) / timediff 85 | print '%s -u "per sec" -tfloat -n %s -v %s' % (gmetric, key, rate) 86 | os.system('%s -u "per sec" -tfloat -n %s -v %s' % (gmetric, key, rate)) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() -------------------------------------------------------------------------------- /network/ib_perf/ib_perf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import sys 4 | import re 5 | import time 6 | from string import atoi 7 | 8 | # Adjust to match your site configuration 9 | PIDFILE = '/var/run/ibstat.py.pid' 10 | PERFQUERY = '/usr/bin/perfquery' 11 | GMETRIC = '/opt/ganglia/lx-x86/bin/gmetric' 12 | 13 | r = re.compile('^(RcvBytes|XmtBytes)[^0-9]*([0-9]+)') 14 | rr = re.compile('^(RcvPkts|XmtPkts)[^0-9]*([0-9]+)') 15 | 16 | def get_ib_stats(): 17 | global r, rr 18 | rxbytes = 0 19 | txbytes = 0 20 | rxpkts = 0 21 | txpkts = 0 22 | p = os.popen(PERFQUERY + " -r", 'r') 23 | ll = p.readlines() 24 | p.close() 25 | for l in ll: 26 | m = r.match(l) 27 | if m: 28 | if m.groups()[0] == 'RcvBytes': 29 | rxbytes = atoi(m.groups()[1]) 30 | else: 31 | txbytes = atoi(m.groups()[1]) 32 | m = rr.match(l) 33 | if m: 34 | if m.groups()[0] == 'RcvPkts': 35 | rxpkts = atoi(m.groups()[1]) 36 | else: 37 | txpkts = atoi(m.groups()[1]) 38 | return (rxbytes, txbytes, rxpkts, txpkts) 39 | 40 | def main(): 41 | oldtime = time.time() 42 | bytes = get_ib_stats() 43 | rbytes = 0 44 | tbytes = 0 45 | rpkts = 0 46 | tpkts = 0 47 | while True: 48 | time.sleep(1) 49 | newtime = time.time() 50 | bytes = 
get_ib_stats() 51 | rbytes += bytes[0] 52 | tbytes += bytes[1] 53 | rpkts += bytes[2] 54 | tpkts += bytes[3] 55 | # 20 seconds averaging. Adjust if necessary. 56 | if (newtime - oldtime) >= 20.0: 57 | os.spawnl(os.P_WAIT, GMETRIC, 'gmetric', 58 | '--name=ib_bytes_in', 59 | '--value=%f' % (rbytes/(newtime - oldtime)), 60 | '--type=float', 61 | '--units=bytes/sec') 62 | os.spawnl(os.P_WAIT, GMETRIC, 'gmetric', 63 | '--name=ib_bytes_out', 64 | '--value=%f' % (tbytes/(newtime - oldtime)), 65 | '--type=float', 66 | '--units=bytes/sec') 67 | os.spawnl(os.P_WAIT, GMETRIC, 'gmetric', 68 | '--name=ib_pkts_in', 69 | '--value=%f' % (rpkts/(newtime - oldtime)), 70 | '--type=float', 71 | '--units=packets/sec') 72 | os.spawnl(os.P_WAIT, GMETRIC, 'gmetric', 73 | '--name=ib_pkts_out', 74 | '--value=%f' % (tpkts/(newtime - oldtime)), 75 | '--type=float', 76 | '--units=packets/sec') 77 | oldtime = newtime 78 | rbytes = tbytes = 0 79 | rpkts = tpkts = 0 80 | 81 | # Double-fork daemonization 82 | if __name__ == "__main__": 83 | try: 84 | pid = os.fork() 85 | if pid > 0: 86 | sys.exit(0) 87 | except OSError, e: 88 | print >>sys.stderr, "fork #1 failed: %d (%s)" % (e.errno, e.strerror) 89 | sys.exit(1) 90 | 91 | os.chdir("/") 92 | os.setsid() 93 | os.umask(0) 94 | 95 | try: 96 | pid = os.fork() 97 | if pid > 0: 98 | open(PIDFILE, 'w').write("%d" % pid) 99 | sys.exit(0) 100 | except OSError, e: 101 | print >>sys.stderr, "fork #2 failed: %d (%s)" % (e.errno, e.strerror) 102 | 103 | main() 104 | -------------------------------------------------------------------------------- /dns/ganglia_bind_stats.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | ########################################################################### 4 | # Author: Vladimir Vuksan http://vuksan.com/linux/ 5 | # Last Changed: $Date: 2009-09-06 20:51:39 -0400 (Ned, 06 Ruj 2009) $ 6 | # License: GNU Public License (http://www.gnu.org/copyleft/gpl.html) 7 | # NEED TO MODIFY FOLLOWING 8 | # Adjust this variables appropriately. Feel free to add any options to gmetric_command 9 | # necessary for running gmetric in your environment to gmetric_options e.g. -c /etc/gmond.conf 10 | $gmetric_exec = "/usr/bin/gmetric"; 11 | $gmetric_options = "-d 120 "; 12 | 13 | # Path to the rndc binary 14 | $rndc_exec = "/usr/sbin/rndc"; 15 | 16 | # Where to store the last stats file 17 | $tmp_dir_base="/var/named/chroot/tmp/bind_stats"; 18 | 19 | # If you don't care about any of these particular metrics. Just remove them 20 | %counter_metrics = ( 21 | "success" => "req", 22 | "referral" => "req", 23 | "nxrrset" => "req", 24 | "nxdomain" => "req", 25 | "recursion" => "req", 26 | "failure" => "req" 27 | ); 28 | 29 | $bind_stats = "/var/named/chroot/var/named/data/named_stats.txt"; 30 | 31 | # DON"T TOUCH BELOW UNLESS YOU KNOW WHAT YOU ARE DOING 32 | if ( ! -x $gmetric_exec ) { 33 | die("Gmetric binary is not executable. Exiting..."); 34 | } 35 | 36 | if ( ! -x $rndc_exec ) { 37 | die("Rndc binary is not executable. Please check patch for \$rndc_exec. Exiting..."); 38 | } 39 | 40 | $gmetric_command = $gmetric_exec . " " . $gmetric_options; 41 | $debug = 0; 42 | 43 | $tmp_stats_file=$tmp_dir_base . "/" . "bindstats"; 44 | 45 | # If the tmp directory doesn't exit create it 46 | if ( ! 
-d $tmp_dir_base ) { 47 | print "Attempting to create directory $tmp_dir_base\n"; 48 | system("mkdir -p $tmp_dir_base"); 49 | } 50 | 51 | 52 | ############################################################################### 53 | # We need to store a baseline with statistics. If it's not there let's dump 54 | # it into a file. Don't do anything else 55 | ############################################################################### 56 | if ( ! -f $tmp_stats_file ) { 57 | print "Creating baseline. No output this cycle\n"; 58 | system("echo '' > $bind_stats; $rndc_exec stats ; egrep '^[a-z]* [0-9]*\$' $bind_stats > $tmp_stats_file"); 59 | } else { 60 | 61 | ###################################################### 62 | # Let's read in the file from the last poll 63 | open(OLDSTATUS, "< $tmp_stats_file"); 64 | 65 | while() 66 | { 67 | ($metric, $value) = split (/ /); 68 | $old_stats{$metric}=${value}; 69 | } 70 | 71 | # Get the time stamp when the stats file was last modified 72 | $old_time = (stat $tmp_stats_file)[9]; 73 | close(OLDSTATUS); 74 | 75 | ##################################################### 76 | # Get the new stats 77 | ##################################################### 78 | system("echo > $bind_stats; $rndc_exec stats ; egrep '^[a-z]* [0-9]*\$' $bind_stats > $tmp_stats_file"); 79 | open(NEWSTATUS, "< $tmp_stats_file"); 80 | $new_time = time(); 81 | 82 | while() 83 | { 84 | ($metric, $value) = split (/ /); 85 | $new_stats{$metric}=${value}; 86 | } 87 | close(NEWSTATUS); 88 | 89 | # Time difference between this poll and the last poll 90 | my $time_difference = $new_time - $old_time; 91 | if ( $time_difference < 1 ) { 92 | die("Time difference can't be less than 1"); 93 | } 94 | 95 | ################################################################################# 96 | # Calculate deltas for counter metrics and send them to ganglia 97 | ################################################################################# 98 | while ( my ($metric, $units) = each(%counter_metrics) ) { 99 | my $rate = ($new_stats{$metric} - $old_stats{$metric}) / $time_difference; 100 | 101 | if ( $rate < 0 ) { 102 | print "Something is fishy. Rate for " . $metric . " shouldn't be negative. Perhaps counters were reset. Doing nothing"; 103 | } else { 104 | print "$metric = $rate / sec\n"; 105 | if ( $debug == 0 ) { 106 | system($gmetric_command . " -u '$units/sec' -tfloat -n dns_" . $metric . " -v " . $rate); 107 | } 108 | 109 | } 110 | 111 | } 112 | 113 | } -------------------------------------------------------------------------------- /apache/ganglia_apache.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # 3 | # script found at http://ganglia.sourceforge.net/gmetric/view.php?id=18 4 | # 5 | # Feeds ganglia with web server error rate information. 
6 | # 7 | # Can be called by Apache by setting up a special logger: 8 | # 9 | # LogFormat "%>s" status_only 10 | # CustomLog "|/path/to/apache-logs-to-ganglia.pl -d 10" status_only 11 | # 12 | # 13 | # Author: Nicolas Marchildon (nicolas@marchildon.net) 14 | # Date: Date: 2002/11/26 04:15:19 15 | # Revision: Revision: 1.3 16 | # 17 | # Modified by Ben Hartshorne 18 | # $Header: /var/lib/cvs/ops/ganglia/ganglia_apache.pl,v 1.1 2006/07/11 17:29:27 ben Exp $ 19 | 20 | use Getopt::Long; 21 | 22 | # Defaults 23 | $DELAY = 20; 24 | $METRIC = 'Apache'; 25 | $GMETRIC = "/usr/bin/gmetric"; 26 | $GMETRIC_ARGS="-c /etc/gmond.conf"; 27 | 28 | # Parse command line 29 | GetOptions( { d => \$DELAY, delay => \$DELAY, 30 | m => \$METRIC, metric => \$METRIC 31 | }, 32 | 'd|delay=i', 33 | 'p|port=i', 34 | 'h|host=s', 35 | 'm|metric=s'); 36 | 37 | # Validate command line 38 | if ( length($DELAY) == 0 39 | || length($METRIC) == 0) { 40 | print STDERR <) { 136 | parse_line $_; 137 | } 138 | alarm 0; 139 | }; 140 | if ($@ and $@ !~ /alarm clock restart/) { die } 141 | report; 142 | } 143 | 144 | -------------------------------------------------------------------------------- /http/apache_error/apache_error.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # 3 | # Feeds ganglia with web server error rate information. 4 | # 5 | # The latest version can be found on GitHub: 6 | # 7 | # http://github.com/ganglia/gmetric/tree/master/http/apache_error/ 8 | # 9 | # This script can be called by Apache by setting up a special logger: 10 | # 11 | # LogFormat "%>s" status_only 12 | # CustomLog "|/path/to/apache-logs-to-ganglia.pl -d 10" status_only 13 | # 14 | # 15 | # Original Author: Nicolas Marchildon (nicolas@marchildon.net) 16 | # Date: 2002/11/26 04:15:19 17 | # 18 | # Modified by Ben Hartshorne 19 | # $Header: /var/lib/cvs/ops/ganglia/ganglia_apache.pl,v 1.1 2006/07/11 17:29:27 ben Exp $ 20 | 21 | use Getopt::Long; 22 | 23 | # Defaults 24 | $DELAY = 20; 25 | $METRIC = 'Apache'; 26 | $GMETRIC = "/usr/bin/gmetric"; 27 | $GMETRIC_ARGS="-c /etc/gmond.conf"; 28 | 29 | # Parse command line 30 | GetOptions( { d => \$DELAY, delay => \$DELAY, 31 | m => \$METRIC, metric => \$METRIC 32 | }, 33 | 'd|delay=i', 34 | 'p|port=i', 35 | 'h|host=s', 36 | 'm|metric=s'); 37 | 38 | # Validate command line 39 | if ( length($DELAY) == 0 40 | || length($METRIC) == 0) { 41 | print STDERR <) { 137 | parse_line $_; 138 | } 139 | alarm 0; 140 | }; 141 | if ($@ and $@ !~ /alarm clock restart/) { die } 142 | report; 143 | } 144 | 145 | 146 | -------------------------------------------------------------------------------- /nfs/nfs_stats/nfs_stats.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # 3 | # Calculate NFS client and server stats and send them to ganglia 4 | # Inspired by Karl Kopper's client stats BASH script from 5 | # http://ganglia.sourceforge.net/gmetric/view.php?id=26 6 | # 7 | # Author: Greg Wimpey, Colorado School of Mines 20 May 2004 8 | # Email: gwimpey mines edu 9 | # 10 | # This script may be freely copied, distributed, or modified 11 | # as long as authorship and copyright information is maintained. 
12 | # Copyright 2004 Colorado School of Mines 13 | # 14 | $nfsproc='/proc/net/rpc/nfs'; 15 | $nfsdproc='/proc/net/rpc/nfsd'; 16 | $tmpclient='/tmp/nfsclientstats'; 17 | $tmpserver='/tmp/nfsserverstats'; 18 | # 19 | # initialize everything in case we can't read it 20 | # (note that by initializing time to 0, we don't get spike on first run) 21 | # 22 | $oldcgetattr=0; 23 | $oldcread=0; 24 | $oldcwrite=0; 25 | $oldctime=0; 26 | $rcgetattr=0.0; 27 | $rcread=0.0; 28 | $rcwrite=0.0; 29 | $nfsqaratio=-1.0; 30 | $oldsgetattr=0; 31 | $oldsread=0; 32 | $oldswrite=0; 33 | $oldstime=0; 34 | $rsgetattr=0.0; 35 | $rsread=0.0; 36 | $rswrite=0.0; 37 | $nfsdqaratio=-1.0; 38 | 39 | if ( -r $nfsproc ) { # we are a client 40 | 41 | open NFS,"<$nfsproc" || die "can't open $nfsproc for reading\n"; 42 | $clienttime=time; 43 | while () { 44 | if(/^proc3/) { # change here if NFS version != 3 45 | @newstats=split; 46 | $clientgetattr=$newstats[3]; 47 | $clientread=$newstats[8]; 48 | $clientwrite=$newstats[9]; 49 | } 50 | } 51 | close NFS; 52 | 53 | if ( -r $tmpclient) { 54 | open CLIENT,"<$tmpclient" || die "can't open $tmpclient for reading\n"; 55 | $junk= || die "can't read stats from $tmpclient\n"; 56 | @oldstats=split ' ',$junk; 57 | close CLIENT; 58 | $oldcgetattr=$oldstats[0]; 59 | $oldcread=$oldstats[1]; 60 | $oldcwrite=$oldstats[2]; 61 | $oldctime=$oldstats[3]; 62 | } 63 | 64 | open CLIENT,">$tmpclient" || die "can't open $tmpclient for writing\n"; 65 | print CLIENT "$clientgetattr $clientread $clientwrite $clienttime\n"; 66 | close CLIENT; 67 | $ictime=1.0/($clienttime-$oldctime); 68 | $rcgetattr=($clientgetattr-$oldcgetattr)*$ictime; 69 | $rcread=($clientread-$oldcread)*$ictime; 70 | $rcwrite=($clientwrite-$oldcwrite)*$ictime; 71 | $nfsqaratio=$rcgetattr != 0 ? ($rcread+$rcwrite)/$rcgetattr : -1.0; 72 | } 73 | 74 | if ( -r $nfsdproc ) { # we are a server 75 | 76 | open NFSD,"<$nfsdproc" || die "can't open $nfsdproc for reading\n"; 77 | $servertime=time; 78 | while () { 79 | if(/^proc3/) { # change here if NFSD version != 3 80 | @newstats=split; 81 | $servergetattr=$newstats[3]; 82 | $serverread=$newstats[8]; 83 | $serverwrite=$newstats[9]; 84 | } 85 | } 86 | close NFSD; 87 | 88 | if ( -r $tmpserver) { 89 | open SERVER,"<$tmpserver" || die "can't open $tmpserver for reading\n"; 90 | $junk= || die "can't read stats from $tmpserver\n"; 91 | @oldstats=split ' ',$junk; 92 | close SERVER; 93 | $oldsgetattr=$oldstats[0]; 94 | $oldsread=$oldstats[1]; 95 | $oldswrite=$oldstats[2]; 96 | $oldstime=$oldstats[3]; 97 | } 98 | 99 | open SERVER,">$tmpserver" || die "can't open $tmpserver for writing\n"; 100 | print SERVER "$servergetattr $serverread $serverwrite $servertime\n"; 101 | close SERVER; 102 | $istime=1.0/($servertime-$oldstime); 103 | $rsgetattr=($servergetattr-$oldsgetattr)*$istime; 104 | $rsread=($serverread-$oldsread)*$istime; 105 | $rswrite=($serverwrite-$oldswrite)*$istime; 106 | $nfsdqaratio=$rsgetattr != 0 ? 
($rsread+$rswrite)/$rsgetattr : -1.0; 107 | } 108 | 109 | # 110 | # so that all nodes are reporting, we send back defaults even if 111 | # this node is not both client and server 112 | # 113 | 114 | system("/usr/bin/gmetric -nnfsgetattr -v$rcgetattr -tfloat -ucalls/sec"); 115 | system("/usr/bin/gmetric -nnfsread -v$rcread -tfloat -ucalls/sec"); 116 | system("/usr/bin/gmetric -nnfswrite -v$rcwrite -tfloat -ucalls/sec"); 117 | system("/usr/bin/gmetric -nnfsqaratio -v$nfsqaratio -tfloat -ucalls"); 118 | 119 | system("/usr/bin/gmetric -nnfsdgetattr -v$rsgetattr -tfloat -ucalls/sec"); 120 | system("/usr/bin/gmetric -nnfsdread -v$rsread -tfloat -ucalls/sec"); 121 | system("/usr/bin/gmetric -nnfsdwrite -v$rswrite -tfloat -ucalls/sec"); 122 | system("/usr/bin/gmetric -nnfsdqaratio -v$nfsdqaratio -tfloat -ucalls"); 123 | 124 | exit 0; 125 | -------------------------------------------------------------------------------- /nfs/nfs_stats_vvuksan/ganglia_nfs_stats.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | ########################################################################### 4 | # Author: Vladimir Vuksan http://vuksan.com/linux/ 5 | # Last Changed: $Date: 2009-09-06 20:51:39 -0400 (Ned, 06 Ruj 2009) $ 6 | # License: GNU Public License (http://www.gnu.org/copyleft/gpl.html) 7 | # Collects NFS v3 metrics. 8 | # By default it collects NFSd (server) metrics. If you supply 9 | # the -c argument it will collect NFS client metrics 10 | # Currently it will collect and send to Ganglia following metrics 11 | # 3 -> getattr, 4 -> setattr, 5 -> lookup, 6 -> access, 8-> read 12 | # 9 -> write, 10 -> create, 14 -> remove 13 | # If you would like some of the other stats e.g. mkdir append them 14 | # below to @which_metrics 15 | ########################################################################### 16 | $gmetric_command = "/usr/bin/gmetric"; 17 | 18 | if ( ! -x $gmetric_command ) { 19 | die("Gmetric command is not executable. Exiting..."); 20 | } 21 | 22 | # Check out the nfs_stat has below for the list of all metrics 23 | # and their appropriate index 24 | @which_metrics = split(/ /, "3 4 5 6 8 9 10 14"); 25 | 26 | # Where to store the last stats file 27 | $tmp_dir_base="/tmp/nfs_stats"; 28 | 29 | # Look whether -c argument was supplied 30 | if ( $#ARGV == 0 && $ARGV[0] eq "-c" ) { 31 | print "Collecting NFSv3 client stats\n"; 32 | $proc_file="/proc/net/rpc/nfs"; 33 | $tmp_stats_file=$tmp_dir_base . "/" . "nfs_client_stats"; 34 | $metric_prefix = "nfscl_v3_"; 35 | } else { 36 | print "Collecting NFSdv3 (server) stats\n"; 37 | $proc_file="/proc/net/rpc/nfsd"; 38 | $tmp_stats_file=$tmp_dir_base . "/" . "nfs_server_stats"; 39 | $metric_prefix = "nfsd_v3_"; 40 | } 41 | 42 | ########################################################################### 43 | # This is the order of metrics in /proc/net/rpc/nfsd 44 | ########################################################################### 45 | %nfs_stat = ( 46 | 3 => "getattr", 47 | 4 => "setattr", 48 | 5 => "lookup", 49 | 6 => "access", 50 | 7 => "readlink", 51 | 8 => "read", 52 | 9 => "write", 53 | 10 => "create", 54 | 11 => "mkdir", 55 | 12 => "symlink", 56 | 13 => "mknod", 57 | 14 => "remove", 58 | 15 => "rmdir", 59 | 16 => "rename", 60 | 17 => "link", 61 | 18 => "readdir", 62 | 19 => "readdirplus", 63 | 20 => "fsstat", 64 | 21 => "fsinfo", 65 | 22 => "pathconf", 66 | 23 => "commit" 67 | ); 68 | 69 | # If the tmp directory doesn't exit create it 70 | if ( ! 
-d $tmp_dir_base ) { 71 | system("mkdir -p $tmp_dir_base"); 72 | } 73 | 74 | ############################################################################### 75 | # We need to store a baseline with statistics. If it's not there let's dump 76 | # it into the file. Don't do anything else 77 | ############################################################################### 78 | if ( ! -f $tmp_stats_file ) { 79 | print "Creating baseline. No output this cycle\n"; 80 | system("cat $proc_file > $tmp_stats_file"); 81 | } else { 82 | 83 | # Let's read in the file from the last poll 84 | open(OLDNFSDSTATUS, "< $tmp_stats_file"); 85 | 86 | while() 87 | { 88 | my($line) = $_; 89 | chomp($line); 90 | if ( /^proc3/ ) { 91 | @old_stats = split(/ /,$line); 92 | last; 93 | } 94 | } 95 | 96 | # Get the time stamp when the stats file was last modified 97 | $old_time = (stat $tmp_stats_file)[9]; 98 | close(OLDNFSDSTATUS); 99 | 100 | open(NFSDSTATUS, "< $proc_file"); 101 | 102 | $new_time = time(); 103 | 104 | while() 105 | { 106 | my($line) = $_; 107 | chomp($line); 108 | if ( /^proc3/ ) { 109 | @new_stats = split(/ /,$line); 110 | system("echo '$line' > $tmp_stats_file"); 111 | last; 112 | } 113 | } 114 | 115 | close(NFSDSTATUS); 116 | 117 | # Time difference between this poll and the last poll 118 | my $time_difference = $new_time - $old_time; 119 | if ( $time_difference < 1 ) { 120 | die("Time difference can't be less than 1"); 121 | } 122 | 123 | # Calculate deltas and send them to ganglia 124 | for ( $i = 0 ; $i <= $#which_metrics; $i++ ) { 125 | my $metric = $which_metrics[$i]; 126 | my $delta = $new_stats[$metric] - $old_stats[$metric]; 127 | my $rate = int($delta / $time_difference); 128 | if ( $rate < 0 ) { 129 | print "Something is fishy. Rate for " . $metric . " shouldn't be negative. Perhaps counters were reset. Doing nothing"; 130 | } else { 131 | print "$nfs_stat{$metric} = $rate / sec\n"; 132 | system($gmetric_command . " -tuint16 -u 'calls/sec' -n " . $metric_prefix . $nfs_stat{$metric} . " -v " . $rate); 133 | } 134 | } 135 | 136 | } -------------------------------------------------------------------------------- /hpc/sge_jobs/jobqueue_report.php: -------------------------------------------------------------------------------- 1 | _report". This 8 | fuction will be called from the graph.php script automatically. 9 | 10 | 2) The *_report script should return an array that contains at least the variables 11 | listed below. Several have been pre-populated, and may not need to be changed. 12 | However, you will have to alter at least these: $series, $title, $vertical_label 13 | 14 | 3) An array variable is passed to the function in order to make sure that certain 15 | variables are available for use. This is PASSED BY REFERENCE and CAN BE CHANGED 16 | by your report function. 17 | 18 | 19 | 20 | A full list of variables that will be used: 21 | 22 | $series (string: holds the meat of the rrdgraph definition) 23 | $title (string: title of the report) 24 | $vertical_label (label for Y-Axis.) 25 | 26 | $start (String: Start time of the graph, can usually be left alone) 27 | $end (String: End time of the graph, also can usually be left alone) 28 | 29 | $width (Strings: Width and height of *graph*, the actual image will be 30 | $height slightly larger due to text elements and padding. These 31 | are normally set automatically, depending on the graph size 32 | chosen from the web UI) 33 | 34 | $upper-limit (Strings: Maximum and minimum Y-value for the graph. 
RRDTool 35 | $lower-limit normally will auto-scale the Y min and max to fit the 36 | data. You may override this by setting these variables 37 | to specific limits. The default value is a null string, 38 | which will force the auto-scale behavior) 39 | 40 | $color (ARRAY: Sets one or more chart colors. Usually used for setting 41 | the background color of the chart. Valid array keys are 42 | BACK, CANVAS, SADEA, SHADEB, FONT, FRAME and ARROW. Usually, 43 | only BACK is set). 44 | 45 | $extras (Any other custom rrdtool commands can be added to this 46 | this variable. For example, setting a different --base 47 | value or use a --logarithmic scale) 48 | 49 | 50 | For more information and specifics, see the man page for 'rrdgraph'. 51 | 52 | 53 | 54 | */ 55 | 56 | function graph_jobqueue_report ( &$rrdtool_graph ) { 57 | 58 | /* this is just the cpu_report (from revision r920) as an example, but with extra comments */ 59 | 60 | // pull in a number of global variables, many set in conf.php (such as colors and $rrd_dir), 61 | // but other from elsewhere, such as get_context.php 62 | 63 | global $context, 64 | $fudge_2, 65 | $cpu_idle_color, 66 | $cpu_nice_color, 67 | $cpu_system_color, 68 | $cpu_user_color, 69 | $cpu_wio_color, 70 | $hostname, 71 | $rrd_dir, 72 | $size, 73 | $use_fqdn_hostname; 74 | 75 | if (!$use_fqdn_hostname) { 76 | $hostname = strip_domainname($hostname); 77 | } 78 | 79 | // 80 | // You *MUST* set at least the 'title', 'vertical-label', and 'series' variables. 81 | // 82 | $rrdtool_graph['title'] = 'Job queues'; // This will be turned into: "Clustername $TITLE last $timerange", so keep it short 83 | $rrdtool_graph['vertical-label'] = 'Jobs'; 84 | $rrdtool_graph['height'] += $size == 'medium' ? 28 : 0 ; // Fudge to account for number of lines in the chart legend 85 | $rrdtool_graph['lower-limit'] = '.5'; 86 | $rrdtool_graph['extras'] = '--logarithmic --units=si -X 0 --rigid'; 87 | 88 | $core_title = $size == 'large' ? 'Total CPU cores' : 'Cores'; 89 | 90 | 91 | $series = 92 | "DEF:'r_running'='${rrd_dir}/sge_running.rrd':'sum':AVERAGE " 93 | ."DEF:'r_errors'='${rrd_dir}/sge_error.rrd':'sum':AVERAGE " 94 | ."DEF:'r_pending'='${rrd_dir}/sge_pending.rrd':'sum':AVERAGE " 95 | ."DEF:'cores'='${rrd_dir}/cpu_num.rrd':'sum':AVERAGE " 96 | ."VDEF:l_running=r_running,LAST " 97 | ."VDEF:l_pending=r_pending,LAST " 98 | ."VDEF:l_errors=r_errors,LAST " 99 | ."VDEF:l_cores=cores,LAST " 100 | ."CDEF:c_errors=r_errors,0,EQ,UNKN,r_errors,IF " 101 | ."CDEF:c_running=r_running,0,EQ,UNKN,r_running,IF " 102 | ."CDEF:c_pending=r_pending,0,EQ,UNKN,r_pending,IF " 103 | ."LINE3:'cores'#FF7F50:'$core_title\g' " 104 | ."GPRINT:l_cores:'(%.0lf)' " 105 | ."LINE2:'c_running'#33cc00:'Running\g' " 106 | ."GPRINT:l_running:'(%.0lf)' " 107 | ."LINE2:'c_pending'#0033CC:'Pending\g' " 108 | ."GPRINT:l_pending:'(%.0lf)' " 109 | ."LINE3:'c_errors'#CC0000:'Errors\g' " 110 | ."GPRINT:l_errors:'(%.0lf)\l' " 111 | ; 112 | 113 | 114 | $rrdtool_graph['series'] = $series; 115 | 116 | return $rrdtool_graph; 117 | } 118 | 119 | ?> 120 | -------------------------------------------------------------------------------- /network/aoe/ganglia_coraid_err.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # 3 | # Feeds ganglia with AoE error message information. 
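# It tails /dev/etherd/err for "retransmit", "unexpected rsp" and "no frame
# available" messages and reports per-interval rates via gmetric.
# A hypothetical way to keep it running in the background (the path and delay
# below are illustrative, not from the original docs):
#
#   nohup /path/to/ganglia_coraid_err.pl -d 15 &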
4 | # 5 | # Original Authors (to watch Apache logs): 6 | # Author: Nicolas Marchildon (nicolas@marchildon.net) 7 | # Modified by Ben Hartshorne 8 | # 9 | # Further modified by Jesse Becker to do AoE stuff in August 2010. 10 | 11 | use Getopt::Long; 12 | use strict; 13 | use POSIX; 14 | use Data::Dumper; 15 | use Time::HiRes qw(time); 16 | 17 | # Defaults 18 | my $delay = 15; 19 | my $METRIC = 'AoE Retrans'; 20 | my $GMETRIC = "/usr/bin/gmetric"; 21 | 22 | my $gmond_conf = -f '/etc/gmond.conf' ? '/etc/gmond.conf' : 23 | -f '/etc/ganglia/gmond.conf' ? '/etc/ganglia/gmond.conf' : '' ; 24 | my $GMETRIC_ARGS="-c $gmond_conf"; 25 | 26 | my $debug = 0; 27 | my $nosend = 0; 28 | 29 | # Parse command line 30 | GetOptions( 'd|delay=i' => \$delay, 31 | 'm|metric=s' => \$METRIC, 32 | 'v|verbose' => \$debug, 33 | 'n|nosend' => \$nosend, 34 | ); 35 | 36 | # Validate command line 37 | if ( length($delay) == 0 or 38 | !$delay or 39 | length($METRIC) == 0) { 40 | print STDERR <0) 50 | (default: $delay) 51 | 52 | EOS 53 | exit 1; 54 | } 55 | 56 | 57 | my %units = ( 58 | _default => 'msg_per_sec', 59 | nout => 'Average value', 60 | nout_max => 'Max value per interval', 61 | ); 62 | 63 | 64 | my %metrics; 65 | 66 | my $start = time; 67 | 68 | my $shucks; 69 | 70 | ################################################################################# 71 | sub catch_hup { 72 | my $signame = shift; 73 | $shucks++; 74 | &report(%metrics); 75 | } 76 | ################################################################################# 77 | $SIG{HUP} = \&catch_hup; # best strategy 78 | 79 | sub broadcast { 80 | my ($metric,$value,$type,$units) = @_; 81 | 82 | my $timeValid = $delay + 10; # Number of seconds this sample is good for 83 | my $cmd = "$GMETRIC $GMETRIC_ARGS --name=$metric --value=$value --type=$type --units='$units' --tmax=$timeValid"; 84 | print '(Silent mode) ' if ($nosend && $debug); 85 | print "CMD: $cmd\n" if $debug; 86 | 87 | 88 | my $ret = system($cmd) / 256; 89 | if ($ret == -1) { 90 | print("Unable to send data to ganglia: $!"); 91 | } 92 | } 93 | 94 | sub report { 95 | my ($signal) = @_; 96 | #print "Reporting... "; 97 | 98 | my %rates; 99 | 100 | my $delta = time - $start; 101 | if ($delta < 1) { $delta =1}; 102 | 103 | print "Reporting on($delta): ". Dumper(\%metrics) if $debug; 104 | 105 | foreach my $metric (keys %metrics) { 106 | my $rate= $metrics{$metric}; 107 | $metrics{$metric} = 0; 108 | 109 | # Metrics of type foo_max and foo_min shouldn't be "averaged" 110 | if ( $metric eq 'nout' ) { 111 | if ($rate > 0 and $metrics{retransmit} > 0) { 112 | $rate /= $metrics{retransmit}; 113 | } 114 | } 115 | 116 | elsif ( $metric !~ /_(?:max|min)$/) { 117 | $rate /= $delta; 118 | } 119 | 120 | my $units = $units{$metric} || $units{_default}; 121 | broadcast ("aoe_$metric", $rate, 'float', $units); 122 | } 123 | 124 | $start = time; 125 | #print "ok.\n"; 126 | alarm $delay; 127 | } 128 | 129 | sub parse_line { 130 | my ($metric_r, $line) = @_; 131 | # retransmit e50.0 oldtag=479651ce@145455266 newtag=47b45266 s=00156004fa93 d=0030486208bd nout=2 132 | # unexpected rsp e50.0 tag=479651ce@145455266 s=0030486208bd d=00156004fa93 133 | 134 | #print Dumper($metric_r, $line); 135 | 136 | if ($line =~ /retransmit.+nout=(\d+)/) { 137 | $metric_r->{retransmit}++; 138 | $metric_r->{nout}+=$1; 139 | $metric_r->{nout_max} = $1 > $metric_r->{nout_max} ? 
$1 : $metric_r->{nout_max}; 140 | next; 141 | } 142 | 143 | elsif ($line =~ /unexpected rsp/) { 144 | $metric_r->{unexpected_rsp}++; 145 | } 146 | 147 | elsif ($line =~ /no frame available/) { 148 | $metric_r->{no_frame}++; 149 | } 150 | 151 | else { 152 | $metric_r->{unknown}++; 153 | } 154 | 155 | 156 | return; 157 | } 158 | 159 | sysopen(ETHERD, "/dev/etherd/err", O_RDONLY|O_NONBLOCK) || die "Failed open: [$!]"; 160 | 161 | $SIG{ALRM} = \&report; 162 | 163 | alarm $delay; 164 | my $first = 1; 165 | while (1) { 166 | while (my $line=) { 167 | #chomp $line; 168 | #print "Line: [$line]\n"; 169 | parse_line \%metrics, $line; 170 | } 171 | #print 'Metrics after loop: '. Dumper(\%metrics); 172 | if ($first) { 173 | # don't report on first interval... May have bogus old data. 174 | #&report; 175 | $first=0; 176 | } 177 | sleep $delay -0.01; 178 | } 179 | 180 | close ETHERD; 181 | 182 | -------------------------------------------------------------------------------- /disk/disk_wait_gmetric.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### $Header: /var/lib/cvs/ops/ganglia/disk_wait_gmetric.sh,v 1.3 2006/07/11 17:29:27 ben Exp $ 4 | 5 | ### this script reports disk metrics to ganglia. 6 | ### It should be called from cron every n minutes. 7 | ### It will report blocks per second on each disk, 8 | ### and will automatically adjust for whatever 9 | ### timeframe it is called 10 | 11 | ### Copyright Simply Hired, Inc. 2006 12 | ### License to use, modify, and distribute under the GPL 13 | ### http://www.gnu.org/licenses/gpl.txt 14 | 15 | VERSION=1.0 16 | 17 | GMETRIC="/usr/bin/gmetric" 18 | GANGLIA_ARGS="-c /etc/gmond.conf" 19 | STATEFILE="/var/lib/ganglia/metrics/io_wait.stats" 20 | date=`date +%s` 21 | iostat="/usr/bin/iostat" 22 | 23 | ERROR_CREATE="/tmp/disk_wait_gmetric_create_statefile_failed" 24 | ERROR_IOSTAT="/tmp/disk_wait_gmetric_no_iostat" 25 | ERROR_DEVNAMES="/tmp/disk_wait_gmetric_bad_devname" 26 | ERROR_DEVNAMES2="/tmp/disk_wait_gmetric_bad_devname_didnt_fix" 27 | ERROR_GMETRIC="/tmp/disk_wait_gmetric_no_gmetric" 28 | ERROR_TIMEDIFF="/tmp/disk_wait_gmetric_timediff" 29 | ERROR_NOTROOT="/tmp/disk_wait_gmetric_notroot" 30 | 31 | if [ $UID -ne 0 ] 32 | then 33 | if [ -e $ERROR_NOTROOT ] ; then exit 1; fi 34 | echo "Error: this script must be run as root." 35 | touch $ERROR_NOTROOT 36 | exit 1 37 | fi 38 | rm -f $ERROR_NOTROOT 39 | 40 | if [ "x$1" == "x-h" ] 41 | then 42 | echo "Usage: disk_wait_gmetric.sh [--clean]" 43 | echo " --clean delete all tmp files" 44 | exit 0 45 | fi 46 | 47 | if [ "x$1" == "x--clean" ] 48 | then 49 | rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAME $ERROR_DEVNAME2 $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT $STATEFILE 50 | retval=$? 51 | if [ $retval -ne 0 ] 52 | then 53 | echo "failed to clean up." 54 | exit 1 55 | else 56 | echo "All cleaned up." 57 | exit 0 58 | fi 59 | fi 60 | 61 | # save and turn off /STDERR for th estatefile tests 62 | exec 3>&2 63 | exec 2>/dev/null 64 | 65 | # if the GMETRIC program isn't installed, compain 66 | if [ ! -e $GMETRIC ] 67 | then 68 | if [ -e $ERROR_GMETRIC ] ; then exit 1; fi 69 | echo "" 70 | echo "Error: GMETRIC doesn't seem to be installed." 71 | echo "$GMETRIC doesn't exist." 72 | echo "" 73 | touch $ERROR_GMETRIC 74 | exit 1 75 | fi 76 | 77 | # if the iostat program isn't installed, compain 78 | if [ ! 
-e $iostat ] 79 | then 80 | if [ -e $ERROR_IOSTAT ] 81 | then 82 | exit 1 83 | fi 84 | echo "" 85 | echo "Error: iostat (from the package sysstat) doesn't seem to be installed." 86 | echo "$iostat doesn't exist." 87 | echo "" 88 | touch $ERROR_IOSTAT 89 | exit 1 90 | fi 91 | 92 | # if the statefile doesn't exist, we either havn't 93 | # run yet or there's something bigger wrong. 94 | if [ ! -e $STATEFILE ] 95 | then 96 | if [ ! -d `dirname $STATEFILE` ] 97 | then 98 | mkdir -p `dirname $STATEFILE` 99 | fi 100 | # iostat -x 1 2 gives a summary and a report for the last second 101 | # we're only interested in the second half. We count the number of 102 | # lines, strip the first, divide by 2, and strip the header. 103 | # this gives us just the interesting part. 104 | tot_lines=`$iostat -x 1 2 | grep -v "^$" | wc -l` 105 | rel_lines=`expr \( $tot_lines - 1 \) / 2 - 1` 106 | echo $tot_lines > $STATEFILE 107 | echo $rel_lines >> $STATEFILE 108 | 109 | if [ ! -e $STATEFILE ] 110 | then 111 | # if it didn't exist and we couldn't create 112 | # it, we should just scream bloody murder and die. 113 | # only scream once though... 114 | if [ -e $ERROR_CREATE ] 115 | then 116 | exit 1 117 | fi 118 | echo "" 119 | echo "ERROR: couldn't create $STATEFILE" 120 | echo "" 121 | touch $ERROR_CREATE 122 | exit 1 123 | fi 124 | echo "Created statefile. Exitting." 125 | exit 0 126 | fi 127 | 128 | # restore stderr 129 | exec 2>&3 130 | exec 3>&- 131 | 132 | # this script uses iostat (part of the sysstat packag) 133 | # to retrieve disk metrics 134 | tot_lines=`$iostat -x 1 2 | grep -v "^$" | wc -l` 135 | old_stats=(`cat $STATEFILE`) 136 | old_tot_lines=${old_stats[0]} 137 | 138 | if [ $tot_lines -ne $old_tot_lines ] 139 | then 140 | echo "something is broken." 141 | echo "the number of lines of iostat output has changed" 142 | echo "current tot_lines=$tot_lines old_tot_lines=$old_tot_lines" 143 | echo "I'm backing up the current statefile ($STATEFILE) " 144 | echo "and will recreate it next time to see if that fixes this." 145 | mydate=`date +%Y%m%d%H%M%S` 146 | mv -fv $STATEFILE{,.${mydate}} 147 | touch $ERROR_DEVNAMES 148 | exit 1 149 | fi 150 | 151 | rel_lines=${old_stats[1]} 152 | #stats=(`$iostat -x 30 2 | grep -v "^$" | tail -$rel_lines`) 153 | stats=(`$iostat -x 5 2 | grep -v "^$" | tail -$rel_lines`) 154 | # the default gmond already reports this one... 
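# NOTE: because the assignment below is commented out, $iowait is never set in
# this script, so the cpu_waitio gmetric call further down submits an empty
# value; re-enable the assignment if you actually want that metric from here.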
155 | #iowait=${stats[3]} 156 | 157 | $GMETRIC $GMETRIC_ARGS --name="cpu_waitio" --value="$iowait" --type="float" --units="%" 158 | 159 | res=0 160 | index=19 161 | while [ $res -eq 0 ] 162 | do 163 | devname=${stats[$index]} 164 | await=${stats[$(($index + 11))]} 165 | util=${stats[$(($index + 13))]} 166 | 167 | $GMETRIC $GMETRIC_ARGS --name="${devname}_await" --value="$await" --type="float" --units="millisec" 168 | $GMETRIC $GMETRIC_ARGS --name="${devname}_util" --value="$util" --type="float" --units="%" 169 | 170 | index=$(($index + 14)) 171 | #if we're done, cut out of the loop 172 | if [ "k${stats[$index]}" == "k" ] 173 | then 174 | res=1 175 | fi 176 | done 177 | 178 | #cleanup 179 | rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAME2 $ERROR_DEVNAME $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT 180 | 181 | -------------------------------------------------------------------------------- /http/varnish/ganglia-varnish-stats.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # 3 | # Script to monitor Varnish metrics via Ganglia 4 | # 5 | # Author: pippoppo 6 | # based on the work of Vladimir Vuksan http://vuksan.com/linux/ (other ganglia perl scripts) 7 | # 8 | # Requirements: 9 | # - Needs access to varnishstat command 10 | # 11 | 12 | use strict; 13 | use warnings FATAL => 'all'; 14 | use XML::Simple; 15 | use Getopt::Long; 16 | 17 | sub print_usage (); 18 | 19 | #################################################################################### 20 | # YOU MAY NEED TO MODIFY FOLLOWING 21 | # Adjust this variables appropriately. Feel free to add any options to gmetric_command 22 | # necessary for running gmetric in your environment to gmetric_options e.g. -c /etc/gmond.conf 23 | #################################################################################### 24 | my $gmetric_exec = "/usr/bin/gmetric"; 25 | my $gmetric_options = " -d 120 "; 26 | #################################################################################### 27 | my $gmetric_command = $gmetric_exec . $gmetric_options; 28 | my $debug = 0; 29 | my $opt_help; 30 | my %newstats; 31 | my %oldstats; 32 | my $oldtime; 33 | my $newtime; 34 | my $varnishstat = "/usr/bin/varnishstat"; 35 | my $varnishstatcommand = $varnishstat . " -1 -x"; 36 | my $tmp_dir_base = "/tmp/varnish_stats"; 37 | my $varnishstatxml = $tmp_dir_base . "/varnishstat.xml"; 38 | my $varnishstatxmlold = $tmp_dir_base . 
"/varnishstat.xml.old"; 39 | 40 | ################################################################### 41 | #for a complete list of available metrics, please run in debug mode 42 | ################################################################### 43 | 44 | my %counter_metrics = ( 45 | "cache_hit" => "hits", 46 | "cache_miss" => "hits", 47 | ); 48 | 49 | my %absolute_metrics = ( "accept_fail" => "number", ); 50 | 51 | # If the tmp directory doesn't exist create it 52 | if ( !-d $tmp_dir_base ) { 53 | system("mkdir -p $tmp_dir_base"); 54 | } 55 | 56 | my $cmdline = GetOptions( 57 | "help" => \$opt_help, #flag 58 | "d" => \$debug 59 | ); 60 | 61 | unless ($cmdline) { 62 | print_usage; 63 | exit 1; 64 | } 65 | 66 | if ( defined($opt_help) ) { 67 | print_usage; 68 | exit 1; 69 | } 70 | 71 | if ( !-f $varnishstat ) { 72 | die("Missing varnishstat command\n"); 73 | } 74 | 75 | system("$varnishstatcommand > $varnishstatxml"); 76 | if ( !-f $varnishstatxml ) { 77 | die("Missing $varnishstatxml file\n"); 78 | } 79 | else { 80 | $newtime = ( stat $varnishstatxml )[9]; 81 | } 82 | if ( !-f $varnishstatxmlold ) { 83 | print "Missing $varnishstatxmlold file\n"; 84 | print "Creating baseline. No output this cycle\n"; 85 | system("mv $varnishstatxml $varnishstatxmlold"); 86 | exit 0; 87 | } 88 | else { 89 | $oldtime = ( stat $varnishstatxmlold )[9]; 90 | } 91 | my $timediff = $newtime - $oldtime; 92 | if ( $timediff < 1 ) { 93 | die("Time difference can't be less than 1"); 94 | } 95 | my $xs = new XML::Simple; 96 | my $newstatsxml = $xs->XMLin( $varnishstatxml, ForceArray => 1 ); 97 | if ( $debug != 0 ) { 98 | use Data::Dumper; 99 | print Dumper($newstatsxml); 100 | } 101 | my $oldstatsxml = $xs->XMLin( $varnishstatxmlold, ForceArray => 1 ); 102 | if ( $debug != 0 ) { 103 | use Data::Dumper; 104 | print Dumper($oldstatsxml); 105 | } 106 | 107 | foreach my $stat ( @{ $newstatsxml->{stat} } ) { 108 | if ( $debug != 0 ) { 109 | print "NEW " . $stat->{name}->[0] . "=" . $stat->{value}->[0] . "\n"; 110 | } 111 | $newstats{ $stat->{name}->[0] } = $stat->{value}->[0]; 112 | } 113 | 114 | foreach my $stat ( @{ $oldstatsxml->{stat} } ) { 115 | if ( $debug != 0 ) { 116 | print "OLD " . $stat->{name}->[0] . "=" . $stat->{value}->[0] . "\n"; 117 | } 118 | $oldstats{ $stat->{name}->[0] } = $stat->{value}->[0]; 119 | } 120 | 121 | system("mv $varnishstatxml $varnishstatxmlold"); 122 | 123 | if ( $newstats{'uptime'} < $oldstats{'uptime'} ) { 124 | die("negative number, maybe server was restarted"); 125 | } 126 | 127 | ################################################################################# 128 | # Calculate deltas for counter metrics and send them to ganglia 129 | ################################################################################# 130 | while ( my ( $metric, $units ) = each(%counter_metrics) ) { 131 | my $rate = ( $newstats{$metric} - $oldstats{$metric} ) / $timediff; 132 | 133 | if ( $rate < 0 ) { 134 | print "Something is fishy. Rate for " . $metric 135 | . " shouldn't be negative. Perhaps counters were reset. Doing nothing"; 136 | } 137 | else { 138 | print "$metric = $rate / sec\n"; 139 | if ( $debug == 0 ) { 140 | system( $gmetric_command 141 | . " -u '$units/sec' -tfloat -n varnish_" 142 | . $metric . " -v " 143 | . $rate ); 144 | } 145 | } 146 | } 147 | ################################################################################# 148 | # Just send absolute metrics. 
No need to calculate delta 149 | ################################################################################# 150 | while ( my ( $metric, $units ) = each(%absolute_metrics) ) { 151 | print "$metric = $newstats{$metric}\n"; 152 | if ( $debug == 0 ) { 153 | system( $gmetric_command 154 | . " -u $units -tuint16 -n varnish_" 155 | . $metric . " -v " 156 | . $newstats{$metric} ); 157 | } 158 | } 159 | 160 | exit 0; 161 | 162 | sub print_usage () { 163 | print <<'END_USAGE' 164 | Usage: ganglia-varnish-stats.pl [OPTION]... 165 | Collect varnish statistics 166 | 167 | Options: 168 | -help Usage information 169 | -d Debug flag. If not supplied defaults to false 170 | END_USAGE 171 | ; 172 | exit; 173 | } 174 | -------------------------------------------------------------------------------- /database/mongodb/ganglia_mongodb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # vim: set ts=4 sw=4 et : 3 | 4 | from subprocess import Popen 5 | import os, urllib2, time 6 | 7 | try: 8 | import json 9 | except ImportError: 10 | import simplejson as json 11 | 12 | hasPyMongo = None 13 | try: 14 | import pymongo 15 | hasPyMongo = True 16 | except ImportError: 17 | hasPyMongo = False 18 | 19 | GMETRIC = "/usr/bin/gmetric --name=\"%s\" --value=\"%s\" --type=\"%s\" --units=\"%s\"" 20 | 21 | class ServerStatus: 22 | ops_tmp_file = os.path.join("/", "tmp", "mongo-prevops") 23 | 24 | def __init__(self): 25 | self.status = self.getServerStatus() 26 | # call individual metrics 27 | for f in ["conns", "btree", "mem", "backgroundFlushing", "repl", "ops", "lock"]: 28 | getattr(self, f)() 29 | 30 | if (hasPyMongo): 31 | self.stats = self.getStats() 32 | self.writeStats() 33 | 34 | def getServerStatus(self): 35 | raw = urllib2.urlopen("http://localhost:28017/_status").read() 36 | return json.loads(raw)["serverStatus"] 37 | 38 | def getStats(self): 39 | c = pymongo.Connection("localhost:27017", slave_okay=True) 40 | stats = [] 41 | 42 | for dbName in c.database_names(): 43 | db = c[dbName] 44 | dbStats = db.command("dbstats") 45 | if dbStats["objects"] == 0: 46 | continue 47 | stats.append(dbStats) 48 | 49 | c.disconnect() 50 | return stats 51 | 52 | def writeStats(self): 53 | keys = { "numExtents":"extents", "objects":"objects", 54 | "fileSize": "bytes", "dataSize": "bytes", "indexSize": "bytes", "storageSize": "bytes" } 55 | 56 | totals = {} 57 | totalsNoLocals = {} 58 | for k in keys.keys(): 59 | totalsNoLocals[k] = 0 60 | totals[k] = 0 61 | 62 | for status in self.stats: 63 | dbName = status["db"] 64 | 65 | for k, v in keys.iteritems(): 66 | value = status[k] 67 | self.callGmetric({dbName + "_" + k: (value, v)}) 68 | totals[k] += value 69 | if (dbName != "local"): 70 | totalsNoLocals[k] += value 71 | 72 | for k, v in keys.iteritems(): 73 | self.callGmetric({"total_" + k: (totals[k], v)}) 74 | self.callGmetric({"totalNoLocal_" + k: (totalsNoLocals[k], v)}) 75 | 76 | self.callGmetric({"total_dataAndIndexSize" : (totals["dataSize"]+totals["indexSize"], "bytes")}) 77 | self.callGmetric({"totalNoLocal_dataAndIndexSize" : (totalsNoLocals["dataSize"]+totalsNoLocals["indexSize"], "bytes")}) 78 | 79 | def callGmetric(self, d): 80 | for k, v in d.iteritems(): 81 | unit = None 82 | if (isinstance(v[0], int)): 83 | unit = "int32" 84 | elif (isinstance(v[0], float)): 85 | unit = "double" 86 | else: 87 | raise RuntimeError(str(v[0].__class__) + " unknown (key: " + k + ")") 88 | 89 | cmd = GMETRIC % ("mongodb_" + k, v[0], unit, v[1]) 90 | Popen(cmd, shell=True) 91 | 
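    # callGmetric() takes a dict of {metric_name: (value, units)} and shells out one
    # gmetric call per entry, prefixing the name with "mongodb_" and choosing the
    # gmetric type from the Python type of the value (int -> int32, float -> double).
    # For example (illustrative values), callGmetric({"connections": (42, "connections")})
    # runs roughly:
    #   /usr/bin/gmetric --name="mongodb_connections" --value="42" --type="int32" --units="connections"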
92 | def conns(self): 93 | ss = self.status 94 | self.callGmetric({ 95 | "connections" : (ss["connections"]["current"], "connections") 96 | }) 97 | 98 | def btree(self): 99 | b = self.status["indexCounters"]["btree"] 100 | self.callGmetric({ 101 | "btree_accesses" : (b["accesses"], "count"), 102 | "btree_hits" : (b["hits"], "count"), 103 | "btree_misses" : (b["misses"], "count"), 104 | "btree_resets" : (b["resets"], "count"), 105 | "btree_miss_ratio" : (b["missRatio"], "ratio"), 106 | }) 107 | 108 | def mem(self): 109 | m = self.status["mem"] 110 | self.callGmetric({ 111 | "mem_resident" : (m["resident"], "MB"), 112 | "mem_virtual" : (m["virtual"], "MB"), 113 | "mem_mapped" : (m["mapped"], "MB"), 114 | }) 115 | 116 | def backgroundFlushing(self): 117 | f = self.status["backgroundFlushing"] 118 | self.callGmetric({ 119 | "flush_average" : (f["average_ms"], "ms"), 120 | }) 121 | 122 | def ops(self): 123 | out = {} 124 | cur_ops = self.status["opcounters"] 125 | 126 | lastChange = None 127 | try: 128 | os.stat_float_times(True) 129 | lastChange = os.stat(self.ops_tmp_file).st_ctime 130 | with open(self.ops_tmp_file, "r") as f: 131 | content = f.read() 132 | prev_ops = json.loads(content) 133 | except (ValueError, IOError): 134 | prev_ops = {} 135 | 136 | for k, v in cur_ops.iteritems(): 137 | if k in prev_ops: 138 | name = k + "s_per_second" 139 | if k == "query": 140 | name = "queries_per_second" 141 | 142 | interval = time.time() - lastChange 143 | if (interval <= 0.0): 144 | continue 145 | out[name] = (max(0, float(v) - float(prev_ops[k])) / interval, "ops/s") 146 | 147 | with open(self.ops_tmp_file, 'w') as f: 148 | f.write(json.dumps(cur_ops)) 149 | 150 | self.callGmetric(out) 151 | 152 | def repl(self): 153 | ismaster = 0; 154 | if (self.status["repl"]["ismaster"]): 155 | ismaster = 1 156 | 157 | self.callGmetric({ 158 | "is_master" : (ismaster, "boolean") 159 | }) 160 | 161 | def lock(self): 162 | self.callGmetric({ 163 | "lock_ratio" : (self.status["globalLock"]["ratio"], "ratio"), 164 | "lock_queue_readers" : (self.status["globalLock"]["currentQueue"]["readers"], "queue size"), 165 | "lock_queue_writers" : (self.status["globalLock"]["currentQueue"]["writers"], "queue size"), 166 | }) 167 | 168 | if __name__ == "__main__": 169 | ServerStatus() 170 | -------------------------------------------------------------------------------- /network/ethtool_interface_stats/ganglia_ethtool_stats.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | 5 | ########################################################################### 6 | # Author: Vladimir Vuksan http://vuksan.com/linux/ 7 | # License: GNU Public License (http://www.gnu.org/copyleft/gpl.html) 8 | # Collects ethtool metrics ie. things you get by running 9 | # ethtool -S 10 | ########################################################################### 11 | 12 | # NEED TO MODIFY FOLLOWING 13 | # Adjust this variables appropriately. Feel free to add any options to gmetric_command 14 | # necessary for running gmetric in your environment to gmetric_options e.g. -c /etc/gmond.conf 15 | my $gmetric_exec = "/usr/bin/gmetric"; 16 | my $gmetric_options = " -g ethtool"; 17 | my $ethtool_bin = "/sbin/ethtool"; 18 | my $metric_prefix = "ethtool"; 19 | 20 | # DON"T TOUCH BELOW UNLESS YOU KNOW WHAT YOU ARE DOING 21 | if ( ! -x $gmetric_exec ) { 22 | die("Gmetric binary is not executable. 
Exiting..."); 23 | } 24 | 25 | ############################################################################ 26 | # List of metrics we are interested in 27 | ############################################################################ 28 | my @metrics = ( 29 | "multicast", 30 | "collisions", 31 | "rx_over_errors", 32 | "rx_crc_errors", 33 | "rx_frame_errors", 34 | "rx_fifo_errors", 35 | "rx_missed_errors", 36 | "tx_aborted_errors", 37 | "tx_carrier_errors", 38 | "tx_fifo_errors", 39 | "tx_heartbeat_errors", 40 | "lsc_int", 41 | "tx_busy", 42 | "non_eop_descs", 43 | "broadcast", 44 | "rx_no_buffer_count", 45 | "tx_timeout_count", 46 | "tx_restart_queue", 47 | "rx_long_length_errors", 48 | "rx_short_length_errors", 49 | "tx_flow_control_xon", 50 | "rx_flow_control_xon", 51 | "tx_flow_control_xoff", 52 | "rx_flow_control_xoff", 53 | "rx_csum_offload_errors", 54 | "alloc_rx_page_failed", 55 | "alloc_rx_buff_failed", 56 | "lro_aggregated", 57 | "lro_flushed", 58 | "lro_recycled", 59 | "rx_no_dma_resources", 60 | "hw_rsc_aggregated", 61 | "hw_rsc_flushed", 62 | "fdir_match", 63 | "fdir_miss", 64 | "fdir_overflow" 65 | ); 66 | 67 | my $gmetric_command = $gmetric_exec . " " . $gmetric_options; 68 | my $numArgs = $#ARGV + 1; 69 | 70 | unless ( $numArgs >= 1 ) { 71 | die("You need to supply at least one network interface. For more than one use space delimited e.g. eth0 eth1"); 72 | } 73 | 74 | # Where to store the last stats file 75 | my $tmp_dir_base="/root/ethtool_stats"; 76 | 77 | # If the tmp directory doesn't exit create it 78 | if ( ! -d $tmp_dir_base ) { 79 | system("mkdir -p $tmp_dir_base"); 80 | } 81 | 82 | my $interface; 83 | 84 | ############################################################################### 85 | # Now let's look through each supplied interface 86 | ############################################################################### 87 | foreach $interface (<@ARGV>) { 88 | 89 | # Make sure interface actually exists 90 | if ( ! -l "/sys/class/net/${interface}" ) { 91 | print "Interface ${interface} is not a valid ethernet interface. Skipping.\n"; 92 | next; 93 | } 94 | 95 | my $tmp_stats_file=$tmp_dir_base . "/" . "ethtool_${interface}"; 96 | 97 | my $stats_command = "$ethtool_bin -S $interface"; 98 | 99 | ############################################################################### 100 | # We need to store a baseline with statistics. If it's not there let's dump 101 | # it into a file. Don't do anything else 102 | ############################################################################### 103 | if ( ! -f $tmp_stats_file ) { 104 | print "Creating baseline. 
No output this cycle\n"; 105 | system("$stats_command > $tmp_stats_file"); 106 | } else { 107 | 108 | my %old_stats; 109 | my %new_stats; 110 | 111 | ###################################################### 112 | # Let's read in the file from the last poll 113 | open(OLDSTATUS, "< $tmp_stats_file"); 114 | 115 | while() 116 | { 117 | if (/(\s+)(.*): (\d+)/) { 118 | $old_stats{$2}=${3}; 119 | } 120 | } 121 | 122 | # Get the time stamp when the stats file was last modified 123 | my $old_time = (stat $tmp_stats_file)[9]; 124 | close(OLDSTATUS); 125 | 126 | ##################################################### 127 | # Get the new stats 128 | ##################################################### 129 | system("$stats_command > $tmp_stats_file"); 130 | open(NEWSTATUS, "< $tmp_stats_file"); 131 | my $new_time = time(); 132 | 133 | while() 134 | { 135 | if (/(\s+)(.*): (\d+)/) { 136 | $new_stats{$2}=${3}; 137 | } 138 | } 139 | close(NEWSTATUS); 140 | 141 | # Time difference between this poll and the last poll 142 | my $time_difference = $new_time - $old_time; 143 | if ( $time_difference < 1 ) { 144 | die("Time difference can't be less than 1"); 145 | } 146 | 147 | ################################################################################# 148 | # Calculate deltas for counter metrics and send them to ganglia 149 | ################################################################################# 150 | foreach my $metric ( @metrics ) { 151 | if ( defined $new_stats{$metric} ) { 152 | my $rate = ($new_stats{$metric} - $old_stats{$metric}) / $time_difference; 153 | 154 | if ( $rate < 0 ) { 155 | print "Something is fishy. Rate for " . $metric . " shouldn't be negative. Perhaps counters were reset. Doing nothing"; 156 | } else { 157 | system($gmetric_command . " -tdouble -u '/s' -n ${metric_prefix}_${interface}_" . $metric . " -v " . $rate); 158 | 159 | } 160 | } 161 | } 162 | 163 | } # end of if ( ! -f $tmp_stats_file ) { 164 | 165 | # Send pause parameters for the interface 166 | my $pause_params = `${ethtool_bin} --show-pause ${interface} | xargs echo | sed "s/.*Autonegotiate/Autonegotiate/g"`; 167 | system($gmetric_command . 
" -tstring -n ${metric_prefix}_pause_parameters_${interface} -v '${pause_params}'"); 168 | 169 | } 170 | 171 | 172 | -------------------------------------------------------------------------------- /arista/ganglia_arista_interfaces.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ###################################################################### 4 | # Uses eAPI 5 | # 6 | # Run it with daemonize /persist/sys/ganglia_arista_interfaces.py 7 | # --username - sets the eAPI username to use 8 | # --password - sets the eAPI password to use 9 | # --protocol [http | https] - sets the protocol to use 10 | # 11 | # Alternatively run it at boot with 12 | # 13 | # daemon ganglia_interfaces 14 | # command /persist/sys/ganglia_arista_interfaces.py 15 | ###################################################################### 16 | import time, socket, json 17 | import sys, os, copy 18 | import argparse 19 | import urlparse 20 | 21 | import jsonrpclib 22 | 23 | METRICS = { 24 | 'time' : 0, 25 | 'data' : {} 26 | } 27 | 28 | LAST_METRICS = dict(METRICS) 29 | 30 | gmetric_cmd = "/usr/bin/gmetric -d 240 -g arista -c /persist/sys/gmond.conf "; 31 | 32 | counters = dict() 33 | 34 | def make_url(host, uid, pwd, proto, port): 35 | if proto not in ['http', 'https']: 36 | raise ValueError('invalid protocol specified') 37 | 38 | if proto == 'http' and not port: 39 | port = 80 40 | elif proto == 'https' and not port: 41 | port = 443 42 | 43 | if int(port) < 1 or port > 65535: 44 | raise ValueError('port value is out of range') 45 | 46 | scheme = proto 47 | netloc = '%s:%s@%s:%s' % (uid, pwd, host, port) 48 | path = '/command-api' 49 | 50 | return urlparse.urlunsplit((scheme, netloc, path, None, None)) 51 | 52 | def make_connection(url): 53 | return jsonrpclib.Server(url) 54 | 55 | def run_command(connection, commands): 56 | assert isinstance(commands, list) 57 | return connection.runCmds(1, commands) 58 | 59 | #################################################################### 60 | # I want to convert any numbers to float. If they are no numbers 61 | # but strings for whatever reason I want those to be set to 0 62 | # e.g. out_discards often shows up as None. I don't want the script 63 | # to die if that is the case so I'm using exceptions. 
Lame I know 64 | #################################################################### 65 | def format_number(value): 66 | try: 67 | new_value = float(value) 68 | except: 69 | new_value = 0 70 | 71 | return new_value 72 | 73 | #################################################################### 74 | # Daemonize 75 | #################################################################### 76 | def start(connection): 77 | while 1: 78 | 79 | start_fetch = time.time() 80 | new_time = time.time() 81 | 82 | data = run_command(connection, ['show interfaces']) 83 | new_metrics = dict() 84 | 85 | # Loop through any know interfaces 86 | for key, value in data[0]['interfaces'].items(): 87 | if value['lineProtocolStatus'] == 'up': 88 | counters = value['interfaceCounters'] 89 | new_metrics[str(key)] = dict() 90 | new_metrics[str(key)]['pkts_out'] = format_number(counters['outUcastPkts']) 91 | new_metrics[str(key)]['mcast_out'] = format_number(counters['outMulticastPkts']) 92 | new_metrics[str(key)]['bcast_out'] = format_number(counters['outBroadcastPkts']) 93 | new_metrics[str(key)]['pkts_in'] = format_number(counters['inUcastPkts']) 94 | new_metrics[str(key)]['mcast_in'] = format_number(counters['inMulticastPkts']) 95 | new_metrics[str(key)]['bcast_in'] = format_number(counters['inBroadcastPkts']) 96 | new_metrics[str(key)]['bytes_out'] = format_number(counters['outOctets']) 97 | new_metrics[str(key)]['bytes_in'] = format_number(counters['inOctets']) 98 | new_metrics[str(key)]['in_discards'] = format_number(counters['inDiscards']) 99 | new_metrics[str(key)]['in_errors'] = format_number(counters['totalInErrors']) 100 | new_metrics[str(key)]['out_discards'] = format_number(counters['outDiscards']) 101 | new_metrics[str(key)]['out_errors'] = format_number(counters['totalOutErrors']) 102 | 103 | end_fetch = time.time() 104 | 105 | fetch_time = end_fetch - start_fetch 106 | 107 | # Emit metrics 108 | global LAST_METRICS 109 | if LAST_METRICS['time'] != 0: 110 | time_diff = new_time - LAST_METRICS['time'] 111 | for ifname in new_metrics: 112 | ifname_pretty = ifname.replace("Ethernet", "et").replace("Management", "ma").replace("Vlan", "vlan") 113 | 114 | for metric in new_metrics[ifname]: 115 | 116 | try: 117 | diff = (new_metrics[ifname][metric] - LAST_METRICS['data'][ifname][metric]) / time_diff 118 | 119 | except KeyError, e: 120 | pass 121 | 122 | # If difference is negative counters have rolled. 
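                    # (for example, after a switch reload or a counter reset the cumulative
                    # interface counters restart from zero, so new - old comes out negative;
                    # the sample is skipped rather than emitting a bogus rate)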
123 | if ( diff >= 0 ): 124 | os.system( gmetric_cmd + " -t float -n " + ifname_pretty + "_" + metric + " -u /sec -v " + str(diff)) 125 | 126 | # update cache 127 | LAST_METRICS = { 128 | 'time': time.time(), 129 | 'data': copy.deepcopy(new_metrics) 130 | } 131 | 132 | time.sleep(30) 133 | 134 | def main(): 135 | parser = argparse.ArgumentParser() 136 | 137 | parser.add_argument('--username', '-u', 138 | default='eapi', 139 | help='Specifies the eAPI username') 140 | 141 | parser.add_argument('--password', '-p', 142 | default='password', 143 | help='Specifies the eAPI password') 144 | 145 | parser.add_argument('--hostname', 146 | default='localhost', 147 | help='Specifies the hostname of the EOS node') 148 | 149 | parser.add_argument('--protocol', 150 | default='https', 151 | choices=['http', 'https'], 152 | help='Specifies the protocol to use (default=https)') 153 | 154 | parser.add_argument('--port', 155 | default=0, 156 | type=int, 157 | help='Specifies the port to use (default=443)') 158 | 159 | args = parser.parse_args() 160 | 161 | url = make_url(args.hostname, args.username, args.password, 162 | args.protocol, args.port) 163 | 164 | try: 165 | connection = make_connection(url) 166 | start(connection) 167 | except KeyboardInterrupt: 168 | parser.exit() 169 | except Exception as exc: 170 | parser.error(exc) 171 | 172 | if __name__ == '__main__': 173 | main() 174 | -------------------------------------------------------------------------------- /ARCHIVE/mysql/mysql_gmetric.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### $Header: /var/lib/cvs/ops/ganglia/mysql_gmetric.sh,v 1.3 2006/07/11 17:51:13 ben Exp $ 4 | 5 | ### this script is a replacement for mysql_metrics.sh 6 | ### instead of just returning a single metric, this 7 | ### script gets all three and submits them so you 8 | ### only hit mysqladmin once per minute instead of 9 | ### 3 times 10 | 11 | ### Copyright Simply Hired, Inc. 2006 12 | ### License to use, modify, and distribute under the GPL 13 | ### http://www.gnu.org/licenses/gpl.txt 14 | 15 | VERSION=1.5 16 | 17 | GMETRIC="/usr/bin/gmetric" 18 | GMETRIC_ARGS="-c /etc/gmond.conf" 19 | STATEFILE="/var/lib/ganglia/metrics/mysql.stats" 20 | MYSQL_SOCKFILE="/var/lib/mysql/mysql.sock" 21 | ERROR_NOTROOT="/tmp/mysql_gmetric_notroot" 22 | ERROR_NOSOCKFILE="/tmp/mysql_gmetric_nosockfile" 23 | ERROR_CANT_CONNECT="/tmp/mysql_gmetric_cant_connect" 24 | ERROR_CREATE="/tmp/mysql_gmetric_create_statefile_failed" 25 | 26 | # this script requires a user with usage and 'replication slave' privs. if you 27 | # don't check any slaves, you can leave out repl privs it will silently fail 28 | # the slave test and not report the metric. 29 | # usage means 'no privs' so having it on *.* doesn't expose anything. *.* is 30 | # required for replication client. 31 | ### grant USAGE on *.* to 'ganglia'@'localhost' identified by 'xxxxx'; 32 | ### grant REPLICATION CLIENT on *.* to 'ganglia'@'localhost' identified by 'xxxxx'; 33 | MYSQL_USER="ganglia" 34 | MYSQL_PASS="xxxxx" 35 | 36 | date=`date +%s` 37 | 38 | if [ $UID -ne 0 ] 39 | then 40 | if [ -e $ERROR_NOTROOT ] ; then exit 1; fi 41 | echo "Error: this script must be run as root." 
42 | touch $ERROR_NOTROOT 43 | exit 1 44 | fi 45 | rm -f $ERROR_NOTROOT 46 | 47 | if [ "x$1" == "x-h" ] 48 | then 49 | echo "Usage: submit_mysql_gmetric.sh [--clean]" 50 | echo " --clean delete all tmp files" 51 | exit 0 52 | fi 53 | 54 | if [ "x$1" == "x--clean" ] 55 | then 56 | rm -f $STATEFILE $ERROR_NOTROOT $ERROR_NOSOCKFILE $ERROR_CANT_CONNECT $ERROR_CREATE 57 | retval=$? 58 | if [ $retval -ne 0 ] 59 | then 60 | echo "failed to clean up." 61 | exit 1 62 | else 63 | echo "All cleaned up." 64 | exit 0 65 | fi 66 | fi 67 | 68 | # if the GMETRIC program isn't installed, compain 69 | if [ ! -e $GMETRIC ] 70 | then 71 | if [ -e $ERROR_GMETRIC ] ; then exit 1; fi 72 | echo "" 73 | echo "Error: GMETRIC doesn't seem to be installed." 74 | echo "$GMETRIC doesn't exist." 75 | echo "" 76 | touch $ERROR_GMETRIC 77 | exit 1 78 | fi 79 | 80 | # if the sockfile doesn't exist, mysql probably isn't running. 81 | if [ ! -e $MYSQL_SOCKFILE ] 82 | then 83 | if [ -e $ERROR_NOSOCKFILE ] ; then exit 1 ; fi 84 | echo "Mysql sock file ($MYSQL_SOCKFILE) doesn't exist." 85 | echo "This usually implies that mysql isn't running." 86 | echo "I'm going to stop reporting until the sock file comes back." 87 | touch $ERROR_NOSOCKFILE 88 | exit 1 89 | fi 90 | 91 | # if we passed the sockfile test, but $ERROR_NOSOCKFILE exists, it was probably just started. 92 | if [ -e $ERROR_NOSOCKFILE ] 93 | then 94 | echo "The sock file has returned. I'm starting up again." 95 | rm $ERROR_NOSOCKFILE 96 | fi 97 | 98 | exec 3>&2 99 | exec 2>/dev/null 100 | string=`mysqladmin --connect_timeout=15 -u $MYSQL_USER -p${MYSQL_PASS} status` 101 | retval=$? 102 | slavestr=`mysql --connect_timeout=15 -u $MYSQL_USER -p${MYSQL_PASS} -e "show slave status\G" | grep "Seconds_Behind_Master"` 103 | exec 2>&3 104 | exec 3>&- 105 | 106 | if [ $retval -ne 0 ] 107 | then 108 | if [ -e $ERROR_CANT_CONNECT ] ; then exit 1 ; fi 109 | echo "Even though the sock file exists, I can't connect to mysql." 110 | echo "Bummer. " 111 | touch $ERROR_CANT_CONNECT 112 | exit 1 113 | fi 114 | 115 | 116 | threads=`echo $string | sed 's/.*Threads: \([0-9]*\) .*/\1/'` 117 | queries=`echo $string | sed -e "s/.*Questions: \([0-9]*\) .*/\1/"` 118 | slow_q=`echo $string | sed -e "s/.*Slow queries: \([0-9]*\) .*/\1/"` 119 | # slave_sec might be empty if this db host is not a slave 120 | slave_sec=`echo $slavestr | sed -e "s/.*Seconds_Behind_Master: \([0-9]*\).*/\1/"` 121 | 122 | # save and turn off /STDERR for th estatefile tests 123 | exec 3>&2 124 | exec 2>/dev/null 125 | 126 | # if the statefile doesn't exist, we either havn't 127 | # run yet or there's something bigger wrong. 128 | if [ ! -e $STATEFILE ] 129 | then 130 | if [ ! -d `dirname $STATEFILE` ] 131 | then 132 | mkdir -p `dirname $STATEFILE` 133 | fi 134 | echo "$date $queries $slow_q" > $STATEFILE 135 | $iostat -d | tail +4 >> $STATEFILE 136 | if [ ! -e $STATEFILE ] 137 | then 138 | # if it didn't exist and we couldn't create 139 | # it, we should just scream bloody murder and die. 140 | # only scream once though... 141 | if [ -e $ERROR_CREATE ] 142 | then 143 | exit 1 144 | fi 145 | echo "" 146 | echo "ERROR: couldn't create $STATEFILE" 147 | echo "" 148 | touch $ERROR_CREATE 149 | exit 1 150 | fi 151 | echo "Created statefile. Exitting." 
152 | exit 0 153 | fi 154 | 155 | # restore stderr 156 | exec 2>&3 157 | exec 3>&- 158 | 159 | old_stats=(`cat $STATEFILE`) 160 | old_date=${old_stats[0]} 161 | old_queries=${old_stats[1]} 162 | old_slow_q=${old_stats[2]} 163 | 164 | echo "$date $queries $slow_q" > $STATEFILE 165 | 166 | time_diff=$(($date - $old_date)) 167 | queries_diff=$(($queries - $old_queries)) 168 | slow_q_diff=$((slow_q - $old_slow_q)) 169 | 170 | if [ $time_diff -eq 0 ] 171 | then 172 | if [ -e $ERROR_TIMEDIFF ] ; then exit 1 ; fi 173 | echo "something is broken." 174 | echo "time_diff is 0." 175 | touch $ERROR_TIMEDIFF 176 | exit 1 177 | fi 178 | 179 | if [ $queries_diff -le 0 ] ; then queries_diff=0 ; fi 180 | if [ $slow_q_diff -le 0 ] ; then slow_q_diff=0 ; fi 181 | 182 | 183 | #queries per second 184 | qps=`echo "scale=3;$queries_diff / $time_diff" | bc` 185 | sqps=`echo "scale=3;$slow_q_diff / $time_diff" | bc` 186 | 187 | $GMETRIC $GMETRIC_ARGS --name="mysql_threads" --value=${threads} --type=int16 188 | $GMETRIC $GMETRIC_ARGS --name="mysql_queries" --value=${qps} --type=float --units="qps" 189 | $GMETRIC $GMETRIC_ARGS --name="mysql_slow_queries" --value=${sqps} --type=float --units="qps" 190 | 191 | # if slave sec exists, i.e. this mysqld host is a slave. 192 | # If it's not, don't submit the metric 193 | if [ -n "${slave_sec}" ] 194 | then 195 | $GMETRIC $GMETRIC_ARGS --name="mysql_slave" --value="${slave_sec}" --type="int16" --units="sec" 196 | fi 197 | -------------------------------------------------------------------------------- /database/postgres/ganglia_postgres.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | # Queries a PostgreSQL database and publishes statistics to Ganglia using gmetric. 3 | # 4 | # == Install Dependencies == 5 | # 6 | # sudo apt-get install ruby ganglia-monitor build-essential 7 | # 8 | # == Usage == 9 | # 10 | # postgres_gmetric.rb 11 | # 12 | # 13 | # Author: Vladimir Vuksan 14 | # Date: 2011-09-07 15 | # Changelog: 16 | # - Modified the script to treat counters correctly ie. calculate per second rates 17 | # - Added group to gmetric. Needs gmetric 3.2.0+ 18 | # 19 | # Author: Nicolas Marchildon 20 | # Date: 2009-07 21 | # http://github.com/elecnix/postgres_gmetric 22 | require 'optparse' 23 | require 'yaml' 24 | 25 | (puts "FATAL: gmetric not found" ; exit 1) if !File.exists? "/usr/bin/gmetric" 26 | 27 | $options = {} 28 | optparse = OptionParser.new do |opts| 29 | opts.banner = "Usage: postgres_gmetric.rb [-U ] [-t ] " 30 | 31 | # Define the options, and what they do 32 | $options[:verbose] = false 33 | opts.on( '-v', '--verbose', 'Output collected data' ) do 34 | $options[:verbose] = true 35 | end 36 | 37 | $options[:user] = ENV['LOGNAME'] 38 | opts.on( '-U', '--user USER', 'Connect as USER' ) do |user| 39 | $options[:user] = user 40 | end 41 | 42 | $options[:tmpfile] = "/tmp/postgres_metrics_A12345" 43 | opts.on( '-t', '--tmpfile TMPFILE', "Location to store temporary checkpoint file. Defaults to #{$options[:tmp]}" ) do |tmpfile| 44 | $options[:tmpfile] = tmpfile 45 | end 46 | 47 | opts.on( '-h', '--help', 'Display this screen' ) do 48 | puts opts 49 | exit 50 | end 51 | end 52 | optparse.parse! 53 | 54 | $options[:database]=ARGV[0] 55 | 56 | (puts "Missing database"; exit 1) if $options[:database].empty? 57 | (puts "Missing user"; exit 1) if $options[:user].nil? 
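# Illustrative invocation (the user, tmpfile and database below are examples only,
# not defaults of this script); it is typically scheduled from cron so the checkpoint
# file loaded next can turn the raw counters into per-second rates:
#
#   ruby postgres_gmetric.rb -U postgres -t /tmp/postgres_metrics_mydb mydb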
58 | 59 | if FileTest.exists?($options[:tmpfile]) 60 | 61 | puts "Loading #{$options[:tmpfile]}" if $options[:verbose] 62 | $metrics = YAML.load_file( $options[:tmpfile] ) 63 | 64 | else 65 | 66 | $metrics = {} 67 | 68 | end 69 | 70 | def exec_query(sql) 71 | `psql -U #{$options[:user]} #{$options[:database]} -A -c "#{sql}"` 72 | end 73 | 74 | def publish(sql, type = "counter") 75 | data=exec_query(sql) 76 | lines=data.split("\n") 77 | values=lines[1].split('|') 78 | col=0 79 | lines[0].split('|').each do |colname| 80 | # If value is absolute 81 | if type == "absolute" 82 | value=values[col] 83 | puts "#{colname}=#{value}" if $options[:verbose] 84 | `gmetric --name "pg_#{colname}" -g postgres --value #{value} --type float --dmax=240` 85 | else 86 | if ! $metrics[colname].nil? 87 | time_diff = Time.now - $metrics[colname]["timestamp"] 88 | value_diff = values[col].to_f - $metrics[colname]["value"] 89 | value = value_diff / time_diff 90 | 91 | puts "#{colname}=#{value}" if $options[:verbose] 92 | if value > 0 93 | `gmetric --name "pg_#{colname}" -g postgres --value #{value} --units 'per second' --type float --dmax=240` 94 | end 95 | end 96 | 97 | $metrics[colname] = { "value" => values[col].to_f, "timestamp" => Time.now } 98 | end 99 | col=col+1 100 | end 101 | end 102 | 103 | 104 | publish "select * from pg_stat_bgwriter;" 105 | publish "select sum(numbackends) as backends, sum(xact_commit) as xact_commit, sum(xact_rollback) as xact_rollback, sum(blks_read) as blks_read, sum(blks_hit) as blks_hit, sum(tup_returned) as tup_returned, sum(tup_fetched) as tup_fetched, sum(tup_inserted) as tup_inserted, sum(tup_updated) as tup_updated, sum(tup_deleted) as tup_deleted from pg_stat_database;" 106 | publish "select sum(seq_scan) as seq_scan, sum(seq_tup_read) as seq_tup_read, sum(idx_scan) as idx_scan, sum(idx_tup_fetch) as idx_tup_fetch, sum(n_tup_ins) as n_tup_ins, sum(n_tup_upd) as n_tup_upd, sum(n_tup_del) as n_tup_del, sum(n_tup_hot_upd) as n_tup_hot_upd, sum(n_live_tup) as n_live_tup, sum(n_dead_tup) as n_dead_tup from pg_stat_all_tables;" 107 | publish "select sum(heap_blks_read) as heap_blks_read, sum(heap_blks_hit) as heap_blks_hit, sum(idx_blks_read) as idx_blks_read_tbl, sum(idx_blks_hit) as idx_blks_hit_tbl, sum(toast_blks_read) as toast_blks_read, sum(toast_blks_hit) as toast_blks_hit, sum(tidx_blks_read) as tidx_blks_read, sum(tidx_blks_hit) as tidx_blks_hit from pg_statio_all_tables;" 108 | publish "select sum(idx_blks_read) as idx_blks_read, sum(idx_blks_hit) as idx_blks_hit from pg_statio_all_indexes;" 109 | publish "select COALESCE(sum(blks_read), 0) as seq_blks_read, COALESCE(sum(blks_hit), 0) as seq_blks_hit from pg_statio_all_sequences;" 110 | # publish check_postgres bloat 111 | publish("select sum(pg_database_size(d.oid)) as size_database FROM pg_database d ORDER BY 1 DESC LIMIT 10;", "absolute") 112 | publish("select sum(pg_relation_size(c.oid)) as size_table FROM pg_class c, pg_namespace n WHERE (relkind = 'r') AND n.oid = c.relnamespace;", "absolute") 113 | publish("select sum(pg_relation_size(c.oid)) as size_index FROM pg_class c, pg_namespace n WHERE (relkind = 'i') AND n.oid = c.relnamespace;", "absolute") 114 | publish("select sum(pg_relation_size(c.oid)) as size_relation FROM pg_class c, pg_namespace n WHERE (relkind = 'i' OR relkind = 'r') AND n.oid = c.relnamespace;", "absolute") 115 | publish("select count(*) as backends_waiting from pg_stat_activity where waiting = 't';", "absolute") 116 | publish("select (SELECT count(*) FROM pg_locks) as locks", "absolute") 117 
| publish("select COALESCE(max(COALESCE(ROUND(EXTRACT(epoch FROM now()-query_start)),0)),0) as query_time_max FROM pg_stat_activity WHERE current_query <> '';", "absolute") 118 | publish("select COALESCE(max(COALESCE(ROUND(EXTRACT(epoch FROM now()-query_start)),0)),0) as query_time_idle_in_txn FROM pg_stat_activity WHERE current_query = ' in transaction';", "absolute") 119 | publish("select max(COALESCE(ROUND(EXTRACT(epoch FROM now()-xact_start)),0)) as txn_time_max FROM pg_stat_activity WHERE xact_start IS NOT NULL;", "absolute") 120 | publish("select max(age(datfrozenxid)) as datfrozenxid_age FROM pg_database WHERE datallowconn;", "absolute") 121 | publish("select count(*) as wal_files FROM pg_ls_dir('pg_xlog') WHERE pg_ls_dir ~ E'^[0-9A-F]{24}$';", "absolute") 122 | ["vacuum", "analyze"].each do |type| 123 | ["auto", ""].each do |auto| 124 | criteria = (auto == "auto") ? "pg_stat_get_last_auto#{type}_time(c.oid)" : "GREATEST(pg_stat_get_last_#{type}_time(c.oid), pg_stat_get_last_auto#{type}_time(c.oid))" 125 | publish("select max(CASE WHEN v IS NULL THEN -1 ELSE round(extract(epoch FROM now()-v)) END) as #{auto}#{type}_age FROM (SELECT nspname, relname, #{criteria} AS v FROM pg_class c, pg_namespace n WHERE relkind = 'r' AND n.oid = c.relnamespace AND n.nspname <> 'information_schema' ORDER BY 3) AS foo;", "absolute") 126 | end 127 | end 128 | 129 | # Write counters to a file 130 | File.open($options[:tmpfile], 'w') {|f| f.write(YAML::dump($metrics)) } 131 | -------------------------------------------------------------------------------- /zfs/gmetric-zpool-status.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are met: 5 | # * Redistributions of source code must retain the above copyright 6 | # notice, this list of conditions and the following disclaimer. 7 | # * Redistributions in binary form must reproduce the above copyright 8 | # notice, this list of conditions and the following disclaimer in the 9 | # documentation and/or other materials provided with the distribution. 10 | # 11 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 12 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 13 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 14 | # DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE FOR ANY 15 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 16 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 17 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 18 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 19 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 20 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
21 | 22 | # Targeting python2.6 for RHEL6 compatibility 23 | 24 | import collections 25 | import optparse 26 | import logging 27 | import subprocess 28 | import sys 29 | 30 | # Numeric for easy > 0 or == 0 numeric checks 31 | ZPOOL_HEALTH = { 32 | 'ONLINE': 0, 33 | 'DEGRADED': 1, 34 | 'FAULTED': 2, 35 | 'OFFLINE': 3, 36 | 'REMOVED': 4, 37 | 'UNAVAIL': 5 38 | } 39 | 40 | METRIC = collections.namedtuple('Metric', ['name', 'value', 'type', 'desc']) 41 | 42 | 43 | # Backported check_output from 2.6 44 | # From: https://gist.github.com/edufelipe/1027906 45 | def check_output(*popenargs, **kwargs): 46 | r"""Run command with arguments and return its output as a byte string. 47 | 48 | Backported from Python 2.7 as it's implemented as pure python on stdlib. 49 | 50 | >>> check_output(['/usr/bin/python', '--version']) 51 | Python 2.6.2 52 | """ 53 | process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) 54 | output, unused_err = process.communicate() 55 | retcode = process.poll() 56 | if retcode: 57 | cmd = kwargs.get("args") 58 | if cmd is None: 59 | cmd = popenargs[0] 60 | error = subprocess.CalledProcessError(retcode, cmd) 61 | error.output = output 62 | raise error 63 | return output 64 | 65 | 66 | def send_metric(metric, dry_run=False): 67 | group = 'zpool' 68 | cmd = ('/usr/bin/gmetric --name=%s --value=%s --type=%s --group=%s --tmax=90 --dmax=600 --desc="%s"' % 69 | (metric.name, str(metric.value), metric.type, group, metric.desc)) 70 | log.debug('Running cmd: %s' % cmd) 71 | if dry_run is False: 72 | try: 73 | subprocess.check_call(cmd, shell=True) 74 | except subprocess.CalledProcessError as e: 75 | log.warn('gmetric sending failed with %i, cmd was: %s' % (e.returncode, cmd)) 76 | 77 | 78 | def zpool_list(): 79 | """ Get the list of all zpools. Capture capacity & health along 80 | the way. """ 81 | out = check_output(['/sbin/zpool', 'list']) 82 | lines = out.splitlines() 83 | del lines[0] 84 | zpools = [] 85 | # NAME SIZE ALLOC FREE CAP DEDUP HEALTH ALTROOT 86 | for line in lines: 87 | columns = map(lambda s: s.strip(), line.split()) 88 | zpool = {} 89 | zpool['name'] = columns[0] 90 | zpool['capacity'] = float(columns[4].strip('%')) 91 | health_s = columns[6] 92 | if health_s in ZPOOL_HEALTH: 93 | zpool['health'] = ZPOOL_HEALTH[health_s] 94 | else: 95 | zpool['health'] = 100 96 | zpools.append(zpool) 97 | return zpools 98 | 99 | 100 | def zpool_find_errors(pool_name): 101 | """ There is no property that corresponds cleanly to the errors 102 | output line from `zpool status`. Instead the full status command 103 | is run and anything other than 'no errors' is considered bad. 
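    For reference, `zpool status <pool>` on a healthy pool prints a line of
    the form:

        errors: No known data errors

    and that exact message is the only value treated as error-free here;
    any other text after 'errors:' causes a return value of 1.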
""" 104 | out = check_output(['/sbin/zpool', 'status', pool_name]) 105 | has_errors = 1 106 | for line in out.splitlines(): 107 | if 'errors:' in line: 108 | msg = line.split('errors:')[1].strip() 109 | if msg == 'No known data errors': 110 | has_errors = 0 111 | break 112 | return has_errors 113 | 114 | 115 | def make_metrics(zpool, has_errors): 116 | metrics = [] 117 | metrics.append(METRIC('zpool.%s.capacity' % zpool['name'], 118 | zpool['capacity'], 'double', 119 | 'Percentage of pool space used.')) 120 | metrics.append(METRIC('zpool.%s.health' % zpool['name'], 121 | zpool['health'], 'uint8', 122 | 'The current health of the pool')) 123 | metrics.append(METRIC('zpool.%s.errors' % zpool['name'], 124 | has_errors, 'uint8', 125 | 'non-zero indicates errors')) 126 | return metrics 127 | 128 | 129 | #### Main and Friends 130 | 131 | def setup_logging(level): 132 | global log 133 | 134 | log = logging.getLogger('zpool-status') 135 | formatter = logging.Formatter(' | '.join(['%(asctime)s', '%(name)s', 136 | '%(levelname)s', '%(message)s'])) 137 | ch = logging.StreamHandler() 138 | ch.setFormatter(formatter) 139 | log.addHandler(ch) 140 | lmap = { 141 | 'CRITICAL': logging.CRITICAL, 142 | 'ERROR': logging.ERROR, 143 | 'WARNING': logging.WARNING, 144 | 'INFO': logging.INFO, 145 | 'DEBUG': logging.DEBUG, 146 | 'NOTSET': logging.NOTSET 147 | } 148 | log.setLevel(lmap[level]) 149 | 150 | 151 | def parse_args(argv): 152 | parser = optparse.OptionParser() 153 | parser.add_option('--log-level', 154 | action='store', dest='log_level', default='WARNING', 155 | choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 156 | 'DEBUG', 'NOTSET']) 157 | parser.add_option('--dry-run', 158 | action='store_true', dest='dry_run', default=False, 159 | help='actually send data via gmetric') 160 | return parser.parse_args(argv) 161 | 162 | 163 | def main(argv): 164 | (opts, args) = parse_args(argv) 165 | setup_logging(opts.log_level) 166 | zpools = zpool_list() 167 | metrics = [] 168 | for zpool in zpools: 169 | has_errors = zpool_find_errors(zpool['name']) 170 | metrics.extend(make_metrics(zpool, has_errors)) 171 | for metric in metrics: 172 | send_metric(metric, dry_run=opts.dry_run) 173 | 174 | 175 | if __name__ == '__main__': 176 | main(sys.argv[1:]) 177 | -------------------------------------------------------------------------------- /memcached/memcached.pl/ganglia_memcached.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | ############################################################# 4 | # This is memcached collection script for Ganglia 5 | # Author: Vladimir Vuksan http://vuksan.com/linux/ 6 | # Based off mymemcalc http://code.google.com/p/mymemcalc/ 7 | # 8 | # You need to install XML::Simple and Cache::Memcached 9 | # modules off CPAN ie. 10 | # perl -MCPAN -e 'install XML::Simple' 11 | # perl -MCPAN -e 'Cache::Memcached' 12 | # 13 | ############################################################# 14 | use strict; 15 | use warnings FATAL => 'all'; 16 | use English qw ( -no_match_vars ); 17 | use XML::Simple; 18 | use Cache::Memcached; 19 | use Getopt::Long; 20 | 21 | #################################################################################### 22 | # YOU MAY NEED TO MODIFY FOLLOWING 23 | # Adjust this variables appropriately. Feel free to add any options to gmetric_command 24 | # necessary for running gmetric in your environment to gmetric_options e.g. 
-c /etc/gmond.conf 25 | #################################################################################### 26 | my $gmetric_exec = "/usr/bin/gmetric"; 27 | my $gmetric_options = " -d 120 "; 28 | #################################################################################### 29 | my $gmetric_command = $gmetric_exec . $gmetric_options; 30 | my $host = "127.0.0.1"; 31 | my $port = "11211"; 32 | my $opt_help; 33 | my $old_time; 34 | #################################################################################### 35 | # Where to store the last stats file 36 | #################################################################################### 37 | my $tmp_dir_base="/tmp/memcached_stats"; 38 | my $tmp_stats_file=$tmp_dir_base . "/" . "memcached_stats"; 39 | 40 | # If the tmp directory doesn't exit create it 41 | if ( ! -d $tmp_dir_base ) { 42 | system("mkdir -p $tmp_dir_base"); 43 | } 44 | 45 | 46 | GetOptions("help" => \$opt_help, #flag 47 | "h=s" => \$host, 48 | "p=s" => \$port 49 | ); 50 | 51 | if(defined($opt_help)){ 52 | print <<'END_USAGE' 53 | Usage: ganglia_memcached.pl [OPTION]... 54 | Collect memcached statistics 55 | 56 | Options: 57 | -help Usage information 58 | -h Hostname of memcached. If not supplied defaults to 127.0.0.1 59 | -p Port of memcached. If not supplied defaults to 11211 60 | END_USAGE 61 | ; 62 | exit; 63 | } 64 | 65 | my $debug = 0; 66 | 67 | # Set up the memcache pool 68 | my @mp; 69 | 70 | push(@mp, "$host:$port" ); 71 | 72 | my $mcache = new Cache::Memcached(); 73 | $mcache->set_servers(\@mp); 74 | 75 | #use Data::Dumper; 76 | 77 | # Get the stats 78 | my %stats=(); 79 | my %old_stats; 80 | %stats = %{$mcache->stats('misc')}; 81 | 82 | #print Dumper (%stats); 83 | 84 | 85 | ################################################################################# 86 | # Decide if we need to use a metric suffix 87 | ################################################################################# 88 | my $metric_suffix; 89 | # If port metric is not 11211 append the port to metric name 90 | if ( $port != "11211" ) { 91 | $metric_suffix = "_" . $port; 92 | } else { 93 | $metric_suffix = ""; 94 | } 95 | 96 | ################################################################################# 97 | # Memcache Hit Ratio 98 | ################################################################################# 99 | my $hits = $stats{'total'}->{'get_hits'}; 100 | my $misses = $stats{'total'}->{'get_misses'}; 101 | my $hitsplusmisses = $hits + $misses; 102 | my $hit_ratio; 103 | if ( $hitsplusmisses == 0 ) { 104 | $hit_ratio = 0.0; 105 | } else { 106 | $hit_ratio = $hits / $hitsplusmisses; 107 | } 108 | 109 | print 'memcache_hit_ratio: ' . substr( ( $hit_ratio * 100 ), 0, 5 ) . "%\n"; 110 | 111 | if ( $debug == 0 ) { 112 | system($gmetric_command . " -u ratio -tfloat -n memcache_hit_ratio".$metric_suffix." -v " . 
$hit_ratio); 113 | } 114 | 115 | ################################################################################# 116 | # Calculate Memcache Fill Ratio 117 | ################################################################################# 118 | my $total_bytes = 0; 119 | my $used_bytes = 0; 120 | my $curr_connections = 0; 121 | my $curr_items = 0; 122 | my $fill_ratio = 0; 123 | my $evictions = 0; 124 | my $bytes_read = 0; 125 | my $bytes_written = 0; 126 | my $new_time = time; 127 | 128 | foreach my $host ( keys %{ $stats{ 'hosts' } } ) { 129 | 130 | $total_bytes += $stats { 'hosts' }{ $host }{ 'misc' }->{ 'limit_maxbytes' }; 131 | $used_bytes += $stats { 'hosts' }{ $host }{ 'misc' }->{ 'bytes' }; 132 | $fill_ratio = $used_bytes / $total_bytes; 133 | $curr_connections = $stats { 'hosts' }{ $host }{ 'misc' }->{ 'curr_connections' }; 134 | $curr_items = $stats { 'hosts' }{ $host }{ 'misc' }->{ 'curr_items' }; 135 | # 136 | $evictions = $stats { 'hosts' }{ $host }{ 'misc' }->{ 'evictions' }; 137 | $bytes_read = $stats { 'hosts' }{ $host }{ 'misc' }->{ 'bytes_read' }; 138 | $bytes_written = $stats { 'hosts' }{ $host }{ 'misc' }->{ 'bytes_written' }; 139 | } 140 | 141 | 142 | if ( $debug == 0 ) { 143 | system($gmetric_command . " -u ratio -tfloat -n memcache_fill_ratio".$metric_suffix." -v " . $fill_ratio ); 144 | system($gmetric_command . " -u connections -tfloat -n memcache_curr_connections".$metric_suffix." -v " . $curr_connections ); 145 | system($gmetric_command . " -u items -tfloat -n memcache_curr_items".$metric_suffix." -v " . $curr_items ); 146 | } 147 | 148 | print 'memcache_fill_ratio: ' . substr( ( ( $fill_ratio ) ) * 100, 0, 5 ) . "%\n"; 149 | print "memcache_items: " . $curr_items . "\n"; 150 | print "memcache_curr_conn: " . $curr_connections . "\n"; 151 | 152 | sub write_stats_file () { 153 | 154 | open(NEWSTATUS, "> $tmp_stats_file"); 155 | 156 | print NEWSTATUS "evictions=" . $evictions . "\n"; 157 | print NEWSTATUS "bytes_read=" .$bytes_read . "\n"; 158 | print NEWSTATUS "bytes_written=" . $bytes_written . "\n"; 159 | 160 | close(NEWSTATUS); 161 | 162 | } 163 | 164 | ############################################################################### 165 | # Now I need to calculate counter metrics such as bytes in/out and evictions 166 | # We need to store a baseline with statistics. If it's not there let's dump 167 | # it into a file. Don't do anything else 168 | ############################################################################### 169 | if ( ! -f $tmp_stats_file ) { 170 | print "Creating baseline. No output this cycle\n"; 171 | write_stats_file; 172 | } else { 173 | 174 | ###################################################### 175 | # Let's read in the file from the last poll 176 | open(OLDSTATUS, "< $tmp_stats_file"); 177 | 178 | while(<OLDSTATUS>) 179 | { 180 | if (/(.*)=(.*)/) { 181 | $old_stats{$1}=${2}; 182 | } 183 | } 184 | 185 | # Get the time stamp when the stats file was last modified 186 | $old_time = (stat $tmp_stats_file)[9]; 187 | close(OLDSTATUS); 188 | 189 | if ( $debug == 0 ) { 190 | # Make sure we are not getting negative numbers since they would indicate server was restarted 191 | if ( $bytes_read >= $old_stats{'bytes_read'} ) { 192 | my $time_diff = $new_time - $old_time; 193 | system($gmetric_command . " -u number -tuint32 -n memcache_evictions".$metric_suffix." -v " . eval($evictions - $old_stats{'evictions'}) ); 194 | system($gmetric_command . " -u bytes/s -tfloat -n memcache_bytes_read".$metric_suffix." -v " . 
eval ( ($bytes_read - $old_stats{'bytes_read'}) / $time_diff) ); 195 | system($gmetric_command . " -u bytes/s -tfloat -n memcache_bytes_written".$metric_suffix." -v " . eval( ($bytes_written - $old_stats{'bytes_written'}) / $time_diff ) ); 196 | } 197 | write_stats_file; 198 | 199 | } 200 | 201 | 202 | } 203 | -------------------------------------------------------------------------------- /disk/disk_gmetric.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### $Header: /var/lib/cvs/ops/ganglia/disk_gmetric.sh,v 1.5 2007/11/30 17:29:27 ben Exp $ 4 | 5 | ### this script reports disk metrics to ganglia. 6 | ### It should be called from cron every n minutes. 7 | ### It will report blocks per second on each disk, 8 | ### and will automatically adjust for whatever 9 | ### timeframe it is called 10 | 11 | ### Copyright Simply Hired, Inc. 2006 12 | ### License to use, modify, and distribute under the GPL 13 | ### http://www.gnu.org/licenses/gpl.txt 14 | 15 | VERSION=1.5 16 | 17 | GMETRIC="/usr/bin/gmetric" 18 | GMETRIC_ARGS="-c /etc/gmond.conf" 19 | STATEFILE="/var/lib/ganglia/metrics/io.stats" 20 | date=`date +%s` 21 | iostat="/usr/bin/iostat" 22 | 23 | ERROR_CREATE="/tmp/disk_gmetric_create_statefile_failed" 24 | ERROR_IOSTAT="/tmp/disk_gmetric_no_iostat" 25 | ERROR_DEVNAMES="/tmp/disk_gmetric_bad_devname" 26 | ERROR_DEVNAMES2="/tmp/disk_gmetric_bad_devname_didnt_fix" 27 | ERROR_GMETRIC="/tmp/disk_gmetric_no_gmetric" 28 | ERROR_TIMEDIFF="/tmp/disk_gmetric_timediff" 29 | ERROR_NOTROOT="/tmp/disk_gmetric_notroot" 30 | 31 | if [ $UID -ne 0 ] 32 | then 33 | if [ -e $ERROR_NOTROOT ] ; then exit 1; fi 34 | echo "Error: this script must be run as root." 35 | touch $ERROR_NOTROOT 36 | exit 1 37 | fi 38 | rm -f $ERROR_NOTROOT 39 | 40 | if [ "x$1" == "x-h" ] 41 | then 42 | echo "Usage: disk_gmetric.sh [--clean]" 43 | echo " --clean delete all tmp files" 44 | exit 0 45 | fi 46 | 47 | if [ "x$1" == "x--clean" ] 48 | then 49 | rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAME $ERROR_DEVNAME2 $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT $STATEFILE 50 | retval=$? 51 | if [ $retval -ne 0 ] 52 | then 53 | echo "failed to clean up." 54 | exit 1 55 | else 56 | echo "All cleaned up." 57 | exit 0 58 | fi 59 | fi 60 | 61 | # save and turn off /STDERR for th estatefile tests 62 | exec 3>&2 63 | exec 2>/dev/null 64 | 65 | # if the GMETRIC program isn't installed, compain 66 | if [ ! -e $GMETRIC ] 67 | then 68 | if [ -e $ERROR_GMETRIC ] ; then exit 1; fi 69 | echo "" 70 | echo "Error: GMETRIC doesn't seem to be installed." 71 | echo "$GMETRIC doesn't exist." 72 | echo "" 73 | touch $ERROR_GMETRIC 74 | exit 1 75 | fi 76 | 77 | # if the iostat program isn't installed, compain 78 | if [ ! -e $iostat ] 79 | then 80 | if [ -e $ERROR_IOSTAT ] 81 | then 82 | exit 1 83 | fi 84 | echo "" 85 | echo "Error: iostat doesn't seem to be installed." 86 | echo "$iostat doesn't exist." 87 | echo "" 88 | touch $ERROR_IOSTAT 89 | exit 1 90 | fi 91 | 92 | # if the statefile doesn't exist, we either havn't 93 | # run yet or there's something bigger wrong. 94 | if [ ! -e $STATEFILE ] 95 | then 96 | if [ ! -d `dirname $STATEFILE` ] 97 | then 98 | mkdir -p `dirname $STATEFILE` 99 | fi 100 | echo "$date" > $STATEFILE 101 | $iostat -d | tail +4 >> $STATEFILE 102 | if [ ! -e $STATEFILE ] 103 | then 104 | # if it didn't exist and we couldn't create 105 | # it, we should just scream bloody murder and die. 106 | # only scream once though... 
107 | if [ -e $ERROR_CREATE ] 108 | then 109 | exit 1 110 | fi 111 | echo "" 112 | echo "ERROR: couldn't create $STATEFILE" 113 | echo "" 114 | touch $ERROR_CREATE 115 | exit 1 116 | fi 117 | echo "Created statefile. Exitting." 118 | exit 0 119 | fi 120 | 121 | # restore stderr 122 | exec 2>&3 123 | exec 3>&- 124 | 125 | # this script uses iostat (part of the sysstat packag) 126 | # to retrieve disk metrics 127 | stats=(`$iostat -d | tail +4`) 128 | old_stats=(`cat $STATEFILE`) 129 | old_date=${old_stats[0]} 130 | 131 | read=0 132 | write=0 133 | old_read=0 134 | old_write=0 135 | read_sum=0 136 | write_sum=0 137 | 138 | ### function get_rw sets the variables $read and $write 139 | ### to the total number of read blocks and write blocks 140 | ### for a device. Which device is specified as an argument 141 | ### to the function. 142 | ### The function returns 1 if an invalid device number 143 | ### was specified. 144 | function get_rw() { 145 | base=$(($1 * 6 )) 146 | if [ "k${stats[$base]}" == "k" ] 147 | then 148 | # we're done looping 149 | return 1; 150 | else 151 | devname=${stats[$base]} 152 | read=${stats[$(($base + 4))]} 153 | write=${stats[$(($base + 5))]} 154 | return 0 155 | fi 156 | } 157 | 158 | function get_old_rw() { 159 | base=$(($1 * 6 )) 160 | base=$((base + 1)) 161 | if [ "k${old_stats[$base]}" == "k" ] 162 | then 163 | # we're done looping 164 | return 1; 165 | else 166 | old_devname=${old_stats[$base]} 167 | old_read=${old_stats[$(($base + 4))]} 168 | old_write=${old_stats[$(($base + 5))]} 169 | return 0 170 | fi 171 | } 172 | 173 | time_diff=$(($date - $old_date)) 174 | 175 | 176 | devnum=0 177 | get_rw $devnum 178 | get_old_rw $devnum 179 | res=$? 180 | while [ $res -eq 0 ] 181 | do 182 | # if devname and old_devname aren't the same, 183 | # this whole function is invalid. 184 | if [ $devname != $old_devname ] 185 | then 186 | if [ -e $ERROR_DEVNAMES ] 187 | then 188 | if [ -e $ERROR_DEVNAMES2 ] ; then exit 1; fi 189 | echo "Sorry, my attempt at fixing the problem failed." 190 | echo "It's now up to you, dear human." 191 | touch $ERROR_DEVNAMES2 192 | exit 1 193 | fi 194 | echo "something is broken." 195 | echo "devnames are not the same." 196 | echo "devname=$devname old_devname=$old_devname" 197 | echo "I'm backing up the current statefile ($STATEFILE) " 198 | echo "and will recreate it next time to see if that fixes this." 199 | mydate=`date +%Y%m%d%H%M%S` 200 | mv -fv $STATEFILE{,.${mydate}} 201 | touch $ERROR_DEVNAMES 202 | exit 1 203 | fi 204 | rm -f $ERROR_DEVNAMES $ERROR_DEVNAME2 205 | #devname, read, write, old_devname, old_read, old_write 206 | # are all set. calculate stat/sec and report. 207 | read_diff=$(($read - $old_read)) 208 | write_diff=$(($write - $old_write)) 209 | # if read_diff or write_diff are less than 0, the counter has wrapped 210 | # and we should reset ourselves 211 | if [ `expr $read_diff \< 0` -eq 1 -o `expr $write_diff \< 0` -eq 1 ] 212 | then 213 | #just write out the new stats and exit; there's nothing we can do 214 | echo "$date" > $STATEFILE 215 | $iostat -d | tail +4 >> $STATEFILE 216 | exit 1 217 | fi 218 | # if the system gets backed up and multiple invocations are launched 219 | # at the same time, the time difference between them is 0 and the 220 | # metric is meaningless. 221 | if [ $time_diff -eq 0 ] 222 | then 223 | if [ -e $ERROR_TIMEDIFF ] ; then exit 1 ; fi 224 | echo "something is broken." 225 | echo "time_diff is 0." 
226 | touch $ERROR_TIMEDIFF 227 | exit 1 228 | fi 229 | rm -f $ERROR_TIMEDIFF 230 | rps=`echo "scale=3;$read_diff / $time_diff" | bc` 231 | wps=`echo "scale=3;$write_diff / $time_diff" | bc` 232 | 233 | read_sum=`echo "scale=3;$read_sum + $rps" | bc` 234 | write_sum=`echo "scale=3;$write_sum + $wps" | bc` 235 | 236 | # report what we have calculated 237 | $GMETRIC $GMETRIC_ARGS --name="${devname}_reads" --value="$rps" --type="float" --units="blocks/sec" 238 | $GMETRIC $GMETRIC_ARGS --name="${devname}_writes" --value="$wps" --type="float" --units="blocks/sec" 239 | 240 | # echo "$devname $rps $wps $read_sum $write_sum " >> /tmp/foo.txt 241 | 242 | devnum=$((devnum + 1)) 243 | get_rw $devnum 244 | get_old_rw $devnum 245 | res=$? 246 | done 247 | 248 | $GMETRIC $GMETRIC_ARGS --name="disk_reads" --value="$read_sum" --type="float" --units="blocks/sec" 249 | $GMETRIC $GMETRIC_ARGS --name="disk_writes" --value="$write_sum" --type="float" --units="blocks/sec" 250 | 251 | echo "$date" > $STATEFILE 252 | $iostat -d | tail +4 >> $STATEFILE 253 | 254 | rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAME2 $ERROR_DEVNAME $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT 255 | 256 | -------------------------------------------------------------------------------- /network/network_gmetric.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### $Header: /var/lib/cvs/ops/ganglia/network_gmetric.sh,v 1.3 2006/07/11 17:29:27 ben Exp $ 4 | 5 | ### this script reports network metrics to ganglia. 6 | ### It should be called from cron every n minutes. 7 | ### It will report network usage per interface 8 | ### and will automatically adjust for whatever 9 | ### timeframe it is called 10 | 11 | ### Copyright Simply Hired, Inc. 2006 12 | ### License to use, modify, and distribute under the GPL 13 | ### http://www.gnu.org/licenses/gpl.txt 14 | 15 | VERSION=1.3 16 | 17 | GMETRIC="/usr/bin/gmetric" 18 | GMETRIC_ARGS="-c /etc/gmond.conf" 19 | STATEFILE="/var/lib/ganglia/metrics/net.stats" 20 | date=`date +%s` 21 | procfile="/proc/net/dev" 22 | 23 | ERROR_CREATE="/tmp/network_gmetric_create_statefile_failed" 24 | ERROR_IOSTAT="/tmp/network_gmetric_no_procfile" 25 | ERROR_DEVNAMES="/tmp/network_gmetric_bad_devname" 26 | ERROR_DEVNAMES2="/tmp/network_gmetric_bad_devname_didnt_fix" 27 | ERROR_GMETRIC="/tmp/network_gmetric_no_gmetric" 28 | ERROR_TIMEDIFF="/tmp/network_gmetric_timediff" 29 | ERROR_NOTROOT="/tmp/network_gmetric_notroot" 30 | 31 | if [ $UID -ne 0 ] 32 | then 33 | if [ -e $ERROR_NOTROOT ] ; then exit 1; fi 34 | echo "Error: this script must be run as root." 35 | touch $ERROR_NOTROOT 36 | exit 1 37 | fi 38 | rm -f $ERROR_NOTROOT 39 | 40 | if [ "x$1" == "x-h" ] 41 | then 42 | echo "Usage: network_gmetric.sh [--clean]" 43 | echo " --clean delete all tmp files" 44 | exit 0 45 | fi 46 | 47 | if [ "x$1" == "x--clean" ] 48 | then 49 | rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAME $ERROR_DEVNAME2 $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT $STATEFILE 50 | retval=$? 51 | if [ $retval -ne 0 ] 52 | then 53 | echo "failed to clean up." 54 | exit 1 55 | else 56 | echo "All cleaned up." 57 | exit 0 58 | fi 59 | fi 60 | 61 | # save and turn off /STDERR for th estatefile tests 62 | exec 3>&2 63 | exec 2>/dev/null 64 | 65 | # if the GMETRIC program isn't installed, compain 66 | if [ ! -e $GMETRIC ] 67 | then 68 | if [ -e $ERROR_GMETRIC ] ; then exit 1; fi 69 | echo "" 70 | echo "Error: GMETRIC doesn't seem to be installed." 71 | echo "$GMETRIC doesn't exist." 
72 | echo "" 73 | touch $ERROR_GMETRIC 74 | exit 1 75 | fi 76 | 77 | # if the /proc/net/dev file doesn't exist (eh?!) complain 78 | if [ ! -e $procfile ] 79 | then 80 | if [ -e $ERROR_IOSTAT ] 81 | then 82 | exit 1 83 | fi 84 | echo "" 85 | echo "Error: $procfile doesn't seem to exist." 86 | echo "" 87 | touch $ERROR_IOSTAT 88 | exit 1 89 | fi 90 | 91 | # if the statefile doesn't exist, we either havn't 92 | # run yet or there's something bigger wrong. 93 | if [ ! -e $STATEFILE ] 94 | then 95 | if [ ! -d `dirname $STATEFILE` ] 96 | then 97 | mkdir -p `dirname $STATEFILE` 98 | fi 99 | echo "$date" > $STATEFILE 100 | cat $procfile | sed -e "s/:/ /" | grep "eth" >> $STATEFILE 101 | if [ ! -e $STATEFILE ] 102 | then 103 | # if it didn't exist and we couldn't create 104 | # it, we should just scream bloody murder and die. 105 | # only scream once though... 106 | if [ -e $ERROR_CREATE ] 107 | then 108 | exit 1 109 | fi 110 | echo "" 111 | echo "ERROR: couldn't create $STATEFILE" 112 | echo "" 113 | touch $ERROR_CREATE 114 | exit 1 115 | fi 116 | echo "Created statefile. Exitting." 117 | exit 0 118 | fi 119 | 120 | # restore stderr 121 | exec 2>&3 122 | exec 3>&- 123 | 124 | # this script uses gets its stats directly from /proc 125 | stats=(`cat $procfile | sed -e "s/:/ /" | grep "eth"`) 126 | old_stats=(`cat $STATEFILE`) 127 | old_date=${old_stats[0]} 128 | 129 | read=0 130 | write=0 131 | old_read=0 132 | old_write=0 133 | read_sum=0 134 | write_sum=0 135 | 136 | ### function get_rw sets the variables $read and $write 137 | ### to the total number of read blocks and write blocks 138 | ### for a device. Which device is specified as an argument 139 | ### to the function. 140 | ### The function returns 1 if an invalid device number 141 | ### was specified. 142 | function get_rw() { 143 | base=$1 144 | let "base *= 17" 145 | if [ "k${stats[$base]}" == "k" ] 146 | then 147 | # we're done looping 148 | return 1; 149 | else 150 | devname=${stats[$base]} 151 | read=${stats[$(($base + 1))]} 152 | write=${stats[$(($base + 9))]} 153 | return 0 154 | fi 155 | } 156 | 157 | function get_old_rw() { 158 | base=$1 159 | let "base *= 17" 160 | let "base += 1" 161 | if [ "k${old_stats[$base]}" == "k" ] 162 | then 163 | # we're done looping 164 | return 1; 165 | else 166 | old_devname=${old_stats[$base]} 167 | old_read=${old_stats[$(($base + 1))]} 168 | old_write=${old_stats[$(($base + 9))]} 169 | return 0 170 | fi 171 | } 172 | 173 | time_diff=$(($date - $old_date)) 174 | 175 | devnum=0 176 | get_rw $devnum 177 | get_old_rw $devnum 178 | res=$? 179 | while [ $res -eq 0 ] 180 | do 181 | # if devname and old_devname aren't the same, 182 | # this whole function is invalid. 183 | if [ $devname != $old_devname ] 184 | then 185 | if [ -e $ERROR_DEVNAMES ] 186 | then 187 | if [ -e $ERROR_DEVNAMES2 ] ; then exit 1; fi 188 | echo "Sorry, my attempt at fixing the problem failed." 189 | echo "It's now up to you, dear human." 190 | touch $ERROR_DEVNAMES2 191 | exit 1 192 | fi 193 | echo "something is broken." 194 | echo "devnames are not the same." 195 | echo "devname=$devname old_devname=$old_devname" 196 | echo "I'm backing up the current statefile ($STATEFILE) " 197 | echo "and will recreate it next time to see if that fixes this." 198 | mydate=`date +%Y%m%d%H%M%S` 199 | mv -fv $STATEFILE{,.${mydate}} 200 | touch $ERROR_DEVNAMES 201 | exit 1 202 | fi 203 | rm -f $ERROR_DEVNAMES $ERROR_DEVNAME2 204 | #devname, read, write, old_devname, old_read, old_write 205 | # are all set. calculate stat/sec and report. 
206 | if [ $read -lt $old_read ] 207 | then 208 | # counter wrapped - add 2^32 209 | let "read += 4294967296" 210 | fi 211 | if [ $write -lt $old_write ] 212 | then 213 | # counter wrapped - add 2^32 214 | let "write += 4294967295" 215 | fi 216 | read_diff=$(($read - $old_read)) 217 | write_diff=$(($write - $old_write)) 218 | if [ $time_diff -eq 0 ] 219 | then 220 | if [ -e $ERROR_TIMEDIFF ] ; then exit 1 ; fi 221 | echo "something is broken." 222 | echo "time_diff is 0." 223 | touch $ERROR_TIMEDIFF 224 | exit 1 225 | fi 226 | rm -f $ERROR_TIMEDIFF 227 | rps=`echo "scale=3;$read_diff / $time_diff" | bc` 228 | wps=`echo "scale=3;$write_diff / $time_diff" | bc` 229 | 230 | read_sum=`echo "scale=3;$read_sum + $rps" | bc` 231 | write_sum=`echo "scale=3;$write_sum + $wps" | bc` 232 | 233 | # log current values 234 | # echo `date +%Y.%m.%d.%H:%M:%S` "network_gmetric values: ${devname}: old_read: $old_read old_write: $old_write read: $read write: $write RPS: $rps WPS: $wps" >> /var/log/gmetric.log 235 | 236 | # report what we have calculated 237 | # only send in metric if it's greater than 0 238 | if [ `expr $rps \> 0` -eq 1 ]; 239 | then 240 | $GMETRIC $GMETRIC_ARGS --name="${devname}_rx" --value="$rps" --type="float" --units="bytes/sec" 241 | fi 242 | if [ `expr $wps \> 0` -eq 1 ]; 243 | then 244 | $GMETRIC $GMETRIC_ARGS --name="${devname}_tx" --value="$wps" --type="float" --units="bytes/sec" 245 | fi 246 | 247 | # echo "$devname $rps $wps $read_sum $write_sum " >> /tmp/foo.txt 248 | 249 | devnum=$((devnum + 1)) 250 | get_rw $devnum 251 | get_old_rw $devnum 252 | res=$? 253 | done 254 | 255 | # log current values 256 | #echo `date +%Y.%m.%d.%H:%M:%S` "network_gmetric values: sum: RPS: $read_sum WPS: $write_sum" >> /var/log/gmetric.log 257 | 258 | # only send in metric if it's greater than 0 259 | if [ `expr $read_sum \> 0` -eq 1 ]; 260 | then 261 | $GMETRIC $GMETRIC_ARGS --name="network_rx" --value="$read_sum" --type="float" --units="bytes/sec" 262 | fi 263 | if [ `expr $write_sum \> 0` -eq 1 ]; 264 | then 265 | $GMETRIC $GMETRIC_ARGS --name="network_tx" --value="$write_sum" --type="float" --units="bytes/sec" 266 | fi 267 | 268 | echo "$date" > $STATEFILE 269 | cat $procfile | sed -e "s/:/ /" | grep "eth" >> $STATEFILE 270 | 271 | rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAME2 $ERROR_DEVNAME $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT 272 | 273 | -------------------------------------------------------------------------------- /memcached/memcached.sh/memcached.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### $Id: mcd_gmetric.sh 16661 2006-11-07 00:56:33Z ben $ 4 | 5 | ### This script queries a memcached server running 6 | ### on localhost and reports a few statistics to 7 | ### ganglia. 8 | ### It reports 9 | ### *mcd_curr_items - the number of objects stored 10 | ### *mcd_curr_bytes - current bytes used 11 | ### *mcd_curr_conns - current number of connections 12 | ### *mcd_hit_perc - hits / gets for current time duration 13 | ### (current hit percentage) 14 | ### For more description on any of these metrics, 15 | ### see the protocols.txt file in the MCD docs. 16 | 17 | ### Copyright Simply Hired, Inc. 
2006 18 | ### License to use, modify, and distribute under the GPL 19 | ### http://www.gnu.org/licenses/gpl.txt 20 | 21 | VERSION=1.1 22 | 23 | GMETRIC="/usr/bin/gmetric" 24 | GMETRIC_ARGS="-c /etc/gmond.conf" 25 | STATEFILE="/var/lib/ganglia/metrics/mcd.stats" 26 | ERROR_NOTROOT="/tmp/mcd_gmetric_notroot" 27 | ERROR_CANT_CONNECT="/tmp/mcd_gmetric_cant_connect" 28 | ERROR_CREATE="/tmp/mcd_gmetric_create_statefile_failed" 29 | ERROR_GETS_EMPTY="/tmp/mcd_gets_empty" 30 | 31 | MCD_CONF="/etc/sysconfig/memcached" 32 | MCD_DEFAULT_PORT="11211" 33 | 34 | date=`date +%s` 35 | 36 | if [ $UID -ne 0 ] 37 | then 38 | if [ -e $ERROR_NOTROOT ] ; then exit 1; fi 39 | echo "Error: this script must be run as root." 40 | touch $ERROR_NOTROOT 41 | exit 1 42 | fi 43 | rm -f $ERROR_NOTROOT 44 | 45 | while [ -n "$1" ] 46 | do 47 | case "x$1" in 48 | "x-h" | "x--help" ) 49 | echo "Usage: mcd_gmetric.sh [--clean] [--config ]" 50 | echo " --clean delete all tmp files" 51 | echo " --config the location of the mcd config file to read" 52 | echo " (default ${MCD_CONF})" 53 | exit 0 54 | ;; 55 | "x--clean" ) 56 | rm -f $STATEFILE $ERROR_NOTROOT $ERROR_CANT_CONNECT $ERROR_CREATE 57 | retval=$? 58 | if [ $retval -ne 0 ] 59 | then 60 | echo "failed to clean up." 61 | exit 1 62 | else 63 | echo "All cleaned up." 64 | exit 0 65 | fi 66 | ;; 67 | "x--config" ) 68 | shift 69 | mcd_config=$1 70 | if [ ! -n "$mcd_config" ] 71 | then 72 | echo "mcd configuration filename required" 73 | exit 1 74 | fi 75 | if [ ! -e "$mcd_config" ] 76 | then 77 | echo "mcd configuration file does not exist" 78 | exit 1 79 | fi 80 | if [ ! -r "$mcd_config" ] 81 | then 82 | echo "mcd configuration file cannot be read" 83 | exit 1 84 | fi 85 | source ${mcd_config} 86 | MCD_PORT=${PORT} 87 | ;; 88 | *) 89 | echo "unrecognized option." 90 | exit 1 91 | ;; 92 | esac 93 | shift 94 | done 95 | 96 | # set default MCD port if none specified 97 | MCD_PORT=${MCD_PORT:-$MCD_DEFAULT_PORT} 98 | 99 | # if the GMETRIC program isn't installed, compain 100 | if [ ! -e $GMETRIC ] 101 | then 102 | if [ -e $ERROR_GMETRIC ] ; then exit 1; fi 103 | echo "" 104 | echo "Error: GMETRIC doesn't seem to be installed." 105 | echo "$GMETRIC doesn't exist." 106 | echo "" 107 | touch $ERROR_GMETRIC 108 | exit 1 109 | fi 110 | 111 | # get current statistics 112 | exec 3>&2 #turn off STDERR 113 | exec 2>/dev/null 114 | stats_array=(`echo "stats" | nc localhost $MCD_PORT`) 115 | retval=$? 116 | exec 2>&1 #turn on STDERR 117 | exec 3>&- 118 | 119 | if [ $retval -ne 0 ] 120 | then 121 | if [ -e $ERROR_CANT_CONNECT ] ; then exit 1 ; fi 122 | echo "I can't connect to mcd." 123 | echo "Bummer. " 124 | touch $ERROR_CANT_CONNECT 125 | exit 1 126 | fi 127 | 128 | mcd_curr_items=`echo ${stats_array[23]}|tr -c -d [0-9]` #this tr thing is because there's a trailing ^M on the string from netcat that breaks bc. 129 | mcd_curr_bytes=`echo ${stats_array[29]}|tr -c -d [0-9]` 130 | mcd_curr_conns=`echo ${stats_array[32]}|tr -c -d [0-9]` 131 | mcd_total_gets=`echo ${stats_array[41]}|tr -c -d [0-9]` 132 | mcd_total_sets=`echo ${stats_array[44]}|tr -c -d [0-9]` 133 | mcd_total_hits=`echo ${stats_array[47]}|tr -c -d [0-9]` 134 | 135 | if [ -z "$mcd_total_gets" ] 136 | then 137 | # this actually happens rather often for some reason, so I'm just going to fail silently. 138 | # if [ -e $ERROR_GETS_EMPTY ] ; then exit 1 ; fi 139 | # echo "" 140 | # echo "ERROR: mcd_total_gets empty." 
141 | # echo "" 142 | exit 1 143 | fi 144 | rm -f $ERROR_GETS_EMPTY 145 | 146 | 147 | # save and turn off /STDERR for the statefile tests 148 | exec 3>&2 149 | exec 2>/dev/null 150 | 151 | # if the statefile doesn't exist, we either havn't 152 | # run yet or there's something bigger wrong. 153 | if [ ! -e $STATEFILE ] 154 | then 155 | if [ ! -d `dirname $STATEFILE` ] 156 | then 157 | mkdir -p `dirname $STATEFILE` 158 | fi 159 | echo "$date $mcd_curr_items $mcd_curr_bytes $mcd_curr_conns $mcd_total_gets $mcd_total_sets $mcd_total_hits" > $STATEFILE 160 | if [ ! -e $STATEFILE ] 161 | then 162 | # if it didn't exist and we couldn't create 163 | # it, we should just scream bloody murder and die. 164 | # only scream once though... 165 | if [ -e $ERROR_CREATE ] 166 | then 167 | exit 1 168 | fi 169 | echo "" 170 | echo "ERROR: couldn't create $STATEFILE" 171 | echo "" 172 | touch $ERROR_CREATE 173 | exit 1 174 | fi 175 | echo "Created statefile. Exiting." 176 | exit 0 177 | fi 178 | 179 | # restore stderr 180 | exec 2>&3 181 | exec 3>&- 182 | 183 | old_stats_array=(`cat $STATEFILE`) 184 | old_date=${old_stats_array[0]} 185 | old_mcd_curr_items=${old_stats_array[1]} 186 | old_mcd_curr_bytes=${old_stats_array[2]} 187 | old_mcd_curr_conns=${old_stats_array[3]} 188 | old_mcd_total_gets=${old_stats_array[4]} 189 | old_mcd_total_sets=${old_stats_array[5]} 190 | old_mcd_total_hits=${old_stats_array[6]} 191 | 192 | echo "$date $mcd_curr_items $mcd_curr_bytes $mcd_curr_conns $mcd_total_gets $mcd_total_sets $mcd_total_hits" > $STATEFILE 193 | 194 | time_diff=$(($date - $old_date)) 195 | mcd_total_gets_diff=$(($mcd_total_gets - $old_mcd_total_gets)) 196 | mcd_total_sets_diff=$(($mcd_total_sets - $old_mcd_total_sets)) 197 | mcd_total_hits_diff=$(($mcd_total_hits - $old_mcd_total_hits)) 198 | 199 | if [ $time_diff -eq 0 ] 200 | then 201 | if [ -e $ERROR_TIMEDIFF ] ; then exit 1 ; fi 202 | echo "something is broken." 203 | echo "time_diff is 0." 204 | touch $ERROR_TIMEDIFF 205 | exit 1 206 | fi 207 | 208 | # none of these numbers should be less than 1, but if they are, just send back 1. 209 | if [ $mcd_total_gets_diff -le 1 ] ; then mcd_total_gets_diff=1 ; fi 210 | if [ $mcd_total_sets_diff -le 1 ] ; then mcd_total_sets_diff=1 ; fi 211 | if [ $mcd_total_hits_diff -le 1 ] ; then mcd_total_hits_diff=1 ; fi 212 | 213 | mcd_gets_per_sec=`echo "scale=3;${mcd_total_gets_diff}/${time_diff}"|bc` 214 | mcd_sets_per_sec=`echo "scale=3;${mcd_total_sets_diff}/${time_diff}"|bc` 215 | mcd_hits_per_sec=`echo "scale=3;${mcd_total_hits_diff}/${time_diff}"|bc` 216 | mcd_hit_perc=`echo "scale=3; ${mcd_total_hits_diff} * 100 / ${mcd_total_gets_diff}" | bc` 217 | 218 | # if we're running on a non-standard port, it might be the case that 219 | # we've got multiple memcached's being watched. Make the metric name 220 | # differentiate between them. 
221 | if [ $MCD_PORT -ne $MCD_DEFAULT_PORT ] 222 | then 223 | metric_name_uniquifier="${MCD_PORT}_" 224 | fi 225 | 226 | $GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}seconds_measured" --value=${time_diff} --type=uint32 --units="secs" 227 | $GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}items_cached" --value=${mcd_curr_items} --type=uint32 --units="items" 228 | $GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}bytes_used" --value=${mcd_curr_bytes} --type=uint32 --units="bytes" 229 | $GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}conns" --value=${mcd_curr_conns} --type=uint32 --units="connections" 230 | $GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}gets" --value=${mcd_gets_per_sec} --type=float --units="gps" 231 | $GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}sets" --value=${mcd_sets_per_sec} --type=float --units="sps" 232 | $GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}cache_hits" --value=${mcd_hits_per_sec} --type=float --units="hps" 233 | $GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}cache_hit%" --value=${mcd_hit_perc} --type=float --units="%" 234 | 235 | --------------------------------------------------------------------------------