├── .coveragerc ├── .gitignore ├── .gitmodules ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── contrib ├── install │ ├── data │ │ └── elasticsearch-apt.pub │ └── elasticsearch ├── inventory.py ├── metrics │ ├── rancid-heartbeat │ ├── syslog-heartbeat │ └── tacacs-heartbeat └── weathermap │ ├── generate.sh │ ├── weathermap.py │ └── weathermap_test.py ├── debian ├── changelog ├── compat ├── control ├── dhmon-common.install ├── pinger.install ├── rules ├── snmpcollector.install ├── snmpcollector.links └── source │ ├── format │ └── include-binaries ├── deps └── Makefile └── src ├── Makefile ├── analytics ├── Makefile └── analytics.py └── pinger ├── .gitignore ├── Makefile ├── dhmon-pingerd.init ├── dhmonpinger.c ├── pingerd └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | cover_pylib = False 3 | branch = False 4 | data_file = .coverage 5 | parallel = True 6 | omit = *_test.* 7 | source = 8 | src/snmpcollector/src 9 | src/pinger 10 | src/analytics 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | *.db 3 | *.stats 4 | build/ 5 | *.pyc 6 | node_modules 7 | debian/files 8 | debian/*.log 9 | debian/*.substvars 10 | debian/analytics/ 11 | debian/pinger/ 12 | debian/snmpcollector/ 13 | debian/dhmon-common/ 14 | debian/*.debhelper 15 | debian/tmp 16 | .coverage 17 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "deps/pyyaml"] 2 | path = deps/pyyaml 3 | url = https://github.com/yaml/pyyaml 4 | branch = master 5 | [submodule "deps/pika"] 6 | path = deps/pika 7 | url = https://github.com/pika/pika 8 | [submodule "deps/client_python"] 9 | path = deps/prometheus_client_python 10 | url = 
https://github.com/prometheus/client_python.git 11 | branch = master 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | 6 | before_install: 7 | - echo "deb http://archive.ubuntu.com/ubuntu trusty main universe" | sudo tee -a /etc/apt/sources.list 8 | - sudo apt-get update -qq 9 | - > 10 | sudo apt-get install -qq --install-recommends debhelper debmake 11 | git-buildpackage pypy devscripts build-essential python-dev 12 | libsnmp-dev python-pika python-yaml python-mock 13 | 14 | install: 15 | - wget https://bootstrap.pypa.io/ez_setup.py -O - | sudo pypy 16 | - rm -f setuptools-*.zip 17 | - sudo pip install coveralls 18 | - sudo ln -sf /usr/local/bin/coverage /usr/bin/ 19 | 20 | script: 21 | - make deb 22 | 23 | after_success: 24 | - coveralls 25 | 26 | before_deploy: 27 | - gem install mime-types -v 2.6.2 28 | deploy: 29 | provider: releases 30 | api_key: 31 | secure: eISBbJNB5f9PrKz4fwrnNrvunJHX2cZh2XXnORJ6bO7AAecaMOKJk1IAZd4TDb0UCqwFbp96P5JmC8Q8H/k/dD7QRV2IfXbbZ4E82OS7A3fVJ9x+0nq9KjsXWvxli38+PgUtHsAo9JAxEY5yvMLAp9DcX2YqxcK7kzG1FvD50SA= 32 | file_glob: true 33 | file: "../*.deb" 34 | on: 35 | tags: true 36 | repo: dhtech/dhmon 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. 
Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | TAG=$(shell git name-rev --tags --name-only $(shell git rev-parse HEAD)) 3 | VERSION=$(shell echo $(TAG) | grep -o 'v[0-9\.]*' \ 4 | | sed -E 's/^v([0-9\.]+)/-N \1-1/') 5 | BRANCH=$(shell git rev-parse --abbrev-ref HEAD) 6 | TREE=$(shell test $(TAG) = undefined && echo $(BRANCH) || echo $(TAG)) 7 | 8 | distclean: 9 | 10 | clean install all: 11 | (test $@ = clean && rm -f .coverage) || true 12 | make -C $(CURDIR)/deps/ $@ 13 | make -C $(CURDIR)/src/ $@ 14 | 15 | test: 16 | coverage erase 17 | TESTBASE=$(CURDIR) make -C $(CURDIR)/src/ $@ 18 | coverage combine 19 | coverage report -m 20 | 21 | deb: 22 | echo Using $(TREE) 23 | git checkout $(TREE) 24 | cp debian/changelog debian/changelog.old 25 | rm -f ../dhmon_*.orig.tar.gz 26 | gbp dch --snapshot --auto --ignore-branch $(VERSION) 27 | gbp buildpackage --git-upstream-tree=$(TREE) --git-submodules \ 28 | --git-ignore-new --git-ignore-branch --git-builder='debuild -i -I -us -uc' 29 | mv debian/changelog.old debian/changelog 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/dhtech/dhmon.svg?branch=master)](https://travis-ci.org/dhtech/dhmon) 2 | [![Coverage Status](https://coveralls.io/repos/dhtech/dhmon/badge.svg?branch=master)](https://coveralls.io/r/dhtech/dhmon) 3 | 4 | dhmon 5 | ===== 6 | 7 | Awesome monitoring system for DreamHack 8 | 9 | See the Wiki 
https://github.com/dhtech/dhmon/wiki for latest scratch notes. 10 | 11 | ## Products 12 | 13 | dhmon consists of a number of smaller products: 14 | 15 | - **snmpcollector** The SNMP collection daemons 16 | - **pinger** RTT statistics collector 17 | - **analytics** API backend to access processed statistics 18 | 19 | ## Installation 20 | 21 | Install the Debian packages for the products you want. 22 | 23 | ## Building Debian packages 24 | 25 | You need to have `setuptools` for pypy installed 26 | 27 | wget https://bootstrap.pypa.io/ez_setup.py -O - | sudo pypy 28 | 29 | Build the packages 30 | 31 | make deb 32 | 33 | or if you prefer the longer way: 34 | 35 | # Create a new snapshot version 36 | gbp dch --snapshot --auto 37 | 38 | # Clean 39 | rm ../dhmon_*.orig.tar.gz 40 | 41 | # Build 42 | gbp buildpackage --git-upstream-tree=master --git-submodules \ 43 | --git-ignore-new --git-builder='debuild -i -I -k28B92277' 44 | 45 | -------------------------------------------------------------------------------- /contrib/install/data/elasticsearch-apt.pub: -------------------------------------------------------------------------------- 1 | -----BEGIN PGP PUBLIC KEY BLOCK----- 2 | Version: GnuPG v2.0.14 (GNU/Linux) 3 | 4 | mQENBFI3HsoBCADXDtbNJnxbPqB1vDNtCsqhe49vFYsZN9IOZsZXgp7aHjh6CJBD 5 | A+bGFOwyhbd7at35jQjWAw1O3cfYsKAmFy+Ar3LHCMkV3oZspJACTIgCrwnkic/9 6 | CUliQe324qvObU2QRtP4Fl0zWcfb/S8UYzWXWIFuJqMvE9MaRY1bwUBvzoqavLGZ 7 | j3SF1SPO+TB5QrHkrQHBsmX+Jda6d4Ylt8/t6CvMwgQNlrlzIO9WT+YN6zS+sqHd 8 | 1YK/aY5qhoLNhp9G/HxhcSVCkLq8SStj1ZZ1S9juBPoXV1ZWNbxFNGwOh/NYGldD 9 | 2kmBf3YgCqeLzHahsAEpvAm8TBa7Q9W21C8vABEBAAG0RUVsYXN0aWNzZWFyY2gg 10 | KEVsYXN0aWNzZWFyY2ggU2lnbmluZyBLZXkpIDxkZXZfb3BzQGVsYXN0aWNzZWFy 11 | Y2gub3JnPokBOAQTAQIAIgUCUjceygIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgEC 12 | F4AACgkQ0n1mbNiOQrRzjAgAlTUQ1mgo3nK6BGXbj4XAJvuZDG0HILiUt+pPnz75 13 | nsf0NWhqR4yGFlmpuctgCmTD+HzYtV9fp9qW/bwVuJCNtKXk3sdzYABY+Yl0Cez/ 14 | 7C2GuGCOlbn0luCNT9BxJnh4mC9h/cKI3y5jvZ7wavwe41teqG14V+EoFSn3NPKm 15 | 
TxcDTFrV7SmVPxCBcQze00cJhprKxkuZMPPVqpBS+JfDQtzUQD/LSFfhHj9eD+Xe 16 | 8d7sw+XvxB2aN4gnTlRzjL1nTRp0h2/IOGkqYfIG9rWmSLNlxhB2t+c0RsjdGM4/ 17 | eRlPWylFbVMc5pmDpItrkWSnzBfkmXL3vO2X3WvwmSFiQbkBDQRSNx7KAQgA5JUl 18 | zcMW5/cuyZR8alSacKqhSbvoSqqbzHKcUQZmlzNMKGTABFG1yRx9r+wa/fvqP6OT 19 | RzRDvVS/cycws8YX7Ddum7x8uI95b9ye1/Xy5noPEm8cD+hplnpU+PBQZJ5XJ2I+ 20 | 1l9Nixx47wPGXeClLqcdn0ayd+v+Rwf3/XUJrvccG2YZUiQ4jWZkoxsA07xx7Bj+ 21 | Lt8/FKG7sHRFvePFU0ZS6JFx9GJqjSBbHRRkam+4emW3uWgVfZxuwcUCn1ayNgRt 22 | KiFv9jQrg2TIWEvzYx9tywTCxc+FFMWAlbCzi+m4WD+QUWWfDQ009U/WM0ks0Kww 23 | EwSk/UDuToxGnKU2dQARAQABiQEfBBgBAgAJBQJSNx7KAhsMAAoJENJ9ZmzYjkK0 24 | c3MIAIE9hAR20mqJWLcsxLtrRs6uNF1VrpB+4n/55QU7oxA1iVBO6IFu4qgsF12J 25 | TavnJ5MLaETlggXY+zDef9syTPXoQctpzcaNVDmedwo1SiL03uMoblOvWpMR/Y0j 26 | 6rm7IgrMWUDXDPvoPGjMl2q1iTeyHkMZEyUJ8SKsaHh4jV9wp9KmC8C+9CwMukL7 27 | vM5w8cgvJoAwsp3Fn59AxWthN3XJYcnMfStkIuWgR7U2r+a210W6vnUxU4oN0PmM 28 | cursYPyeV0NX/KQeUeNMwGTFB6QHS/anRaGQewijkrYYoTNtfllxIu9XYmiBERQ/ 29 | qPDlGRlOgVTd9xUfHFkzB52c70E= 30 | =92oX 31 | -----END PGP PUBLIC KEY BLOCK----- 32 | -------------------------------------------------------------------------------- /contrib/install/elasticsearch: -------------------------------------------------------------------------------- 1 | #!/bin/bash -xe 2 | 3 | IP_DEFAULT=$(sqlite3 /etc/ipplan.db \ 4 | "SELECT ipv4_addr_txt FROM host WHERE name = '$(hostname -f)'") 5 | IP=${IP-${IP_DEFAULT}} 6 | 7 | if [ -z "$IP" ]; then 8 | echo "error: could not determine assigned IP" 9 | exit 1 10 | fi 11 | echo "Using IP $IP" 12 | 13 | cd $(dirname $0) 14 | 15 | cat << EOF > /etc/apt/sources.list.d/elasticsearch.list 16 | deb http://packages.elasticsearch.org/elasticsearch/1.0/debian stable main 17 | EOF 18 | 19 | cat data/elasticsearch-apt.pub | apt-key add - 20 | apt-get update 21 | 22 | apt-get install -y elasticsearch openjdk-7-jre-headless 23 | 24 | cat << EOF > /etc/elasticsearch/elasticsearch.yml 25 | cluster.name: dhmon-es 26 | discovery.zen.ping.multicast.enabled: 
false 27 | discovery.zen.ping.unicast.hosts: ["metricstore.event.dreamhack.se" ] 28 | network.bind_host: 0.0.0.0 29 | network.publish_host: $IP 30 | EOF 31 | 32 | echo 'RESTART_ON_UPGRADE=true' > /etc/default/elasticsearch 33 | update-rc.d elasticsearch defaults 95 10 34 | 35 | service elasticsearch start 36 | -------------------------------------------------------------------------------- /contrib/inventory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # Script to list inventory with serial numbers from SNMP. 3 | # TODO(bluecmd): Currently only reads from a JSON dump file: 4 | # python inventory.py host\:d-center-st.event.dreamhack.local.lst 5 | # 6 | # Output example: 7 | # Root 8 | # c3xxx Stack (FOC1842U0JZ) 9 | # WS-C3850-48P (FCW1842C0SK) 10 | # Switch 2 - WS-C3850-48P - Power Supply A Container 11 | # Switch 2 - Power Supply A (DCB1835G08H) 12 | # Switch 2 - WS-C3850-48P - Power Supply B Container 13 | # Switch 2 - WS-C3850-48P - Fan 1 Container 14 | # Switch 2 - WS-C3850-48P - FAN 1 15 | # Switch 2 - WS-C3850-48P - Fan 2 Container 16 | # Switch 2 - WS-C3850-48P - FAN 2 17 | # ... [snip] ... 
18 | # GigabitEthernet1/0/2 19 | # Switch 1 Slot 1 FRULink Container 20 | # 4x10G Uplink Module (FOC18436FP1) 21 | # Switch 1 Slot 1 SFP Container 0 22 | # Switch 1 Slot 1 SFP Container 1 23 | # SFP-10GBase-LR (VB13450356 ) 24 | # Switch 1 Slot 1 SFP Container 2 25 | # Switch 1 Slot 1 SFP Container 3 26 | 27 | import base64 28 | import collections 29 | import json 30 | import sys 31 | 32 | SNMP_entPhysicalDescr = '.1.3.6.1.2.1.47.1.1.1.1.2' 33 | SNMP_entPhysicalContainedIn = '.1.3.6.1.2.1.47.1.1.1.1.4' 34 | SNMP_entPhysicalSerialNum = '.1.3.6.1.2.1.47.1.1.1.1.11' 35 | 36 | snmp = collections.defaultdict(dict) 37 | # Tree: Dict index is node ID, list entries are children 38 | inventory = collections.defaultdict(list) 39 | 40 | with file(sys.argv[1]) as f: 41 | for row in f: 42 | struct = json.loads(row) 43 | if isinstance(struct, int): 44 | # Timestamp, skip 45 | continue 46 | 47 | # Skip non-SNMP values 48 | if not struct['metric'].startswith('snmp.1'): 49 | continue 50 | 51 | # Skip VLAN aware contexts 52 | if '@' in struct['metric']: 53 | continue 54 | 55 | # Decode value 56 | value = struct['value'] 57 | if isinstance(value, int): 58 | pass 59 | elif value.startswith('OCTETSTR'): 60 | try: 61 | value = base64.b64decode(value.split(':', 1)[1]).decode() 62 | except UnicodeDecodeError: 63 | # Ignore MAC addresses and stuff like that 64 | continue 65 | else: 66 | # Ignore unknown metric 67 | continue 68 | 69 | oid = struct['metric'][4:] 70 | root, lastoid = oid.rsplit('.', 1) 71 | snmp[root][int(lastoid)] = value 72 | 73 | # Walk the inventory tree 74 | for oid, value in snmp[SNMP_entPhysicalContainedIn].iteritems(): 75 | inventory[value].append(oid) 76 | 77 | 78 | def get_product(lastoid): 79 | """Given a last OID, return the human readable 'Product name (S/N)'""" 80 | if lastoid == 0: 81 | return 'Root' 82 | # TODO(bluecmd): Kill global variable 83 | model = snmp[SNMP_entPhysicalDescr][lastoid] 84 | serial = snmp[SNMP_entPhysicalSerialNum][lastoid] 85 | if serial: 
86 | return '%s (%s)' % (model, serial) 87 | return model 88 | 89 | 90 | def print_inventory(inventory, idx, level=0): 91 | """Recursively travel through the inventory database and print it""" 92 | print ' ' * level, get_product(idx) 93 | for child in inventory[idx]: 94 | print_inventory(inventory, child, level+1) 95 | 96 | # Print inventory, start with root 97 | print_inventory(inventory, 0) 98 | -------------------------------------------------------------------------------- /contrib/metrics/rancid-heartbeat: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /var/lib/rancid/*/configs 4 | 5 | TIMESTAMP=$(date +'%s') 6 | 7 | for host in $(sqlite3 /etc/ipplan.db "SELECT h.name FROM host h, option o WHERE o.name = 'rncd' AND h.node_id = o.node_id") 8 | do 9 | MARKER=$(svn info --xml $host \ 10 | | grep text-updated \ 11 | | sed 's/\(.*\)<\/text-updated>/\1/' \ 12 | | xargs -I{} date --date={} +'%s') 13 | SIZE=$(wc -c $host | awk '{print $1}') 14 | 15 | echo "rancid_config_bytes{host=\"$host\"} $SIZE $(($TIMESTAMP * 1000))" 16 | echo "rancid_config_updated{host=\"$host\"} $MARKER $(($TIMESTAMP * 1000))" # value is the svn text-updated time (epoch seconds), not the file size 17 | done 18 | -------------------------------------------------------------------------------- /contrib/metrics/syslog-heartbeat: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | now=$(date +"%s") 4 | 5 | IFS=' 6 | ' 7 | for row in $(stat --printf "%n %Y %s\n" /var/log/dh/*/all.log) 8 | do 9 | filename=$(basename $(dirname $(echo $row | awk '{print $1}'))) 10 | stamp=$(echo $row | awk '{print $2}') 11 | size=$(echo $row | awk '{print $3}') 12 | if ! echo "${filename}" | grep '\.' 
-q; then 13 | # TODO(bluecmd): we probably want full fqdn from servers instead 14 | host="$filename.event.dreamhack.se" 15 | else 16 | host="$filename" 17 | fi 18 | echo "syslog_log_bytes{host=\"$host\"} ${size} $(($now * 1000))" 19 | echo "syslog_log_updated{host=\"$host\"} ${stamp} $(($now * 1000))" 20 | done 21 | -------------------------------------------------------------------------------- /contrib/metrics/tacacs-heartbeat: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | IFS=' 4 | ' 5 | LATEST=$(for line in $(grep tac_plus /var/log/dh/$(hostname)/all.log \ 6 | | grep 'shell login' | awk '{print $1, $2, $3, $6}') 7 | do 8 | DATE=$(date --date="$(echo $line | awk '{print $1, $2, $3}')" +'%s') 9 | HOST=$(echo $line | awk '{print $4}' | sed 's/://g') 10 | echo $DATE $HOST 11 | done | sort -k 2 | uniq -f 1 | sort -n) 12 | 13 | TIMESTAMP=$(date +'%s') 14 | echo "TIMESTAMP=$TIMESTAMP" 15 | for line in $LATEST 16 | do 17 | LAST=$(echo $line | awk '{print $1}') 18 | RAW_HOST=$(echo $line | awk '{print $2}') 19 | HOST=$(host $RAW_HOST) 20 | if [[ "$?" 
!= "0" ]]; then 21 | logger -p warn "Unable to resolve raw host: '$RAW_HOST'" 22 | continue 23 | fi 24 | 25 | if host $RAW_HOST | grep 'domain name pointer' -q; then 26 | HOST=$(echo $HOST | awk '{print $NF}' | sed 's/\.$//') 27 | else 28 | HOST=$(echo $HOST | awk '{print $1}') 29 | fi 30 | echo "$HOST|tacacs.last-login|$(($TIMESTAMP - $LAST)) * 1000" 31 | done | dhmon-metric $TIMESTAMP 32 | -------------------------------------------------------------------------------- /contrib/weathermap/generate.sh: -------------------------------------------------------------------------------- 1 | python weathermap_test.py | twopi -T svg > /var/www/test.svg 2 | -------------------------------------------------------------------------------- /contrib/weathermap/weathermap.py: -------------------------------------------------------------------------------- 1 | #!/usr/env python 2 | import redis 3 | 4 | class NondirectionalEdge(object): 5 | def __init__(self, a, b, weight): 6 | self.a = a 7 | self.b = b 8 | self.weight = weight 9 | 10 | def __eq__(self, other): 11 | # TODO(bluecmd): we don't consider different weights here 12 | return (self.a == other.a and self.b == other.b) or ( 13 | self.a == other.b and self.b == other.a) 14 | 15 | def __hash__(self): 16 | return hash(str(sorted([self.a, self.b]))) 17 | 18 | class NetworkGrapher(object): 19 | 20 | def __init__(self, nodes, neighbor_func): 21 | self.nodes = nodes 22 | self._neighbor_func = neighbor_func 23 | 24 | def _find_edges(self, node): 25 | neighbors = set() 26 | for neighbor, interface, weight in self._neighbor_func(node): 27 | neighbors.add(neighbor) 28 | yield (node, neighbor, interface, weight) 29 | self.visited.add(node) 30 | for neighbor in filter(lambda x: x not in self.visited, neighbors): 31 | self._find_edges(neighbor) 32 | 33 | def build(self): 34 | self.visited = set() 35 | self.edges = set() 36 | for root in self.nodes: 37 | for (a, b, interface, weight) in self._find_edges(root): 38 | self.edges.add((a, b, 
interface, weight)) 39 | 40 | if __name__ == '__main__': 41 | r = redis.StrictRedis(host='localhost', port=6379, db=1) 42 | # Find all nodes by looking for cdpCacheDeviceId 43 | nodes = [ 44 | key.split(':')[0] for key in r.keys('*:1.3.6.1.4.1.9.9.23.1.2.1.1.6.*.1')] 45 | print '%d nodes found' % len(nodes) 46 | 47 | def resolver(node): 48 | return map(r.get, r.keys('%s:1.3.6.1.4.1.9.9.23.1.2.1.1.6.*.1' % node)) 49 | 50 | grapher = NetworkGrapher(nodes, resolver) 51 | grapher.build() 52 | for edge in grapher.edges: 53 | print '%s -- %s' % (edge.a, edge.b) 54 | -------------------------------------------------------------------------------- /contrib/weathermap/weathermap_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/env python 2 | import collections 3 | import weathermap 4 | 5 | _map = collections.defaultdict(list) 6 | 7 | def build_mock(access=True): 8 | for i in xrange(1, 19): 9 | _map['root:%s' % ('w' if i < 10 else 'e')].append(('dist%d' % i, 'eth%d.1' % i, 100)) 10 | _map['root:%s' % ('e' if i < 10 else 'w')].append(('dist%d' % i, 'eth%d.2' % i, 100)) 11 | _map['dist%d' % i] = [('dist%d' % (i - 1 if i%2 else i+1), 'eth0', 10)] 12 | 13 | if access: 14 | for i in xrange(1, 200): 15 | _map['access%d' % i] = [('dist%d' % (i % 20), 'gig0', 1)] 16 | 17 | def mock_resolver(node): 18 | for a, b in _map.iteritems(): 19 | if a == node: 20 | for n in b: 21 | yield n 22 | 23 | def mock_all_nodes(): 24 | for a, b in _map.iteritems(): 25 | for n in b: 26 | yield n[0] 27 | yield a 28 | 29 | def format_node(node): 30 | if node.startswith('access'): 31 | return '', 'point', 'dodgerblue4' 32 | elif node.startswith('dist'): 33 | return node, 'hexagon', 'goldenrod' 34 | elif node.startswith('root'): 35 | return node, 'circle', 'forestgreen' 36 | 37 | if __name__ == '__main__': 38 | build_mock(access=True) 39 | nodes = set(mock_all_nodes()) 40 | grapher = weathermap.NetworkGrapher(nodes, mock_resolver) 41 | grapher.build() 42 | 
print 'graph G {' 43 | print ' ranksep=3.4;' 44 | print ' ratio=auto;' 45 | print ' overlap=false;' 46 | print ' splines=true;' 47 | print ' splines=true;' 48 | print ' bgcolor=black;' 49 | for node in set([x.split(':')[0] for x in nodes]): 50 | (label, shape, color) = format_node(node) 51 | print '"%s" [ label="%s",shape="%s",style="filled",fillcolor="%s" ];' % ( 52 | node, label, shape, color) 53 | for edge in grapher.edges: 54 | weight = edge[3] 55 | penwidth = 1 56 | if weight < 100: 57 | penwidth = 0.5 58 | elif weight < 10: 59 | penwidth = 0.1 60 | color = int(edge[1][4:]) % 9 + 1 61 | print '%s -- %s [colorscheme=rdylgn9, weight=%d, color=%s, penwidth=%f]' % (edge[0], edge[1], edge[3], color, penwidth) 62 | print '}' 63 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | dhmon (0.1-1) UNRELEASED; urgency=medium 2 | 3 | * Initial release. 4 | 5 | -- Christian Svensson Thu, 25 Jun 2015 22:51:54 +0200 6 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: dhmon 2 | Maintainer: Christian Svensson 3 | Section: misc 4 | Priority: optional 5 | Standards-Version: 3.9.2 6 | Build-Depends: debhelper (>= 9), pypy, libsnmp-dev, python-dev, 7 | python-pika, python-yaml, python-mock 8 | 9 | Package: dhmon-common 10 | Architecture: any 11 | Depends: ${shlibs:Depends}, ${misc:Depends} 12 | Description: Common dependencies for dhmon 13 | Dependencies that all or almost all dhmon components share. 
14 | 15 | Package: snmpcollector 16 | Architecture: any 17 | Depends: ${shlibs:Depends}, ${misc:Depends}, dhmon-common, 18 | python-netsnmp, python-pika (>=0.9.14), pypy 19 | Description: SNMP collector for dhmon 20 | SNMP collection for the DreamHack monitoring system. 21 | 22 | Package: pinger 23 | Architecture: any 24 | Depends: ${shlibs:Depends}, ${misc:Depends}, dhmon-common 25 | Description: RTT collector for dhmon 26 | RTT collection for the DreamHack monitoring system. 27 | 28 | #Package: analytics 29 | #Architecture: all 30 | #Depends: ${shlibs:Depends}, ${misc:Depends}, dhmon-common 31 | #Description: Analytics API server for dhmon 32 | # API backend for the DreamHack monitoring system. 33 | 34 | -------------------------------------------------------------------------------- /debian/dhmon-common.install: -------------------------------------------------------------------------------- 1 | usr/local/lib/python2.7/dist-packages/prometheus_client* usr/lib/python2.7/dist-packages/prometheus_client 2 | usr/local/lib/pypy2.7/dist-packages/prometheus_client* usr/lib/pypy/dist-packages/ 3 | -------------------------------------------------------------------------------- /debian/pinger.install: -------------------------------------------------------------------------------- 1 | opt/pinger/pingerd /usr/bin/ 2 | usr/local/lib/python2.7/dist-packages/dhmonpinger* usr/lib/python2.7/dist-packages/ 3 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | %: 3 | dh $@ 4 | 5 | override_dh_installinit: 6 | dh_installinit --onlyscripts 7 | -------------------------------------------------------------------------------- /debian/snmpcollector.install: -------------------------------------------------------------------------------- 1 | opt/snmpcollector/src/* /usr/share/snmpcollector/ 2 | etc/snmpcollector.yaml 3 | 
etc/default/snmpcollector 4 | etc/init.d/snmpcollector 5 | usr/local/lib/python2.7/dist-packages/mibresolver* usr/lib/python2.7/dist-packages/ 6 | usr/local/lib/pypy2.7/dist-packages/yaml* usr/lib/pypy/dist-packages/ 7 | usr/local/lib/pypy2.7/dist-packages/pika* usr/lib/pypy/dist-packages/ 8 | -------------------------------------------------------------------------------- /debian/snmpcollector.links: -------------------------------------------------------------------------------- 1 | /usr/share/snmpcollector/trigger.py /usr/bin/snmpcollector-trigger 2 | /usr/share/snmpcollector/snmptest.py /usr/bin/snmpcollector-test 3 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /debian/source/include-binaries: -------------------------------------------------------------------------------- 1 | deps/pyyaml/tests/data/invalid-character.loader-error 2 | deps/pyyaml/tests/data/odd-utf16.stream-error 3 | deps/pyyaml/tests/data/spec-05-01-utf16be.data 4 | deps/pyyaml/tests/data/spec-05-01-utf16le.data 5 | deps/pyyaml/tests/data/spec-05-02-utf16be.data 6 | deps/pyyaml/tests/data/spec-05-02-utf16le.data 7 | deps/pyyaml/tests/data/utf16be.data 8 | deps/pyyaml/tests/data/utf16le.data 9 | -------------------------------------------------------------------------------- /deps/Makefile: -------------------------------------------------------------------------------- 1 | clean: 2 | (cd pyyaml; pypy setup.py clean) 3 | (cd pika; pypy setup.py clean) 4 | (cd prometheus_client_python; pypy setup.py clean) 5 | (cd prometheus_client_python; python setup.py clean) 6 | rm -fr */build 7 | rm -fr */*.egg-info 8 | 9 | all: 10 | true 11 | 12 | install: 13 | mkdir -p $(DESTDIR)/usr/lib/pypy/ 14 | (cd pyyaml; pypy setup.py install --root $(DESTDIR) $(COMPILE)) 15 | 
def prometheus(query):
  """Run an instant query against the local Prometheus HTTP API.

  Args:
    query: PromQL expression. It is URL-quoted before being placed in the
      request URL.

  Returns:
    The raw response body as returned by the server (callers in this module
    feed it to json.loads).

  Raises:
    Whatever urllib2.urlopen raises when the request fails; nothing is
    caught here.
  """
  host = 'http://localhost:9090'
  url = '{host}/prometheus/api/v1/query?query={query}&time={time}'

  response = urllib2.urlopen(url.format(
      query=urllib.quote(query), time=int(time.time()), host=host))
  # Close the HTTP response deterministically instead of relying on garbage
  # collection; this function is called repeatedly for the process lifetime.
  try:
    return response.read()
  finally:
    response.close()
@analytics('/ping.status')
def ping_status():
  """Build the /ping.status payload.

  Queries Prometheus for changes(icmp_rtt_seconds_sum[1m]) and maps each
  series' 'host' label to 60 minus the integer sample value.

  Returns:
    A JSON object string of the form {host: 60 - changes}.
  """
  response = json.loads(prometheus('changes(icmp_rtt_seconds_sum[1m])'))

  status = {}
  for series in response['data']['result']:
    # value is a [timestamp, sample] pair; the sample is parsed as an integer.
    changes = int(series['value'][1])
    status[series['metric']['host']] = 60 - changes
  return json.dumps(status)
@analytics('/dhcp.status')
def dhcp_status():
  """Build the /dhcp.status payload.

  Combines two Prometheus instant queries, dhcp_leases_current_count and
  dhcp_leases_max_count, joined on the 'network' label. The label is expected
  to look like '<domain>@<network>'; the result is keyed by the part after
  the first '@'.

  Returns:
    A JSON object string mapping network name to a dict with the keys
    'domain', 'vlan', 'usage' and 'max'. 'usage' and 'max' are passed through
    exactly as Prometheus returned them in value[1].

  Raises:
    ValueError: if a 'network' label contains no '@'.
    KeyError: if a series lacks the 'vlan' label or has no matching
      dhcp_leases_max_count series.
  """
  result = json.loads(prometheus('dhcp_leases_current_count'))
  dhcp_usage = result['data']['result']
  result = json.loads(prometheus('dhcp_leases_max_count'))
  dhcp_max = {
      x['metric']['network']: x['value'][1]
      for x in result['data']['result']}

  networks = {}
  for data in dhcp_usage:
    full_name = data['metric']['network']
    # maxsplit=1 yields at most two parts, matching the two-name unpacking.
    # The previous maxsplit=2 produced three parts for labels containing a
    # second '@' and raised ValueError.
    domain, network = full_name.split('@', 1)
    vlan = data['metric']['vlan']
    networks[network] = {
        'domain': domain,
        'vlan': vlan,
        'usage': data['value'][1],
        'max': dhcp_max[full_name]
    }
  return json.dumps(networks)
def fetch(sources):
  """Refresh the cached payload of every data source, forever.

  Intended to run in a background thread: each pass calls every source's
  func() and stores the result in its .data attribute, then sleeps for
  CACHE_TIME seconds.

  A failing source no longer terminates the loop. Its exception is logged,
  its previous .data is kept, and the remaining sources are still refreshed.
  Previously one exception ended the thread and all data went stale.

  NOTE(review): the collapsed source does not show whether the sleep sits
  inside the per-source loop; this assumes one sleep per full pass - confirm.

  Args:
    sources: dict mapping a route name to an object with a callable 'func'
      attribute and a writable 'data' attribute.

  Returns:
    Never returns.
  """
  # Function-scope import so the module's import block is left untouched.
  import logging

  while True:
    for name, source in sources.items():
      try:
        source.data = source.func()
      except Exception:
        logging.exception('Failed to refresh data source %s', name)
    time.sleep(CACHE_TIME)
29 | ;; 30 | stop) 31 | log_daemon_msg "Stopping dhmon pingerd" 32 | pkill -u nobody -f "$DHMON/src/pinger/pingerd" 33 | log_end_msg $? 34 | ;; 35 | restart|force-reload) 36 | $0 stop && sleep 2 && $0 start 37 | ;; 38 | try-restart) 39 | if $0 status >/dev/null; then 40 | $0 restart 41 | else 42 | exit 0 43 | fi 44 | ;; 45 | reload) 46 | exit 3 47 | ;; 48 | status) 49 | pgrep -u nobody -f $DHMON/src/pinger/pingerd -c > /dev/null 50 | if [ "$?" = "0" ]; then 51 | log_success_msg "dhmon pingerd is running" 52 | else 53 | log_failure_msg "dhmon pingerd is not running" 54 | fi 55 | ;; 56 | *) 57 | echo "Usage: $0 {start|stop|restart|try-restart|force-reload|status}" 58 | exit 2 59 | ;; 60 | esac 61 | -------------------------------------------------------------------------------- /src/pinger/dhmonpinger.c: -------------------------------------------------------------------------------- 1 | /* 2 | *Stateless pinger for dhmon. 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | /* The ICMP checksum is calculated statically and just set as a constant. 
/*
 * Compute the 16-bit one's-complement Internet checksum of a buffer.
 *
 * ptr: start of the data; no alignment is required.
 * len: number of bytes to sum. An odd trailing byte is treated as if it were
 *      followed by a zero pad byte. len == 0 yields 0xffff.
 *
 * Returns the complemented sum, built from 16-bit words in memory order, so
 * it can be stored into the packet without byte swapping.
 */
static uint16_t in_cksum(void *ptr, size_t len)
{
  const unsigned char *bytes = ptr;
  uint64_t sum = 0;

  /* memcpy instead of a uint16_t* cast: the buffer may sit inside a packed
   * struct, so a direct 16-bit load could be misaligned. */
  while (len >= 2) {
    uint16_t word;

    memcpy(&word, bytes, sizeof(word));
    sum += word;
    bytes += sizeof(word);
    len -= sizeof(word);
  }

  /* Odd length: pad the final byte with a zero on the right. Without this
   * the size_t counter would wrap around and the loop would overrun. */
  if (len == 1) {
    uint16_t word = 0;

    memcpy(&word, bytes, 1);
    sum += word;
  }

  /* Fold until no carry is left above bit 15, whatever the input size. */
  while (sum >> 16) {
    sum = (sum >> 16) + (sum & 0xffff);
  }
  return (uint16_t)~sum;
}
packet.payload.tv_sec = timestamp.tv_sec; 90 | packet.payload.tv_usec = timestamp.tv_usec; 91 | packet.icmp.checksum = 0; 92 | packet.icmp.checksum = in_cksum( 93 | &packet.icmp, sizeof(packet.icmp) + sizeof(packet.payload)); 94 | 95 | sent = sendto( 96 | sockfd, (void*) &packet, sizeof(packet), 0, 97 | (struct sockaddr*) &targetaddr, sizeof(targetaddr)); 98 | if (sent < 0) { 99 | return PyErr_SetFromErrno(PyExc_IOError); 100 | } 101 | 102 | Py_RETURN_NONE; 103 | } 104 | 105 | 106 | static PyObject *receive(PyObject *self, PyObject *args) { 107 | char ip[16]; 108 | uint8_t control[PINGER_CONTROL_SIZE]; 109 | icmp_t packet; 110 | struct msghdr msg; 111 | struct cmsghdr *cmsg; 112 | struct iovec entry; 113 | struct sockaddr_in from_addr; 114 | struct timeval *stamp; 115 | int res; 116 | int sockfd; 117 | 118 | if (!PyArg_ParseTuple(args, "i", &sockfd)) { 119 | return NULL; 120 | } 121 | 122 | for(;;) { 123 | uint64_t user; 124 | int secs; 125 | int usecs; 126 | 127 | memset(&msg, 0, sizeof(msg)); 128 | msg.msg_iov = &entry; 129 | msg.msg_iovlen = 1; 130 | entry.iov_base = &packet; 131 | entry.iov_len = sizeof(packet); 132 | msg.msg_name = (caddr_t)&from_addr; 133 | msg.msg_namelen = sizeof(from_addr); 134 | msg.msg_control = control; 135 | msg.msg_controllen = PINGER_CONTROL_SIZE; 136 | 137 | Py_BEGIN_ALLOW_THREADS; 138 | res = recvmsg(sockfd, &msg, 0); 139 | if (res < 0) { 140 | return PyErr_SetFromErrno(PyExc_IOError); 141 | } 142 | Py_END_ALLOW_THREADS; 143 | 144 | /* Check that we actually sent this packet. 
*/ 145 | if (res != sizeof(packet)) { 146 | continue; 147 | } 148 | if (packet.payload.magic != htonl(PINGER_MAGIC)) { 149 | continue; 150 | } 151 | 152 | stamp = NULL; 153 | for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { 154 | 155 | if (cmsg->cmsg_level != SOL_SOCKET) 156 | continue; 157 | if (cmsg->cmsg_type != SO_TIMESTAMP) 158 | continue; 159 | 160 | stamp = (struct timeval *)CMSG_DATA(cmsg); 161 | } 162 | 163 | if (!stamp) { 164 | fprintf(stderr, "No timestamp provided by the kernel\n"); 165 | continue; 166 | } 167 | 168 | user = packet.payload.user; 169 | secs = stamp->tv_sec - packet.payload.tv_sec; 170 | usecs = stamp->tv_usec - packet.payload.tv_usec; 171 | if (usecs < 0) { 172 | secs--; 173 | usecs = (1000000 + stamp->tv_usec) - packet.payload.tv_usec; 174 | } 175 | 176 | if (inet_ntop(AF_INET, &from_addr.sin_addr, ip, sizeof(ip)) == NULL) 177 | return PyErr_Format(PyExc_IOError, "inet_ntop failed for IP"); 178 | 179 | return Py_BuildValue("siii", ip, user, secs, usecs); 180 | } 181 | } 182 | 183 | 184 | static PyObject *create_socket(PyObject *self, PyObject *unused_args) { 185 | struct icmp_filter filt; 186 | int enable = 1; 187 | int rcvbuf = PINGER_RCVBUF; 188 | int sockfd; 189 | 190 | if ((sockfd = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0) 191 | return PyErr_SetFromErrno(PyExc_IOError); 192 | 193 | if (setsockopt( 194 | sockfd, SOL_SOCKET, SO_TIMESTAMP, &enable, sizeof(enable)) < 0) 195 | return PyErr_SetFromErrno(PyExc_IOError); 196 | 197 | /* This requires changing net.core.rmem_max, but is needed to not drop 198 | * ICMP replies. */ 199 | if (setsockopt( 200 | sockfd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) < 0) 201 | return PyErr_SetFromErrno(PyExc_IOError); 202 | 203 | if (setsockopt( 204 | sockfd, IPPROTO_IP, IP_HDRINCL, &enable, sizeof(enable)) < 0) 205 | return PyErr_SetFromErrno(PyExc_IOError); 206 | 207 | filt.data = ~(1<