├── .coveragerc
├── .gitignore
├── .gitmodules
├── .travis.yml
├── LICENSE
├── Makefile
├── README.md
├── contrib
│   ├── install
│   │   ├── data
│   │   │   └── elasticsearch-apt.pub
│   │   └── elasticsearch
│   ├── inventory.py
│   ├── metrics
│   │   ├── rancid-heartbeat
│   │   ├── syslog-heartbeat
│   │   └── tacacs-heartbeat
│   └── weathermap
│       ├── generate.sh
│       ├── weathermap.py
│       └── weathermap_test.py
├── debian
│   ├── changelog
│   ├── compat
│   ├── control
│   ├── dhmon-common.install
│   ├── pinger.install
│   ├── rules
│   ├── snmpcollector.install
│   ├── snmpcollector.links
│   └── source
│       ├── format
│       └── include-binaries
├── deps
│   └── Makefile
└── src
    ├── Makefile
    ├── analytics
    │   ├── Makefile
    │   └── analytics.py
    └── pinger
        ├── .gitignore
        ├── Makefile
        ├── dhmon-pingerd.init
        ├── dhmonpinger.c
        ├── pingerd
        └── setup.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | cover_pylib = False
3 | branch = False
4 | data_file = .coverage
5 | parallel = True
6 | omit = *_test.*
7 | source =
8 | src/snmpcollector/src
9 | src/pinger
10 | src/analytics
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | *.db
3 | *.stats
4 | build/
5 | *.pyc
6 | node_modules
7 | debian/files
8 | debian/*.log
9 | debian/*.substvars
10 | debian/analytics/
11 | debian/pinger/
12 | debian/snmpcollector/
13 | debian/dhmon-common/
14 | debian/*.debhelper
15 | debian/tmp
16 | .coverage
17 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "deps/pyyaml"]
2 | path = deps/pyyaml
3 | url = https://github.com/yaml/pyyaml
4 | branch = master
5 | [submodule "deps/pika"]
6 | path = deps/pika
7 | url = https://github.com/pika/pika
8 | [submodule "deps/client_python"]
9 | path = deps/prometheus_client_python
10 | url = https://github.com/prometheus/client_python.git
11 | branch = master
12 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | python:
4 | - "2.7"
5 |
6 | before_install:
7 | - echo "deb http://archive.ubuntu.com/ubuntu trusty main universe" | sudo tee -a /etc/apt/sources.list
8 | - sudo apt-get update -qq
9 | - >
10 | sudo apt-get install -qq --install-recommends debhelper debmake
11 | git-buildpackage pypy devscripts build-essential python-dev
12 | libsnmp-dev python-pika python-yaml python-mock
13 |
14 | install:
15 | - wget https://bootstrap.pypa.io/ez_setup.py -O - | sudo pypy
16 | - rm -f setuptools-*.zip
17 | - sudo pip install coveralls
18 | - sudo ln -sf /usr/local/bin/coverage /usr/bin/
19 |
20 | script:
21 | - make deb
22 |
23 | after_success:
24 | - coveralls
25 |
26 | before_deploy:
27 | - gem install mime-types -v 2.6.2
28 | deploy:
29 | provider: releases
30 | api_key:
31 | secure: eISBbJNB5f9PrKz4fwrnNrvunJHX2cZh2XXnORJ6bO7AAecaMOKJk1IAZd4TDb0UCqwFbp96P5JmC8Q8H/k/dD7QRV2IfXbbZ4E82OS7A3fVJ9x+0nq9KjsXWvxli38+PgUtHsAo9JAxEY5yvMLAp9DcX2YqxcK7kzG1FvD50SA=
32 | file_glob: true
33 | file: "../*.deb"
34 | on:
35 | tags: true
36 | repo: dhtech/dhmon
37 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
2 | TAG=$(shell git name-rev --tags --name-only $(shell git rev-parse HEAD))
3 | VERSION=$(shell echo $(TAG) | grep -o 'v[0-9\.]*' \
4 | | sed -E 's/^v([0-9\.]+)/-N \1-1/')
5 | BRANCH=$(shell git rev-parse --abbrev-ref HEAD)
6 | TREE=$(shell test $(TAG) = undefined && echo $(BRANCH) || echo $(TAG))
7 |
8 | distclean:
9 |
10 | clean install all:
11 | (test $@ = clean && rm -f .coverage) || true
12 | make -C $(CURDIR)/deps/ $@
13 | make -C $(CURDIR)/src/ $@
14 |
15 | test:
16 | coverage erase
17 | TESTBASE=$(CURDIR) make -C $(CURDIR)/src/ $@
18 | coverage combine
19 | coverage report -m
20 |
21 | deb:
22 | echo Using $(TREE)
23 | git checkout $(TREE)
24 | cp debian/changelog debian/changelog.old
25 | rm -f ../dhmon_*.orig.tar.gz
26 | gbp dch --snapshot --auto --ignore-branch $(VERSION)
27 | gbp buildpackage --git-upstream-tree=$(TREE) --git-submodules \
28 | --git-ignore-new --git-ignore-branch --git-builder='debuild -i -I -us -uc'
29 | mv debian/changelog.old debian/changelog
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Build Status](https://travis-ci.org/dhtech/dhmon.svg?branch=master)](https://travis-ci.org/dhtech/dhmon)
2 | [![Coverage Status](https://coveralls.io/repos/dhtech/dhmon/badge.svg)](https://coveralls.io/r/dhtech/dhmon)
3 |
4 | dhmon
5 | =====
6 |
7 | Awesome monitoring system for DreamHack
8 |
9 | See the wiki (https://github.com/dhtech/dhmon/wiki) for the latest scratch notes.
10 |
11 | ## Products
12 |
13 | dhmon consists of a number of smaller products:
14 |
15 | - **snmpcollector** The SNMP collection daemons
16 | - **pinger** RTT statistics collector
17 | - **analytics** API backend to access processed statistics
18 |
19 | ## Installation
20 |
21 | Install the Debian packages for the products you want.
22 |
23 | ## Building Debian packages
24 |
25 | You need to have `setuptools` installed for pypy:
26 |
27 | wget https://bootstrap.pypa.io/ez_setup.py -O - | sudo pypy
28 |
29 | Build the packages
30 |
31 | make deb
32 |
33 | or if you prefer the longer way:
34 |
35 | # Create a new snapshot version
36 | gbp dch --snapshot --auto
37 |
38 | # Clean
39 | rm ../dhmon_*.orig.tar.gz
40 |
41 | # Build
42 | gbp buildpackage --git-upstream-tree=master --git-submodules \
43 | --git-ignore-new --git-builder='debuild -i -I -k28B92277'
44 |
45 |
--------------------------------------------------------------------------------
/contrib/install/data/elasticsearch-apt.pub:
--------------------------------------------------------------------------------
1 | -----BEGIN PGP PUBLIC KEY BLOCK-----
2 | Version: GnuPG v2.0.14 (GNU/Linux)
3 |
4 | mQENBFI3HsoBCADXDtbNJnxbPqB1vDNtCsqhe49vFYsZN9IOZsZXgp7aHjh6CJBD
5 | A+bGFOwyhbd7at35jQjWAw1O3cfYsKAmFy+Ar3LHCMkV3oZspJACTIgCrwnkic/9
6 | CUliQe324qvObU2QRtP4Fl0zWcfb/S8UYzWXWIFuJqMvE9MaRY1bwUBvzoqavLGZ
7 | j3SF1SPO+TB5QrHkrQHBsmX+Jda6d4Ylt8/t6CvMwgQNlrlzIO9WT+YN6zS+sqHd
8 | 1YK/aY5qhoLNhp9G/HxhcSVCkLq8SStj1ZZ1S9juBPoXV1ZWNbxFNGwOh/NYGldD
9 | 2kmBf3YgCqeLzHahsAEpvAm8TBa7Q9W21C8vABEBAAG0RUVsYXN0aWNzZWFyY2gg
10 | KEVsYXN0aWNzZWFyY2ggU2lnbmluZyBLZXkpIDxkZXZfb3BzQGVsYXN0aWNzZWFy
11 | Y2gub3JnPokBOAQTAQIAIgUCUjceygIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgEC
12 | F4AACgkQ0n1mbNiOQrRzjAgAlTUQ1mgo3nK6BGXbj4XAJvuZDG0HILiUt+pPnz75
13 | nsf0NWhqR4yGFlmpuctgCmTD+HzYtV9fp9qW/bwVuJCNtKXk3sdzYABY+Yl0Cez/
14 | 7C2GuGCOlbn0luCNT9BxJnh4mC9h/cKI3y5jvZ7wavwe41teqG14V+EoFSn3NPKm
15 | TxcDTFrV7SmVPxCBcQze00cJhprKxkuZMPPVqpBS+JfDQtzUQD/LSFfhHj9eD+Xe
16 | 8d7sw+XvxB2aN4gnTlRzjL1nTRp0h2/IOGkqYfIG9rWmSLNlxhB2t+c0RsjdGM4/
17 | eRlPWylFbVMc5pmDpItrkWSnzBfkmXL3vO2X3WvwmSFiQbkBDQRSNx7KAQgA5JUl
18 | zcMW5/cuyZR8alSacKqhSbvoSqqbzHKcUQZmlzNMKGTABFG1yRx9r+wa/fvqP6OT
19 | RzRDvVS/cycws8YX7Ddum7x8uI95b9ye1/Xy5noPEm8cD+hplnpU+PBQZJ5XJ2I+
20 | 1l9Nixx47wPGXeClLqcdn0ayd+v+Rwf3/XUJrvccG2YZUiQ4jWZkoxsA07xx7Bj+
21 | Lt8/FKG7sHRFvePFU0ZS6JFx9GJqjSBbHRRkam+4emW3uWgVfZxuwcUCn1ayNgRt
22 | KiFv9jQrg2TIWEvzYx9tywTCxc+FFMWAlbCzi+m4WD+QUWWfDQ009U/WM0ks0Kww
23 | EwSk/UDuToxGnKU2dQARAQABiQEfBBgBAgAJBQJSNx7KAhsMAAoJENJ9ZmzYjkK0
24 | c3MIAIE9hAR20mqJWLcsxLtrRs6uNF1VrpB+4n/55QU7oxA1iVBO6IFu4qgsF12J
25 | TavnJ5MLaETlggXY+zDef9syTPXoQctpzcaNVDmedwo1SiL03uMoblOvWpMR/Y0j
26 | 6rm7IgrMWUDXDPvoPGjMl2q1iTeyHkMZEyUJ8SKsaHh4jV9wp9KmC8C+9CwMukL7
27 | vM5w8cgvJoAwsp3Fn59AxWthN3XJYcnMfStkIuWgR7U2r+a210W6vnUxU4oN0PmM
28 | cursYPyeV0NX/KQeUeNMwGTFB6QHS/anRaGQewijkrYYoTNtfllxIu9XYmiBERQ/
29 | qPDlGRlOgVTd9xUfHFkzB52c70E=
30 | =92oX
31 | -----END PGP PUBLIC KEY BLOCK-----
32 |
--------------------------------------------------------------------------------
/contrib/install/elasticsearch:
--------------------------------------------------------------------------------
1 | #!/bin/bash -xe
2 |
3 | IP_DEFAULT=$(sqlite3 /etc/ipplan.db \
4 | "SELECT ipv4_addr_txt FROM host WHERE name = '$(hostname -f)'")
5 | IP=${IP-${IP_DEFAULT}}
6 |
7 | if [ -z "$IP" ]; then
8 | echo "error: could not determine assigned IP"
9 | exit 1
10 | fi
11 | echo "Using IP $IP"
12 |
13 | cd $(dirname $0)
14 |
15 | cat << EOF > /etc/apt/sources.list.d/elasticsearch.list
16 | deb http://packages.elasticsearch.org/elasticsearch/1.0/debian stable main
17 | EOF
18 |
19 | cat data/elasticsearch-apt.pub | apt-key add -
20 | apt-get update
21 |
22 | apt-get install -y elasticsearch openjdk-7-jre-headless
23 |
24 | cat << EOF > /etc/elasticsearch/elasticsearch.yml
25 | cluster.name: dhmon-es
26 | discovery.zen.ping.multicast.enabled: false
27 | discovery.zen.ping.unicast.hosts: ["metricstore.event.dreamhack.se" ]
28 | network.bind_host: 0.0.0.0
29 | network.publish_host: $IP
30 | EOF
31 |
32 | echo 'RESTART_ON_UPGRADE=true' > /etc/default/elasticsearch
33 | update-rc.d elasticsearch defaults 95 10
34 |
35 | service elasticsearch start
36 |
--------------------------------------------------------------------------------
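
A quick sanity check for the node this script bootstraps is the cluster
health API. A minimal sketch, assuming the default Elasticsearch HTTP port
(9200) is reachable on localhost:

    import json
    import urllib2

    # 'status' is green/yellow/red; 'cluster_name' should read dhmon-es.
    health = json.load(urllib2.urlopen('http://localhost:9200/_cluster/health'))
    print '%s: %s (%d nodes)' % (
        health['cluster_name'], health['status'], health['number_of_nodes'])
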
/contrib/inventory.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2
2 | # Script to list inventory with serial numbers from SNMP.
3 | # TODO(bluecmd): Currently only reads from a JSON dump file:
4 | # python inventory.py host\:d-center-st.event.dreamhack.local.lst
5 | #
6 | # Output example:
7 | # Root
8 | # c3xxx Stack (FOC1842U0JZ)
9 | # WS-C3850-48P (FCW1842C0SK)
10 | # Switch 2 - WS-C3850-48P - Power Supply A Container
11 | # Switch 2 - Power Supply A (DCB1835G08H)
12 | # Switch 2 - WS-C3850-48P - Power Supply B Container
13 | # Switch 2 - WS-C3850-48P - Fan 1 Container
14 | # Switch 2 - WS-C3850-48P - FAN 1
15 | # Switch 2 - WS-C3850-48P - Fan 2 Container
16 | # Switch 2 - WS-C3850-48P - FAN 2
17 | # ... [snip] ...
18 | # GigabitEthernet1/0/2
19 | # Switch 1 Slot 1 FRULink Container
20 | # 4x10G Uplink Module (FOC18436FP1)
21 | # Switch 1 Slot 1 SFP Container 0
22 | # Switch 1 Slot 1 SFP Container 1
23 | # SFP-10GBase-LR (VB13450356 )
24 | # Switch 1 Slot 1 SFP Container 2
25 | # Switch 1 Slot 1 SFP Container 3
26 |
27 | import base64
28 | import collections
29 | import json
30 | import sys
31 |
32 | SNMP_entPhysicalDescr = '.1.3.6.1.2.1.47.1.1.1.1.2'
33 | SNMP_entPhysicalContainedIn = '.1.3.6.1.2.1.47.1.1.1.1.4'
34 | SNMP_entPhysicalSerialNum = '.1.3.6.1.2.1.47.1.1.1.1.11'
35 |
36 | snmp = collections.defaultdict(dict)
37 | # Tree: Dict index is node ID, list entries are children
38 | inventory = collections.defaultdict(list)
39 |
40 | with open(sys.argv[1]) as f:
41 | for row in f:
42 | struct = json.loads(row)
43 | if isinstance(struct, int):
44 | # Timestamp, skip
45 | continue
46 |
47 | # Skip non-SNMP values
48 | if not struct['metric'].startswith('snmp.1'):
49 | continue
50 |
51 | # Skip VLAN aware contexts
52 | if '@' in struct['metric']:
53 | continue
54 |
55 | # Decode value
56 | value = struct['value']
57 | if isinstance(value, int):
58 | pass
59 | elif value.startswith('OCTETSTR'):
60 | try:
61 | value = base64.b64decode(value.split(':', 1)[1]).decode()
62 | except UnicodeDecodeError:
63 | # Ignore MAC addresses and stuff like that
64 | continue
65 | else:
66 | # Ignore unknown metric
67 | continue
68 |
69 | oid = struct['metric'][4:]
70 | root, lastoid = oid.rsplit('.', 1)
71 | snmp[root][int(lastoid)] = value
72 |
73 | # Walk the inventory tree
74 | for oid, value in snmp[SNMP_entPhysicalContainedIn].iteritems():
75 | inventory[value].append(oid)
76 |
77 |
78 | def get_product(lastoid):
79 | """Given a last OID, return the human readable 'Product name (S/N)'"""
80 | if lastoid == 0:
81 | return 'Root'
82 | # TODO(bluecmd): Kill global variable
83 | model = snmp[SNMP_entPhysicalDescr][lastoid]
84 | serial = snmp[SNMP_entPhysicalSerialNum][lastoid]
85 | if serial:
86 | return '%s (%s)' % (model, serial)
87 | return model
88 |
89 |
90 | def print_inventory(inventory, idx, level=0):
91 | """Recursively travel through the inventory database and print it"""
92 | print ' ' * level, get_product(idx)
93 | for child in inventory[idx]:
94 | print_inventory(inventory, child, level+1)
95 |
96 | # Print inventory, start with root
97 | print_inventory(inventory, 0)
98 |
--------------------------------------------------------------------------------
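
For reference, a hypothetical three-row dump in the shape inventory.py
parses: one JSON object per row, where 'metric' is 'snmp' followed by the
OID and row index, and 'value' is either an int or 'OCTETSTR:' + base64.
The model and serial below are made up:

    import base64
    import json

    def octetstr(s):
      return 'OCTETSTR:' + base64.b64encode(s)

    rows = [
        # entPhysicalDescr, entPhysicalContainedIn, entPhysicalSerialNum
        {'metric': 'snmp.1.3.6.1.2.1.47.1.1.1.1.2.1001',
         'value': octetstr('WS-C3850-48P')},
        {'metric': 'snmp.1.3.6.1.2.1.47.1.1.1.1.4.1001', 'value': 0},
        {'metric': 'snmp.1.3.6.1.2.1.47.1.1.1.1.11.1001',
         'value': octetstr('FCW1842C0SK')},
    ]
    with open('host.lst', 'w') as f:
      for row in rows:
        f.write(json.dumps(row) + '\n')
    # python inventory.py host.lst prints 'Root' with one child entry.
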
/contrib/metrics/rancid-heartbeat:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd /var/lib/rancid/*/configs
4 |
5 | TIMESTAMP=$(date +'%s')
6 |
7 | for host in $(sqlite3 /etc/ipplan.db "SELECT h.name FROM host h, option o WHERE o.name = 'rncd' AND h.node_id = o.node_id")
8 | do
9 | MARKER=$(svn info --xml $host \
10 | | grep text-updated \
11 |     | sed 's/<text-updated>\(.*\)<\/text-updated>/\1/' \
12 | | xargs -I{} date --date={} +'%s')
13 | SIZE=$(wc -c $host | awk '{print $1}')
14 |
15 | echo "rancid_config_bytes{host=\"$host\"} $SIZE $(($TIMESTAMP * 1000))"
16 |   echo "rancid_config_updated{host=\"$host\"} $MARKER $(($TIMESTAMP * 1000))"
17 | done
18 |
--------------------------------------------------------------------------------
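
The heartbeat scripts in contrib/metrics emit Prometheus-style exposition
rows with a millisecond timestamp appended. An illustrative parser for that
shape (the regex and host name are made up):

    import re

    LINE_RE = re.compile(r'^(\w+)\{host="([^"]+)"\} (\d+) (\d+)$')
    line = 'rancid_config_bytes{host="sw1.example.net"} 4711 1435269114000'
    name, host, value, stamp_ms = LINE_RE.match(line).groups()
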
/contrib/metrics/syslog-heartbeat:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | now=$(date +"%s")
4 |
5 | IFS='
6 | '
7 | for row in $(stat --printf "%n %Y %s\n" /var/log/dh/*/all.log)
8 | do
9 | filename=$(basename $(dirname $(echo $row | awk '{print $1}')))
10 | stamp=$(echo $row | awk '{print $2}')
11 | size=$(echo $row | awk '{print $3}')
12 | if ! echo "${filename}" | grep '\.' -q; then
13 | # TODO(bluecmd): we probably want full fqdn from servers instead
14 | host="$filename.event.dreamhack.se"
15 | else
16 | host="$filename"
17 | fi
18 | echo "syslog_log_bytes{host=\"$host\"} ${size} $(($now * 1000))"
19 | echo "syslog_log_updated{host=\"$host\"} ${stamp} $(($now * 1000))"
20 | done
21 |
--------------------------------------------------------------------------------
/contrib/metrics/tacacs-heartbeat:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | IFS='
4 | '
5 | LATEST=$(for line in $(grep tac_plus /var/log/dh/$(hostname)/all.log \
6 | | grep 'shell login' | awk '{print $1, $2, $3, $6}')
7 | do
8 | DATE=$(date --date="$(echo $line | awk '{print $1, $2, $3}')" +'%s')
9 | HOST=$(echo $line | awk '{print $4}' | sed 's/://g')
10 | echo $DATE $HOST
11 | done | sort -k 2 | uniq -f 1 | sort -n)
12 |
13 | TIMESTAMP=$(date +'%s')
14 | echo "TIMESTAMP=$TIMESTAMP"
15 | for line in $LATEST
16 | do
17 | LAST=$(echo $line | awk '{print $1}')
18 | RAW_HOST=$(echo $line | awk '{print $2}')
19 | HOST=$(host $RAW_HOST)
20 | if [[ "$?" != "0" ]]; then
21 | logger -p warn "Unable to resolve raw host: '$RAW_HOST'"
22 | continue
23 | fi
24 |
25 | if host $RAW_HOST | grep 'domain name pointer' -q; then
26 | HOST=$(echo $HOST | awk '{print $NF}' | sed 's/\.$//')
27 | else
28 | HOST=$(echo $HOST | awk '{print $1}')
29 | fi
30 |   echo "$HOST|tacacs.last-login|$((($TIMESTAMP - $LAST) * 1000))"
31 | done | dhmon-metric $TIMESTAMP
32 |
--------------------------------------------------------------------------------
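
Unlike the exposition-format heartbeats above, this script feeds
"host|metric|value" rows into dhmon-metric. A hypothetical illustration of
that row shape (host and value are made up):

    host, metric, value = 'sw1.example.net|tacacs.last-login|42000'.split('|')
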
/contrib/weathermap/generate.sh:
--------------------------------------------------------------------------------
1 | python weathermap_test.py | twopi -T svg > /var/www/test.svg
2 |
--------------------------------------------------------------------------------
/contrib/weathermap/weathermap.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import redis
3 |
4 | class NondirectionalEdge(object):
5 | def __init__(self, a, b, weight):
6 | self.a = a
7 | self.b = b
8 | self.weight = weight
9 |
10 | def __eq__(self, other):
11 | # TODO(bluecmd): we don't consider different weights here
12 | return (self.a == other.a and self.b == other.b) or (
13 | self.a == other.b and self.b == other.a)
14 |
15 | def __hash__(self):
16 | return hash(str(sorted([self.a, self.b])))
17 |
18 | class NetworkGrapher(object):
19 |
20 | def __init__(self, nodes, neighbor_func):
21 | self.nodes = nodes
22 | self._neighbor_func = neighbor_func
23 |
24 | def _find_edges(self, node):
25 | neighbors = set()
26 | for neighbor, interface, weight in self._neighbor_func(node):
27 | neighbors.add(neighbor)
28 | yield (node, neighbor, interface, weight)
29 | self.visited.add(node)
30 | for neighbor in filter(lambda x: x not in self.visited, neighbors):
31 |       for edge in self._find_edges(neighbor): yield edge
32 |
33 | def build(self):
34 | self.visited = set()
35 | self.edges = set()
36 | for root in self.nodes:
37 | for (a, b, interface, weight) in self._find_edges(root):
38 | self.edges.add((a, b, interface, weight))
39 |
40 | if __name__ == '__main__':
41 | r = redis.StrictRedis(host='localhost', port=6379, db=1)
42 | # Find all nodes by looking for cdpCacheDeviceId
43 | nodes = [
44 | key.split(':')[0] for key in r.keys('*:1.3.6.1.4.1.9.9.23.1.2.1.1.6.*.1')]
45 | print '%d nodes found' % len(nodes)
46 |
47 |   def resolver(node):  # yields (neighbor, interface, weight); weight defaults to 1
48 |     return [(r.get(k), k.split('.')[-2], 1) for k in r.keys('%s:1.3.6.1.4.1.9.9.23.1.2.1.1.6.*.1' % node)]
49 |
50 | grapher = NetworkGrapher(nodes, resolver)
51 | grapher.build()
52 | for edge in grapher.edges:
53 |     print '%s -- %s' % (edge[0], edge[1])
54 |
--------------------------------------------------------------------------------
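
NetworkGrapher only needs root nodes plus a neighbor function yielding
(neighbor, interface, weight) tuples, so it can be driven without redis. A
minimal sketch with a made-up static adjacency map:

    import weathermap

    adjacency = {
        'core': [('dist1', 'eth0', 100), ('dist2', 'eth1', 100)],
        'dist1': [('access1', 'gig0', 10)],
    }

    grapher = weathermap.NetworkGrapher(
        ['core'], lambda node: adjacency.get(node, []))
    grapher.build()
    for a, b, interface, weight in grapher.edges:
      print '%s -- %s (%s, weight %d)' % (a, b, interface, weight)
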
/contrib/weathermap/weathermap_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import collections
3 | import weathermap
4 |
5 | _map = collections.defaultdict(list)
6 |
7 | def build_mock(access=True):
8 | for i in xrange(1, 19):
9 | _map['root:%s' % ('w' if i < 10 else 'e')].append(('dist%d' % i, 'eth%d.1' % i, 100))
10 | _map['root:%s' % ('e' if i < 10 else 'w')].append(('dist%d' % i, 'eth%d.2' % i, 100))
11 | _map['dist%d' % i] = [('dist%d' % (i - 1 if i%2 else i+1), 'eth0', 10)]
12 |
13 | if access:
14 | for i in xrange(1, 200):
15 | _map['access%d' % i] = [('dist%d' % (i % 20), 'gig0', 1)]
16 |
17 | def mock_resolver(node):
18 | for a, b in _map.iteritems():
19 | if a == node:
20 | for n in b:
21 | yield n
22 |
23 | def mock_all_nodes():
24 | for a, b in _map.iteritems():
25 | for n in b:
26 | yield n[0]
27 | yield a
28 |
29 | def format_node(node):
30 | if node.startswith('access'):
31 | return '', 'point', 'dodgerblue4'
32 | elif node.startswith('dist'):
33 | return node, 'hexagon', 'goldenrod'
34 | elif node.startswith('root'):
35 | return node, 'circle', 'forestgreen'
36 |
37 | if __name__ == '__main__':
38 | build_mock(access=True)
39 | nodes = set(mock_all_nodes())
40 | grapher = weathermap.NetworkGrapher(nodes, mock_resolver)
41 | grapher.build()
42 | print 'graph G {'
43 | print ' ranksep=3.4;'
44 | print ' ratio=auto;'
45 | print ' overlap=false;'
46 | print ' splines=true;'
48 | print ' bgcolor=black;'
49 | for node in set([x.split(':')[0] for x in nodes]):
50 | (label, shape, color) = format_node(node)
51 | print '"%s" [ label="%s",shape="%s",style="filled",fillcolor="%s" ];' % (
52 | node, label, shape, color)
53 | for edge in grapher.edges:
54 | weight = edge[3]
55 | penwidth = 1
56 |     if weight < 10:
57 |       penwidth = 0.1
58 |     elif weight < 100:
59 |       penwidth = 0.5
60 | color = int(edge[1][4:]) % 9 + 1
61 |     print '"%s" -- "%s" [colorscheme=rdylgn9, weight=%d, color=%s, penwidth=%f]' % (edge[0], edge[1], edge[3], color, penwidth)
62 | print '}'
63 |
--------------------------------------------------------------------------------
/debian/changelog:
--------------------------------------------------------------------------------
1 | dhmon (0.1-1) UNRELEASED; urgency=medium
2 |
3 | * Initial release.
4 |
5 | -- Christian Svensson Thu, 25 Jun 2015 22:51:54 +0200
6 |
--------------------------------------------------------------------------------
/debian/compat:
--------------------------------------------------------------------------------
1 | 9
2 |
--------------------------------------------------------------------------------
/debian/control:
--------------------------------------------------------------------------------
1 | Source: dhmon
2 | Maintainer: Christian Svensson
3 | Section: misc
4 | Priority: optional
5 | Standards-Version: 3.9.2
6 | Build-Depends: debhelper (>= 9), pypy, libsnmp-dev, python-dev,
7 | python-pika, python-yaml, python-mock
8 |
9 | Package: dhmon-common
10 | Architecture: any
11 | Depends: ${shlibs:Depends}, ${misc:Depends}
12 | Description: Common dependencies for dhmon
13 | Dependencies that all or almost all dhmon components share.
14 |
15 | Package: snmpcollector
16 | Architecture: any
17 | Depends: ${shlibs:Depends}, ${misc:Depends}, dhmon-common,
18 | python-netsnmp, python-pika (>=0.9.14), pypy
19 | Description: SNMP collector for dhmon
20 | SNMP collection for the DreamHack monitoring system.
21 |
22 | Package: pinger
23 | Architecture: any
24 | Depends: ${shlibs:Depends}, ${misc:Depends}, dhmon-common
25 | Description: RTT collector for dhmon
26 | RTT collection for the DreamHack monitoring system.
27 |
28 | #Package: analytics
29 | #Architecture: all
30 | #Depends: ${shlibs:Depends}, ${misc:Depends}, dhmon-common
31 | #Description: Analytics API server for dhmon
32 | # API backend for the DreamHack monitoring system.
33 |
34 |
--------------------------------------------------------------------------------
/debian/dhmon-common.install:
--------------------------------------------------------------------------------
1 | usr/local/lib/python2.7/dist-packages/prometheus_client* usr/lib/python2.7/dist-packages/prometheus_client
2 | usr/local/lib/pypy2.7/dist-packages/prometheus_client* usr/lib/pypy/dist-packages/
3 |
--------------------------------------------------------------------------------
/debian/pinger.install:
--------------------------------------------------------------------------------
1 | opt/pinger/pingerd /usr/bin/
2 | usr/local/lib/python2.7/dist-packages/dhmonpinger* usr/lib/python2.7/dist-packages/
3 |
--------------------------------------------------------------------------------
/debian/rules:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 | %:
3 | dh $@
4 |
5 | override_dh_installinit:
6 | dh_installinit --onlyscripts
7 |
--------------------------------------------------------------------------------
/debian/snmpcollector.install:
--------------------------------------------------------------------------------
1 | opt/snmpcollector/src/* /usr/share/snmpcollector/
2 | etc/snmpcollector.yaml
3 | etc/default/snmpcollector
4 | etc/init.d/snmpcollector
5 | usr/local/lib/python2.7/dist-packages/mibresolver* usr/lib/python2.7/dist-packages/
6 | usr/local/lib/pypy2.7/dist-packages/yaml* usr/lib/pypy/dist-packages/
7 | usr/local/lib/pypy2.7/dist-packages/pika* usr/lib/pypy/dist-packages/
8 |
--------------------------------------------------------------------------------
/debian/snmpcollector.links:
--------------------------------------------------------------------------------
1 | /usr/share/snmpcollector/trigger.py /usr/bin/snmpcollector-trigger
2 | /usr/share/snmpcollector/snmptest.py /usr/bin/snmpcollector-test
3 |
--------------------------------------------------------------------------------
/debian/source/format:
--------------------------------------------------------------------------------
1 | 3.0 (quilt)
2 |
--------------------------------------------------------------------------------
/debian/source/include-binaries:
--------------------------------------------------------------------------------
1 | deps/pyyaml/tests/data/invalid-character.loader-error
2 | deps/pyyaml/tests/data/odd-utf16.stream-error
3 | deps/pyyaml/tests/data/spec-05-01-utf16be.data
4 | deps/pyyaml/tests/data/spec-05-01-utf16le.data
5 | deps/pyyaml/tests/data/spec-05-02-utf16be.data
6 | deps/pyyaml/tests/data/spec-05-02-utf16le.data
7 | deps/pyyaml/tests/data/utf16be.data
8 | deps/pyyaml/tests/data/utf16le.data
9 |
--------------------------------------------------------------------------------
/deps/Makefile:
--------------------------------------------------------------------------------
1 | clean:
2 | (cd pyyaml; pypy setup.py clean)
3 | (cd pika; pypy setup.py clean)
4 | (cd prometheus_client_python; pypy setup.py clean)
5 | (cd prometheus_client_python; python setup.py clean)
6 | rm -fr */build
7 | rm -fr */*.egg-info
8 |
9 | all:
10 | true
11 |
12 | install:
13 | mkdir -p $(DESTDIR)/usr/lib/pypy/
14 | (cd pyyaml; pypy setup.py install --root $(DESTDIR) $(COMPILE))
15 | (cd pika; pypy setup.py install --root $(DESTDIR) $(COMPILE))
16 | (cd prometheus_client_python; pypy setup.py install --root $(DESTDIR) $(COMPILE))
17 | (cd prometheus_client_python; python setup.py install --root $(DESTDIR) $(COMPILE))
18 |
--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
1 | %:
2 | make -C ${CURDIR}/analytics $@
3 | make -C ${CURDIR}/pinger $@
4 |
--------------------------------------------------------------------------------
/src/analytics/Makefile:
--------------------------------------------------------------------------------
1 | clean:
2 | true
3 | all:
4 | echo "TODO"
5 | test:
6 | true
7 | install:
8 | echo "TODO"
9 |
--------------------------------------------------------------------------------
/src/analytics/analytics.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import concurrent.futures
3 | import flask
4 | import functools
5 | import json
6 | import sqlite3
7 | import threading
8 | import time
9 | import urllib
10 | import urllib2
11 |
12 |
13 | DB_FILE = '/etc/ipplan.db'
14 | CACHE_TIME = 10
15 |
16 |
17 | class DataSource(object):
18 | def __init__(self, data, func):
19 | self.data = data
20 | self.func = func
21 |
22 |
23 | app = flask.Flask(__name__)
24 | data_sources = {}
25 |
26 |
27 | def analytics(t):
28 | def handler(func):
29 | data_sources[t] = DataSource("", func)
30 | @app.route(t)
31 | @functools.wraps(func)
32 | def wrapper():
33 | return data_sources[t].data
34 | return wrapper
35 | return handler
36 |
37 |
38 | def prometheus(query):
39 | host = 'http://localhost:9090'
40 | url = '{host}/prometheus/api/v1/query?query={query}&time={time}'
41 |
42 | o = urllib2.urlopen(url.format(
43 | query=urllib.quote(query), time=int(time.time()), host=host))
44 |
45 | return o.read()
46 |
47 |
48 | @analytics('/event.hosts')
49 | def event_hosts():
50 | conn = sqlite3.connect(DB_FILE)
51 | c = conn.cursor()
52 | c.execute('SELECT h.node_id, h.name, n.name '
53 | 'FROM host h, network n WHERE n.node_id = h.network_id')
54 |
55 | nodes = {}
56 | for node_id, node, network in c.fetchall():
57 | if not network.startswith('EVENT@'):
58 | continue
59 | c.execute('SELECT name, value FROM option WHERE node_id = ?', (node_id, ))
60 | options = {}
61 | for name, value in c:
62 | options[name] = value
63 | nodes[node] = {
64 | 'options': options
65 | }
66 | return json.dumps(nodes)
67 |
68 |
69 | @analytics('/ping.status')
70 | def ping_status():
71 | result = json.loads(prometheus('changes(icmp_rtt_seconds_sum[1m])'))
72 | ts = result['data']['result']
73 |
74 | nodes = {x['metric']['host']: 60-int(x['value'][1]) for x in ts}
75 | return json.dumps(nodes)
76 |
77 |
78 | @analytics('/mon.alerts')
79 | def mon_alerts():
80 | result = json.loads(prometheus(
81 | 'count(' +
82 | 'label_replace(ALERTS{host!=""}, "instance", "$1", "host", "(.*)") ' +
83 | 'or label_replace(ALERTS{instance!=""}, "instance", "$1", "instance", "(.*):[0-9]+")) ' +
84 | 'by (instance)'))
85 | ts = result['data']['result']
86 | nodes = {x['metric']['instance']: int(x['value'][1]) for x in ts}
87 | return json.dumps(nodes)
88 |
89 |
90 | @analytics('/snmp.saves')
91 | def snmp_saves():
92 | result = json.loads(prometheus('sum(count_over_time({__name__=~".+",instance!=""}[5m])) by (instance)'))
93 | ts = result['data']['result']
94 |
95 | nodes = {x['metric']['instance']: {'metrics': int(x['value'][1])} for x in ts}
96 | return json.dumps(nodes)
97 |
98 |
99 | @analytics('/snmp.errors')
100 | def snmp_errors():
101 | result = json.loads(prometheus(
102 | 'count(max_over_time(up{job=~"snmp.*",instance!=""}[5m]) == 0) by (instance)'))
103 | ts = result['data']['result']
104 |
105 | nodes = {x['metric']['instance']: {
106 | 'error': 'Timeout or Auth Error'} for x in ts}
107 | return json.dumps(nodes)
108 |
109 |
110 | @analytics('/syslog.status')
111 | def syslog_status():
112 | result = json.loads(prometheus('max_over_time(syslog_log_bytes[5m])'))
113 | ts = result['data']['result']
114 | nodes = {x['metric']['host']: {'size': int(x['value'][1])} for x in ts}
115 | return json.dumps(nodes)
116 |
117 |
118 | @analytics('/rancid.status')
119 | def rancid_status():
120 | result = json.loads(prometheus('max_over_time(rancid_config_bytes[5m])'))
121 | ts = result['data']['result']
122 | nodes = {x['metric']['host']: {'size': int(x['value'][1])} for x in ts}
123 | return json.dumps(nodes)
124 |
125 |
126 | @analytics('/dhcp.status')
127 | def dhcp_status():
128 | result = json.loads(prometheus('dhcp_leases_current_count'))
129 | dhcp_usage = result['data']['result']
130 | result = json.loads(prometheus('dhcp_leases_max_count'))
131 | dhcp_max = {
132 | x['metric']['network']: x['value'][1]
133 | for x in result['data']['result']}
134 |
135 | networks = {}
136 | for data in dhcp_usage:
137 |     domain, network = data['metric']['network'].split('@', 1)
138 | vlan = data['metric']['vlan']
139 | networks[network] = {
140 | 'domain': domain,
141 | 'vlan': vlan,
142 | 'usage': data['value'][1],
143 | 'max': dhcp_max[data['metric']['network']]
144 | }
145 | return json.dumps(networks)
146 |
147 |
148 | @analytics('/switch.version')
149 | def switch_version():
150 | return "{}"
151 |
152 |
153 | def interface_variable(variable, key, bool_value=None, func=None, time=''):
154 | query = variable + '{instance!="",layer="access"}' + time
155 | if func:
156 | query = '%s(%s)' % (func, query)
157 | result = json.loads(prometheus(query))
158 | ts = result['data']['result']
159 | nodes = collections.defaultdict(lambda: collections.defaultdict(dict))
160 | for data in ts:
161 | try:
162 | host = data['metric']['instance']
163 | iface = data['metric']['interface']
164 | if 'enum' in data['metric']:
165 | value = data['metric']['enum']
166 | else:
167 | value = data['value'][1]
168 | if bool_value is not None:
169 | value = (bool_value == value)
170 | nodes[host][iface][key] = value
171 | nodes[host][iface]['lastoid'] = data['metric']['index']
172 | except KeyError:
173 | # Ignore incomplete data
174 | continue
175 | return dict(nodes)
176 |
177 |
178 | @analytics('/switch.interfaces')
179 | def switch_interfaces():
180 | nodes = collections.defaultdict(lambda: collections.defaultdict(dict))
181 | variables = (
182 | ('ifOperStatus', 'status'),
183 | ('vlanTrunkPortDynamicStatus', 'trunk', 'trunking'),
184 | ('ifOutErrors', 'errors_out', None, 'rate', '[10m]'),
185 | ('ifInErrors', 'errors_in', None, 'rate', '[10m]'),
186 | ('ifAdminStatus', 'admin'),
187 | ('ifHighSpeed', 'speed'),
188 | ('dot1dStpPortState', 'stp'))
189 |
190 | results = []
191 | with concurrent.futures.ThreadPoolExecutor(max_workers=10) as e:
192 |     for result in e.map(lambda x: interface_variable(*x), variables):
193 |       results.append(result)
194 |
195 | for result in results:
196 | for node, ifaces in result.iteritems():
197 | for iface, props in ifaces.iteritems():
198 | nodes[node][iface].update(props)
199 | return json.dumps(nodes)
200 |
201 |
202 | @analytics('/switch.vlans')
203 | def switch_vlans():
204 | result = json.loads(prometheus('changes(vtpVlanState{instance!=""}[5m])'))
205 | ts = result['data']['result']
206 |
207 | nodes = collections.defaultdict(dict)
208 | for data in ts:
209 | host = data['metric']['instance']
210 | vlan = data['metric']['index'].split('.', 1)[1]
211 | nodes[host][vlan] = 1
212 | return json.dumps(nodes)
213 |
214 |
215 | @analytics('/switch.model')
216 | def switch_model():
217 | result = json.loads(prometheus(
218 | 'changes(entPhysicalModelName{instance!="",index="1"}[5m])'))
219 | ts = result['data']['result']
220 |
221 | nodes = {x['metric']['instance']: {'model': x['metric']['value']} for x in ts}
222 | return json.dumps(nodes)
223 |
224 |
225 | def fetch(sources):
226 | while True:
227 | for source in sources:
228 | sources[source].data = sources[source].func()
229 | time.sleep(CACHE_TIME)
230 |
231 |
232 | if __name__ == '__main__':
233 | fetch_thread = threading.Thread(target=fetch,args=(data_sources,))
234 | fetch_thread.daemon = True
235 | fetch_thread.start()
236 | # The background thread will be multiplied with the number of flask
237 | # threads, so keep just one thread for serving. The data is cached anyway
238 | # so it should be fast.
239 | app.run(debug=True, threaded=False, port=5000)
240 |
241 |
--------------------------------------------------------------------------------
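
The endpoints above return plain JSON, so consumers can poll them over HTTP.
A minimal sketch, assuming the app runs on its default port 5000:

    import json
    import urllib2

    # ping.status maps each host to roughly how many pings it missed in
    # the last minute.
    status = json.load(urllib2.urlopen('http://localhost:5000/ping.status'))
    for host, missed in sorted(status.iteritems()):
      if missed:
        print '%s missed ~%d pings in the last minute' % (host, missed)
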
/src/pinger/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 |
--------------------------------------------------------------------------------
/src/pinger/Makefile:
--------------------------------------------------------------------------------
1 | clean:
2 | rm -fr build/
3 |
4 | all:
5 | true
6 |
7 | test:
8 | true
9 |
10 | install:
11 | python setup.py install --root $(DESTDIR) $(COMPILE)
12 | mkdir -p $(DESTDIR)/opt/pinger/src/
13 | cp pingerd $(DESTDIR)/opt/pinger/
14 |
--------------------------------------------------------------------------------
/src/pinger/dhmon-pingerd.init:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | ### BEGIN INIT INFO
4 | # Provides: dhmon-pingerd
5 | # Required-Start: $network
6 | # Required-Stop: $network
7 | # Default-Start: 2 3 4 5
8 | # Default-Stop:
9 | # Short-Description: Start dhmon pingerd
10 | ### END INIT INFO
11 |
12 | PATH=/sbin:/bin:/usr/sbin:/usr/bin
13 |
14 | . /lib/lsb/init-functions
15 |
16 | DHMON=/srv/dhmon
17 | DHMONRUN=/var/run/dhmon
18 | test -d $DHMON || exit 5
19 |
20 | if [ -r /etc/default/dhmon ]; then
21 | . /etc/default/dhmon
22 | fi
23 |
24 | case $1 in
25 | start)
26 | log_daemon_msg "Starting dhmon pingerd"
27 | $DHMON/src/pinger/pingerd
28 | log_end_msg $?
29 | ;;
30 | stop)
31 | log_daemon_msg "Stopping dhmon pingerd"
32 | pkill -u nobody -f "$DHMON/src/pinger/pingerd"
33 | log_end_msg $?
34 | ;;
35 | restart|force-reload)
36 | $0 stop && sleep 2 && $0 start
37 | ;;
38 | try-restart)
39 | if $0 status >/dev/null; then
40 | $0 restart
41 | else
42 | exit 0
43 | fi
44 | ;;
45 | reload)
46 | exit 3
47 | ;;
48 | status)
49 |     pgrep -c -u nobody -f $DHMON/src/pinger/pingerd > /dev/null
50 | if [ "$?" = "0" ]; then
51 | log_success_msg "dhmon pingerd is running"
52 | else
53 | log_failure_msg "dhmon pingerd is not running"
54 | fi
55 | ;;
56 | *)
57 | echo "Usage: $0 {start|stop|restart|try-restart|force-reload|status}"
58 | exit 2
59 | ;;
60 | esac
61 |
--------------------------------------------------------------------------------
/src/pinger/dhmonpinger.c:
--------------------------------------------------------------------------------
1 | /*
2 |  * Stateless pinger for dhmon.
3 |  */
4 | #include <stdio.h>
5 | #include <stdint.h>
6 | #include <string.h>
7 | #include <sys/socket.h>
8 | #include <sys/time.h>
9 | #include <netinet/in.h>
10 | #include <netinet/ip.h>
11 | #include <linux/icmp.h>
12 | #include <arpa/inet.h>
13 |
14 | #include <Python.h>
15 |
16 | /* The ICMP checksum is calculated statically and just set as a constant. */
17 | #define PINGER_ICMP_CHKSUM 0xf7ff
18 | #define PINGER_TTL 128
19 | #define PINGER_CONTROL_SIZE 512
20 | #define PINGER_MAGIC 0xc001c0de
21 | #define PINGER_RCVBUF 3670016
22 |
23 | typedef struct __attribute__ ((__packed__)) {
24 | struct iphdr ip;
25 | struct icmphdr icmp;
26 | struct __attribute__ ((__packed__)) {
27 | uint64_t user;
28 | uint32_t tv_sec;
29 | uint32_t tv_usec;
30 | uint32_t magic;
31 | } payload;
32 | } icmp_t;
33 |
34 |
35 | /* len must be divisible by 2 */
36 | static uint16_t in_cksum(void *ptr, size_t len)
37 | {
38 | uint16_t *u16_ptr = (uint16_t*)ptr;
39 | uint64_t sum = 0;
40 |
41 | while (len > 0) {
42 | sum += *u16_ptr++;
43 | len -= 2;
44 | }
45 |
46 | sum = (sum >> 16) + (sum & 0xffff);
47 | sum += (sum >> 16);
48 | return ~sum;
49 | }
50 |
51 |
52 | static PyObject *transmit(PyObject *self, PyObject *args) {
53 | icmp_t packet;
54 | struct sockaddr_in targetaddr;
55 | struct timeval timestamp;
56 | uint64_t user;
57 |   ssize_t sent;
58 | int sockfd;
59 | const char *textaddr;
60 |
61 | memset(&packet, 0, sizeof(packet));
62 | memset(&targetaddr, 0, sizeof(targetaddr));
63 |
64 | packet.ip.version = 4;
65 | packet.ip.ihl = sizeof(packet.ip) / 4;
66 | packet.ip.tot_len = htons(sizeof(packet));
67 | packet.ip.protocol = IPPROTO_ICMP;
68 | packet.ip.ttl = PINGER_TTL;
69 |
70 | packet.icmp.type = ICMP_ECHO;
71 | packet.icmp.checksum = htons(PINGER_ICMP_CHKSUM);
72 | packet.payload.magic = htonl(PINGER_MAGIC);
73 |
74 |   if (!PyArg_ParseTuple(args, "isK", &sockfd, &textaddr, &user)) {
75 | return NULL;
76 | }
77 |
78 | if (inet_pton(AF_INET, textaddr, &targetaddr.sin_addr) != 1)
79 | return PyErr_Format(PyExc_IOError, "inet_pton failed for IP");
80 |
81 | targetaddr.sin_family = AF_INET;
82 | packet.ip.daddr = targetaddr.sin_addr.s_addr;
83 |
84 | /* payload is magic + sent timestamp */
85 |   if (gettimeofday(&timestamp, NULL) < 0)
86 | return PyErr_SetFromErrno(PyExc_OSError);
87 |
88 | packet.payload.user = user;
89 | packet.payload.tv_sec = timestamp.tv_sec;
90 | packet.payload.tv_usec = timestamp.tv_usec;
91 | packet.icmp.checksum = 0;
92 | packet.icmp.checksum = in_cksum(
93 | &packet.icmp, sizeof(packet.icmp) + sizeof(packet.payload));
94 |
95 | sent = sendto(
96 | sockfd, (void*) &packet, sizeof(packet), 0,
97 | (struct sockaddr*) &targetaddr, sizeof(targetaddr));
98 | if (sent < 0) {
99 | return PyErr_SetFromErrno(PyExc_IOError);
100 | }
101 |
102 | Py_RETURN_NONE;
103 | }
104 |
105 |
106 | static PyObject *receive(PyObject *self, PyObject *args) {
107 | char ip[16];
108 | uint8_t control[PINGER_CONTROL_SIZE];
109 | icmp_t packet;
110 | struct msghdr msg;
111 | struct cmsghdr *cmsg;
112 | struct iovec entry;
113 | struct sockaddr_in from_addr;
114 | struct timeval *stamp;
115 | int res;
116 | int sockfd;
117 |
118 | if (!PyArg_ParseTuple(args, "i", &sockfd)) {
119 | return NULL;
120 | }
121 |
122 | for(;;) {
123 | uint64_t user;
124 | int secs;
125 | int usecs;
126 |
127 | memset(&msg, 0, sizeof(msg));
128 | msg.msg_iov = &entry;
129 | msg.msg_iovlen = 1;
130 | entry.iov_base = &packet;
131 | entry.iov_len = sizeof(packet);
132 | msg.msg_name = (caddr_t)&from_addr;
133 | msg.msg_namelen = sizeof(from_addr);
134 | msg.msg_control = control;
135 | msg.msg_controllen = PINGER_CONTROL_SIZE;
136 |
137 |     Py_BEGIN_ALLOW_THREADS;
138 |     res = recvmsg(sockfd, &msg, 0);
139 |     Py_END_ALLOW_THREADS;
140 |     if (res < 0) {
141 |       return PyErr_SetFromErrno(PyExc_IOError);
142 |     }
143 |
144 | /* Check that we actually sent this packet. */
145 | if (res != sizeof(packet)) {
146 | continue;
147 | }
148 | if (packet.payload.magic != htonl(PINGER_MAGIC)) {
149 | continue;
150 | }
151 |
152 | stamp = NULL;
153 | for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
154 |
155 | if (cmsg->cmsg_level != SOL_SOCKET)
156 | continue;
157 | if (cmsg->cmsg_type != SO_TIMESTAMP)
158 | continue;
159 |
160 | stamp = (struct timeval *)CMSG_DATA(cmsg);
161 | }
162 |
163 | if (!stamp) {
164 | fprintf(stderr, "No timestamp provided by the kernel\n");
165 | continue;
166 | }
167 |
168 | user = packet.payload.user;
169 | secs = stamp->tv_sec - packet.payload.tv_sec;
170 | usecs = stamp->tv_usec - packet.payload.tv_usec;
171 | if (usecs < 0) {
172 | secs--;
173 | usecs = (1000000 + stamp->tv_usec) - packet.payload.tv_usec;
174 | }
175 |
176 | if (inet_ntop(AF_INET, &from_addr.sin_addr, ip, sizeof(ip)) == NULL)
177 | return PyErr_Format(PyExc_IOError, "inet_ntop failed for IP");
178 |
179 |     return Py_BuildValue("sKii", ip, user, secs, usecs);
180 | }
181 | }
182 |
183 |
184 | static PyObject *create_socket(PyObject *self, PyObject *unused_args) {
185 | struct icmp_filter filt;
186 | int enable = 1;
187 | int rcvbuf = PINGER_RCVBUF;
188 | int sockfd;
189 |
190 | if ((sockfd = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
191 | return PyErr_SetFromErrno(PyExc_IOError);
192 |
193 | if (setsockopt(
194 | sockfd, SOL_SOCKET, SO_TIMESTAMP, &enable, sizeof(enable)) < 0)
195 | return PyErr_SetFromErrno(PyExc_IOError);
196 |
197 | /* This requires changing net.core.rmem_max, but is needed to not drop
198 | * ICMP replies. */
199 | if (setsockopt(
200 | sockfd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) < 0)
201 | return PyErr_SetFromErrno(PyExc_IOError);
202 |
203 | if (setsockopt(
204 | sockfd, IPPROTO_IP, IP_HDRINCL, &enable, sizeof(enable)) < 0)
205 | return PyErr_SetFromErrno(PyExc_IOError);
206 |
207 |   filt.data = ~(1<<ICMP_ECHOREPLY);
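
For reference, a hypothetical driver showing how the Python side (pingerd)
might use this extension; the target address is an example, and the raw
socket requires root or CAP_NET_RAW:

    import dhmonpinger

    sockfd = dhmonpinger.create_socket()
    dhmonpinger.transmit(sockfd, '192.0.2.1', 1)  # 1 is an opaque user tag
    ip, user, secs, usecs = dhmonpinger.receive(sockfd)
    print 'reply from %s: %.3f ms' % (ip, secs * 1000.0 + usecs / 1000.0)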