├── .bumpversion.cfg ├── .gitignore ├── .pylintrc ├── .travis.yml ├── .version ├── CHANGELOG.md ├── DESCRIPTION ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── Rakefile ├── beacon.jpg ├── beacon.png ├── debian ├── after_install.sh ├── before_install.sh ├── before_remove.sh ├── config.json ├── graphite_beacon.init ├── systemd.service └── upstart.conf ├── docker ├── exim4 ├── supervisor.conf └── update-exim4.conf.conf ├── examples ├── example-config.json ├── example-config.yml └── extended │ └── gmail_handler.json ├── graphite_beacon ├── __init__.py ├── _compat.py ├── alerts.py ├── app.py ├── core.py ├── graphite.py ├── handlers │ ├── __init__.py │ ├── cli.py │ ├── hipchat.py │ ├── http.py │ ├── log.py │ ├── opsgenie.py │ ├── pagerduty.py │ ├── slack.py │ ├── smtp.py │ ├── telegram.py │ └── victorops.py ├── template.py ├── templates │ ├── base.html │ ├── common │ │ ├── message.html │ │ ├── message.txt │ │ └── short.txt │ ├── graphite │ │ ├── message.html │ │ ├── message.txt │ │ ├── short.txt │ │ └── slack.txt │ └── url │ │ ├── message.html │ │ ├── message.txt │ │ └── short.txt ├── units.py └── utils.py ├── pytest.ini ├── requirements.txt ├── setup.cfg ├── setup.py ├── test-requirements.txt ├── tests ├── __init__.py ├── integration │ ├── __init__.py │ ├── graphite_test.py │ └── url_test.py ├── unit │ ├── __init__.py │ ├── alerts_test.py │ ├── conftest.py │ ├── core_test.py │ ├── graphite_test.py │ ├── handlers │ │ └── smtp_test.py │ ├── units_test.py │ └── utils_test.py └── util.py └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | commit = True 3 | current_version = 0.27.2 4 | files = graphite_beacon/__init__.py 5 | tag = True 6 | tag_name = {new_version} 7 | 8 | [bumpversion:file:.version] 9 | 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | 56 | /todo.txt 57 | /pid 58 | /*.deb 59 | /bintray 60 | /local.json 61 | 62 | # Vim backup files 63 | *~ 64 | 65 | /.env 66 | /.ropeproject 67 | /.tox 68 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # Add files or directories matching the regex patterns to the blacklist. The 4 | # regex matches against base names, not paths. 5 | ignore-patterns= 6 | 7 | # Pickle collected data for later comparisons. 8 | persistent=yes 9 | 10 | # List of plugins (as comma separated values of python modules names) to load, 11 | # usually to register additional checkers. 12 | load-plugins= 13 | 14 | # Use multiple processes to speed up Pylint. 15 | jobs=1 16 | 17 | # Allow loading of arbitrary C extensions. Extensions are imported into the 18 | # active Python interpreter and may run arbitrary code. 19 | unsafe-load-any-extension=no 20 | 21 | # A comma-separated list of package or module names from where C extensions may 22 | # be loaded. Extensions are loading into the active Python interpreter and may 23 | # run arbitrary code 24 | extension-pkg-whitelist= 25 | 26 | # Allow optimization of some AST trees. This will activate a peephole AST 27 | # optimizer, which will apply various small optimizations. For instance, it can 28 | # be used to obtain the result of joining multiple strings with the addition 29 | # operator. Joining a lot of strings can lead to a maximum recursion error in 30 | # Pylint and this flag can prevent that. It has one side effect, the resulting 31 | # AST will be different than the one from reality. This option is deprecated 32 | # and it will be removed in Pylint 2.0. 33 | optimize-ast=no 34 | 35 | 36 | [MESSAGES CONTROL] 37 | 38 | # Only show warnings with the listed confidence levels. Leave empty to show 39 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 40 | confidence= 41 | 42 | # Enable the message, report, category or checker with the given id(s). You can 43 | # either give multiple identifier separated by comma (,) or put this option 44 | # multiple time (only on the command line, not in the configuration file where 45 | # it should appear only once). See also the "--disable" option for examples. 46 | #enable= 47 | 48 | # Disable the message, report, category or checker with the given id(s). You 49 | # can either give multiple identifiers separated by comma (,) or put this 50 | # option multiple times (only on the command line, not in the configuration 51 | # file where it should appear only once).You can also use "--disable=all" to 52 | # disable everything first and then reenable specific checks. For example, if 53 | # you want to run only the similarities checker, you can use "--disable=all 54 | # --enable=similarities". If you want to run only the classes checker, but have 55 | # no Warning level messages displayed, use"--disable=all --enable=classes 56 | # --disable=W" 57 | disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating,missing-docstring,too-many-arguments,too-many-instance-attributes,broad-except,fixme,logging-not-lazy,duplicate-code,locally-disabled,file-ignored,attribute-defined-outside-init,logging-format-interpolation 58 | 59 | 60 | [REPORTS] 61 | 62 | # Set the output format. Available formats are text, parseable, colorized, msvs 63 | # (visual studio) and html. You can also give a reporter class, eg 64 | # mypackage.mymodule.MyReporterClass. 65 | output-format=text 66 | 67 | # Put messages in a separate file for each module / package specified on the 68 | # command line instead of printing them on stdout. Reports (if any) will be 69 | # written in a file name "pylint_global.[txt|html]". This option is deprecated 70 | # and it will be removed in Pylint 2.0. 71 | files-output=no 72 | 73 | # Tells whether to display a full report or only the messages 74 | reports=yes 75 | 76 | # Python expression which should return a note less than 10 (10 is the highest 77 | # note). You have access to the variables errors warning, statement which 78 | # respectively contain the number of errors / warnings messages and the total 79 | # number of statements analyzed. This is used by the global evaluation report 80 | # (RP0004). 81 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 82 | 83 | # Template used to display messages. This is a python new-style format string 84 | # used to format the message information. See doc for all details 85 | #msg-template= 86 | 87 | 88 | [BASIC] 89 | 90 | # Good variable names which should always be accepted, separated by a comma 91 | good-names=i,j,k,e,ex,Run,_ 92 | 93 | # Bad variable names which should always be refused, separated by a comma 94 | bad-names=foo,bar,baz,toto,tutu,tata 95 | 96 | # Colon-delimited sets of names that determine each other's naming style when 97 | # the name regexes allow several styles. 98 | name-group= 99 | 100 | # Include a hint for the correct naming format with invalid-name 101 | include-naming-hint=no 102 | 103 | # List of decorators that produce properties, such as abc.abstractproperty. Add 104 | # to this list to register other decorators that produce valid properties. 105 | property-classes=abc.abstractproperty 106 | 107 | # Regular expression matching correct function names 108 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 109 | 110 | # Naming hint for function names 111 | function-name-hint=[a-z_][a-z0-9_]{2,30}$ 112 | 113 | # Regular expression matching correct variable names 114 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 115 | 116 | # Naming hint for variable names 117 | variable-name-hint=[a-z_][a-z0-9_]{2,30}$ 118 | 119 | # Regular expression matching correct constant names 120 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 121 | 122 | # Naming hint for constant names 123 | const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 124 | 125 | # Regular expression matching correct attribute names 126 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 127 | 128 | # Naming hint for attribute names 129 | attr-name-hint=[a-z_][a-z0-9_]{2,30}$ 130 | 131 | # Regular expression matching correct argument names 132 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 133 | 134 | # Naming hint for argument names 135 | argument-name-hint=[a-z_][a-z0-9_]{2,30}$ 136 | 137 | # Regular expression matching correct class attribute names 138 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 139 | 140 | # Naming hint for class attribute names 141 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 142 | 143 | # Regular expression matching correct inline iteration names 144 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 145 | 146 | # Naming hint for inline iteration names 147 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 148 | 149 | # Regular expression matching correct class names 150 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 151 | 152 | # Naming hint for class names 153 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 154 | 155 | # Regular expression matching correct module names 156 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 157 | 158 | # Naming hint for module names 159 | module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 160 | 161 | # Regular expression matching correct method names 162 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 163 | 164 | # Naming hint for method names 165 | method-name-hint=[a-z_][a-z0-9_]{2,30}$ 166 | 167 | # Regular expression which should only match function or class names that do 168 | # not require a docstring. 169 | no-docstring-rgx=^_ 170 | 171 | # Minimum line length for functions/classes that require docstrings, shorter 172 | # ones are exempt. 173 | docstring-min-length=-1 174 | 175 | 176 | [ELIF] 177 | 178 | # Maximum number of nested blocks for function / method body 179 | max-nested-blocks=5 180 | 181 | 182 | [FORMAT] 183 | 184 | # Maximum number of characters on a single line. 185 | max-line-length=100 186 | 187 | # Regexp for a line that is allowed to be longer than the limit. 188 | ignore-long-lines=^\s*(# )??$ 189 | 190 | # Allow the body of an if to be on the same line as the test if there is no 191 | # else. 192 | single-line-if-stmt=no 193 | 194 | # List of optional constructs for which whitespace checking is disabled. `dict- 195 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 196 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 197 | # `empty-line` allows space-only lines. 198 | no-space-check=trailing-comma,dict-separator 199 | 200 | # Maximum number of lines in a module 201 | max-module-lines=1000 202 | 203 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 204 | # tab). 205 | indent-string=' ' 206 | 207 | # Number of spaces of indent required inside a hanging or continued line. 208 | indent-after-paren=4 209 | 210 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 211 | expected-line-ending-format= 212 | 213 | [SIMILARITIES] 214 | 215 | # Minimum lines number of a similarity. 216 | min-similarity-lines=4 217 | 218 | # Ignore comments when computing similarities. 219 | ignore-comments=yes 220 | 221 | # Ignore docstrings when computing similarities. 222 | ignore-docstrings=yes 223 | 224 | # Ignore imports when computing similarities. 225 | ignore-imports=no 226 | 227 | 228 | [SPELLING] 229 | 230 | # Spelling dictionary name. Available dictionaries: none. To make it working 231 | # install python-enchant package. 232 | spelling-dict= 233 | 234 | # List of comma separated words that should not be checked. 235 | spelling-ignore-words= 236 | 237 | # A path to a file that contains private dictionary; one word per line. 238 | spelling-private-dict-file= 239 | 240 | # Tells whether to store unknown words to indicated private dictionary in 241 | # --spelling-private-dict-file option instead of raising a message. 242 | spelling-store-unknown-words=no 243 | 244 | 245 | [TYPECHECK] 246 | 247 | # Tells whether missing members accessed in mixin class should be ignored. A 248 | # mixin class is detected if its name ends with "mixin" (case insensitive). 249 | ignore-mixin-members=yes 250 | 251 | # List of module names for which member attributes should not be checked 252 | # (useful for modules/projects where namespaces are manipulated during runtime 253 | # and thus existing member attributes cannot be deduced by static analysis. It 254 | # supports qualified module names, as well as Unix pattern matching. 255 | ignored-modules= 256 | 257 | # List of class names for which member attributes should not be checked (useful 258 | # for classes with dynamically set attributes). This supports the use of 259 | # qualified names. 260 | ignored-classes=optparse.Values,thread._local,_thread._local 261 | 262 | # List of members which are set dynamically and missed by pylint inference 263 | # system, and so shouldn't trigger E1101 when accessed. Python regular 264 | # expressions are accepted. 265 | generated-members= 266 | 267 | # List of decorators that produce context managers, such as 268 | # contextlib.contextmanager. Add to this list to register other decorators that 269 | # produce valid context managers. 270 | contextmanager-decorators=contextlib.contextmanager 271 | 272 | 273 | [VARIABLES] 274 | 275 | # Tells whether we should check for unused import in __init__ files. 276 | init-import=no 277 | 278 | # A regular expression matching the name of dummy variables (i.e. expectedly 279 | # not used). 280 | dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy 281 | 282 | # List of additional names supposed to be defined in builtins. Remember that 283 | # you should avoid to define new builtins when possible. 284 | additional-builtins= 285 | 286 | # List of strings which can identify a callback function by name. A callback 287 | # name must start or end with one of those strings. 288 | callbacks=cb_,_cb 289 | 290 | # List of qualified module names which can have objects that can redefine 291 | # builtins. 292 | redefining-builtins-modules=six.moves,future.builtins 293 | 294 | 295 | [CLASSES] 296 | 297 | # List of method names used to declare (i.e. assign) instance attributes. 298 | defining-attr-methods=__init__,__new__,setUp 299 | 300 | # List of valid names for the first argument in a class method. 301 | valid-classmethod-first-arg=cls 302 | 303 | # List of valid names for the first argument in a metaclass class method. 304 | valid-metaclass-classmethod-first-arg=mcs 305 | 306 | # List of member names, which should be excluded from the protected access 307 | # warning. 308 | exclude-protected=_asdict,_fields,_replace,_source,_make 309 | 310 | 311 | [DESIGN] 312 | 313 | # Maximum number of arguments for function / method 314 | max-args=5 315 | 316 | # Argument names that match this expression will be ignored. Default to name 317 | # with leading underscore 318 | ignored-argument-names=_.* 319 | 320 | # Maximum number of locals for function / method body 321 | max-locals=15 322 | 323 | # Maximum number of return / yield for function / method body 324 | max-returns=6 325 | 326 | # Maximum number of branch for function / method body 327 | max-branches=12 328 | 329 | # Maximum number of statements in function / method body 330 | max-statements=50 331 | 332 | # Maximum number of parents for a class (see R0901). 333 | max-parents=7 334 | 335 | # Maximum number of attributes for a class (see R0902). 336 | max-attributes=7 337 | 338 | # Minimum number of public methods for a class (see R0903). 339 | min-public-methods=2 340 | 341 | # Maximum number of public methods for a class (see R0904). 342 | max-public-methods=20 343 | 344 | # Maximum number of boolean expressions in a if statement 345 | max-bool-expr=5 346 | 347 | 348 | [IMPORTS] 349 | 350 | # Deprecated modules which should not be used, separated by a comma 351 | deprecated-modules=regsub,TERMIOS,Bastion,rexec 352 | 353 | # Create a graph of every (i.e. internal and external) dependencies in the 354 | # given file (report RP0402 must not be disabled) 355 | import-graph= 356 | 357 | # Create a graph of external dependencies in the given file (report RP0402 must 358 | # not be disabled) 359 | ext-import-graph= 360 | 361 | # Create a graph of internal dependencies in the given file (report RP0402 must 362 | # not be disabled) 363 | int-import-graph= 364 | 365 | # Force import order to recognize a module as part of the standard 366 | # compatibility libraries. 367 | known-standard-library= 368 | 369 | # Force import order to recognize a module as part of a third party library. 370 | known-third-party=enchant 371 | 372 | # Analyse import fallback blocks. This can be used to support both Python 2 and 373 | # 3 compatible code, which means that the block might have code that exists 374 | # only in one or another interpreter, leading to false positives when analysed. 375 | analyse-fallback-blocks=no 376 | 377 | 378 | [EXCEPTIONS] 379 | 380 | # Exceptions that will emit a warning when being caught. Defaults to 381 | # "Exception" 382 | overgeneral-exceptions=Exception 383 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 2.7 4 | 5 | env: 6 | - TOXENV=py27 7 | - TOXENV=py34 8 | - TOXENV=cov 9 | - TOXENV=pylint 10 | - TOXENV=pep8 11 | 12 | branches: 13 | only: 14 | - master 15 | - develop 16 | 17 | install: pip install --quiet tox 18 | 19 | script: tox 20 | 21 | after_script: 22 | - if [ $TOXENV == "cov" ]; then 23 | pip install --quiet coveralls; 24 | coveralls; 25 | fi 26 | -------------------------------------------------------------------------------- /.version: -------------------------------------------------------------------------------- 1 | 0.27.2 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.27.0 2 | 3 | - Upgrade tornado and drop Python 2.6 support (#144) 4 | - Improvement: Fail-fast with better error messages on fatal errors (e.g no config file) 5 | - Bug fix: Add default backwards-compatible config.json for Docker (#134) 6 | - Bug fix: Include pyyaml by default and use YAML config loader for `.yaml` files (#143) 7 | - Bug fix: Ensure `time_window` is honored when `until` is non-zero. Previously, the value 8 | of `until` would reduce that of `time_window`. (#147) 9 | 10 | ## 0.26.0 11 | 12 | - Improvement: use `validate_cert` for URL alerts (#111) 13 | - Improvement: better incident key and `client_url` for pagerduty handler (#109) 14 | - Improvement/bug fixes: enable persistence for the telegram handler and various bug fixes (#130) 15 | 16 | ## 0.25.4 17 | 18 | - Bug fix: don't crash due to lack of SIGHUP on Windows (#94) 19 | - Bug fix: access dict correctly in hipchat handler (#96) 20 | - Improvement: Allow slack notifications to users (#100) 21 | 22 | ## 0.25.3 23 | 24 | - Added 'minimum' and 'maximum' methods 25 | - Allow alerts to be defined in multiple config files 26 | 27 | ## 0.25.1 28 | 29 | - Fix issue #46; 30 | - Support `until` option for Graphite queries; 31 | - Customize alert behaviour with no data; 32 | - Enhance expressions (support AND/OR); 33 | - Added VictorOps handler; 34 | - Better Slack notifications; 35 | - Added `public_graphite_url` option; 36 | 37 | ## 0.24.0 38 | 39 | - Support YAML in config files. 40 | You should have install `yaml` and use `.yml` as config files. 41 | 42 | ## 0.23.0 43 | 44 | - Support systemd 45 | - Update CLI handler 46 | - Add PagerDuty handler 47 | 48 | ## 0.20.0 49 | 50 | - Add Slack (https://slack.com) handler 51 | - Add `request_timeout` alerts' option 52 | - Change history_size format: 144 -> 1day 53 | 54 | ## 0.18.0 55 | 56 | - Python 2.6 support 57 | 58 | ## 0.14.0 59 | 60 | - Add `smtp.graphite_url` option for set graphite_url in emails 61 | - Add `send_initial` option for send initial values when graphite-beacon starts 62 | - Update HTML email templates 63 | 64 | ## 0.12.0 65 | 66 | - Change format of handlers options 67 | 68 | ## 0.11.0 69 | 70 | - Fix release 0.9.0 71 | 72 | ## 0.9.0 73 | 74 | - Update units system 75 | - Support `include` 76 | - Easiest rules format 77 | 78 | ## 0.6.1 79 | 80 | - Support units format (bytes, s, ms, short, percent) 81 | - HTML emails 82 | - Added `repeat_interval` 83 | 84 | ## 0.4.0 85 | 86 | - Support URL alerts (load http response and check status) 87 | 88 | ## 0.2.0 89 | 90 | - First public release 91 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Simple alerting system for Graphite metrics. 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | From debian 2 | MAINTAINER docker@deliverous.com 3 | ENV DEBIAN_FRONTEND noninteractive 4 | RUN apt-get update && apt-get -y dist-upgrade && apt-get install -y python-pip python-dev supervisor exim4 && apt-get clean 5 | RUN pip install graphite-beacon 6 | RUN pip install supervisor-stdout 7 | 8 | # Supervisord 9 | ADD docker/supervisor.conf /etc/supervisor/conf.d/deliverous.conf 10 | 11 | # Conf Exim 12 | ADD docker/update-exim4.conf.conf /etc/exim4/update-exim4.conf.conf 13 | ADD docker/exim4 /etc/default/exim4 14 | 15 | # Add a default /config.json for backward compatibility 16 | RUN echo '{ "include":["/srv/alerting/etc/config.json"] }' > /config.json 17 | 18 | CMD ["/usr/bin/supervisord"] 19 | 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Kirill Klenov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include MANIFEST.in 3 | include README.rst 4 | include requirements.txt 5 | include test-requirements.txt 6 | 7 | recursive-include graphite_beacon * 8 | 9 | recursive-exclude * __pycache__ 10 | recursive-exclude * *.py[co] 11 | recursive-exclude * *.orig 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VIRTUALENV=$(shell echo "$${VDIR:-'.env'}") 2 | space:= 3 | space+= 4 | 5 | all: $(VIRTUALENV) 6 | 7 | .PHONY: help 8 | # target: help - Display callable targets 9 | help: 10 | @egrep "^# target:" [Mm]akefile 11 | 12 | .PHONY: clean 13 | # target: clean - Clean the repository 14 | clean: 15 | @rm -rf build dist docs/_build *.deb 16 | find $(CURDIR)/$(MODULE) -name "*.pyc" -delete 17 | find $(CURDIR)/$(MODULE) -name "*.orig" -delete 18 | find $(CURDIR)/$(MODULE) -name "__pycache__" -delete 19 | 20 | # ============== 21 | # Bump version 22 | # ============== 23 | 24 | .PHONY: release 25 | VERSION?=minor 26 | # target: release - Bump version 27 | release: 28 | @pip install bumpversion 29 | @bumpversion $(VERSION) 30 | @git checkout master 31 | @git merge develop 32 | @git checkout develop 33 | @git push origin develop master 34 | @git push --tags 35 | 36 | .PHONY: minor 37 | minor: release 38 | 39 | .PHONY: patch 40 | patch: 41 | make release VERSION=patch 42 | 43 | .PHONY: major 44 | major: 45 | make release VERSION=major 46 | 47 | # =============== 48 | # Build package 49 | # =============== 50 | 51 | .PHONY: register 52 | # target: register - Register module on PyPi 53 | register: 54 | @python setup.py register 55 | 56 | .PHONY: upload 57 | # target: upload - Upload module on PyPi 58 | upload: clean 59 | @pip install twine wheel 60 | @python setup.py sdist upload 61 | @python setup.py bdist_wheel upload 62 | # @python setup.py sdist bdist_wheel 63 | # @twine upload dist/* 64 | 65 | .PHONY: deb 66 | BUILD=$(CURDIR)/build 67 | TARGET=/opt/graphite/beacon 68 | PACKAGE_POSTFIX?= 69 | PACKAGE_VERSION?=$(shell git describe --tags --abbrev=0 `git rev-list master --tags --max-count=1`) 70 | PACKAGE_NAME="graphite-beacon" 71 | PACKAGE_FULLNAME=$(PACKAGE_NAME)$(PACKAGE_POSTFIX) 72 | PACKAGE_MAINTAINER="Kirill Klenov " 73 | PACKAGE_DESCRIPTION="Simple allerting system for Graphite metrics." 74 | PACKAGE_URL=https://github.com/klen/graphite-beacon.git 75 | deb: clean 76 | @mkdir -p $(BUILD)/etc/init $(BUILD)/etc/init.d $(BUILD)/etc/systemd/system $(BUILD)/$(TARGET) 77 | @cp -r $(CURDIR)/graphite_beacon debian/config.json $(BUILD)/$(TARGET)/. 78 | @cp $(CURDIR)/debian/upstart.conf $(BUILD)/etc/init/graphite-beacon.conf 79 | @cp $(CURDIR)/debian/graphite_beacon.init $(BUILD)/etc/init.d/graphite-beacon 80 | @cp $(CURDIR)/debian/systemd.service $(BUILD)/etc/systemd/system/graphite-beacon.service 81 | @fpm -s dir -t deb -a all \ 82 | -n $(PACKAGE_FULLNAME) \ 83 | -v $(PACKAGE_VERSION) \ 84 | -m $(PACKAGE_MAINTAINER) \ 85 | --directories $(TARGET) \ 86 | --description $(PACKAGE_DESCRIPTION) \ 87 | --url $(PACKAGE_URL) \ 88 | --license "Copyright (C) 2014 horneds@gmail.com." \ 89 | --deb-user root \ 90 | --deb-group root \ 91 | --config-files /etc/init/graphite-beacon.conf \ 92 | --config-files /etc/systemd/system/graphite-beacon.service \ 93 | --config-files /opt/graphite/beacon/config.json \ 94 | --before-install $(CURDIR)/debian/before_install.sh \ 95 | --before-remove $(CURDIR)/debian/before_remove.sh \ 96 | --after-install $(CURDIR)/debian/after_install.sh \ 97 | -C $(CURDIR)/build \ 98 | -d "python" \ 99 | -d "python-dev" \ 100 | -d "python-pip" \ 101 | opt etc 102 | echo "%$(subst $(space),,$(PACKAGE_VERSION))%" 103 | for name in *.deb; do \ 104 | [ -f bintray ] && curl -T "$$name" -uklen:`cat bintray` https://api.bintray.com/content/klen/deb/graphite-beacon/$(subst $(space),,$(PACKAGE_VERSION))/$$name ; \ 105 | done 106 | 107 | # ============= 108 | # Development 109 | # ============= 110 | 111 | $(VIRTUALENV): requirements.txt 112 | @[ -d $(VIRTUALENV) ] || virtualenv --no-site-packages $(VIRTUALENV) 113 | @$(VIRTUALENV)/bin/pip install -r requirements.txt 114 | @touch $(VIRTUALENV) 115 | 116 | $(VIRTUALENV)/bin/py.test: $(VIRTUALENV) test-requirements.txt 117 | @$(VIRTUALENV)/bin/pip install -r test-requirements.txt 118 | @touch $(VIRTUALENV)/bin/py.test 119 | 120 | .PHONY: run 121 | # target: run - Run graphite-beacon 122 | run: $(VIRTUALENV) 123 | @$(VIRTUALENV)/bin/pip install -r test-requirements.txt 124 | $(VIRTUALENV)/bin/python -m graphite_beacon.app --config=local.json 125 | 126 | .PHONY: t 127 | # target: t - Runs tests 128 | t: $(VIRTUALENV)/bin/py.test tests 129 | $(VIRTUALENV)/bin/py.test -xs tests 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | graphite-beacon 2 | =============== 3 | 4 | ![logo](https://raw.github.com/klen/graphite-beacon/develop/beacon.png) 5 | 6 | Simple alerting system for [Graphite](http://graphite.wikidot.com/) metrics. 7 | 8 | Features: 9 | 10 | - Simple installation 11 | - No software dependencies (Databases, AMQP and etc) 12 | - Light and fully asynchronous 13 | - SMTP, HipChat, Slack, PagerDuty, HTTP handlers (PRs for additional handlers are welcome!) 14 | - Easily configurable and supports historical values 15 | 16 | [![Build status](http://img.shields.io/travis/klen/graphite-beacon.svg?style=flat-square)](http://travis-ci.org/klen/graphite-beacon) 17 | [![Coverage](http://img.shields.io/coveralls/klen/graphite-beacon.svg?style=flat-square)](https://coveralls.io/r/klen/graphite-beacon) 18 | [![Version](http://img.shields.io/pypi/v/graphite-beacon.svg?style=flat-square)](https://pypi.python.org/pypi/graphite_beacon) 19 | [![License](http://img.shields.io/pypi/l/graphite-beacon.svg?style=flat-square)](https://pypi.python.org/pypi/graphite_beacon) 20 | [![Downloads](http://img.shields.io/pypi/dm/graphite-beacon.svg?style=flat-square)](https://pypi.python.org/pypi/graphite_beacon) 21 | 22 | Example: 23 | ```js 24 | { 25 | "graphite_url": "http://g.server.org", 26 | "smtp": { 27 | "from": "beacon@server.org", 28 | "to": ["me@gmail.com"] 29 | }, 30 | "alerts": [ 31 | { "name": "MEM", 32 | "format": "bytes", 33 | "query": "aliasByNode(sumSeriesWithWildcards(collectd.*.memory.{memory-free,memory-cached}, 3), 1)", 34 | "rules": ["critical: < 200MB", "warning: < 400MB", "warning: < historical / 2"] }, 35 | { "name": "CPU", 36 | "format": "percent", 37 | "query": "aliasByNode(sumSeriesWithWildcards(collectd.*.cpu-*.cpu-user, 2), 1)", 38 | "rules": ["critical: >= 80%", "warning: >= 70%"] } 39 | ]} 40 | ``` 41 | 42 | Requirements 43 | ------------ 44 | 45 | - python (2.7, 3.3, 3.4) 46 | - tornado 47 | - funcparserlib 48 | - pyyaml 49 | 50 | 51 | Installation 52 | ------------ 53 | 54 | ### Python package 55 | 56 | **graphite-beacon** can be installed using pip: 57 | 58 | pip install graphite-beacon 59 | 60 | ### Debian package 61 | 62 | Using the command line, add the following to your /etc/apt/sources.list system config file: 63 | 64 | echo "deb http://dl.bintray.com/klen/deb /" | sudo tee -a /etc/apt/sources.list 65 | echo "deb-src http://dl.bintray.com/klen/deb /" | sudo tee -a /etc/apt/sources.list 66 | 67 | Install the package using apt-get: 68 | 69 | apt-get update 70 | apt-get install graphite-beacon 71 | 72 | ### Ansible role 73 | 74 | There is an ansible role to install the package: https://github.com/Stouts/Stouts.graphite-beacon 75 | 76 | ## Docker 77 | 78 | Build a config.json file and run : 79 | 80 | docker run -v /path/to/config.json:/srv/alerting/etc/config.json deliverous/graphite-beacon 81 | 82 | 83 | Usage 84 | ----- 85 | 86 | Just run `graphite-beacon`: 87 | 88 | $ graphite-beacon 89 | [I 141025 11:16:23 core:141] Read configuration 90 | [I 141025 11:16:23 core:55] Memory (10minute): init 91 | [I 141025 11:16:23 core:166] Loaded with options: 92 | ... 93 | 94 | ### Configuration 95 | 96 | ___ 97 | 98 | Time units: 99 | 100 | > '2second', '3.5minute', '4hour', '5.2day', '6week', '7month', '8year' 101 | 102 | > short formats are: '2s', '3m', '4.1h' ... 103 | 104 | Value units: 105 | 106 | > short: '2K', '3Mil', '4Bil', '5Tri' 107 | 108 | > bytes: '2KB', '3MB', '4GB' 109 | 110 | > bits: '2Kb', '3Mb', '4Gb' 111 | 112 | > bps: '2Kbps', '3Mbps', '4Gbps' 113 | 114 | > time: '2s', '3m', '4h', '5d' 115 | 116 | The default options are: 117 | 118 | > Note: comments are not allowed in JSON, but graphite-beacon strips them 119 | 120 | ```js 121 | 122 | { 123 | // Graphite server URL 124 | "graphite_url": "http://localhost", 125 | 126 | // Public graphite server URL 127 | // Used when notifying handlers, defaults to graphite_url 128 | "public_graphite_url": null, 129 | 130 | // HTTP AUTH username 131 | "auth_username": null, 132 | 133 | // HTTP AUTH password 134 | "auth_password": null, 135 | 136 | // Path to a pidfile 137 | "pidfile": null, 138 | 139 | // Default values format (none, bytes, s, ms, short) 140 | // Can be redefined for each alert. 141 | "format": "short", 142 | 143 | // Default query interval 144 | // Can be redefined for each alert. 145 | "interval": "10minute", 146 | 147 | // Default time window for Graphite queries 148 | // Defaults to query interval, can be redefined for each alert. 149 | "time_window": "10minute", 150 | 151 | // Notification repeat interval 152 | // If an alert is failed, its notification will be repeated with the interval below 153 | "repeat_interval": "2hour", 154 | 155 | // Default end time for Graphite queries 156 | // Defaults to the current time, can be redefined for each alert. 157 | "until": "0second", 158 | 159 | // Default loglevel 160 | "logging": "info", 161 | 162 | // Default method (average, last_value, sum, minimum, maximum). 163 | // Can be redefined for each alert. 164 | "method": "average", 165 | 166 | // Default alert to send when no data received (normal = no alert) 167 | // Can be redefined for each alert 168 | "no_data": "critical", 169 | 170 | // Default alert to send when loading failed (timeout, server error, etc) 171 | // (normal = no alert) 172 | // Can be redefined for each alert 173 | "loading_error": "critical" 174 | 175 | // Default prefix (used for notifications) 176 | "prefix": "[BEACON]", 177 | 178 | // Default handlers (log, smtp, hipchat, http, slack, pagerduty) 179 | "critical_handlers": ["log", "smtp"], 180 | "warning_handlers": ["log", "smtp"], 181 | "normal_handlers": ["log", "smtp"], 182 | 183 | // Send initial values (Send current values when reactor starts) 184 | "send_initial": true, 185 | 186 | // used together to ignore the missing value 187 | "default_nan_value": -1, 188 | "ignore_nan": false, 189 | 190 | // Default alerts (see configuration below) 191 | "alerts": [], 192 | 193 | // Path to other configuration files to include 194 | "include": [] 195 | } 196 | ``` 197 | 198 | You can setup options with a configuration file. See examples for 199 | [JSON](examples/example-config.json) and 200 | [YAML](examples/example-config.yml). 201 | 202 | A `config.json` file in the same directory that you run `graphite-beacon` 203 | from will be used automatically. 204 | 205 | #### Setup alerts 206 | 207 | Currently two types of alerts are supported: 208 | - Graphite alert (default) - check graphite metrics 209 | - URL alert - load http and check status 210 | 211 | > Note: comments are not allowed in JSON, but graphite-beacon strips them 212 | 213 | ```js 214 | 215 | "alerts": [ 216 | { 217 | // (required) Alert name 218 | "name": "Memory", 219 | 220 | // (required) Alert query 221 | "query": "*.memory.memory-free", 222 | 223 | // (optional) Alert type (graphite, url) 224 | "source": "graphite", 225 | 226 | // (optional) Default values format (none, bytes, s, ms, short) 227 | "format": "bytes", 228 | 229 | // (optional) Alert method (average, last_value, sum, minimum, maximum) 230 | "method": "average", 231 | 232 | // (optional) Alert interval [eg. 15second, 30minute, 2hour, 1day, 3month, 1year] 233 | "interval": "1minute", 234 | 235 | // (optional) What kind of alert to send when no data received (normal = no alert) 236 | "no_data": "warning", 237 | 238 | // (optional) Alert interval end time (see "Alert interval" for examples) 239 | "until": "5second", 240 | 241 | // (required) Alert rules 242 | // Rule format: "{level}: {operator} {value}" 243 | // Level one of [critical, warning, normal] 244 | // Operator one of [>, <, >=, <=, ==, !=] 245 | // Value (absolute value: 3000000 or short form like 3MB/12minute) 246 | // Multiple conditions can be separated by AND or OR conditions 247 | "rules": [ "critical: < 200MB", "warning: < 300MB" ] 248 | } 249 | ] 250 | ``` 251 | 252 | ##### Historical values 253 | 254 | graphite-beacon supports "historical" values for a rule. 255 | For example you may want to get warning when CPU usage is greater than 150% of normal usage: 256 | 257 | "warning: > historical * 1.5" 258 | 259 | Or memory is less than half the usual value: 260 | 261 | "warning: < historical / 2" 262 | 263 | 264 | Historical values for each query are kept. A historical value 265 | represents the average of all values in history. Rules using a historical value will 266 | only work after enough values have been collected (see `history_size`). 267 | 268 | History values are kept for 1 day by default. You can change this with the `history_size` 269 | option. 270 | 271 | See the below example for how to send a warning when today's new user count is 272 | less than 80% of the last 10 day average: 273 | 274 | ```js 275 | alerts: [ 276 | { 277 | "name": "Registrations", 278 | // Run once per day 279 | "interval": "1day", 280 | "query": "Your graphite query here", 281 | // Get average for last 10 days 282 | "history_size": "10day", 283 | "rules": [ 284 | // Warning if today's new user less than 80% of average for 10 days 285 | "warning: < historical * 0.8", 286 | // Critical if today's new user less than 50% of average for 10 days 287 | "critical: < historical * 0.5" 288 | ] 289 | } 290 | ], 291 | ``` 292 | 293 | ### Handlers 294 | 295 | Handlers allow for notifying an external service or process of an alert firing. 296 | 297 | #### Email Handler 298 | 299 | Sends an email (enabled by default). 300 | 301 | ```js 302 | { 303 | // SMTP default options 304 | "smtp": { 305 | "from": "beacon@graphite", 306 | "to": [], // List of email addresses to send to 307 | "host": "localhost", // SMTP host 308 | "port": 25, // SMTP port 309 | "username": null, // SMTP user (optional) 310 | "password": null, // SMTP password (optional) 311 | "use_tls": false, // Use TLS? 312 | "html": true, // Send HTML emails? 313 | 314 | // Graphite link for emails (By default is equal to main graphite_url) 315 | "graphite_url": null 316 | } 317 | } 318 | ``` 319 | 320 | #### HipChat Handler 321 | 322 | Sends a message to a HipChat room. 323 | 324 | ```js 325 | { 326 | "hipchat": { 327 | // (optional) Custom HipChat URL 328 | "url": 'https://api.custom.hipchat.my', 329 | 330 | "room": "myroom", 331 | "key": "mykey" 332 | } 333 | } 334 | ``` 335 | 336 | #### Webhook Handler (HTTP) 337 | 338 | Triggers a webhook. 339 | 340 | ```js 341 | { 342 | "http": { 343 | "url": "http://myhook.com", 344 | "params": {}, // (optional) Additional query(data) params 345 | "method": "GET" // (optional) HTTP method 346 | } 347 | } 348 | ``` 349 | 350 | #### Slack Handler 351 | 352 | Sends a message to a user or channel on Slack. 353 | 354 | ```js 355 | { 356 | "slack": { 357 | "webhook": "https://hooks.slack.com/services/...", 358 | "channel": "#general", // #channel or @user (optional) 359 | "username": "graphite-beacon", 360 | } 361 | } 362 | ``` 363 | 364 | #### Command Line Handler 365 | 366 | Runs a command. 367 | 368 | ```js 369 | { 370 | "cli": { 371 | // Command to run (required) 372 | // Several variables that will be substituted by values are allowed: 373 | // ${level} -- alert level 374 | // ${name} -- alert name 375 | // ${value} -- current metrics value 376 | // ${limit_value} -- metrics limit value 377 | "command": "./myscript ${level} ${name} ${value} ...", 378 | 379 | // Whitelist of alerts that will trigger this handler (optional) 380 | // All alerts will trigger this handler if absent. 381 | "alerts_whitelist": ["..."] 382 | } 383 | } 384 | ``` 385 | 386 | #### PagerDuty Handler 387 | 388 | Triggers a PagerDuty incident. 389 | 390 | ```js 391 | { 392 | "pagerduty": { 393 | "subdomain": "yoursubdomain", 394 | "apitoken": "apitoken", 395 | "service_key": "servicekey", 396 | } 397 | } 398 | ``` 399 | 400 | #### Telegram Handler 401 | 402 | Sends a Telegram message. 403 | 404 | ```js 405 | { 406 | "telegram": { 407 | "token": "telegram bot token", 408 | "bot_ident": "token you choose to activate bot in a group" 409 | "chatfile": "path to file where chat ids are saved, optional field" 410 | } 411 | } 412 | ``` 413 | 414 | ### Command Line Usage 415 | 416 | ``` 417 | $ graphite-beacon --help 418 | Usage: graphite-beacon [OPTIONS] 419 | 420 | Options: 421 | 422 | --config Path to an configuration file (JSON/YAML) 423 | (default config.json) 424 | --graphite_url Graphite URL (default http://localhost) 425 | --help show this help information 426 | --pidfile Set pid file 427 | 428 | --log_file_max_size max size of log files before rollover 429 | (default 100000000) 430 | --log_file_num_backups number of log files to keep (default 10) 431 | --log_file_prefix=PATH Path prefix for log files. Note that if you 432 | are running multiple tornado processes, 433 | log_file_prefix must be different for each 434 | of them (e.g. include the port number) 435 | --log_to_stderr Send log output to stderr (colorized if 436 | possible). By default use stderr if 437 | --log_file_prefix is not set and no other 438 | logging is configured. 439 | --logging=debug|info|warning|error|none 440 | Set the Python log level. If 'none', tornado 441 | won't touch the logging configuration. 442 | (default info) 443 | ``` 444 | 445 | Bug tracker 446 | ----------- 447 | 448 | If you have any suggestions, bug reports or annoyances please report them to 449 | the issue tracker at https://github.com/klen/graphite-beacon/issues 450 | 451 | Contributors 452 | ------------- 453 | 454 | * Andrej Kuročenko (https://github.com/kurochenko) 455 | * Cody Soyland (https://github.com/codysoyland) 456 | * Garrett Heel (https://github.com/GarrettHeel) 457 | * George Ionita (https://github.com/georgeionita) 458 | * James Yuzawa (https://github.com/yuzawa-san) 459 | * Kirill Klenov (https://github.com/klen) 460 | * Konstantin Bakulin (https://github.com/kbakulin) 461 | * Lammert Hellinga (https://github.com/Kogelvis) 462 | * Miguel Moll (https://github.com/MiguelMoll) 463 | * Nick Pillitteri (https://github.com/56quarters) 464 | * Niku Toivola (https://github.com/nikut) 465 | * Olli-Pekka Puolitaival (https://github.com/OPpuolitaival) 466 | * Phillip Hagedorn (https://github.com/phagedorn) 467 | * Raine Virta (https://github.com/raine) 468 | * Scott Nonnenberg (https://github.com/scottnonnenberg) 469 | * Sean Johnson (https://github.com/pirogoeth) 470 | * Terry Peng (https://github.com/tpeng) 471 | * Thomas Clavier (https://github.com/tclavier) 472 | * Yuriy Ilyin (https://github.com/YuriyIlyin) 473 | * dugeem (https://github.com/dugeem) 474 | * Joakim (https://github.com/VibyJocke) 475 | 476 | License 477 | -------- 478 | 479 | Licensed under a [MIT license](http://www.linfo.org/mitlicense.html) 480 | 481 | If you wish to express your appreciation for the role, you are welcome to send 482 | a postcard to: 483 | 484 | Kirill Klenov 485 | pos. Severny 8-3 486 | MO, Istra, 143500 487 | Russia 488 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | desc "Build graphite-beacon docker" 2 | task :build do 3 | sh "docker build -t graphite-beacon . " 4 | end 5 | 6 | desc "Run demo docker container" 7 | task :run => :build do 8 | sh "docker run -v $(pwd)/examples/example-config.json:/config.json graphite-beacon" 9 | end 10 | -------------------------------------------------------------------------------- /beacon.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klen/graphite-beacon/c1f071e9f557693bc90f6acbc314994985dc3b77/beacon.jpg -------------------------------------------------------------------------------- /beacon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klen/graphite-beacon/c1f071e9f557693bc90f6acbc314994985dc3b77/beacon.png -------------------------------------------------------------------------------- /debian/after_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ `/sbin/init --version` =~ upstart ]]; then start graphite-beacon; 4 | elif [[ `systemctl` =~ -\.mount ]]; then systemctl start graphite-beacon; 5 | elif [[ -f /etc/init.d/cron && ! -h /etc/init.d/cron ]]; then /etc/init.d/graphite-beacon start; 6 | fi 7 | -------------------------------------------------------------------------------- /debian/before_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | python -m pip install tornado funcparserlib 4 | -------------------------------------------------------------------------------- /debian/before_remove.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ `/sbin/init --version` =~ upstart ]]; then stop graphite-beacon; 4 | elif [[ `systemctl` =~ -\.mount ]]; then systemctl stop graphite-beacon; 5 | elif [[ -f /etc/init.d/cron && ! -h /etc/init.d/cron ]]; then /etc/init.d/graphite-beacon stop; 6 | fi 7 | 8 | -------------------------------------------------------------------------------- /debian/config.json: -------------------------------------------------------------------------------- 1 | // Graphite-beacon configuration file (default values) 2 | { 3 | // Path to config file 4 | "config": "config.json", 5 | 6 | // Graphite server URL 7 | "graphite_url": "http://localhost", 8 | 9 | // HTTP AUTH username 10 | "auth_username": null, 11 | 12 | // HTTP AUTH password 13 | "auth_password": null, 14 | 15 | // Path to a pidfile 16 | "pidfile": null, 17 | 18 | // Default query interval 19 | // Can be redfined for each alert. 20 | // [eg. 15second, 30minute, 2hour, 1day, 3month, 1year] 21 | "interval": "10minute", 22 | 23 | // Default loglevel 24 | "logging": "info", 25 | 26 | // Default method (average, last_value). 27 | "method": "average", 28 | 29 | // Default prefix (used for notifications) 30 | "prefix": "[BEACON]", 31 | 32 | // Default handlers (log, smtp, hipchat) 33 | "critical_handlers": ["log", "smtp"], 34 | "warning_handlers": ["log", "smtp"], 35 | "normal_handlers": ["log", "smtp"], 36 | 37 | // Place your alerts here 38 | "alerts": [ 39 | 40 | ] 41 | } 42 | -------------------------------------------------------------------------------- /debian/graphite_beacon.init: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ### BEGIN INIT INFO 3 | # Provides: 4 | # Required-Start: $remote_fs $syslog 5 | # Required-Stop: $remote_fs $syslog 6 | # Default-Start: 2 3 4 5 7 | # Default-Stop: 0 1 6 8 | # Short-Description: Start daemon at boot time 9 | # Description: Enable service provided by daemon graphite_beacon. Used https://github.com/fhd/init-script-template for this init.d script 10 | ### END INIT INFO 11 | 12 | dir="/opt/graphite/beacon" 13 | cmd="python -m graphite_beacon.app --log_file_prefix=/var/log/graphite-beacon.log" 14 | user="" 15 | 16 | name=`basename $0` 17 | pid_file="/var/run/$name.pid" 18 | stdout_log="/var/log/$name.log" 19 | stderr_log="/var/log/$name.err" 20 | 21 | get_pid() { 22 | cat "$pid_file" 23 | } 24 | 25 | is_running() { 26 | [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 27 | } 28 | 29 | case "$1" in 30 | start) 31 | if is_running; then 32 | echo "Already started" 33 | else 34 | echo "Starting $name" 35 | cd "$dir" 36 | if [ -z "$user" ]; then 37 | sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & 38 | else 39 | sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & 40 | fi 41 | echo $! > "$pid_file" 42 | if ! is_running; then 43 | echo "Unable to start, see $stdout_log and $stderr_log" 44 | exit 1 45 | fi 46 | fi 47 | ;; 48 | stop) 49 | if is_running; then 50 | echo -n "Stopping $name.." 51 | kill `get_pid` 52 | for i in {1..10} 53 | do 54 | if ! is_running; then 55 | break 56 | fi 57 | 58 | echo -n "." 59 | sleep 1 60 | done 61 | echo 62 | 63 | if is_running; then 64 | echo "Not stopped; may still be shutting down or shutdown may have failed" 65 | exit 1 66 | else 67 | echo "Stopped" 68 | if [ -f "$pid_file" ]; then 69 | rm "$pid_file" 70 | fi 71 | fi 72 | else 73 | echo "Not running" 74 | fi 75 | ;; 76 | restart) 77 | $0 stop 78 | if is_running; then 79 | echo "Unable to stop, will not attempt to start" 80 | exit 1 81 | fi 82 | $0 start 83 | ;; 84 | status) 85 | if is_running; then 86 | echo "Running" 87 | else 88 | echo "Stopped" 89 | exit 1 90 | fi 91 | ;; 92 | *) 93 | echo "Usage: $0 {start|stop|restart|status}" 94 | exit 1 95 | ;; 96 | esac 97 | 98 | exit 0 99 | 100 | -------------------------------------------------------------------------------- /debian/systemd.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=graphite-beacon example 3 | 4 | [Install] 5 | WantedBy=multi-user.target 6 | 7 | [Service] 8 | User=root 9 | Group=root 10 | WorkingDirectory=/opt/graphite/beacon 11 | ExecStart=/usr/bin/python -m graphite_beacon.app 12 | -------------------------------------------------------------------------------- /debian/upstart.conf: -------------------------------------------------------------------------------- 1 | description "Simple allerting system for Graphite metrics." 2 | start on runlevel [2345] 3 | stop on runlevel [!2345] 4 | 5 | console log 6 | kill timeout 20 7 | limit nofile 65536 65536 8 | respawn 9 | setuid root 10 | setgid root 11 | chdir /opt/graphite/beacon 12 | 13 | exec python -m graphite_beacon.app --log_file_prefix=/var/log/graphite-beacon.log 14 | -------------------------------------------------------------------------------- /docker/exim4: -------------------------------------------------------------------------------- 1 | # /etc/default/exim4 2 | EX4DEF_VERSION='' 3 | 4 | # 'combined' - one daemon running queue and listening on SMTP port 5 | # 'no' - no daemon running the queue 6 | # 'separate' - two separate daemons 7 | # 'ppp' - only run queue with /etc/ppp/ip-up.d/exim4. 8 | # 'nodaemon' - no daemon is started at all. 9 | # 'queueonly' - only a queue running daemon is started, no SMTP listener. 10 | # setting this to 'no' will also disable queueruns from /etc/ppp/ip-up.d/exim4 11 | QUEUERUNNER='combined' 12 | # how often should we run the queue 13 | QUEUEINTERVAL='30m' 14 | # options common to quez-runner and listening daemon 15 | COMMONOPTIONS='-v ' 16 | # more options for the daemon/process running the queue (applies to the one 17 | # started in /etc/ppp/ip-up.d/exim4, too. 18 | QUEUERUNNEROPTIONS='' 19 | # special flags given to exim directly after the -q. See exim(8) 20 | QFLAGS='' 21 | # Options for the SMTP listener daemon. By default, it is listening on 22 | # port 25 only. To listen on more ports, it is recommended to use 23 | # -oX 25:587:10025 -oP /var/run/exim4/exim.pid 24 | SMTPLISTENEROPTIONS='' 25 | 26 | -------------------------------------------------------------------------------- /docker/supervisor.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | 4 | [program:graphite_beacon] 5 | command=graphite-beacon 6 | priority=1 7 | startsecs=0 8 | autostart=true 9 | stdout_events_enabled = true 10 | stderr_events_enabled = true 11 | 12 | [program:exim] 13 | command=/etc/init.d/exim4 start 14 | priority=1 15 | startsecs=0 16 | autostart=true 17 | stdout_events_enabled = true 18 | stderr_events_enabled = true 19 | 20 | [eventlistener:stdout] 21 | command = supervisor_stdout 22 | buffer_size = 100 23 | events = PROCESS_LOG 24 | result_handler = supervisor_stdout:event_handler 25 | -------------------------------------------------------------------------------- /docker/update-exim4.conf.conf: -------------------------------------------------------------------------------- 1 | # /etc/exim4/update-exim4.conf.conf 2 | # 3 | # ! Managed by puppet, do not edit ! 4 | # 5 | # update-exim4.conf uses this file to determine variable values to generate 6 | # exim configuration macros for the configuration file. 7 | # 8 | 9 | dc_eximconfig_configtype='internet' 10 | dc_other_hostnames='' 11 | dc_local_interfaces='127.0.0.1 ; ::1' 12 | dc_readhost='' 13 | dc_relay_domains='' 14 | dc_minimaldns='false' 15 | dc_relay_nets='' 16 | dc_smarthost='' 17 | CFILEMODE='644' 18 | dc_use_split_config='false' 19 | dc_hide_mailname='' 20 | dc_mailname_in_oh='true' 21 | dc_localdelivery='mail_spool' 22 | -------------------------------------------------------------------------------- /examples/example-config.json: -------------------------------------------------------------------------------- 1 | // Comments are allowed here 2 | { 3 | "interval": "20minute", 4 | "logging": "info", 5 | 6 | "critical_handlers": ["log"], 7 | "warning_handlers": ["log"], 8 | "normal_handlers": ["log"], 9 | 10 | // "graphite_url": "http://localhost", 11 | 12 | "alerts": [ 13 | // A graphite alert - be sure to set `graphite_url` appropriately. 14 | { 15 | "name": "Memory", 16 | "query": "aliasByNode(collectd.*.memory.memory-free, 1)", 17 | "interval": "10minute", 18 | "format": "bytes", 19 | "rules": ["warning: < 300MB", "critical: > 200MB"] 20 | }, 21 | // A ping alert 22 | { 23 | "name": "Site", 24 | "source": "url", 25 | "query": "http://google.com", 26 | "interval": "20second", 27 | "rules": ["critical: != 200"] 28 | } 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /examples/example-config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | interval: "20minute" 3 | logging: info 4 | 5 | critical_handlers: 6 | - log 7 | warning_handlers: 8 | - log 9 | normal_handlers: 10 | - log 11 | 12 | # graphite_url: http://localhost 13 | 14 | alerts: 15 | # A graphite alert - be sure to set `graphite_url` appropriately. 16 | - name: "Memory" 17 | query: "aliasByNode(collectd.*.memory.memory-free, 1)" 18 | interval: "10minute" 19 | format: "bytes" 20 | rules: 21 | - "warning: < 300MB" 22 | - "critical: > 200MB" 23 | 24 | # A ping alert 25 | - name: "Site" 26 | source: "url" 27 | query: "http://google.com" 28 | interval: "20second" 29 | rules: 30 | - "critical: != 200" 31 | -------------------------------------------------------------------------------- /examples/extended/gmail_handler.json: -------------------------------------------------------------------------------- 1 | { 2 | "logging": "info", 3 | 4 | "critical_handlers": ["log", "smtp"], 5 | "warning_handlers": ["log", "smtp"], 6 | "normal_handlers": ["log", "smtp"], 7 | 8 | "smtp": { 9 | "username": "example@gmail.com", 10 | "password": "password", 11 | "use_tls": true, 12 | "host": "smtp.gmail.com", 13 | "port": 587, 14 | "from": "myemail@gmail.com", 15 | "to": ["myemail@gmail.com"] 16 | }, 17 | 18 | // "graphite_url": "http://localhost", 19 | 20 | "alerts": [ 21 | // Don't forget to set `graphite_url` correctly. 22 | { 23 | "name": "Memory", 24 | "query": "aliasByNode(collectd.*.memory.memory-free, 1)", 25 | "interval": "10minute", 26 | "format": "bytes", 27 | "rules": ["warning: < 300MB", "critical: > 200MB"] 28 | } 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /graphite_beacon/__init__.py: -------------------------------------------------------------------------------- 1 | """Graphite-beacon -- simple alerting system for Graphite.""" 2 | 3 | __version__ = "0.27.2" 4 | __license__ = "MIT" 5 | -------------------------------------------------------------------------------- /graphite_beacon/_compat.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | """ Compatibility. 3 | 4 | Some py2/py3 compatibility support based on a stripped down 5 | version of six so we don't have to depend on a specific version 6 | of it. 7 | 8 | :copyright: (c) 2014 by Armin Ronacher. 9 | :license: BSD 10 | """ 11 | import sys 12 | 13 | PY2 = sys.version_info[0] == 2 14 | _identity = lambda x: x 15 | 16 | 17 | if not PY2: 18 | text_type = str 19 | string_types = (str,) 20 | integer_types = (int, ) 21 | 22 | iterkeys = lambda d: iter(d.keys()) 23 | itervalues = lambda d: iter(d.values()) 24 | iteritems = lambda d: iter(d.items()) 25 | 26 | from io import StringIO 27 | 28 | from urllib import parse as urlparse 29 | 30 | def reraise(tp, value, tb=None): 31 | if value.__traceback__ is not tb: 32 | raise value.with_traceback(tb) 33 | raise value 34 | 35 | implements_to_string = _identity 36 | 37 | else: 38 | text_type = unicode 39 | string_types = (str, unicode) 40 | integer_types = (int, long) 41 | 42 | iterkeys = lambda d: d.iterkeys() 43 | itervalues = lambda d: d.itervalues() 44 | iteritems = lambda d: d.iteritems() 45 | 46 | from cStringIO import StringIO 47 | 48 | import urlparse 49 | 50 | exec('def reraise(tp, value, tb=None):\n raise tp, value, tb') 51 | 52 | def implements_to_string(cls): 53 | cls.__unicode__ = cls.__str__ 54 | cls.__str__ = lambda x: x.__unicode__().encode('utf-8') 55 | return cls 56 | 57 | 58 | def with_metaclass(meta, *bases): 59 | # This requires a bit of explanation: the basic idea is to make a 60 | # dummy metaclass for one level of class instantiation that replaces 61 | # itself with the actual metaclass. Because of internal type checks 62 | # we also need to make sure that we downgrade the custom metaclass 63 | # for one level to something closer to type (that's why __call__ and 64 | # __init__ comes back from type etc.). 65 | # 66 | # This has the advantage over six.with_metaclass in that it does not 67 | # introduce dummy classes into the final MRO. 68 | class metaclass(meta): 69 | __call__ = type.__call__ 70 | __init__ = type.__init__ 71 | 72 | def __new__(cls, name, this_bases, d): 73 | if this_bases is None: 74 | return type.__new__(cls, name, (), d) 75 | return meta(name, bases, d) 76 | 77 | return metaclass('temporary_class', None, {}) 78 | 79 | 80 | # Certain versions of pypy have a bug where clearing the exception stack 81 | # breaks the __exit__ function in a very peculiar way. This is currently 82 | # true for pypy 2.2.1 for instance. The second level of exception blocks 83 | # is necessary because pypy seems to forget to check if an exception 84 | # happend until the next bytecode instruction? 85 | BROKEN_PYPY_CTXMGR_EXIT = False 86 | if hasattr(sys, 'pypy_version_info'): 87 | class _Mgr(object): 88 | def __enter__(self): 89 | return self 90 | 91 | def __exit__(self, *args): 92 | sys.exc_clear() 93 | try: 94 | try: 95 | with _Mgr(): 96 | raise AssertionError() 97 | except: 98 | raise 99 | except TypeError: 100 | BROKEN_PYPY_CTXMGR_EXIT = True 101 | except AssertionError: 102 | pass 103 | 104 | # pylama:skip=1 105 | -------------------------------------------------------------------------------- /graphite_beacon/alerts.py: -------------------------------------------------------------------------------- 1 | """Implement alerts.""" 2 | 3 | import math 4 | from collections import defaultdict, deque 5 | from itertools import islice 6 | 7 | from tornado import httpclient as hc 8 | from tornado import escape, gen, ioloop, log 9 | 10 | from . import _compat as _ 11 | from . import units 12 | from .graphite import GraphiteRecord 13 | from .units import MILLISECOND, TimeUnit 14 | from .utils import HISTORICAL, LOGICAL_OPERATORS, convert_to_format, parse_rule 15 | 16 | LOGGER = log.gen_log 17 | METHODS = "average", "last_value", "sum", "minimum", "maximum" 18 | LEVELS = { 19 | 'critical': 0, 20 | 'warning': 10, 21 | 'normal': 20, 22 | } 23 | 24 | 25 | class sliceable_deque(deque): # pylint: disable=invalid-name 26 | 27 | """Deque with slices support.""" 28 | 29 | def __getitem__(self, index): 30 | """Support slices.""" 31 | try: 32 | return deque.__getitem__(self, index) 33 | except TypeError: 34 | return type(self)(islice(self, index.start, index.stop, index.step)) 35 | 36 | 37 | class AlertFabric(type): 38 | 39 | """Register alert's classes and produce an alert by source.""" 40 | 41 | alerts = {} 42 | 43 | def __new__(mcs, name, bases, params): 44 | """Register an Alert Class in self.""" 45 | source = params.get('source') 46 | cls = super(AlertFabric, mcs).__new__(mcs, name, bases, params) 47 | if source: 48 | mcs.alerts[source] = cls 49 | LOGGER.info('Register Alert: %s', source) 50 | return cls 51 | 52 | def get(cls, reactor, source='graphite', **options): 53 | """Get Alert Class by source.""" 54 | acls = cls.alerts[source] 55 | return acls(reactor, **options) 56 | 57 | 58 | class BaseAlert(_.with_metaclass(AlertFabric)): 59 | 60 | """Abstract basic alert class.""" 61 | 62 | source = None 63 | 64 | def __init__(self, reactor, **options): 65 | """Initialize alert.""" 66 | self.reactor = reactor 67 | self.options = options 68 | self.client = hc.AsyncHTTPClient() 69 | 70 | try: 71 | self.configure(**options) 72 | except Exception as e: 73 | LOGGER.exception(e) 74 | raise ValueError("Invalid alert configuration: %s" % e) 75 | 76 | self.waiting = False 77 | self.state = {None: "normal", "waiting": "normal", "loading": "normal"} 78 | self.history = defaultdict(lambda: sliceable_deque([], self.history_size)) 79 | 80 | LOGGER.info("Alert '%s': has inited", self) 81 | 82 | def __hash__(self): 83 | """Provide alert's hash.""" 84 | return hash(self.name) ^ hash(self.source) 85 | 86 | def __eq__(self, other): 87 | """Check that other alert iis the same.""" 88 | return hash(self) == hash(other) 89 | 90 | def __str__(self): 91 | """String representation.""" 92 | return "%s (%s)" % (self.name, self.interval) 93 | 94 | def configure(self, name=None, rules=None, query=None, **options): 95 | """Configure the alert.""" 96 | self.name = name 97 | if not name: 98 | raise AssertionError("Alert's name should be defined and not empty.") 99 | 100 | if not rules: 101 | raise AssertionError("%s: Alert's rules is invalid" % name) 102 | self.rules = [parse_rule(rule) for rule in rules] 103 | self.rules = list(sorted(self.rules, key=lambda r: LEVELS.get(r.get('level'), 99))) 104 | 105 | assert query, "%s: Alert's query is invalid" % self.name 106 | self.query = query 107 | 108 | interval_raw = options.get('interval', self.reactor.options['interval']) 109 | self.interval = TimeUnit.from_interval(interval_raw) 110 | 111 | time_window_raw = options.get( 112 | 'time_window', 113 | self.reactor.options.get('time_window', interval_raw), 114 | ) 115 | self.time_window = TimeUnit.from_interval(time_window_raw) 116 | 117 | until_raw = options.get('until', self.reactor.options['until']) 118 | self.until = TimeUnit.from_interval(until_raw) 119 | 120 | # Adjust the start time to cater for `until` 121 | self.from_time = self.time_window + self.until 122 | 123 | self._format = options.get('format', self.reactor.options['format']) 124 | self.request_timeout = options.get( 125 | 'request_timeout', self.reactor.options['request_timeout']) 126 | self.connect_timeout = options.get( 127 | 'connect_timeout', self.reactor.options['connect_timeout']) 128 | 129 | interval_ms = self.interval.convert_to(units.MILLISECOND) 130 | 131 | history_size_raw = options.get('history_size', self.reactor.options['history_size']) 132 | history_size_unit = TimeUnit.from_interval(history_size_raw) 133 | history_size_ms = history_size_unit.convert_to(MILLISECOND) 134 | self.history_size = int(math.ceil(history_size_ms / interval_ms)) 135 | 136 | self.no_data = options.get('no_data', self.reactor.options['no_data']) 137 | self.loading_error = options.get('loading_error', self.reactor.options['loading_error']) 138 | 139 | if self.reactor.options.get('debug'): 140 | self.callback = ioloop.PeriodicCallback(self.load, 5000) 141 | else: 142 | self.callback = ioloop.PeriodicCallback(self.load, interval_ms) 143 | 144 | def convert(self, value): 145 | """Convert self value.""" 146 | try: 147 | return convert_to_format(value, self._format) 148 | except (ValueError, TypeError): 149 | return value 150 | 151 | def reset(self): 152 | """Reset state to normal for all targets. 153 | 154 | It will repeat notification if a metric is still failed. 155 | """ 156 | for target in self.state: 157 | self.state[target] = "normal" 158 | 159 | def start(self): 160 | """Start checking.""" 161 | self.callback.start() 162 | self.load() 163 | 164 | def stop(self): 165 | """Stop checking.""" 166 | self.callback.stop() 167 | 168 | def check(self, records): 169 | """Check current value.""" 170 | for value, target in records: 171 | LOGGER.info("%s [%s]: %s", self.name, target, value) 172 | if value is None: 173 | self.notify(self.no_data, value, target) 174 | continue 175 | for rule in self.rules: 176 | if self.evaluate_rule(rule, value, target): 177 | self.notify(rule['level'], value, target, rule=rule) 178 | break 179 | else: 180 | self.notify('normal', value, target, rule=rule) 181 | 182 | self.history[target].append(value) 183 | 184 | def evaluate_rule(self, rule, value, target): 185 | """Calculate the value.""" 186 | def evaluate(expr): 187 | if expr in LOGICAL_OPERATORS.values(): 188 | return expr 189 | rvalue = self.get_value_for_expr(expr, target) 190 | if rvalue is None: 191 | return False # ignore this result 192 | return expr['op'](value, rvalue) 193 | 194 | evaluated = [evaluate(expr) for expr in rule['exprs']] 195 | while len(evaluated) > 1: 196 | lhs, logical_op, rhs = (evaluated.pop(0) for _ in range(3)) 197 | evaluated.insert(0, logical_op(lhs, rhs)) 198 | 199 | return evaluated[0] 200 | 201 | def get_value_for_expr(self, expr, target): 202 | """I have no idea.""" 203 | if expr in LOGICAL_OPERATORS.values(): 204 | return None 205 | rvalue = expr['value'] 206 | if rvalue == HISTORICAL: 207 | history = self.history[target] 208 | if len(history) < self.history_size: 209 | return None 210 | rvalue = sum(history) / float(len(history)) 211 | 212 | rvalue = expr['mod'](rvalue) 213 | return rvalue 214 | 215 | def notify(self, level, value, target=None, ntype=None, rule=None): 216 | """Notify main reactor about event.""" 217 | # Did we see the event before? 218 | if target in self.state and level == self.state[target]: 219 | return False 220 | 221 | # Do we see the event first time? 222 | if target not in self.state and level == 'normal' \ 223 | and not self.reactor.options['send_initial']: 224 | return False 225 | 226 | self.state[target] = level 227 | return self.reactor.notify(level, self, value, target=target, ntype=ntype, rule=rule) 228 | 229 | def load(self): 230 | """Load from remote.""" 231 | raise NotImplementedError() 232 | 233 | 234 | class GraphiteAlert(BaseAlert): 235 | 236 | """Check graphite records.""" 237 | 238 | source = 'graphite' 239 | 240 | def configure(self, **options): 241 | """Configure the alert.""" 242 | super(GraphiteAlert, self).configure(**options) 243 | 244 | self.method = options.get('method', self.reactor.options['method']) 245 | self.default_nan_value = options.get( 246 | 'default_nan_value', self.reactor.options['default_nan_value']) 247 | self.ignore_nan = options.get('ignore_nan', self.reactor.options['ignore_nan']) 248 | assert self.method in METHODS, "Method is invalid" 249 | 250 | self.auth_username = self.reactor.options.get('auth_username') 251 | self.auth_password = self.reactor.options.get('auth_password') 252 | self.validate_cert = self.reactor.options.get('validate_cert', True) 253 | 254 | self.url = self._graphite_url( 255 | self.query, graphite_url=self.reactor.options.get('graphite_url'), raw_data=True) 256 | LOGGER.debug('%s: url = %s', self.name, self.url) 257 | 258 | @gen.coroutine 259 | def load(self): 260 | """Load data from Graphite.""" 261 | LOGGER.debug('%s: start checking: %s', self.name, self.query) 262 | if self.waiting: 263 | self.notify('warning', 'Process takes too much time', target='waiting', ntype='common') 264 | else: 265 | self.waiting = True 266 | try: 267 | response = yield self.client.fetch(self.url, auth_username=self.auth_username, 268 | auth_password=self.auth_password, 269 | request_timeout=self.request_timeout, 270 | connect_timeout=self.connect_timeout, 271 | validate_cert=self.validate_cert) 272 | records = ( 273 | GraphiteRecord(line, self.default_nan_value, self.ignore_nan) 274 | for line in response.buffer) 275 | data = [ 276 | (None if record.empty else getattr(record, self.method), record.target) 277 | for record in records] 278 | if len(data) == 0: 279 | raise ValueError('No data') 280 | self.check(data) 281 | self.notify('normal', 'Metrics are loaded', target='loading', ntype='common') 282 | except Exception as e: 283 | self.notify( 284 | self.loading_error, 'Loading error: %s' % e, target='loading', ntype='common') 285 | self.waiting = False 286 | 287 | def get_graph_url(self, target, graphite_url=None): 288 | """Get Graphite URL.""" 289 | return self._graphite_url(target, graphite_url=graphite_url, raw_data=False) 290 | 291 | def _graphite_url(self, query, raw_data=False, graphite_url=None): 292 | """Build Graphite URL.""" 293 | query = escape.url_escape(query) 294 | graphite_url = graphite_url or self.reactor.options.get('public_graphite_url') 295 | 296 | url = "{base}/render/?target={query}&from=-{from_time}&until=-{until}".format( 297 | base=graphite_url, query=query, 298 | from_time=self.from_time.as_graphite(), 299 | until=self.until.as_graphite(), 300 | ) 301 | if raw_data: 302 | url = "{}&format=raw".format(url) 303 | return url 304 | 305 | 306 | class URLAlert(BaseAlert): 307 | 308 | """Check URLs.""" 309 | 310 | source = 'url' 311 | 312 | @staticmethod 313 | def get_data(response): 314 | """Value is response.status.""" 315 | return response.code 316 | 317 | @gen.coroutine 318 | def load(self): 319 | """Load URL.""" 320 | LOGGER.debug('%s: start checking: %s', self.name, self.query) 321 | if self.waiting: 322 | self.notify('warning', 'Process takes too much time', target='waiting', ntype='common') 323 | else: 324 | self.waiting = True 325 | try: 326 | response = yield self.client.fetch( 327 | self.query, method=self.options.get('method', 'GET'), 328 | request_timeout=self.request_timeout, 329 | connect_timeout=self.connect_timeout, 330 | validate_cert=self.options.get('validate_cert', True)) 331 | self.check([(self.get_data(response), self.query)]) 332 | self.notify('normal', 'Metrics are loaded', target='loading', ntype='common') 333 | 334 | except Exception as e: 335 | self.notify('critical', str(e), target='loading', ntype='common') 336 | 337 | self.waiting = False 338 | -------------------------------------------------------------------------------- /graphite_beacon/app.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import signal 3 | import sys 4 | 5 | from tornado import log 6 | from tornado.options import define, options, print_help 7 | 8 | from .core import Reactor 9 | 10 | LOGGER = log.gen_log 11 | DEFAULT_CONFIG_PATH = 'config.json' 12 | 13 | 14 | define('config', default=None, 15 | help='Path to a JSON or YAML config file (default config.json)') 16 | define('pidfile', default=Reactor.defaults['pidfile'], help='Set pid file') 17 | define('graphite_url', default=Reactor.defaults['graphite_url'], help='Graphite URL') 18 | 19 | 20 | def run(): 21 | options.parse_command_line() 22 | 23 | options_dict = options.as_dict() 24 | 25 | if not options_dict.get('config', None): 26 | if os.path.isfile(DEFAULT_CONFIG_PATH): 27 | options_dict['config'] = DEFAULT_CONFIG_PATH 28 | else: 29 | LOGGER.error("Config file is required.") 30 | print_help() 31 | sys.exit(1) 32 | 33 | reactor = Reactor(**options_dict) 34 | 35 | stop = lambda *args: reactor.stop() 36 | reinit = lambda *args: reactor.reinit() 37 | 38 | signal.signal(signal.SIGTERM, stop) 39 | signal.signal(signal.SIGINT, stop) 40 | if hasattr(signal, 'SIGHUP'): 41 | signal.signal(signal.SIGHUP, reinit) 42 | 43 | reactor.start() 44 | 45 | if __name__ == '__main__': 46 | run() 47 | -------------------------------------------------------------------------------- /graphite_beacon/core.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | from re import compile as re 5 | from re import M 6 | 7 | import yaml 8 | from tornado import ioloop, log 9 | 10 | from .alerts import BaseAlert 11 | from .handlers import registry 12 | from .units import MILLISECOND, TimeUnit 13 | 14 | LOGGER = log.gen_log 15 | 16 | COMMENT_RE = re(r'//\s+.*$', M) 17 | 18 | 19 | class Reactor(object): 20 | 21 | """ Class description. """ 22 | 23 | defaults = { 24 | 'auth_password': None, 25 | 'auth_username': None, 26 | 'config': None, 27 | 'critical_handlers': ['log', 'smtp'], 28 | 'debug': False, 29 | 'format': 'short', 30 | 'graphite_url': 'http://localhost', 31 | 'history_size': '1day', 32 | 'interval': '10minute', 33 | 'logging': 'info', 34 | 'method': 'average', 35 | 'no_data': 'critical', 36 | 'normal_handlers': ['log', 'smtp'], 37 | 'pidfile': None, 38 | 'prefix': '[BEACON]', 39 | 'public_graphite_url': None, 40 | 'repeat_interval': '2hour', 41 | 'request_timeout': 20.0, 42 | 'connect_timeout': 20.0, 43 | 'send_initial': False, 44 | 'until': '0second', 45 | 'warning_handlers': ['log', 'smtp'], 46 | 'default_nan_value': 0, 47 | 'ignore_nan': False, 48 | 'loading_error': 'critical', 49 | 'alerts': [] 50 | } 51 | 52 | def __init__(self, **options): 53 | self.alerts = set() 54 | self.loop = ioloop.IOLoop.instance() 55 | self.options = dict(self.defaults) 56 | self.reinit(**options) 57 | 58 | repeat_interval = TimeUnit.from_interval(self.options['repeat_interval']) 59 | LOGGER.info("Alarm reset interval is {}".format(repeat_interval)) 60 | self.callback = ioloop.PeriodicCallback( 61 | self.repeat, repeat_interval.convert_to(MILLISECOND)) 62 | 63 | def is_running(self): 64 | """Check whether the reactor is running. 65 | 66 | :rtype: bool 67 | """ 68 | return hasattr(self, 'callback') and self.callback.is_running() 69 | 70 | def reinit(self, **options): # pylint: disable=unused-argument 71 | LOGGER.info('Read configuration') 72 | 73 | self.options.update(options) 74 | 75 | config_valid = self.include_config(self.options.get('config')) 76 | for config in self.options.pop('include', []): 77 | config_valid = config_valid and self.include_config(config) 78 | 79 | # If we haven't started the ioloop yet and config is invalid then fail fast. 80 | if not self.is_running() and not config_valid: 81 | sys.exit(1) 82 | 83 | if not self.options['public_graphite_url']: 84 | self.options['public_graphite_url'] = self.options['graphite_url'] 85 | 86 | LOGGER.setLevel(self.options.get('logging', 'info').upper()) 87 | registry.clean() 88 | 89 | self.handlers = {'warning': set(), 'critical': set(), 'normal': set()} 90 | self.reinit_handlers('warning') 91 | self.reinit_handlers('critical') 92 | self.reinit_handlers('normal') 93 | 94 | self.remove_alerts() 95 | 96 | self.alerts = set( 97 | BaseAlert.get(self, **opts) for opts in self.options.get('alerts')) # pylint: disable=no-member 98 | 99 | # Only auto-start alerts if the reactor is already running 100 | if self.is_running(): 101 | self.start_alerts() 102 | 103 | LOGGER.debug('Loaded with options:') 104 | LOGGER.debug(json.dumps(self.options, indent=2)) 105 | return self 106 | 107 | def remove_alerts(self): 108 | for alert in list(self.alerts): 109 | alert.stop() 110 | self.alerts.remove(alert) 111 | 112 | def start_alerts(self): 113 | for alert in self.alerts: 114 | alert.start() 115 | 116 | def include_config(self, config): 117 | LOGGER.info('Load configuration: %s' % config) 118 | if config: 119 | loader_name, loader = _get_loader(config) 120 | LOGGER.debug('Using loader: %s' % loader_name) 121 | if not loader: 122 | return False 123 | try: 124 | with open(config) as fconfig: 125 | source = fconfig.read() 126 | if loader_name == 'json': 127 | source = COMMENT_RE.sub("", source) 128 | config = loader(source) 129 | self.options.get('alerts').extend(config.pop("alerts", [])) 130 | self.options.update(config) 131 | except (IOError, ValueError): 132 | LOGGER.error('Invalid config file: %s' % config) 133 | return False 134 | return True 135 | 136 | def reinit_handlers(self, level='warning'): 137 | for name in self.options['%s_handlers' % level]: 138 | try: 139 | self.handlers[level].add(registry.get(self, name)) 140 | except Exception as e: 141 | LOGGER.error('Handler "%s" did not init. Error: %s' % (name, e)) 142 | 143 | def repeat(self): 144 | LOGGER.info('Reset alerts') 145 | for alert in self.alerts: 146 | alert.reset() 147 | 148 | def start(self, start_loop=True): 149 | """Start all the things. 150 | 151 | :param start_loop bool: whether to start the ioloop. should be False if 152 | the IOLoop is managed externally 153 | """ 154 | self.start_alerts() 155 | if self.options.get('pidfile'): 156 | with open(self.options.get('pidfile'), 'w') as fpid: 157 | fpid.write(str(os.getpid())) 158 | self.callback.start() 159 | LOGGER.info('Reactor starts') 160 | 161 | if start_loop: 162 | self.loop.start() 163 | 164 | def stop(self, stop_loop=True): 165 | self.callback.stop() 166 | self.remove_alerts() 167 | if stop_loop: 168 | self.loop.stop() 169 | if self.options.get('pidfile'): 170 | os.unlink(self.options.get('pidfile')) 171 | LOGGER.info('Reactor has stopped') 172 | 173 | def notify(self, level, alert, value, target=None, ntype=None, rule=None): 174 | """ Provide the event to the handlers. """ 175 | 176 | LOGGER.info('Notify %s:%s:%s:%s', level, alert, value, target or "") 177 | 178 | if ntype is None: 179 | ntype = alert.source 180 | 181 | for handler in self.handlers.get(level, []): 182 | handler.notify(level, alert, value, target=target, ntype=ntype, rule=rule) 183 | 184 | 185 | def _get_loader(config): 186 | """Determine which config file type and loader to use based on a filename. 187 | 188 | :param config str: filename to config file 189 | :return: a tuple of the loader type and callable to load 190 | :rtype: (str, Callable) 191 | """ 192 | if config.endswith('.yml') or config.endswith('.yaml'): 193 | if not yaml: 194 | LOGGER.error("pyyaml must be installed to use the YAML loader") 195 | # TODO: stop reactor if running 196 | return None, None 197 | return 'yaml', yaml.load 198 | else: 199 | return 'json', json.loads 200 | -------------------------------------------------------------------------------- /graphite_beacon/graphite.py: -------------------------------------------------------------------------------- 1 | class GraphiteRecord(object): 2 | 3 | def __init__(self, metric_string, default_nan_value=None, ignore_nan=False): 4 | try: 5 | meta, data = metric_string.split('|') 6 | except ValueError: 7 | peek = ((metric_string[:40] + '..') 8 | if len(metric_string) > 40 else metric_string) 9 | raise ValueError("Unable to parse graphite record: {}".format(peek)) 10 | 11 | self.target, start_time, end_time, step = meta.rsplit(',', 3) 12 | self.start_time = int(start_time) 13 | self.end_time = int(end_time) 14 | self.step = int(step) 15 | self.default_nan_value = default_nan_value 16 | self.ignore_nan = ignore_nan 17 | self.values = list(self._values(data.rsplit(','))) 18 | self.empty = len(self.values) == 0 19 | 20 | def _values(self, values): 21 | for value in values: 22 | try: 23 | if self.ignore_nan and float(value) == self.default_nan_value: 24 | continue 25 | yield float(value) 26 | except ValueError: 27 | continue 28 | 29 | @property 30 | def average(self): 31 | return self.sum / len(self.values) 32 | 33 | @property 34 | def last_value(self): 35 | return self.values[-1] 36 | 37 | @property 38 | def sum(self): 39 | return sum(self.values) 40 | 41 | @property 42 | def minimum(self): 43 | return min(self.values) 44 | 45 | @property 46 | def maximum(self): 47 | return max(self.values) 48 | -------------------------------------------------------------------------------- /graphite_beacon/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | from tornado import log 2 | 3 | from graphite_beacon import _compat as _ 4 | from graphite_beacon.template import TEMPLATES 5 | 6 | LOGGER = log.gen_log 7 | 8 | 9 | class HandlerMeta(type): 10 | 11 | loaded = {} 12 | handlers = {} 13 | 14 | def __new__(mcs, name, bases, params): 15 | cls = super(HandlerMeta, mcs).__new__(mcs, name, bases, params) 16 | name = params.get('name') 17 | if name: 18 | mcs.handlers[name] = cls 19 | LOGGER.info("Register Handler: %s", name) 20 | return cls 21 | 22 | @classmethod 23 | def clean(mcs): 24 | mcs.loaded = {} 25 | 26 | @classmethod 27 | def get(mcs, reactor, name): 28 | if name not in mcs.loaded: 29 | mcs.loaded[name] = mcs.handlers[name](reactor) 30 | return mcs.loaded[name] 31 | 32 | 33 | class AbstractHandler(_.with_metaclass(HandlerMeta)): 34 | 35 | name = None 36 | defaults = {} 37 | 38 | def __init__(self, reactor): 39 | self.reactor = reactor 40 | self.options = dict(self.defaults) 41 | self.options.update(self.reactor.options.get(self.name, {})) 42 | self.init_handler() 43 | LOGGER.debug('Handler "%s" has inited: %s', self.name, self.options) 44 | 45 | def get_short(self, level, alert, value, target=None, ntype=None, rule=None): # pylint: disable=unused-argument 46 | tmpl = TEMPLATES[ntype]['short'] 47 | return tmpl.generate( 48 | level=level, reactor=self.reactor, alert=alert, value=value, target=target).strip() 49 | 50 | def init_handler(self): 51 | """ Init configuration here.""" 52 | raise NotImplementedError() 53 | 54 | def notify(self, level, alert, value, target=None, ntype=None, rule=None): 55 | raise NotImplementedError() 56 | 57 | registry = HandlerMeta # pylint: disable=invalid-name 58 | 59 | from .hipchat import HipChatHandler # pylint: disable=wrong-import-position 60 | from .http import HttpHandler # pylint: disable=wrong-import-position 61 | from .log import LogHandler # pylint: disable=wrong-import-position 62 | from .pagerduty import PagerdutyHandler # pylint: disable=wrong-import-position 63 | from .slack import SlackHandler # pylint: disable=wrong-import-position 64 | from .smtp import SMTPHandler # pylint: disable=wrong-import-position 65 | from .cli import CliHandler # pylint: disable=wrong-import-position 66 | from .opsgenie import OpsgenieHandler # pylint: disable=wrong-import-position 67 | from .victorops import VictorOpsHandler # pylint: disable=wrong-import-position 68 | from .telegram import TelegramHandler # pylint: disable=wrong-import-position 69 | -------------------------------------------------------------------------------- /graphite_beacon/handlers/cli.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | from graphite_beacon.handlers import LOGGER, AbstractHandler 4 | 5 | 6 | class CliHandler(AbstractHandler): 7 | 8 | name = 'cli' 9 | 10 | # Default options 11 | defaults = { 12 | 'command': None, 13 | 'alerts_whitelist': [], 14 | } 15 | 16 | def init_handler(self): 17 | self.command_template = self.options.get('command') 18 | self.whitelist = self.options.get('alerts_whitelist') 19 | assert self.command_template, 'Command line command is not defined.' 20 | 21 | def notify(self, level, *args, **kwargs): 22 | LOGGER.debug("Handler (%s) %s", self.name, level) 23 | 24 | def get_alert_name(*args): 25 | name = str(args[0]) 26 | # remove time characteristics e.g. (1minute) 27 | return name.rsplit(' ', 1)[0].strip() 28 | 29 | # Run only for whitelisted names if specified 30 | if not self.whitelist or get_alert_name(*args) in self.whitelist: 31 | command = substitute_variables(self.command_template, level, *args, **kwargs) 32 | subprocess.Popen( 33 | command, 34 | shell=True, 35 | stdin=None, 36 | stdout=None, 37 | stderr=None, 38 | close_fds=True) 39 | 40 | 41 | def substitute_variables(command, level, name, value, target=None, **kwargs): 42 | """Substitute variables in command fragments by values e.g. ${level} => 'warning'.""" 43 | rule = kwargs.get('rule', {}) 44 | rule_value = rule.get('value', '') if rule else '' 45 | substitutes = { 46 | '${level}': str(level), 47 | '${target}': str(target), 48 | '${name}': '"' + str(name) + '"', 49 | '${value}': str(value), 50 | '${limit_value}': str(rule_value), 51 | } 52 | 53 | result = command 54 | for pattern, value in substitutes.items(): 55 | result = result.replace(pattern, value) 56 | 57 | return result 58 | -------------------------------------------------------------------------------- /graphite_beacon/handlers/hipchat.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from tornado import httpclient as hc 4 | from tornado import gen 5 | 6 | from graphite_beacon.handlers import LOGGER, AbstractHandler 7 | 8 | 9 | class HipChatHandler(AbstractHandler): 10 | 11 | name = 'hipchat' 12 | 13 | # Default options 14 | defaults = { 15 | 'url': 'https://api.hipchat.com', 16 | 'room': None, 17 | 'key': None, 18 | } 19 | 20 | colors = { 21 | 'critical': 'red', 22 | 'warning': 'yellow', 23 | 'normal': 'green', 24 | } 25 | 26 | def init_handler(self): 27 | self.room = self.options.get('room') 28 | self.key = self.options.get('key') 29 | assert self.room, 'Hipchat room is not defined.' 30 | assert self.key, 'Hipchat key is not defined.' 31 | self.client = hc.AsyncHTTPClient() 32 | 33 | @gen.coroutine 34 | def notify(self, level, *args, **kwargs): 35 | LOGGER.debug("Handler (%s) %s", self.name, level) 36 | 37 | data = { 38 | 'message': self.get_short(level, *args, **kwargs).decode('UTF-8'), 39 | 'notify': True, 40 | 'color': self.colors.get(level, 'gray'), 41 | 'message_format': 'text', 42 | } 43 | 44 | yield self.client.fetch('{url}/v2/room/{room}/notification?auth_token={token}'.format( 45 | url=self.options.get('url'), room=self.room, token=self.key), headers={ 46 | 'Content-Type': 'application/json'}, method='POST', body=json.dumps(data)) 47 | -------------------------------------------------------------------------------- /graphite_beacon/handlers/http.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | 3 | from tornado import httpclient as hc 4 | from tornado import gen 5 | 6 | from graphite_beacon.handlers import LOGGER, AbstractHandler 7 | 8 | 9 | class HttpHandler(AbstractHandler): 10 | 11 | name = 'http' 12 | 13 | # Default options 14 | defaults = { 15 | 'params': {}, 16 | 'method': 'GET', 17 | } 18 | 19 | def init_handler(self): 20 | self.url = self.options.get('url') 21 | assert self.url, 'URL is not defined' 22 | self.params = self.options['params'] 23 | self.method = self.options['method'] 24 | self.client = hc.AsyncHTTPClient() 25 | 26 | @gen.coroutine 27 | def notify(self, level, alert, value, target=None, ntype=None, rule=None): 28 | LOGGER.debug("Handler (%s) %s", self.name, level) 29 | 30 | message = self.get_short(level, alert, value, target=target, ntype=ntype, rule=rule) 31 | data = {'alert': alert.name, 'desc': message, 'level': level} 32 | if target: 33 | data['target'] = target 34 | if rule: 35 | data['rule'] = rule['raw'] 36 | 37 | if alert.source == 'graphite': 38 | data['graph_url'] = alert.get_graph_url(target) 39 | data['value'] = value 40 | 41 | data.update(self.params) 42 | body = urllib.urlencode(data) 43 | yield self.client.fetch(self.url, method=self.method, body=body) 44 | -------------------------------------------------------------------------------- /graphite_beacon/handlers/log.py: -------------------------------------------------------------------------------- 1 | from graphite_beacon.handlers import LOGGER, AbstractHandler 2 | 3 | 4 | class LogHandler(AbstractHandler): 5 | 6 | """Handle events to log output.""" 7 | 8 | name = 'log' 9 | 10 | def init_handler(self): 11 | self.logger = LOGGER 12 | 13 | def notify(self, level, *args, **kwargs): 14 | message = self.get_short(level, *args, **kwargs) 15 | if level == 'normal': 16 | self.logger.info(message) 17 | elif level == 'warning': 18 | self.logger.warn(message) 19 | elif level == 'critical': 20 | self.logger.error(message) 21 | -------------------------------------------------------------------------------- /graphite_beacon/handlers/opsgenie.py: -------------------------------------------------------------------------------- 1 | import json 2 | import urllib 3 | 4 | from tornado import gen, httpclient 5 | 6 | from graphite_beacon.handlers import AbstractHandler 7 | 8 | 9 | class OpsgenieHandler(AbstractHandler): 10 | 11 | name = 'opsgenie' 12 | 13 | defaults = { 14 | 'api_key': None 15 | } 16 | 17 | def init_handler(self): 18 | self.api_key = self.options.get('api_key') 19 | assert self.api_key, "Opsgenie API key not defined." 20 | self.client = httpclient.AsyncHTTPClient() 21 | 22 | @gen.coroutine 23 | def notify(self, level, alert, value, target=None, *args, **kwargs): 24 | 25 | message = self.get_short(level, alert, value, target, *args, **kwargs) 26 | description = "{url}/composer/?{params}".format( 27 | url=self.reactor.options['public_graphite_url'], 28 | params=urllib.urlencode({'target': alert.query})) 29 | alias = target + ':' + alert.name 30 | 31 | if level == 'critical': 32 | yield self.client.fetch( 33 | 'https://api.opsgenie.com/v1/json/alert', 34 | method='POST', 35 | headers={'Content-Type': 'application/json'}, 36 | body=json.dumps({'apiKey': self.api_key, 37 | 'message': message, 38 | 'alias': alias, 39 | 'description': description})) 40 | elif level == 'normal': 41 | # Close issue 42 | yield self.client.fetch( 43 | 'https://api.opsgenie.com/v1/json/alert/close', 44 | method='POST', 45 | headers={'Content-Type': 'application/json'}, 46 | body=json.dumps({'apiKey': self.api_key, 'alias': alias})) 47 | # TODO: Maybe add option to create alert when level == 'warning'? 48 | -------------------------------------------------------------------------------- /graphite_beacon/handlers/pagerduty.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from tornado import httpclient as hc 4 | from tornado import gen 5 | 6 | from graphite_beacon.handlers import LOGGER, AbstractHandler 7 | 8 | 9 | class PagerdutyHandler(AbstractHandler): 10 | 11 | name = 'pagerduty' 12 | 13 | # Default options 14 | defaults = { 15 | 'subdomain': None, 16 | 'apitoken': None, 17 | 'service_key': None 18 | } 19 | 20 | def init_handler(self): 21 | self.subdomain = self.options.get('subdomain') 22 | assert self.subdomain, 'subdomain is not defined' 23 | self.apitoken = self.options.get('apitoken') 24 | assert self.apitoken, 'apitoken is not defined' 25 | self.service_key = self.options.get('service_key') 26 | assert self.service_key, 'service_key is not defined' 27 | self.client = hc.AsyncHTTPClient() 28 | 29 | @gen.coroutine 30 | def notify(self, level, alert, value, target=None, ntype=None, rule=None): 31 | LOGGER.debug("Handler (%s) %s", self.name, level) 32 | message = self.get_short(level, alert, value, target=target, ntype=ntype, rule=rule) 33 | LOGGER.debug('message1:%s', message) 34 | if level == 'normal': 35 | event_type = 'resolve' 36 | else: 37 | event_type = 'trigger' 38 | 39 | headers = { 40 | "Content-type": "application/json", 41 | } 42 | 43 | client_url = None 44 | if target: 45 | client_url = alert.get_graph_url(target) 46 | incident_key = 'graphite connect error' 47 | if rule: 48 | incident_key = "alert={},rule={}".format(alert.name, rule['raw']) 49 | 50 | data = { 51 | "service_key": self.service_key, 52 | "event_type": event_type, 53 | "description": message, 54 | "details": message, 55 | "incident_key": incident_key, 56 | "client": 'graphite-beacon', 57 | "client_url": client_url 58 | } 59 | yield self.client.fetch( 60 | "https://events.pagerduty.com/generic/2010-04-15/create_event.json", 61 | body=json.dumps(data), 62 | headers=headers, 63 | method='POST' 64 | ) 65 | -------------------------------------------------------------------------------- /graphite_beacon/handlers/slack.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from tornado import httpclient as hc 4 | from tornado import gen 5 | 6 | from graphite_beacon.handlers import LOGGER, AbstractHandler 7 | from graphite_beacon.template import TEMPLATES 8 | 9 | 10 | class SlackHandler(AbstractHandler): 11 | 12 | name = 'slack' 13 | 14 | # Default options 15 | defaults = { 16 | 'webhook': None, 17 | 'channel': None, 18 | 'username': 'graphite-beacon', 19 | } 20 | 21 | emoji = { 22 | 'critical': ':exclamation:', 23 | 'warning': ':warning:', 24 | 'normal': ':white_check_mark:', 25 | } 26 | 27 | def init_handler(self): 28 | self.webhook = self.options.get('webhook') 29 | assert self.webhook, 'Slack webhook is not defined.' 30 | 31 | self.channel = self.options.get('channel') 32 | if self.channel and not self.channel.startswith(('#', '@')): 33 | self.channel = '#' + self.channel 34 | self.username = self.options.get('username') 35 | self.client = hc.AsyncHTTPClient() 36 | 37 | def get_message(self, level, alert, value, target=None, ntype=None, rule=None): # pylint: disable=unused-argument 38 | msg_type = 'slack' if ntype == 'graphite' else 'short' 39 | tmpl = TEMPLATES[ntype][msg_type] 40 | return tmpl.generate( 41 | level=level, reactor=self.reactor, alert=alert, value=value, target=target).strip() 42 | 43 | @gen.coroutine 44 | def notify(self, level, *args, **kwargs): 45 | LOGGER.debug("Handler (%s) %s", self.name, level) 46 | 47 | message = self.get_message(level, *args, **kwargs) 48 | data = dict() 49 | data['username'] = self.username 50 | data['text'] = message 51 | data['icon_emoji'] = self.emoji.get(level, ':warning:') 52 | if self.channel: 53 | data['channel'] = self.channel 54 | 55 | body = json.dumps(data) 56 | yield self.client.fetch( 57 | self.webhook, 58 | method='POST', 59 | headers={'Content-Type': 'application/json'}, 60 | body=body 61 | ) 62 | -------------------------------------------------------------------------------- /graphite_beacon/handlers/smtp.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | from email.mime.multipart import MIMEMultipart 3 | from email.mime.text import MIMEText 4 | from smtplib import SMTP 5 | 6 | from tornado import concurrent, gen 7 | 8 | from graphite_beacon.handlers import LOGGER, TEMPLATES, AbstractHandler 9 | 10 | 11 | class SMTPHandler(AbstractHandler): 12 | 13 | name = 'smtp' 14 | 15 | # Default options 16 | defaults = { 17 | 'host': 'localhost', 18 | 'port': 25, 19 | 'username': None, 20 | 'password': None, 21 | 'from': 'beacon@graphite', 22 | 'to': None, 23 | 'use_tls': False, 24 | 'html': True, 25 | 'graphite_url': None, 26 | } 27 | 28 | def init_handler(self): 29 | """ Check self options. """ 30 | assert self.options.get('host') and self.options.get('port'), "Invalid options" 31 | assert self.options.get('to'), 'Recipients list is empty. SMTP disabled.' 32 | if not isinstance(self.options['to'], (list, tuple)): 33 | self.options['to'] = [self.options['to']] 34 | 35 | @gen.coroutine 36 | def notify(self, level, *args, **kwargs): 37 | LOGGER.debug("Handler (%s) %s", self.name, level) 38 | 39 | msg = self.get_message(level, *args, **kwargs) 40 | msg['Subject'] = self.get_short(level, *args, **kwargs) 41 | msg['From'] = self.options['from'] 42 | msg['To'] = ", ".join(self.options['to']) 43 | 44 | smtp = SMTP() 45 | yield smtp_connect(smtp, self.options['host'], self.options['port']) # pylint: disable=no-value-for-parameter 46 | 47 | if self.options['use_tls']: 48 | yield smtp_starttls(smtp) # pylint: disable=no-value-for-parameter 49 | 50 | if self.options['username'] and self.options['password']: 51 | yield smtp_login(smtp, # pylint: disable=no-value-for-parameter 52 | self.options['username'], 53 | self.options['password']) 54 | 55 | try: 56 | LOGGER.debug("Send message to: %s", ", ".join(self.options['to'])) 57 | smtp.sendmail(self.options['from'], self.options['to'], msg.as_string()) 58 | finally: 59 | smtp.quit() 60 | 61 | def get_message(self, level, alert, value, target=None, ntype=None, rule=None): 62 | txt_tmpl = TEMPLATES[ntype]['text'] 63 | ctx = dict( 64 | reactor=self.reactor, alert=alert, value=value, level=level, target=target, 65 | dt=dt, rule=rule, **self.options) 66 | msg = MIMEMultipart('alternative') 67 | plain = MIMEText(str(txt_tmpl.generate(**ctx)), 'plain') 68 | msg.attach(plain) 69 | if self.options['html']: 70 | html_tmpl = TEMPLATES[ntype]['html'] 71 | html = MIMEText(str(html_tmpl.generate(**ctx)), 'html') 72 | msg.attach(html) 73 | return msg 74 | 75 | 76 | @concurrent.return_future 77 | def smtp_connect(smtp, host, port, callback): 78 | callback(smtp.connect(host, port)) 79 | 80 | 81 | @concurrent.return_future 82 | def smtp_starttls(smtp, callback): 83 | callback(smtp.starttls()) 84 | 85 | 86 | @concurrent.return_future 87 | def smtp_login(smtp, username, password, callback): 88 | callback(smtp.login(username, password)) 89 | 90 | # pylama:ignore=E1120 91 | -------------------------------------------------------------------------------- /graphite_beacon/handlers/telegram.py: -------------------------------------------------------------------------------- 1 | """Send alerts to telegram chats""" 2 | 3 | import json 4 | from os.path import exists 5 | 6 | from tornado import gen, httpclient 7 | 8 | from graphite_beacon.handlers import LOGGER, AbstractHandler 9 | from graphite_beacon.template import TEMPLATES 10 | 11 | 12 | HELP_MESSAGE = """Telegram handler for graphite-beacon 13 | *usage* /command [parameters] 14 | *examples* 15 | /activate token123 16 | /deactivate token123 17 | *commands* 18 | _activate_ activate bot and remember this chat 19 | (no need to activate next time) 20 | _deactivate_ deactivate bot and forget this chat 21 | _help_ see this message 22 | note: every command must be preceded by slash symbol (/) 23 | *parameters* 24 | _bot-ident_ mandatory for Telegram groups 25 | note: parameters must be separated by whitespace 26 | """ 27 | 28 | 29 | class TelegramHandler(AbstractHandler): 30 | """uses telegram bot api to send alerts 31 | To make it work you want to: 32 | - create bot and write its token to configs: 33 | https://core.telegram.org/bots#3-how-do-i-create-a-bot 34 | - make up some bot_ident and write it to configs 35 | - optionally, make blank file for storing chats (chatfile) 36 | and write its path to configs 37 | """ 38 | 39 | name = 'telegram' 40 | 41 | # Default options 42 | defaults = { 43 | 'token': None, 44 | 'bot_ident': None, 45 | 'chatfile': None 46 | } 47 | 48 | def init_handler(self): 49 | 50 | token = self.options.get('token') 51 | assert token, 'Telegram bot API token is not defined.' 52 | 53 | self.client = CustomClient(token) 54 | 55 | self.bot_ident = self.options.get('bot_ident') 56 | assert self.bot_ident, 'Telegram bot ident token is not defined.' 57 | 58 | chatfile = self.options.get('chatfile') 59 | if not chatfile: 60 | LOGGER.warning('chatfile not found in configs') 61 | elif not exists(chatfile): 62 | LOGGER.error('chatfile specified in configs does not exist') 63 | chatfile = None 64 | self.chatfile = chatfile 65 | self.chats = get_chatlist(self.chatfile) 66 | 67 | self._listen_commands() 68 | 69 | @gen.coroutine 70 | def _listen_commands(self): 71 | """Monitor new updates and send them further to 72 | self._respond_commands, where bot actions 73 | are decided. 74 | """ 75 | 76 | self._last_update = None 77 | update_body = {'timeout': 2} 78 | 79 | while True: 80 | latest = self._last_update 81 | # increase offset to filter out older updates 82 | update_body.update({'offset': latest + 1} if latest else {}) 83 | update_resp = self.client.get_updates(update_body) 84 | update_resp.add_done_callback(self._respond_commands) 85 | yield gen.sleep(5) 86 | 87 | @gen.coroutine 88 | def _respond_commands(self, update_response): 89 | """Extract commands to bot from update and 90 | act accordingly. For description of commands, 91 | see HELP_MESSAGE variable on top of this module. 92 | """ 93 | 94 | chatfile = self.chatfile 95 | chats = self.chats 96 | 97 | exc, upd = update_response.exception(), update_response.result().body 98 | if exc: 99 | LOGGER.error(str(exc)) 100 | if not upd: 101 | return 102 | 103 | data = get_data(upd, self.bot_ident) 104 | for update_id, chat_id, message_id, command in data: 105 | self._last_update = update_id 106 | chat_is_known = chat_id in chats 107 | chats_changed = False 108 | reply_text = None 109 | 110 | if command == '/activate': 111 | if chat_is_known: 112 | reply_text = 'This chat is already activated.' 113 | else: 114 | LOGGER.debug( 115 | 'Adding chat [%s] to notify list.', chat_id) 116 | reply_text = 'Activated.' 117 | chats.add(chat_id) 118 | chats_changed = True 119 | 120 | elif command == '/deactivate': 121 | if chat_is_known: 122 | LOGGER.debug( 123 | 'Deleting chat [%s] from notify list.', chat_id) 124 | reply_text = 'Deactivated.' 125 | chats.remove(chat_id) 126 | chats_changed = True 127 | 128 | if chats_changed and chatfile: 129 | write_to_file(chats, chatfile) 130 | 131 | elif command == '/help': 132 | reply_text = HELP_MESSAGE 133 | 134 | else: 135 | LOGGER.warning('Could not parse command: ' 136 | 'bot ident is wrong or missing') 137 | 138 | if reply_text: 139 | yield self.client.send_message({ 140 | 'chat_id': chat_id, 141 | 'reply_to_message_id': message_id, 142 | 'text': reply_text, 143 | 'parse_mode': 'Markdown', 144 | }) 145 | 146 | @gen.coroutine 147 | def notify(self, level, *args, **kwargs): 148 | """Sends alerts to telegram chats. 149 | This method is called from top level module. 150 | Do not rename it. 151 | """ 152 | 153 | LOGGER.debug('Handler (%s) %s', self.name, level) 154 | 155 | notify_text = self.get_message(level, *args, **kwargs) 156 | for chat in self.chats.copy(): 157 | data = {"chat_id": chat, "text": notify_text} 158 | yield self.client.send_message(data) 159 | 160 | def get_message(self, level, alert, value, **kwargs): 161 | """Standart alert message. Same format across all 162 | graphite-beacon handlers. 163 | """ 164 | target, ntype = kwargs.get('target'), kwargs.get('ntype') 165 | 166 | msg_type = 'telegram' if ntype == 'graphite' else 'short' 167 | tmpl = TEMPLATES[ntype][msg_type] 168 | generated = tmpl.generate( 169 | level=level, reactor=self.reactor, alert=alert, 170 | value=value, target=target,) 171 | return generated.decode().strip() 172 | 173 | 174 | def write_to_file(chats, chatfile): 175 | """called every time chats are modified""" 176 | with open(chatfile, 'w') as handler: 177 | handler.write('\n'.join((str(id_) for id_ in chats))) 178 | 179 | 180 | def get_chatlist(chatfile): 181 | """Try reading ids of saved chats from file. 182 | If we fail, return empty set""" 183 | if not chatfile: 184 | return set() 185 | try: 186 | with open(chatfile) as file_contents: 187 | return set(int(chat) for chat in file_contents) 188 | except (OSError, IOError) as exc: 189 | LOGGER.error('could not load saved chats:\n%s', exc) 190 | return set() 191 | 192 | 193 | def get_data(upd, bot_ident): 194 | """Parse telegram update.""" 195 | 196 | update_content = json.loads(upd.decode()) 197 | result = update_content['result'] 198 | data = (get_fields(update, bot_ident) for update in result) 199 | return (dt for dt in data if dt is not None) 200 | 201 | 202 | def get_fields(upd, bot_ident): 203 | """In telegram api, not every update has message field, 204 | and not every message has update field. 205 | We skip those cases. Rest of fields are mandatory. 206 | We also skip if text is not a valid command to handler. 207 | """ 208 | msg = upd.get('message', {}) 209 | text = msg.get('text') 210 | if not text: 211 | return 212 | chat_id = msg['chat']['id'] 213 | command = filter_commands(text, chat_id, bot_ident) 214 | if not command: 215 | return 216 | return (upd['update_id'], chat_id, msg['message_id'], command) 217 | 218 | 219 | def filter_commands(text, chat_id, correct_ident): 220 | """Check if text is valid command to bot. 221 | Return string(either some command or error name) or None. 222 | Telegram group may have many participants including bots, 223 | so we need to check bot identifier to make sure command is 224 | given to our bot. 225 | """ 226 | is_group = (chat_id < 0) # always negative for groups 227 | split_cmd = text.split()[:2] 228 | command = split_cmd[0].strip().lower() 229 | 230 | # make sure command is known 231 | if command not in ('/activate', '/deactivate', '/help'): 232 | return 233 | # dont check bot_ident if not in group 234 | if not is_group: 235 | return command 236 | # check bot_ident 237 | if len(split_cmd) < 2: 238 | return 'no_ident' 239 | if split_cmd[1].strip() != correct_ident: 240 | return 'wrong_ident' 241 | 242 | return command 243 | 244 | 245 | class CustomClient(object): 246 | """Handles all http requests using telegram api methods""" 247 | 248 | def __init__(self, tg_bot_token): 249 | self.token = tg_bot_token 250 | self.client = httpclient.AsyncHTTPClient() 251 | self.get_updates = self.fetchmaker('getUpdates') 252 | self.send_message = self.fetchmaker('sendMessage') 253 | 254 | def url(self, tg_api_method): 255 | """construct url from base url, bot token and api method""" 256 | base_url = 'https://api.telegram.org/bot%s/%s' 257 | return base_url % (self.token, tg_api_method) 258 | 259 | def fetchmaker(self, telegram_api_method): 260 | """Receives api method as string and returns 261 | wrapper around AsyncHTTPClient's fetch method 262 | """ 263 | 264 | fetch = self.client.fetch 265 | request = self.url(telegram_api_method) 266 | 267 | def _fetcher(body, method='POST', headers=None): 268 | """Uses fetch method of tornado http client.""" 269 | body = json.dumps(body) 270 | if not headers: 271 | headers = {} 272 | headers.update({'Content-Type': 'application/json'}) 273 | return fetch( 274 | request=request, body=body, method=method, headers=headers) 275 | return _fetcher 276 | -------------------------------------------------------------------------------- /graphite_beacon/handlers/victorops.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from tornado import httpclient as hc 4 | from tornado import gen 5 | 6 | from graphite_beacon.handlers import LOGGER, AbstractHandler 7 | 8 | try: 9 | from urlparse import urljoin 10 | except ImportError: 11 | from urllib.parse import urljoin 12 | 13 | 14 | class VictorOpsHandler(AbstractHandler): 15 | 16 | name = 'victorops' 17 | 18 | def init_handler(self): 19 | self.url = self.options.get('endpoint') 20 | assert self.url, 'REST Endpoint is not defined' 21 | 22 | self.routing_key = self.options.get('routing_key', 'everyone') 23 | self.url = urljoin(self.url, self.routing_key) 24 | 25 | self.client = hc.AsyncHTTPClient() 26 | 27 | @gen.coroutine 28 | def notify(self, level, alert, value, target=None, ntype=None, rule=None): 29 | LOGGER.debug("Handler (%s) %s", self.name, level) 30 | 31 | message = self.get_short(level, alert, value, target=target, ntype=ntype, rule=rule) 32 | data = {'entity_display_name': alert.name, 'state_message': message, 'message_type': level} 33 | if target: 34 | data['target'] = target 35 | if rule: 36 | data['rule'] = rule['raw'] 37 | body = json.dumps(data) 38 | headers = {'Content-Type': 'application/json;'} 39 | yield self.client.fetch(self.url, method="POST", body=body, headers=headers) 40 | -------------------------------------------------------------------------------- /graphite_beacon/template.py: -------------------------------------------------------------------------------- 1 | import os.path as op 2 | 3 | from tornado import template 4 | 5 | LOADER = template.Loader(op.join(op.dirname(op.abspath(__file__)), 'templates'), autoescape=None) 6 | TEMPLATES = { 7 | 'graphite': { 8 | 'html': LOADER.load('graphite/message.html'), 9 | 'text': LOADER.load('graphite/message.txt'), 10 | 'short': LOADER.load('graphite/short.txt'), 11 | 'telegram': LOADER.load('graphite/short.txt'), 12 | 'slack': LOADER.load('graphite/slack.txt') 13 | }, 14 | 'url': { 15 | 'html': LOADER.load('url/message.html'), 16 | 'text': LOADER.load('url/message.txt'), 17 | 'short': LOADER.load('url/short.txt'), 18 | }, 19 | 'common': { 20 | 'html': LOADER.load('common/message.html'), 21 | 'text': LOADER.load('common/message.txt'), 22 | 'short': LOADER.load('common/short.txt'), 23 | }, 24 | } 25 | -------------------------------------------------------------------------------- /graphite_beacon/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | {{ reactor.options.get('prefix') }} {{ level.upper() }} 7 | 8 | 56 | 61 | 66 | 67 | 68 |
69 | 70 | 71 | 179 | 180 |
72 | 73 | 74 | 75 | 101 | 102 | 103 | 104 | 105 | 126 | 127 | 128 | 129 | 130 | 131 | 174 | 175 | 176 | 177 |
76 | 77 | 78 | 79 | 97 | 98 |
80 | 81 | 82 | 93 | 94 |
83 | 84 | 85 | 90 | 91 |
86 | 87 | {% block content1 %}{% end %} 88 | 89 |
92 |
95 | 96 |
99 | 100 |
106 | 107 | 108 | 109 | 122 | 123 |
110 | 111 | 112 | 113 | 118 | 119 |
114 | 115 | {% block content2 %}{% end %} 116 | 117 |
120 | 121 |
124 | 125 |
132 | 133 | 134 | 135 | 170 | 171 |
136 | 137 | 138 | 139 | 166 | 167 |
140 | 141 | 142 | 143 | 146 | 147 |
144 | 145 |
148 | 149 | 150 | 151 | 152 | 153 | 154 | 161 | 162 |
155 | 156 | You can configure alerts for notifications in your configuration file. 157 |
158 | See Graphite-beacon 159 |
160 |
163 | 164 | 165 |
168 | 169 |
172 | 173 |
178 |
181 |
182 | 183 | 184 | 185 | -------------------------------------------------------------------------------- /graphite_beacon/templates/common/message.html: -------------------------------------------------------------------------------- 1 | {% extends "../base.html" %} 2 | 3 | {% block content1 %} 4 | 5 | 6 | 16 | 17 |
7 | 8 | 9 | 10 | 13 | 14 |
11 | {{ level.upper() }} [{{alert.name}}] 12 |
15 |
18 | {% end %} 19 | 20 | {% block content2 %} 21 | 22 | 23 | 27 | 28 |
24 | Time: {{ dt.datetime.now().strftime('%H:%M %d/%m/%Y') }}
25 | Value: {{ value }}
26 |
29 | {% end %} 30 | -------------------------------------------------------------------------------- /graphite_beacon/templates/common/message.txt: -------------------------------------------------------------------------------- 1 | {{ reactor.options.get('prefix') }} {{ level.upper() }} 2 | {{ '=' * len(reactor.options.get('prefix') + level)}} 3 | 4 | Alert: {{ alert.name }} 5 | Message: {{ value }} 6 | Time: {{ dt.datetime.now().strftime('%H:%M %d/%m/%Y') }} 7 | 8 | -- 9 | 10 | You can configure alerts for notifications in your congifuration file. 11 | See https://github.com/klen/graphite-beacon 12 | 13 | -------------------------------------------------------------------------------- /graphite_beacon/templates/common/short.txt: -------------------------------------------------------------------------------- 1 | {% if level == 'normal' %} 2 | {{ reactor.options.get('prefix') }} {{level.upper()}} <{{ alert.name }}> is back to normal. 3 | {% else %} 4 | {{ reactor.options.get('prefix') }} {{ level.upper() }} <{{ alert.name }}> {{ value }} 5 | {% end %} 6 | 7 | -------------------------------------------------------------------------------- /graphite_beacon/templates/graphite/message.html: -------------------------------------------------------------------------------- 1 | {% extends "../base.html" %} 2 | 3 | {% block content1 %} 4 | 5 | 6 | 22 | 23 |
7 | 8 | 9 | 13 | 14 | 15 | 18 | 19 |
10 | {{ level.upper() }} [{{alert.name}}] 11 | {% if target %} - {{ target }} {% end %} 12 |
16 | Value: {{ alert.convert(value) }} 17 |
20 | 21 |
24 | {% end %} 25 | 26 | {% block content2 %} 27 | 28 | 29 | 54 | 55 | 56 | 67 | 68 |
30 | 31 | Time: {{ dt.datetime.now().strftime('%H:%M %d/%m/%Y') }}
32 | 33 | Method: {{ alert.method }}
34 | 35 | Query: {{ alert.query }}
36 | 37 | {% if rule %} 38 | Rule: {{ rule['raw'] }}
39 | {% end %} 40 | 41 | {% if target %} 42 | Target: {{ target }}
43 | {% if alert.history[target] %} 44 | History: {{ [ alert.convert(v) for v in alert.history[target][:100] ] }}
{% end %} 45 | {% end %} 46 | 47 | {% if target and rule %} 48 | {% for expr in filter(lambda x: isinstance(x, dict), rule['exprs']) %} 49 | Tested value: {{ alert.convert(alert.get_value_for_expr(expr, target)) }}
50 | {% end %} 51 | {% end %} 52 | 53 |
57 | graph 58 |

59 | 60 | 61 | 64 | 65 | 66 |
69 | {% end %} 70 | -------------------------------------------------------------------------------- /graphite_beacon/templates/graphite/message.txt: -------------------------------------------------------------------------------- 1 | {{ reactor.options.get('prefix') }} {{ level.upper() }} 2 | {{ '=' * len(reactor.options.get('prefix') + level)}} 3 | 4 | Alert: {{ alert.name }} 5 | Status: {{ level }} 6 | 7 | Time: {{ dt.datetime.now().strftime('%H:%M %d/%m/%Y') }} 8 | Value: {{ alert.convert(value) }} ({{value}}) 9 | Method: {{ alert.method }} 10 | {% if rule %} 11 | Rule: {{ rule['raw'] }}{% end %} 12 | {% if target %} 13 | Target: {{ target }} 14 | {% if alert.history[target] %} 15 | History: {{ [ alert.convert(v) for v in alert.history[target][:100] ] }} {% end %} 16 | {% end %} 17 | {% if target and rule %}{% for expr in filter(lambda x: isinstance(x, dict), rule['exprs']) %} 18 | Tested value: {{ alert.convert(alert.get_value_for_expr(expr, target)) }}
19 | {% end %}{% end %} 20 | 21 | View the graph: {{ alert.get_graph_url(alert.query) }} 22 | 23 | -- 24 | 25 | You can configure alerts for notifications in your configuration file. 26 | See https://github.com/klen/graphite-beacon 27 | 28 | -------------------------------------------------------------------------------- /graphite_beacon/templates/graphite/short.txt: -------------------------------------------------------------------------------- 1 | {% if level == 'normal' %} 2 | {{ reactor.options.get('prefix') }} {{level.upper() }} <{{ alert.name }}>{% if target %} ({{target}}){% end %} is back to normal. 3 | {% else %} 4 | {{ reactor.options.get('prefix') }} {{ level.upper() }} <{{ alert.name }}>{% if target %} ({{target}}){% end %} failed. Current value: {{ alert.convert(value) }} 5 | {% end %} 6 | -------------------------------------------------------------------------------- /graphite_beacon/templates/graphite/slack.txt: -------------------------------------------------------------------------------- 1 | {% if level == 'normal' %} 2 | {{ reactor.options.get('prefix') }} {{level.upper() }} <{{ alert.name }}>{% if target %} `{{target}}`{% end %} is back to normal. 3 | {% else %} 4 | {{ reactor.options.get('prefix') }} {{ level.upper() }} <{{ alert.name }}>{% if target %} `{{target}}`{% end %} failed. Current value: {{ alert.convert(value) }} 5 | {% if target %}<{{ alert.get_graph_url(target) }}|View Graph>{% end %} 6 | {% end %} 7 | -------------------------------------------------------------------------------- /graphite_beacon/templates/url/message.html: -------------------------------------------------------------------------------- 1 | {% extends "../base.html" %} 2 | 3 | {% block content1 %} 4 | 5 | 6 | 23 | 24 |
7 | 8 | 9 | 10 | 14 | 15 | 16 | 19 | 20 |
11 | {{ level.upper() }} [{{alert.name}}] 12 | {% if target %} - {{ target }} {% end %} 13 |
17 | Response: {{ value }} 18 |
21 | 22 |
25 | {% end %} 26 | 27 | {% block content2 %} 28 | 29 | 30 | 38 | 39 |
31 | Time: {{ dt.datetime.now().strftime('%H:%M %d/%m/%Y') }}
32 | {% if target %} Target: {{ target }}
{% end %} 33 | Query: {{ alert.query }}
34 | {% if rule %} 35 | Rule: {{ rule['raw'] }}
36 | {% end %} 37 |
40 | {% end %} 41 | -------------------------------------------------------------------------------- /graphite_beacon/templates/url/message.txt: -------------------------------------------------------------------------------- 1 | {{ reactor.options.get('prefix') }} {{ level.upper() }} 2 | {{ '=' * len(reactor.options.get('prefix') + level)}} 3 | 4 | Alert: {{ alert.name }} 5 | Status: {{ level }} 6 | Value: {{value}} 7 | 8 | Time: {{ dt.datetime.now().strftime('%H:%M %d/%m/%Y') }} 9 | URL: {{ alert.query }} 10 | Status: {{ value }} 11 | {% if rule %} 12 | Rule: {{ rule['raw'] }}{% end %} 13 | 14 | -- 15 | 16 | You can configure alerts for notifications in your congifuration file. 17 | See https://github.com/klen/graphite-beacon 18 | 19 | -------------------------------------------------------------------------------- /graphite_beacon/templates/url/short.txt: -------------------------------------------------------------------------------- 1 | {% if level == 'normal' %} 2 | {{ reactor.options.get('prefix') }} {{level.upper()}} <{{ alert.name }}>{% if target %} ({{target}}){% end %} is back to normal. 3 | {% else %} 4 | {{ reactor.options.get('prefix') }} {{ level.upper() }} <{{ alert.name }}>{% if target %} ({{target}}){% end %} failed to load {{ alert.query }}. Response status is {{ value }} 5 | {% end %} 6 | -------------------------------------------------------------------------------- /graphite_beacon/units.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import re 4 | 5 | NUMBER_RE = re.compile(r'(?P\-?\d*\.?\d*)(?P\w+)') 6 | 7 | # Time units 8 | MILLISECOND = 'millisecond' 9 | SECOND = 'second' 10 | MINUTE = 'minute' 11 | HOUR = 'hour' 12 | DAY = 'day' 13 | WEEK = 'week' 14 | MONTH = 'month' 15 | YEAR = 'year' 16 | 17 | 18 | class TimeUnit(object): 19 | """A duration of time with a unit granularity.""" 20 | 21 | UNIT_ALISES = { 22 | MILLISECOND: ['ms'], 23 | SECOND: ['s'], 24 | MINUTE: ['m'], 25 | HOUR: ['h'], 26 | DAY: ['d'], 27 | WEEK: ['w'], 28 | MONTH: ['M'], 29 | YEAR: ['y'], 30 | } 31 | UNIT_ALIASES_REVERSE = {a: unit for unit, aliases in UNIT_ALISES.items() 32 | for a in aliases} 33 | 34 | UNITS_IN_SECONDS = { 35 | MILLISECOND: 0.001, 36 | SECOND: 1, 37 | MINUTE: 60, 38 | HOUR: 3600, 39 | DAY: 86400, 40 | WEEK: 604800, 41 | MONTH: 2592000, 42 | YEAR: 31536000, 43 | } 44 | UNITS_IN_MILLISECONDS = {k: v * 1000 for k, v in UNITS_IN_SECONDS.items()} 45 | 46 | UNITS_TO_GRAPHITE = { 47 | SECOND: 's', 48 | MINUTE: 'min', 49 | HOUR: 'h', 50 | DAY: 'd', 51 | WEEK: 'w', 52 | MONTH: 'mon', 53 | YEAR: 'y', 54 | } 55 | 56 | def __init__(self, value, unit): 57 | try: 58 | self.value = float(value) 59 | except ValueError: 60 | raise ValueError("Time unit values must be floats: {}".format(value)) 61 | self.unit = self._normalize_unit(unit) 62 | 63 | if self.value < 0: 64 | raise ValueError("Negative time units are not supported: {}".format(value)) 65 | if not self.unit: 66 | raise ValueError("Unable to parse time unit: {}{}".format(value, unit)) 67 | 68 | def display_value(self): 69 | return int(self.value) if self.value.is_integer() else self.value 70 | 71 | @classmethod 72 | def from_interval(cls, interval): 73 | match = None 74 | try: 75 | match = NUMBER_RE.search(interval) 76 | except TypeError: 77 | pass 78 | if not match: 79 | raise ValueError("Unable to parse interval: {}".format(interval)) 80 | return cls(match.group('value'), match.group('unit')) 81 | 82 | def __repr__(self): 83 | return '{}{}'.format(self.display_value(), self.unit) 84 | 85 | def as_tuple(self): 86 | return (self.value, self.unit) 87 | 88 | def __add__(self, other): 89 | if not isinstance(other, TimeUnit): 90 | raise ValueError("Cannot add object that is not a TimeUnit") 91 | result_ms = self.convert_to(MILLISECOND) + other.convert_to(MILLISECOND) 92 | return TimeUnit(self.convert(result_ms, MILLISECOND, self.unit), self.unit) 93 | 94 | def __sub__(self, other): 95 | if not isinstance(other, TimeUnit): 96 | raise ValueError("Cannot subtract object that is not a TimeUnit") 97 | result_ms = self.convert_to(MILLISECOND) - other.convert_to(MILLISECOND) 98 | return TimeUnit(self.convert(result_ms, MILLISECOND, self.unit), self.unit) 99 | 100 | @classmethod 101 | def _normalize_value_ms(cls, value): 102 | """Normalize a value in ms to the largest unit possible without decimal places. 103 | 104 | Note that this ignores fractions of a second and always returns a value _at least_ 105 | in seconds. 106 | 107 | :return: the normalized value and unit name 108 | :rtype: Tuple[Union[int, float], str] 109 | """ 110 | value = round(value / 1000) * 1000 # Ignore fractions of second 111 | 112 | sorted_units = sorted(cls.UNITS_IN_MILLISECONDS.items(), 113 | key=lambda x: x[1], reverse=True) 114 | for unit, unit_in_ms in sorted_units: 115 | unit_value = value / unit_in_ms 116 | if unit_value.is_integer(): 117 | return int(unit_value), unit 118 | return value, MILLISECOND # Should never get here 119 | 120 | @classmethod 121 | def _normalize_unit(cls, unit): 122 | """Resolve a unit to its real name if it's an alias. 123 | 124 | :param unit str: the unit to normalize 125 | :return: the normalized unit, or None one isn't found 126 | :rtype: Union[None, str] 127 | """ 128 | if unit in cls.UNITS_IN_SECONDS: 129 | return unit 130 | return cls.UNIT_ALIASES_REVERSE.get(unit, None) 131 | 132 | def as_graphite(self): 133 | # Graphite does not support decimal numbers, so normalize to an integer 134 | value, unit = self._normalize_value_ms(self.convert_to(MILLISECOND)) 135 | 136 | # Edge case where the value fits into every unit, so just use the original 137 | # unless it is MILLISECOND 138 | if value == 0: 139 | unit = SECOND if self.unit == MILLISECOND else self.unit 140 | 141 | assert unit in self.UNITS_TO_GRAPHITE 142 | return '{}{}'.format(int(value), self.UNITS_TO_GRAPHITE[unit]) 143 | 144 | def convert_to(self, unit): 145 | return TimeUnit.convert(self.value, self.unit, unit) 146 | 147 | @classmethod 148 | def convert(cls, value, from_unit, to_unit): 149 | """Convert a value from one time unit to another. 150 | 151 | :return: the numeric value converted to the desired unit 152 | :rtype: float 153 | """ 154 | value_ms = value * cls.UNITS_IN_MILLISECONDS[from_unit] 155 | return value_ms / cls.UNITS_IN_MILLISECONDS[to_unit] 156 | -------------------------------------------------------------------------------- /graphite_beacon/utils.py: -------------------------------------------------------------------------------- 1 | import operator as op 2 | from re import compile as re 3 | 4 | from funcparserlib.lexer import Token, make_tokenizer 5 | from funcparserlib.parser import a, finished, many, maybe, skip, some 6 | 7 | # NOTE: the unit conversions below should be considered deprecated and migrated 8 | # over to `unit.py` instead. 9 | 10 | NUMBER_RE = re(r'(\d*\.?\d*)') 11 | CONVERT = { 12 | "bytes": ( 13 | ("TB", 1099511627776), ("GB", 1073741824.0), ("MB", 1048576.0), ("KB", 1024.0), 14 | ), 15 | "bits": ( 16 | ("Tb", 1099511627776), ("Gb", 1073741824.0), ("Mb", 1048576.0), ("Kb", 1024.0), 17 | ), 18 | "bps": ( 19 | ("Gbps", 1000000000.0), ("Mbps", 1000000.0), ("Kbps", 1000.0), 20 | ), 21 | "short": ( 22 | ("Tri", 1000000000000.0), ("Bil", 1000000000.0), ("Mil", 1000000.0), ("K", 1000.0), 23 | ), 24 | "s": ( 25 | ("y", 31536000.0), 26 | ("M", 2592000.0), 27 | ("w", 604800.0), 28 | ("d", 86400.0), 29 | ("h", 3600.0), 30 | ("m", 60.0), 31 | ("s", 1.0), 32 | ("ms", 0.001), 33 | ), 34 | "percent": ( 35 | ("%", 1), 36 | ) 37 | } 38 | CONVERT_HASH = dict((name, value) for _types in CONVERT.values() for (name, value) in _types) 39 | CONVERT['ms'] = list((n, v * 1000) for n, v in CONVERT['s']) 40 | CONVERT_HASH['%'] = 1 41 | 42 | IDENTITY = lambda x: x 43 | 44 | HISTORICAL = 'historical' 45 | COMPARATORS = {'>': op.gt, '>=': op.ge, '<': op.lt, '<=': op.le, '==': op.eq, '!=': op.ne} 46 | OPERATORS = {'*': op.mul, '/': op.truediv, '+': op.add, '-': op.sub} 47 | LOGICAL_OPERATORS = {'AND': op.and_, 'OR': op.or_} 48 | 49 | RULE_TOKENIZER = make_tokenizer( 50 | [ 51 | (u'Level', (r'(critical|warning|normal)',)), 52 | (u'Historical', (HISTORICAL,)), 53 | (u'Comparator', (r'({})'.format('|'.join(sorted(COMPARATORS.keys(), reverse=True))),)), 54 | (u'LogicalOperator', (r'({})'.format('|'.join(LOGICAL_OPERATORS.keys())),)), 55 | (u'Sep', (r':',)), 56 | (u'Operator', (r'(?:\*|\+|-|\/)',)), 57 | (u'Number', (r'(\d+\.?\d*)',)), 58 | (u'Unit', (r'({})'.format('|'.join(sorted(CONVERT_HASH.keys(), reverse=True))),)), 59 | (u'Space', (r'\s+',)) 60 | ] 61 | ) 62 | 63 | 64 | def convert_to_format(value, frmt=None): 65 | value = float(value) 66 | units = CONVERT.get(frmt, []) 67 | for name, size in units: 68 | if size < value: 69 | break 70 | else: 71 | return value 72 | 73 | value /= size # pylint: disable=undefined-loop-variable 74 | value = ("%.1f" % value).rstrip('0').rstrip('.') 75 | return "{}{}".format(value, name) # pylint: disable=undefined-loop-variable 76 | 77 | 78 | def convert_from_format(num, unit=None): 79 | if not unit: 80 | return float(num) 81 | return float(num) * CONVERT_HASH.get(unit, 1) 82 | 83 | 84 | def _tokenize_rule(_str): 85 | return [x for x in RULE_TOKENIZER(_str) if x.type not in ['Space']] 86 | 87 | 88 | def _parse_rule(seq): 89 | tokval = lambda x: x.value 90 | toktype = lambda t: some(lambda x: x.type == t) >> tokval # pylint: disable=undefined-variable 91 | sep = lambda s: a(Token(u'Sep', s)) >> tokval 92 | s_sep = lambda s: skip(sep(s)) 93 | 94 | level = toktype(u'Level') 95 | comparator = toktype(u'Comparator') >> COMPARATORS.get 96 | number = toktype(u'Number') >> float 97 | historical = toktype(u'Historical') 98 | unit = toktype(u'Unit') 99 | operator = toktype(u'Operator') 100 | logical_operator = toktype(u'LogicalOperator') >> LOGICAL_OPERATORS.get 101 | 102 | exp = comparator + ((number + maybe(unit)) | historical) + maybe(operator + number) 103 | rule = ( 104 | level + s_sep(':') + exp + many(logical_operator + exp) 105 | ) 106 | 107 | overall = rule + skip(finished) 108 | return overall.parse(seq) 109 | 110 | 111 | def _parse_expr(expr): 112 | cond, value, mod = expr 113 | 114 | if value != HISTORICAL: 115 | value = convert_from_format(*value) 116 | 117 | if mod: 118 | _op, num = mod 119 | mod = lambda x: OPERATORS[_op](x, num) 120 | 121 | return {'op': cond, 'value': value, 'mod': mod or IDENTITY} 122 | 123 | 124 | def parse_rule(rule): 125 | tokens = _tokenize_rule(rule) 126 | level, initial_expr, exprs = _parse_rule(tokens) 127 | 128 | result = {'level': level, 'raw': rule, 'exprs': [_parse_expr(initial_expr)]} 129 | 130 | for logical_operator, expr in exprs: 131 | result['exprs'].extend([logical_operator, _parse_expr(expr)]) 132 | 133 | return result 134 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | norecursedirs = env 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tornado==5.1.1 2 | funcparserlib==0.3.6 3 | pyyaml==3.12 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [coverage:run] 5 | source = graphite_beacon 6 | 7 | [wheel] 8 | universal = 1 9 | 10 | [pep8] 11 | ignore = E402,E731 12 | max-line-length = 120 13 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import re 3 | from os import path as op 4 | 5 | from setuptools import setup 6 | 7 | 8 | def _read(fname): 9 | try: 10 | return open(op.join(op.dirname(__file__), fname)).read() 11 | except IOError: 12 | return '' 13 | 14 | _meta = _read('graphite_beacon/__init__.py') 15 | _license = re.search(r'^__license__\s*=\s*"(.*)"', _meta, re.M).group(1) 16 | _version = re.search(r'^__version__\s*=\s*"(.*)"', _meta, re.M).group(1) 17 | 18 | install_requires = [ 19 | l for l in _read('requirements.txt').split('\n') if l and not l.startswith('#')] 20 | 21 | setup( 22 | name='graphite_beacon', 23 | version=_version, 24 | license=_license, 25 | description=_read('DESCRIPTION'), 26 | long_description=_read('README.md'), 27 | platforms=('Any'), 28 | keywords="graphite alerts monitoring system".split(), # noqa 29 | 30 | author='Kirill Klenov', 31 | author_email='horneds@gmail.com', 32 | url='http://github.com/klen/graphite-beacon', 33 | classifiers=[ 34 | 'Development Status :: 4 - Beta', 35 | 'Intended Audience :: Developers', 36 | 'License :: OSI Approved :: MIT License', 37 | 'Natural Language :: English', 38 | 'Natural Language :: Russian', 39 | 'Operating System :: OS Independent', 40 | 'Programming Language :: Python :: 2', 41 | 'Programming Language :: Python :: 3', 42 | 'Programming Language :: Python', 43 | 'Topic :: Software Development :: Libraries :: Python Modules', 44 | 'Topic :: Software Development :: Testing', 45 | 'Topic :: Utilities', 46 | ], 47 | 48 | packages=['graphite_beacon'], 49 | include_package_data=True, 50 | install_requires=install_requires, 51 | entry_points={'console_scripts': ['graphite-beacon = graphite_beacon.app:run']}, 52 | ) 53 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | -e . 3 | 4 | mock==2.0.0 5 | pytest==4.0.0 6 | pylint==1.6.4 7 | coverage==4.3 8 | pep8==1.7.0 9 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klen/graphite-beacon/c1f071e9f557693bc90f6acbc314994985dc3b77/tests/__init__.py -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klen/graphite-beacon/c1f071e9f557693bc90f6acbc314994985dc3b77/tests/integration/__init__.py -------------------------------------------------------------------------------- /tests/integration/graphite_test.py: -------------------------------------------------------------------------------- 1 | import mock 2 | import tornado.gen 3 | from mock import ANY 4 | from tornado import ioloop 5 | from tornado.httpclient import HTTPRequest, HTTPResponse 6 | from tornado.testing import AsyncTestCase, gen_test 7 | 8 | from graphite_beacon.alerts import GraphiteAlert 9 | from graphite_beacon.core import Reactor 10 | from graphite_beacon._compat import StringIO 11 | 12 | from ..util import build_graphite_response 13 | 14 | fetch_mock_url = lambda m: m.call_args_list[0][0][0] 15 | 16 | 17 | class TestGraphite(AsyncTestCase): 18 | 19 | def get_new_ioloop(self): 20 | return ioloop.IOLoop.instance() 21 | 22 | @mock.patch('graphite_beacon.alerts.hc.AsyncHTTPClient.fetch') 23 | @mock.patch('graphite_beacon.handlers.smtp.SMTPHandler.notify') 24 | @gen_test 25 | def test_graphite(self, mock_smpt_notify, mock_fetch): 26 | self.reactor = Reactor( 27 | alerts=[ 28 | { 29 | 'name': 'test', 30 | 'query': '*', 31 | 'rules': ["normal: == 0", "warning: >= 5"] 32 | }, 33 | ], 34 | smtp={ 35 | 'from': 'graphite@localhost', 36 | 'to': ['alerts@localhost'], 37 | }, 38 | interval='0.25second', 39 | time_window='10minute', 40 | until='1minute', 41 | ) 42 | 43 | assert not self.reactor.is_running() 44 | 45 | alert = list(self.reactor.alerts)[0] 46 | assert len(self.reactor.alerts) == 1 47 | assert isinstance(alert, GraphiteAlert) 48 | 49 | metric_data = [5, 7, 9] 50 | build_resp = lambda: HTTPResponse(HTTPRequest('http://localhost:80/graphite'), 200, 51 | buffer=StringIO(build_graphite_response(data=metric_data))) 52 | 53 | mock_fetch.side_effect = iter(tornado.gen.maybe_future(build_resp()) 54 | for _ in range(10)) 55 | 56 | self.reactor.start(start_loop=False) 57 | yield tornado.gen.sleep(0.5) 58 | 59 | # There should be at least 1 immediate fetch + 1 instance of the PeriodicCallback 60 | assert mock_fetch.call_count >= 2 61 | 62 | expected = 'http://localhost/render/?target=%2A&from=-11min&until=-1min&format=raw' 63 | assert fetch_mock_url(mock_fetch) == expected 64 | 65 | assert alert.state['*'] == 'warning' 66 | 67 | assert mock_smpt_notify.call_count == 1 68 | mock_smpt_notify.assert_called_once_with( 69 | 'warning', 70 | alert, 71 | 7.0, 72 | ntype='graphite', 73 | rule=ANY, 74 | target='*') 75 | 76 | self.reactor.stop(stop_loop=False) 77 | -------------------------------------------------------------------------------- /tests/integration/url_test.py: -------------------------------------------------------------------------------- 1 | import mock 2 | import tornado.gen 3 | from mock import ANY 4 | from tornado import ioloop 5 | from tornado.httpclient import HTTPRequest, HTTPResponse 6 | from tornado.testing import AsyncTestCase, gen_test 7 | 8 | from graphite_beacon.alerts import URLAlert 9 | from graphite_beacon.core import Reactor 10 | from graphite_beacon._compat import StringIO 11 | 12 | from ..util import build_graphite_response 13 | 14 | fetch_mock_url = lambda m: m.call_args_list[0][0][0] 15 | 16 | 17 | class TestGraphite(AsyncTestCase): 18 | 19 | target_url = 'http://localhost/check' 20 | 21 | def get_new_ioloop(self): 22 | return ioloop.IOLoop.instance() 23 | 24 | @mock.patch('graphite_beacon.alerts.hc.AsyncHTTPClient.fetch') 25 | @mock.patch('graphite_beacon.handlers.smtp.SMTPHandler.notify') 26 | @gen_test 27 | def test_graphite(self, mock_smpt_notify, mock_fetch): 28 | self.reactor = Reactor( 29 | alerts=[ 30 | { 31 | 'name': 'test', 32 | 'source': 'url', 33 | 'query': self.target_url, 34 | 'rules': ['warning: != 200'] 35 | } 36 | ], 37 | smtp={ 38 | 'from': 'graphite@localhost', 39 | 'to': ['alerts@localhost'], 40 | }, 41 | interval='0.25second', 42 | ) 43 | 44 | assert not self.reactor.is_running() 45 | 46 | alert = list(self.reactor.alerts)[0] 47 | assert len(self.reactor.alerts) == 1 48 | assert isinstance(alert, URLAlert) 49 | 50 | metric_data = [5, 7, 9] 51 | build_resp = lambda: HTTPResponse(HTTPRequest(self.target_url), 500, 52 | buffer=StringIO('')) 53 | 54 | mock_fetch.side_effect = iter(tornado.gen.maybe_future(build_resp()) 55 | for _ in range(10)) 56 | 57 | self.reactor.start(start_loop=False) 58 | yield tornado.gen.sleep(0.5) 59 | 60 | # There should be at least 1 immediate fetch + 1 instance of the PeriodicCallback 61 | assert mock_fetch.call_count >= 2 62 | assert fetch_mock_url(mock_fetch) == self.target_url 63 | 64 | assert alert.state[self.target_url] == 'warning' 65 | assert mock_smpt_notify.call_count == 1 66 | mock_smpt_notify.assert_called_once_with( 67 | 'warning', 68 | alert, 69 | 500.0, 70 | ntype='url', 71 | rule=ANY, 72 | target=self.target_url) 73 | 74 | self.reactor.stop(stop_loop=False) 75 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klen/graphite-beacon/c1f071e9f557693bc90f6acbc314994985dc3b77/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/alerts_test.py: -------------------------------------------------------------------------------- 1 | import mock 2 | 3 | from graphite_beacon import units 4 | from graphite_beacon._compat import urlparse 5 | from graphite_beacon.alerts import BaseAlert, GraphiteAlert, URLAlert 6 | from graphite_beacon.core import Reactor 7 | from graphite_beacon.units import SECOND 8 | 9 | BASIC_ALERT_OPTS = { 10 | 'name': 'GraphiteTest', 11 | 'query': '*', 12 | 'rules': ['normal: == 0'], 13 | } 14 | 15 | BASIC_GRAPHITE_ALERT_OPTS = BASIC_ALERT_OPTS 16 | 17 | BASIC_URL_ALERT_OPTS = { 18 | 'name': 'URLTest', 19 | 'query': '*', 20 | 'source': 'url', 21 | 'rules': ['normal: == 0'], 22 | } 23 | 24 | 25 | def test_alert(reactor): 26 | alert1 = BaseAlert.get(reactor, **BASIC_GRAPHITE_ALERT_OPTS) 27 | assert alert1 28 | assert isinstance(alert1, GraphiteAlert) 29 | 30 | alert2 = BaseAlert.get(reactor, **BASIC_URL_ALERT_OPTS) 31 | assert isinstance(alert2, URLAlert) 32 | 33 | assert alert1 != alert2 34 | 35 | alert3 = BaseAlert.get(reactor, interval='2m', **BASIC_GRAPHITE_ALERT_OPTS) 36 | assert alert3.interval.as_tuple() == (2, units.MINUTE) 37 | 38 | assert alert1 == alert3 39 | assert set([alert1, alert3]) == set([alert1]) 40 | 41 | alert = BaseAlert.get(reactor, name='Test', query='*', rules=["warning: >= 3MB"]) 42 | assert alert.rules[0]['exprs'][0]['value'] == 3145728 43 | 44 | 45 | def test_history_size(reactor): 46 | alert = BaseAlert.get(reactor, interval='1second', history_size='10second', 47 | **BASIC_GRAPHITE_ALERT_OPTS) 48 | assert alert.history_size == 10 49 | 50 | alert = BaseAlert.get(reactor, interval='1minute', history_size='5hour', 51 | **BASIC_GRAPHITE_ALERT_OPTS) 52 | assert alert.history_size == 60*5 53 | 54 | alert = BaseAlert.get(reactor, interval='5minute', history_size='1minute', 55 | **BASIC_GRAPHITE_ALERT_OPTS) 56 | assert alert.history_size == 1 57 | 58 | 59 | def test_time_window(): 60 | # Time window set explicitly on the alert - should be preferred 61 | alert = BaseAlert.get(Reactor(), time_window='6second', interval='3second', 62 | **BASIC_GRAPHITE_ALERT_OPTS) 63 | assert alert.time_window.as_tuple() == (6, SECOND) 64 | 65 | # Time window set explicitly at the root - should be preferred next 66 | reactor = Reactor(interval='10second', time_window='4second') 67 | alert = BaseAlert.get(reactor, interval='3second', **BASIC_GRAPHITE_ALERT_OPTS) 68 | assert alert.time_window.as_tuple() == (4, SECOND) 69 | 70 | # No time window set, but interval set directly on the alert 71 | reactor = Reactor(interval='10second') 72 | alert = BaseAlert.get(reactor, interval='1second', **BASIC_GRAPHITE_ALERT_OPTS) 73 | assert alert.time_window.as_tuple() == (1, SECOND) 74 | 75 | # Only time interval set at root 76 | reactor = Reactor(interval='10second') 77 | alert = BaseAlert.get(reactor, **BASIC_GRAPHITE_ALERT_OPTS) 78 | assert alert.time_window.as_tuple() == (10, SECOND) 79 | 80 | 81 | def test_from_time(reactor): 82 | alert = BaseAlert.get(reactor, time_window='5minute', 83 | **BASIC_GRAPHITE_ALERT_OPTS) 84 | 85 | url = urlparse.urlparse(alert.get_graph_url('*')) 86 | query = urlparse.parse_qs(url.query) 87 | assert query['from'] == ['-5min'] 88 | assert query['until'] == ['-0s'] 89 | 90 | 91 | def test_from_time_with_until(reactor): 92 | alert = BaseAlert.get(reactor, time_window='5minute', until='1minute', 93 | **BASIC_GRAPHITE_ALERT_OPTS) 94 | 95 | url = urlparse.urlparse(alert.get_graph_url('*')) 96 | query = urlparse.parse_qs(url.query) 97 | assert query['from'] == ['-6min'] 98 | assert query['until'] == ['-1min'] 99 | 100 | 101 | def test_multimetrics(reactor): 102 | alert = BaseAlert.get( 103 | reactor, name="Test", query="*", rules=[ 104 | "critical: > 100", "warning: > 50", "warning: < historical / 2"]) 105 | reactor.alerts = set([alert]) 106 | 107 | with mock.patch.object(reactor, 'notify'): 108 | alert.check([(110, 'metric1'), (60, 'metric2'), (30, 'metric3')]) 109 | 110 | assert reactor.notify.call_count == 2 111 | 112 | # metric1 - critical 113 | assert reactor.notify.call_args_list[0][0][0] == 'critical' 114 | assert reactor.notify.call_args_list[0][1]['target'] == 'metric1' 115 | 116 | # metric2 - warning 117 | assert reactor.notify.call_args_list[1][0][0] == 'warning' 118 | assert reactor.notify.call_args_list[1][1]['target'] == 'metric2' 119 | 120 | assert list(alert.history['metric1']) == [110] 121 | 122 | with mock.patch.object(reactor, 'notify'): 123 | alert.check([(60, 'metric1'), (60, 'metric2'), (30, 'metric3')]) 124 | assert reactor.notify.call_count == 1 125 | 126 | # metric1 - warning, metric2 didn't change 127 | assert reactor.notify.call_args_list[0][0][0] == 'warning' 128 | assert reactor.notify.call_args_list[0][1]['target'] == 'metric1' 129 | 130 | assert list(alert.history['metric1']) == [110, 60] 131 | 132 | with mock.patch.object(reactor, 'notify'): 133 | alert.check([(60, 'metric1'), (30, 'metric2'), (105, 'metric3')]) 134 | assert reactor.notify.call_count == 2 135 | 136 | # metric2 - normal 137 | assert reactor.notify.call_args_list[0][0][0] == 'normal' 138 | assert reactor.notify.call_args_list[0][1]['target'] == 'metric2' 139 | 140 | # metric3 - critical 141 | assert reactor.notify.call_args_list[1][0][0] == 'critical' 142 | assert reactor.notify.call_args_list[1][1]['target'] == 'metric3' 143 | 144 | assert list(alert.history['metric1']) == [110, 60, 60] 145 | 146 | with mock.patch.object(reactor, 'notify'): 147 | alert.check([(60, 'metric1'), (30, 'metric2'), (105, 'metric3')]) 148 | assert reactor.notify.call_count == 0 149 | 150 | with mock.patch.object(reactor, 'notify'): 151 | alert.check([(70, 'metric1'), (21, 'metric2'), (105, 'metric3')]) 152 | assert reactor.notify.call_count == 1 153 | 154 | # metric2 - historical warning 155 | assert reactor.notify.call_args_list[0][0][0] == 'warning' 156 | assert reactor.notify.call_args_list[0][1]['target'] == 'metric2' 157 | 158 | assert list(alert.history['metric1']) == [60, 60, 60, 70] 159 | assert alert.state['metric1'] == 'warning' 160 | 161 | reactor.repeat() 162 | 163 | assert alert.state == { 164 | None: 'normal', 'metric1': 'normal', 'metric2': 'normal', 'metric3': 'normal', 165 | 'waiting': 'normal', 'loading': 'normal'} 166 | 167 | 168 | def test_multiexpressions(reactor): 169 | alert = BaseAlert.get( 170 | reactor, name="Test", query="*", rules=["warning: > historical * 1.05 AND > 70"]) 171 | reactor.alerts = set([alert]) 172 | 173 | with mock.patch.object(reactor, 'notify'): 174 | alert.check([ 175 | (50, 'metric1'), (65, 'metric1'), (85, 'metric1'), (65, 'metric1'), 176 | (68, 'metric1'), (75, 'metric1')]) 177 | 178 | assert reactor.notify.call_count == 1 179 | 180 | # metric2 - warning 181 | assert reactor.notify.call_args_list[0][0][0] == 'warning' 182 | assert reactor.notify.call_args_list[0][1]['target'] == 'metric1' 183 | 184 | assert list(alert.history['metric1']) == [85, 65, 68, 75] 185 | -------------------------------------------------------------------------------- /tests/unit/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from graphite_beacon.core import Reactor 4 | 5 | 6 | @pytest.fixture 7 | def reactor(): 8 | return Reactor(history_size='40m') 9 | -------------------------------------------------------------------------------- /tests/unit/core_test.py: -------------------------------------------------------------------------------- 1 | from graphite_beacon.core import Reactor 2 | 3 | 4 | def test_reactor(): 5 | rr = Reactor() 6 | assert rr 7 | assert rr.reinit() 8 | 9 | rr = Reactor(include=['examples/example-config.json'], alerts=[ 10 | {'name': 'test', 'query': '*', 'rules': ["normal: == 0"]}]) 11 | assert rr.options['interval'] == '20minute' 12 | assert len(rr.alerts) == 3 13 | 14 | rr = Reactor(include=['examples/example-config.yml'], alerts=[ 15 | {'name': 'test', 'query': '*', 'rules': ["normal: == 0"]}]) 16 | assert rr.options['interval'] == '20minute' 17 | assert len(rr.alerts) == 3 18 | 19 | 20 | def test_public_graphite_url(): 21 | rr = Reactor(graphite_url='http://localhost', public_graphite_url=None) 22 | rr.reinit() 23 | assert rr.options.get("public_graphite_url") == 'http://localhost' 24 | 25 | rr.reinit(public_graphite_url="http://public") 26 | assert rr.options.get("public_graphite_url") == "http://public" 27 | 28 | 29 | def test_invalid_handler(reactor): 30 | reactor.reinit(critical_handlers=['log', 'unknown']) 31 | assert len(reactor.handlers['critical']) == 1 32 | -------------------------------------------------------------------------------- /tests/unit/graphite_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from graphite_beacon.graphite import GraphiteRecord 4 | 5 | from ..util import build_graphite_response 6 | 7 | 8 | build_record = lambda data: GraphiteRecord(build_graphite_response(data=data)) 9 | 10 | 11 | class TestGraphiteRecord(object): 12 | def test_invalid_record(self): 13 | with pytest.raises(ValueError): 14 | GraphiteRecord('not,legit,data') 15 | 16 | def test_invalid_record_long(self): 17 | with pytest.raises(ValueError) as e: 18 | GraphiteRecord('' + ('' * 50)) 19 | assert '' in str(e.value) 20 | assert str(e.value).endswith('..') 21 | 22 | def test_record(self): 23 | assert build_record([1, 2, 3]).values == [1.0, 2.0, 3.0] 24 | 25 | def test_average(self): 26 | assert build_record([1]).average == 1.0 27 | assert build_record([1, 2, 3]).average == 2.0 28 | assert build_record([5, 5, 5, 10]).average == 6.25 29 | assert build_record([1.5, 2.5, 3.5]).average == 2.5 30 | 31 | def test_last_value(self): 32 | assert build_record([1]).last_value == 1.0 33 | assert build_record([1, 2, 3]).last_value == 3.0 34 | 35 | def test_sum(self): 36 | assert build_record([1]).sum == 1.0 37 | assert build_record([1.5, 2.5, 3]).sum == 7.0 38 | 39 | def test_minimum(self): 40 | assert build_record([1]).minimum == 1.0 41 | assert build_record([9.0, 2.3, 4]).minimum == 2.3 42 | 43 | def test_maximum(self): 44 | assert build_record([1]).maximum == 1.0 45 | assert build_record([9.0, 2.3, 4]).maximum == 9.0 46 | -------------------------------------------------------------------------------- /tests/unit/handlers/smtp_test.py: -------------------------------------------------------------------------------- 1 | from graphite_beacon.alerts import BaseAlert 2 | from graphite_beacon.handlers.smtp import SMTPHandler 3 | 4 | 5 | def test_html_template(reactor): 6 | target = 'node.com' 7 | galert = BaseAlert.get(reactor, name='Test', query='*', rules=["normal: == 0"]) 8 | galert.history[target] += [1, 2, 3, 4, 5] 9 | 10 | reactor.options['smtp'] = { 11 | 'to': 'user@com.com', 'graphite_url': 'http://graphite.myhost.com'} 12 | smtp = SMTPHandler(reactor) 13 | 14 | message = smtp.get_message( 15 | 'critical', galert, 3000000, target=target, ntype='graphite', rule=galert.rules[0]) 16 | assert message 17 | 18 | assert len(message._payload) == 2 19 | text, html = message._payload 20 | assert 'graphite.myhost.com' in html.as_string() 21 | 22 | ualert = BaseAlert.get( 23 | reactor, source='url', name='Test', query='http://google.com', rules=["critical: != 200"]) 24 | message = smtp.get_message('critical', ualert, '3000000', target, 'url') 25 | assert message 26 | 27 | assert len(message._payload) == 2 28 | _, html = message._payload 29 | assert 'google.com' in html.as_string() 30 | 31 | ealert = BaseAlert.get(reactor, name='Test', query='*', rules=["critical: > 5 AND < 10"]) 32 | message = smtp.get_message( 33 | 'critical', ealert, 8, target=target, ntype='graphite', rule=ealert.rules[0]) 34 | assert message 35 | 36 | assert len(message._payload) == 2 37 | -------------------------------------------------------------------------------- /tests/unit/units_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from graphite_beacon.units import (DAY, HOUR, MILLISECOND, MINUTE, MONTH, 4 | SECOND, YEAR, TimeUnit) 5 | 6 | 7 | class TestTimeUnit(object): 8 | def test_from_interval(self): 9 | assert TimeUnit.from_interval('2second').as_tuple() == (2, SECOND) 10 | assert TimeUnit.from_interval('2.5second').as_tuple() == (2.5, SECOND) 11 | 12 | def test_from_interval_invalid(self): 13 | inputs = [None, '', 'minute1', '-1minute', '2meter'] 14 | for i in inputs: 15 | with pytest.raises(ValueError): 16 | TimeUnit.from_interval(i) 17 | 18 | def test_convert(self): 19 | assert TimeUnit.convert(10, SECOND, MILLISECOND) == 10000 20 | assert TimeUnit.convert(1, MILLISECOND, SECOND) == 0.001 21 | assert TimeUnit.convert(10, MINUTE, SECOND) == 600 22 | assert TimeUnit.convert(1.2, DAY, MILLISECOND) == 103680000 23 | 24 | def test_str(self): 25 | assert str(TimeUnit(0, MILLISECOND)) == "0millisecond" 26 | assert str(TimeUnit(10, SECOND)) == "10second" 27 | assert str(TimeUnit(2, YEAR)) == "2year" 28 | 29 | def test_arithmetic(self): 30 | assert (TimeUnit(10, SECOND) - TimeUnit(5, SECOND)).as_tuple() == (5, SECOND) 31 | assert (TimeUnit(10, SECOND) + TimeUnit(5, SECOND)).as_tuple() == (15, SECOND) 32 | 33 | assert (TimeUnit(50, SECOND) + TimeUnit(70, SECOND)).as_tuple() == (120, SECOND) 34 | assert (TimeUnit(50, SECOND) + TimeUnit(71, SECOND)).as_tuple() == (121, SECOND) 35 | 36 | assert (TimeUnit(0, SECOND) + TimeUnit(0, SECOND)).as_tuple() == (0, SECOND) 37 | assert (TimeUnit(0, MILLISECOND) + TimeUnit(0, MILLISECOND)).as_tuple() == (0, MILLISECOND) 38 | assert (TimeUnit(0, SECOND) + TimeUnit(0, YEAR)).as_tuple() == (0, SECOND) 39 | 40 | assert (TimeUnit(0, SECOND) - TimeUnit(0, SECOND)).as_tuple() == (0, SECOND) 41 | assert (TimeUnit(0, MILLISECOND) - TimeUnit(0, MILLISECOND)).as_tuple() == (0, MILLISECOND) 42 | assert (TimeUnit(0, SECOND) - TimeUnit(0, YEAR)).as_tuple() == (0, SECOND) 43 | 44 | def test_arithmetic_decimal(self): 45 | assert (TimeUnit(1.5, SECOND) + TimeUnit(1, SECOND)).as_tuple() == (2.5, SECOND) 46 | 47 | def test_as_graphite(self): 48 | assert TimeUnit(10, MINUTE).as_graphite() == '10min' 49 | assert TimeUnit(875, SECOND).as_graphite() == '875s' 50 | assert TimeUnit(2, HOUR).as_graphite() == '2h' 51 | assert TimeUnit(1, MONTH).as_graphite() == '1mon' 52 | 53 | def test_as_graphite_decimal(self): 54 | assert TimeUnit(1.5, MONTH).as_graphite() == '45d' 55 | assert TimeUnit(5.1, DAY).as_graphite() == '7344min' 56 | assert TimeUnit(1.5, YEAR).as_graphite() == '13140h' 57 | assert TimeUnit(1, MILLISECOND).as_graphite() == '0s' 58 | assert TimeUnit(501, MILLISECOND).as_graphite() == '1s' 59 | -------------------------------------------------------------------------------- /tests/unit/utils_test.py: -------------------------------------------------------------------------------- 1 | import operator as op 2 | 3 | import pytest 4 | from funcparserlib.lexer import LexerError 5 | 6 | from graphite_beacon.utils import parse_rule as parse_rule 7 | from graphite_beacon.utils import (IDENTITY, convert_from_format, 8 | convert_to_format) 9 | 10 | 11 | def test_convert(): 12 | assert convert_to_format(789874) == 789874 13 | assert convert_from_format(789874) 14 | assert convert_to_format(45, 'percent') == "45%" 15 | assert convert_from_format('45', '%') == 45 16 | 17 | assert convert_to_format(789, 'bytes') == 789 18 | assert convert_to_format(456789, 'bytes') == '446.1KB' 19 | assert convert_from_format('456.8', 'KB') == 467763.2 20 | assert convert_to_format(45678912, 'bytes') == '43.6MB' 21 | assert convert_from_format('45.7', 'MB') == 47919923.2 22 | assert convert_to_format(4567891245, 'bytes') == '4.3GB' 23 | assert convert_from_format('4.6', 'GB') == 4939212390.4 24 | 25 | assert convert_from_format('456.8', 'Kb') == 467763.2 26 | assert convert_from_format('456.8', 'Kbps') == 456800 27 | 28 | assert convert_to_format(789, 'short') == 789 29 | assert convert_to_format(456789, 'short') == '456.8K' 30 | assert convert_from_format('456.8', 'K') == 456800 31 | assert convert_to_format(45678912, 'short') == '45.7Mil' 32 | assert convert_from_format('45.7', 'Mil') == 45700000 33 | assert convert_to_format(4567891245, 'short') == '4.6Bil' 34 | assert convert_from_format('4.6', 'Bil') == 4600000000 35 | 36 | assert convert_to_format(789, 's') == "13.2m" 37 | assert convert_from_format('13.2', 'm') == 792 38 | assert convert_to_format(789456, 's') == "1.3w" 39 | assert convert_from_format('1.3', 'w') == 786240 40 | assert convert_to_format(789456234, 's') == "25y" 41 | 42 | assert convert_to_format(79456234, 'ms') == "22.1h" 43 | assert convert_to_format(34, 'ms') == "34ms" 44 | 45 | 46 | def test_parse_rule(): 47 | with pytest.raises(LexerError): 48 | assert parse_rule('invalid') 49 | 50 | assert parse_rule('normal: == 0') == { 51 | 'level': 'normal', 'raw': 'normal: == 0', 52 | 'exprs': [{'op': op.eq, 'value': 0, 'mod': IDENTITY}]} 53 | 54 | assert parse_rule('critical: < 30MB') == { 55 | 'level': 'critical', 'raw': 'critical: < 30MB', 56 | 'exprs': [{'op': op.lt, 'value': 31457280, 'mod': IDENTITY}]} 57 | 58 | assert parse_rule('warning: >= 30MB') == { 59 | 'level': 'warning', 'raw': 'warning: >= 30MB', 60 | 'exprs': [{'op': op.ge, 'value': 31457280, 'mod': IDENTITY}]} 61 | 62 | assert parse_rule('warning: >= historical') == { 63 | 'level': 'warning', 'raw': 'warning: >= historical', 64 | 'exprs': [{'op': op.ge, 'value': 'historical', 'mod': IDENTITY}]} 65 | 66 | assert parse_rule('warning: >= historical AND > 25') == { 67 | 'level': 'warning', 'raw': 'warning: >= historical AND > 25', 68 | 'exprs': [{'op': op.ge, 'value': 'historical', 'mod': IDENTITY}, 69 | op.and_, 70 | {'op': op.gt, 'value': 25, 'mod': IDENTITY}]} 71 | 72 | rule = parse_rule('warning: >= historical * 1.2') 73 | assert rule['exprs'][0]['mod'] 74 | assert rule['exprs'][0]['mod'](5) == 6 75 | -------------------------------------------------------------------------------- /tests/util.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | 5 | 6 | def build_graphite_response(target_name='*', start_timestamp=1480000000, 7 | end_timestamp=1480000050, series_step=60, 8 | data=None): 9 | """Build a graphite response. 10 | 11 | Format: ,,,|[data]* 12 | 13 | :param target_name str: the target query being fulfilled 14 | :param start_timestamp int: unix timestamp for query start 15 | :param end_timestamp int: unix timestamp for query end 16 | :param series_step int: the length of time between each step 17 | :param data list: query results 18 | :rtype: StringIO 19 | """ 20 | data = data or [] 21 | return ( 22 | "{},{},{},{}|{}" 23 | .format(target_name, start_timestamp, end_timestamp, series_step, 24 | ','.join(str(d) for d in data)) 25 | ) 26 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist=py27,py34,cov,pylint,pep8 3 | 4 | [testenv] 5 | commands=py.test tests 6 | deps = -r{toxinidir}/test-requirements.txt 7 | 8 | [testenv:cov] 9 | deps = 10 | {[testenv]deps} 11 | commands = 12 | coverage run -m py.test tests 13 | coverage report 14 | 15 | [testenv:pylint] 16 | deps = 17 | {[testenv]deps} 18 | commands = 19 | pylint --reports no graphite_beacon 20 | 21 | [testenv:pep8] 22 | deps = 23 | {[testenv]deps} 24 | commands = 25 | pep8 graphite_beacon 26 | --------------------------------------------------------------------------------