├── AUTHORS ├── ChangeLog ├── LICENSE ├── README.rst ├── TODO.md ├── contrib ├── Docker │ ├── Dockerfile │ └── conf_files │ │ ├── haproxystats.conf │ │ └── supervisor │ │ ├── haproxystats-process.ini │ │ └── haproxystats-pull.ini ├── ansible-playbook │ ├── group_vars │ │ └── all │ ├── hosts │ ├── main-playbook.yml │ └── roles │ │ ├── create-newuser │ │ └── tasks │ │ │ └── main.yml │ │ ├── haproxy-socket-dir-permission │ │ └── tasks │ │ │ └── main.yml │ │ ├── haproxystats-config-file │ │ ├── tasks │ │ │ └── main.yml │ │ └── templates │ │ │ └── haproxystats.conf.j2 │ │ ├── install-dependency-package │ │ └── tasks │ │ │ └── main.yml │ │ ├── install-haproxystats │ │ └── tasks │ │ │ └── main.yml │ │ └── systemd-files │ │ ├── tasks │ │ └── main.yml │ │ └── templates │ │ ├── haproxystats-process.service.j2 │ │ └── haproxystats-pull.service.j2 ├── nagios │ ├── check_haproxystats_process.sh │ ├── check_haproxystats_process_number_of_procs.sh │ ├── check_haproxystats_pull.sh │ └── check_haproxystats_queue_size.py ├── puppet │ ├── manifests │ │ ├── init.pp │ │ └── params.pp │ └── templates │ │ ├── defaults.conf.erb │ │ ├── exclude_backend.conf.erb │ │ ├── exclude_frontend.conf.erb │ │ ├── haproxystats-process-monit-check.sh.erb │ │ ├── process-systemd-overwrites.conf.erb │ │ ├── process.conf.erb │ │ ├── pull-systemd-overwrites.conf.erb │ │ └── pull.conf.erb ├── systemd │ ├── haproxystats-process.service │ └── haproxystats-pull.service └── tcp_server.py ├── haproxystats-architecture.png ├── haproxystats.conf ├── haproxystats ├── __init__.py ├── metrics.py ├── process.py ├── pull.py └── utils.py ├── requirements.txt ├── setup.cfg └── setup.py /AUTHORS: -------------------------------------------------------------------------------- 1 | Christian Rovner 2 | Hossein 3 | Igor Vuk 4 | Jose Pedro Oliveira 5 | Marcin Deranek 6 | Marcin Deranek 7 | Patrick Kaeding 8 | Pavlos Parissis 9 | Pavlos Parissis 10 | hos7ein 11 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | CHANGES 2 | ======= 3 | 4 | 0.5.2 5 | ----- 6 | 7 | * RELEASE 0.5.2 version 8 | * Use the network target on the Systemd unit files 9 | * Calculate average only when actual connections have been made 10 | * Update binary path in systemd file for haproxystats-process service 11 | * Update binary path in systemd file for haproxystats-pull service 12 | * Update python packages for CentOS distribution 13 | 14 | 0.5.1 15 | ----- 16 | 17 | * RELEASE 0.5.1 version 18 | * Fix runtime error when specifying a custom list of metrics 19 | * Check liveness of workers and exit if at least one is dead 20 | * Fix formatting in README for group-namespace option 21 | 22 | 0.5.0 23 | ----- 24 | 25 | * RELEASE 0.5.0 version 26 | * Support grouping metrics by frontend, backend and server names 27 | 28 | 0.4.2 29 | ----- 30 | 31 | * RELEASE 0.4.2 version 32 | * Add missing comma 33 | 34 | 0.4.1 35 | ----- 36 | 37 | * RELEASE 0.4.1 version 38 | * Add supporting of ubuntu hosts and update Ansible playbook 39 | * Also report about total requests for backend 40 | * update readme file 41 | * update Ansible Playbook and readme file 42 | 43 | 0.4.0 44 | ----- 45 | 46 | * RELEASE 0.4.0 version 47 | * Handle the absent of daemon's metric 48 | * It's a counter, so should be in different section 49 | * Update the list of supported metrics 50 | * Use int rather string in the default values 51 | * Use int rather string in default value 52 | * Switch to 
correct section name for pycodestyle 53 | * PEP257 compatible docstrings 54 | * Switch from yarl.UR to urlparse from standard Python library 55 | * Update reference of UNIX sockets to include TCP 56 | * PEP257 compatible docstring 57 | * Simplify the return logic of two functions 58 | * Add configuration\_check\_for\_servers() 59 | * PEP257 compatible docstrings 60 | * Disable pylint warning for too-many-branches 61 | * Change the log severity to info for task report 62 | * Add support for pulling statistics from TCP sockets 63 | * Make that code more readable 64 | * PEP257 compatible docstrinigs 65 | * Fix typo in README 66 | 67 | 0.3.15 68 | ------ 69 | 70 | * RELEASE 0.3.15 version 71 | * Add dcon and dses in the list of FRONTEND\_METRICS 72 | * Add ctime in the list of SERVER\_AVG\_METRICS 73 | * Add chkdown in the list of server metrics 74 | * Add \`slim\` to server metrics, in addition to frontends and backends 75 | * Mention in README about ansible and move code under contrib 76 | * fix update readme file 77 | * - Add Ansible Playbook For Fast Deploy haproxystats On CentOS 7.x - Update README File 78 | * Fix a typo in README.rst 79 | * Place metric back in the queue in case of OSError exc 80 | * Log a message when graphite dispatcher queue is full 81 | * Make docstrings compatible with EP257 82 | 83 | 0.3.14 84 | ------ 85 | 86 | * RELEASE 0.3.14 version 87 | * Fix wrong calculation for Uptime\_sec 88 | 89 | 0.3.13 90 | ------ 91 | 92 | * RELEASE 0.3.13 version 93 | * Convert Idle\_pct to CpuUsagePct 94 | 95 | 0.3.12 96 | ------ 97 | 98 | * RELEASE 0.3.12 version 99 | * Fix yet another regression introduced by 9e78b6918 100 | * Fix indentation issues 101 | * Drop unused columns to safe memory 102 | * Fix performance regression introduced by 9e78b6918 103 | 104 | 0.3.11 105 | ------ 106 | 107 | * RELEASE 0.3.11 version 108 | * Compute standard deviation and percentiles for Idle\_pct 109 | * Sanitize values in pxname and svname columns 110 | * Simplify the way we exclude frontends/backends 111 | * Add TotalServer metrics for backend, close #4 112 | * Update URI references for HAProxy documentation 113 | 114 | 0.3.10 115 | ------ 116 | 117 | * RELEASE 0.3.10 version 118 | * Make sure we cancel get() coroutine when times out 119 | 120 | 0.3.9 121 | ----- 122 | 123 | * RELEASE 0.3.9 version 124 | * Avoid replacing '.' 
with '\_' in namespace setting 125 | * Avoid crashing when daemon statistics are inconsistent 126 | * Do a comparison to None singleton with is not 127 | * Rename variable epoch to timestamp 128 | * Place keywords in the same line 129 | 130 | 0.3.8 131 | ----- 132 | 133 | * RELEASE 0.3.8 version 134 | * Declare failure when zero UNIX socket are found 135 | * Rephrase various comments and log messages 136 | * Check queue length when it is safe to do 137 | * Remove shebang and add vim file encoding 138 | * Remove '%' prompt character from instructions 139 | * Move external software under contrib directory 140 | * Fix a typo in README 141 | * Add more keywords in setup.cfg 142 | * Update README 143 | * Ignore pep8 W503 144 | * Increase readability of code 145 | * Remove rst file extension for LICENSE 146 | * Add rst file extension for LICENSE 147 | * Update TODO 148 | * Add missing colons to declare a block 149 | * Update development instructions in README 150 | * Add instructions to build a development environemnt 151 | * Update TODO 152 | 153 | 0.3.7 154 | ----- 155 | 156 | * RELEASE 0.3.7 version 157 | * Include average metrics when processing per daemon 158 | * Remove redundant string replacement 159 | * Provide number of metrics which are sent 160 | * Fix a silly bug in conditional statement 161 | * Update docstrings/comments 162 | * Mention that WallClockTime includes sending time 163 | * Add/update docstrings and fix pylint warnings 164 | * Fix a but where we skip CompressBpsIn metric 165 | * Mention in documentation about SslConnPercentage 166 | * Calculate percentage also for SslCurrConns 167 | 168 | 0.3.6 169 | ----- 170 | 171 | * RELEASE 0.3.6 version 172 | * Produce statistics for haproxystats-process 173 | * Fix a typo for option name in the puppet module 174 | 175 | 0.3.5 176 | ----- 177 | 178 | * RELEASE 0.3.5 version 179 | * Remove check for accessibility for dst-dir and tmp-dst-dir 180 | * Disable some pylint checks 181 | * Calculate percentages for HAProxy workers 182 | * Use correct name 183 | * Add a missing comma 184 | * Update installation instructions 185 | 186 | 0.3.4 187 | ----- 188 | 189 | * RELEASE 0.3.4 version 190 | * Add items in TODO 191 | * Add home-page in setup.cfg 192 | * Make sure configuration has valid metric names 193 | * Remove done tasks from TODO 194 | 195 | 0.3.3 196 | ----- 197 | 198 | * RELEASE 0.3.3 version 199 | * Add a tcp\_server which simulates graphite-relay 200 | * Add support for per process statistics 201 | * Make it more clear that we perform aggregation 202 | 203 | 0.3.2 204 | ----- 205 | 206 | * RELEASE 0.3.2 version 207 | * Make sure we can \_\_init\_\_ method of parent class 208 | * Remove unnecessary fallback 209 | * Catch the case where config is in invalid format 210 | * Perform a sanity check on configuration 211 | * Disable pylint warning at the correct line 212 | * Add sanity checks for parameters in puppet module 213 | * Add queue-size parameter in the example config 214 | * Update puppet module 215 | 216 | 0.3.1 217 | ----- 218 | 219 | * RELEASE 0.3.1 version 220 | * Document queue-size parameter of pull section 221 | * Rephrase the paragraph in Queuing system 222 | * Use fallback to True for prefix-hostname 223 | * User hyphen rather underscore 224 | * Use correct key name for queue-size parameter 225 | * Add connect and write timeout to defaults/README 226 | 227 | 0.3.0 228 | ----- 229 | 230 | * RELEASE 0.3.0 version 231 | * Add support for excluding frontends and backends 232 | * Revert "Replace in README UTF-8 characters 
with plain text" 233 | * Refactor the loglevel part 234 | * Disable pylint checks for few cases 235 | * Make load\_file\_content to skip commented out lines 236 | * Catch the case when incoming directory disappears 237 | * Disable few pylint checks 238 | * Remove unnecessary fallback 239 | * Add aggr-server-metrics parameter in the defaults 240 | 241 | 0.2.1 242 | ----- 243 | 244 | * RELEASE 0.2.1 version 245 | * Increase severity to info, to easier debugging 246 | * Replace in README UTF-8 characters with plain text 247 | * Provide more accurate sleep time to avoid interval drifting 248 | 249 | 0.2.0 250 | ----- 251 | 252 | * RELEASE 0.2.0 version 253 | * Rephrase few sentences and fix spelling mistakes 254 | 255 | 0.1.16 256 | ------ 257 | 258 | * RELEASE 0.1.16 version 259 | * Compute averages rather sum for act/bck metrics 260 | * Update README 261 | * Remove README under nagios 262 | * Update nagios and monit checks 263 | * Update puppet code 264 | * Add minimum version for pyinotify in requirements 265 | * Update requirements.txt 266 | * Add a function to load content of file to a list 267 | * Catch OSError when pandas parses csv file 268 | 269 | 0.1.15 270 | ------ 271 | 272 | * RELEASE 0.1.15 version 273 | * Handle TCP socket failures in a better way 274 | * Remove unused variable 275 | * Rename decorator, retries --> retry\_on\_failures 276 | * Add docstrings in functions 277 | * Remove log\_hook as it is not needed anymore 278 | * Rearrange comments 279 | * Replace a for loop with a list comprehension 280 | * Remove unnecessary dependencies from Unit files 281 | 282 | 0.1.14 283 | ------ 284 | 285 | * RELEASE 0.1.14 286 | * Catch the case when FD is closed from outside 287 | * Update on puppet recipe 288 | * Add puppet classes 289 | * Add monit configuration 290 | * Updates on nagios checks 291 | * Update TODO 292 | 293 | 0.1.13 294 | ------ 295 | 296 | * RELEASE 0.1.13 297 | * Make configurable the aggregation of server's stats 298 | * Make metric names configurable 299 | * Rearrange metric name lists 300 | * Add nagios checks 301 | 302 | 0.1.12 303 | ------ 304 | 305 | * RELEASE 0.1.12 306 | * Report only the relevant sections for each program 307 | 308 | 0.1.11 309 | ------ 310 | 311 | * RELEASE 0.1.11 version 312 | * Fix regression introduced by 5d1003ca 313 | 314 | 0.1.10 315 | ------ 316 | 317 | * RELEASE 0.1.10 version 318 | * Use float rather int for measuring wall clock 319 | * Report wallclock time for processing statistics 320 | * Add support for connect and write timeouts 321 | * Remove metrics from utils.py 322 | * Move metrics to a separate module 323 | * Update docstrings/comments 324 | 325 | 0.1.9 326 | ----- 327 | 328 | * RELEASE 0.1.9 version 329 | * Remove unnecessary server metrics 330 | * Disable computation of stats across all backends 331 | 332 | 0.1.8 333 | ----- 334 | 335 | * RELEASE 0.1.8 version 336 | * Introduce queue-size for pull program 337 | * Log exception when we fail to send data to graphite 338 | * Avoid leaking FD when connection timeout 339 | * Log when we close TCP connection to graphite 340 | * Rewrite comment 341 | 342 | 0.1.7 343 | ----- 344 | 345 | * RELEASE 0.1.7 version 346 | * Set timeout on TCP socket for graphite handler 347 | 348 | 0.1.6 349 | ----- 350 | 351 | * RELEASE 0.1.6 version 352 | * Avoid a crash if incoming directory can't be removed 353 | * Log version on startup 354 | 355 | 0.1.5 356 | ----- 357 | 358 | * RELEASE 0.1.5 359 | * Catch few more Connection exceptions on close 360 | * Report wall clock time for pulling 
statistics 361 | * Emit an error when we fail to remove temporary dir 362 | * Add retry logic on connections to UNIX sockets 363 | * Warn if tasks are canceled when reach pull-timeout 364 | * More reasonable values for pull retry logic 365 | * Update TODO 366 | * Fix a regression introduced by b61ce6bcc 367 | * Remove unused variable 368 | * Remove unnecessary space 369 | * Rename parameter signal to signalname 370 | * Rearrange log messages when parsing sites stats 371 | * Rearrange log messages when parsing daemon stats 372 | * Error if Pandas data frame is empty 373 | * Prevent passing empty data to Pandas 374 | * Reorder log messages and change severities 375 | * Warn if data directory doesn't have any files 376 | 377 | 0.1.4 378 | ----- 379 | 380 | * RELEASE 0.1.4 381 | * Catch socket.timeout 382 | * Log filenames in debug mode 383 | 384 | 0.1.3 385 | ----- 386 | 387 | * RELEASE 0.1.3 388 | * Remove old data file from temporary directory 389 | 390 | 0.1.2 391 | ----- 392 | 393 | * RELEASE 0.1.2 394 | * Introduce pull-timeout 395 | * Keep isolate shutdown and write\_file functions 396 | * Catch timeout during the connection 397 | 398 | 0.1.1 399 | ----- 400 | 401 | * RELEASE 0.1.1 402 | * Rework the retries decorator 403 | 404 | 0.1.0 405 | ----- 406 | 407 | * RELEASE 0.1.0 408 | * Shield workers from death due to errors from dispatchers 409 | 410 | 0.0.5 411 | ----- 412 | 413 | * RELEASE 0.0.5 version 414 | * Don't try to process empty Pandas data frame 415 | 416 | 0.0.4 417 | ----- 418 | 419 | * RELEASE 0.0.4 version 420 | * Break early when STOP item is fetched from queue 421 | * Don't enable local-store by default 422 | * Perform a clean shutdown in case no tasks are running 423 | * Wait for socket files to be created 424 | * Don't proceed if watched dir isn't created 425 | * Introduce a configuration parameter for process 426 | * Add systemd unit files 427 | 428 | 0.0.3 429 | ----- 430 | 431 | * RELEASE 0.0.3 version 432 | * Don't include 'haproxy' in graphite path 433 | * Fix typo in the section name 434 | 435 | 0.0.2 436 | ----- 437 | 438 | * RELEASE 0.0.2 version 439 | * Fix a directory pathname in the defaults 440 | * Insert also 'haproxy' in the graphite namespace 441 | * Add requirements file 442 | 443 | 0.0.1 444 | ----- 445 | 446 | * RELEASE 0.0.1 version 447 | * Change suffix for README in setup.cfg 448 | * The 1st functional version of the program:-) 449 | * Initial commit 450 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. README.rst 2 | 3 | ============ 4 | haproxystats 5 | ============ 6 | 7 | *A HAProxy statistics collection program* 8 | 9 | .. contents:: 10 | 11 | Introduction 12 | ------------ 13 | 14 | **haproxystats** is a statistics collector for `HAProxy`_ load balancer which 15 | processes various statistics and pushes them to graphing systems (Graphite). 16 | It is designed to satisfy the following requirements: 17 | 18 | #. Fast and configurable processing of HAProxy statistics 19 | #. Perform aggregation when HAProxy runs in multiprocess (nbproc > 1) 20 | #. Pull statistics at very low intervals (10secs) 21 | #. Flexible dispatching of statistics to different systems (Graphite, kafka) 22 | 23 | The main design characteristic is the split between pulling the statistics and 24 | processing them. This provides the ability to pull data as frequently 25 | as possible without worrying about the impact on processing time. It also 26 | reduces the risk of losing data in case of trouble during the processing phase. 27 | 28 | It runs locally on each load balancer node, offering a decentralized setup for 29 | the processing phase, but it can be easily extended in the future to have a 30 | centralized setup for the processing phase. In that centralized setup it will 31 | be possible to perform aggregation on a cluster level as well. 32 | Until then users can deploy `carbon-c-relay`_ for aggregation. 33 | 34 | Because of this design haproxystats comes with two programs: 35 | **haproxystats-pull** and **haproxystats-process**. The former pulls 36 | statistics from HAProxy via `stats socket`_ and it uses the `asyncio`_ framework 37 | from Python to achieve high concurrency and low footprint. The latter 38 | processes the statistics and pushes them to various destinations. It utilizes 39 | `Pandas`_ for data analysis and the multiprocess framework from Python. 40 | 41 | haproxystats requires Python 3.4, docopt and Pandas to be available in the 42 | system. 43 | 44 | How haproxystats works 45 | ---------------------- 46 | 47 | 48 | .. image:: haproxystats-architecture.png 49 | 50 | 51 | haproxystats-pull sends `info`_ and `stat`_ commands to all haproxy processes 52 | in order to collect statistics for the daemon and for all 53 | frontends/backends/servers. Data returned from each process and for each 54 | command is stored in individual files which are saved under one directory. The 55 | time (seconds since the epoch) of retrieval is used to name that directory. 56 | haproxystats-process watches for changes on the parent directory and when a 57 | directory is created it adds its full path to the queue. Multiple workers pick 58 | up items (directories) from the queue and process statistics from those 59 | directories. 
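Concretely, the data haproxystats-pull gathers is what HAProxy returns for the ``show info`` and
``show stat`` commands on its stats socket. As a rough illustration (this is not the project's code,
which is built on asyncio; the socket path is only an example), the same exchange can be reproduced
with a few lines of Python::

    import socket

    def fetch(socket_file, command):
        """Send one command to a HAProxy stats socket and return the raw reply."""
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
            sock.connect(socket_file)
            sock.sendall(command.encode() + b'\n')
            data = b''
            while True:
                chunk = sock.recv(4096)
                if not chunk:
                    break
                data += chunk
        return data.decode()

    # one pair of files per HAProxy process, e.g. admin1.sock_info / admin1.sock_stat
    print(fetch('/run/haproxy/admin1.sock', 'show info'))
    print(fetch('/run/haproxy/admin1.sock', 'show stat'))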
60 | 61 | haproxystats-pull 62 | ################# 63 | 64 | haproxystats-pull leverages the `asyncio`_ framework from Python by utilizing 65 | coroutines to multiplex I/O access over several `stats socket`_, which are 66 | simple UNIX and TCP sockets. 67 | 68 | The actual task of storing the data to the file system is off-loaded to a very 69 | light `pool of threads`_ in order to avoid blocking the coroutines during the 70 | disk IO phase. 71 | 72 | haproxystats-pull manages the *incoming* directory and makes sure directories 73 | are created with correct names. It also suspends the collection when the number 74 | of directories under the *incoming* directory exceeds a threshold. This avoids 75 | filling up the disk when haproxystats-process is unavailable for sometime. 76 | This an example of directory structure: 77 | 78 | .. code-block:: bash 79 | 80 | incoming 81 | ├── 1457298067 82 | │   ├── admin1.sock_info 83 | │   ├── admin1.sock_stat 84 | │   ├── admin2.sock_info 85 | │   ├── admin2.sock_stat 86 | │   ├── admin3.sock_info 87 | │   ├── admin3.sock_stat 88 | │   ├── admin4.sock_info 89 | │   └── admin4.sock_stat 90 | └── 1457298072 91 | ├── admin1.sock_info 92 | ├── admin1.sock_stat 93 | ├── admin2.sock_info 94 | ├── admin2.sock_stat 95 | ├── admin3.sock_info 96 | ├── admin3.sock_stat 97 | ├── admin4.sock_info 98 | └── admin4.sock_stat 99 | 100 | haproxystats-process 101 | #################### 102 | 103 | haproxystats-process is a multiprocess program. The parent process uses the 104 | Linux kernel's `inotify`_ API to watch for changes in *incoming* directory. 105 | 106 | It receives an event when a directory is either created or moved in *incoming* 107 | directory. The event contains the absolute path name of that directory. It 108 | maintains an internal queue in which it puts directory names. Multiple child 109 | processes pick directory names from the queue and process the data. 110 | 111 | Its worker dispatches statistics to various destinations. The directories are 112 | removed from *incoming* directory when all statistics are successfully 113 | processed. 114 | 115 | When haproxystats-process starts it scans the *incoming* directory 116 | for new directories and processes them instantly, so you don't lose statistics 117 | if haproxystats-process is unavailable for sometime. 118 | 119 | Dispatchers 120 | ########### 121 | 122 | haproxystats-process currently supports 2 different dispatchers. 123 | 124 | 1. **Graphite** 125 | 126 | Pushes statistics to a Graphite system via a local or remote carbon-relay. 127 | The recommended method is to use `carbon-c-relay`_. It is very fast and capable 128 | of handling millions of metrics per second. This dispatcher utilizes an internal 129 | queue to store metrics which are failed to be sent to Graphite. 130 | 131 | An example of graphite namespace:: 132 | 133 | ..haproxy.frontend.. 134 | ..haproxy.backend.. 135 | ..haproxy.backend..server. 136 | ..haproxy.server.. 137 | ..haproxy.daemon. 138 | ..haproxy.haproxystats.. 139 | 140 | 2. **local-store** 141 | 142 | Stores statistics in the local disk. Use it only for debugging purposes. 143 | 144 | Statistics for HAProxy 145 | ###################### 146 | 147 | In addition the statistics that are exposed by HAProxy, haproxystats provides 148 | the following statistics. 149 | 150 | HAProxy process 151 | ~~~~~~~~~~~~~~~ 152 | 153 | HAProxy exposes Idle_pct and haproxystats-process converts it to CPU 154 | utilization without removing Idle_pct metric. 
This avoids the usage of 155 | scale(-1) and offset(100) functions on graphite:: 156 | 157 | CpuUsagePct CPU utilization in percentage 158 | 159 | The following metrics are calculated only when HAProxy is configured with more 160 | than 1 processes (nbproc > 1):: 161 | 162 | 25PercentileCpuUsagePct 25th percentile of CpuUsagePct across all processes 163 | 50PercentileCpuUsagePct 50th percentile -//- 164 | 75PercentileCpuUsagePct 75th percentile -//- 165 | 95PercentileCpuUsagePct 95th percentile -//- 166 | 99PercentileCpuUsagePct 99th percentile -//- 167 | StdCpuUsagePct standard deviation -//- 168 | 169 | Queuing system 170 | ############## 171 | 172 | The *incoming* directory together with the inotify API provides a simple 173 | queueing system which is used as a communication channel between 174 | haproxystats-pull and haproxystats-process programs. 175 | 176 | There isn't any feedback mechanism in place, thus haproxystats-pull monitors 177 | the number of directories before it pulls data from HAProxy and suspends its 178 | job when the number of directories exceeds a threshold. 179 | 180 | See **queue-size** parameter of **pull** section. 181 | 182 | Statistics for haproxystats 183 | ########################### 184 | 185 | **haproxystats** provides statistics for the time it takes to process, 186 | calculate and send HAProxy metrics. By default provides the following list 187 | of metric names with values in seconds:: 188 | 189 | loadbalancers.lb-01.haproxy.haproxystats.WallClockTimeHAProxy 190 | loadbalancers.lb-01.haproxy.haproxystats.WallClockTimeFrontends 191 | loadbalancers.lb-01.haproxy.haproxystats.WallClockTimeBackends 192 | loadbalancers.lb-01.haproxy.haproxystats.WallClockTimeServers 193 | loadbalancers.lb-01.haproxy.haproxystats.WallClockTimeAllStats 194 | 195 | It also provides the number of metrics which are send to graphite:: 196 | 197 | loadbalancers.lb-01.haproxy.haproxystats.MetricsHAProxy 198 | loadbalancers.lb-01.haproxy.haproxystats.MetricsFrontend 199 | loadbalancers.lb-01.haproxy.haproxystats.MetricsBackend 200 | loadbalancers.lb-01.haproxy.haproxystats.MetricsServer 201 | 202 | Configuration 203 | ------------- 204 | 205 | haproxystats uses the popular `INI`_ format for its configuration file. 206 | This is an example configuration file (/etc/haproxystats.conf):: 207 | 208 | 209 | [DEFAULT] 210 | loglevel = info 211 | retries = 2 212 | timeout = 1 213 | interval = 2 214 | 215 | [paths] 216 | base-dir = /var/lib/haproxystats 217 | 218 | [pull] 219 | loglevel = info 220 | socket-dir = /run/haproxy 221 | retries = 1 222 | timeout = 0.1 223 | interval = 0.5 224 | pull-timeout = 2 225 | pull-interval = 10 226 | dst-dir = ${paths:base-dir}/incoming 227 | tmp-dst-dir = ${paths:base-dir}/incoming.tmp 228 | workers = 8 229 | queue-size = 360 230 | 231 | [process] 232 | src-dir = ${paths:base-dir}/incoming 233 | workers = 4 234 | per-process-metrics = false 235 | 236 | [graphite] 237 | server = 127.0.0.1 238 | port = 3002 239 | retries = 3 240 | interval = 1.8 241 | connect-timeout = 1.0 242 | write-timeout = 1.0 243 | delay = 10 244 | backoff = 2 245 | namespace = loadbalancers 246 | prefix-hostname = true 247 | fqdn = true 248 | queue-size = 1000000 249 | 250 | #[local-store] 251 | #dir = ${paths:base-dir}/local-store 252 | 253 | All the above settings are optional as haproxystats comes with default values 254 | for all of them. Thus, both programs can be started without supplying any 255 | configuration. 
256 | 257 | DEFAULT section 258 | ############### 259 | 260 | Settings in this section can be overwritten in other sections. 261 | 262 | * **loglevel** Defaults to **info** 263 | 264 | Log level to use, possible values are: debug, info, warning, error, critical 265 | 266 | * **retries** Defaults to **2** 267 | 268 | Number of times to retry a connection after a failure. Used by haproxystats-pull 269 | and haproxystats-process when they open a connection to a UNIX/TCP socket and 270 | Graphite respectively. 271 | 272 | * **timeout** Defaults to **1** (seconds) 273 | 274 | Time to wait for establishing a connection. Used by haproxystats-pull and 275 | haproxystats-process when they open a connection to a UNIX/TCP socket and Graphite 276 | respectively. 277 | 278 | * **interval** Defaults to **2** 279 | 280 | Time to wait before trying to open a connection. Used by haproxystats-pull and 281 | haproxystats-process when they retry a connection to a UNIX/TCP socket and Graphite 282 | respectively. 283 | 284 | paths section 285 | ############# 286 | 287 | * **base-dir** Defaults to **/var/lib/haproxystats** 288 | 289 | The directory to use as the base of the directory structure. 290 | 291 | pull section 292 | ############ 293 | 294 | * **socket-dir** Unset by default 295 | 296 | A directory with HAProxy socket files. 297 | 298 | * **servers** Unset by default 299 | 300 | A list of servers to pull statistics from. You define a server by passing a URL, 301 | here some examples:: 302 | 303 | tcp://127.0.0.1:5555 304 | tcp://foo.bar.com:4444 305 | tcp://[fe80::3f2f:46b3:ef0c:a420]:4444 306 | unix:///run/haproxy.sock 307 | 308 | Only TCP and UNIX schemes are supported and the port for TCP servers **must** 309 | be set. For UNIX scheme you can only pass a file and not a directory, but 310 | **socket-dir** option can be set as well, so you can use a directory and UNIX 311 | socket files at the same time. You can use comma as separator to pass multiple 312 | servers:: 313 | 314 | servers = unix:///run/haproxy.sock,tcp://127.0.0.1:555,tcp://127.0.0.1:556 315 | 316 | * **buffer-limit** Defaults to **6291456** (bytes) 317 | 318 | At most size bytes are read and returned from the sockets. Setting too low and 319 | it will slow down the retrieval of statistics. 320 | Only values greater than or equal to 1 are accepted. 321 | 322 | * **retries** Defaults to **1** 323 | 324 | Number of times to reconnect to UNIX/TCP socket after a failure. 325 | 326 | * **timeout** Defaults to **0.1** (seconds) 327 | 328 | Time to wait for establishing a connection to UNIX/TCP socket. There is no need to 329 | set it higher than few ms as haproxy accepts a connection within 1-2ms. 330 | 331 | * **interval** Defaults to **0.5** (seconds) 332 | 333 | Time to wait before trying to reconnect to UNIX/TCP socket after a failure. Tune it 334 | based on the duration of the reload process of haproxy. haproxy reloads within 335 | few ms but in some environments with hundreds different SSL certificates it can 336 | take a bit more. 337 | 338 | * **pull-interval** Defaults to **10** (seconds) 339 | 340 | How often to pull statistics from HAProxy. A value of *1* second can overload 341 | the haproxy processes in environments with thousands backends/servers. 342 | 343 | * **pull-timeout** Defaults to **2** (seconds) 344 | 345 | Total time to wait for the pull process to finish. Should be always less than 346 | **pull-interval**. 
347 | 348 | * **dst-dir** Defaults **/var/lib/haproxystats/incoming** 349 | 350 | A directory to store statistics retrieved by HAProxy. 351 | 352 | * **tmp-dst-dir** Defaults **/var/lib/haproxystats/incoming.tmp** 353 | 354 | A directory to use as temporary storage location before directories are moved 355 | to **dst-dir**. haproxystats-pull stores statistics for each process under 356 | that directory and only when data from all haproxy processes are successfully 357 | retrieved they are moved to **dst-dir**. Make sure **dst-dir** and 358 | **tmp-dst-dir** are on the same file system, so the move of the directories 359 | become a rename which is a quick and atomic operation. 360 | 361 | * **workers** Defaults to **8** 362 | 363 | Number of threads to use for writing statistics to disk. These are very 364 | light threads and don't consume a lot of resources. Shouldn't be set higher 365 | than the number of haproxy processes. 366 | 367 | * **queue-size** Defaults to **360** 368 | 369 | Suspend the pulling of statistics when the number of directories in **dst-dir** 370 | exceeds this limit. 371 | 372 | process section 373 | ############### 374 | 375 | * **src-dir** Defaults **/var/lib/haproxystats/incoming** 376 | 377 | 378 | A directory to watch for changes. It should point to the same directory as 379 | the **dst-dir** option from *pull* section. 380 | 381 | * **workers** Defaults to **4** 382 | 383 | Number of workers to use for processing statistics. These are real processes 384 | which can consume a fair bit of CPU. 385 | 386 | * **frontend-metrics** Unset by default 387 | 388 | A list of frontend metric names separated by space to process. By default all 389 | statistics are processed and this overwrites the default selection. 390 | 391 | haproxystats-process emits an error and refuses to start if metrics aren't 392 | valid HAProxy metrics. Check the list of valid metrics in Chapter 9.1 of 393 | `management`_ documentation of HAProxy. 394 | 395 | * **backend-metrics** Unset by default 396 | 397 | A list of backend metric names separated by space to process. By default all 398 | statistics are processed and this overwrites the default selection. 399 | 400 | haproxystats-process emits an error and refuses to start if metrics aren't 401 | valid HAProxy metrics. Check the list of valid metrics in Chapter 9.1 of 402 | `management`_ documentation of HAProxy. 403 | 404 | * **server-metrics** Unset by default 405 | 406 | A list of server metric names separated by space to process. By default all 407 | statistics are processed and this overwrites the default selection. 408 | 409 | haproxystats-process emits an error and refuses to start if metrics aren't 410 | valid HAProxy metrics. Check the list of valid metrics in Chapter 9.1 of 411 | `management`_ documentation of HAProxy. 412 | 413 | * **aggr-server-metrics** Defaults to **false** 414 | 415 | Aggregates server's statistics across all backends. 416 | 417 | * **exclude-frontends** Unset by default 418 | 419 | A file which contains one frontend name per line for which processing is 420 | skipped. 421 | 422 | * **exclude-backends** Unset by default 423 | 424 | A file which contains one backend name per line for which processing is 425 | skipped. 426 | 427 | * **per-process-metrics** Defaults to **false** 428 | 429 | HAProxy daemon provides statistics and by default **haproxystat-process** 430 | aggregates those statistics when HAProxy runs in multiprocess mode 431 | (nbproc > 1). 
432 | 433 | Set this to **true** to get those statistics also per process as well. 434 | This is quite useful for monitoring purposes where someone wants to monitor 435 | sessions per process in order to see if traffic is evenly distributed to all 436 | processes by the kernel. 437 | 438 | It is also useful in setups where configuration for frontends and backends is 439 | unevenly spread across all processes, for instance processes 1-4 manage SSL 440 | frontends and processes 5-7 manage noSSL frontends. 441 | 442 | This adds another path in Graphite under haproxy space:: 443 | 444 | loadbalancers.lb-01.haproxy.daemon.process.. 445 | 446 | * **calculate-percentages** Defaults to **false** 447 | 448 | Calculates percentages for a selection of metrics for HAProxy daemon. When 449 | **per-process-metrics** is set to **true** the calculation happens also per 450 | HAProxy process. This adds the following metric names:: 451 | 452 | ConnPercentage 453 | ConnRatePercentage 454 | SslRatePercentage 455 | SslConnPercentage 456 | 457 | Those metrics can be used for alerting when the current usage on connections 458 | is very close the configured limit. 459 | 460 | * **liveness-check-interval** Defaults to **10** (seconds) 461 | 462 | How often to check if all workers are alive and trigger a termination if at 463 | least one is dead. 464 | 465 | graphite section 466 | ################ 467 | 468 | This dispatcher **is enabled** by default and it can't be disabled. 469 | 470 | * **server** Defaults to **127.0.0.1** 471 | 472 | Graphite server to connect to. 473 | 474 | * **port** Defaults to **3002** 475 | 476 | Graphite port to connect to. 477 | 478 | * **retries** Defaults to **3** 479 | 480 | Number of times to reconnect to Graphite after a failure. 481 | 482 | * **interval** Defaults to **1.8** (seconds) 483 | 484 | Time to wait before trying to reconnect to Graphite after a failure. 485 | 486 | * **connect-timeout** Defaults to **1** (seconds) 487 | 488 | Time to wait for establishing a connection to Graphite relay. 489 | 490 | * **write-timeout** Defaults to **1** (seconds) 491 | 492 | Time to wait on sending data to Graphite relay. 493 | 494 | * **delay** Defaults to **10** (seconds) 495 | 496 | How long to wait before trying to connect again after number of retries has 497 | exceeded the threshold set in **retries**. During the delay period metrics are 498 | stored in the queue of the dispatcher, see **queue-size**. 499 | 500 | * **backoff** Defaults to **2** 501 | 502 | A simple exponential backoff to apply for each retry. 503 | 504 | * **namespace** Defaults to **loadbalancers** 505 | 506 | A top level graphite namespace. 507 | 508 | * **prefix-hostname** Defaults to **true** 509 | 510 | Insert the hostname of the load balancer in the Graphite namespace, example:: 511 | 512 | loadbalancers.lb-01.haproxy. 513 | 514 | * **fqdn** Defaults to **true** 515 | 516 | Use FQDN or short name in the graphite namespace 517 | 518 | * **queue-size** Defaults to **1000000** 519 | 520 | haproxystats-process uses a queue to store metrics which failed to be sent due 521 | to a connection error/timeout. This is a First In First Out queueing system. 522 | When the queue reaches the limit, the oldest items are removed to free space. 523 | 524 | * **group-namespace** Unset by default. 525 | 526 | group graphite metrics by patterns. 
When a frontend, backend or server matches a given pattern, the metric will be prefixed by this
namespace, plus a configurable group name which must be specified in the **frontend-groups**,
**backend-groups** or **server-groups** sections. These sections consist of group names and their
corresponding regular expressions that will be matched against frontend, backend or server names
(depending on the section).

For example:

Let's assume our metrics look something like::

    loadbalancers.lb-01.haproxy.frontend.foo-001.
    loadbalancers.lb-01.haproxy.frontend.foo-002.
    ...
    loadbalancers.lb-01.haproxy.frontend.bar-001.
    loadbalancers.lb-01.haproxy.frontend.bar-002.
    ...

And we want them to be grouped like this::

    loadbalancers.lb-01.haproxy.flavor.abc.frontend.foo-001.
    loadbalancers.lb-01.haproxy.flavor.abc.frontend.foo-002.
    ...
    loadbalancers.lb-01.haproxy.flavor.xyz.frontend.bar-001.
    loadbalancers.lb-01.haproxy.flavor.xyz.frontend.bar-002.
    ...

The configuration should contain these settings::

    [graphite]
    group-namespace = flavor

    [frontend-groups]
    abc = ^foo-
    xyz = ^bar-

Note that if the **group-namespace** setting is specified, then at least one of the
**frontend-groups**, **backend-groups** or **server-groups** sections must be specified as well.

Also note that if frontend, backend or server names contain dots, these will be converted to
underscores for graphite -- because dots are graphite's namespace separator. The patterns will have
to take this into account.

* **group-namespace-double-writes** Unset by default.

Boolean; required only if **group-namespace** is specified. If True, send to graphite the original
metric as well as the grouped metrics. If False, send only the grouped metrics.
(See **group-namespace**.)

frontend-groups, backend-groups, and server-groups sections
###########################################################

Specify the patterns to match against frontend, backend and/or server names, to group graphite
metrics and give them a variable prefix. See **group-namespace**.

These sections are optional, unless **group-namespace** is set.

local-store section
###################

This dispatcher **isn't** enabled by default.

The primary use of the local-store dispatcher is to debug/troubleshoot possible problems with the
processing and/or with Graphite. There isn't any clean-up process in place, thus you need to remove
the files after they are created. Don't leave it enabled for more than 1 hour as it can easily fill
up the disk in environments with hundreds of frontends/backends and thousands of servers.

* **dir** Defaults to **/var/lib/haproxystats/local-store**

A directory to store statistics after they have been processed. The current format is compatible
with Graphite.

Systemd integration
-------------------

haproxystats-pull and haproxystats-process are simple programs which are not daemonized and output
logging messages to stdout. This is by design as it simplifies the code. Daemonization and logging
are off-loaded to systemd, which has everything we need for that job.
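For a rough idea of what such integration looks like, a unit of the following shape is enough to run
haproxystats-pull under systemd (a sketch only; the binary path and user shown here are assumptions)::

    # illustrative only; see contrib/systemd for the real unit files
    [Unit]
    Description=haproxystats-pull
    After=network.target

    [Service]
    User=haproxystats
    ExecStart=/usr/bin/haproxystats-pull -f /etc/haproxystats.conf
    Restart=on-failure

    [Install]
    WantedBy=multi-user.target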
Ready-made service files for both programs are available under the contrib/systemd directory. These
are functional systemd Unit files which are used in production.

The order in which these two programs start doesn't matter and there isn't any soft or hard
dependency between them.

Furthermore, these programs don't need to run as root. It is highly recommended to create a
dedicated user to run them. You need to add that user to the *haproxy* group and adjust the socket
configuration of haproxy to allow write access for the group; see the example configuration below::

    stats socket /run/haproxy/sock1 user haproxy group haproxy mode 660 level admin process 1
    stats socket /run/haproxy/sock2 user haproxy group haproxy mode 660 level admin process 2
    stats socket /run/haproxy/sock3 user haproxy group haproxy mode 660 level admin process 3

The systemd Unit files use the haproxystats user, which has to be created prior to running the
haproxystats programs.

Graceful shutdown
-----------------

In an effort to reduce the loss of statistics, both programs support graceful shutdown. When
*SIGHUP* or *SIGTERM* signals are sent they perform a clean exit. When a signal is sent to
haproxystats-process it may take some time for the program to exit, as it waits for all workers to
empty the queue.

Puppet module
-------------

A Puppet module is available under the contrib directory which provides classes for configuring both
programs.

Because haproxystats-process is a CPU-bound program, CPU affinity is configured using systemd. By
default it pins the workers to the last CPUs.

You should take care of pinning haproxy processes to other CPUs in order to avoid
haproxystats-process *stealing* CPU cycles from haproxy. In production servers you usually pin the
first 80% of the CPUs to haproxy processes and leave the rest of the CPUs for other processes. The
default template of the Puppet module enforces this logic.

haproxystats-pull is a single-threaded program which doesn't use a lot of CPU cycles and by default
is assigned to the last CPU.

Ansible Playbook
----------------

An Ansible playbook is available under the contrib directory. For installation instructions please
read the Installation chapter of this document.

Nagios checks
-------------

Several Nagios checks are provided for monitoring purposes; they can be found under the
contrib/nagios directory.

* check_haproxystats_process_number_of_procs.sh

Monitors the number of processes of the haproxystats-process program. Systemd monitors only the
parent process and this check helps to detect cases where some worker(s) die unexpectedly.

* check_haproxystats_process.sh

A wrapper around the systemctl tool to detect a dead parent process.

* check_haproxystats_pull.sh

A wrapper around the systemctl tool to check if haproxystats-pull is running.

* check_haproxystats_queue_size.py

Checks the size of the *incoming* directory queue, which is consumed by haproxystats-process, and
alerts when it exceeds a threshold.
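The logic of the queue-size check is simple enough to sketch in a few lines of Python; the thresholds
and paths below are made up for illustration and the shipped script remains the authoritative
version::

    #!/usr/bin/env python3
    """Illustrative queue-size check; thresholds and paths are assumptions."""
    import os
    import sys

    INCOMING = '/var/lib/haproxystats/incoming'
    WARN, CRIT = 100, 300  # hypothetical thresholds

    try:
        # each directory under incoming is one pending batch of statistics
        size = sum(1 for entry in os.scandir(INCOMING) if entry.is_dir())
    except OSError as exc:
        print('UNKNOWN: {}'.format(exc))
        sys.exit(3)

    if size >= CRIT:
        print('CRITICAL: queue size {}'.format(size))
        sys.exit(2)
    elif size >= WARN:
        print('WARNING: queue size {}'.format(size))
        sys.exit(1)
    print('OK: queue size {}'.format(size))
    sys.exit(0)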
682 | 683 | 684 | Starting the programs 685 | --------------------- 686 | 687 | :: 688 | 689 | haproxystats-pull -f ./haproxystats.conf 690 | 691 | :: 692 | 693 | haproxystats-process -f ./haproxystats.conf 694 | 695 | Usage:: 696 | 697 | % haproxystats-pull -h 698 | Pulls statistics from HAProxy daemon over UNIX socket(s) 699 | 700 | Usage: 701 | haproxystats-pull [-f ] [-p | -P] 702 | 703 | Options: 704 | -f, --file configuration file with settings 705 | [default: /etc/haproxystats.conf] 706 | -p, --print show default settings 707 | -P, --print-conf show configuration 708 | -h, --help show this screen 709 | -v, --version show version 710 | 711 | 712 | % haproxystats-process -h 713 | Processes statistics from HAProxy and pushes them to Graphite 714 | 715 | Usage: 716 | haproxystats-process [-f ] [-d ] [-p | -P] 717 | 718 | Options: 719 | -f, --file configuration file with settings 720 | [default: /etc/haproxystats.conf] 721 | -d, --dir directory with additional configuration files 722 | -p, --print show default settings 723 | -P, --print-conf show configuration 724 | -h, --help show this screen 725 | -v, --version show version 726 | 727 | 728 | Development 729 | ----------- 730 | I would love to hear what other people think about **haproxystats** and provide 731 | feedback. Please post your comments, bug reports and wishes on my `issues page 732 | `_. 733 | 734 | How to setup a development environment 735 | ###################################### 736 | 737 | Install HAProxy:: 738 | 739 | % sudo apt-get install haproxy 740 | 741 | Use a basic HAProxy configuration in multiprocess mode:: 742 | 743 | global 744 | log 127.0.0.1 len 2048 local2 745 | chroot /var/lib/haproxy 746 | stats socket /run/haproxy/admin1.sock mode 666 level admin process 1 747 | stats socket /run/haproxy/admin2.sock mode 666 level admin process 2 748 | stats socket /run/haproxy/admin3.sock mode 666 level admin process 3 749 | stats socket /run/haproxy/admin4.sock mode 666 level admin process 4 750 | # allow read/write access to anyone----------^ 751 | stats timeout 30s 752 | user haproxy 753 | group haproxy 754 | daemon 755 | nbproc 4 756 | cpu-map 1 0 757 | cpu-map 2 1 758 | cpu-map 3 1 759 | cpu-map 4 0 760 | 761 | defaults 762 | log global 763 | mode http 764 | timeout connect 5000 765 | timeout client 50000 766 | timeout server 50000 767 | 768 | frontend frontend_proc1 769 | bind 0.0.0.0:81 process 1 770 | default_backend backend_proc1 771 | 772 | frontend frontend_proc2 773 | bind 0.0.0.0:82 process 2 774 | default_backend backend_proc1 775 | 776 | frontend frontend1_proc34 777 | bind :83 process 3 778 | bind :83 process 4 779 | default_backend backend1_proc34 780 | 781 | backend backend_proc1 782 | bind-process 1 783 | default-server inter 1000s 784 | option httpchk GET / HTTP/1.1\r\nHost:\ .com\r\nUser-Agent:\ HAProxy 785 | server member1_proc1 10.189.224.169:80 weight 100 check fall 2 rise 3 786 | server member2_proc1 10.196.70.109:80 weight 100 check fall 2 rise 3 787 | server bck_all_srv1 10.196.70.109:88 weight 100 check fall 2 rise 3 788 | 789 | backend backend1_proc34 790 | bind-process 3,4 791 | default-server inter 1000s 792 | option httpchk GET / HTTP/1.1\r\nHost:\ .com\r\nUser-Agent:\ HAProxy 793 | server bck1_proc34_srv1 10.196.70.109:80 check fall 2 inter 5s rise 3 794 | server bck1_proc34_srv2 10.196.70.109:80 check fall 2 inter 5s rise 3 795 | server bck_all_srv1 10.196.70.109:80 check fall 2 inter 5s rise 3 796 | 797 | backend backend_proc2 798 | bind-process 2 799 | default-server inter 1000s 800 
| option httpchk GET / HTTP/1.1\r\nHost:\ .com\r\nUser-Agent:\ HAProxy
801 | server bck_proc2_srv1_proc2 127.0.0.1:8001 check fall 2 inter 5s rise 3
802 | server bck_proc2_srv2_proc2 127.0.0.1:8002 check fall 2 inter 5s rise 3
803 | server bck_proc2_srv3_proc2 127.0.0.1:8003 check fall 2 inter 5s rise 3
804 | server bck_proc2_srv4_proc2 127.0.0.1:8004 check fall 2 inter 5s rise 3
805 | 
806 | Start HAProxy and check that it is up::
807 | 
808 | sudo systemctl start haproxy.service;systemctl status -l haproxy.service
809 | 
810 | Create a Python virtual environment using the virtualenvwrapper tool::
811 | 
812 | mkvirtualenv --python=`which python3` haproxystats-dev
813 | 
814 | **Do not** exit the *haproxystats-dev* virtual environment.
815 | 
816 | Clone the project; if you are planning to contribute, fork it on
817 | GitHub and clone your fork instead::
818 | 
819 | mkdir ~/repo;cd ~/repo
820 | git clone https://github.com/unixsurfer/haproxystats
821 | 
822 | Install the necessary libraries::
823 | 
824 | cd haproxystats
825 | pip install -U pbr setuptools
826 | pip install -r ./requirements.txt
827 | 
828 | Start a TCP server which acts as a Graphite relay and listens on 127.0.0.1:39991::
829 | 
830 | python3 ./contrib/tcp_server.py
831 | 
832 | Install haproxystats::
833 | 
834 | python setup.py install
835 | 
836 | Create the necessary directory structure::
837 | 
838 | mkdir -p ./var/var/lib/haproxystats
839 | mkdir -p ./var/etc
840 | mkdir -p ./var/etc/haproxystats.d
841 | 
842 | Adjust the following configuration and save it in ./var/etc/haproxystats.conf::
843 | 
844 | [DEFAULT]
845 | loglevel = debug
846 | retries = 2
847 | timeout = 1
848 | interval = 2
849 | 
850 | [paths]
851 | base-dir = /home//repo/haproxystats/var/var/lib/haproxystats
852 | 
853 | [pull]
854 | socket-dir = /run/haproxy
855 | retries = 1
856 | timeout = 0.1
857 | interval = 0.5
858 | pull-timeout = 10
859 | pull-interval = 10
860 | dst-dir = ${paths:base-dir}/incoming
861 | tmp-dst-dir = ${paths:base-dir}/incoming.tmp
862 | workers = 8
863 | 
864 | [process]
865 | src-dir = ${paths:base-dir}/incoming
866 | workers = 2
867 | calculate-percentages = true
868 | per-process-metrics = true
869 | 
870 | [graphite]
871 | server = 127.0.0.1
872 | port = 39991
873 | retries = 3
874 | interval = 0.8
875 | timeout = 0.9
876 | delay = 10
877 | backoff = 2
878 | namespace = loadbalancers
879 | prefix_hostname = true
880 | fqdn = true
881 | queue-size = 1000
882 | 
883 | [local-store]
884 | dir = ${paths:base-dir}/local-store
885 | 
886 | Start haproxystats-pull and haproxystats-process in two different terminals::
887 | 
888 | haproxystats-pull -f var/etc/haproxystats.conf
889 | haproxystats-process -f var/etc/haproxystats.conf
890 | 
891 | Exit from the *haproxystats-dev* virtual environment::
892 | 
893 | deactivate
894 | 
895 | **Start hacking and don't forget to make a Pull Request**
896 | 
897 | Installation
898 | ------------
899 | 
900 | Use pip::
901 | 
902 | pip install haproxystats
903 | 
904 | From Source::
905 | 
906 | sudo python setup.py install
907 | 
908 | Build (source) RPMs::
909 | 
910 | python setup.py clean --all; python setup.py bdist_rpm
911 | 
912 | Build a source archive for manual installation::
913 | 
914 | python setup.py sdist
915 | 
916 | Use Ansible Playbook:
917 | 
918 | To deploy haproxystats with the Ansible playbook, go to the contrib/ansible-playbook
919 | directory::
920 | 
921 | cd contrib/ansible-playbook
922 | 
923 | Then enter your HAProxy server's IP address in the hosts file::
924 | 
925 | vi hosts
926 | 
927 | After that, set the information for 
your environment in the group_vars/all variables file::
928 | 
929 | vi group_vars/all
930 | 
931 | Now run the Ansible playbook with this command::
932 | 
933 | ansible-playbook -i hosts main-playbook.yml
934 | 
935 | Once the Ansible playbook completes successfully, you can control haproxystats-pull and haproxystats-process via systemd::
936 | 
937 | systemctl start haproxystats-pull.service
938 | 
939 | systemctl start haproxystats-process.service
940 | 
941 | Use Docker:
942 | 
943 | To build the haproxystats Docker image, clone the project and run the commands below; the only prerequisite is having Docker on your machine::
944 | 
945 | cd haproxystats/contrib/Docker
946 | 
947 | docker build --tag [Your-name]/haproxystats .
948 | 
949 | Launch a container:
950 | 
951 | To launch a haproxystats Docker container, use this command::
952 | 
953 | docker run -d --restart always --name [container_name] --hostname=[container_hostname] -v [path_of_haproxystats_config_file]:/etc/haproxystats/haproxystats.conf -v [path_of_haproxy_socket_files]:[path_of_socket-dir] [Your-name]/haproxystats
954 | 
955 | Notes:
956 | 
957 | - [container_name]: The name of the container; choose any name you wish.
958 | - [container_hostname]: The hostname of the container; choose any hostname you wish.
959 | - [path_of_haproxystats_config_file]: The path to your haproxystats configuration file.
960 | - [path_of_haproxy_socket_files]: The path to your HAProxy socket files.
961 | - [path_of_socket-dir]: The path of the HAProxy socket files inside the container, as set in the haproxystats.conf file.
962 | 
963 | 
964 | For example::
965 | 
966 | docker run -d --restart always --name haproxystats --hostname=haproxystats -v /opt/haproxystats/haproxystats.conf:/etc/haproxystats/haproxystats.conf -v /var/lib/haproxy:/run/haproxy hos7ein/haproxystats
967 | 
968 | 
969 | How to make a release
970 | ---------------------
971 | 
972 | #. Bump the version in haproxystats/__init__.py
973 | 
974 | #. Commit the above change with::
975 | 
976 | git commit -av -m'RELEASE 0.1.3 version'
977 | 
978 | #. Create a signed tag; pbr will use it for the version number::
979 | 
980 | git tag -s 0.1.3 -m 'bump release'
981 | 
982 | #. Create the source distribution archive (the archive will be placed in the
983 | **dist** directory)::
984 | 
985 | python setup.py sdist
986 | 
987 | #. pbr updates the ChangeLog file and we want to squash this change into the
988 | previous commit, thus run::
989 | 
990 | git commit -av --amend
991 | 
992 | #. Move the current tag to the last commit::
993 | 
994 | git tag -fs 0.1.3 -m 'bump release'
995 | 
996 | #. Push the changes::
997 | 
998 | git push;git push --tags
999 | 
1000 | #. Upload to the Python Package Index::
1001 | 
1002 | twine upload -s -p dist/*
1003 | 
1004 | 
1005 | Contributors
1006 | ------------
1007 | 
1008 | The following people have contributed to the project with feedback and code reviews:
1009 | 
1010 | - Károly Nagy https://github.com/charlesnagy
1011 | 
1012 | - Dan Achim https://github.com/danakim
1013 | 
1014 | Licensing
1015 | ---------
1016 | 
1017 | Apache 2.0
1018 | 
1019 | Acknowledgement
1020 | ---------------
1021 | This program was originally developed for Booking.com. With approval
1022 | from Booking.com, the code was generalised and published as Open Source
1023 | on GitHub, for which the author would like to express his gratitude.
1024 | 1025 | Contacts 1026 | -------- 1027 | 1028 | **Project website**: https://github.com/unixsurfer/haproxystats 1029 | 1030 | **Author**: Pavlos Parissis 1031 | 1032 | .. _HAProxy: http://www.haproxy.org/ 1033 | .. _stats socket: http://cbonte.github.io/haproxy-dconv/1.6/management.html#9.2 1034 | .. _carbon-c-relay: https://github.com/grobian/carbon-c-relay 1035 | .. _Pandas: http://pandas.pydata.org/ 1036 | .. _asyncio: https://docs.python.org/3/library/asyncio.html 1037 | .. _inotify: http://linux.die.net/man/7/inotify 1038 | .. _stat: http://cbonte.github.io/haproxy-dconv/1.6/management.html#show%20stat 1039 | .. _info: http://cbonte.github.io/haproxy-dconv/1.6/management.html#show%20info 1040 | .. _pool of threads: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor 1041 | .. _INI: https://en.wikipedia.org/wiki/INI_file 1042 | .. _carbon-c-relay: https://github.com/grobian/carbon-c-relay 1043 | .. _management: http://cbonte.github.io/haproxy-dconv/1.6/management.html#9.1 1044 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unixsurfer/haproxystats/3ef4b3cacada9b6ed52dcc7726d8dad81a821ed1/TODO.md -------------------------------------------------------------------------------- /contrib/Docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.12 2 | 3 | # ---------------- # 4 | # Installation # 5 | # ---------------- # 6 | 7 | # Install and setup all prerequisites 8 | RUN apk add --no-cache gcc g++ python3 py3-pip python3-dev supervisor &&\ 9 | wget -c -O /requirements.txt https://raw.githubusercontent.com/unixsurfer/haproxystats/master/requirements.txt &&\ 10 | pip3 install --requirement /requirements.txt &&\ 11 | pip3 install haproxystats &&\ 12 | mkdir -p /etc/haproxystats /var/lib/haproxy /var/log/supervisor &&\ 13 | rm -rf /var/cache/apk/* &&\ 14 | rm -rf /requirements.txt 15 | 16 | 17 | COPY ./conf_files/supervisor/ /etc/supervisor.d/ 18 | 19 | 20 | # -------- # 21 | # Run! 
# 22 | # -------- # 23 | 24 | CMD ["/usr/bin/supervisord", "--nodaemon", "--configuration", "/etc/supervisord.conf"] -------------------------------------------------------------------------------- /contrib/Docker/conf_files/haproxystats.conf: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | loglevel = info 3 | retries = 2 4 | timeout = 1 5 | interval = 2 6 | 7 | [paths] 8 | base-dir = /var/lib/haproxystats 9 | 10 | [pull] 11 | loglevel = info 12 | socket-dir = /var/lib/haproxy 13 | retries = 1 14 | timeout = 1 15 | interval = 1 16 | pull-timeout = 0.5 17 | pull-interval = 10 18 | dst-dir = ${paths:base-dir}/incoming 19 | tmp-dst-dir = ${paths:base-dir}/incoming.tmp 20 | workers = 8 21 | 22 | [process] 23 | src-dir = ${paths:base-dir}/incoming 24 | workers = 4 25 | 26 | [graphite] 27 | server = 127.0.0.1 28 | port = 3002 29 | retries = 2 30 | interval = 0.8 31 | delay = 10 32 | backoff = 2 33 | namespace = loadbalancers 34 | prefix_hostname = true 35 | fqdn = true 36 | queue-size = 1000000 37 | 38 | #[local-store] 39 | #dir = ${paths:base-dir}/local-store 40 | -------------------------------------------------------------------------------- /contrib/Docker/conf_files/supervisor/haproxystats-process.ini: -------------------------------------------------------------------------------- 1 | [program:haproxystats-process] 2 | command = /usr/bin/haproxystats-process -f /etc/haproxystats/haproxystats.conf 3 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 4 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 5 | autorestart = true -------------------------------------------------------------------------------- /contrib/Docker/conf_files/supervisor/haproxystats-pull.ini: -------------------------------------------------------------------------------- 1 | [program:haproxystats-pull] 2 | command = /usr/bin/haproxystats-pull -f /etc/haproxystats/haproxystats.conf 3 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 4 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 5 | autorestart = true 6 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/group_vars/all: -------------------------------------------------------------------------------- 1 | --- 2 | # Variables listed here are applicable to all host groups 3 | 4 | 5 | haproxy_socket_dir: /var/lib/haproxy 6 | graphite_server: 10.10.22.77 7 | graphite_port: 2003 8 | pull_workers: 4 9 | process_workers: 2 10 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/hosts: -------------------------------------------------------------------------------- 1 | [haproxy-servers] 2 | 10.10.22.70 3 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/main-playbook.yml: -------------------------------------------------------------------------------- 1 | # Installation haproxystats 2 | - hosts: haproxy-servers 3 | remote_user: root 4 | gather_facts: False 5 | roles: 6 | - install-dependency-package 7 | - install-haproxystats 8 | - create-newuser 9 | - haproxy-socket-dir-permission 10 | - systemd-files 11 | - haproxystats-config-file 12 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/roles/create-newuser/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Create haproxystats group 3 | group: 4 | 
name: haproxystats 5 | state: present 6 | 7 | - name : Create new user for haproxystats service and add to haproxystats and haproxy group 8 | user: 9 | name: haproxystats 10 | group: haproxystats 11 | groups: haproxy 12 | append: yes 13 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/roles/haproxy-socket-dir-permission/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set permission for haproxy socket directory 3 | file: 4 | path: "{{ haproxy_socket_dir }}" 5 | state: directory 6 | mode: 0770 7 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/roles/haproxystats-config-file/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Make haproxystats config file 3 | template: 4 | src: haproxystats.conf.j2 5 | dest: /etc/haproxystats.conf 6 | backup: yes 7 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/roles/haproxystats-config-file/templates/haproxystats.conf.j2: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | loglevel = info 3 | retries = 2 4 | timeout = 1 5 | interval = 2 6 | 7 | [paths] 8 | base-dir = /home/haproxystats 9 | 10 | [pull] 11 | loglevel = info 12 | socket-dir = {{ haproxy_socket_dir }} 13 | retries = 1 14 | timeout = 0.1 15 | interval = 0.5 16 | pull-timeout = 2 17 | pull-interval = 10 18 | dst-dir = ${paths:base-dir}/incoming 19 | tmp-dst-dir = ${paths:base-dir}/incoming.tmp 20 | workers = {{ pull_workers }} 21 | queue-size = 360 22 | 23 | [process] 24 | src-dir = ${paths:base-dir}/incoming 25 | workers = {{ process_workers }} 26 | per-process-metrics = false 27 | 28 | [graphite] 29 | server = {{ graphite_server }} 30 | port = {{ graphite_port }} 31 | retries = 3 32 | interval = 1.8 33 | connect-timeout = 1.0 34 | write-timeout = 1.0 35 | delay = 10 36 | backoff = 2 37 | namespace = loadbalancers 38 | prefix-hostname = true 39 | fqdn = true 40 | queue-size = 1000000 41 | 42 | #[local-store] 43 | #dir = ${paths:base-dir}/local-store 44 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/roles/install-dependency-package/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Check out distribution and install python version 2 on Ubuntu hosts 3 | raw: DISTRO=$(awk '/^ID=/' /etc/*-release | awk -F'=' '{ print tolower($2) }') && if [ "$DISTRO" == "ubuntu" ] ; then apt-get -y update && apt-get install -y python-minimal; fi 4 | 5 | - name: enable gather_facts 6 | setup: 7 | 8 | #task on CentOS or RHEL distribution 9 | - name: Install or update EPEL repo 10 | yum: name=epel-release state=latest 11 | when: ansible_distribution == 'CentOS' or ansible_distribution == 'Red Hat Enterprise Linux' 12 | 13 | - name: Install python 3 and pip3 packages 14 | yum: 15 | name: "{{ packages }}" 16 | state: latest 17 | vars: 18 | packages: 19 | - python3-pip 20 | - gcc 21 | - gcc-c++ 22 | - python3-devel 23 | when: ansible_distribution == 'CentOS' or ansible_distribution == 'Red Hat Enterprise Linux' 24 | 25 | #task on Debian or Ubuntu distribution 26 | - name: Install python 3 and pip3 packages 27 | apt: 28 | name: "{{ packages }}" 29 | state: latest 30 | vars: 31 | packages: 32 | - python3 33 | - python3-pip 34 | - 
python3-setuptools 35 | - python-setuptools 36 | when: ansible_distribution == 'Debian' or ansible_distribution == 'Ubuntu' 37 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/roles/install-haproxystats/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Update setuptools by pip3 3 | pip: 4 | name: 5 | - setuptools 6 | state: latest 7 | executable: pip3 8 | 9 | 10 | - name: Install haproxystats by pip3 11 | pip: 12 | name: 13 | - pbr 14 | - pandas 15 | - haproxystats 16 | state: latest 17 | executable: pip3 18 | when: ansible_distribution == 'CentOS' or ansible_distribution == 'Red Hat Enterprise Linux' 19 | 20 | 21 | - name: Install haproxystats by pip3 22 | pip: 23 | name: 24 | - haproxystats 25 | state: latest 26 | executable: pip3 27 | when: ansible_distribution == 'Debian' or ansible_distribution == 'Ubuntu' 28 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/roles/systemd-files/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Make haproxystats-pull systemd file 3 | template: 4 | src: haproxystats-pull.service.j2 5 | dest: /etc/systemd/system/haproxystats-pull.service 6 | backup: yes 7 | 8 | - name: Make haproxystats-process systemd file 9 | template: 10 | src: haproxystats-process.service.j2 11 | dest: /etc/systemd/system/haproxystats-process.service 12 | backup: yes 13 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/roles/systemd-files/templates/haproxystats-process.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Processes statistics from HAProxy and pushes them to Graphite 3 | After=network.target syslog.target 4 | Wants=network.target syslog.target 5 | Documentation=https://github.com/unixsurfer/haproxystats 6 | 7 | [Service] 8 | Type=simple 9 | KillMode=process 10 | Environment="CONFIG=/etc/haproxystats.conf" 11 | User=haproxystats 12 | Group=haproxystats 13 | ExecStart=/usr/local/bin/haproxystats-process -f $CONFIG 14 | TimeoutStartSec=3 15 | TimeoutStopSec=60 16 | Restart=on-failure 17 | 18 | [Install] 19 | WantedBy=multi-user.target 20 | -------------------------------------------------------------------------------- /contrib/ansible-playbook/roles/systemd-files/templates/haproxystats-pull.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Pulls statistics from HAProxy daemon over UNIX sockets 3 | After=network.target syslog.target 4 | Wants=network.target syslog.target 5 | Documentation=https://github.com/unixsurfer/haproxystats 6 | 7 | [Service] 8 | Type=simple 9 | Environment="CONFIG=/etc/haproxystats.conf" 10 | User=haproxystats 11 | Group=haproxystats 12 | ExecStart=/usr/local/bin/haproxystats-pull -f $CONFIG 13 | TimeoutStartSec=3 14 | TimeoutStopSec=6 15 | Restart=on-failure 16 | 17 | [Install] 18 | WantedBy=multi-user.target 19 | -------------------------------------------------------------------------------- /contrib/nagios/check_haproxystats_process.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | osversion=$(/usr/local/bin/facter operatingsystemmajrelease) 3 | if [ "${osversion}" -lt 7 ]; then 4 | echo "OK: haproxystats-process doesn't run here as it only runs on CentOS version 7 and higher" 5 | exit 0 6 | fi 7 | message=$(systemctl is-active haproxystats-process.service) 8 | if [[ $? -ne 0 ]]; then 9 | echo "CRITICAL:" "${message}" 10 | exit 2 11 | else 12 | echo "OK:" "${message}" 13 | exit 0 14 | fi 15 | -------------------------------------------------------------------------------- /contrib/nagios/check_haproxystats_process_number_of_procs.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | VERBOSE_ARG= 3 | 4 | while getopts ":v" opt; do 5 | case $opt in 6 | v) 7 | VERBOSE_ARG=" -vv" 8 | ;; 9 | esac 10 | done 11 | if [[ -x /opt/blue-python/3.4/bin/haproxystats-process && -r /etc/haproxystats.conf ]]; then 12 | WORKERS=$(/opt/blue-python/3.4/bin/haproxystats-process -f /etc/haproxystats.conf -P|grep workers |awk '{print $3}') 13 | if [ $? -ne 0 ]; then 14 | echo "OK: haproxystats-process doesn't run here" 15 | exit 0 16 | fi 17 | PROCESSES=$(($WORKERS+1)) 18 | msg=$(/usr/lib64/nagios/plugins/check_procs\ 19 | -c "${PROCESSES}":"${PROCESSES}"\ 20 | --ereg-argument-array='^/usr/local/bin/blue-python3.4 /opt/blue-python/3.4/bin/haproxystats-process -f /etc/haproxystats.conf$'\ 21 | -u haproxystats\ 22 | $VERBOSE_ARG) 23 | EXITCODE=$? 24 | if [[ ${EXITCODE} -ne 0 ]]; then 25 | echo "${msg}" "Number of processes must be ${PROCESSES} OPDOC: TBD" 26 | else 27 | echo "${msg}" 28 | fi 29 | exit ${EXITCODE} 30 | else 31 | echo "OK: haproxystats-process isn't installed here" 32 | exit 0 33 | fi 34 | -------------------------------------------------------------------------------- /contrib/nagios/check_haproxystats_pull.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | osversion=$(/usr/local/bin/facter operatingsystemmajrelease) 3 | if [ "${osversion}" -lt 7 ]; then 4 | echo "OK: haproxystats-pull doesn't run here as it only runs on CentOS version 7 and higher" 5 | exit 0 6 | fi 7 | message=$(systemctl is-active haproxystats-pull.service) 8 | if [[ $? 
-ne 0 ]]; then 9 | echo "CRITICAL:" "${message}" 10 | exit 2 11 | else 12 | echo "OK:" "${message}" 13 | exit 0 14 | fi 15 | -------------------------------------------------------------------------------- /contrib/nagios/check_haproxystats_queue_size.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | """Checks the size of the queue which is consumed by haproxystats-process 5 | 6 | Usage: 7 | check_haproxystats_queue_size [-f -o -w ] 8 | 9 | Options: 10 | -f, --file configuration file with settings 11 | [default: /etc/haproxystats.conf] 12 | -o, --ok OK threshold [default: 60] 13 | -w, --warning WARNING threshold [default: 120] 14 | -h, --help show this screen 15 | """ 16 | import os 17 | import sys 18 | from configparser import (ConfigParser, ExtendedInterpolation, NoSectionError, 19 | NoOptionError) 20 | from docopt import docopt 21 | 22 | 23 | def main(): 24 | """ 25 | main code 26 | """ 27 | args = docopt(__doc__) 28 | config = ConfigParser(interpolation=ExtendedInterpolation()) 29 | config.read(args['--file']) 30 | try: 31 | base_dir = config.get('pull', 'dst-dir') 32 | except (NoSectionError, NoOptionError) as exc: 33 | print('OK: missing configuration as I got: {e}'.format(e=exc)) 34 | sys.exit(0) 35 | std_msg = (": Queue location={b}, Thresholds OK={ok} WARNING={w} and any " 36 | "higher value is critical").format(b=base_dir, 37 | ok=args['--ok'], 38 | w=args['--warning']) 39 | try: 40 | dirs = [os.path.join(base_dir, x) for x in os.listdir(base_dir) if 41 | os.path.isdir(os.path.join(base_dir, x))] 42 | except (PermissionError, FileNotFoundError, OSError) as exc: 43 | print("UNKNOWN: can't check {d} due to {e}".format(d=base_dir, 44 | e=exc)) 45 | sys.exit(3) 46 | queue_size = len(dirs) 47 | if queue_size <= int(args['--ok']): 48 | print('OK: queue size {q}{s}'.format(q=queue_size, s=std_msg)) 49 | sys.exit(0) 50 | elif int(args['--ok']) < queue_size <= int(args['--warning']): 51 | print('WARNING: queue size {q}{s}'.format(q=queue_size, s=std_msg)) 52 | sys.exit(1) 53 | else: 54 | print('CRITICAL: queue size {q}{s}'.format(q=queue_size, s=std_msg)) 55 | sys.exit(2) 56 | 57 | # This is the standard boilerplate that calls the main() function. 58 | if __name__ == '__main__': 59 | main() 60 | -------------------------------------------------------------------------------- /contrib/puppet/manifests/init.pp: -------------------------------------------------------------------------------- 1 | # == Class: haproxystats 2 | # 3 | # A class to configure HAProxy statistics collection tool haproxystats. 4 | # See more information about haproxystats here 5 | # https://github.com/unixsurfer/haproxystats 6 | # 7 | # === Parameters 8 | # 9 | # Document parameters here. 
10 | # 11 | # [*sample_parameter*] 12 | # 13 | # === Examples 14 | # 15 | # class { 'haproxystats': 16 | # } 17 | # 18 | # === Actions 19 | # 20 | # - Create user and group haproxystats 21 | # 22 | # === Requires 23 | # 24 | # - 'haproxystats' user and group defined in profile_base::user 25 | # - syslog::activate{ 'haproxystats':} 26 | # - daemon-reload exec resource 27 | # exec { 28 | # 'systemd-daemon-reload': 29 | # refreshonly => true, 30 | # command => '/bin/systemctl daemon-reload', 31 | # logoutput => true; 32 | # } 33 | # === Authors 34 | # 35 | # Author Name 36 | # 37 | # === Copyright 38 | # 39 | # Copyright 2016 Pavlos Parissis 40 | # 41 | class haproxystats ( 42 | $package_name = $::haproxystats::params::package_name, 43 | $version = $::haproxystats::params::version, 44 | $enable = $::haproxystats::params::enable, 45 | $autostart = $::haproxystats::params::autostart, 46 | $enable_monit = $::haproxystats::params::enable_monit, 47 | $user = $::haproxystats::params::user, 48 | $group = $::haproxystats::params::group, 49 | $groups = $::haproxystats::params::groups, 50 | $log_rotate = $::haproxystats::params::log_rotate, 51 | $log_rotate_freq = $::haproxystats::params::log_rotate_freq, 52 | $default_loglevel = $::haproxystats::params::default_loglevel, 53 | $default_retries = $::haproxystats::params::default_retries, 54 | $default_timeout = $::haproxystats::params::default_timeout, 55 | $default_interval = $::haproxystats::params::default_interval, 56 | $paths_base_dir = $::haproxystats::params::paths_base_dir, 57 | $pull_loglevel = $::haproxystats::params::pull_loglevel, 58 | $pull_retries = $::haproxystats::params::pull_retries, 59 | $pull_timeout = $::haproxystats::params::pull_timeout, 60 | $pull_interval = $::haproxystats::params::pull_interval, 61 | $pull_socket_dir = $::haproxystats::params::pull_socket_dir, 62 | $pull_pull_timeout = $::haproxystats::params::pull_pull_timeout, 63 | $pull_pull_interval = $::haproxystats::params::pull_pull_interval, 64 | $pull_dst_dir = $::haproxystats::params::pull_dst_dir, 65 | $pull_tmp_dst_dir = $::haproxystats::params::pull_tmp_dst_dir, 66 | $pull_workers = $::haproxystats::params::pull_workers, 67 | $pull_queue_size = $::haproxystats::params::pull_queue_size, 68 | $pull_CPUAffinity = $::haproxystats::params::pull_CPUAffinity, 69 | $process_workers = $::haproxystats::params::process_workers, 70 | $process_src_dir = $::haproxystats::params::process_src_dir, 71 | $process_loglevel = $::haproxystats::params::process_loglevel, 72 | $process_CPUAffinity = $::haproxystats::params::process_CPUAffinity, 73 | $process_aggr_server_metrics = $::haproxystats::params::process_aggr_server_metrics, 74 | $process_per_process_metrics = $::haproxystats::params::process_per_process_metrics, 75 | $process_exclude_frontends = $::haproxystats::params::process_exclude_frontends, 76 | $process_exclude_backends = $::haproxystats::params::process_exclude_backends, 77 | $process_compute_percentages = $::haproxystats::params::process_compute_percentages, 78 | $graphite_server = $::haproxystats::params::graphite_server, 79 | $graphite_port = $::haproxystats::params::graphite_port, 80 | $graphite_retries = $::haproxystats::params::graphite_retries, 81 | $graphite_interval = $::haproxystats::params::graphite_interval, 82 | $graphite_connect_timeout = $::haproxystats::params::graphite_connect_timeout, 83 | $graphite_write_timeout = $::haproxystats::params::graphite_write_timeout, 84 | $graphite_delay = $::haproxystats::params::graphite_delay, 85 | $graphite_backoff = 
$::haproxystats::params::graphite_backoff, 86 | $graphite_queue_size = $::haproxystats::params::graphite_queue_size, 87 | $graphite_namespace = $::haproxystats::params::graphite_namespace, 88 | $graphite_prefix_hostname = $::haproxystats::params::graphite_prefix_hostname, 89 | $graphite_fqdn = $::haproxystats::params::graphite_fqdn, 90 | $local_store_enabled = $::haproxystats::params::local_store_enabled, 91 | $local_store_dir = $::haproxystats::params::local_store_dir, 92 | ) inherits haproxystats::params { 93 | 94 | validate_re($default_loglevel, [ 95 | '^debug$', 96 | '^info$', 97 | '^warning$', 98 | '^error$', 99 | '^critical$', 100 | ] 101 | ) 102 | if ! is_numeric($default_timeout) { 103 | fail("default_timeout must be a number") 104 | } 105 | if ! is_numeric($default_retries) { 106 | fail("default_retries must be a number") 107 | } 108 | if ! is_numeric($default_interval) { 109 | fail("default_interval must be a number") 110 | } 111 | validate_re($pull_loglevel, [ 112 | '^debug$', 113 | '^info$', 114 | '^warning$', 115 | '^error$', 116 | '^critical$', 117 | ] 118 | ) 119 | if ! is_numeric($pull_timeout) { 120 | fail("pull_timeout must be a number") 121 | } 122 | if ! is_numeric($pull_retries) { 123 | fail("pull_retries must be a number") 124 | } 125 | if ! is_numeric($pull_interval) { 126 | fail("pull_interval must be a number") 127 | } 128 | if ! is_numeric($pull_pull_interval) { 129 | fail("pull_pull_interval must be a number") 130 | } 131 | if ! is_numeric($pull_pull_timeout) { 132 | fail("pull_pull_timeout must be a number") 133 | } 134 | if ! is_numeric($pull_workers) { 135 | fail("pull_workers must be a number") 136 | } 137 | if ! is_numeric($pull_queue_size) { 138 | fail("pull_queue_size must be a number") 139 | } 140 | validate_re($process_loglevel, [ 141 | '^debug$', 142 | '^info$', 143 | '^warning$', 144 | '^error$', 145 | '^critical$', 146 | ] 147 | ) 148 | if ! is_numeric($process_workers) { 149 | fail("process_workers must be a number") 150 | } 151 | validate_bool($process_aggr_server_metrics) 152 | validate_bool($process_compute_percentages) 153 | validate_array($process_exclude_backends) 154 | validate_array($process_exclude_frontends) 155 | if ! is_numeric($graphite_port) { 156 | fail("graphite_port must be a number") 157 | } 158 | if ! is_numeric($graphite_retries) { 159 | fail("graphite_retries must be a number") 160 | } 161 | if ! is_numeric($graphite_interval) { 162 | fail("graphite_interval must be a number") 163 | } 164 | if ! is_numeric($graphite_connect_timeout) { 165 | fail("graphite_connect_timeout must be a number") 166 | } 167 | if ! is_numeric($graphite_write_timeout) { 168 | fail("graphite_write_timeout must be a number") 169 | } 170 | if ! is_numeric($graphite_delay) { 171 | fail("graphite_delay must be a number") 172 | } 173 | if ! is_numeric($graphite_backoff) { 174 | fail("graphite_backoff must be a number") 175 | } 176 | if ! 
is_numeric($graphite_queue_size) { 177 | fail("graphite_queue_size must be a number") 178 | } 179 | validate_bool($graphite_prefix_hostname) 180 | validate_bool($graphite_fqdn) 181 | validate_bool($local_store_enabled) 182 | 183 | $dotdir = '/etc/haproxystats.d' 184 | $exclude_frontends_filename = "${dotdir}/exclude_frontend.conf" 185 | $exclude_backends_filename = "${dotdir}/exclude_backend.conf" 186 | realize ( Group[$user] ) 187 | User <| title == "${user}" |> { 188 | groups => $groups, 189 | } 190 | 191 | package { 192 | $package_name: 193 | ensure => $version, 194 | } 195 | 196 | file { 197 | $paths_base_dir: 198 | ensure => directory, 199 | owner => $user, 200 | group => $group, 201 | require => [ 202 | User[$user], 203 | Group[$group] 204 | ], 205 | mode => '0755'; 206 | ['/etc/systemd/system/haproxystats-process.service.d', 207 | '/etc/systemd/system/haproxystats-pull.service.d']: 208 | ensure => directory, 209 | owner => root, 210 | group => root, 211 | mode => '0755', 212 | purge => true, 213 | recurse => true; 214 | '/etc/systemd/system/haproxystats-pull.service.d/overwrites.conf': 215 | ensure => file, 216 | owner => root, 217 | group => root, 218 | mode => '0444', 219 | content => template('haproxystats/pull-systemd-overwrites.conf.erb'), 220 | notify => [ 221 | Exec['systemd-daemon-reload'], 222 | Service['haproxystats-pull'], 223 | ]; 224 | '/etc/systemd/system/haproxystats-process.service.d/overwrites.conf': 225 | ensure => file, 226 | owner => root, 227 | group => root, 228 | mode => '0444', 229 | content => template('haproxystats/process-systemd-overwrites.conf.erb'), 230 | notify => [ 231 | Exec['systemd-daemon-reload'], 232 | Service['haproxystats-process'], 233 | ]; 234 | '/usr/local/bin/haproxystats-process-monit-check.sh': 235 | ensure => file, 236 | owner => root, 237 | group => root, 238 | mode => '0755', 239 | content => template('haproxystats/haproxystats-process-monit-check.sh.erb'); 240 | $dotdir: 241 | ensure => directory, 242 | owner => root, 243 | group => root, 244 | mode => '0755'; 245 | $exclude_frontends_filename: 246 | ensure => size($process_exclude_frontends) ? { 247 | 0 => absent, 248 | default => file, 249 | }, 250 | owner => root, 251 | group => root, 252 | mode => '0444', 253 | content => template('haproxystats/exclude_frontend.conf.erb'), 254 | notify => [ 255 | Service['haproxystats-process'], 256 | ]; 257 | $exclude_backends_filename: 258 | ensure => size($process_exclude_backends) ? 
{ 259 | 0 => absent, 260 | default => file, 261 | }, 262 | owner => root, 263 | group => root, 264 | mode => '0444', 265 | content => template('haproxystats/exclude_backend.conf.erb'), 266 | notify => [ 267 | Service['haproxystats-process'], 268 | ]; 269 | } 270 | concat { 271 | '/etc/haproxystats.conf': 272 | mode => 0444, 273 | owner => $user, 274 | group => $group, 275 | require => [Package[$package_name]], 276 | notify => [ 277 | Service['haproxystats-pull'], 278 | Service['haproxystats-process'], 279 | ]; 280 | } 281 | concat::fragment { 282 | 'defaults': 283 | target => '/etc/haproxystats.conf', 284 | order => '00', 285 | content => template('haproxystats/defaults.conf.erb'); 286 | 'pull': 287 | target => '/etc/haproxystats.conf', 288 | order => '01', 289 | content => template('haproxystats/pull.conf.erb'), 290 | notify => Service['haproxystats-pull']; 291 | 'process': 292 | target => '/etc/haproxystats.conf', 293 | order => '02', 294 | content => template('haproxystats/process.conf.erb'), 295 | notify => Service['haproxystats-process']; 296 | } 297 | service { 298 | 'haproxystats-pull': 299 | ensure => $enable, 300 | enable => $autostart, 301 | require => [ 302 | Package[$package_name], 303 | Concat['/etc/haproxystats.conf'], 304 | ]; 305 | 'haproxystats-process': 306 | ensure => $enable, 307 | enable => $autostart, 308 | require => [ 309 | Package[$package_name], 310 | Concat['/etc/haproxystats.conf'], 311 | ]; 312 | } 313 | syslog::activate{ 'haproxystats': 314 | rotate => $log_rotate, 315 | rotate_freq => $log_rotate_freq; 316 | } 317 | 318 | $real_enable_monit = $enable ? { 319 | false => false, 320 | 'stopped' => false, 321 | default => $enabled_monit, 322 | } 323 | monit::program { 324 | 'haproxystats-process': 325 | enabled => $real_enable_monit, 326 | scriptname => '/usr/local/bin/haproxystats-process-monit-check.sh' , 327 | email => 'foo@bar.com', 328 | tolerance => 2, 329 | priority => 'priority_1', 330 | nrestarts => 2, 331 | stop_timeout => 380, 332 | require => File['/usr/local/bin/haproxystats-process-monit-check.sh']; 333 | } 334 | } 335 | -------------------------------------------------------------------------------- /contrib/puppet/manifests/params.pp: -------------------------------------------------------------------------------- 1 | # - PRIVATE CLASS - 2 | class haproxystats::params { 3 | $package_name = 'blue-python34-haproxystats' 4 | $version = 'latest' 5 | $enable = true 6 | $autostart = true 7 | $enable_monit = true 8 | $user = 'haproxystats' 9 | $group = 'haproxystats' 10 | $groups = 'hapee' 11 | $log_rotate = 4 12 | $log_rotate_freq = 'daily' 13 | $default_loglevel = 'info' 14 | $default_retries = 2 15 | $default_timeout = 1 16 | $default_interval = 2 17 | $paths_base_dir = '/var/lib/haproxystats' 18 | $pull_loglevel = $default_loglevel 19 | $pull_retries = 2 20 | $pull_timeout = 1 21 | $pull_interval = 2 22 | $pull_socket_dir = '/run/hapee' 23 | $pull_pull_timeout = 8 24 | $pull_pull_interval = 10 25 | $pull_dst_dir = '${paths:base-dir}/incoming' 26 | $pull_tmp_dst_dir = '${paths:base-dir}/incoming.tmp' 27 | $pull_workers = 8 28 | $pull_queue_size = 360 29 | $pull_CPUAffinity = undef 30 | $process_workers = 2 31 | $process_loglevel = $default_loglevel 32 | $process_CPUAffinity = undef 33 | $process_aggr_server_metrics = false 34 | $process_per_process_metrics = false 35 | $process_src_dir = '${paths:base-dir}/incoming' 36 | $process_exclude_frontends = [] 37 | $process_exclude_backends = [] 38 | $process_calculate_percentages = false 39 | 
$graphite_server = hiera('graphite::host') 40 | $graphite_port = hiera('graphite::port') 41 | $graphite_retries = 2 42 | $graphite_interval = 1 43 | $graphite_connect_timeout = 2 44 | $graphite_write_timeout = 4 45 | $graphite_delay = 10 46 | $graphite_backoff = 2 47 | $graphite_queue_size = 10000 48 | $graphite_namespace = 'loadbalancers' 49 | $graphite_prefix_hostname = true 50 | $graphite_fqdn = true 51 | $local_store_enabled = false 52 | $local_store_dir = '${paths:base-dir}/local-store' 53 | } 54 | 55 | -------------------------------------------------------------------------------- /contrib/puppet/templates/defaults.conf.erb: -------------------------------------------------------------------------------- 1 | ############################################################################# 2 | # 3 | # This file is managed by Puppet 4 | # any changes made locally will be lost. 5 | # 6 | # The master version of this file is created with concat module using 7 | # the following fragments: 8 | # puppet:///modules/haproxystats/templates/{defaults,pull,process}.conf.erb 9 | # 10 | ############################################################################# 11 | [DEFAULT] 12 | loglevel = <%= @default_loglevel%> 13 | retries = <%= @default_retries%> 14 | timeout = <%= @default_timeout%> 15 | interval = <%= @default_interval%> 16 | 17 | [paths] 18 | base-dir = <%= @paths_base_dir%> 19 | 20 | -------------------------------------------------------------------------------- /contrib/puppet/templates/exclude_backend.conf.erb: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # # 3 | # This file is managed by Puppet # 4 | # any changes made locally will be lost. # 5 | # # 6 | # puppet:///modules/haproxystats/templates/exclude_backend.conf.erb# 7 | # # 8 | #################################################################### 9 | <%- @process_exclude_backends.each do |val| -%> 10 | <%= val.to_s %> 11 | <%- end -%> 12 | -------------------------------------------------------------------------------- /contrib/puppet/templates/exclude_frontend.conf.erb: -------------------------------------------------------------------------------- 1 | ###################################################################### 2 | # # 3 | # This file is managed by Puppet # 4 | # any changes made locally will be lost. # 5 | # # 6 | # puppet:///modules/haproxystats/templates/exclude_frontend.conf.erb # 7 | # # 8 | ###################################################################### 9 | <%- @process_exclude_frontends.each do |val| -%> 10 | <%= val.to_s %> 11 | <%- end -%> 12 | -------------------------------------------------------------------------------- /contrib/puppet/templates/haproxystats-process-monit-check.sh.erb: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | #################################################################################### 3 | # This file is managed by Puppet # 4 | # any changes made locally will be lost. 
# 5 | # puppet:///modules/haproxystats/templates/haproxystats-process-monit-chech.sh.erb # 6 | # # 7 | #################################################################################### 8 | <%- 9 | _processes = @process_workers.to_i + 1 10 | -%> 11 | if [[ -x /opt/blue-python/3.4/bin/haproxystats-process && -r /etc/haproxystats.conf ]]; then 12 | msg=$(/usr/lib64/nagios/plugins/check_procs\ 13 | -c <%= _processes-%>:<%= _processes-%>\ 14 | --ereg-argument-array='^/usr/local/bin/blue-python3.4 /opt/blue-python/3.4/bin/haproxystats-process -f /etc/haproxystats.conf$'\ 15 | -u <%= @user-%> 16 | -vv) 17 | EXITCODE=$? 18 | if [[ ${EXITCODE} -ne 0 ]]; then 19 | echo "${msg}" "Number of processes must be <%= _processes-%> OPDOC: TBD" 20 | else 21 | echo "${msg}" 22 | fi 23 | exit ${EXITCODE} 24 | else 25 | echo "OK: haproxystats-process isn't installed here" 26 | exit 0 27 | fi 28 | -------------------------------------------------------------------------------- /contrib/puppet/templates/process-systemd-overwrites.conf.erb: -------------------------------------------------------------------------------- 1 | ############################################################################# 2 | # 3 | # This file is managed by Puppet 4 | # any changes made locally will be lost. 5 | # 6 | # The master version of this file is at 7 | # puppet:///modules/haproxystats/templates/process-systemd-overwrites.conf.erb 8 | # 9 | ############################################################################# 10 | <%- 11 | _cpus = scope.lookupvar('::processorcount').to_i 12 | if @process_CPUAffinity 13 | _value = @process_CPUAffinity 14 | else 15 | if @process_workers.to_i >= _cpus 16 | _workers = _cpus 17 | else 18 | _workers = @process_workers.to_i 19 | end 20 | cpu_list = Array (0.._cpus-1) 21 | _value = cpu_list[-_workers..-1].join(' ') 22 | end 23 | -%> 24 | [Service] 25 | CPUAffinity = <%=_value%> 26 | -------------------------------------------------------------------------------- /contrib/puppet/templates/process.conf.erb: -------------------------------------------------------------------------------- 1 | <%- 2 | _cpus = scope.lookupvar('::processorcount').to_i 3 | if @process_workers.to_i >= _cpus 4 | _workers = _cpus 5 | else 6 | _workers = @process_workers.to_i 7 | end 8 | -%> 9 | [process] 10 | loglevel = <%= @process_loglevel %> 11 | workers = <%= _workers %> 12 | src-dir = <%= @process_src_dir %> 13 | aggr-server-metrics = <%= @process_aggr_server_metrics %> 14 | per-process-metrics = <%= @process_per_process_metrics %> 15 | <%- if @process_exclude_frontends.size > 0 -%> 16 | exclude-frontends = <%= @exclude_frontends_filename %> 17 | <%- end -%> 18 | <%- if @process_exclude_backends.size > 0 -%> 19 | exclude-backends = <%= @exclude_backends_filename %> 20 | <%- end -%> 21 | calculate-percentages = <%= @process_calculate_percentages %> 22 | 23 | [graphite] 24 | server = <%= @graphite_server %> 25 | port = <%= @graphite_port %> 26 | retries = <%= @graphite_retries %> 27 | interval = <%= @graphite_interval %> 28 | connect-timeout = <%= @graphite_connect_timeout %> 29 | write-timeout = <%= @graphite_write_timeout %> 30 | delay = <%= @graphite_delay %> 31 | backoff = <%= @graphite_backoff %> 32 | queue-size = <%= @graphite_queue_size %> 33 | namespace = <%= @graphite_namespace %> 34 | prefix-name = <%= @graphite_prefix_hostname %> 35 | fqdn = <%= @graphite_fqdn %> 36 | 37 | <%- if @local_store_enabled -%> 38 | [local-store] 39 | dir = <%= @local_store_dir %> 40 | <%- end -%> 41 | 
-------------------------------------------------------------------------------- /contrib/puppet/templates/pull-systemd-overwrites.conf.erb: -------------------------------------------------------------------------------- 1 | ############################################################################# 2 | # 3 | # This file is managed by Puppet 4 | # any changes made locally will be lost. 5 | # 6 | # The master version of this file is at 7 | # puppet:///modules/haproxystats/templates/pull-systemd-overwrites.conf.erb 8 | # 9 | ############################################################################# 10 | <%- 11 | if @pull_CPUAffinity 12 | _value = @pull_CPUAffinity 13 | else 14 | cpu_list = Array (0..scope.lookupvar('::processorcount').to_i-1) 15 | _value = cpu_list[-1] 16 | end 17 | -%> 18 | [Service] 19 | CPUAffinity = <%=_value%> 20 | -------------------------------------------------------------------------------- /contrib/puppet/templates/pull.conf.erb: -------------------------------------------------------------------------------- 1 | [pull] 2 | loglevel = <%= @pull_loglevel %> 3 | socket-dir = <%= @pull_socket_dir %> 4 | retries = <%= @pull_retries %> 5 | timeout = <%= @pull_timeout %> 6 | interval = <%= @pull_interval %> 7 | pull-timeout = <%= @pull_pull_timeout %> 8 | pull-interval = <%= @pull_pull_interval %> 9 | dst-dir = <%= @pull_dst_dir %> 10 | tmp-dst-dir = <%= @pull_tmp_dst_dir %> 11 | workers = <%= @pull_workers %> 12 | queue-size = <%= @pull_queue_size %> 13 | 14 | -------------------------------------------------------------------------------- /contrib/systemd/haproxystats-process.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Processes statistics from HAProxy and pushes them to Graphite 3 | After=network.target syslog.target 4 | Wants=network.target syslog.target 5 | Documentation=https://github.com/unixsurfer/haproxystats 6 | 7 | [Service] 8 | Type=simple 9 | KillMode=process 10 | Environment="CONFIG=/etc/haproxystats.conf" 11 | User=haproxystats 12 | Group=haproxystats 13 | ExecStart=/opt/blue-python/3.4/bin/haproxystats-process -f $CONFIG 14 | TimeoutStartSec=3 15 | TimeoutStopSec=60 16 | Restart=always 17 | 18 | [Install] 19 | WantedBy=multi-user.target 20 | -------------------------------------------------------------------------------- /contrib/systemd/haproxystats-pull.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Pulls statistics from HAProxy daemon over UNIX sockets 3 | After=network.target syslog.target 4 | Wants=network.target syslog.target 5 | Documentation=https://github.com/unixsurfer/haproxystats 6 | 7 | [Service] 8 | Type=simple 9 | Environment="CONFIG=/etc/haproxystats.conf" 10 | User=haproxystats 11 | Group=haproxystats 12 | ExecStart=/opt/blue-python/3.4/bin/haproxystats-pull -f $CONFIG 13 | TimeoutStartSec=3 14 | TimeoutStopSec=6 15 | Restart=on-failure 16 | 17 | [Install] 18 | WantedBy=multi-user.target 19 | -------------------------------------------------------------------------------- /contrib/tcp_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | """ 6 | A very simple TCP server for simulating a graphite relay, copied-paste from 7 | Python documentation. Few things were adjusted to make pylint happy and print 8 | incoming data. 
9 | """ 10 | import asyncio 11 | 12 | 13 | class EchoServerClientProtocol(asyncio.Protocol): 14 | """ 15 | A TCP server 16 | """ 17 | def __init__(self): 18 | self.peername = None 19 | self.transport = None 20 | 21 | def connection_made(self, transport): 22 | self.peername = transport.get_extra_info('peername') 23 | print('Connection from {}'.format(self.peername)) 24 | self.transport = transport 25 | 26 | def data_received(self, data): 27 | message = data.decode() 28 | print(message) 29 | 30 | def connection_lost(self, exc): 31 | print('client {} closed connection {}'.format(self.peername, exc)) 32 | 33 | 34 | def main(): 35 | """ 36 | main code 37 | """ 38 | loop = asyncio.get_event_loop() 39 | # Each client connection will create a new protocol instance 40 | coro = loop.create_server(EchoServerClientProtocol, '127.0.0.1', 39991) 41 | server = loop.run_until_complete(coro) 42 | 43 | # Serve requests until Ctrl+C is pressed 44 | print('Serving on {}'.format(server.sockets[0].getsockname())) 45 | try: 46 | loop.run_forever() 47 | except KeyboardInterrupt: 48 | pass 49 | 50 | # Close the server 51 | server.close() 52 | loop.run_until_complete(server.wait_closed()) 53 | loop.close() 54 | 55 | # This is the standard boilerplate that calls the main() function. 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /haproxystats-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unixsurfer/haproxystats/3ef4b3cacada9b6ed52dcc7726d8dad81a821ed1/haproxystats-architecture.png -------------------------------------------------------------------------------- /haproxystats.conf: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | loglevel = info 3 | retries = 2 4 | timeout = 1 5 | interval = 2 6 | 7 | [paths] 8 | base-dir = /var/lib/haproxystats 9 | 10 | [pull] 11 | loglevel = info 12 | socket-dir = /run/haproxy 13 | retries = 1 14 | timeout = 1 15 | interval = 1 16 | pull-timeout = 0.5 17 | pull-interval = 10 18 | dst-dir = ${paths:base-dir}/incoming 19 | tmp-dst-dir = ${paths:base-dir}/incoming.tmp 20 | workers = 8 21 | 22 | [process] 23 | src-dir = ${paths:base-dir}/incoming 24 | workers = 4 25 | 26 | [graphite] 27 | server = 127.0.0.1 28 | port = 3002 29 | retries = 2 30 | interval = 0.8 31 | delay = 10 32 | backoff = 2 33 | namespace = loadbalancers 34 | prefix_hostname = true 35 | fqdn = true 36 | queue-size = 1000000 37 | 38 | #[local-store] 39 | #dir = ${paths:base-dir}/local-store 40 | -------------------------------------------------------------------------------- /haproxystats/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim:fenc=utf-8 3 | # 4 | """A collection of Python tools to process HAProxy statistics.""" 5 | __title__ = 'haproxystats' 6 | __author__ = 'Pavlos Parissis' 7 | __license__ = 'Apache 2.0' 8 | __version__ = '0.5.2' 9 | __copyright__ = 'Copyright 2016 Pavlos Parissis ] [-d ] [-p | -P] 8 | 9 | Options: 10 | -f, --file configuration file with settings 11 | [default: /etc/haproxystats.conf] 12 | -d, --dir directory with additional configuration files 13 | -p, --print show default settings 14 | -P, --print-conf show configuration 15 | -h, --help show this screen 16 | -v, --version show version 17 | """ 18 | import os 19 | import multiprocessing 20 | import signal 21 | import logging 22 | import glob 
23 | import copy 24 | import re 25 | import sys 26 | import time 27 | import shutil 28 | import socket 29 | import fileinput 30 | from collections import defaultdict 31 | from configparser import ConfigParser, ExtendedInterpolation, ParsingError 32 | from threading import Lock, Thread 33 | from docopt import docopt 34 | import pyinotify 35 | import pandas 36 | 37 | from haproxystats import __version__ as VERSION 38 | from haproxystats import DEFAULT_OPTIONS 39 | from haproxystats.utils import (dispatcher, GraphiteHandler, get_files, 40 | FileHandler, EventHandler, concat_csv, 41 | FILE_SUFFIX_INFO, FILE_SUFFIX_STAT, 42 | load_file_content, configuration_check, 43 | read_write_access, check_metrics, 44 | daemon_percentage_metrics, send_wlc, 45 | calculate_percentage_per_column, 46 | calculate_percentage_per_row) 47 | from haproxystats.metrics import (DAEMON_AVG_METRICS, DAEMON_METRICS, 48 | SERVER_AVG_METRICS, SERVER_AVG_TIME_METRICS, 49 | SERVER_METRICS, 50 | BACKEND_AVG_METRICS, BACKEND_AVG_TIME_METRICS, 51 | BACKEND_METRICS, 52 | FRONTEND_METRICS) 53 | 54 | LOG_FORMAT = ('%(asctime)s [%(process)d] [%(processName)-11s] ' 55 | '[%(funcName)-20s] %(levelname)-8s %(message)s') 56 | logging.basicConfig(format=LOG_FORMAT) 57 | log = logging.getLogger('root') # pylint: disable=I0011,C0103 58 | 59 | watcher = pyinotify.WatchManager() # pylint: disable=I0011,C0103 60 | # watched events 61 | MASK = pyinotify.IN_CREATE | pyinotify.IN_MOVED_TO # pylint: disable=no-member 62 | 63 | STOP_SIGNAL = 'STOP' 64 | 65 | 66 | class Checker(Thread): 67 | """Check the liveness of consumer""" 68 | def __init__(self, consumers, interval): 69 | """Initialization. 70 | 71 | Arguments: 72 | consumers (list): A list of consumers(multiprocessing.Process obj) 73 | interval (float): How often to run the check 74 | """ 75 | super(Checker, self).__init__() 76 | self.daemon = True 77 | self.consumers = consumers 78 | self.interval = interval 79 | 80 | def run(self): 81 | """Terminate main program if at least one consumer isn't alive""" 82 | while True: 83 | alive_consumers = 0 84 | for consumer in self.consumers: 85 | if not consumer.is_alive(): 86 | log.critical("consumer %s is dead", consumer.name) 87 | else: 88 | alive_consumers += 1 89 | log.debug("consumer %s is alive", consumer.name) 90 | if alive_consumers < len(self.consumers): 91 | log.critical("terminating myself as %s consumers are dead", 92 | len(self.consumers) - alive_consumers) 93 | os.kill(os.getpid(), signal.SIGTERM) 94 | 95 | time.sleep(self.interval) 96 | 97 | class Consumer(multiprocessing.Process): 98 | """Process statistics and dispatch them to handlers.""" 99 | 100 | # Cache results of the get_metric_paths() function call 101 | path_cache = { 102 | 'frontend': {}, 103 | 'backend': {}, 104 | 'server': {} 105 | } 106 | 107 | # Store compiled patterns declared in the 'frontend-groups' and 108 | # 'backend-groups' config sections 109 | metric_patterns = { 110 | 'frontend': [], 111 | 'backend': [], 112 | 'server': [], 113 | } 114 | 115 | def __init__(self, tasks, config): 116 | """Initialization. 117 | 118 | Arguments: 119 | tasks (queue): A queue from which we consume items. 120 | config (obj): A configParser object which holds configuration. 
121 | """ 122 | multiprocessing.Process.__init__(self) 123 | self.tasks = tasks 124 | self.config = config 125 | self.local_store = None 126 | self.file_handler = None 127 | self.timestamp = None # The time that statistics were retrieved 128 | 129 | # Build graphite path (..haproxy) 130 | graphite_tree = [] 131 | graphite_tree.append(self.config.get('graphite', 'namespace')) 132 | if self.config.getboolean('graphite', 'prefix-hostname'): 133 | if self.config.getboolean('graphite', 'fqdn'): 134 | graphite_tree.append(socket.gethostname().replace('.', '_')) 135 | else: 136 | graphite_tree.append(socket.gethostname().split('.')[0]) 137 | graphite_tree.append('haproxy') 138 | self.graphite_path = '.'.join(graphite_tree) 139 | 140 | # Compile regex patterns for metric groups 141 | if self.config.has_option('graphite', 'group-namespace'): 142 | self.build_metric_patterns() 143 | self.double_writes =\ 144 | self.config.getboolean('graphite', 145 | 'group-namespace-double-writes') 146 | else: 147 | self.double_writes = False 148 | 149 | 150 | def run(self): 151 | """Consume item from queue and process it. 152 | 153 | It is the target function of Process class. Consumes items from 154 | the queue, processes data which are pulled down by haproxystats-pull 155 | program and uses Pandas to perform all computations of statistics. 156 | 157 | It exits when it receives STOP_SIGNAL as item. 158 | 159 | To avoid orphan processes on the system, it must be robust against 160 | failures and try very hard recover from failures. 161 | """ 162 | if self.config.has_section('local-store'): 163 | self.local_store = self.config.get('local-store', 'dir') 164 | self.file_handler = FileHandler() 165 | dispatcher.register('open', self.file_handler.open) 166 | dispatcher.register('send', self.file_handler.send) 167 | dispatcher.register('flush', self.file_handler.flush) 168 | dispatcher.register('loop', self.file_handler.loop) 169 | 170 | timeout = self.config.getfloat('graphite', 'timeout') 171 | connect_timeout = self.config.getfloat('graphite', 172 | 'connect-timeout', 173 | fallback=timeout) 174 | write_timeout = self.config.getfloat('graphite', 175 | 'write-timeout', 176 | fallback=timeout) 177 | graphite = GraphiteHandler( 178 | server=self.config.get('graphite', 'server'), 179 | port=self.config.getint('graphite', 'port'), 180 | connect_timeout=connect_timeout, 181 | write_timeout=write_timeout, 182 | retries=self.config.getint('graphite', 'retries'), 183 | interval=self.config.getfloat('graphite', 'interval'), 184 | delay=self.config.getfloat('graphite', 'delay'), 185 | backoff=self.config.getfloat('graphite', 'backoff'), 186 | queue_size=self.config.getint('graphite', 'queue-size') 187 | ) 188 | dispatcher.register('open', graphite.open) 189 | dispatcher.register('send', graphite.send) 190 | 191 | dispatcher.signal('open') 192 | 193 | try: 194 | while True: 195 | log.info('waiting for item from the queue') 196 | incoming_dir = self.tasks.get() 197 | log.info('received item %s', incoming_dir) 198 | if incoming_dir == STOP_SIGNAL: 199 | break 200 | start_time = time.time() 201 | 202 | # incoming_dir => /var/lib/haproxystats/incoming/1454016646 203 | # timestamp => 1454016646 204 | self.timestamp = os.path.basename(incoming_dir) 205 | 206 | # update filename for file handler. 207 | # This *does not* error if a file handler is not registered. 
208 | dispatcher.signal('loop', 209 | local_store=self.local_store, 210 | timestamp=self.timestamp) 211 | 212 | self.process_stats(incoming_dir) 213 | 214 | # This flushes data to file 215 | dispatcher.signal('flush') 216 | 217 | # Remove directory as data have been successfully processed. 218 | log.debug('removing %s', incoming_dir) 219 | try: 220 | shutil.rmtree(incoming_dir) 221 | except (FileNotFoundError, PermissionError, OSError) as exc: 222 | log.critical('failed to remove directory %s with:%s. ' 223 | 'This should not have happened as it means ' 224 | 'another worker processed data from this ' 225 | 'directory or something/someone removed the ' 226 | 'directory!', incoming_dir, exc) 227 | elapsed_time = time.time() - start_time 228 | log.info('total wall clock time in seconds %.3f', elapsed_time) 229 | data = ("{p}.haproxystats.{m} {v} {t}\n" 230 | .format(p=self.graphite_path, 231 | m='TotalWallClockTime', 232 | v="{t:.3f}".format(t=elapsed_time), 233 | t=self.timestamp)) 234 | dispatcher.signal('send', data=data) 235 | log.info('finished with %s', incoming_dir) 236 | except KeyboardInterrupt: 237 | log.critical('Ctrl-C received') 238 | 239 | return 240 | 241 | @send_wlc(output=dispatcher, name='AllStats') 242 | def process_stats(self, pathname): 243 | """Delegate the processing of statistics to other functions. 244 | 245 | Arguments: 246 | pathname (str): Directory where statistics from HAProxy are saved. 247 | """ 248 | # statistics for HAProxy daemon and for frontend/backend/server have 249 | # different format and haproxystats-pull save them using a different 250 | # file suffix, so we can distinguish them easier. 251 | files = get_files(pathname, FILE_SUFFIX_INFO) 252 | if not files: 253 | log.warning("%s directory doesn't contain any files with HAProxy " 254 | "daemon statistics", pathname) 255 | else: 256 | self.haproxy_stats(files) 257 | files = get_files(pathname, FILE_SUFFIX_STAT) 258 | 259 | if not files: 260 | log.warning("%s directory doesn't contain any files with site " 261 | "statistics", pathname) 262 | else: 263 | self.sites_stats(files) 264 | 265 | @send_wlc(output=dispatcher, name='HAProxy') 266 | def haproxy_stats(self, files): 267 | """Process statistics for HAProxy daemon. 268 | 269 | Arguments: 270 | files (list): A list of files which contain the output of 'show 271 | info' command on the stats socket. 272 | """ 273 | cnt_metrics = 1 # a metric counter 274 | log.info('processing statistics for HAProxy daemon') 275 | log.debug('processing files %s', ' '.join(files)) 276 | raw_info_stats = defaultdict(list) 277 | # Parse raw data and build a data structure, input looks like: 278 | # Name: HAProxy 279 | # Version: 1.6.3-4d747c-52 280 | # Release_date: 2016/02/25 281 | # Nbproc: 4 282 | # Uptime_sec: 59277 283 | # SslFrontendSessionReuse_pct: 0 284 | # .... 285 | with fileinput.input(files=files) as file_input: 286 | for line in file_input: 287 | if ': ' in line: 288 | key, value = line.split(': ', 1) 289 | try: 290 | numeric_value = int(value) 291 | except ValueError: 292 | pass 293 | else: 294 | raw_info_stats[key].append(numeric_value) 295 | 296 | if not raw_info_stats: 297 | log.error('failed to parse daemon statistics') 298 | return 299 | else: 300 | # Here is where Pandas enters and starts its magic. 
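            # A note on output format: every string handed to the 'send'
            # handlers is a Graphite plaintext-protocol datapoint, i.e.
            # "<metric path> <value> <epoch timestamp>\n".
            # Sketch of the input layout (values are illustrative, based on
            # the example above; one list entry per HAProxy process):
            #   raw_info_stats == {'Nbproc': [4, 4, 4, 4],
            #                      'Uptime_sec': [59277, 59276, 59277, 59277],
            #                      ...}
            # The DataFrame built below therefore has one row per process and
            # one column per metric; DAEMON_METRICS columns are summed across
            # processes, DAEMON_AVG_METRICS columns are averaged, and
            # CpuUsagePct is derived further down as 100 - Idle_pct.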
301 | try: 302 | dataframe = pandas.DataFrame(raw_info_stats) 303 | except ValueError as exc: 304 | log.error('failed to create Pandas object for daemon ' 305 | 'statistics %s', exc) 306 | return 307 | 308 | sums = dataframe.loc[:, DAEMON_METRICS].sum() 309 | avgs = dataframe.loc[:, DAEMON_AVG_METRICS].mean() 310 | cnt_metrics += sums.size + avgs.size 311 | 312 | # Pandas did all the hard work, let's join above tables and extract 313 | # statistics 314 | for values in pandas.concat([sums, avgs], axis=0).items(): 315 | data = ("{p}.daemon.{m} {v} {t}\n" 316 | .format(p=self.graphite_path, 317 | m=values[0].replace('.', '_'), 318 | v=values[1], 319 | t=self.timestamp)) 320 | dispatcher.signal('send', data=data) 321 | 322 | dataframe['CpuUsagePct'] = (dataframe.loc[:, 'Idle_pct'] 323 | .map(lambda x: (x * -1) + 100)) 324 | if dataframe.loc[:, 'Idle_pct'].size > 1: 325 | log.info('calculating percentiles for CpuUsagePct') 326 | percentiles = (dataframe.loc[:, 'CpuUsagePct'] 327 | .quantile(q=[0.25, 0.50, 0.75, 0.95, 0.99], 328 | interpolation='nearest')) 329 | for per in percentiles.items(): 330 | # per[0] = index => [0.25, 0.50, 0.75, 0.95, 0.99] 331 | # per[1] = percentile value 332 | cnt_metrics += 1 333 | data = ("{p}.daemon.{m} {v} {t}\n" 334 | .format(p=self.graphite_path, 335 | m=("{:.2f}PercentileCpuUsagePct" 336 | .format(per[0]).split('.')[1]), 337 | v=per[1], 338 | t=self.timestamp)) 339 | dispatcher.signal('send', data=data) 340 | 341 | cnt_metrics += 1 342 | data = ("{p}.daemon.{m} {v} {t}\n" 343 | .format(p=self.graphite_path, 344 | m="StdCpuUsagePct", 345 | v=dataframe.loc[:, 'CpuUsagePct'].std(), 346 | t=self.timestamp)) 347 | dispatcher.signal('send', data=data) 348 | 349 | if self.config.getboolean('process', 'calculate-percentages'): 350 | for metric in daemon_percentage_metrics(): 351 | cnt_metrics += 1 352 | log.info('calculating percentage for %s', metric.name) 353 | try: 354 | value = calculate_percentage_per_column(dataframe, 355 | metric) 356 | except KeyError: 357 | log.warning("metric %s doesn't exist", metric.name) 358 | else: 359 | data = ("{p}.daemon.{m} {v} {t}\n" 360 | .format(p=self.graphite_path, 361 | m=metric.title, 362 | v=value, 363 | t=self.timestamp)) 364 | dispatcher.signal('send', data=data) 365 | 366 | if self.config.getboolean('process', 'per-process-metrics'): 367 | log.info("processing statistics per daemon") 368 | indexed_by_worker = dataframe.set_index('Process_num') 369 | metrics_per_worker = (indexed_by_worker 370 | .loc[:, DAEMON_METRICS 371 | + ['CpuUsagePct'] 372 | + DAEMON_AVG_METRICS]) 373 | cnt_metrics += metrics_per_worker.size 374 | 375 | for worker, row in metrics_per_worker.iterrows(): 376 | for values in row.iteritems(): 377 | data = ("{p}.daemon.process.{w}.{m} {v} {t}\n" 378 | .format(p=self.graphite_path, 379 | w=worker, 380 | m=values[0].replace('.', '_'), 381 | v=values[1], 382 | t=self.timestamp)) 383 | dispatcher.signal('send', data=data) 384 | 385 | if self.config.getboolean('process', 'calculate-percentages'): 386 | for metric in daemon_percentage_metrics(): 387 | log.info('calculating percentage for %s per daemon', 388 | metric.name) 389 | _percentages = (metrics_per_worker 390 | .loc[:, [metric.limit, metric.name]] 391 | .apply(calculate_percentage_per_row, 392 | axis=1, 393 | args=(metric,))) 394 | 395 | cnt_metrics += _percentages.size 396 | for worker, row in _percentages.iterrows(): 397 | for values in row.iteritems(): 398 | data = ("{p}.daemon.process.{w}.{m} {v} {t}\n" 399 | .format(p=self.graphite_path, 400 | 
w=worker, 401 | m=values[0].replace('.', '_'), 402 | v=values[1], 403 | t=self.timestamp)) 404 | dispatcher.signal('send', data=data) 405 | 406 | data = ("{p}.haproxystats.MetricsHAProxy {v} {t}\n" 407 | .format(p=self.graphite_path, 408 | v=cnt_metrics, 409 | t=self.timestamp)) 410 | dispatcher.signal('send', data=data) 411 | 412 | log.info('number of HAProxy metrics %s', cnt_metrics) 413 | log.info('finished processing statistics for HAProxy daemon') 414 | 415 | def sites_stats(self, files): 416 | """Process statistics for frontends/backends/servers. 417 | 418 | Arguments: 419 | files (list): A list of files which contain the output of 'show 420 | stat' command on the stats socket of HAProxy. 421 | """ 422 | log.info('processing statistics for sites') 423 | log.debug('processing files %s', ' '.join(files)) 424 | log.debug('merging multiple csv files to one Pandas data frame') 425 | data_frame = concat_csv(files) 426 | excluded_backends = [] 427 | 428 | if data_frame is not None: 429 | # Perform some sanitization on the raw data 430 | if '# pxname' in data_frame.columns: 431 | log.debug('replace "# pxname" column with "pxname"') 432 | data_frame.rename(columns={'# pxname': 'pxname'}, inplace=True) 433 | if 'Unnamed: 62' in data_frame.columns: 434 | log.debug('remove "Unnamed: 62" column') 435 | try: 436 | data_frame.drop(labels=['Unnamed: 62'], 437 | axis=1, 438 | inplace=True) 439 | except ValueError as error: 440 | log.warning("failed to drop 'Unnamed: 62' column with: %s", 441 | error) 442 | # Sanitize the values for pxname (frontend's/backend's names) and 443 | # svname (server's names) columns by replacing dots with 444 | # underscores because Graphite uses the dot in the namespace. 445 | data_frame['pxname_'] = (data_frame.pxname 446 | .apply(lambda value: 447 | value.replace('.', '_'))) 448 | data_frame['svname_'] = (data_frame.svname 449 | .apply(lambda value: 450 | value.replace('.', '_'))) 451 | 452 | data_frame.drop('pxname', axis=1, inplace=True) 453 | data_frame.drop('svname', axis=1, inplace=True) 454 | 455 | if not isinstance(data_frame, pandas.DataFrame): 456 | log.warning('Pandas data frame was not created') 457 | return 458 | if len(data_frame.index) == 0: 459 | log.error('Pandas data frame is empty') 460 | return 461 | 462 | # For some metrics HAProxy returns nothing, so we replace them 463 | # with zeros 464 | data_frame.fillna(0, inplace=True) 465 | 466 | self.process_frontends(data_frame) 467 | 468 | exclude_backends_file = self.config.get('process', 469 | 'exclude-backends', 470 | fallback=None) 471 | if exclude_backends_file is not None: 472 | excluded_backends = load_file_content(exclude_backends_file) 473 | log.info('excluding backends %s', excluded_backends) 474 | # replace dots in backend names 475 | excluded_backends[:] = [x.replace('.', '_') 476 | for x in excluded_backends] 477 | 478 | filter_backend = ~data_frame['pxname_'].isin(excluded_backends) 479 | 480 | self.process_backends(data_frame, filter_backend) 481 | self.process_servers(data_frame, filter_backend) 482 | log.info('finished processing statistics for sites') 483 | else: 484 | log.error('failed to process statistics for sites') 485 | 486 | @send_wlc(output=dispatcher, name='Frontends') 487 | def process_frontends(self, data_frame): 488 | """Process statistics for frontends. 489 | 490 | Arguments: 491 | data_frame (obj): A pandas data_frame ready for processing. 
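
        Illustrative example (the frontend name is hypothetical): with the
        default metric list, a frontend configured as "www.example.com" is
        exported as datapoints of the form
        <graphite path>.frontend.www_example_com.<metric> <value> <timestamp>
        and a space-separated 'frontend-metrics' option in the [process]
        section (e.g. "frontend-metrics = scur rate req_tot", assuming those
        names exist in FRONTEND_METRICS) restricts the export to just those
        columns.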
492 | """ 493 | # Filtering for Pandas 494 | cnt_metrics = 1 495 | log.debug('processing statistics for frontends') 496 | is_frontend = data_frame['svname_'] == 'FRONTEND' 497 | excluded_frontends = [] 498 | metrics = self.config.get('process', 'frontend-metrics', fallback=None) 499 | 500 | if metrics is not None: 501 | metrics = metrics.split(' ') 502 | else: 503 | metrics = FRONTEND_METRICS 504 | log.debug('metric names for frontends %s', metrics) 505 | 506 | exclude_frontends_file = self.config.get('process', 507 | 'exclude-frontends', 508 | fallback=None) 509 | if exclude_frontends_file is not None: 510 | excluded_frontends = load_file_content(exclude_frontends_file) 511 | log.info('excluding frontends %s', excluded_frontends) 512 | # replace dots in frontend names 513 | excluded_frontends[:] = [x.replace('.', '_') 514 | for x in excluded_frontends] 515 | filter_frontend = (~data_frame['pxname_'] 516 | .isin(excluded_frontends)) 517 | 518 | frontend_stats = (data_frame[is_frontend & filter_frontend] 519 | .loc[:, ['pxname_'] + metrics]) 520 | 521 | # Group by frontend name and sum values for each column 522 | frontend_aggr_stats = frontend_stats.groupby(['pxname_']).sum() 523 | cnt_metrics += frontend_aggr_stats.size 524 | for index, row in frontend_aggr_stats.iterrows(): 525 | paths = self.get_metric_paths('frontend', index) 526 | for i in row.iteritems(): 527 | datapoints = [ 528 | "{p}.frontend.{f}.{m} {v} {t}\n" 529 | .format(p=path, 530 | f=index, 531 | m=i[0], 532 | v=i[1], 533 | t=self.timestamp) for path in paths 534 | ] 535 | for datapoint in datapoints: 536 | dispatcher.signal('send', data=datapoint) 537 | 538 | data = ("{p}.haproxystats.MetricsFrontend {v} {t}\n" 539 | .format(p=self.graphite_path, 540 | v=cnt_metrics, 541 | t=self.timestamp)) 542 | dispatcher.signal('send', data=data) 543 | log.info('number of frontend metrics %s', cnt_metrics) 544 | 545 | log.debug('finished processing statistics for frontends') 546 | 547 | @send_wlc(output=dispatcher, name='Backends') 548 | def process_backends(self, data_frame, filter_backend): 549 | """Process statistics for backends. 550 | 551 | Arguments: 552 | data_frame (obj): A pandas data_frame ready for processing. 553 | filter_backend: A filter to apply on data_frame. 554 | """ 555 | cnt_metrics = 1 556 | log.debug('processing statistics for backends') 557 | # Filtering for Pandas 558 | is_backend = data_frame['svname_'] == 'BACKEND' 559 | # For averages only consider entries with actual connections made 560 | got_traffic = data_frame['lbtot'] > 0 561 | 562 | metrics = self.config.get('process', 'backend-metrics', fallback=None) 563 | if metrics is not None: 564 | metrics = metrics.split(' ') 565 | else: 566 | metrics = BACKEND_METRICS 567 | log.debug('metric names for backends %s', metrics) 568 | # Get rows only for backends. For some metrics we need the sum and 569 | # for others the average, thus we split them. 
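        # Metrics in BACKEND_METRICS are summed across processes, those in
        # BACKEND_AVG_METRICS are averaged, and BACKEND_AVG_TIME_METRICS
        # (queue/response times) are averaged only over rows with lbtot > 0,
        # so idle processes reporting 0 don't drag the averages down.
        # Hypothetical example: a time metric of [0, 0, 120, 80] over 4
        # processes, of which only 2 served traffic, is reported as 100
        # rather than 50.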
570 | stats_sum = (data_frame[is_backend & filter_backend] 571 | .loc[:, ['pxname_'] + metrics]) 572 | stats_avg = (data_frame[is_backend & filter_backend] 573 | .loc[:, ['pxname_'] + BACKEND_AVG_METRICS]) 574 | stats_avg_time = (data_frame[is_backend & filter_backend & got_traffic] 575 | .loc[:, ['pxname_'] + BACKEND_AVG_TIME_METRICS]) 576 | 577 | aggr_sum = stats_sum.groupby(['pxname_'], as_index=False).sum() 578 | aggr_avg = stats_avg.groupby(['pxname_'], as_index=False).mean() 579 | aggr_avg_time = stats_avg_time.groupby(['pxname_'], as_index=False) \ 580 | .mean() 581 | merged_stats = aggr_sum.merge(aggr_avg, on='pxname_', how='outer') \ 582 | .merge(aggr_avg_time, on='pxname_', how='outer') 583 | 584 | rows, columns = merged_stats.shape 585 | cnt_metrics += rows * (columns - 1) # minus the index 586 | 587 | for _, row in merged_stats.iterrows(): 588 | backend = row[0] 589 | paths = self.get_metric_paths('backend', backend) 590 | for i in row[1:].iteritems(): 591 | datapoints = [ 592 | "{p}.backend.{b}.{m} {v} {t}\n" 593 | .format(p=path, 594 | b=backend, 595 | m=i[0], 596 | v=i[1], 597 | t=self.timestamp) for path in paths 598 | ] 599 | for datapoint in datapoints: 600 | dispatcher.signal('send', data=datapoint) 601 | 602 | data = ("{p}.haproxystats.MetricsBackend {v} {t}\n" 603 | .format(p=self.graphite_path, 604 | v=cnt_metrics, 605 | t=self.timestamp)) 606 | dispatcher.signal('send', data=data) 607 | 608 | log.info('number of backend metrics %s', cnt_metrics) 609 | log.debug('finished processing statistics for backends') 610 | 611 | @send_wlc(output=dispatcher, name='Servers') 612 | def process_servers(self, data_frame, filter_backend): 613 | """Process statistics for servers. 614 | 615 | Arguments: 616 | data_frame (obj): A pandas data_frame ready for processing. 617 | filter_backend: A filter to apply on data_frame. 618 | """ 619 | cnt_metrics = 1 620 | # A filter for rows with stats for servers 621 | is_server = data_frame['type'] == 2 622 | # For averages only consider entries with actual connections made 623 | got_traffic = data_frame['lbtot'] > 0 624 | 625 | log.debug('processing statistics for servers') 626 | 627 | server_metrics = self.config.get('process', 628 | 'server-metrics', 629 | fallback=None) 630 | if server_metrics is not None: 631 | server_metrics = server_metrics.split(' ') 632 | else: 633 | server_metrics = SERVER_METRICS 634 | log.debug('metric names for servers %s', server_metrics) 635 | # Get rows only for servers. For some metrics we need the sum and 636 | # for others the average, thus we split them. 
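        # The datapoints produced below take the form
        #   <graphite path>.backend.<backend>.server.<server>.<metric>
        # plus one TotalServers value per backend (the number of unique
        # servers), and, when 'aggr-server-metrics' is enabled, an extra
        #   <graphite path>.server.<server>.<metric>
        # series aggregated across all backends.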
637 | stats_sum = (data_frame[is_server & filter_backend] 638 | .loc[:, ['pxname_', 'svname_'] + server_metrics]) 639 | stats_avg = (data_frame[is_server & filter_backend] 640 | .loc[:, ['pxname_', 'svname_'] + SERVER_AVG_METRICS]) 641 | stats_avg_time = (data_frame[is_server & filter_backend & got_traffic] 642 | .loc[:, ['pxname_', 'svname_'] + SERVER_AVG_TIME_METRICS]) 643 | servers = (data_frame[is_server & filter_backend] 644 | .loc[:, ['pxname_', 'svname_']]) 645 | 646 | # Calculate the number of configured servers in a backend 647 | tot_servers = (servers 648 | .groupby(['pxname_']) 649 | .agg({'svname_': pandas.Series.nunique})) 650 | aggr_sum = (stats_sum 651 | .groupby(['pxname_', 'svname_'], as_index=False) 652 | .sum()) 653 | aggr_avg = (stats_avg 654 | .groupby(['pxname_', 'svname_'], as_index=False) 655 | .mean()) 656 | aggr_avg_time = (stats_avg_time 657 | .groupby(['pxname_', 'svname_'], as_index=False) 658 | .mean()) 659 | merged_stats = aggr_sum.merge(aggr_avg, 660 | on=['svname_', 'pxname_'], 661 | how='outer') \ 662 | .merge(aggr_avg_time, 663 | on=['svname_', 'pxname_'], 664 | how='outer') 665 | rows, columns = merged_stats.shape 666 | cnt_metrics += rows * (columns - 2) 667 | for backend, row in tot_servers.iterrows(): 668 | cnt_metrics += 1 669 | paths = self.get_metric_paths('backend', backend) 670 | datapoints = [ 671 | "{p}.backend.{b}.{m} {v} {t}\n" 672 | .format(p=path, 673 | b=backend, 674 | m='TotalServers', 675 | v=row[0], 676 | t=self.timestamp) for path in paths 677 | ] 678 | for datapoint in datapoints: 679 | dispatcher.signal('send', data=datapoint) 680 | 681 | for _, row in merged_stats.iterrows(): 682 | backend = row[0] 683 | server = row[1] 684 | paths = self.get_metric_paths('backend', backend) 685 | for i in row[2:].iteritems(): 686 | datapoints = [ 687 | "{p}.backend.{b}.server.{s}.{m} {v} {t}\n" 688 | .format(p=path, 689 | b=backend, 690 | s=server, 691 | m=i[0], 692 | v=i[1], 693 | t=self.timestamp) for path in paths 694 | ] 695 | for datapoint in datapoints: 696 | dispatcher.signal('send', data=datapoint) 697 | 698 | if self.config.getboolean('process', 'aggr-server-metrics'): 699 | log.info('aggregate stats for servers across all backends') 700 | # Produce statistics for servers across all backends 701 | stats_sum = (data_frame[is_server] 702 | .loc[:, ['svname_'] + SERVER_METRICS]) 703 | stats_avg = (data_frame[is_server] 704 | .loc[:, ['svname_'] + SERVER_AVG_METRICS]) 705 | stats_avg_time = (data_frame[is_server & got_traffic] 706 | .loc[:, ['svname_'] + SERVER_AVG_TIME_METRICS]) 707 | aggr_sum = (stats_sum 708 | .groupby(['svname_'], as_index=False) 709 | .sum()) 710 | aggr_avg = (stats_avg 711 | .groupby(['svname_'], as_index=False) 712 | .mean()) 713 | aggr_avg_time = (stats_avg_time 714 | .groupby(['svname_'], as_index=False) 715 | .mean()) 716 | merged_stats = aggr_sum.merge(aggr_avg, 717 | on=['svname_'], 718 | how='outer') \ 719 | .merge(aggr_avg_time, 720 | on=['svname_'], 721 | how='outer') 722 | rows, columns = merged_stats.shape 723 | cnt_metrics += rows * (columns - 1) # minus the index 724 | 725 | for _, row in merged_stats.iterrows(): 726 | server = row[0] 727 | paths = self.get_metric_paths('server', server) 728 | for i in row[1:].iteritems(): 729 | datapoints = [ 730 | "{p}.server.{s}.{m} {v} {t}\n" 731 | .format(p=path, 732 | s=server, 733 | m=i[0], 734 | v=i[1], 735 | t=self.timestamp) for path in paths 736 | ] 737 | for datapoint in datapoints: 738 | dispatcher.signal('send', data=datapoint) 739 | 740 | data = 
("{p}.haproxystats.MetricsServer {v} {t}\n" 741 | .format(p=self.graphite_path, 742 | v=cnt_metrics, 743 | t=self.timestamp)) 744 | dispatcher.signal('send', data=data) 745 | 746 | log.info('number of server metrics %s', cnt_metrics) 747 | log.debug('finished processing statistics for servers') 748 | 749 | 750 | def build_metric_patterns(self): 751 | """Compile regexes from frontend- backend- and server-groups config. 752 | 753 | Builds a list of pairs (pattern_name, regex) to be used when sending 754 | metrics. When a frontend, backend or server matches a given pattern, the 755 | string in pattern_name can be inserted into the metric. 756 | 757 | This list is stored in the class variable 'metric_patterns'. 758 | """ 759 | # Don't let Consumer instances run this at the same time 760 | lock = Lock() 761 | with lock: 762 | for (section, patterns) in Consumer.metric_patterns.items(): 763 | # Run only once 764 | if patterns: 765 | return 766 | config_section = "{}-groups".format(section) 767 | if config_section not in self.config.sections(): 768 | continue 769 | for (name, pattern) in self.config.items(config_section): 770 | # Skip items inherited from the [DEFAULTS] section 771 | if name in self.config.defaults(): 772 | continue 773 | try: 774 | regex = re.compile(pattern) 775 | except re.error as error: 776 | log.error('faied to compile %s pattern %s. Error: %s', 777 | config_section, name, error) 778 | else: 779 | Consumer.metric_patterns[section].append((name, regex)) 780 | log.debug('built metric patterns %s', Consumer.metric_patterns) 781 | 782 | 783 | def get_metric_paths(self, section, section_name): 784 | """Return the graphite path(s) of a metric. 785 | 786 | When the name of a frontend or backend matches a given pattern, the 787 | returned graphite path will include the name of the pattern, prefixed by 788 | a string defined in the 'group-namespace' config setting. The list of 789 | patterns and their names are defined in the 'frontend-groups', 790 | 'backend-groups' and 'server-groups' config sections. 791 | 792 | Additionally, if the config option 'group-namespace-double-writes' is 793 | true, this function will return the default graphite path as well, 794 | so every datapoint may be sent to graphite on both paths. 795 | 796 | If no groups are defined, or if there is no match for the given 797 | frontend/backend name, it returns only the default graphite path. 798 | 799 | If two or more patterns match a frontend/backend name, only one will be 800 | used: the first one declared in the config file. 801 | 802 | Arguments: 803 | section (str): Either 'frontend', 'backend' or 'server'. 804 | section_name (str): The name of said frontend/backend/server. 
805 | """ 806 | group = None 807 | for (pattern_name, pattern) in Consumer.metric_patterns[section]: 808 | if pattern.search(section_name): 809 | group = pattern_name 810 | break 811 | if group is None: 812 | return [self.graphite_path] 813 | try: 814 | path = Consumer.path_cache[section][section_name] 815 | except KeyError: 816 | # cache miss 817 | group_namespace = self.config.get('graphite', 'group-namespace') 818 | path = "{}.{}.{}".format(self.graphite_path, group_namespace, group) 819 | Consumer.path_cache[section][section_name] = path 820 | if self.double_writes: 821 | return [path, self.graphite_path] 822 | else: 823 | return [path] 824 | 825 | 826 | def main(): 827 | """Parse CLI arguments and launches main program.""" 828 | args = docopt(__doc__, version=VERSION) 829 | 830 | config = ConfigParser(interpolation=ExtendedInterpolation()) 831 | # Set defaults for all sections 832 | config.read_dict(copy.copy(DEFAULT_OPTIONS)) 833 | try: 834 | config.read(args['--file']) 835 | except ParsingError as exc: 836 | sys.exit(str(exc)) 837 | 838 | config_dir = args['--dir'] 839 | if config_dir is not None: 840 | if not os.path.isdir(config_dir): 841 | raise ValueError("{d} directory with .conf files doesn't exist" 842 | .format(d=config_dir)) 843 | else: 844 | config_files = glob.glob(os.path.join(config_dir, '*.conf')) 845 | try: 846 | config.read(config_files) 847 | except ParsingError as exc: 848 | sys.exit(str(exc)) 849 | 850 | incoming_dir = config.get('process', 'src-dir') 851 | 852 | if args['--print']: 853 | for section in sorted(DEFAULT_OPTIONS): 854 | if section == 'pull': 855 | continue 856 | print("[{}]".format(section)) 857 | for key, value in sorted(DEFAULT_OPTIONS[section].items()): 858 | print("{k} = {v}".format(k=key, v=value)) 859 | print() 860 | sys.exit(0) 861 | if args['--print-conf']: 862 | for section in sorted(config): 863 | if section == 'pull': 864 | continue 865 | print("[{}]".format(section)) 866 | for key, value in sorted(config[section].items()): 867 | print("{k} = {v}".format(k=key, v=value)) 868 | print() 869 | sys.exit(0) 870 | 871 | try: 872 | configuration_check(config, 'paths') 873 | configuration_check(config, 'process') 874 | configuration_check(config, 'graphite') 875 | read_write_access(config.get('process', 'src-dir')) 876 | check_metrics(config) 877 | except ValueError as exc: 878 | sys.exit(str(exc)) 879 | 880 | tasks = multiprocessing.Queue() 881 | handler = EventHandler(tasks=tasks) 882 | notifier = pyinotify.Notifier(watcher, handler) 883 | num_consumers = config.getint('process', 'workers') 884 | incoming_dir = config.get('process', 'src-dir') 885 | 886 | loglevel =\ 887 | config.get('process', 'loglevel').upper() # pylint: disable=no-member 888 | log.setLevel(getattr(logging, loglevel, None)) 889 | 890 | log.info('haproxystats-processs %s version started', VERSION) 891 | # process incoming data which were retrieved while processing was stopped 892 | for pathname in glob.iglob(incoming_dir + '/*'): 893 | if os.path.isdir(pathname): 894 | log.info('putting %s in queue', pathname) 895 | tasks.put(pathname) 896 | 897 | def shutdown(signalnb=None, frame=None): 898 | """Signal processes to exit. 899 | 900 | It adds STOP_SIGNAL to the queue, which causes processes to exit in a 901 | clean way. 
902 | 903 | Arguments: 904 | signalnb (int): The ID of signal 905 | frame (obj): Frame object at the time of receiving the signal 906 | """ 907 | log.info('received %s at %s', signalnb, frame) 908 | notifier.stop() 909 | for _ in range(num_consumers): 910 | log.info('sending stop signal to worker') 911 | tasks.put(STOP_SIGNAL) 912 | log.info('waiting for workers to finish their work') 913 | for consumer in consumers: 914 | consumer.join() 915 | log.info('exiting') 916 | sys.exit(0) 917 | 918 | # Register our graceful shutdown process to termination signals 919 | signal.signal(signal.SIGHUP, shutdown) 920 | signal.signal(signal.SIGTERM, shutdown) 921 | 922 | # Add our watcher 923 | while True: 924 | try: 925 | log.info('adding a watch for %s', incoming_dir) 926 | watcher.add_watch(incoming_dir, MASK, quiet=False, rec=False) 927 | except pyinotify.WatchManagerError as error: 928 | log.error('received error (%s), going to retry in few seconds', 929 | error) 930 | time.sleep(3) 931 | else: 932 | break 933 | 934 | log.info('creating %d consumers', num_consumers) 935 | consumers = [Consumer(tasks, config) for i in range(num_consumers)] 936 | for consumer in consumers: 937 | consumer.start() 938 | 939 | _thread = Checker( 940 | consumers, config.getfloat('process', 'liveness-check-interval') 941 | ) 942 | _thread.start() 943 | log.info('watching %s directory for incoming data', incoming_dir) 944 | notifier.loop(daemonize=False) 945 | 946 | 947 | if __name__ == '__main__': 948 | main() 949 | -------------------------------------------------------------------------------- /haproxystats/pull.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim:fenc=utf-8 3 | # pylint: disable=too-many-statements 4 | # pylint: disable=too-many-arguments 5 | # pylint: disable=too-many-branches 6 | # pylint: disable=too-many-locals 7 | # 8 | """Pulls statistics from HAProxy daemon over UNIX/TCP socket(s). 9 | 10 | Usage: 11 | haproxystats-pull [-f ] [-p | -P] 12 | 13 | Options: 14 | -f, --file configuration file with settings 15 | [default: /etc/haproxystats.conf] 16 | -p, --print show default settings 17 | -P, --print-conf show configuration 18 | -h, --help show this screen 19 | -v, --version show version 20 | """ 21 | import os 22 | import asyncio 23 | from concurrent.futures import ThreadPoolExecutor, ALL_COMPLETED 24 | import sys 25 | import time 26 | import signal 27 | import shutil 28 | import logging 29 | from functools import partial 30 | from configparser import ConfigParser, ExtendedInterpolation, ParsingError 31 | import copy 32 | import glob 33 | from urllib.parse import urlparse 34 | from docopt import docopt 35 | 36 | from haproxystats import __version__ as VERSION 37 | from haproxystats import DEFAULT_OPTIONS 38 | from haproxystats.utils import (is_unix_socket, CMD_SUFFIX_MAP, 39 | configuration_check) 40 | 41 | LOG_FORMAT = ('%(asctime)s [%(process)d] [%(threadName)-10s:%(funcName)s] ' 42 | '%(levelname)-8s %(message)s') 43 | logging.basicConfig(format=LOG_FORMAT) 44 | log = logging.getLogger('root') # pylint: disable=I0011,C0103 45 | CMDS = ['show info', 'show stat'] 46 | 47 | 48 | @asyncio.coroutine 49 | def get(socket_name, cmd, storage_dir, loop, executor, config): 50 | """Fetch data from a UNIX and TCP socket. 51 | 52 | Sends a command to HAProxy over UNIX/TCP socket, reads the response and 53 | then offloads the writing of the received data to a thread, so we don't 54 | block this coroutine. 
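
    For illustration (the socket path is hypothetical): sending 'show stat'
    to the UNIX socket /run/haproxy/admin1.sock stores the response as
    <storage_dir>/admin1.sock_stat and 'show info' as
    <storage_dir>/admin1.sock_info; for TCP sockets the file name starts
    with "<host>:<port>" instead.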
55 | 56 | Arguments: 57 | socket_name (str or tuple): Either the full path of the UNIX socket 58 | or a tuple with two elements, where 1st element is the host and the 59 | second is the port. 60 | cmd (str): The command to send. 61 | storage_dir (str): The full path of the directory to save the response. 62 | loop (obj): A base event loop from asyncio module. 63 | executor (obj): A Threader executor to execute calls asynchronously. 64 | config (obj): A configParser object which holds configuration. 65 | 66 | Returns: 67 | True if statistics from a UNIX/TCP sockets are saved False otherwise. 68 | 69 | """ 70 | retries = config.getint('pull', 'retries') 71 | timeout = config.getfloat('pull', 'timeout') 72 | interval = config.getfloat('pull', 'interval') 73 | limit = config.getint('pull', 'buffer-limit') 74 | attempt = 0 # times to attempt a connect after a failure 75 | raised = None 76 | 77 | if isinstance(socket_name, str): 78 | socket_type = 'UNIX' 79 | address = socket_name 80 | elif isinstance(socket_name, tuple): 81 | host, port = socket_name 82 | address = "{h}:{p}".format(h=host, p=port) 83 | socket_type = 'TCP' 84 | 85 | log.debug('connecting to %s socket %s', socket_type, address) 86 | if retries == -1: 87 | attempt = -1 # -1 means retry indefinitely 88 | elif retries == 0: 89 | attempt = 1 # Zero means don't retry 90 | else: 91 | attempt = retries + 1 # any other value means retry N times 92 | while attempt != 0: 93 | if raised: # an exception was raised sleep before the next retry 94 | log.error('caught "%s" when connecting to %s socket %s, ' 95 | 'remaining tries %s, sleeping for %.2f seconds', 96 | raised, socket_type, address, attempt, interval) 97 | yield from asyncio.sleep(interval) 98 | try: 99 | if socket_type == 'UNIX': 100 | connect = asyncio.open_unix_connection(address, limit=limit) 101 | else: 102 | connect = asyncio.open_connection(host=host, 103 | port=port, 104 | limit=limit) 105 | reader, writer = yield from asyncio.wait_for(connect, timeout) 106 | except (ConnectionRefusedError, PermissionError, asyncio.TimeoutError, 107 | OSError) as exc: 108 | raised = exc 109 | else: 110 | log.debug('connection established to %s socket %s', 111 | socket_type, 112 | address) 113 | raised = None 114 | break 115 | 116 | attempt -= 1 117 | 118 | if raised is not None: 119 | log.error('failed to connect to %s socket %s after %s retries', 120 | socket_type, address, retries) 121 | return False 122 | else: 123 | log.debug('connection established to %s socket %s', 124 | socket_type, address) 125 | 126 | log.debug('sending command "%s" to %s socket %s', 127 | cmd, 128 | socket_type, 129 | address) 130 | writer.write('{c}\n'.format(c=cmd).encode()) 131 | data = yield from reader.read() 132 | writer.close() 133 | 134 | data_size = len(data) 135 | if data_size == 0: 136 | log.critical('received zero data') 137 | return False 138 | 139 | log.debug('received %s bytes from %s socket %s', 140 | data_size, socket_type, address) 141 | 142 | suffix = CMD_SUFFIX_MAP.get(cmd.split()[1]) 143 | if socket_type == 'UNIX': 144 | filename = os.path.basename(address) + suffix 145 | elif socket_type == 'TCP': 146 | filename = address + suffix 147 | 148 | filename = os.path.join(storage_dir, filename) 149 | log.debug('going to save data to %s', filename) 150 | # Offload the writing to a thread so we don't block ourselves. 151 | 152 | def write_file(): 153 | """Write data to a file. 154 | 155 | Returns: 156 | True if succeeds False otherwise. 
157 | 158 | """ 159 | try: 160 | with open(filename, 'w') as file_handle: 161 | file_handle.write(data.decode()) 162 | except OSError as exc: 163 | log.critical('failed to write data %s', exc) 164 | return False 165 | else: 166 | log.debug('data saved in %s', filename) 167 | return True 168 | 169 | result = yield from loop.run_in_executor(executor, write_file) 170 | 171 | return result 172 | 173 | 174 | @asyncio.coroutine 175 | def pull_stats(config, storage_dir, loop, executor): 176 | """Launch coroutines for pulling statistics from UNIX/TCP sockets. 177 | 178 | This a delegating routine. 179 | 180 | Arguments: 181 | config (obj): A configParser object which holds configuration. 182 | storage_dir (str): The absolute directory path to save the statistics. 183 | loop (obj): A base event loop. 184 | executor(obj): A ThreadPoolExecutor object. 185 | 186 | Returns: 187 | True if statistics from all sockets are fetched False otherwise. 188 | 189 | """ 190 | results = [] # stores the result of finished tasks 191 | sockets = [] 192 | pull_timeout = config.getfloat('pull', 'pull-timeout') 193 | if int(pull_timeout) == 0: 194 | pull_timeout = None 195 | 196 | if config.has_option('pull', 'socket-dir'): 197 | socket_dir = config.get('pull', 'socket-dir') 198 | socket_files = [f for f in glob.glob(socket_dir + '/*') 199 | if is_unix_socket(f)] 200 | if not socket_files: 201 | log.error("found zero UNIX sockets under %s to connect to", 202 | socket_dir) 203 | else: 204 | sockets.extend(socket_files) 205 | 206 | if config.has_option('pull', 'servers'): 207 | servers = config.get('pull', 'servers').strip(',').split(',') 208 | for server in servers: 209 | url = urlparse(server.strip()) 210 | if url.scheme == 'unix': 211 | sockets.append(url.path) 212 | elif url.scheme == 'tcp': 213 | sockets.append((url.hostname, url.port)) 214 | 215 | if not sockets: 216 | log.error("found zero UNIX and TCP sockets") 217 | return False 218 | 219 | log.debug('pull statistics') 220 | coroutines = [get(socket_name, cmd, storage_dir, loop, executor, config) 221 | for socket_name in sockets 222 | for cmd in CMDS] 223 | # Launch all connections. 224 | done, pending = yield from asyncio.wait(coroutines, 225 | timeout=pull_timeout, 226 | return_when=ALL_COMPLETED) 227 | for task in done: 228 | log.debug('task status: %s', task) 229 | results.append(task.result()) 230 | 231 | log.info('task report, done:%s pending:%s succeed:%s failed:%s', 232 | len(done), 233 | len(pending), 234 | results.count(True), 235 | results.count(False)) 236 | 237 | for task in pending: 238 | log.warning('cancelling task %s as it reached its timeout threshold of' 239 | ' %.2f seconds', task, pull_timeout) 240 | task.cancel() 241 | 242 | # only when all tasks are finished successfully we claim success 243 | return not pending and len(set(results)) == 1 and True in set(results) 244 | 245 | 246 | def supervisor(loop, config, executor): 247 | """Coordinate the pulling of HAProxy statistics from UNIX/TCP sockets. 248 | 249 | This is the client routine which launches requests to all HAProxy 250 | UNIX/TCP sockets for retrieving statistics and save them to file-system. 251 | It runs indefinitely until main program is terminated. 252 | 253 | Arguments: 254 | loop (obj): A base event loop from asyncio module. 255 | config (obj): A configParser object which holds configuration. 256 | executor(obj): A ThreadPoolExecutor object. 
257 | """ 258 | dst_dir = config.get('pull', 'dst-dir') 259 | tmp_dst_dir = config.get('pull', 'tmp-dst-dir') 260 | exit_code = 1 261 | 262 | interval = config.getint('pull', 'pull-interval') 263 | start_offset = time.time() % interval 264 | 265 | while True: 266 | timestamp = time.time() 267 | log.debug('entering while loop') 268 | try: 269 | queue = [x for x in os.listdir(dst_dir) 270 | if os.path.isdir(os.path.join(dst_dir, x))] 271 | except FileNotFoundError as exc: 272 | log.warning('%s disappeared: %s. Going to create it', dst_dir, exc) 273 | try: 274 | os.makedirs(dst_dir) 275 | except OSError as exc: 276 | # errno 17 => file exists 277 | if exc.errno != 17: 278 | sys.exit("failed to make directory {d}:{e}" 279 | .format(d=dst_dir, e=exc)) 280 | else: 281 | if len(queue) >= config.getint('pull', 'queue-size'): 282 | log.warning("queue reached max size of %s, pulling statistics " 283 | "is suspended", len(queue)) 284 | # calculate sleep time 285 | sleep = start_offset - time.time() % interval 286 | if sleep < 0: 287 | sleep += interval 288 | log.info('sleeping for %.3fs secs', sleep) 289 | time.sleep(sleep) 290 | continue 291 | # HAProxy statistics are stored in a directory and we use retrieval 292 | # time(seconds since the Epoch) as a name of the directory. 293 | # We first store them in a temporary place until we receive statistics 294 | # from all UNIX/TCP sockets. 295 | storage_dir = os.path.join(tmp_dst_dir, str(int(timestamp))) 296 | 297 | # Exit if our storage directory can't be created 298 | try: 299 | os.makedirs(storage_dir) 300 | except OSError as exc: 301 | # errno 17 => file exists 302 | if exc.errno == 17: 303 | old_data_files = glob.glob(storage_dir + '/*') 304 | for old_file in old_data_files: 305 | log.info('removing old data file %s', old_file) 306 | os.remove(old_file) 307 | else: 308 | msg = ("failed to make directory {d}:{e}" 309 | .format(d=storage_dir, e=exc)) 310 | log.critical(msg) 311 | log.critical('a fatal error has occurred, exiting..') 312 | break 313 | 314 | try: 315 | log.debug('launching delegating coroutine') 316 | result = loop.run_until_complete(pull_stats(config, storage_dir, 317 | loop, executor)) 318 | log.debug('delegating coroutine finished') 319 | except asyncio.CancelledError: 320 | log.info('Received CancelledError exception') 321 | exit_code = 0 322 | break 323 | 324 | # if and only if we received statistics from all sockets then move 325 | # statistics to the permanent directory. 326 | # NOTE: when temporary and permanent storage directory are on the same 327 | # file-system the move is actual a rename, which is an atomic 328 | # operation. 
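        # Illustrative flow (paths are hypothetical and assume dst-dir is the
        # src-dir watched by haproxystats-process):
        #   /var/lib/haproxystats/tmp/1454016646  -->  moved to
        #   /var/lib/haproxystats/incoming/1454016646
        # at which point haproxystats-process is notified via inotify and
        # picks the directory up.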
329 | if result: 330 | log.debug('move %s to %s', storage_dir, dst_dir) 331 | try: 332 | shutil.move(storage_dir, dst_dir) 333 | except OSError as exc: 334 | log.critical("failed to move %s to %s: %s", 335 | storage_dir, 336 | dst_dir, 337 | exc) 338 | log.critical('a fatal error has occurred, exiting..') 339 | break 340 | else: 341 | log.info('statistics are stored in %s', 342 | os.path.join(dst_dir, os.path.basename(storage_dir))) 343 | else: 344 | log.critical('failed to pull stats') 345 | log.debug('removing temporary directory %s', storage_dir) 346 | try: 347 | shutil.rmtree(storage_dir) 348 | except (FileNotFoundError, PermissionError, OSError) as exc: 349 | log.error('failed to remove temporary directory %s with:%s', 350 | storage_dir, 351 | exc) 352 | 353 | log.info('wall clock time in seconds: %.3f', time.time() - timestamp) 354 | # calculate sleep time 355 | sleep = start_offset - time.time() % interval 356 | if sleep < 0: 357 | sleep += interval 358 | log.info('sleeping for %.3fs secs', sleep) 359 | time.sleep(sleep) 360 | 361 | # It is very unlikely that threads haven't finished their job by now, but 362 | # they perform disk IO operations which can take some time in certain 363 | # situations, thus we want to wait for them in order to perform a clean 364 | # shutdown. 365 | log.info('waiting for threads to finish any pending IO tasks') 366 | executor.shutdown(wait=True) 367 | log.info('closing asyncio event loop') 368 | loop.close() 369 | log.info('exiting with status %s', exit_code) 370 | sys.exit(exit_code) 371 | 372 | 373 | def main(): 374 | """Parse CLI arguments and launch main program.""" 375 | args = docopt(__doc__, version=VERSION) 376 | 377 | config = ConfigParser(interpolation=ExtendedInterpolation()) 378 | # Set defaults for all sections 379 | config.read_dict(copy.copy(DEFAULT_OPTIONS)) 380 | # Load configuration from a file. NOTE: ConfigParser doesn't warn if user 381 | # sets a filename which doesn't exist, in this case defaults will be used. 382 | try: 383 | config.read(args['--file']) 384 | except ParsingError as exc: 385 | sys.exit(str(exc)) 386 | 387 | if args['--print']: 388 | for section in sorted(DEFAULT_OPTIONS): 389 | if section == 'pull' or section == 'DEFAULT': 390 | print("[{}]".format(section)) 391 | for key, value in sorted(DEFAULT_OPTIONS[section].items()): 392 | print("{k} = {v}".format(k=key, v=value)) 393 | print() 394 | sys.exit(0) 395 | if args['--print-conf']: 396 | for section in sorted(config): 397 | if section == 'pull' or section == 'DEFAULT': 398 | print("[{}]".format(section)) 399 | for key, value in sorted(config[section].items()): 400 | print("{k} = {v}".format(k=key, v=value)) 401 | print() 402 | sys.exit(0) 403 | 404 | try: 405 | configuration_check(config, 'pull') 406 | except ValueError as exc: 407 | sys.exit(str(exc)) 408 | 409 | loglevel = (config.get('pull', 'loglevel') # pylint: disable=no-member 410 | .upper()) 411 | log.setLevel(getattr(logging, loglevel, None)) 412 | 413 | log.info('haproxystats-pull %s version started', VERSION) 414 | # Setup our event loop 415 | loop = asyncio.get_event_loop() 416 | executor = ThreadPoolExecutor(max_workers=config.getint('pull', 417 | 'workers')) 418 | # Register shutdown to signals 419 | 420 | def shutdown(signalname): 421 | """Perform a clean shutdown. 
422 | 423 | Arguments: 424 | signalname (str): Signal name 425 | """ 426 | tasks_running = False 427 | log.info('received %s', signalname) 428 | 429 | for task in asyncio.Task.all_tasks(): 430 | if not task.done(): 431 | tasks_running = True 432 | log.info('cancelling %s task', task) 433 | task.cancel() 434 | 435 | if not tasks_running: 436 | log.info('no tasks were running when %s signal received', signal) 437 | log.info('waiting for threads to finish any pending IO tasks') 438 | executor.shutdown(wait=True) 439 | sys.exit(0) 440 | 441 | loop.add_signal_handler(signal.SIGHUP, partial(shutdown, 'SIGHUP')) 442 | loop.add_signal_handler(signal.SIGTERM, partial(shutdown, 'SIGTERM')) 443 | 444 | # a temporary directory to store fetched data 445 | tmp_dst_dir = config['pull']['tmp-dst-dir'] 446 | # a permanent directory to move data from the temporary directory. Data are 447 | # picked up by the process daemon from that directory. 448 | dst_dir = config['pull']['dst-dir'] 449 | for directory in dst_dir, tmp_dst_dir: 450 | try: 451 | os.makedirs(directory) 452 | except OSError as exc: 453 | # errno 17 => file exists 454 | if exc.errno != 17: 455 | sys.exit("failed to make directory {d}:{e}" 456 | .format(d=directory, e=exc)) 457 | supervisor(loop, config, executor) 458 | 459 | 460 | # This is the standard boilerplate that calls the main() function. 461 | if __name__ == '__main__': 462 | main() 463 | -------------------------------------------------------------------------------- /haproxystats/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim:fenc=utf-8 3 | # pylint: disable=too-many-instance-attributes 4 | # pylint: disable=too-many-arguments 5 | # pylint: disable=too-many-branches 6 | """Provide functions, constants and classes that are used by haproxystats.""" 7 | import os 8 | import stat 9 | from collections import defaultdict, deque 10 | from functools import wraps 11 | import io 12 | import socket 13 | import shutil 14 | import logging 15 | import time 16 | import configparser 17 | import glob 18 | import re 19 | from urllib.parse import urlparse 20 | import pyinotify 21 | import pandas 22 | 23 | from haproxystats.metrics import (MetricNamesPercentage, FRONTEND_METRICS, 24 | BACKEND_METRICS, BACKEND_AVG_METRICS, 25 | BACKEND_AVG_TIME_METRICS, 26 | SERVER_METRICS, SERVER_AVG_METRICS, 27 | SERVER_AVG_TIME_METRICS) 28 | 29 | 30 | log = logging.getLogger('root') # pylint: disable=I0011,C0103 31 | 32 | FILE_SUFFIX_INFO = '_info' 33 | FILE_SUFFIX_STAT = '_stat' 34 | CMD_SUFFIX_MAP = {'info': FILE_SUFFIX_INFO, 'stat': FILE_SUFFIX_STAT} 35 | 36 | OPTIONS_TYPE = { 37 | 'paths': { 38 | 'base-dir': 'get', 39 | }, 40 | 'pull': { 41 | 'loglevel': 'get', 42 | 'retries': 'getint', 43 | 'timeout': 'getfloat', 44 | 'interval': 'getfloat', 45 | 'pull-timeout': 'getfloat', 46 | 'pull-interval': 'getint', 47 | 'buffer-limit': 'getint', 48 | 'dst-dir': 'get', 49 | 'tmp-dst-dir': 'get', 50 | 'workers': 'getint', 51 | 'queue-size': 'getint', 52 | }, 53 | 'process': { 54 | 'workers': 'getint', 55 | 'src-dir': 'get', 56 | 'aggr-server-metrics': 'getboolean', 57 | 'per-process-metrics': 'getboolean', 58 | 'calculate-percentages': 'getboolean', 59 | 'liveness-check-interval': 'getfloat', 60 | }, 61 | 'graphite': { 62 | 'server': 'get', 63 | 'port': 'getint', 64 | 'retries': 'getint', 65 | 'interval': 'getfloat', 66 | 'connect-timeout': 'getfloat', 67 | 'write-timeout': 'getfloat', 68 | 'delay': 'getfloat', 69 | 'backoff': 'getfloat', 70 | 
'namespace': 'get', 71 | 'prefix-hostname': 'getboolean', 72 | 'fqdn': 'getboolean', 73 | 'queue-size': 'getint', 74 | }, 75 | 'local-store': { 76 | 'dir': 'get', 77 | }, 78 | } 79 | VALID_TCP_SOCKETS = [ 80 | 'tcp', 81 | 'unix', 82 | ] 83 | 84 | 85 | class BrokenConnection(Exception): 86 | """A wrapper of all possible exception during a TCP connect.""" 87 | 88 | def __init__(self, raised): 89 | """Initilaztion.""" 90 | self.raised = raised 91 | 92 | super().__init__() 93 | 94 | 95 | def load_file_content(filename): 96 | """Build list from the content of a file. 97 | 98 | Arguments: 99 | filename (str): A absolute path of a filename 100 | 101 | Returns: 102 | A list 103 | 104 | """ 105 | commented = re.compile(r'\s*?#') 106 | try: 107 | with open(filename, 'r') as _file: 108 | _content = [line.strip() for line in _file.read().splitlines() 109 | if not commented.match(line)] 110 | except OSError as exc: 111 | log.error('failed to read %s:%s', filename, exc) 112 | return [] 113 | else: 114 | return _content 115 | 116 | 117 | def is_unix_socket(path): 118 | """Check if path is a valid UNIX socket. 119 | 120 | Arguments: 121 | path (str): A file name path 122 | 123 | Returns: 124 | True if path is a valid UNIX socket otherwise False. 125 | 126 | """ 127 | mode = os.stat(path).st_mode 128 | 129 | return stat.S_ISSOCK(mode) 130 | 131 | 132 | def concat_csv(csv_files): 133 | """Perform a concatenation along several csv files. 134 | 135 | Arguments: 136 | csv_files (lst): A list of csv files. 137 | 138 | Returns: 139 | A pandas data frame object or None if fails to parse csv_files 140 | 141 | """ 142 | data_frames = [] 143 | for csv_file in csv_files: 144 | try: 145 | data_frame = pandas.read_csv(csv_file, low_memory=False) 146 | except (ValueError, OSError) as exc: 147 | log.error('Pandas failed to parse %s file with: %s', csv_file, exc) 148 | else: 149 | if not data_frame.empty: 150 | data_frames.append(data_frame) 151 | if data_frames: 152 | return pandas.concat(data_frames) 153 | 154 | return None 155 | 156 | 157 | def get_files(path, suffix): 158 | """Return the filenames from a directory which match a suffix. 159 | 160 | Arguments: 161 | path (str): Pathname 162 | suffix (str): Suffix to match against 163 | 164 | Returns: 165 | A list of filenames 166 | 167 | """ 168 | files = [filename 169 | for filename in glob.glob(path + '/*{s}'.format(s=suffix))] 170 | 171 | return files 172 | 173 | 174 | def retry_on_failures(retries=3, 175 | interval=0.9, 176 | backoff=3, 177 | exceptions=(ConnectionResetError, ConnectionRefusedError, 178 | ConnectionAbortedError, BrokenPipeError, 179 | OSError), 180 | exception_to_raise=BrokenConnection): 181 | """Perform a retry logic when an exception is raised by the decorated func. 182 | 183 | Arguments: 184 | retries (int): Maximum times to retry 185 | interval (float): Sleep this many seconds between retries 186 | backoff (int): Multiply interval by this factor after each failure 187 | exceptions (tuple): A list of exceptions to catch 188 | exception_to_raise (obj): An exception to raise when maximum tries 189 | have been reached. 190 | 191 | The decorator calls the function up to retries times if it raises an 192 | exception from the tuple. The decorated function will only be retried if 193 | it raises one of the specified exceptions. 194 | """ 195 | def dec(func): 196 | """Decorator. 
197 | 198 | Arguments: 199 | func (obj): A function to decorate 200 | """ 201 | def decorated_func(*args, **kwargs): 202 | """Retry decorated functions.""" 203 | backoff_interval = interval 204 | raised = None 205 | attempt = 0 # times to attempt a connect after a failure 206 | if retries == -1: # -1 means retry indefinitely 207 | attempt = -1 208 | elif retries == 0: # Zero means don't retry 209 | attempt = 1 210 | else: # any other value means retry N times 211 | attempt = retries + 1 212 | while attempt != 0: 213 | if raised: 214 | log.error('caught "%s" at "%s", remaining tries %s, ' 215 | 'sleeping for %.2f seconds', raised, 216 | func.__name__, attempt, backoff_interval) 217 | time.sleep(backoff_interval) 218 | backoff_interval = backoff_interval * backoff 219 | try: 220 | return func(*args, **kwargs) 221 | except exceptions as error: 222 | raised = error 223 | else: 224 | raised = None 225 | break 226 | 227 | attempt -= 1 228 | 229 | if raised: 230 | raise exception_to_raise(raised=raised) 231 | 232 | return decorated_func 233 | 234 | return dec 235 | 236 | 237 | class Dispatcher(object): 238 | """Dispatch data to different handlers.""" 239 | 240 | def __init__(self): 241 | """Initilaztion.""" 242 | self.handlers = defaultdict(list) 243 | 244 | def register(self, signal, callback): 245 | """Register a callback to a signal. 246 | 247 | Multiple callbacks can be assigned to the same signal. 248 | 249 | Arguments: 250 | signal (str): The name of the signal 251 | callbacl (obj): A callable object to call for the given signal. 252 | """ 253 | self.handlers[signal].append(callback) 254 | 255 | def unregister(self, signal, callback): 256 | """Unregister a callback to a signal. 257 | 258 | Arguments: 259 | signal (str): The name of the signal 260 | callbacl (obj): A callable object to call for the given signal. 261 | """ 262 | try: 263 | self.handlers[signal].remove(callback) 264 | except ValueError: 265 | log.debug('tried to unregister %s from unknown %s signal', 266 | callback, signal) 267 | 268 | def signal(self, signal, **kwargs): 269 | """Run registered handlers. 270 | 271 | Arguments: 272 | signal (str): A registered signal 273 | """ 274 | if signal in self.handlers: 275 | for handler in self.handlers.get(signal): 276 | handler(**kwargs) 277 | 278 | 279 | class GraphiteHandler(): 280 | """A handler to send data to graphite. 281 | 282 | Arguments: 283 | server (str): Server name or IP address. 
284 | port (int): Port to connect to 285 | retries (int): Numbers to retry on connection failure 286 | interval (float): Time to sleep between retries 287 | connect_timeout (float): Timeout on connection 288 | write_timeout (float): Timeout on sending data 289 | delay (float): Time to delay a connection attempt after last failure 290 | backoff (float): Multiply interval by this factor after each failure 291 | queue_size (int): Maximum size of the queue 292 | """ 293 | 294 | def __init__(self, 295 | server, 296 | port=3002, 297 | retries=1, 298 | interval=2, 299 | connect_timeout=1, 300 | write_timeout=1, 301 | delay=4, 302 | backoff=2, 303 | queue_size=1000000): 304 | """Initilaztion.""" 305 | self.server = server 306 | self.port = port 307 | self.retries = retries 308 | self.interval = interval 309 | self.connect_timeout = connect_timeout 310 | self.write_timeout = write_timeout 311 | self.delay = delay 312 | self.backoff = backoff 313 | self.queue_size = queue_size 314 | self.dqueue = deque([], maxlen=self.queue_size) 315 | self.connection = None 316 | self.timer = None 317 | self.exceptions = (ConnectionResetError, ConnectionRefusedError, 318 | ConnectionAbortedError, BrokenPipeError, OSError, 319 | socket.timeout) 320 | 321 | log.debug('connect timeout %.2fsecs write timeout %.2fsecs', 322 | self.connect_timeout, 323 | self.write_timeout) 324 | 325 | def open(self): 326 | """Open a connection to graphite relay.""" 327 | try: 328 | self.connect() 329 | except BrokenConnection as error: 330 | self.connection = None 331 | log.error('failed to connect to %s on port %s: %s', 332 | self.server, 333 | self.port, 334 | error.raised) 335 | else: 336 | self.connection.settimeout(self.write_timeout) 337 | log.info('successfully connected to %s on port %s, TCP info %s', 338 | self.server, 339 | self.port, 340 | self.connection) 341 | 342 | @property 343 | def connect(self): 344 | """Wrap connection so we can pass arguments to decorator.""" 345 | @retry_on_failures(retries=self.retries, 346 | interval=self.interval, 347 | backoff=self.backoff, 348 | exceptions=self.exceptions, 349 | exception_to_raise=BrokenConnection) 350 | def _create_connection(): 351 | """Try to open a connection. 352 | 353 | Exceptions are caught by the decorator which implements the retry 354 | logic. 
355 | """ 356 | log.info('connecting to %s on port %s', self.server, self.port) 357 | self.connection = socket.create_connection( 358 | (self.server, self.port), 359 | timeout=self.connect_timeout) 360 | 361 | return _create_connection 362 | 363 | def send(self, **kwargs): 364 | """Send data to graphite relay.""" 365 | self.dqueue.appendleft(kwargs.get('data')) 366 | 367 | while len(self.dqueue) != 0: 368 | item = self.dqueue.popleft() 369 | try: 370 | self.connection.sendall(bytes(item, 'utf-8')) 371 | # AttributeError means that open() method failed, all other 372 | # exceptions indicate connection problems 373 | except (AttributeError, BrokenPipeError, ConnectionResetError, 374 | ConnectionAbortedError, socket.timeout) as exc: 375 | self.dqueue.appendleft(item) 376 | # Only try to connect again if some time has passed 377 | if self.timer is None: 378 | self.timer = time.time() 379 | log.warning('graphite connection problem is detected') 380 | log.debug('timer is set to:%s', self.timer) 381 | elif time.time() - self.timer > self.delay: 382 | log.error('caught %s while sending data to graphite', exc) 383 | log.warning('%s secs since last failure', self.delay) 384 | log.info('TCP info: %s', self.connection) 385 | self.timer = None 386 | 387 | if len(self.dqueue) == self.dqueue.maxlen: 388 | log.critical("graphite dispatcher queue is full, old " 389 | "metrics will be dropped") 390 | 391 | if not isinstance(exc, AttributeError): 392 | self.close() 393 | else: 394 | log.warning('connection is not available') 395 | self.open() 396 | return 397 | except OSError as exc: 398 | self.dqueue.appendleft(item) 399 | # Unclear under which conditions we may get OSError 400 | log.warning('caught %s while sending data to graphite', exc) 401 | log.info('TCP info: %s', self.connection) 402 | self.close() 403 | self.open() 404 | return 405 | else: 406 | # Consume all items from the local deque before return to 407 | # the caller. This causes a small delay to the caller at the 408 | # benefit of flushing data as soon as possible which avoids 409 | # gaps in graphs. 410 | continue 411 | 412 | def close(self, **kwargs): # pylint: disable=unused-argument 413 | """Close TCP connection to graphite relay.""" 414 | log.info('closing connection to %s on port %s', self.server, self.port) 415 | log.info('TCP info: %s', self.connection) 416 | try: 417 | self.connection.close() 418 | except (ConnectionRefusedError, ConnectionResetError, socket.timeout, 419 | ConnectionAbortedError) as exc: 420 | log.warning('closing connection failed: %s', exc) 421 | except (AttributeError, OSError) as exc: 422 | log.critical('closing connection failed: %s. We should not receive' 423 | ' this exception, it is a BUG', 424 | exc) 425 | else: 426 | log.info('successfully closed connection to %s on port %s', 427 | self.server, 428 | self.port) 429 | 430 | 431 | dispatcher = Dispatcher() # pylint: disable=I0011,C0103 432 | 433 | 434 | class FileHandler(): 435 | """A handler to write data to a file.""" 436 | 437 | def __init__(self): 438 | """Initilaztion.""" 439 | self._input = None 440 | self._output = None 441 | 442 | def open(self): 443 | """Build a stringIO object in memory ready to be used.""" 444 | self._input = io.StringIO() 445 | 446 | def send(self, **kwargs): 447 | """Write data to a file-like object.""" 448 | self._input.write(kwargs.get('data')) 449 | 450 | def set_path(self, filepath): 451 | """Set the filepath to send data. 
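
        For illustration: loop() below calls this with a path of the form
        <local-store dir>/<epoch timestamp>/stats, so each processing round
        ends up in its own timestamped file.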
452 | 453 | Arguments: 454 | filepath (str): The pathname of the file 455 | """ 456 | log.debug('filepath for local-store set to %s', filepath) 457 | try: 458 | self._output = open(filepath, 'w') 459 | except (OSError, PermissionError) as error: 460 | log.error('failed to create %s: %s', filepath, error) 461 | 462 | def loop(self, **kwargs): 463 | """Rotate the file.""" 464 | base_dir = os.path.join(kwargs.get('local_store'), 465 | kwargs.get('timestamp')) 466 | try: 467 | os.makedirs(base_dir) 468 | except (OSError, PermissionError) as error: 469 | # errno 17 => file exists 470 | if error.errno != 17: 471 | log.error('failed to make directory %s: %s', base_dir, error) 472 | self.set_path(filepath=os.path.join(base_dir, 'stats')) 473 | 474 | def flush(self, **kwargs): # pylint: disable=unused-argument 475 | """Flush data to disk.""" 476 | self._input.seek(0) 477 | try: 478 | shutil.copyfileobj(self._input, self._output) 479 | self._output.flush() 480 | self._output.close() 481 | except (OSError, PermissionError, AttributeError) as error: 482 | log.error('failed to flush data to file: %s', error) 483 | self._input.close() 484 | 485 | self.open() 486 | 487 | 488 | class EventHandler(pyinotify.ProcessEvent): 489 | """An event handler for inotify to push items to a queue. 490 | 491 | If the event isn't for a directory no action is taken. 492 | 493 | Arguments: 494 | tasks (queue obj): A queue to put items. 495 | """ 496 | 497 | def my_init(self, tasks): # pylint: disable=arguments-differ 498 | """Initilaztion.""" 499 | self.tasks = tasks 500 | 501 | def _put_item_to_queue(self, pathname): 502 | """Add item to queue if and only if the pathname is a directory.""" 503 | if os.path.isdir(pathname): 504 | log.info('putting %s in queue', pathname) 505 | self.tasks.put(pathname) 506 | else: 507 | log.info("ignore %s as it isn't directory", pathname) 508 | 509 | def process_IN_CREATE(self, event): # pylint: disable=C0103 510 | """Add an item to the queue when a directory is created.""" 511 | log.debug('received an event for CREATE') 512 | self._put_item_to_queue(event.pathname) 513 | 514 | def process_IN_MOVED_TO(self, event): # pylint: disable=C0103 515 | """Add an item to the queue when a directory/file is moved.""" 516 | log.debug('received an event for MOVE') 517 | self._put_item_to_queue(event.pathname) 518 | 519 | 520 | def configuration_check(config, section): 521 | """Perform a sanity check on configuration. 522 | 523 | Arguments: 524 | config (obg): A configparser object which holds our configuration. 525 | section (str): Section name 526 | 527 | Raises: 528 | ValueError on the first occureance of invalid configuration 529 | 530 | Returns: 531 | None if all checks are successful. 532 | 533 | """ 534 | loglevel = config[section]['loglevel'] 535 | num_level = getattr(logging, loglevel.upper(), None) 536 | if not isinstance(num_level, int): 537 | raise ValueError("invalid configuration, section:'{s}' option:'{o}' " 538 | "error: invalid loglevel '{l}'" 539 | .format(s=section, 540 | o='loglevel', 541 | l=loglevel)) 542 | 543 | for option, getter in OPTIONS_TYPE[section].items(): 544 | try: 545 | getattr(config, getter)(section, option) 546 | except (configparser.Error, ValueError) as exc: 547 | # For some errors ConfigParser mentions section/option names and 548 | # for others not. We want for all possible errors to mention 549 | # section and option names in order to make the life of our user 550 | # easier. 
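            # Hypothetical example: getint('graphite', 'port') on a value of
            # 'abc' raises a bare "invalid literal for int() ..." ValueError,
            # which the first branch below wraps as
            # "invalid configuration, section:'graphite' option:'port' error:..."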
551 | if 'section' not in str(exc): 552 | raise ValueError("invalid configuration, section:'{s}' " 553 | "option:'{p}' error:{e}" 554 | .format(s=section, 555 | p=option, 556 | e=str(exc))) 557 | else: 558 | raise ValueError("invalid configuration, error:{e}" 559 | .format(e=str(exc))) 560 | 561 | # asyncio.StreamReader does not accept a value less than 1. 562 | if config.getint('pull', 'buffer-limit') < 1: 563 | raise ValueError("invalid configuration, you cannot set a value " 564 | "less than 1 for 'buffer-limit' option of 'pull' section") 565 | 566 | if config.has_option('pull', 'socket-dir'): 567 | try: 568 | socket_dir = config.get('pull', 'socket-dir') 569 | except (configparser.Error, ValueError) as exc: 570 | raise ValueError("invalid configuration, error:{e}" 571 | .format(e=str(exc))) 572 | else: 573 | if not socket_dir: 574 | raise ValueError("invalid configuration, no value for option " 575 | "'socket-dir'") 576 | 577 | if config.has_option('pull', 'servers'): 578 | try: 579 | servers = config.get('pull', 'servers').split(',') 580 | except (configparser.Error, ValueError) as exc: 581 | raise ValueError("invalid configuration, error:{e}" 582 | .format(e=str(exc))) 583 | else: 584 | if len(servers) == 1 and not servers[0]: 585 | raise ValueError("invalid configuration, no value for option " 586 | "'servers' in the section 'pull'") 587 | configuration_check_for_servers(servers) 588 | 589 | if section == 'process': 590 | groups = {'frontend-groups', 'backend-groups', 'server-groups'} 591 | configured_groups = groups.intersection(config.sections()) 592 | if config.has_option('graphite', 'group-namespace'): 593 | try: 594 | config.getboolean('graphite', 'group-namespace-double-writes') 595 | except (configparser.Error, ValueError) as exc: 596 | raise ValueError("invalid configuration, section:'graphite' " 597 | "option:'group-namespace-double-writes' " 598 | "error:{e}".format(e=exc)) 599 | if not configured_groups: 600 | raise ValueError("invalid configuration, at least one of these " 601 | "sections should exist: {}".format(groups)) 602 | else: 603 | if configured_groups: 604 | raise ValueError("invalid configuration, no value for option " 605 | "'group-namespace' in the section 'graphite'") 606 | 607 | 608 | def configuration_check_for_servers(servers, option='servers', section='pull'): 609 | """Perform a sanity check against the values for servers. 610 | 611 | Arguments: 612 | servers (list): A list of servers. 613 | option (str): The name of the option they belong to. 614 | section (str): The name of the section the option is part of. 615 | 616 | Raises: 617 | ValueError on the first occurrence of invalid configuration 618 | 619 | Returns: 620 | None if all checks are successful. 
621 | 622 | """ 623 | for server in servers: 624 | server = server.strip() 625 | if not server: 626 | raise ValueError("invalid configuration, invalid value for '{o}' " 627 | "option of '{s}' section" 628 | .format(o=option, s=section)) 629 | try: 630 | url = urlparse(server) 631 | except ValueError as exc: 632 | raise ValueError("invalid configuration, failed to parse '{o}' " 633 | "option of '{s}' section, error:{e}" 634 | .format(e=str(exc), o=option, s=section)) 635 | else: 636 | if url.scheme not in VALID_TCP_SOCKETS: 637 | raise ValueError("invalid configuration, only unix and tcp " 638 | "types of servers are supported in '{o}' " 639 | "option of '{s}' section" 640 | .format(o=option, s=section)) 641 | 642 | if url.scheme == 'tcp' and not url.port: 643 | raise ValueError("invalid configuration, port is not set in " 644 | "'{o}' option of '{s}' section" 645 | .format(o=option, s=section)) 646 | 647 | if url.scheme == 'unix' and not url.path: 648 | raise ValueError("invalid configuration, path is not set in " 649 | "'{o}' option of '{s}' section" 650 | .format(o=option, s=section)) 651 | 652 | 653 | def check_metrics(config): 654 | """Check if metrics set by the user are valid. 655 | 656 | Arguments: 657 | config (obj): A configparser object which holds our configuration. 658 | 659 | Raises: 660 | ValueError when metrics are not valid 661 | 662 | Returns: 663 | None if all checks are successful. 664 | 665 | """ 666 | valid_metrics_per_option = { 667 | 'frontend-metrics': FRONTEND_METRICS, 668 | 'backend-metrics': BACKEND_METRICS + BACKEND_AVG_METRICS + BACKEND_AVG_TIME_METRICS, 669 | 'server-metrics': SERVER_METRICS + SERVER_AVG_METRICS + SERVER_AVG_TIME_METRICS, 670 | } 671 | for option, valid_metrics in valid_metrics_per_option.items(): 672 | user_metrics = config.get('process', option, fallback=None) 673 | if user_metrics is not None: 674 | metrics = set(user_metrics.split(' ')) 675 | if not metrics: 676 | break 677 | if not set(valid_metrics).issuperset(metrics): 678 | raise ValueError("invalid configuration, section:'{s}' " 679 | "option:'{p}' error:'{e}'" 680 | .format(s='process', 681 | p=option, 682 | e='invalid list of metrics')) 683 | 684 | 685 | def read_write_access(directory): 686 | """Check if read/write access is granted on a directory. 687 | 688 | Arguments: 689 | directory (str): Directory name 690 | 691 | Raises: 692 | ValueError if either read or write access isn't granted 693 | 694 | Returns: 695 | None if read/write access is granted 696 | 697 | """ 698 | check_file = os.path.join(directory, '.read_write_check') 699 | try: 700 | with open(check_file, 'w') as _file: 701 | _file.write('') 702 | except OSError as exc: 703 | raise ValueError("invalid configuration, read and write access is not " 704 | "granted for '{d}' directory, error:{e}" 705 | .format(d=directory, 706 | e=str(exc))) 707 | else: 708 | os.remove(check_file) 709 | 710 | 711 | def daemon_percentage_metrics(): 712 | """Build a list of namedtuples. 713 | 714 | Those namedtuples hold metric names for the HAProxy daemon for which we 715 | calculate a percentage. 
716 | """ 717 | _list = [] 718 | _list.append(MetricNamesPercentage(name='CurrConns', 719 | limit='Maxconn', 720 | title='ConnPercentage')) 721 | _list.append(MetricNamesPercentage(name='ConnRate', 722 | limit='ConnRateLimit', 723 | title='ConnRatePercentage')) 724 | _list.append(MetricNamesPercentage(name='CurrSslConns', 725 | limit='MaxSslConns', 726 | title='SslConnPercentage')) 727 | _list.append(MetricNamesPercentage(name='SslRate', 728 | limit='SslRateLimit', 729 | title='SslRatePercentage')) 730 | 731 | return _list 732 | 733 | 734 | def calculate_percentage_per_row(row, metric): 735 | """Calculate the percentage per row for 2 columns. 736 | 737 | It selects per row 2 columns, metric.name and metric.limit, out of the 738 | dataframe and then calculate the percentage. 739 | 740 | Example where metric.name is 'CurrConns' and metric.limit is 'MaxConn'. 741 | +-------------+---------+-----------+ 742 | | | MaxConn | CurrConns | 743 | +-------------+---------+-----------+ 744 | | Process_num | | | 745 | | 0 | 300 | 13 | 746 | | 1 | 300 | 15 | 747 | | 2 | 300 | 11 | 748 | +-------------+---------+-----------+ 749 | 750 | It returns a Pandas Series with a column name set to metric.title 751 | +-------------+----------------+ 752 | | | ConnPercentage | 753 | +-------------+----------------+ 754 | | Process_num | | 755 | | 0 | 13 | 756 | | 1 | 15 | 757 | | 2 | 11 | 758 | +-------------+----------------+ 759 | 760 | Arguments: 761 | 762 | dataframe (obj): Pandas dataframe with statistics for HAProxy workers 763 | metric (tuple): A namedtuple of MetricNamesPercentage 764 | 765 | Returns: 766 | A Pandas Series with percentage as integer 767 | 768 | """ 769 | if row[metric.limit] == 0: 770 | return pandas.Series({metric.title: 0}) 771 | 772 | return pandas.Series( 773 | { 774 | metric.title: (100 * row[metric.name] 775 | / row[metric.limit]).astype('int') 776 | } 777 | ) 778 | 779 | 780 | def calculate_percentage_per_column(dataframe, metric): 781 | """Calculate the percentage against 2 Pandas Series. 782 | 783 | It selects 2 columns, metric.name and metric.limit, out of the dataframe, 784 | sums the values per column and then calculate the percentage. 785 | 786 | Example where metric.name is 'CurrConns' and metric.limit is 'MaxConn'. 787 | It calculates the sum per column and the retuns the percentage of CurrConns 788 | as part of Maxconn. 789 | +---+---------+-----------+ 790 | | | MaxConn | CurrConns | 791 | +---+---------+-----------+ 792 | | 0 | 300 | 13 | 793 | | 1 | 300 | 15 | 794 | | 2 | 300 | 11 | 795 | +---+---------+-----------+ 796 | 797 | Arguments: 798 | 799 | dataframe (obj): Pandas dataframe with statistics for HAProxy workers 800 | metric (tuple): A namedtuple of MetricNamesPercentage 801 | 802 | 803 | Returns: 804 | A percentage as integer 805 | 806 | """ 807 | _sum = dataframe.loc[:, [metric.name]].sum()[0] 808 | _sum_limit = dataframe.loc[:, [metric.limit]].sum()[0] 809 | if _sum_limit == 0: 810 | return 0 811 | 812 | return int(100 * _sum / _sum_limit) 813 | 814 | 815 | def send_wlc(output, name): 816 | """Send to graphite the wall clock time of the decorated method. 817 | 818 | The decorated method must have the following attributes: 819 | graphite_path (str): The graphite path to use for storing the metric 820 | timestamp (int): Time to credit the wallclock time 821 | 822 | Arguments: 823 | output (obj): A dispatcher object which has send method registered 824 | name (str): A name to append to the metric. 825 | """ 826 | def decorated(func): 827 | """Decorator. 
828 | 829 | Arguments: 830 | func (obj): A function to decorate 831 | """ 832 | @wraps(func) 833 | def wrapper(self, *args, **kwargs): 834 | """Time the execution of decorated function.""" 835 | start_time = time.time() 836 | result = func(self, *args, **kwargs) 837 | elapsed_time = '{t:.3f}'.format(t=time.time() - start_time) 838 | data = ("{p}.haproxystats.{m} {v} {t}\n" 839 | .format(p=getattr(self, 'graphite_path'), 840 | m='WallClockTime' + name, 841 | v=elapsed_time, 842 | t=getattr(self, 'timestamp'))) 843 | log.info("wall clock time in seconds for %s %s", 844 | func.__name__, 845 | elapsed_time) 846 | output.signal('send', data=data) 847 | 848 | return result 849 | 850 | return wrapper 851 | 852 | return decorated 853 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | docopt>=0.6.1 2 | pandas>=0.17.1 3 | pyinotify>=0.9.6 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = haproxystats 3 | author = Pavlos Parissis 4 | author-email = pavlos.parissis@gmail.com 5 | maintainer = Pavlos Parissis 6 | maintainer-email = pavlos.parissis@gmail.com 7 | summary = A HAProxy statistics collection program 8 | home-page: https://github.com/unixsurfer/haproxystats 9 | license = Apache 2.0 10 | description-file = README.rst 11 | classifier = 12 | Development Status :: 5 - Production/Stable 13 | Environment :: Console 14 | Intended Audience :: Information Technology 15 | Intended Audience :: System Administrators 16 | Natural Language :: English 17 | Operating System :: POSIX 18 | Programming Language :: Python :: 3.4 19 | Topic :: Utilities 20 | keywords = haproxystats haproxy stats collector statistics 21 | 22 | [files] 23 | packages = 24 | haproxystats 25 | 26 | [entry_points] 27 | console_scripts = 28 | haproxystats-pull = haproxystats.pull:main 29 | haproxystats-process = haproxystats.process:main 30 | 31 | [pycodestyle] 32 | ignore = W503 33 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import setuptools 4 | 5 | setuptools.setup( 6 | setup_requires=['pbr'], 7 | pbr=True) 8 | --------------------------------------------------------------------------------