├── .dockerignore ├── .github └── CODEOWNERS ├── ChangeLog ├── Dockerfile ├── LICENSE ├── README.md ├── cpanfile ├── lib ├── Pingmachine │ ├── Config.pm │ ├── Graph │ │ └── Smokeping.pm │ ├── Main.pm │ ├── Order.pm │ ├── Order │ │ ├── FPing.pm │ │ ├── HTTPing.pm │ │ ├── PPing.pm │ │ ├── SPing.pm │ │ └── SSH.pm │ ├── OrderList.pm │ ├── OrdersDirWatcher.pm │ ├── Probe.pm │ ├── Probe │ │ ├── FPing.pm │ │ ├── HTTPing.pm │ │ ├── PPing.pm │ │ ├── SPing.pm │ │ └── SSH.pm │ └── ProbeList.pm └── Smokeping │ └── Colorspace.pm ├── pingmachine ├── pingmachine-graph ├── pingmachine-status └── pingtest.pl /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .github 3 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @open-ch/conf-mon 2 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2021-08-30 Konrad Buchlei 2 | * Release 1.6 3 | * httping probe: handle exit code 127 (connection actively refused) as 100% packet loss 4 | 5 | 2021-07-29 Konrad Bucheli 6 | * Release 1.5 7 | * httping probe: fix timeout 8 | * pingmachine-status: support for structured order folder 9 | 10 | 2021-05-26 Konrad Bucheli 11 | * Release 1.4 12 | * httping probe: allow to configure HTTP error codes to be considered as timeout (e.g. proxy related failures) 13 | 14 | 2021-01-21 Konrad Bucheli 15 | * Release 1.3 16 | * support for structured order folder 17 | * fixes parsing in httping probe 18 | 19 | 2021-01-11 Konrad Bucheli 20 | * Release 1.2 21 | * httping: added httping probe 22 | 23 | 2020-05-25 Konrad Bucheli 24 | * Release 1.1 25 | * allow to ping more than 5000 targets 26 | 27 | 2019-02-25 Daniel Aschwanden 28 | * Release 1.0 29 | * fping probe: fix pid checking 30 | 31 | 2018-12-28 Konrad Bucheli 32 | * Release 0.8 33 | * kill safeguard: check if process is the same we started before killing 34 | * scion support: added sping and pping probe 35 | * ssh probe: fix nmap timeout option 36 | 37 | 2018-03-07 Daniel Aschwanden 38 | * Release 0.7 39 | * fixes division by zero error 40 | 41 | 2017-07-01 Konrad Bucheli 42 | * Release 0.6 43 | * telegraf support: pingmachine can send metrics to the telegraf socket_listener 44 | * pingmachine-status: warn if last_result can't be loaded 45 | 46 | 2014-06-23 David Schweikert 47 | * Release 0.5 48 | * Improve support for IPv6 fping orders 49 | 50 | 2014-05-20 David Schweikert 51 | * Release 0.4 52 | * Add support for IPv6 fping orders 53 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM perl:5.34 2 | 3 | COPY . /app 4 | WORKDIR /app 5 | 6 | RUN apt-get update && apt-get install -y pkg-config gettext xml2 libpango1.0-dev libcairo2-dev groff-base fping httping 7 | 8 | RUN cpanm --installdeps . 9 | 10 | VOLUME /var/lib/pingmachine 11 | 12 | ENTRYPOINT [ "perl", "-I./lib", "./pingmachine" ] 13 | CMD [ "--debug" ] 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pingmachine 2 | =========== 3 | 4 | Pingmachine - Smokeping-like pinging framework 5 | 6 | Introduction 7 | ------------ 8 | Monitoring components often need to do latency/loss measurements by sending 9 | regularly some probes, typically ICMP ECHO (ping). Pingmachine was born out of 10 | the idea to have a common framework that can take care of doing this "pinging". 11 | That is useful, because pinging is more complicated than it seems: it needs to 12 | be done efficiently (don't for too much), asynchronously (pinging takes time) 13 | and data needs to be stored in a way that the most useful statistics can be 14 | extracted. 15 | 16 | The following were requirements when designing pingmachine (which might help 17 | understand the reason for its architecture): 18 | 19 | - Configurable targets 20 | - Extensible probing methods (not only ping) 21 | - High performance (should handle 10'000 targets) 22 | - Storage of the measurements distribution plus the median value, if multiple 23 | probes to the same target are sent at each iteration. 24 | - Consolidation using maximum, minimum and average functions. 25 | - Management of old targets data (the list of targets change rather frequently!) 26 | 27 | Smokeping vs. Pingmachine 28 | ------------------------- 29 | After initial tests with Smokeping, we decided to write our own framework for 30 | starting "ping-jobs" and collecting measurements. Our script is much simpler 31 | than Smokeping, which is a complete monitoring solution, including a Web-site 32 | to browse results. Nonetheless, "Pingmachine" is heavily inspired by Smokeping. 33 | The main differences are: 34 | 35 | - Smokeping is designed as a complete application, including the frontend. This 36 | is noticed in many places, for example in what must be configured for it to 37 | run at all (URL address, etc.) 38 | - The Smokeping configuration is not dynamic. If it changes, then Smokeping 39 | needs to be reloaded. This could be a problem for us if we want to use it for 40 | highly dynamic applications (sang?) 41 | - Generation of graphs doesn't work out of the box, because the graphs are 42 | generated by the CGI when displaying a page. We can't only ask Smokeping to 43 | generate a particular graph. 44 | 45 | Interface for Applications 46 | -------------------------- 47 | One of the things that pingmachine is optimized for, is high dynamicity of the 48 | configuration. What needs to be measured could change every minute and 49 | pingmachine should be able to handle it. 50 | 51 | 52 | ### Order Interface 53 | 54 | Pingmachine uses a directory where configuration 55 | "snippets" are put by whoever wants something to be done. These configuration 56 | snippets are called *orders* 57 | 58 | Pingmachine monitors the /orders and immediately picks up new and modified orders 59 | and does what is necessary to start with the measurements. If the order file is 60 | deleted, pingmachine will also automatically stop with that monitoring. Please do not put the order file directly into the orders directory (see old order interface), but in a subdirectory of your choise, eventually structured with further subdirectories as you like. 61 | 62 | The output produced by the monitoring work will be put in a separate directory 63 | (/output). It will reflect the structure of the input folder: 64 | 65 | .---> /orders/... >---, 66 | / \ 67 | Application---. .--- pingmachine <---> fping 68 | \ / 69 | `---< /output/... <---' 70 | 71 | 72 | #### Orders Specification 73 | 74 | An order is typically one target IP address that needs to be monitored. If an 75 | application needs to monitor an IP address, it just writes the corresponding 76 | order file in the "/orders" directory 77 | 78 | Orders also need to be periodically refreshed (at least once an hour), to make 79 | sure that pingmachine continues with the measurements. This mechanism is needed 80 | to make sure that no stale configuration has the consequence of monitoring an 81 | IP address indefinitely. 82 | 83 | A example order could be (in YAML): 84 | 85 | user: tmon 86 | task: tun_eth0_4061 87 | step: 300 88 | pings: 20 89 | probe: fping 90 | fping: 91 | host: 62.179.116.250 92 | 93 | To additionally forward the measurements to telegraf, the order can optionally be extended with measurement name and tags: 94 | 95 | measurement_name: tunnel 96 | tags: 97 | tunnel_id: 12458 98 | remote_host: 5292 99 | interface: eth2 100 | remote_interface: eth2 101 | 102 | The relative path from the orders/ directory is the "order id" and it is 103 | chosen by the client and that it is unique for all applications and all targets. 104 | We suggest to use a subdirectory per application and an application uniqe name 105 | for the order file. 106 | 107 | Be aware that there are file file system limitation for the number of links 108 | that an inode can have, therefore paths of arbitrary depth in orders/ are supported. 109 | 110 | The file-system tree could look as follows: 111 | 112 | /var/lib/pingmachine/ 113 | |---- orders/ 114 | | |---- app1/ 115 | | | |---- target1 116 | | | `---- target2 117 | | |----... 118 | 119 | Note that orders are to be considered dynamic configuration. 120 | The "users" (tmon, etc.) are usually high-level programs which 121 | will just install order as needed. Pingmachine will then do the measurements as 122 | instructed. In other words: the complete /var/lib/pingmachine 123 | directory can be completely deleted and recreated (with the loss of measured 124 | data, however). 125 | 126 | 127 | ### Old Order Interface 128 | 129 | To make this possible, pingmachine uses a directory where configuration 130 | "snippets" are put by whoever wants something to be done. These configuration 131 | snippets are called *orders* and *telegraf*. The configuration of one order or telegraf order is not allowed to 132 | change. Orders and telegraf orders can only be added or removed. 133 | 134 | Pingmachine monitors these /orders and /telegraf directories, immediately picks up new orders and telegraf orders, 135 | and does what is necessary to start with the measurements. If the order file is 136 | deleted, pingmachine will also automatically stop with that monitoring. 137 | 138 | The output produced by the monitoring work, will be put in a separate directory 139 | (/output), which can then be used by who give the order to fetch the data: 140 | 141 | .----> /orders >----, 142 | / /telegraf \ 143 | Application----. .---- pingmachine <----> fping 144 | \ / 145 | `----< /output <----' 146 | 147 | 148 | #### Orders and Telegraf Orders Specification 149 | 150 | An order is typically one target IP address that needs to be monitored. If an 151 | application needs to monitor an IP address, it just writes the corresponding 152 | order file in the "/orders" directory (more about the exact directory structure 153 | later). It additionally writes a corresponding telegraf file in the "telegraf" 154 | directory if the IP address belongs to a tunnel. 155 | 156 | Orders and telegraf orders also need to be periodically refreshed (at least once an hour), to make 157 | sure that pingmachine continues with the measurements. This mechanism is needed 158 | to make sure that no stale configuration has the consequence of monitoring an 159 | IP address indefinitely. 160 | 161 | A example order could be (in YAML): 162 | 163 | user: tmon 164 | task: tun_eth0_4061 165 | step: 300 166 | pings: 20 167 | probe: fping 168 | fping: 169 | host: 62.179.116.250 170 | 171 | A example telegraf order could be (in YAML): 172 | 173 | measurement_name: tunnel 174 | tags: 175 | tunnel_id: 12458 176 | remote_host: 5292 177 | interface: eth2 178 | remote_interface: eth2 179 | 180 | The file name (the order "id") is determined by calculating the md5 checksum on 181 | the file contents. This makes sure that different orders have different 182 | identifiers and also that, if the same order reappears, it is going to have the 183 | same id. The file name of the existing telegraf files is the same as the one of 184 | the corresponding order file. 185 | 186 | The file-system tree could look as follows: 187 | 188 | /var/lib/pingmachine/ 189 | |---- orders/ 190 | | |---- 6dd803dc5d29b72564467de7ddbfc695 191 | | `---- cd7d89acdba05cef56184db4a7b044ea 192 | |---- telegraf/ 193 | | |---- 6dd803dc5d29b72564467de7ddbfc695 194 | | `---- cd7d89acdba05cef56184db4a7b044ea 195 | 196 | Note that orders and telegraf orders are to be considered dynamic configuration. 197 | The "users" (tmon, etc.) are usually high-level programs which 198 | will just install order as needed. Pingmachine will then do the measurements as 199 | instructed. In other words: the complete /var/lib/pingmachine 200 | directory can be completely deleted and recreated (with the loss of measured 201 | data, however). 202 | 203 | Output 204 | ------ 205 | The generated RRD file is put in a order-specific directory created under the 206 | output tree. Each directory contains one rrd file: main.rrd. The definition of 207 | the RRD file is the same for all probes and its creation handled by the main 208 | script. This allows us to interchange probe types easily and to offload some 209 | work out of the probe modules. 210 | 211 | |---- output/ 212 | | |---- app1/ 213 | | | `---- target1/ 214 | | | |---- main.rrd 215 | | | `---- last_result 216 | | |---- 6dd803dc5d29b72564467de7ddbfc695/ 217 | | | |---- main.rrd 218 | | | `---- last_result 219 | | `---- ... 220 | 221 | Also, the output directory also contains a last_result file, with just the 222 | latest result of the pinging. It is meant to be used by programs that only need 223 | information about the latest ping job. The format of the file is as follows: 224 | 225 | time: 1310116500 226 | updated: 1310116524 227 | step: 20 228 | pings: 1 229 | loss: 0 230 | min: 5.700000e-04 231 | median: 5.700000e-04 232 | max: 5.700000e-04 233 | 234 | Archiving 235 | --------- 236 | When an order is explicitly deleted or has timed out, the corresponding output 237 | directory is moved to the archive directory: 238 | 239 | |---- archive/ 240 | | |---- app1 241 | | | `---- target2 242 | | | `---- main.rrd 243 | | |---- 2b45d6a19d2c3684767440fcb2f0b0c9/ 244 | | | `---- main.rrd 245 | | |---- ... 246 | 247 | As soon as the "order" file of an archived order is put again into the orders 248 | directory, pingmachine will move the output data into place again. It should 249 | not be possible to have both data in output and in archive for the same order. 250 | 251 | Supported Probes 252 | ---------------- 253 | ## fping 254 | 255 | fping: 256 | host: 213.156.230.57 257 | interface: eth0 258 | source_ip: 10.0.0.12 259 | 260 | Note that the source interface and IP are optional. The [fping](https://fping.org/) utility must be installed on the system. 261 | 262 | ## httping 263 | 264 | httping: 265 | url: http://www.example.com 266 | user_agent: pingmachine 267 | proxy: http://10.0.0.24:8080 268 | http_codes_as_failure: 403,407,503 269 | 270 | Note that the user_agent, the proxy and the http_codes_as_failure configuration are optional. The [httping](https://www.vanheusden.com/httping/) utility must be installed on the system. 271 | 272 | 273 | 274 | ## SCION SCMP and pingpong tool 275 | 276 | sping: 277 | host: 213.156.230.57 278 | interface: eth0 279 | source_ip: 10.0.0.12 280 | flags: 281 | 282 | 283 | pping: 284 | host: 213.156.230.57 285 | interface: eth0 286 | source_ip: 10.0.0.12 287 | flags: 288 | 289 | 290 | Sent metrics 291 | ------------ 292 | For every telegraf file, both metrics gathered by pingmachine and metrics 293 | provided in the telegraf file are sent 294 | 295 | Installation 296 | ------------ 297 | Required perl modules: 298 | - AE 299 | - AnyEvent 300 | - Log::Any 301 | - Log::Any::Adapter::Dispatch 302 | - InfluxDB::LineProtocol 303 | - IO::Socket 304 | - Mouse 305 | - MouseX::NativeTraits 306 | - RRDs (RRDtool) 307 | - Term::ANSIColor 308 | - Try::Tiny 309 | - YAML::XS 310 | 311 | License 312 | ------- 313 | See the LICENSE file for usage conditions. 314 | -------------------------------------------------------------------------------- /cpanfile: -------------------------------------------------------------------------------- 1 | requires 'AE'; 2 | requires 'AnyEvent','7.17'; 3 | requires 'AnyEvent::Util'; 4 | requires 'EV','4.33'; 5 | requires 'IO::Socket','1.46'; 6 | requires 'InfluxDB::LineProtocol','1.014'; 7 | requires 'Linux::Inotify2','2.3'; 8 | requires 'Log::Any','1.710'; 9 | requires 'Log::Any::Adapter','1.710'; 10 | requires 'Log::Any::Adapter::Dispatch','0.08'; 11 | requires 'Mouse','v2.5.10'; 12 | requires 'MouseX::NativeTraits','1.09'; 13 | requires 'Net::Server::Daemonize','0.06'; 14 | requires 'Params::Validate','1.30'; 15 | requires 'Term::ANSIColor','5.01'; 16 | requires 'Try::Tiny','0.30'; 17 | requires 'XML::Simple','2.25'; 18 | requires 'YAML::XS','0.83'; 19 | requires 'Alien::RRDtool','0.06'; 20 | -------------------------------------------------------------------------------- /lib/Pingmachine/Config.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Config; 2 | 3 | use strict; 4 | 5 | use Log::Any qw($log); 6 | 7 | my $_base_dir = '/var/lib/pingmachine'; 8 | my $_orders_dir = $_base_dir . '/orders'; 9 | my $_telegraf_dir = $_base_dir . '/telegraf'; 10 | my $_output_dir = $_base_dir . '/output'; 11 | my $_archive_dir = $_base_dir . '/archive'; 12 | my $_orders_max_age = 3660; # 1 hour plus some margin 13 | 14 | sub orders_dir { return $_orders_dir; } 15 | sub telegraf_dir { return $_telegraf_dir; } 16 | sub output_dir { return $_output_dir; } 17 | sub archive_dir { return $_archive_dir; } 18 | sub orders_max_age { return $_orders_max_age; } 19 | 20 | sub base_dir { 21 | my ($class, $value) = @_; 22 | if(defined $value) { 23 | $_base_dir = $value; 24 | $_orders_dir = $_base_dir . '/orders'; 25 | $_telegraf_dir = $_base_dir . '/telegraf'; 26 | $_output_dir = $_base_dir . '/output'; 27 | $_archive_dir = $_base_dir . '/archive'; 28 | } 29 | return $_base_dir; 30 | } 31 | 32 | my ($host, $port); 33 | 34 | sub get_telegraf { return ($host, $port); } 35 | 36 | sub set_telegraf { 37 | my ($class, $value) = @_; 38 | if(defined $value) { 39 | ($host, $port) = split(':', $value); 40 | return ($host, $port); 41 | } 42 | return; 43 | } 44 | 45 | sub rras { 46 | my ($class, $rrd_template, $step) = @_; 47 | 48 | if($rrd_template eq 'smokeping' and $step < 300) { 49 | return [ 50 | [ 'AVERAGE', 0.5, 1, 900 ], 51 | 52 | [ 'AVERAGE', 0.5, 300/$step, 864 ], # 72 hours, 5 min. resolution (day) 53 | [ 'MIN', 0.5, 300/$step, 864 ], 54 | [ 'MAX', 0.5, 300/$step, 864 ], 55 | 56 | [ 'AVERAGE', 0.5, 1800/$step, 480 ], # 10 days, 30 min. resolution (week) 57 | [ 'MIN', 0.5, 1800/$step, 480 ], 58 | [ 'MAX', 0.5, 1800/$step, 480 ], 59 | 60 | [ 'AVERAGE', 0.5, 3600/$step, 960 ], # 40 days, 60 min. resolution (month) 61 | [ 'MIN', 0.5, 3600/$step, 960 ], 62 | [ 'MAX', 0.5, 3600/$step, 960 ], 63 | 64 | [ 'AVERAGE', 0.5, 12*3600/$step, 800 ], # 400 days, 12 h. resolution (year) 65 | [ 'MAX', 0.5, 12*3600/$step, 800 ], 66 | [ 'MIN', 0.5, 12*3600/$step, 800 ], 67 | 68 | [ 'AVERAGE', 0.5, 7*24*3600/$step, 520 ], # 3600 days, 7 d. resolution (10 years) 69 | [ 'MAX', 0.5, 7*24*3600/$step, 520 ], 70 | [ 'MIN', 0.5, 7*24*3600/$step, 520 ], 71 | ]; 72 | } 73 | elsif($rrd_template eq 'smokeping' and $step == 300) { 74 | # tuned RRAs for step 300 75 | return [ 76 | [ 'AVERAGE', 0.5, 1, 864 ], # 72 hours, 5 min. resolution (day) 77 | 78 | [ 'AVERAGE', 0.5, 6, 480 ], # 10 days, 30 min. resolution (week) 79 | [ 'MIN', 0.5, 6, 480 ], 80 | [ 'MAX', 0.5, 6, 480 ], 81 | 82 | [ 'AVERAGE', 0.5, 12, 960 ], # 40 days, 60 min. resolution (month) 83 | [ 'MIN', 0.5, 12, 960 ], 84 | [ 'MAX', 0.5, 12, 960 ], 85 | 86 | [ 'AVERAGE', 0.5, 144, 800 ], # 400 days, 12 h. resolution (year) 87 | [ 'MAX', 0.5, 144, 800 ], 88 | [ 'MIN', 0.5, 144, 800 ], 89 | 90 | [ 'AVERAGE', 0.5, 2016, 520 ], # 3600 days, 7 d. resolution (10 years) 91 | [ 'MAX', 0.5, 2016, 520 ], 92 | [ 'MIN', 0.5, 2016, 520 ], 93 | ]; 94 | } 95 | else { 96 | $log->warning("Unknown rrd_template/step, using Smokeping defaults (rrd_template: $rrd_template, $step)"); 97 | # Smokeping standard 98 | return [ 99 | [ 'AVERAGE', 0.5, 1, 1008 ], 100 | [ 'AVERAGE', 0.5, 12, 4320 ], 101 | [ 'MIN', 0.5, 12, 4320 ], 102 | [ 'MAX', 0.5, 12, 4320 ], 103 | [ 'AVERAGE', 0.5, 144, 720 ], 104 | [ 'MAX', 0.5, 144, 720 ], 105 | [ 'MIN', 0.5, 144, 720 ], 106 | ]; 107 | } 108 | }; 109 | 110 | 1; 111 | -------------------------------------------------------------------------------- /lib/Pingmachine/Graph/Smokeping.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Graph::Smokeping; 2 | 3 | # Parts of this code are derived from Smokeping, with the 4 | # following license text: 5 | # 6 | # This program is free software; you can redistribute it 7 | # and/or modify it under the terms of the GNU General Public 8 | # License as published by the Free Software Foundation; either 9 | # version 2 of the License, or (at your option) any later 10 | # version. 11 | # 12 | # This program is distributed in the hope that it will be 13 | # useful, but WITHOUT ANY WARRANTY; without even the implied 14 | # warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 15 | # PURPOSE. See the GNU General Public License for more 16 | # details. 17 | # 18 | # You should have received a copy of the GNU General Public 19 | # License along with this program; if not, write to the Free 20 | # Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 21 | # 02139, USA. 22 | 23 | 24 | use strict; 25 | use RRDs; 26 | use Params::Validate qw(validate); 27 | use Smokeping::Colorspace; 28 | 29 | # helper package to produce Smokeping-like graphs 30 | 31 | sub _get_loss_colors { 32 | my ($pings) = @_; 33 | return { 34 | 0 => ['0', '#26ff00'], 35 | 1 => ["1/$pings", '#00b8ff'], 36 | 2 => ["2/$pings", '#0059ff'], 37 | 3 => ["3/$pings", '#5e00ff'], 38 | 4 => ["4/$pings", '#7e00ff'], 39 | int($pings/2) => [int($pings/2)."/$pings", '#dd00ff'], 40 | $pings-1 => [($pings-1)."/$pings", '#ff0000'], 41 | }; 42 | } 43 | 44 | sub _get_loss_bg_colors { 45 | my ($pings) = @_; 46 | my $lc = _get_loss_colors($pings); 47 | my %lcback; 48 | foreach my $key (keys %$lc) { 49 | if ($key == 0) { 50 | $lcback{$key} = ""; 51 | next; 52 | } 53 | my $web = $lc->{$key}[1]; 54 | my @rgb = Smokeping::Colorspace::web_to_rgb($web); 55 | my @hsl = Smokeping::Colorspace::rgb_to_hsl(@rgb); 56 | $hsl[2] = (1 - $hsl[2]) * (2/3) + $hsl[2]; 57 | @rgb = Smokeping::Colorspace::hsl_to_rgb(@hsl); 58 | $web = Smokeping::Colorspace::rgb_to_web(@rgb); 59 | $lcback{$key} = $web; 60 | } 61 | return \%lcback; 62 | } 63 | 64 | sub _findmax { 65 | my ($rrd, $timespan) = @_; 66 | 67 | # fetch max of median ping 68 | my ($graphret,$xs,$ys) = RRDs::graph( 69 | "dummy", 70 | '--start', -$timespan, 71 | "DEF:maxping=${rrd}:median:AVERAGE", 72 | 'PRINT:maxping:MAX:%le' 73 | ); 74 | my $ERR=RRDs::error; die "Error while graphing $rrd: $ERR\n" if $ERR; 75 | my $val = $graphret->[0]; 76 | $val = 0 if $val =~ /nan/i; 77 | 78 | return $val * 1.3; # leave about 20% space above the maximum median, so 79 | # that we have some room to show the smoke 80 | } 81 | 82 | sub _smokecol { 83 | my $count = shift; 84 | return [] unless $count > 2; 85 | my $half = $count/2; 86 | my @items; 87 | my $itop=$count; 88 | my $ibot=1; 89 | for (; $itop > $ibot; $itop--,$ibot++){ 90 | my $color = int(190/$half * ($half-$ibot))+50; 91 | push @items, "CDEF:smoke${ibot}=cp${ibot},UN,UNKN,cp${itop},cp${ibot},-,IF"; 92 | push @items, "AREA:cp${ibot}"; 93 | push @items, "STACK:smoke${ibot}#".(sprintf("%02x",$color) x 3); 94 | }; 95 | return \@items; 96 | } 97 | 98 | sub _graph_all { 99 | my ($p, $max) = @_; 100 | 101 | my $lc = _get_loss_colors($p->{pings}); 102 | my $lcback = _get_loss_bg_colors($p->{pings}); 103 | 104 | my @g; 105 | my @aftersmoke; 106 | 107 | # Median value 108 | push @g, "VDEF:avmed=median,AVERAGE"; 109 | push @g, 'GPRINT:median:LAST:Median RTT Current\: %.1lf %ss\t'; 110 | push @g, 'GPRINT:median:MAX:Max\: %.1lf %ss\t'; 111 | push @g, 'GPRINT:avmed:Average\: %.1lf %ss\l'; 112 | push @g, "LINE1:median#202020"; 113 | 114 | # Loss value 115 | push @g, "CDEF:ploss=loss,$p->{pings},/,100,*"; 116 | push @g, 'GPRINT:ploss:LAST:Packet Loss Current\: %.2lf %%\t'; 117 | push @g, 'GPRINT:ploss:MAX:Max\: %.2lf %%\t'; 118 | push @g, 'GPRINT:ploss:AVERAGE:Average\: %.2lf %% \l'; 119 | push @g, 'COMMENT:Packet Loss'; 120 | 121 | my $last = -1; 122 | foreach my $loss (sort {$a <=> $b} keys %$lc){ 123 | next if $loss >= $p->{pings}; 124 | my $lvar = $loss; $lvar =~ s/\./d/g ; 125 | 126 | # Median color (loss) 127 | my $yscale = $max / $p->{height}; 128 | push @aftersmoke, "CDEF:me$lvar=loss,$last,GT,loss,$loss,LE,*,1,UNKN,IF,median,*"; 129 | push @aftersmoke, "CDEF:meL$lvar=me$lvar,$yscale,-"; 130 | push @aftersmoke, "CDEF:meH$lvar=me$lvar,0,*,$yscale,2,*,+"; 131 | push @aftersmoke, "AREA:meL$lvar"; 132 | push @aftersmoke, "STACK:meH$lvar$lc->{$loss}[1]:$lc->{$loss}[0]"; 133 | 134 | # Background color (loss) 135 | push @g, "CDEF:lossbg$lvar=loss,$last,GT,loss,$loss,LE,*,INF,UNKN,IF"; 136 | push @g, "AREA:lossbg$lvar$lcback->{$loss}"; 137 | 138 | push @aftersmoke, 139 | "CDEF:lossbgs$lvar=loss,$last,GT,loss,$loss,LE,*,cp2,UNKN,IF"; 140 | push @aftersmoke, 141 | "AREA:lossbgs$lvar$lcback->{$loss}"; 142 | 143 | $last = $loss; 144 | } 145 | 146 | 147 | # Smoke 148 | my $smoke = $p->{pings} >= 3 ? _smokecol $p->{pings} : 149 | [ 'COMMENT:(Not enough pings to draw any smoke.)\s', 'COMMENT:\s' ]; 150 | push @g, @$smoke; 151 | 152 | # Finish background color 153 | push @g, @aftersmoke; 154 | 155 | push @g, 'COMMENT: \l'; 156 | 157 | return @g; 158 | } 159 | 160 | sub graph { 161 | my $class = shift; 162 | 163 | # Validate and extract parameters 164 | my %p = validate(@_, { 165 | rrd => 1, 166 | img => 1, 167 | timespan => 1, 168 | pings => 1, 169 | width => 1, 170 | height => 1, 171 | title => 0, 172 | }); 173 | 174 | # Do some needed calculations 175 | my $max = _findmax($p{rrd}, $p{timespan}); 176 | 177 | # Base RRDs::graph parameters 178 | my @g = ( 179 | '--width', $p{width}, 180 | '--height', $p{height}, 181 | '--alt-y-grid', 182 | '--rigid', 183 | '--lower-limit','0', 184 | '--upper-limit', $max, 185 | '--start', "-$p{timespan}s", 186 | '--color', 'SHADEA#ffffff', 187 | '--color', 'SHADEB#ffffff', 188 | '--color', 'BACK#ffffff', 189 | '--color', 'CANVAS#ffffff', 190 | '--units-exponent', -3, 191 | "DEF:median=$p{rrd}:median:AVERAGE", 192 | "DEF:loss=$p{rrd}:loss:AVERAGE", 193 | (map {"DEF:ping${_}=$p{rrd}:ping${_}:AVERAGE"} 1..$p{pings}), 194 | (map {"CDEF:cp${_}=ping${_},$max,LT,ping${_},INF,IF"} 1..$p{pings}), 195 | ); 196 | push @g, '--title', $p{title} if $p{title}; 197 | 198 | # Loss 199 | push @g, _graph_all(\%p, $max); 200 | 201 | # Do the graph 202 | RRDs::graph( 203 | $p{img}, 204 | @g, 205 | ); 206 | my $ERR=RRDs::error; die "Error while graphing $p{rrd}: $ERR\n" if $ERR; 207 | } 208 | 209 | 1; 210 | -------------------------------------------------------------------------------- /lib/Pingmachine/Main.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Main; 2 | 3 | # This modules encapsulates the main program logic. 4 | 5 | use Any::Moose; 6 | use Carp; 7 | use AnyEvent; 8 | use Log::Any qw($log); 9 | use Fcntl qw(:flock); 10 | use File::Path; 11 | 12 | use Pingmachine::OrderList; 13 | use Pingmachine::OrdersDirWatcher; 14 | use Pingmachine::ProbeList; 15 | 16 | # AnyEvent condvar: "Quit now" 17 | has 'ae_quit_cv' => ( 18 | isa => 'AnyEvent::CondVar', 19 | is => 'ro', 20 | lazy => 1, 21 | default => sub { AnyEvent->condvar }, 22 | ); 23 | 24 | # AnyEvent watchers 25 | has 'ae_watchers' => ( 26 | traits => ['Array'], 27 | isa => 'ArrayRef', 28 | is => 'ro', 29 | default => sub { [] }, 30 | handles => { 31 | add_ae_watcher => 'push', 32 | }, 33 | ); 34 | 35 | has 'order_list' => ( 36 | isa => 'Pingmachine::OrderList', 37 | is => 'ro', 38 | lazy => 1, 39 | default => sub { 40 | my $self = shift; 41 | my $ol = Pingmachine::OrderList->new(); 42 | $ol->register_add_order_cb(sub { 43 | $self->add_order_cb($_[0]); 44 | }); 45 | $ol->register_remove_order_cb(sub { 46 | $self->remove_order_cb($_[0]); 47 | }); 48 | return $ol; 49 | }, 50 | ); 51 | 52 | has 'probe_list' => ( 53 | isa => 'Pingmachine::ProbeList', 54 | is => 'ro', 55 | lazy => 1, 56 | default => sub { 57 | my $self = shift; 58 | my $ol = Pingmachine::ProbeList->new(); 59 | return $ol; 60 | }, 61 | ); 62 | 63 | has 'orders_dir_watcher' => ( 64 | isa => 'Pingmachine::OrdersDirWatcher', 65 | is => 'ro', 66 | writer => '_set_orders_dir_watcher', 67 | ); 68 | 69 | has 'lock' => ( 70 | is => 'ro', 71 | isa => 'FileHandle', 72 | lazy => 1, 73 | default => sub { 74 | my $lock_fh; 75 | my $basedir = Pingmachine::Config->base_dir; 76 | my $lockfile = $basedir . '/' . '.lock'; 77 | open($lock_fh, '>', $lockfile) or 78 | log_die("can't write lock file $lockfile: $!\n"); 79 | flock($lock_fh, LOCK_NB | LOCK_EX) or 80 | log_die("can't lock base directory $basedir. Is pingmachine running already?\n"); 81 | return $lock_fh; 82 | }, 83 | ); 84 | 85 | 86 | # log_die: we need this to properly log before the event loop starts 87 | sub log_die { 88 | $log->fatal("$_[0]"); 89 | exit(1); 90 | } 91 | 92 | sub _create_dir_structure { 93 | # Create needed directories: 94 | # - orders 95 | my $orders_dir = Pingmachine::Config->orders_dir; 96 | mkpath($orders_dir) unless -d $orders_dir; 97 | # - telegraf 98 | my $telegraf_dir = Pingmachine::Config->telegraf_dir; 99 | mkpath($telegraf_dir) unless -d $telegraf_dir; 100 | # - output 101 | my $output_dir = Pingmachine::Config->output_dir; 102 | mkpath($output_dir) unless -d $output_dir; 103 | # - archive 104 | my $archive_dir = Pingmachine::Config->archive_dir; 105 | mkpath($archive_dir) unless -d $archive_dir; 106 | } 107 | 108 | sub run { 109 | my ($self) = @_; 110 | 111 | # Lock 112 | $self->lock; 113 | 114 | # Create basedir structure 115 | $self->_create_dir_structure; 116 | 117 | # Install signal watchers for SIGINT and SIGTERM 118 | $self->add_ae_watcher( 119 | AnyEvent->signal(signal => "INT", cb => sub { $self->ae_quit_cv->send() }) 120 | ); 121 | $self->add_ae_watcher( 122 | AnyEvent->signal(signal => "TERM", cb => sub { $self->ae_quit_cv->send() }) 123 | ); 124 | 125 | # Create OrdersDirWatcher object (watches /orders) 126 | $self->_set_orders_dir_watcher( 127 | Pingmachine::OrdersDirWatcher->new(order_list => $self->order_list) 128 | ); 129 | 130 | # Log that we started 131 | $log->info("pingmachine started"); 132 | $self->update_process_name(); 133 | 134 | # Enter event loop 135 | $self->ae_quit_cv->recv(); 136 | 137 | # Log that we stopped 138 | $log->info("pingmachine stopped"); 139 | } 140 | 141 | sub add_order_cb { 142 | my ($self, $order) = @_; 143 | 144 | $log->info("new order: ".$order->nice_name); 145 | $self->probe_list->add_order($order); 146 | $self->update_process_name(); 147 | } 148 | 149 | sub remove_order_cb { 150 | my ($self, $order) = @_; 151 | 152 | $log->info("removed order: ".$order->nice_name); 153 | $self->probe_list->remove_order($order); 154 | $self->update_process_name(); 155 | } 156 | 157 | sub update_process_name { 158 | my ($self) = @_; 159 | $0 = "pingmachine [orders: ".$self->order_list->count."]"; 160 | } 161 | 162 | __PACKAGE__->meta->make_immutable; 163 | 164 | 1; 165 | -------------------------------------------------------------------------------- /lib/Pingmachine/Order.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Order; 2 | 3 | # This module represents a single Pingmachine order description. 4 | # It also responsible for creating/updating the corresponding RRD file 5 | 6 | use Any::Moose; 7 | use Any::Moose '::Util::TypeConstraints'; 8 | use IO::Socket; 9 | use AnyEvent::Util qw(fh_nonblocking); 10 | use AnyEvent; 11 | use Log::Any qw($log); 12 | use RRDs; 13 | use File::Path; 14 | use File::Temp qw(tempfile); 15 | use File::Copy qw(move); 16 | use YAML::XS qw(LoadFile); 17 | use InfluxDB::LineProtocol qw(data2line); 18 | 19 | use Pingmachine::Config; 20 | use Pingmachine::Order::FPing; 21 | use Pingmachine::Order::SPing; 22 | use Pingmachine::Order::PPing; 23 | use Pingmachine::Order::HTTPing; 24 | use Pingmachine::Order::SSH; 25 | 26 | has 'id' => ( 27 | isa => 'Str', 28 | is => 'ro', 29 | required => 1 30 | ); 31 | 32 | has 'user' => ( 33 | isa => 'Str', 34 | is => 'ro', 35 | required => 1 36 | ); 37 | 38 | has 'step' => ( 39 | isa => 'Int', 40 | is => 'ro', 41 | required => 1 42 | ); 43 | 44 | has 'pings' => ( 45 | isa => 'Int', 46 | is => 'ro', 47 | required => 1 48 | ); 49 | 50 | has 'probe' => ( 51 | isa => 'Str', 52 | is => 'ro', 53 | required => 1 54 | ); 55 | 56 | has 'rrd_template' => ( 57 | isa => 'Str', 58 | is => 'ro', 59 | default => sub { return "smokeping" }, 60 | # other values: norrd 61 | ); 62 | 63 | has 'order_file' => ( 64 | isa => 'Str', 65 | is => 'ro', 66 | required => 1, 67 | ); 68 | 69 | has 'telegraf_file' => ( 70 | isa => 'Str', 71 | is => 'ro', 72 | ); 73 | 74 | has 'telegraf' => ( 75 | isa => 'HashRef', 76 | is => 'ro', 77 | default => sub { return {} }, 78 | ); 79 | 80 | has 'my_output_dir' => ( 81 | isa => 'Str', 82 | is => 'ro', 83 | lazy => 1, 84 | default => sub { 85 | my $self = shift; 86 | return Pingmachine::Config->output_dir . '/' . $self->id 87 | }, 88 | ); 89 | 90 | has 'my_archive_dir' => ( 91 | isa => 'Str', 92 | is => 'ro', 93 | lazy => 1, 94 | default => sub { 95 | my $self = shift; 96 | return Pingmachine::Config->archive_dir . '/' . $self->id 97 | }, 98 | ); 99 | 100 | has 'is_archived' => ( 101 | isa => 'Bool', 102 | is => 'ro', 103 | writer => '_set_is_archived', 104 | default => sub { return 0 }, 105 | ); 106 | 107 | class_type 'Pingmachine::Order::FPing'; 108 | coerce 'Pingmachine::Order::FPing', from 'HashRef', 109 | via { Pingmachine::Order::FPing->new(%$_) }; 110 | 111 | has 'fping' => ( 112 | isa => 'Pingmachine::Order::FPing', 113 | is => 'ro', 114 | coerce => 1, 115 | ); 116 | 117 | class_type 'Pingmachine::Order::SPing'; 118 | coerce 'Pingmachine::Order::SPing', from 'HashRef', 119 | via { Pingmachine::Order::SPing->new(%$_) }; 120 | 121 | has 'sping' => ( 122 | isa => 'Pingmachine::Order::SPing', 123 | is => 'ro', 124 | coerce => 1, 125 | ); 126 | 127 | class_type 'Pingmachine::Order::PPing'; 128 | coerce 'Pingmachine::Order::PPing', from 'HashRef', 129 | via { Pingmachine::Order::PPing->new(%$_) }; 130 | 131 | has 'pping' => ( 132 | isa => 'Pingmachine::Order::PPing', 133 | is => 'ro', 134 | coerce => 1, 135 | ); 136 | 137 | class_type 'Pingmachine::Order::HTTPing'; 138 | coerce 'Pingmachine::Order::HTTPing', from 'HashRef', 139 | via { Pingmachine::Order::HTTPing->new(%$_) }; 140 | 141 | has 'httping' => ( 142 | isa => 'Pingmachine::Order::HTTPing', 143 | is => 'ro', 144 | coerce => 1, 145 | ); 146 | 147 | class_type 'Pingmachine::Order::SSH'; 148 | coerce 'Pingmachine::Order::SSH', from 'HashRef', 149 | via { Pingmachine::Order::SSH->new(%$_) }; 150 | 151 | has 'ssh' => ( 152 | isa => 'Pingmachine::Order::SSH', 153 | is => 'ro', 154 | coerce => 1, 155 | ); 156 | 157 | 158 | sub BUILD { 159 | my $self = shift; 160 | my $step = $self->step; 161 | my $pings = $self->pings; 162 | 163 | # Check parameters 164 | ($step / $pings) >= 1 or 165 | die "step / pings must be > 1 (step: $step, pings: $pings)"; 166 | 167 | # If the order exists in the archive directory, then restore it 168 | my $archive_dir = $self->my_archive_dir; 169 | if(-d $archive_dir) { 170 | $self->unarchive(); 171 | } 172 | 173 | my $outdir = $self->my_output_dir; 174 | mkpath($outdir) unless -d $outdir; 175 | 176 | $self->_rrd_create() unless $self->rrd_template eq 'norrd'; 177 | }; 178 | 179 | sub nice_name { 180 | my ($self) = @_; 181 | if($self->probe eq 'fping') { 182 | return $self->id . ' (user: ' . $self->user . ', fping: ' . $self->fping->host . ')'; 183 | } 184 | elsif($self->probe eq 'sping') { 185 | return $self->id . ' (user: ' . $self->user . ', sping: ' . $self->sping->host . ')'; 186 | } 187 | elsif($self->probe eq 'pping') { 188 | return $self->id . ' (user: ' . $self->user . ', pping: ' . $self->pping->host . ')'; 189 | } 190 | elsif($self->probe eq 'httping') { 191 | return $self->id . ' (user: ' . $self->user . ', httping: ' . $self->httping->url . ')'; 192 | } 193 | elsif($self->probe eq 'ssh') { 194 | return $self->id . ' (user: ' . $self->user . ', ssh: ' . $self->ssh->host . ')'; 195 | } 196 | else { 197 | return $self->id . ' (user: ' . $self->user . ')'; 198 | } 199 | } 200 | 201 | # This value is used to assign orders to probes. Same key, same probe. 202 | sub probe_instance_key { 203 | my ($self) = @_; 204 | my @keys = ( $self->probe, $self->pings, $self->step ); 205 | if($self->probe eq 'fping') { 206 | push @keys, $self->fping->probe_instance_key; 207 | } 208 | if($self->probe eq 'sping') { 209 | push @keys, $self->sping->probe_instance_key; 210 | } 211 | if($self->probe eq 'pping') { 212 | push @keys, $self->pping->probe_instance_key; 213 | } 214 | if($self->probe eq 'httping') { 215 | push @keys, $self->httping->probe_instance_key; 216 | } 217 | if($self->probe eq 'ssh') { 218 | push @keys, $self->ssh->probe_instance_key; 219 | } 220 | return join('|', @keys); 221 | } 222 | 223 | sub rrd_filename { 224 | my ($self) = @_; 225 | return $self->my_output_dir . '/main.rrd'; 226 | } 227 | 228 | sub _rrd_create { 229 | my ($self) = @_; 230 | my $id = $self->id; 231 | my $dir = $self->my_output_dir; 232 | 233 | my $rrdfile = $self->rrd_filename; 234 | return if -f $rrdfile; 235 | 236 | # This is all very Smokeping-inspired 237 | my $now = int(AnyEvent->now); 238 | RRDs::create( 239 | $rrdfile, 240 | '--start', $now - $now % $self->step - $self->step, 241 | '--step', $self->step, 242 | "DS:loss:GAUGE:".(2*$self->step).":0:".$self->pings, 243 | "DS:median:GAUGE:".(2*$self->step).":0:180", 244 | (map { "DS:ping${_}:GAUGE:".(2*$self->step).":0:180" } 1..$self->pings), 245 | (map { "RRA:".(join ":", @{$_}) } @{Pingmachine::Config->rras($self->rrd_template, $self->step)}) 246 | ); 247 | my $ERR=RRDs::error; 248 | $log->error("error while updating $rrdfile: $ERR") if $ERR; 249 | } 250 | 251 | sub add_results { 252 | my ($self, $rrd_time, $results) = @_; 253 | 254 | if($self->is_archived) { 255 | $log->debug($self->id.": discarding results for archived order"); 256 | return; 257 | } 258 | 259 | $log->debug($self->id.": add results") if $log->is_debug(); 260 | 261 | $self->_update_telegraf($rrd_time, $results); 262 | $self->_update_rrd($rrd_time, $results); 263 | $self->_update_last_results_file($rrd_time, $results); 264 | } 265 | 266 | sub _update_rrd { 267 | my ($self, $rrd_time, $results) = @_; 268 | 269 | return if $self->rrd_template eq 'norrd'; 270 | 271 | # This is all very Smokeping-inspired 272 | my @rtts = @{$results->{rtts}}; 273 | my $entries = scalar @rtts; 274 | my $loss = $self->pings - $entries; 275 | my $median = $rtts[int($entries/2)]; defined $median or $median = 'U'; 276 | my $lowerloss = int($loss/2); 277 | my $upperloss = $loss - $lowerloss; 278 | @rtts = ((map {'U'} 1..$lowerloss),@rtts, (map {'U'} 1..$upperloss)); 279 | my $rrdfile = $self->rrd_filename; 280 | RRDs::update($rrdfile, "$rrd_time:${loss}:${median}:".(join ":", @rtts)); 281 | my $ERR=RRDs::error; 282 | if ( $ERR ) { 283 | if ( $ERR =~ m/main\.rrd\'\sis\snot\san\sRRD\sfile/xms ) { 284 | # special case: 285 | # RRD file is corrupted 286 | 287 | # new filename to store the corrupted version for later analysis 288 | my $corrupted = $rrdfile; 289 | $corrupted =~ s/main\.rrd/corrupted_main\.rrd/; 290 | 291 | move( $rrdfile, $corrupted ) 292 | or $log->error("could not move corrupted $rrdfile: $!"); 293 | 294 | # create a new rrd file 295 | $self->_rrd_create(); 296 | 297 | $log->info("detected corrupted RRD file $rrdfile and moved it to create a new RRD file."); 298 | } 299 | else { 300 | # default error handling: just report it 301 | $log->error("error while updating $rrdfile: $ERR"); 302 | } 303 | } 304 | } 305 | 306 | sub _update_last_results_file { 307 | my ($self, $rrd_time, $results) = @_; 308 | 309 | # Also add a file with the raw last results 310 | my $fh; 311 | my $last_results_file = $self->my_output_dir . '/last_result'; 312 | my $tmpfile = $last_results_file . '.tmp'; 313 | open($fh, '>', $tmpfile) or do { 314 | $log->error("can't write $tmpfile: $!"); 315 | return; 316 | }; 317 | 318 | my $now = int(AnyEvent->now); 319 | my @rtts = @{$results->{rtts}}; 320 | my $entries = scalar @rtts; 321 | my $loss = $self->pings - $entries; 322 | my $median = defined $rtts[int($entries/2)] ? $rtts[int($entries/2)] : '~'; 323 | my $min = defined $rtts[0] ? $rtts[0] : '~'; 324 | my $max = defined $rtts[$entries-1] ? $rtts[$entries-1] : '~'; 325 | 326 | print $fh "time: $rrd_time\n". 327 | "updated: ".$now."\n". 328 | "step: ".$self->step."\n". 329 | "pings: ".$self->pings."\n". 330 | "loss: ".$loss."\n". 331 | "min: ".$min."\n". 332 | "median: ".$median."\n". 333 | "max: ".$max."\n"; 334 | close($fh); 335 | unlink($last_results_file); 336 | rename($tmpfile, $last_results_file) or do { 337 | $log->error("can't rename $tmpfile to $last_results_file: $!\n"); 338 | return; 339 | }; 340 | } 341 | 342 | # Sends messages to telegraf 343 | sub _update_telegraf { 344 | my ($self, $rrd_time, $results) = @_; 345 | 346 | my @rtts = @{$results->{pings}}; 347 | my @sorted_rtts = @{$results->{rtts}}; 348 | my $all_pings = scalar @rtts; 349 | my $successful_pings = scalar @sorted_rtts; 350 | my $loss = 100.0*($all_pings - $successful_pings)/$all_pings; 351 | my $median = $rtts[int($successful_pings/2)]; # will be undef if empty 352 | my $min = $sorted_rtts[0]; # will be undef if empty 353 | my $max = pop @sorted_rtts; # will be undef if empty 354 | my $step = $self->step; 355 | 356 | if ($self->telegraf->{'measurement_name'} && Pingmachine::Config->get_telegraf) { 357 | my ($telegraf_host, $telegraf_port) = Pingmachine::Config->get_telegraf; 358 | my $measurement_name = $self->telegraf->{'measurement_name'}; 359 | my $tags = $self->telegraf->{'tags'}; 360 | 361 | # Create the socket. 362 | my $telegraf_socket = new IO::Socket::INET( 363 | PeerAddr => $telegraf_host, 364 | PeerPort => $telegraf_port, 365 | Proto => 'udp', 366 | Type => IO::Socket::SOCK_DGRAM, 367 | Blocking => 0, 368 | ) or die("Can't open UDP socket: $@"); 369 | 370 | # set our socket to non blocking mode 371 | AnyEvent::Util::fh_nonblocking($telegraf_socket, 1); 372 | 373 | my $result_rrd_time = sprintf("%d%09d", $rrd_time , ($rrd_time - int($rrd_time)) * 1_000_000_000); # nanoseconds time conversion required by InfluxDB::LineProtocol 374 | 375 | # undef will be casted to empty string which will mess up the influx schema definition 376 | # so only select the defined fields 377 | my $fields = {}; 378 | $fields->{median_rtt} = sprintf("%f", $median) if defined $median; 379 | $fields->{min_rtt} = sprintf("%f", $min) if defined $min; 380 | $fields->{max_rtt} = sprintf("%f", $max) if defined $max; 381 | $fields->{loss} = sprintf("%f",$loss) if defined $loss; 382 | 383 | my $influx_line = data2line($measurement_name, $fields, $tags, $result_rrd_time); 384 | $telegraf_socket->send($influx_line,0) or die("Cannot send message"); 385 | 386 | for my $i (0..$all_pings-1) { 387 | my $time = $rrd_time + $step * $i / $all_pings; 388 | my $result_time = sprintf("%d%09d", $time , ($time - int($time)) * 1_000_000_000); 389 | 390 | next if (! $rtts[$i]); 391 | $influx_line = data2line($measurement_name, { individual_rtt => $rtts[$i]}, $tags, $result_time); 392 | 393 | $telegraf_socket->send($influx_line,0) or die("Cannot send message"); 394 | } 395 | } 396 | } 397 | 398 | 399 | sub archive { 400 | my ($self) = @_; 401 | return if $self->is_archived(); 402 | $self->_set_is_archived(1); 403 | 404 | my $archive_dir = $self->my_archive_dir; 405 | mkpath($archive_dir); 406 | rename($self->my_output_dir, $archive_dir) or 407 | $log->error("can't archive output to directory $archive_dir: $!"); 408 | } 409 | 410 | sub unarchive { 411 | my ($self) = @_; 412 | $self->_set_is_archived(0); 413 | my $archive_dir = $self->my_archive_dir; 414 | -d $archive_dir or return; 415 | 416 | # revive output directory 417 | if(! -d $self->my_output_dir) { 418 | if(rename($archive_dir, $self->my_output_dir)) { 419 | $log->info("revived archived order: ".$self->nice_name); 420 | } 421 | else { 422 | $log->error("can't restore output directory from $archive_dir to ". 423 | $self->my_output_dir.": $!"); 424 | } 425 | } 426 | else { 427 | $log->error("can't restore output directory from $archive_dir to ". 428 | $self->my_output_dir.": directory exists"); 429 | } 430 | } 431 | 432 | #sub DEMOLISH { 433 | # my ($self) = @_; 434 | # $log->debug("order garbage collected: ".$self->id); 435 | #} 436 | 437 | __PACKAGE__->meta->make_immutable; 438 | 439 | 1; 440 | -------------------------------------------------------------------------------- /lib/Pingmachine/Order/FPing.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Order::FPing; 2 | 3 | use Any::Moose; 4 | 5 | has 'host' => ( 6 | isa => 'Str', 7 | is => 'ro', 8 | required => 1, 9 | ); 10 | 11 | has 'source_ip' => ( 12 | isa => 'Str', 13 | is => 'ro', 14 | ); 15 | 16 | has 'interface' => ( 17 | isa => 'Str', 18 | is => 'ro', 19 | ); 20 | 21 | has 'interval' => ( 22 | isa => 'Int', 23 | is => 'ro', 24 | ); 25 | 26 | has 'ipv6' => ( 27 | isa => 'Bool', 28 | is => 'ro', 29 | lazy => 1, 30 | default => sub { 31 | my $self = shift; 32 | return $self->host =~ /^[:0-9a-f]+$/i; 33 | }, 34 | ); 35 | 36 | sub probe_instance_key { 37 | my ($self) =@_; 38 | 39 | my @keys; 40 | push (@keys, "interval:".$self->interval) if ($self->interval); 41 | push (@keys, "source_ip:".$self->source_ip) if ($self->source_ip); 42 | push (@keys, "interface:".$self->interface) if ($self->interface); 43 | push (@keys, "v6:".$self->ipv6) if ($self->ipv6); 44 | scalar @keys or @keys = (''); 45 | 46 | return join('|', @keys); 47 | } 48 | 49 | __PACKAGE__->meta->make_immutable; 50 | 51 | 1; 52 | -------------------------------------------------------------------------------- /lib/Pingmachine/Order/HTTPing.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Order::HTTPing; 2 | 3 | use Any::Moose; 4 | 5 | has 'url' => ( 6 | isa => 'Str', 7 | is => 'ro', 8 | required => 1, 9 | ); 10 | 11 | has 'interval' => ( 12 | isa => 'Int', 13 | is => 'ro', 14 | ); 15 | 16 | has 'user_agent' => ( 17 | isa => 'Str', 18 | is => 'ro', 19 | ); 20 | 21 | has 'proxy' => ( 22 | isa => 'Str', 23 | is => 'ro', 24 | ); 25 | 26 | has 'http_codes_as_failure' => ( 27 | isa => 'Str', 28 | is => 'ro', 29 | ); 30 | 31 | sub probe_instance_key { 32 | my ($self) =@_; 33 | 34 | my @keys; 35 | push (@keys, "interval:".$self->interval) if ($self->interval); 36 | push (@keys, "user_agent:".$self->user_agent) if ($self->user_agent); 37 | push (@keys, "http_codes_as_failure:".$self->http_codes_as_failure) if ($self->http_codes_as_failure); 38 | push (@keys, "proxy:".$self->proxy) if ($self->proxy); 39 | return join('|', @keys); 40 | } 41 | 42 | __PACKAGE__->meta->make_immutable; 43 | 44 | 1; 45 | -------------------------------------------------------------------------------- /lib/Pingmachine/Order/PPing.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Order::PPing; 2 | 3 | use Any::Moose; 4 | 5 | has 'host' => ( 6 | isa => 'Str', 7 | is => 'ro', 8 | required => 1, 9 | ); 10 | 11 | has 'source_ip' => ( 12 | isa => 'Str', 13 | is => 'ro', 14 | ); 15 | 16 | has 'flags' => ( 17 | isa => 'Str', 18 | is => 'ro', 19 | ); 20 | 21 | has 'interface' => ( 22 | isa => 'Str', 23 | is => 'ro', 24 | ); 25 | 26 | has 'interval' => ( 27 | isa => 'Int', 28 | is => 'ro', 29 | ); 30 | 31 | has 'ipv6' => ( 32 | isa => 'Bool', 33 | is => 'ro', 34 | lazy => 1, 35 | default => sub { 36 | my $self = shift; 37 | return $self->host =~ /^[:0-9a-f]+$/i; 38 | }, 39 | ); 40 | 41 | sub probe_instance_key { 42 | my ($self) =@_; 43 | 44 | my @keys; 45 | push (@keys, "interval:".$self->interval) if ($self->interval); 46 | push (@keys, "source_ip:".$self->source_ip) if ($self->source_ip); 47 | push (@keys, "flags:".$self->flags) if ($self->flags); 48 | push (@keys, "interface:".$self->interface) if ($self->interface); 49 | push (@keys, "v6:".$self->ipv6) if ($self->ipv6); 50 | 51 | return join('|', @keys); 52 | } 53 | 54 | __PACKAGE__->meta->make_immutable; 55 | 56 | 1; 57 | -------------------------------------------------------------------------------- /lib/Pingmachine/Order/SPing.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Order::SPing; 2 | 3 | use Any::Moose; 4 | 5 | has 'host' => ( 6 | isa => 'Str', 7 | is => 'ro', 8 | required => 1, 9 | ); 10 | 11 | has 'source_ip' => ( 12 | isa => 'Str', 13 | is => 'ro', 14 | ); 15 | 16 | has 'flags' => ( 17 | isa => 'Str', 18 | is => 'ro', 19 | ); 20 | 21 | has 'interface' => ( 22 | isa => 'Str', 23 | is => 'ro', 24 | ); 25 | 26 | has 'interval' => ( 27 | isa => 'Int', 28 | is => 'ro', 29 | ); 30 | 31 | has 'ipv6' => ( 32 | isa => 'Bool', 33 | is => 'ro', 34 | lazy => 1, 35 | default => sub { 36 | my $self = shift; 37 | return $self->host =~ /^[:0-9a-f]+$/i; 38 | }, 39 | ); 40 | 41 | sub probe_instance_key { 42 | my ($self) =@_; 43 | 44 | my @keys; 45 | push (@keys, "interval:".$self->interval) if ($self->interval); 46 | push (@keys, "source_ip:".$self->source_ip) if ($self->source_ip); 47 | push (@keys, "flags:".$self->flags) if ($self->flags); 48 | push (@keys, "interface:".$self->interface) if ($self->interface); 49 | push (@keys, "v6:".$self->ipv6) if ($self->ipv6); 50 | 51 | return join('|', @keys); 52 | } 53 | 54 | __PACKAGE__->meta->make_immutable; 55 | 56 | 1; 57 | -------------------------------------------------------------------------------- /lib/Pingmachine/Order/SSH.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Order::SSH; 2 | 3 | use Any::Moose; 4 | 5 | has 'host' => ( 6 | isa => 'Str', 7 | is => 'ro', 8 | required => 1, 9 | ); 10 | 11 | has 'key_type' => ( 12 | isa => 'Str', 13 | is => 'ro', 14 | required => 1, 15 | ); 16 | 17 | sub probe_instance_key { 18 | my ($self) = @_; 19 | return $self->key_type; 20 | } 21 | 22 | __PACKAGE__->meta->make_immutable; 23 | 24 | 1; 25 | -------------------------------------------------------------------------------- /lib/Pingmachine/OrderList.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::OrderList; 2 | 3 | use Any::Moose; 4 | use Log::Any qw($log); 5 | 6 | # An OrderList object contains orders and is responsible for the correct 7 | # book-keeping. It is filled by OrdersWatcher. 8 | 9 | # Note: we don't use a native trait here, because it doesn't perform well :-( 10 | has '_orders' => ( 11 | isa => 'HashRef', 12 | is => 'ro', 13 | default => sub { return {} }, 14 | ); 15 | 16 | has '_add_order_cb' => ( 17 | traits => ['Array'], 18 | isa => 'ArrayRef[CodeRef]', 19 | is => 'ro', 20 | default => sub { return [] }, 21 | handles => { 22 | register_add_order_cb => 'push', 23 | }, 24 | ); 25 | 26 | has '_remove_order_cb' => ( 27 | traits => ['Array'], 28 | isa => 'ArrayRef[CodeRef]', 29 | is => 'ro', 30 | default => sub { return [] }, 31 | handles => { 32 | register_remove_order_cb => 'push', 33 | }, 34 | ); 35 | 36 | sub has_order { 37 | my ($self, $order_id) = @_; 38 | return exists $self->{_orders}{$order_id}; 39 | } 40 | 41 | sub count { 42 | my ($self) = @_; 43 | return scalar keys %{$self->{_orders}}; 44 | } 45 | 46 | sub list { 47 | my ($self) = @_; 48 | return keys %{$self->{_orders}}; 49 | } 50 | 51 | sub get_all { 52 | my ($self) = @_; 53 | return values %{$self->{_orders}}; 54 | } 55 | 56 | sub get_order { 57 | my ($self, $order_id) = @_; 58 | return $self->{_orders}{$order_id}; 59 | } 60 | 61 | sub add_order { 62 | my ($self, $order) = @_; 63 | my $order_id = $order->id; 64 | 65 | # Skip, if already known 66 | if($self->has_order($order_id)) { 67 | return; 68 | } 69 | 70 | # Store order 71 | $self->{_orders}{$order_id} = $order; 72 | 73 | # Run callbacks 74 | for my $cb (@{$self->_add_order_cb}) { 75 | $cb->($order); 76 | } 77 | } 78 | 79 | sub remove_order_id { 80 | my ($self, $order_id) = @_; 81 | 82 | my $order = $self->get_order($order_id); 83 | return unless defined $order; 84 | 85 | delete $self->{_orders}{$order_id}; 86 | 87 | # run callbacks 88 | for my $cb (@{$self->_remove_order_cb}) { 89 | $cb->($order); 90 | } 91 | } 92 | 93 | __PACKAGE__->meta->make_immutable; 94 | 95 | 1; 96 | -------------------------------------------------------------------------------- /lib/Pingmachine/OrdersDirWatcher.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::OrdersDirWatcher; 2 | 3 | # This module is responsible for monitoring the /orders directory and 4 | # adding/removing orders, as necessary. 5 | 6 | ### NOTE NOTE NOTE: The fancy inotify support is disabled for now, because I presume stability 7 | ### issues to be related to it (being so fancy and all :-)) 8 | ### The consequence is that new order get noticed with up to 30 seconds delay 9 | ### -- dws@open.ch, 2012-01-17 10 | 11 | use Any::Moose; 12 | use AnyEvent; 13 | use Log::Any qw($log); 14 | use Try::Tiny; 15 | use YAML::XS qw(LoadFile); 16 | 17 | use Pingmachine::Config; 18 | use Pingmachine::Order; 19 | use Pingmachine::OrderList; 20 | 21 | my $RESCAN_PERIOD = 30; # full rescan every 30 seconds (just to be sure, shouldn't be needed..) 22 | 23 | has 'orders_dir' => ( 24 | isa => 'Str', 25 | is => 'ro', 26 | default => sub { 27 | return Pingmachine::Config->orders_dir; 28 | }, 29 | ); 30 | 31 | has 'telegraf_dir' => ( 32 | isa => 'Str', 33 | is => 'ro', 34 | default => sub { 35 | return Pingmachine::Config->telegraf_dir; 36 | }, 37 | ); 38 | 39 | has 'order_list' => ( 40 | isa => 'Pingmachine::OrderList', 41 | is => 'ro', 42 | required => 1, 43 | ); 44 | 45 | # AnyEvent watchers 46 | has 'ae_watchers' => ( 47 | traits => ['Array'], 48 | isa => 'ArrayRef', 49 | is => 'ro', 50 | default => sub { [] }, 51 | handles => { 52 | add_ae_watcher => 'push', 53 | }, 54 | ); 55 | 56 | sub BUILD { 57 | my $self = shift; 58 | my $args = shift; 59 | 60 | $self->_scan_orders_directory(); 61 | #$self->_setup_orders_inotify_watcher(); 62 | $self->_setup_timed_rescan(); 63 | } 64 | 65 | # scan /orders directory and: 66 | # - remove obsolete orders 67 | # - add new orders 68 | sub _scan_orders_directory { 69 | my ($self) = @_; 70 | my %in_directory; # used to track deleted orders 71 | my $orders_dir = $self->orders_dir; 72 | my $now = int(AnyEvent->now); 73 | 74 | $self->_scan_orders_directory_recursively($orders_dir, '', $now, \%in_directory); 75 | 76 | for my $order_id ($self->order_list->list) { 77 | if (!$in_directory{$order_id}) { 78 | $self->_remove_order($order_id); 79 | } 80 | } 81 | 82 | return; 83 | } 84 | 85 | sub _scan_orders_directory_recursively { 86 | my ($self, $dir, $order_id_prefix, $now, $in_directory) = @_; 87 | my $dh; 88 | opendir($dh, $dir) or die "can't open $dir: $!"; 89 | while (my $order_file = readdir($dh)) { 90 | next if $order_file eq '.'; 91 | next if $order_file eq '..'; 92 | 93 | my $order_id = $order_id_prefix ? "$order_id_prefix/$order_file" : $order_file; 94 | my $file = "$dir/$order_file"; 95 | if (-d $file) { 96 | $self->_scan_orders_directory_recursively($file, $order_id, $now, $in_directory); 97 | next; 98 | } 99 | 100 | # Archive file if too old 101 | my ($mtime) = (lstat($file))[9]; 102 | defined $mtime or do { 103 | $log->warn("can't stat $file: $!"); 104 | next; 105 | }; 106 | my $timediff = $now - $mtime; 107 | my $max_age = Pingmachine::Config->orders_max_age; 108 | if($timediff > $max_age) { 109 | $log->info("archiving old order: $order_id (age: ${timediff}s)"); 110 | if($self->order_list->get_order($order_id)) { 111 | $self->_remove_order($order_id); 112 | } 113 | else { 114 | my $order = $self->_parse_order($order_id, $self->orders_dir . '/' . $order_id, $self ->telegraf_dir . '/' . $order_id); 115 | $order->archive($order_id) if $order; 116 | } 117 | unlink($file); 118 | next; 119 | } 120 | 121 | # Add order 122 | $self->_add_order($order_id); 123 | $in_directory->{$order_id} = 1; 124 | } 125 | 126 | closedir($dh); 127 | return; 128 | }; 129 | 130 | 131 | sub _parse_order { 132 | my ($self, $order_id, $order_file, $telegraf_file) = @_; 133 | 134 | my $order; 135 | try { 136 | my $order_def = LoadFile($order_file); 137 | $order_def->{id} = $order_id; 138 | $order_def->{order_file} = $order_file; 139 | if (-e $telegraf_file) { 140 | $order_def->{telegraf_file} = $telegraf_file; 141 | try { 142 | $order_def->{telegraf} = LoadFile($telegraf_file); 143 | } 144 | catch { 145 | my $error = $_; 146 | chomp $error; 147 | unlink $telegraf_file; 148 | $log->warning("unable to load telegraf file $telegraf_file ($error). It has been deleted as it was most likely corrupt."); 149 | } 150 | } 151 | $order = Pingmachine::Order->new($order_def); 152 | } 153 | catch { 154 | my $error = $_; 155 | chomp $error; 156 | unlink $order_file; 157 | $log->warning("unable to load order file $order_file ($error). It has been deleted as it was most likely corrupt."); 158 | }; 159 | return $order; 160 | } 161 | 162 | sub _add_order { 163 | my ($self, $order_id) = @_; 164 | 165 | # Skip it, if already known 166 | return if $self->order_list->has_order($order_id); 167 | 168 | # Parse 169 | my $order = $self->_parse_order($order_id, $self ->orders_dir . '/' . $order_id, $self ->telegraf_dir . '/' . $order_id); 170 | return unless $order; 171 | 172 | # Add order to list 173 | $self->order_list->add_order($order); 174 | } 175 | 176 | 177 | sub _remove_order { 178 | my ($self, $order_id) = @_; 179 | 180 | my $order = $self->order_list->get_order($order_id); 181 | return unless defined $order; 182 | 183 | # Archive order 184 | $order->archive(); 185 | 186 | # Remove from list 187 | $self->order_list->remove_order_id($order_id); 188 | } 189 | 190 | ## watch /orders directory for added/deleted files 191 | #sub _setup_orders_inotify_watcher { 192 | # my ($self) = @_; 193 | # 194 | # my $inotify = new Linux::Inotify2 195 | # or die "unable to create new inotify object: $!"; 196 | # $inotify->watch( 197 | # $self->orders_dir, 198 | # IN_CLOSE_WRITE | IN_DELETE | IN_MOVED_FROM | IN_MOVED_TO, 199 | # sub { 200 | # my $e = shift; 201 | # if($e->IN_CLOSE_WRITE or $e->IN_MOVED_TO) { 202 | # $self->_add_order($e->name) if -f $self->orders_dir . '/' . $e->name; 203 | # } 204 | # elsif($e->IN_DELETE or $e->IN_MOVED_FROM) { 205 | # $self->_remove_order($e->name); 206 | # } 207 | # } 208 | # ); 209 | # 210 | # $self->add_ae_watcher( 211 | # AnyEvent->io( 212 | # fh => $inotify->fileno, 213 | # poll => 'r', 214 | # cb => sub { $inotify->poll } 215 | # ) 216 | # ); 217 | #} 218 | 219 | # we don't trust inotify completely, so schedule a full directory scan 220 | # every once in a while 221 | sub _setup_timed_rescan { 222 | my ($self) = @_; 223 | 224 | $self->add_ae_watcher( 225 | AnyEvent->timer( 226 | after => $RESCAN_PERIOD, 227 | interval => $RESCAN_PERIOD, 228 | cb => sub { 229 | $self->_scan_orders_directory(); 230 | } 231 | ) 232 | ); 233 | } 234 | 235 | __PACKAGE__->meta->make_immutable; 236 | 237 | 1; 238 | -------------------------------------------------------------------------------- /lib/Pingmachine/Probe.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Probe; 2 | 3 | # A probe represents a "pinging" job with a given step. Multiple orders 4 | # with the same probe name and step will be managed by the same probe 5 | # object. 6 | 7 | use Any::Moose 'Role'; 8 | use Log::Any qw($log); 9 | 10 | use Pingmachine::OrderList; 11 | use Pingmachine::Order; 12 | 13 | has 'step' => ( 14 | isa => 'Int', 15 | is => 'ro', 16 | required => 1 17 | ); 18 | 19 | has 'pings' => ( 20 | isa => 'Int', 21 | is => 'ro', 22 | required => 1 23 | ); 24 | 25 | has 'time_offset' => ( 26 | isa => 'Int', 27 | is => 'ro', 28 | lazy => 1, 29 | default => sub { 30 | my $self = shift; 31 | return int(rand($self->step / ($self->pings+1))); 32 | }, 33 | ); 34 | 35 | has 'order_list' => ( 36 | isa => 'Pingmachine::OrderList', 37 | is => 'ro', 38 | lazy => 1, 39 | default => sub { 40 | my $self = shift; 41 | my $ol = Pingmachine::OrderList->new(); 42 | return $ol; 43 | }, 44 | handles => { 45 | add_order => 'add_order', 46 | remove_order_id => 'remove_order_id', 47 | } 48 | ); 49 | 50 | # AnyEvent watchers 51 | has 'run_ae_w' => ( 52 | isa => 'Object', 53 | is => 'rw', 54 | ); 55 | 56 | requires 'name'; 57 | requires 'run'; 58 | requires 'max_orders'; 59 | 60 | # returns the amount of seconds to wait for the next scheduled run 61 | sub _get_next_run_after { 62 | my $self = shift; 63 | 64 | my $now = AnyEvent->now; 65 | my $step = $self->step; 66 | my $time_offset = $self->time_offset; 67 | my $now_mod = $now % $step; 68 | if($now_mod < $time_offset) { 69 | return $time_offset - $now_mod; 70 | } 71 | else { 72 | return $time_offset - $now_mod + $step; 73 | } 74 | } 75 | 76 | sub _schedule_next_run { 77 | my ($self) = @_; 78 | 79 | $self->run_ae_w( 80 | AnyEvent->timer ( 81 | after => $self->_get_next_run_after(), 82 | cb => sub { 83 | $self->_schedule_next_run(); 84 | $self->run(); 85 | } 86 | ) 87 | ); 88 | } 89 | 90 | around 'add_order' => sub { 91 | my $orig = shift; 92 | my $self = shift; 93 | 94 | # check that the order has the same step value 95 | my $order = $_[0]; 96 | defined $order or die "Pingmachine::Probe: must pass order object as argument\n"; 97 | $order->isa('Pingmachine::Order') or die "Pingmachine::Probe: order must be a Pingmachine::Orger object\n"; 98 | $order->step == $self->step or 99 | die "Pingmachine::Probe: order must have step: ".$order->step."\n"; 100 | $order->pings == $self->pings or 101 | die "Pingmachine::Probe: order must have pings: ".$order->pings."\n"; 102 | 103 | $self->$orig(@_); 104 | }; 105 | 106 | sub start { 107 | my $self = shift; 108 | 109 | $self->_schedule_next_run(); 110 | $log->info("started ".$self->name." probe (time offset: ".$self->time_offset.", step: ".$self->step.")"); 111 | } 112 | 113 | 1; 114 | -------------------------------------------------------------------------------- /lib/Pingmachine/Probe/FPing.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Probe::FPing; 2 | 3 | use Any::Moose; 4 | use AnyEvent; 5 | use AnyEvent::Util; 6 | use Log::Any qw($log); 7 | use List::Util qw(shuffle); 8 | 9 | my $FPING_BIN = -x '/usr/bin/fping' ? '/usr/bin/fping' : '/usr/sbin/fping'; 10 | my $FPING6_BIN = '/usr/bin/fping6'; 11 | 12 | my $TIMEOUT = 3000; # -t option (in ms) 13 | my $MIN_WAIT = 1; # -i option (is ms) 14 | 15 | has 'name' => ( 16 | is => 'ro', 17 | isa => 'Str', 18 | default => sub { "fping" }, 19 | ); 20 | 21 | has 'max_orders' => ( 22 | is => 'ro', 23 | isa => 'Int', 24 | default => 1000, 25 | ); 26 | 27 | has 'results' => ( 28 | is => 'ro', 29 | isa => 'HashRef', 30 | default => sub { {} }, 31 | ); 32 | 33 | has 'current_job' => ( 34 | is => 'rw', 35 | isa => 'HashRef', 36 | ); 37 | 38 | has 'interval' => ( 39 | is => 'ro', 40 | isa => 'Int', 41 | ); 42 | 43 | has 'source_ip' => ( 44 | is => 'ro', 45 | isa => 'Str', 46 | ); 47 | 48 | has 'interface' => ( 49 | is => 'ro', 50 | isa => 'Str', 51 | ); 52 | 53 | has 'ipv6' => ( 54 | is => 'ro', 55 | isa => 'Bool', 56 | default => 0, 57 | ); 58 | 59 | with 'Pingmachine::Probe'; 60 | 61 | sub _start_new_job { 62 | my ($self) = @_; 63 | 64 | # We want to distribute the samples as homogeneously as possible in the 65 | # available $step time (to increase the probability of detecting periodic 66 | # problems and to decrease the network peak load). 67 | # - We wait for at least 1 seconds for each ping ($interval). 68 | # - We can instruct fping to distribute the pings (fping -p parameter). 69 | # That parameter also determines the maximal wait time, so we 70 | # can have at most $step/$interval pings if we ping a single 71 | # host. 72 | # - If multiple hosts are pinged, then we need also to consider the $MIN_WAIT 73 | # (fping -i) parameter. 74 | # - The total time needed by fping is (assuming $hostcount*MIN_WAIT < $interval): 75 | # ($pings-1)*interval + *$hostcount*$MIN_WAIT + $TIMEOUT 76 | 77 | my $step = $self->step; 78 | my $pings = $self->pings; 79 | my $hostcount = $self->order_list->count; 80 | 81 | return unless $hostcount; 82 | 83 | # Determine $interval (fping -p) 84 | my $interval; # interval applies only to periods between pings in series (fping -p) 85 | if($self->interval) { 86 | $interval = $self->interval; 87 | } 88 | else { 89 | $interval = int(($step * 1000 * 0.8 - $hostcount*$MIN_WAIT - $TIMEOUT) / $pings); 90 | $interval >= 1000 or 91 | die "fping: calculated interval too small: $interval (step = $step, pings = $pings)\n"; 92 | } 93 | 94 | 95 | # Make sure that we can process all hosts 96 | if($interval / $hostcount < $MIN_WAIT) { 97 | die "fping: step * 1000 / (pings * hostcount) must be at least 10 (step=$step, pings=$pings, hostcount=$hostcount\n"; 98 | } 99 | 100 | # Prepare job 101 | my %job = ( 102 | host2order => {}, 103 | output => '', 104 | cmd => '', 105 | pid => '', 106 | ); 107 | for my $order ($self->order_list->get_all) { 108 | my $host = $order->fping->host; 109 | push @{$job{host2order}{$host}}, $order; 110 | } 111 | $job{hostlist} = join("\n", shuffle keys %{$job{host2order}}) . "\n", 112 | $self->current_job(\%job); 113 | 114 | # Run fping 115 | my $cmd = [ 116 | $self->ipv6 ? $FPING6_BIN : $FPING_BIN, 117 | '-q', 118 | '-p', $interval, 119 | '-C', $pings, 120 | '-i', $MIN_WAIT, 121 | '-t', $TIMEOUT, 122 | ]; 123 | if ( $self->source_ip ) { 124 | push @{$cmd}, '-S'; 125 | push @{$cmd}, $self->source_ip; 126 | } 127 | if ( $self->interface ) { 128 | push @{$cmd}, '-I'; 129 | push @{$cmd}, $self->interface; 130 | } 131 | $log->debug("starting: @$cmd (step: $step, pings: $pings, offset: ".$self->time_offset().")") if $log->is_debug(); 132 | $job{cmd} = join(' ', @$cmd); 133 | my $cv = run_cmd $cmd, 134 | '<', \$job{hostlist}, 135 | '>', '/dev/null', 136 | '2>', \$job{output}, 137 | '$$', \$job{pid}; 138 | 139 | # Install fping exit callback 140 | $cv->cb( 141 | sub { 142 | my $cbv = shift; 143 | $job{pid} = undef; 144 | my $exit = $cbv->recv; 145 | $exit = $exit >> 8; 146 | if($exit and $exit != 1 and $exit != 2) { 147 | # exit 1 means that some hosts aren't reachable 148 | # exit 2 means "any IP addresses were not found" 149 | $log->warning("fping seems to have failed (exit: $exit, stderr: ".$job{output}.")"); 150 | return; 151 | } 152 | 153 | $log->debug("finished: @$cmd (step: $step, pings: $pings, offset: ".$self->time_offset().")") if $log->is_debug(); 154 | 155 | $self->_collect_current_job(); 156 | 157 | $log->debug("collected: @$cmd (step: $step, pings: $pings, offset: ".$self->time_offset().")") if $log->is_debug(); 158 | } 159 | ); 160 | } 161 | 162 | sub _kill_current_job { 163 | my ($self) = @_; 164 | 165 | # Kill fping, if still running 166 | my $job = $self->current_job; 167 | if($job->{pid}) { 168 | # Check that we are killing the process we started and not an innocent bystander 169 | my $cmd_match = 0; 170 | if (open(proc_fh, "/proc/$job->{pid}/cmdline")) { 171 | $cmd_match = (join('', readline(proc_fh)) eq $job->{cmd}); 172 | close(proc_fh); 173 | } 174 | if($cmd_match && kill(0, $job->{pid})) { 175 | $log->warning("killing unfinished fping process (step: ".$self->step.", pings: ".$self->pings.", offset: ".$self->time_offset().")"); 176 | kill 9, $job->{pid}; 177 | $job->{pid} = undef; 178 | } 179 | elsif($job->{output}) { 180 | $log->warning("fping has finished, but we didn't notice... collecting (step: ".$self->step.", pings: ".$self->pings.", offset: ".$self->time_offset.")"); 181 | $self->_collect_current_job(); 182 | } 183 | else { 184 | $log->warning("fping has finished, but we didn't notice... no output found (?)"); 185 | } 186 | } 187 | } 188 | 189 | sub _collect_current_job { 190 | my ($self) = @_; 191 | 192 | my $job = $self->current_job; 193 | $self->current_job({}); 194 | my %results; 195 | 196 | # Do nothing, if fping didn't run yet or if job has been already collected 197 | return unless $job->{output}; 198 | 199 | # Parse fping report 200 | my $text = $job->{output}; 201 | while($text !~ /\G\z/gc) { 202 | if($text =~ /\G(\S+)[ \t]+:/gc) { 203 | my $host = $1; 204 | my @data; 205 | while($text =~ /\G[ \t]+([-\d\.]+)/gc) { 206 | push @data, $1; 207 | } 208 | # raw ping times 209 | my @pings = map {$_ eq '-' ? undef : $_ / 1000} @data; 210 | $results{$host}{pings} = \@pings; 211 | # sorted rtt times 212 | my @rtts = map {sprintf "%.6e", $_ / 1000} sort {$a <=> $b} grep /^\d/, @data; 213 | $results{$host}{rtts} = \@rtts; 214 | } 215 | 216 | # discard any other output on the line (ICMP host unreachable errors, etc.) 217 | $text =~ /\G.*\n/gc; 218 | } 219 | 220 | $log->debug("adding results") if $log->is_debug(); 221 | 222 | # Add results (to RRD) 223 | if(scalar keys %results) { 224 | my $now = int(AnyEvent->now); 225 | my $step = $self->step; 226 | my $rrd_time = $now - $step - $now%$step; 227 | for my $host (keys %results) { 228 | my $h2o = $job->{host2order}{$host}; 229 | if(not defined $h2o) { 230 | $log->warning("fping produced results for unknown host (host: $host, step: $step)"); 231 | next; 232 | } 233 | for my $order (@{$h2o}) { 234 | $order->add_results($rrd_time, $results{$host}); 235 | } 236 | } 237 | } 238 | 239 | $log->debug("adding results finished") if $log->is_debug(); 240 | } 241 | 242 | 243 | sub run { 244 | my ($self) = @_; 245 | 246 | $self->_kill_current_job(); 247 | $self->_start_new_job(); 248 | } 249 | 250 | __PACKAGE__->meta->make_immutable; 251 | 252 | 1; 253 | -------------------------------------------------------------------------------- /lib/Pingmachine/Probe/HTTPing.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Probe::HTTPing; 2 | 3 | use Any::Moose; 4 | use AnyEvent; 5 | use AnyEvent::Util; 6 | use Log::Any qw($log); 7 | use List::Util qw(shuffle); 8 | use Data::Dumper; 9 | 10 | my $HTTPING_BIN = '/usr/bin/httping'; 11 | my $TIMEOUT = 15; # -t option (in s) 12 | 13 | has 'name' => ( 14 | is => 'ro', 15 | isa => 'Str', 16 | default => sub { "httping" }, 17 | ); 18 | 19 | has 'max_orders' => ( 20 | is => 'ro', 21 | isa => 'Int', 22 | default => 15, 23 | ); 24 | 25 | has 'results' => ( 26 | is => 'ro', 27 | isa => 'HashRef', 28 | default => sub { {} }, 29 | ); 30 | 31 | has 'current_job' => ( 32 | is => 'rw', 33 | isa => 'HashRef', 34 | ); 35 | 36 | has 'interval' => ( 37 | is => 'ro', 38 | isa => 'Int', 39 | default => 1, 40 | ); 41 | 42 | has 'url' => ( 43 | is => 'ro', 44 | isa => 'Str', 45 | ); 46 | 47 | has 'proxy' => ( 48 | is => 'ro', 49 | isa => 'Str', 50 | ); 51 | 52 | has 'user_agent' => ( 53 | is => 'ro', 54 | isa => 'Str', 55 | default => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36", 56 | ); 57 | 58 | has 'http_codes_as_failure' => ( 59 | is => 'ro', 60 | isa => 'Str', 61 | ); 62 | 63 | with 'Pingmachine::Probe'; 64 | 65 | sub _start_new_job { 66 | my ($self) = @_; 67 | my $cv; 68 | 69 | my $step = $self->step; 70 | my $pings = $self->pings; 71 | my $interval = $self->interval; 72 | my $user_agent = $self->user_agent; 73 | 74 | my $urlcount = $self->order_list->count; 75 | 76 | return unless $urlcount; 77 | 78 | # Make sure that we can process the request 79 | if(($TIMEOUT + $interval * 1000) * $pings > $step * 1000) { 80 | die "httping: step * 1000 must be higher than (timeout + interval * 1000) * pings (step = $step, timeout=$TIMEOUT, interval=$interval, pings=$pings)\n"; 81 | } 82 | 83 | # Prepare job 84 | my %job = ( 85 | url2order => {}, 86 | output => {}, 87 | cmd => {}, 88 | pid => {}, 89 | ); 90 | for my $order ($self->order_list->get_all) { 91 | my $url = $order->httping->url; 92 | push @{$job{url2order}{$url}}, $order; 93 | } 94 | $self->current_job(\%job); 95 | 96 | for my $url (keys %{$job{url2order}}) { 97 | # Run httping 98 | my $cmd = [ 99 | $HTTPING_BIN, 100 | $url, 101 | '--user-agent', $user_agent, 102 | '--count', $pings, 103 | '--interval', $interval, 104 | '--timeout', $TIMEOUT, 105 | ]; 106 | 107 | # if specific http error codes have to be considered a failure, show-statuscode option is required 108 | if ( $self->http_codes_as_failure ) { 109 | push @{$cmd}, '--show-statuscodes'; 110 | } 111 | 112 | if ( $self->proxy ) { 113 | push @{$cmd}, '-x'; 114 | push @{$cmd}, $self->proxy; 115 | } 116 | 117 | $job{cmd}{$url} = join(' ', @$cmd); 118 | $cv = run_cmd $cmd, 119 | '2>', '/dev/null', 120 | '>', \$job{output}{$url}, 121 | '$$', \$job{pid}{$url}; 122 | } 123 | 124 | $cv->cb( 125 | sub { 126 | my $cbv = shift; 127 | for my $url (keys %{$job{url2order}}) { 128 | $job{pid}{$url} = undef; 129 | } 130 | my $exit = $cbv->recv; 131 | $exit = $exit >> 8; 132 | if($exit and $exit != 1 and $exit != 2 and $exit != 127) { 133 | # exit 1 means that some urls aren't reachable 134 | # exit 2 means "any IP addresses were not found" 135 | # exit 127 means that the connection is actively refused 136 | $log->warning("httping seems to have failed (exit: $exit, httping output:\n" . Dumper(\$job{output}) . ")"); 137 | return; 138 | } 139 | 140 | $log->debug("finished:" . Dumper(\$job{cmd}) . "(step: $step, pings: $pings, offset: " . $self->time_offset() . ")") if $log->is_debug(); 141 | 142 | $self->_collect_current_job(); 143 | 144 | $log->debug("collected:". Dumper(\$job{cmd}) . "(step: $step, pings: $pings, offset: " . $self->time_offset() . ")") if $log->is_debug(); 145 | } 146 | ); 147 | } 148 | 149 | sub _kill_current_job { 150 | my ($self) = @_; 151 | 152 | # Kill httping, if still running 153 | my $job = $self->current_job; 154 | for my $url (keys %{$job->{url2order}}) { 155 | if($job->{pid}{$url}) { 156 | # Check that we are killing the process we started and not an innocent bystander 157 | my $cmd_match = 0; 158 | if (open(my $proc_fh, "/proc/$job->{pid}{$url}/cmdline")) { 159 | $cmd_match = (join('', readline($proc_fh)) eq $job->{cmd}{$url}); 160 | close($proc_fh); 161 | } 162 | if($cmd_match && kill(0, $job->{pid}{$url})) { 163 | $log->warning("killing unfinished httping process (step: ".$self->step.", pings: ".$self->pings.", offset: ".$self->time_offset().")"); 164 | kill 9, $job->{pid}{$url}; 165 | $job->{pid}{$url} = undef; 166 | } 167 | elsif($job->{output}{$url}) { 168 | $log->warning("httping has finished, but we didn't notice... collecting (step: ".$self->step.", pings: ".$self->pings.", offset: ".$self->time_offset.")"); 169 | $self->_collect_current_job(); 170 | } 171 | else { 172 | $log->warning("httping has finished, but we didn't notice... no output found (?)"); 173 | } 174 | } 175 | } 176 | } 177 | 178 | sub _collect_current_job { 179 | my ($self) = @_; 180 | my $text; 181 | 182 | my $job = $self->current_job; 183 | $self->current_job({}); 184 | my %results; 185 | 186 | # Parse httping report 187 | for my $url (keys %{$job->{url2order}}) { 188 | # Do nothing, if httping didn't run yet or if job has been already collected 189 | return unless $job->{output}{$url}; 190 | 191 | my $raw_text = $job->{output}{$url}; 192 | # sample output 193 | # PING neverssl.com:80 (/): 194 | # connected to neverssl.com:80 (524 bytes), seq=0 time= 10.86 ms 200 OK 195 | # connected to neverssl.com:80 (524 bytes), seq=1 time= 15.46 ms 200 OK 196 | # --- http://neverssl.com/ ping statistics --- 197 | # 2 connects, 2 ok, 0.00% failed, time 2027ms 198 | # round-trip min/avg/max = 10.9/13.2/15.5 ms 199 | # parse line by line httping output and format it as with fping 200 | my @lines = split /\n/, $raw_text; 201 | my @pings; 202 | for my $line (@lines) { 203 | # if the line contains an error (i.e. timeout, connection refused, ...) add a - 204 | if ($line =~ /could not connect|time out|timeout|short read/) { 205 | push @pings, undef; 206 | } 207 | # if http codes to be considered as failures are defined and the line contains one of them, ignore the rtt value and add a - 208 | elsif ($self->http_codes_as_failure) { 209 | # replace for example 403,407 with (403|407) [A-Z]+[a-z]*(\s[A-Z]+[a-z]*)? to match response codes 403 Forbidden, 407 Proxy Authentication Required 210 | my $httping_errors = $self->http_codes_as_failure =~ s/,/|/gr; 211 | $httping_errors = '(' . $httping_errors . ') [A-Z]+[a-z]*(\s[A-Z]+[a-z]*)?'; 212 | if ($line =~ /$httping_errors/) { 213 | push @pings, undef; 214 | } 215 | # if the line contains the httping result add the number 216 | elsif ($line =~ /time=\s*(\d\d*.\d\d*) ms/) { 217 | push @pings, $1/1000; 218 | } 219 | } 220 | # if the line contains the httping result add the number 221 | elsif ($line =~ /time=\s*(\d\d*.\d\d*) ms/) { 222 | push @pings, $1/1000; 223 | } 224 | # ignore all the rest 225 | } 226 | # raw ping times 227 | $results{$url}{pings} = \@pings; 228 | # sorted rtt times 229 | my @rtts = map {sprintf "%.6e", $_ } sort {$a <=> $b} grep {$_} @pings; 230 | $results{$url}{rtts} = \@rtts; 231 | } 232 | 233 | $log->debug("adding results") if $log->is_debug(); 234 | 235 | # Add results (to RRD) 236 | if(keys %results) { 237 | my $now = int(AnyEvent->now); 238 | my $step = $self->step; 239 | my $rrd_time = $now - $step - $now%$step; 240 | for my $url (keys %results) { 241 | my $u2o = $job->{url2order}{$url}; 242 | if(!defined $u2o) { 243 | $log->warning("httping produced results for unknown url (url: $url, step: $step)"); 244 | next; 245 | } 246 | # don't add results if the pings array is empty 247 | if (scalar @{$results{$url}{pings}} == 0) { 248 | $log->warning("httping didn't produce any result for url $url\n"); 249 | next; 250 | } 251 | for my $order (@{$u2o}) { 252 | $order->add_results($rrd_time, $results{$url}); 253 | } 254 | } 255 | } 256 | $log->debug("adding results finished") if $log->is_debug(); 257 | } 258 | 259 | 260 | sub run { 261 | my ($self) = @_; 262 | 263 | $self->_kill_current_job(); 264 | $self->_start_new_job(); 265 | } 266 | 267 | __PACKAGE__->meta->make_immutable; 268 | 269 | 1; 270 | -------------------------------------------------------------------------------- /lib/Pingmachine/Probe/PPing.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Probe::PPing; 2 | 3 | use Any::Moose; 4 | use AnyEvent; 5 | use AnyEvent::Util; 6 | use Log::Any qw($log); 7 | use List::Util qw(shuffle); 8 | 9 | my $SCION_DIR = $ENV{'SC'} // '/home/scion/go/src/github.com/scionproto/scion'; 10 | my $PPING_BIN = "$SCION_DIR/bin/pingpong"; 11 | 12 | my $TIMEOUT = 3000; # -t option (in ms) 13 | my $MIN_WAIT = 1; # -i option (is ms) 14 | 15 | has 'name' => ( 16 | is => 'ro', 17 | isa => 'Str', 18 | default => sub { "pping" }, 19 | ); 20 | 21 | has 'max_orders' => ( 22 | is => 'ro', 23 | isa => 'Int', 24 | default => 1000, 25 | ); 26 | 27 | has 'results' => ( 28 | is => 'ro', 29 | isa => 'HashRef', 30 | default => sub { {} }, 31 | ); 32 | 33 | has 'current_job' => ( 34 | is => 'rw', 35 | isa => 'HashRef', 36 | ); 37 | 38 | has 'interval' => ( 39 | is => 'ro', 40 | isa => 'Int', 41 | ); 42 | 43 | has 'source_ip' => ( 44 | is => 'ro', 45 | isa => 'Str', 46 | ); 47 | 48 | has 'flags' => ( 49 | is => 'ro', 50 | isa => 'Str', 51 | ); 52 | 53 | has 'interface' => ( 54 | is => 'ro', 55 | isa => 'Str', 56 | ); 57 | 58 | has 'ipv6' => ( 59 | is => 'ro', 60 | isa => 'Bool', 61 | default => 0, 62 | ); 63 | 64 | with 'Pingmachine::Probe'; 65 | 66 | sub _start_new_job { 67 | my ($self) = @_; 68 | 69 | # We want to distribute the samples as homogeneously as possible in the 70 | # available $step time (to increase the probability of detecting periodic 71 | # problems and to decrease the network peak load). 72 | # - We wait for at least 1 seconds for each ping ($interval). 73 | # - We can instruct pping to distribute the pings (pping -interval parameter). 74 | # That parameter also determines the maximal wait time, so we 75 | # can have at most $step/$interval pings if we ping a single 76 | # host. 77 | # - If multiple hosts are pinged, then we need also to consider the $MIN_WAIT 78 | # (pping -timeout) parameter. 79 | # - The total time needed by pping is (assuming $hostcount*MIN_WAIT < $interval): 80 | # ($pings-1)*interval + *$hostcount*$MIN_WAIT + $TIMEOUT 81 | 82 | my $step = $self->step; 83 | my $pings = $self->pings; 84 | my $hostcount = $self->order_list->count; 85 | 86 | return unless $hostcount; 87 | 88 | # Determine $interval (pping -interval) 89 | my $interval; # interval applies only to periods between pings in series (pping -interval, -c != 1) 90 | if($self->interval) { 91 | $interval = $self->interval; 92 | } 93 | else { 94 | $interval = int(($step * 1000 * 0.8 - $hostcount*$MIN_WAIT - $TIMEOUT) / $pings); 95 | $interval >= 1000 or 96 | die "pping: calculated interval too small: $interval (step = $step, pings = $pings)\n"; 97 | } 98 | 99 | 100 | # Make sure that we can process all hosts 101 | if($interval / $hostcount < $MIN_WAIT) { 102 | die "pping: step * 1000 / (pings * hostcount) must be at least 10 (step=$step, pings=$pings, hostcount=$hostcount\n"; 103 | } 104 | 105 | # Prepare job 106 | my %job = ( 107 | host2order => {}, 108 | output => '', 109 | effective_cmd => '', 110 | pid => '', 111 | ); 112 | for my $order ($self->order_list->get_all) { 113 | my $host = $order->pping->host; 114 | push @{$job{host2order}{$host}}, $order; 115 | } 116 | $job{hostlist} = join("\n", shuffle keys %{$job{host2order}}) . "\n", 117 | $self->current_job(\%job); 118 | 119 | # Run pingpong 120 | my $single_host = substr $job{hostlist}, 0, -1; 121 | $log->info("single_host: ".$single_host); 122 | my @cmd = [ 123 | $PPING_BIN, 124 | '-id', 'pingpong_client', 125 | '-mode', 'client', 126 | '-interval', $interval.'ms', 127 | '-count', $pings, 128 | '-timeout', $TIMEOUT.'s', 129 | '-format', '"%[3]s : %[5]s"', 130 | '-remote', $single_host, 131 | '-local', $self->source_ip, 132 | '-aggregate', 133 | ]; 134 | # Policy and performance flags 135 | push @cmd, $self->flags; 136 | # 137 | my $cmd_string = "cd $SCION_DIR && ".join(' ', @cmd); 138 | my $effective_cmd = [ 'su', 'scion', '-c', $cmd_string ]; 139 | $job{effective_cmd} = join(' ', @$effective_cmd); 140 | $log->debug("starting: @cmd (step: $step, pings: $pings, offset: ".$self->time_offset().")") if $log->is_debug(); 141 | 142 | my $cv = run_cmd $effective_cmd, 143 | '2>', '/dev/null', 144 | '>', \$job{output}, 145 | '$$', \$job{pid}; 146 | 147 | # Install pping exit callback 148 | $cv->cb( 149 | sub { 150 | my $cbv = shift; 151 | $job{pid} = undef; 152 | my $exit = $cbv->recv; 153 | $exit = $exit >> 8; 154 | if($exit and $exit != 1 and $exit != 2) { 155 | # exit 1 means that some hosts aren't reachable 156 | # exit 2 means "any IP addresses were not found" 157 | $log->warning("pping seems to have failed (exit: $exit, stderr: ".$job{output}.")"); 158 | return; 159 | } 160 | 161 | $log->debug("finished: @cmd (step: $step, pings: $pings, offset: ".$self->time_offset().")") if $log->is_debug(); 162 | 163 | $self->_collect_current_job(); 164 | 165 | $log->debug("collected: @cmd (step: $step, pings: $pings, offset: ".$self->time_offset().")") if $log->is_debug(); 166 | } 167 | ); 168 | } 169 | 170 | sub _kill_current_job { 171 | my ($self) = @_; 172 | 173 | # Kill pping, if still running 174 | my $job = $self->current_job; 175 | my $job_pid = $job->{pid}; 176 | if($job_pid) { 177 | # Check that we are killing the process we started and not an innocent bystander 178 | my $cmd_match = 0; 179 | if (open(proc_fh, "/proc/${job_pid}/cmdline")) { 180 | $cmd_match = (join('', readline(proc_fh)) eq $job->{effective_cmd}); 181 | close(proc_fh); 182 | } 183 | if($cmd_match && kill(0, $job->{pid})) { 184 | $log->warning("killing unfinished pping process (step: ".$self->step.", pings: ".$self->pings.", offset: ".$self->time_offset().")"); 185 | kill 9, $job->{pid}; 186 | $job->{pid} = undef; 187 | } 188 | elsif($job->{output}) { 189 | $log->warning("pping has finished, but we didn't notice... collecting (step: ".$self->step.", pings: ".$self->pings.", offset: ".$self->time_offset.")"); 190 | $self->_collect_current_job(); 191 | } 192 | else { 193 | $log->warning("pping has finished, but we didn't notice... no output found (?)"); 194 | } 195 | } 196 | } 197 | 198 | sub _collect_current_job { 199 | my ($self) = @_; 200 | 201 | my $job = $self->current_job; 202 | $self->current_job({}); 203 | my %results; 204 | 205 | # Do nothing, if pping didn't run yet or if job has been already collected 206 | return unless $job->{output}; 207 | 208 | # Parse pping report 209 | my $text = $job->{output}; 210 | $log->info("Output to parse: text: ".$text."--"); 211 | while($text !~ /\G\z/gc) { 212 | if($text =~ /\G(\S+)[ \t]+:/gc) { 213 | my $host = $1; 214 | my @data; 215 | while($text =~ /\G[ \t]+([-\d\.]+)/gc) { 216 | push @data, $1; 217 | } 218 | # raw ping times 219 | my @pings = map {$_ eq '-' ? undef : $_ / 1000} @data; 220 | $results{$host}{pings} = \@pings; 221 | # sorted rtt times 222 | my @rtts = map {sprintf "%.6e", $_ / 1000} sort {$a <=> $b} grep /^\d/, @data; 223 | $results{$host}{rtts} = \@rtts; 224 | $log->info("adding results: rtts: ".join(" ", @rtts)); 225 | } 226 | 227 | # discard any other output on the line (ICMP host unreachable errors, etc.) 228 | $text =~ /\G.*\n/gc; 229 | } 230 | 231 | $log->debug("adding results") if $log->is_debug(); 232 | 233 | # Add results (to RRD) 234 | if(scalar keys %results) { 235 | my $now = int(AnyEvent->now); 236 | my $step = $self->step; 237 | my $rrd_time = $now - $step - $now%$step; 238 | for my $host (keys %results) { 239 | my $h2o = $job->{host2order}{$host}; 240 | unless ($h2o) { 241 | $log->warning("pping produced results for unknown host (host: $host, step: $step)"); 242 | next; 243 | } 244 | for my $order (@{$h2o}) { 245 | $order->add_results($rrd_time, $results{$host}); 246 | } 247 | } 248 | } 249 | 250 | $log->debug("adding results finished") if $log->is_debug(); 251 | } 252 | 253 | 254 | sub run { 255 | my ($self) = @_; 256 | 257 | $self->_kill_current_job(); 258 | $self->_start_new_job(); 259 | } 260 | 261 | __PACKAGE__->meta->make_immutable; 262 | 263 | 1; 264 | -------------------------------------------------------------------------------- /lib/Pingmachine/Probe/SPing.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Probe::SPing; 2 | 3 | use Any::Moose; 4 | use AnyEvent; 5 | use AnyEvent::Util; 6 | use Log::Any qw($log); 7 | use List::Util qw(shuffle); 8 | 9 | my $SCION_DIR = $ENV{'SC'} // '/home/scion/go/src/github.com/scionproto/scion'; 10 | my $SPING_BIN = "$SCION_DIR/bin/scmp_fping"; 11 | 12 | my $TIMEOUT = 3000; # -t option (in ms) 13 | my $MIN_WAIT = 1; # -i option (is ms) 14 | 15 | has 'name' => ( 16 | is => 'ro', 17 | isa => 'Str', 18 | default => sub { "sping" }, 19 | ); 20 | 21 | has 'max_orders' => ( 22 | is => 'ro', 23 | isa => 'Int', 24 | default => 1000, 25 | ); 26 | 27 | has 'results' => ( 28 | is => 'ro', 29 | isa => 'HashRef', 30 | default => sub { {} }, 31 | ); 32 | 33 | has 'current_job' => ( 34 | is => 'rw', 35 | isa => 'HashRef', 36 | ); 37 | 38 | has 'interval' => ( 39 | is => 'ro', 40 | isa => 'Int', 41 | ); 42 | 43 | has 'source_ip' => ( 44 | is => 'ro', 45 | isa => 'Str', 46 | ); 47 | 48 | has 'flags' => ( 49 | is => 'ro', 50 | isa => 'Str', 51 | ); 52 | 53 | has 'interface' => ( 54 | is => 'ro', 55 | isa => 'Str', 56 | ); 57 | 58 | has 'ipv6' => ( 59 | is => 'ro', 60 | isa => 'Bool', 61 | default => 0, 62 | ); 63 | 64 | with 'Pingmachine::Probe'; 65 | 66 | sub _start_new_job { 67 | my ($self) = @_; 68 | 69 | # We want to distribute the samples as homogeneously as possible in the 70 | # available $step time (to increase the probability of detecting periodic 71 | # problems and to decrease the network peak load). 72 | # - We wait for at least 1 seconds for each ping ($interval). 73 | # - We can instruct sping to distribute the pings (sping -interval parameter). 74 | # That parameter also determines the maximal wait time, so we 75 | # can have at most $step/$interval pings if we ping a single 76 | # host. 77 | # - If multiple hosts are pinged, then we need also to consider the $MIN_WAIT 78 | # (sping -timeout) parameter. 79 | # - The total time needed by sping is (assuming $hostcount*MIN_WAIT < $interval): 80 | # ($pings-1)*interval + *$hostcount*$MIN_WAIT + $TIMEOUT 81 | 82 | my $step = $self->step; 83 | my $pings = $self->pings; 84 | my $hostcount = $self->order_list->count; 85 | 86 | return unless $hostcount; 87 | 88 | # Determine $interval (sping -interval) 89 | my $interval; # interval applies only to periods between pings in series (sping -interval, with -c != 1) 90 | if($self->interval) { 91 | $interval = $self->interval; 92 | } 93 | else { 94 | $interval = int(($step * 1000 * 0.8 - $hostcount*$MIN_WAIT - $TIMEOUT) / $pings); 95 | $interval >= 1000 or 96 | die "sping: calculated interval too small: $interval (step = $step, pings = $pings)\n"; 97 | } 98 | 99 | 100 | # Make sure that we can process all hosts 101 | if($interval / $hostcount < $MIN_WAIT) { 102 | die "sping: step * 1000 / (pings * hostcount) must be at least 10 (step=$step, pings=$pings, hostcount=$hostcount\n"; 103 | } 104 | 105 | # Prepare job 106 | my %job = ( 107 | host2order => {}, 108 | output => '', 109 | effective_cmd => '', 110 | pid => '', 111 | ); 112 | for my $order ($self->order_list->get_all) { 113 | my $host = $order->sping->host; 114 | push @{$job{host2order}{$host}}, $order; 115 | } 116 | $job{hostlist} = join("\n", shuffle keys %{$job{host2order}}) . "\n", 117 | $self->current_job(\%job); 118 | 119 | # Run scmp 120 | my $single_host = substr $job{hostlist}, 0, -1; 121 | $log->info("single_host: ".$single_host); 122 | my @cmd = [ 123 | $SPING_BIN, 124 | '-id', 'scmp_echo', 125 | '-interval', $interval.'ms', 126 | '-c', $pings, 127 | '-timeout', $TIMEOUT.'s', 128 | '-remote', $single_host, 129 | '-local', $self->source_ip, 130 | ]; 131 | # Policy and performance flags 132 | push @cmd, $self->flags; 133 | # 134 | my $cmd_string = "cd $SCION_DIR && ".join(' ', @cmd); 135 | my $effective_cmd = [ 'su', 'scion', '-c', $cmd_string ]; 136 | $job{effective_cmd} = join(' ', @$effective_cmd); 137 | $log->debug("starting: @cmd (step: $step, pings: $pings, offset: ".$self->time_offset().")") if $log->is_debug(); 138 | 139 | my $cv = run_cmd $effective_cmd, 140 | '2>', '/dev/null', 141 | '>', \$job{output}, 142 | '$$', \$job{pid}; 143 | 144 | # Install sping exit callback 145 | $cv->cb( 146 | sub { 147 | my $cbv = shift; 148 | $job{pid} = undef; 149 | my $exit = $cbv->recv; 150 | $exit = $exit >> 8; 151 | if($exit and $exit != 1 and $exit != 2) { 152 | # exit 1 means that some hosts aren't reachable 153 | # exit 2 means "any IP addresses were not found" 154 | $log->warning("sping seems to have failed (exit: $exit, stderr: ".$job{output}.")"); 155 | return; 156 | } 157 | 158 | $log->debug("finished: @cmd (step: $step, pings: $pings, offset: ".$self->time_offset().")") if $log->is_debug(); 159 | 160 | $self->_collect_current_job(); 161 | 162 | $log->debug("collected: @cmd (step: $step, pings: $pings, offset: ".$self->time_offset().")") if $log->is_debug(); 163 | } 164 | ); 165 | } 166 | 167 | sub _kill_current_job { 168 | my ($self) = @_; 169 | 170 | # Kill sping, if still running 171 | my $job = $self->current_job; 172 | my $job_pid = $job->{pid}; 173 | if($job_pid) { 174 | # Check that we are killing the process we started and not an innocent bystander 175 | my $cmd_match = 0; 176 | if (open(proc_fh, "/proc/${job_pid}/cmdline")) { 177 | $cmd_match = (join('', readline(proc_fh)) eq $job->{effective_cmd}); 178 | close(proc_fh); 179 | } 180 | if($cmd_match && kill(0, $job->{pid})) { 181 | $log->warning("killing unfinished sping process (step: ".$self->step.", pings: ".$self->pings.", offset: ".$self->time_offset().")"); 182 | kill 9, $job->{pid}; 183 | $job->{pid} = undef; 184 | } 185 | elsif($job->{output}) { 186 | $log->warning("sping has finished, but we didn't notice... collecting (step: ".$self->step.", pings: ".$self->pings.", offset: ".$self->time_offset.")"); 187 | $self->_collect_current_job(); 188 | } 189 | else { 190 | $log->warning("sping has finished, but we didn't notice... no output found (?)"); 191 | } 192 | } 193 | } 194 | 195 | sub _collect_current_job { 196 | my ($self) = @_; 197 | 198 | my $job = $self->current_job; 199 | $self->current_job({}); 200 | my %results; 201 | 202 | # Do nothing, if sping didn't run yet or if job has been already collected 203 | return unless $job->{output}; 204 | 205 | # Parse sping report 206 | my $text = $job->{output}; 207 | $log->info("Output to parse: text: ".$text."--"); 208 | while($text !~ /\G\z/gc) { 209 | if($text =~ /\G(\S+)[ \t]+:/gc) { 210 | my $host = $1; 211 | my @data; 212 | while($text =~ /\G[ \t]+([-\d\.]+)/gc) { 213 | push @data, $1; 214 | } 215 | # raw ping times 216 | my @pings = map {$_ eq '-' ? undef : $_ / 1000} @data; 217 | $results{$host}{pings} = \@pings; 218 | # sorted rtt times 219 | my @rtts = map {sprintf "%.6e", $_ / 1000} sort {$a <=> $b} grep /^\d/, @data; 220 | $results{$host}{rtts} = \@rtts; 221 | $log->info("adding results: rtts: ".join(" ", @rtts)); 222 | } 223 | 224 | # discard any other output on the line (ICMP host unreachable errors, etc.) 225 | $text =~ /\G.*\n/gc; 226 | } 227 | 228 | $log->debug("adding results") if $log->is_debug(); 229 | 230 | # Add results (to RRD) 231 | if(scalar keys %results) { 232 | my $now = int(AnyEvent->now); 233 | my $step = $self->step; 234 | my $rrd_time = $now - $step - $now%$step; 235 | for my $host (keys %results) { 236 | my $h2o = $job->{host2order}{$host}; 237 | unless ($h2o) { 238 | $log->warning("sping produced results for unknown host (host: $host, step: $step)"); 239 | next; 240 | } 241 | for my $order (@{$h2o}) { 242 | $order->add_results($rrd_time, $results{$host}); 243 | } 244 | } 245 | } 246 | 247 | $log->debug("adding results finished") if $log->is_debug(); 248 | } 249 | 250 | 251 | sub run { 252 | my ($self) = @_; 253 | 254 | $self->_kill_current_job(); 255 | $self->_start_new_job(); 256 | } 257 | 258 | __PACKAGE__->meta->make_immutable; 259 | 260 | 1; 261 | -------------------------------------------------------------------------------- /lib/Pingmachine/Probe/SSH.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::Probe::SSH; 2 | 3 | # NOTE: this probe is a bit special because: 4 | # 1. It doesn't support pings values > 1 5 | # 2. It doesn't measure RTT (only loss) 6 | # Improvements welcome... 7 | 8 | use Any::Moose; 9 | use AnyEvent; 10 | use AnyEvent::Util; 11 | use Log::Any qw($log); 12 | use List::Util qw(shuffle); 13 | use XML::Simple; 14 | 15 | my $NMAP_BIN = '/bin/nmap'; 16 | my $SSH_TIMEOUT = 20; 17 | 18 | has 'name' => ( 19 | is => 'ro', 20 | isa => 'Str', 21 | default => sub { "ssh" }, 22 | ); 23 | 24 | has 'max_orders' => ( 25 | is => 'ro', 26 | isa => 'Int', 27 | default => 1000, 28 | ); 29 | 30 | has 'results' => ( 31 | is => 'ro', 32 | isa => 'HashRef', 33 | default => sub { {} }, 34 | ); 35 | 36 | has 'current_job' => ( 37 | is => 'rw', 38 | isa => 'HashRef', 39 | ); 40 | 41 | has 'key_type' => ( 42 | is => 'ro', 43 | isa => 'Str', 44 | required => 1, 45 | ); 46 | 47 | with 'Pingmachine::Probe'; 48 | 49 | 50 | sub _start_new_job { 51 | my ($self) = @_; 52 | 53 | # Prepare job 54 | my %job = ( 55 | host2order => {}, 56 | pid => '', 57 | stderr => '', 58 | stdout => '', 59 | ); 60 | for my $order ($self->order_list->get_all) { 61 | my $host = $order->ssh->host; 62 | push @{$job{host2order}{$host}}, $order; 63 | } 64 | $job{hostlist} = join("\n", shuffle keys %{$job{host2order}}) . "\n", 65 | $self->current_job(\%job); 66 | 67 | # Run nmap 68 | my $cmd = [ 69 | $NMAP_BIN, 70 | '-sT', # TCP connect() scanning 71 | '-v', # verbose mode to see if host is down 72 | '-n', # disable DNS resolution 73 | '-PN', # disable host discovery (assume all hosts online) 74 | '-p',22, 75 | '--max-rtt-timeout',$SSH_TIMEOUT, 76 | '-oX','-', # XML output format 77 | '-iL','-', # use stdin to input list 78 | '-sV', # enable version detection 79 | '--version-intensity',4, # speed up version detection (lower => faster but more inaccurate) 80 | ]; 81 | $log->debug("starting: @$cmd") if $log->is_debug(); 82 | 83 | $job{cmd_cv} = run_cmd $cmd, 84 | '<', \$job{hostlist}, 85 | '2>', \$job{stderr}, 86 | '>', \$job{stdout}, 87 | '$$', \$job{pid}; 88 | 89 | # Install exit callback 90 | $job{cmd_cv}->cb( 91 | sub { 92 | my $cbv = shift; 93 | $job{pid} = undef; 94 | my $exit = $cbv->recv; 95 | $exit = $exit >> 8; 96 | if($exit) { 97 | $log->warning("nmap seems to have failed (exit: $exit, stderr: $job{stderr})"); 98 | return; 99 | } 100 | 101 | $self->_collect_results(); 102 | } 103 | ); 104 | } 105 | 106 | sub _kill_current_job { 107 | my ($self) = @_; 108 | 109 | # Kill ssh-keyscan, if still running 110 | my $job = $self->current_job; 111 | if($job->{pid}) { 112 | $log->warning("killing unfinished nmap process"); 113 | $job->{killed} = 1; 114 | kill 9, $job->{pid}; 115 | $job->{pid} = undef; 116 | } 117 | } 118 | 119 | sub _collect_results { 120 | my ($self) = @_; 121 | 122 | my $job = $self->current_job; 123 | 124 | # Do nothing, if the job was killed (we might miss some entries) 125 | return if $job->{killed}; 126 | 127 | # Do nothing, if ssh-keyscan didn't run yet or if job has been already collected 128 | my $stdout = $job->{stdout}; 129 | return unless defined $stdout; 130 | 131 | my $xml_ref = XMLin($stdout); 132 | 133 | # Process XML output 134 | my %results; 135 | foreach my $host (@{$xml_ref->{host}}) { 136 | my $address = $host->{address}{addr}; 137 | my $status = $host->{ports}{port}{state}{state} // $host->{status}{state}; 138 | my $product = $host->{ports}{port}{service}{product}; 139 | 140 | if( $status eq 'open' && $product && $product =~ /SSH/i ) { 141 | my $ping = $host->{times}{srtt}/1000000; # convert ns to ms 142 | $results{$address}{rtts} = [$ping]; # sorted rtts 143 | $results{$address}{pings} = [$ping]; # raw ping values 144 | } 145 | else { 146 | $results{$address}{pings} = [undef]; 147 | $results{$address}{rtts} = []; 148 | } 149 | } 150 | 151 | # Add results 152 | my $now = int(AnyEvent->now); 153 | my $step = $self->step; 154 | my $rrd_time = $now - $step - $now%$step; 155 | for my $host (keys %{$job->{host2order}}) { 156 | my $h2o = $job->{host2order}{$host}; 157 | for my $order (@{$h2o}) { 158 | $order->add_results($rrd_time, $results{$host}); 159 | } 160 | } 161 | 162 | $job->{stdout} = undef; 163 | } 164 | 165 | sub run { 166 | my ($self) = @_; 167 | 168 | $self->_kill_current_job(); 169 | $self->_start_new_job(); 170 | } 171 | 172 | sub BUILD { 173 | my $self = shift; 174 | my $args = shift; 175 | 176 | if($self->pings != 1) { 177 | $log->warning("The 'ssh' probe currently only supports pings=1. Other values will be ignored"); 178 | } 179 | 180 | } 181 | 182 | __PACKAGE__->meta->make_immutable; 183 | 184 | 1; 185 | -------------------------------------------------------------------------------- /lib/Pingmachine/ProbeList.pm: -------------------------------------------------------------------------------- 1 | package Pingmachine::ProbeList; 2 | 3 | use Any::Moose; 4 | use Log::Any qw($log); 5 | 6 | use Pingmachine::Probe::FPing; 7 | use Pingmachine::Probe::SPing; 8 | use Pingmachine::Probe::PPing; 9 | use Pingmachine::Probe::HTTPing; 10 | use Pingmachine::Probe::SSH; 11 | 12 | has '_probes' => ( 13 | isa => 'HashRef', 14 | is => 'ro', 15 | default => sub { return {} }, 16 | ); 17 | 18 | has '_order2probe' => ( 19 | isa => 'HashRef', 20 | is => 'ro', 21 | default => sub { return {} }, 22 | ); 23 | 24 | # structure of _probes: 25 | # $key => { 26 | # HASHREF(xxx) => { count => 1000, max => 1000, probe => HASHREF(...) }, 27 | # HASHREF(xxx) => { count => 234, max => 1000, probe => HASHREF(...) }, 28 | # } 29 | 30 | sub _find_probe_for_new_order { 31 | my ($self, $key) = @_; 32 | 33 | if(not defined $self->{_probes}{$key}) { 34 | $self->{_probes}{$key} = {}; 35 | } 36 | 37 | for my $p (values %{$self->{_probes}{$key}}) { 38 | if($p->{count} < $p->{max}) { 39 | $p->{count}++; 40 | return $p->{probe}; 41 | } 42 | } 43 | 44 | return undef; 45 | } 46 | 47 | sub add_order { 48 | my ($self, $order) = @_; 49 | 50 | # Create probe, if needed 51 | my $key = $order->probe_instance_key; 52 | my $probe = $self->_find_probe_for_new_order($key); 53 | if(not defined $probe) { 54 | my $probe_type = $order->probe; 55 | $log->debug("create new probe for key: $key"); 56 | if($probe_type eq 'fping') { 57 | $probe = Pingmachine::Probe::FPing->new( 58 | step => $order->step, 59 | pings => $order->pings, 60 | interval => $order->fping->interval || 0, 61 | source_ip => $order->fping->source_ip || 0, 62 | interface => $order->fping->interface || 0, 63 | ipv6 => $order->fping->ipv6 || 0, 64 | ); 65 | } 66 | elsif($probe_type eq 'sping') { 67 | $probe = Pingmachine::Probe::SPing->new( 68 | step => $order->step, 69 | pings => $order->pings, 70 | interval => $order->sping->interval || 0, 71 | source_ip => $order->sping->source_ip || 0, 72 | flags => $order->sping->flags || '', 73 | interface => $order->sping->interface || 0, 74 | ipv6 => $order->sping->ipv6 || 0, 75 | ); 76 | } 77 | elsif($probe_type eq 'pping') { 78 | $probe = Pingmachine::Probe::PPing->new( 79 | step => $order->step, 80 | pings => $order->pings, 81 | interval => $order->pping->interval || 0, 82 | source_ip => $order->pping->source_ip || 0, 83 | flags => $order->pping->flags || '', 84 | interface => $order->pping->interface || 0, 85 | ipv6 => $order->pping->ipv6 || 0, 86 | ); 87 | } 88 | elsif($probe_type eq 'httping') { 89 | $probe = Pingmachine::Probe::HTTPing->new( 90 | step => $order->step, 91 | pings => $order->pings, 92 | interval => $order->httping->interval || 0, 93 | user_agent => $order->httping->user_agent || '', 94 | http_codes_as_failure => $order->httping->http_codes_as_failure || '', 95 | proxy => $order->httping->proxy || '', 96 | ); 97 | } 98 | elsif($probe_type eq 'ssh') { 99 | $probe = Pingmachine::Probe::SSH->new( 100 | step => $order->step, 101 | pings => $order->pings, 102 | key_type => $order->ssh->key_type, 103 | ); 104 | } 105 | else { 106 | $log->warning("unknown probe type: $probe_type"); 107 | return; 108 | } 109 | 110 | $self->{_probes}{$key}{$probe} = { count => 1, max => $probe->max_orders, probe => $probe }; 111 | $probe->start(); 112 | } 113 | 114 | # Add order to probe 115 | $probe->add_order($order); 116 | $self->{_order2probe}{$order} = $probe; 117 | } 118 | 119 | sub remove_order { 120 | my ($self, $order) = @_; 121 | 122 | # Find probe 123 | my $key = $order->probe_instance_key; 124 | my $probe = $self->{_order2probe}{$order}; 125 | if(not defined $probe) { 126 | $log->warning("can't find probe for order $order"); 127 | return; 128 | } 129 | 130 | # Remove order from probe 131 | $probe->remove_order_id($order->id); 132 | 133 | # Remove from our probe list 134 | if(not defined $self->{_probes}{$key}{$probe}) { 135 | $log->warning("can't find probe in our probe list for order $order"); 136 | } 137 | else { 138 | my $p = $self->{_probes}{$key}{$probe}; 139 | $p->{count}--; 140 | if($p->{count} <= 0) { 141 | delete $self->{_probes}{$key}{$probe}; 142 | } 143 | } 144 | } 145 | 146 | __PACKAGE__->meta->make_immutable; 147 | 148 | 1; 149 | -------------------------------------------------------------------------------- /lib/Smokeping/Colorspace.pm: -------------------------------------------------------------------------------- 1 | # -*- perl -*- 2 | 3 | package Smokeping::Colorspace; 4 | 5 | =head1 NAME 6 | 7 | Smokeping::Colorspace - Simple Colorspace Conversion methods 8 | 9 | =head1 OVERVIEW 10 | 11 | This module provides simple colorspace conversion methods, primarily allowing 12 | conversion from RGB (red, green, blue) to and from HSL (hue, saturation, luminosity). 13 | 14 | =head1 COPYRIGHT 15 | 16 | Copyright 2006 by Grahame Bowland. 17 | 18 | =head1 LICENSE 19 | 20 | This program is free software; you can redistribute it 21 | and/or modify it under the terms of the GNU General Public 22 | License as published by the Free Software Foundation; either 23 | version 2 of the License, or (at your option) any later 24 | version. 25 | 26 | This program is distributed in the hope that it will be 27 | useful, but WITHOUT ANY WARRANTY; without even the implied 28 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 29 | PURPOSE. See the GNU General Public License for more 30 | details. 31 | 32 | You should have received a copy of the GNU General Public 33 | License along with this program; if not, write to the Free 34 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 35 | 02139, USA. 36 | 37 | =head1 AUTHOR 38 | 39 | Grahame Bowland 40 | 41 | =cut 42 | 43 | sub web_to_rgb { 44 | my $web = shift; 45 | $web =~ s/^#//; 46 | my @rgb = (hex(substr($web, 0, 2)) / 255, 47 | hex(substr($web, 2, 2)) / 255, 48 | hex(substr($web, 4, 2)) / 255) ; 49 | return @rgb; 50 | } 51 | 52 | sub rgb_to_web { 53 | my @rgb = @_; 54 | return sprintf("#%.2x%.2x%.2x", 255 * $rgb[0], 255 * $rgb[1], 255 * $rgb[2]); 55 | } 56 | 57 | sub min_max_indexes { 58 | my $idx = 0; 59 | my ($min_idx, $min, $max_idx, $max); 60 | my @l = @_; 61 | 62 | foreach my $i (@l) { 63 | if (not defined($min) or ($i < $min)) { 64 | $min = $i; 65 | $min_idx = $idx; 66 | } 67 | if (not defined($max) or ($i > $max)) { 68 | $max = $i; 69 | $max_idx = $idx; 70 | } 71 | $idx++; 72 | } 73 | return ($min_idx, $min, $max_idx, $max); 74 | } 75 | 76 | # source for conversion algorithm is: 77 | # http://www.easyrgb.com/math.php?MATH=M18#text18 78 | sub rgb_to_hsl { 79 | my @rgb = @_; 80 | my ($h, $l, $s); 81 | 82 | my ($min_idx, $min, $max_idx, $max) = min_max_indexes(@rgb); 83 | my $delta_max = $max - $min; 84 | $l = ($max + $min) / 2; 85 | if ($delta_max == 0) { 86 | my $h = 0; 87 | my $s = 0; 88 | } else { 89 | if ($l < 0.5) { 90 | $s = $delta_max / ($max + $min); 91 | } else { 92 | $s = $delta_max / (2 - $max - $min); 93 | } 94 | my $delta_r = ((($max - $rgb[0]) / 6) + ($max / 2)) / $delta_max; 95 | my $delta_g = ((($max - $rgb[1]) / 6) + ($max / 2)) / $delta_max; 96 | my $delta_b = ((($max - $rgb[2]) / 6) + ($max / 2)) / $delta_max; 97 | if ($max_idx == 0) { 98 | $h = $delta_b - $delta_g; 99 | } elsif ($max_idx == 1) { 100 | $h = (1/3) + $delta_r - $delta_b; 101 | } else { 102 | $h = (2/3) + $delta_g - $delta_r; 103 | } 104 | if ($h < 0) { 105 | $h += 1; 106 | } elsif ($h > 1) { 107 | $h -= 1; 108 | } 109 | } 110 | return ($h, $s, $l); 111 | } 112 | 113 | sub hue_to_rgb { 114 | my ($v1, $v2, $vh) = @_; 115 | if ($vh < 0) { 116 | $vh += 1; 117 | } elsif ($vh > 1) { 118 | $vh -= 1; 119 | } 120 | if ($vh * 6 < 1) { 121 | return $v1 + ($v2 - $v1) * 6 * $vh; 122 | } elsif ($vh * 2 < 1) { 123 | return $v2; 124 | } elsif ($vh * 3 < 2) { 125 | return $v1 + ($v2 - $v1) * ((2/3) - $vh) * 6; 126 | } else { 127 | return $v1; 128 | } 129 | } 130 | 131 | sub hsl_to_rgb { 132 | my ($h, $s, $l) = @_; 133 | my ($r, $g, $b); 134 | if ($s == 0) { 135 | $r = $g = $b = $l; 136 | } else { 137 | my $ls; 138 | if ($l < 0.5) { 139 | $ls = $l * (1 + $s); 140 | } else { 141 | $ls = ($l + $s) - ($s * $l); 142 | } 143 | $l = 2 * $l - $ls; 144 | $r = hue_to_rgb($l, $ls, $h + 1/3); 145 | $g = hue_to_rgb($l, $ls, $h); 146 | $b = hue_to_rgb($l, $ls, $h - (1/3)); 147 | } 148 | return ($r, $g, $b); 149 | } 150 | 151 | 1; 152 | 153 | -------------------------------------------------------------------------------- /pingmachine: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | ############################################################################### 3 | # 4 | # Pingmachine - Smokeping-like Latency Measurement 5 | # 6 | # Written by David Schweikert , June 2011 7 | # Copyright (c) 2011-2014 Open Systems AG, Switzerland 8 | # All Rights Reserved. 9 | # 10 | # See LICENSE file for the software licensing conditions. 11 | # 12 | ############################################################################### 13 | 14 | use strict; 15 | use warnings; 16 | 17 | use EV; 18 | use Pingmachine::Main; 19 | use Net::Server::Daemonize qw(daemonize); 20 | use Getopt::Long; 21 | use Log::Any qw($log); 22 | use Log::Any::Adapter; 23 | 24 | my %opt = (); 25 | 26 | sub usage { 27 | die "usage: pingmachine [--debug] [--user=USER] [--basedir=DIR] [--telegraf=HOST:PORT]\n". 28 | " (default basedir: ".Pingmachine::Config->base_dir.")\n"; 29 | } 30 | 31 | sub my_daemonize { 32 | my ($uid, $gid, $groups); 33 | if($opt{user}) { 34 | ($uid, $gid) = (getpwnam($opt{user}))[2,3]; 35 | defined $uid or die "ERROR: can't resolve username $opt{user}"; 36 | $groups = `id -G $opt{user}`; chomp $groups; 37 | } 38 | chdir '/tmp'; 39 | 40 | daemonize('root', 'root', undef); 41 | 42 | # Net::Server::Daemonize support for supplementary groups is buggy -> we 43 | # need to do it ourselves with $) 44 | # 45 | # Note that this is important because fping is executable only by 46 | # pingmachine's secondary group monitor 47 | if($opt{user}) { 48 | $) = "$gid $groups"; 49 | $> = $uid; 50 | } 51 | 52 | EV::default_loop->loop_fork(); 53 | } 54 | 55 | sub main { 56 | # Parse arguments 57 | GetOptions (\%opt, 'debug|d', 'help|h', 'user|u=s', 'basedir|b=s', 'telegraf|t=s'); 58 | usage() if $opt{help}; 59 | Pingmachine::Config->base_dir($opt{basedir}) if $opt{basedir}; 60 | Pingmachine::Config->set_telegraf($opt{telegraf}) if $opt{telegraf}; 61 | 62 | # Configure logging 63 | if($opt{debug}) { 64 | Log::Any::Adapter->set('Dispatch', outputs => [ 65 | [ 'Screen', min_level => 'debug', newline => 1 ], 66 | ]); 67 | } 68 | else { 69 | Log::Any::Adapter->set('Dispatch', outputs => [ 70 | [ 'Syslog', min_level => 'info', ident => 'pingmachine' ], 71 | ]); 72 | } 73 | 74 | $EV::DIED = sub { 75 | $log->error("INTERNAL ERROR: $@"); 76 | }; 77 | 78 | # Daemonize 79 | my_daemonize() unless $opt{debug}; 80 | 81 | # Start Pingmachine 82 | my $pingmachine = Pingmachine::Main->new(); 83 | $pingmachine->run(); 84 | } 85 | 86 | main; 87 | -------------------------------------------------------------------------------- /pingmachine-graph: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | ############################################################################### 3 | # 4 | # Pingmachine - Smokeping-like Latency Measurement 5 | # 6 | # pingmachine-graph written by Jan Bernegger 7 | # Copyright (c) 2013-2014 Open Systems AG, Switzerland 8 | # All Rights Reserved. 9 | # 10 | # See LICENSE file for the software licensing conditions. 11 | # 12 | ############################################################################### 13 | 14 | use strict; 15 | use warnings; 16 | 17 | use lib '/opt/OSAGping/lib'; 18 | use lib '/opt/OSAGrrdfw/lib'; 19 | 20 | use Pingmachine::Graph::Smokeping; 21 | 22 | use Try::Tiny; 23 | use Getopt::Long; 24 | use Scalar::Util qw(looks_like_number); 25 | use feature qw(switch); 26 | 27 | # config 28 | my $PINGM_OUTPUT = '/var/lib/pingmachine/output'; 29 | my $PINGM_ORDERS = '/var/lib/pingmachine/orders'; 30 | 31 | # Option handling 32 | my %opts = (); 33 | GetOptions(\%opts, 34 | 'id|i=s', 35 | 'period|p=s', 36 | 'out|o=s', 37 | 'help|h' => \&usage) || usage(); 38 | 39 | 40 | #----------------------- 41 | # main 42 | #----------------------- 43 | sub main { 44 | try { 45 | for my $arg (qw(id period out)) { 46 | defined $opts{$arg} or die "mandatory option $arg not defined\n"; 47 | } 48 | 49 | if(grep {$_ eq $opts{period}} qw(day week month)) { 50 | given($opts{period}) { 51 | when('day') { $opts{period} = 86400; } 52 | when('week') { $opts{period} = 86400*7; } 53 | default { $opts{period} = 86400*30; } 54 | } 55 | } 56 | 57 | looks_like_number($opts{period}) or die "period $opts{period} is neither a number nor day|week|month\n"; 58 | 59 | my $id = get_full_id($opts{id}); 60 | 61 | my $rrd_file = "$PINGM_OUTPUT/$id/main.rrd"; 62 | my $order_file = "$PINGM_ORDERS/$id"; 63 | 64 | die "order_id '$id' invalid\n" unless -f $rrd_file && -f $order_file; 65 | 66 | my $pings; 67 | open(my $fd, '<', $order_file); 68 | while(<$fd>) { 69 | if(/pings:\s+ (\d+)/xms) { 70 | $pings = $1; 71 | last; 72 | } 73 | } 74 | 75 | die "could not parse order file\n" unless $pings; 76 | 77 | Pingmachine::Graph::Smokeping->graph( 78 | img => $opts{out}, 79 | rrd => $rrd_file, 80 | pings => $pings, 81 | timespan => $opts{period}, 82 | width => 510, 83 | height => 300, 84 | ); 85 | 86 | print "image $opts{out} successfully created\n"; 87 | } 88 | catch { 89 | print "ERROR: $_"; 90 | }; 91 | } 92 | 93 | sub get_full_id { 94 | my $id = shift; 95 | 96 | opendir(my $dh, $PINGM_ORDERS) || die "can't opendir $PINGM_ORDERS: $!"; 97 | my @id_files = grep { /^$id/ && -f "$PINGM_ORDERS/$_" } readdir($dh); 98 | 99 | die "order_id $id invalid\n" unless @id_files == 1; 100 | 101 | return $id_files[0]; 102 | } 103 | 104 | sub usage { 105 | print <, June 2011 7 | # Copyright (c) 2011-2014 Open Systems AG, Switzerland 8 | # All Rights Reserved. 9 | # 10 | # See LICENSE file for the software licensing conditions. 11 | # 12 | ############################################################################### 13 | 14 | use strict; 15 | use warnings; 16 | use feature ':5.10'; 17 | use YAML::XS qw(LoadFile Dump); 18 | use Term::ANSIColor; 19 | use Try::Tiny; 20 | use List::Util qw(max); 21 | 22 | my $ORDERS_DIR = '/var/lib/pingmachine/orders'; 23 | my $OUTPUT_DIR = '/var/lib/pingmachine/output'; 24 | 25 | my %pretty_state_map = ( 26 | 'up' => my_colored(' UP ', 'green'), 27 | 'down' => my_colored(' DOWN ', 'red'), 28 | 'unknown' => ' unkn ', 29 | ); 30 | 31 | sub my_colored { 32 | if(-t STDOUT) { 33 | return colored(@_); 34 | } 35 | else { 36 | return $_[0]; 37 | } 38 | } 39 | 40 | sub pretty_rtt { 41 | my ($rtt) = @_; 42 | if(not defined $rtt) { 43 | return '-'; 44 | } 45 | return sprintf('%.0f ms', $rtt*1000); 46 | } 47 | 48 | sub pretty_loss { 49 | my ($loss, $pings) = @_; 50 | if(not defined $loss) { 51 | return '-'; 52 | } 53 | my $loss_pct = sprintf('%3.0f%%', $loss*100/$pings); 54 | if($loss == 0 or $pings > 2 and $loss == 1) { 55 | return my_colored($loss_pct, 'green'); 56 | } 57 | elsif($loss < $pings) { 58 | return my_colored($loss_pct, 'yellow'); 59 | } 60 | else { 61 | return my_colored($loss_pct, 'red'); 62 | } 63 | return 64 | } 65 | 66 | sub pretty_time { 67 | my ($time) = @_; 68 | my $delta_t = time - $time; 69 | if($delta_t < 120) { 70 | return sprintf("%3d s ", $delta_t); 71 | } 72 | elsif($delta_t < 7200) { 73 | return sprintf("%3d min", $delta_t / 60); # "120 min" -> 7 chars 74 | } 75 | elsif($delta_t < 48 * 3600) { 76 | return sprintf("%3d h ", $delta_t / 3600); 77 | } 78 | else { 79 | return sprintf("%3d d ", $delta_t / (3600*24)); 80 | } 81 | } 82 | 83 | sub sortable_ip { 84 | my ($ip) = @_; 85 | defined $ip or return undef; 86 | if($ip =~ /^[:0-9a-f]+$/i) { 87 | my @n = split(/:/, $ip); 88 | return "a.".join(":", map { sprintf "%04x", hex($_) } @n); 89 | } 90 | elsif($ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) { 91 | return sprintf("%03d.%03d.%03d.%03d", $1, $2, $3, $4); 92 | } 93 | else { 94 | return $ip; 95 | } 96 | } 97 | 98 | sub main { 99 | my ($orders, $orders_by_user, $any_ipv6); 100 | ($orders, $orders_by_user, $any_ipv6) = scan_orders_dir($ORDERS_DIR, '', $orders, $orders_by_user, $any_ipv6); 101 | 102 | # read results 103 | my $oid_length = 8; 104 | for my $oid (keys %{$orders}) { 105 | $oid_length = max($oid_length, length($oid)); 106 | if(-f "$OUTPUT_DIR/$oid/last_result") { 107 | try { $orders->{$oid}{result} = LoadFile("$OUTPUT_DIR/$oid/last_result"); } 108 | catch { warn "WARNING: can't parse $OUTPUT_DIR/$oid/last_result\n"; } 109 | } 110 | } 111 | 112 | # pretty print 113 | state $format = " %-${oid_length}s %7s %5s %-8s %-15s %-7s %7s %4s%s\n"; 114 | if($any_ipv6) { 115 | $format = " %-${oid_length}s %7s %5s %-8s %-36s %-7s %7s %4s%s\n"; 116 | } 117 | my $first_user = 1; 118 | for my $user (sort keys %{$orders_by_user}) { 119 | $first_user ? $first_user = 0 : print "\n"; 120 | say my_colored("- $user", 'bold'); 121 | say ""; 122 | printf(my_colored($format, 'bold'), "order", "step", "pings", "probe", "host", "updated", "m.rtt", "loss", ""); 123 | printf($format, '-'x$oid_length, '-'x7, '-'x5, '-'x8, '-'x($any_ipv6 ? 32 : 15), '-'x7, '-'x7, '-'x4, ""); 124 | for my $oid (sort { $orders->{$a}{sort_key} cmp $orders->{$b}{sort_key} } keys %{$orders_by_user->{$user}}) 125 | { 126 | my $order = $orders->{$oid}; 127 | my $additional = additional_info($order); 128 | printf($format, 129 | $oid, 130 | "$order->{step} s", 131 | $order->{pings}, 132 | $order->{probe}, 133 | $order->{probe_host} // '-', 134 | !defined $order->{result} ? '-' : ( 135 | time - $order->{result}{updated} > $order->{step} ? 136 | my_colored(pretty_time($order->{result}{updated}), 'red') : 137 | pretty_time($order->{result}{updated}) 138 | ), 139 | !defined $order->{result} ? '-' : ( 140 | pretty_rtt($order->{result}{median}) 141 | ), 142 | !defined $order->{result} ? '-' : ( 143 | pretty_loss($order->{result}{loss}, $order->{pings}) 144 | ), 145 | $additional ? " ($additional)" : "", 146 | ); 147 | } 148 | } 149 | } 150 | 151 | sub scan_orders_dir { 152 | my ($orders_dir, $order_id_prefix, $orders, $orders_by_user, $any_ipv6) = @_; 153 | opendir(my $open_orders_dir, $orders_dir) or die "ERROR: can't open $ORDERS_DIR: $!\n"; 154 | while(my $order_file = readdir($open_orders_dir)) { 155 | next if $order_file eq '.' or $order_file eq '..'; 156 | 157 | my $order_id = $order_id_prefix ? "$order_id_prefix/$order_file" : $order_file; 158 | my $order_path = "$orders_dir/$order_file"; 159 | if (-d $order_path) { 160 | ($orders, $orders_by_user, $any_ipv6) = scan_orders_dir($order_path, $order_id, $orders, $orders_by_user, $any_ipv6); 161 | } 162 | 163 | next unless -f $order_path; 164 | my $order; 165 | try { $order = LoadFile($order_path); } 166 | catch { warn "WARNING: can't parse $order_path\n"; }; 167 | next unless $order; 168 | next unless $order->{user}; 169 | $orders->{$order_id} = $order; 170 | if(defined $order->{probe} and defined $order->{$order->{probe}}{host}) { 171 | $order->{probe_host} = $order->{$order->{probe}}{host}; 172 | $order->{sort_key} = sortable_ip($order->{probe_host}) . ":$order->{probe}:$order->{step}:$order_id"; 173 | if($order->{$order->{probe}}{host} =~ /^[:0-9a-f]+$/i) { 174 | $any_ipv6 = 1; 175 | } 176 | } 177 | else { 178 | $order->{sort_key} = "ZZZ:$order->{probe}:$order->{step}:$order_id"; 179 | } 180 | $orders_by_user->{$order->{user}}{$order_id} = $order; 181 | } 182 | 183 | return $orders, $orders_by_user, $any_ipv6; 184 | } 185 | 186 | sub additional_info { 187 | my ($order) = @_; 188 | 189 | my @additional; 190 | if($order->{probe} eq 'fping') { 191 | if($order->{fping}{interface}) { 192 | push @additional, $order->{fping}{interface}; 193 | } 194 | if($order->{fping}{source_ip}) { 195 | push @additional, "source=".$order->{fping}{source_ip}; 196 | } 197 | } 198 | if(scalar @additional) { 199 | return join(', ', @additional); 200 | } 201 | else { 202 | return undef; 203 | } 204 | } 205 | 206 | main; 207 | -------------------------------------------------------------------------------- /pingtest.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # This example scripts shows how to use Pingmachine so that you get an immediate feedback 4 | # about ping results (to react as soon as possible) 5 | # 6 | # Note that it is rather complicated, because of that requirements (immediate 7 | # notification). You shouldn't do this, unless you need to... 8 | 9 | use strict; 10 | use AnyEvent; 11 | use EV; 12 | use Linux::Inotify2; 13 | use Digest::MD5 qw(md5_hex); 14 | use YAML::XS qw(LoadFile); 15 | use POSIX qw(strftime); 16 | 17 | my $step = 1; # write results every $step interval 18 | my $ping_ip = '8.8.8.8'; 19 | 20 | my $orders_dir = '/var/lib/pingmachine/orders'; 21 | my $output_base = '/var/lib/pingmachine/output'; 22 | 23 | ### write_order: Write the Pingmachine order file 24 | ### 25 | my $order_w; # We would use 'state' for this, if we had Perl >= 5.10... 26 | my $order_id; 27 | sub write_order { 28 | # Write order file 29 | my $order = <', $order_file) or 42 | die "ERROR: can't write $order_file: $!\n"; 43 | print $fh $order; 44 | close($fh); 45 | 46 | # Schedule a rewrite of the file 47 | # (so that pingmachine doesn't delete it) 48 | $order_w = AnyEvent->timer( 49 | after => 300, 50 | cb => sub { write_order(); } 51 | ); 52 | 53 | return $output_base . "/$order_id"; 54 | } 55 | 56 | ### remove_order: Remove the order file, when we quit 57 | ### (note that Pingmachine removes it for us after one hour that we didn't 58 | ### refresh it, but we do it anyway for neatiness) 59 | ### 60 | sub remove_order { 61 | unlink $orders_dir . "/$order_id"; 62 | } 63 | 64 | ### watch_output: Watch output directory for new "last_result" file. 65 | ### (note that Pingmachine creates a tempory file and then 66 | ### renames (moves) it to "last_results") 67 | my $watch_output_w; 68 | sub watch_output { 69 | my ($output_dir) = @_; 70 | my $inotify = Linux::Inotify2->new() or 71 | die "ERROR: Unable to create new inotify object: $!"; 72 | $inotify->watch("$output_dir", 73 | IN_MOVED_TO, 74 | sub { 75 | my $e = shift; 76 | $e->name eq 'last_result' or return; 77 | read_result($e->fullname); 78 | } 79 | ); 80 | $watch_output_w = AnyEvent->io( 81 | fh => $inotify->fileno, 82 | poll => 'r', 83 | cb => sub { $inotify->poll } 84 | ); 85 | } 86 | 87 | ### read_result: Called when last_result has been updated by Pingmachine 88 | ### 89 | sub read_result { 90 | my ($file) = @_; 91 | my $results = LoadFile($file); 92 | 93 | printf("%-15s %-15s %s\n", 94 | strftime("%H:%M:%S", localtime($results->{updated})), 95 | strftime("%H:%M:%S", localtime(time)), 96 | $results->{median} 97 | ); 98 | } 99 | 100 | ### main: Main routine 101 | ### (because I don't like having things in the root scope) 102 | ### 103 | sub main { 104 | my $output_dir = write_order(); 105 | 106 | # Install signal watchers for SIGINT and SIGTERM 107 | my $quit_cv = AnyEvent->condvar; 108 | my $w1 = AnyEvent->signal(signal => "INT", cb => sub { $quit_cv->send() }); 109 | my $w2 = AnyEvent->signal(signal => "TERM", cb => sub { $quit_cv->send() }); 110 | 111 | # Install output directory watcher 112 | watch_output($output_dir); 113 | 114 | # Write header 115 | printf("%-15s %-15s %s\n", "Sample time", "Now", "RTT"); 116 | 117 | # Start event loop 118 | $quit_cv->recv; 119 | 120 | # Quitting -> remove order file 121 | remove_order(); 122 | } 123 | 124 | main; 125 | --------------------------------------------------------------------------------