├── .github └── FUNDING.yml ├── COPYING ├── README.md ├── check_bird2_bgp ├── README.md ├── check_bird2_bgp ├── check_bird2_bgp.command.conf └── check_bird2_bgp.service.conf ├── check_bird_bgp ├── README.md ├── check_bird_bgp ├── check_bird_bgp.command.conf └── check_bird_bgp.service.conf ├── check_bird_ospf ├── README.md ├── check_bird_ospf ├── check_bird_ospf.command.conf └── check_bird_ospf.service.conf ├── check_conntrack_size ├── README.md ├── check_conntrack_size ├── check_conntrack_size.command.conf └── check_conntrack_size.service.conf ├── check_dns_sync ├── check_dns_sync ├── check_dns_sync.command.conf └── check_dns_sync.service.conf ├── check_ifupdown2 ├── README.md ├── check_ifupdown2 ├── check_ifupdown2.command.conf └── check_ifupdown2.service.conf └── check_isilon ├── README.md ├── check_isilon ├── check_isilon.command.conf └── check_isilon.service.conf /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: BarbarossaTM 2 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. 
(Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. 
We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Icinga2 plugins / checks 2 | 3 | This repository contains a number of check plugins for the Icinga2 monitoring system I developed over time. 4 | 5 | All of them are available on [Icinga Exchange](https://exchange.icinga.com/BarbarossaTM). 6 | 7 | If you have suggestions or patches for how these checks can be extended, feel free to open a PR or issue. 8 | -------------------------------------------------------------------------------- /check_bird2_bgp/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | $ ./check_bird2_bgp -h 3 | usage: check_bird2_bgp [-h] --asn ASN [--ibgp] [--ibgp_w RANGE] 4 | [--ibgp_c RANGE] [--ebgp] [--ebgp_w RANGE] 5 | [--ebgp_c RANGE] [--disabled_ok] 6 | [--sessions_down_ok LIST] 7 | [--sessions_down_ok_file FILENAME] 8 | [--ignore_missing_file] 9 | 10 | check bird iBGP sessions 11 | 12 | optional arguments: 13 | -h, --help show this help message and exit 14 | --asn ASN, -A ASN Local AS number 15 | --ibgp, -i Check iBGP sessions 16 | --ibgp_w RANGE Warning interval for down iBGP sessions 17 | --ibgp_c RANGE Critical interval for down iBGP sessions 18 | --ebgp, -e Check eBGP sessions 19 | --ebgp_w RANGE Warning interval for down eBGP sessions 20 | --ebgp_c RANGE Critical interval for down eBGP sessions 21 | --disabled_ok Treat sessions disabled in bird as OK. 
22 | --sessions_down_ok LIST 23 | List of sessions which are OK to be down. Provide a 24 | space separated list. 25 | --sessions_down_ok_file FILENAME 26 | List of sessions which are OK to be down. Provide one 27 | interfaces per line. 28 | --ignore_missing_file 29 | Ignore a possible non-existent file given as 30 | --interfaces_down_ok_file 31 | ``` 32 | -------------------------------------------------------------------------------- /check_bird2_bgp/check_bird2_bgp: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # 3 | # Check state of BGP sessions in Bird 2.x Internet Routing Daemon 4 | # 5 | # Based on check_bird_bgp from Maximilian Wilhelm 6 | # 7 | # (C) 2023 by Mario Rimann 8 | # 9 | 10 | import argparse 11 | import os 12 | import re 13 | import subprocess 14 | import sys 15 | 16 | 17 | def read_sessions_from_file (file_path, missing_ok): 18 | sessions = [] 19 | 20 | # If we shouldn't care, we won't care if it's not there. 
21 | if not os.path.isfile (file_path) and missing_ok: 22 | return sessions 23 | 24 | try: 25 | with open (args.sessions_down_ok_file, 'r') as ido_fh: 26 | for session in ido_fh.readlines (): 27 | if not session.startswith ('#'): 28 | sessions.append (session.strip ()) 29 | 30 | except IOError as err: 31 | errno, strerror = err.args 32 | print ("Failed to read sessions_down_ok from '%s': %s" % (args.sessions_down_ok_file, strerror)) 33 | sys.exit (1) 34 | 35 | return sessions 36 | 37 | def validate_range_arg (arg_name): 38 | value = getattr (args, arg_name) 39 | if not value: 40 | return None 41 | 42 | # Check if a RANGE was given 43 | limits = value.split (':') 44 | if len (limits) != 2: 45 | return "Error: Invalid value for --%s, expected RANGE: %s" % (arg_name, value) 46 | 47 | # Try to validate range, on limit might be empty 48 | try: 49 | # Try to parse range values to integers if present 50 | a = None 51 | b = None 52 | if (limits[0] != ''): 53 | a = int (limits[0]) 54 | if (limits[1] != ''): 55 | b = int (limits[1]) 56 | 57 | # Validate range if both values were given 58 | if (a != None and b != None and a > b): 59 | return "Error: Invalid value for --%s, invalid RANGE: %s" % (arg_name, value) 60 | except ValueError: 61 | return "Error: Expected numeric values in RANGE for --%s: %s" % (arg_name, value) 62 | 63 | 64 | ################################################################################ 65 | # Argument parsing and basic input validation # 66 | ################################################################################ 67 | 68 | parser = argparse.ArgumentParser (description = 'check bird iBGP sessions') 69 | 70 | parser.add_argument ('--asn', '-A', help = "Local AS number", required = True) 71 | parser.add_argument ('--ibgp', '-i', help = "Check iBGP sessions", action = 'store_true') 72 | parser.add_argument ('--ibgp_warn', '--ibgp_w', help = "Warning interval for down iBGP sessions", default = "1:1", metavar = "RANGE") 73 | parser.add_argument 
('--ibgp_crit', '--ibgp_c', help = "Critical interval for down iBGP sessions", default = "2:", metavar = "RANGE") 74 | parser.add_argument ('--ebgp', '-e', help = "Check eBGP sessions", action = 'store_true') 75 | parser.add_argument ('--ebgp_warn', '--ebgp_w', help = "Warning interval for down eBGP sessions", default = "1:1", metavar = "RANGE") 76 | parser.add_argument ('--ebgp_crit', '--ebgp_c', help = "Critical interval for down eBGP sessions", default = "2:", metavar = "RANGE") 77 | parser.add_argument ('--disabled_ok', help = "Treat sessions disabled in bird as OK.", action = 'store_true') 78 | parser.add_argument ('--sessions_down_ok', metavar = "LIST", help = "List of sessions which are OK to be down. Provide a space separated list.") 79 | parser.add_argument ('--sessions_down_ok_file', metavar = "FILENAME", help = "List of sessions which are OK to be down. Provide one interfaces per line.") 80 | parser.add_argument ('--ignore_missing_file', help = "Ignore a possible non-existent file given as --interfaces_down_ok_file", action = 'store_true') 81 | parser.add_argument ('--session', help = "Only check for session with given name.") 82 | parser.add_argument ('--show_performance_data', help='Whether to show performance data or not', action='store_true') 83 | parser.add_argument ('--routes_imported_warn', help = "Warning interval for imported routes", metavar = "RANGE") 84 | parser.add_argument ('--routes_imported_crit', help = "Critical interval for imported routes", metavar = "RANGE") 85 | parser.add_argument ('--routes_exported_warn', help = "Warning interval for exported routes", metavar = "RANGE") 86 | parser.add_argument ('--routes_exported_crit', help = "Critical interval for exported routes", metavar = "RANGE") 87 | parser.add_argument ('--routes_preferred_warn', help = "Warning interval for preferred routes", metavar = "RANGE") 88 | parser.add_argument ('--routes_preferred_crit', help = "Critical interval for preferred routes", metavar = "RANGE") 89 | 
90 | args = parser.parse_args () 91 | 92 | if not args.ibgp and not args.ebgp: 93 | print ("Error: You have to enable at least one of iBGP and eBGP checking.\n", file=sys.stderr) 94 | parser.print_help () 95 | sys.exit (3) 96 | 97 | if args.session and args.ibgp and args.ebgp: 98 | print ("Error: A single session can't be iBGP and eBGP at the same time!") 99 | parser.print_help () 100 | sys.exit (3) 101 | 102 | # Validate limit arguments 103 | for item in ('ibgp', 'ebgp', 'routes_imported', 'routes_exported', 'routes_preferred'): 104 | for severity in ('warn', 'crit'): 105 | msg = validate_range_arg ("%s_%s" % (item, severity)) 106 | if msg: 107 | print (msg) 108 | sys.exit (3) 109 | 110 | 111 | session_down_codes = { 112 | 'warn' : [ 1, 'WARNING' ], 113 | 'crit' : [ 2, 'CRITICAL'], 114 | } 115 | route_codes = { 116 | 'routes_exported' : 'Exported', 117 | 'routes_imported' : 'Imported', 118 | 'routes_preferred' : 'Preferred', 119 | } 120 | 121 | # Are some sessions ok being down? 122 | sessions_down_ok = [] 123 | if args.sessions_down_ok: 124 | sessions_down_ok = args.sessions_down_ok.split () 125 | 126 | if args.sessions_down_ok_file: 127 | sessions_down_ok.extend (read_sessions_from_file (args.sessions_down_ok_file, args.ignore_missing_file)) 128 | 129 | ################################################################################ 130 | # Query BGP protocols from bird # 131 | ################################################################################ 132 | 133 | # Check for one specific session only 134 | if args.session: 135 | cmd = [ "/usr/bin/sudo", '/usr/sbin/birdc', "show protocol all %s" % args.session ] 136 | # Check for all sessions and filter later 137 | else: 138 | cmd = [ "/usr/bin/sudo", '/usr/sbin/birdc', "show protocols all" ] 139 | 140 | try: 141 | protocols = subprocess.Popen (cmd, bufsize = 4194304, stdout = subprocess.PIPE).stdout 142 | 143 | # cmd exited with non-zero code 144 | except subprocess.CalledProcessError as c: 145 | print 
("Failed to run %s: %s" % (" ".join (cmd), c.output)) 146 | sys.exit (1) 147 | 148 | # This should not have happend. 149 | except Exception as e: 150 | print ("Unknown error while running %s: %s" % (" ".join (cmd), str (e))) 151 | sys.exit (3) 152 | 153 | 154 | # cr03_in_ffho_net BGP master up 2017-04-06 Established 155 | # Preference: 100 156 | # Input filter: ibgp_in 157 | # Output filter: ibgp_out 158 | # Routes: 38 imported, 3 exported, 1 preferred 159 | # OR 160 | # Routes: 1 imported, 0 filtered, 1 exported, 0 preferred 161 | # Route change stats: received rejected filtered ignored accepted 162 | # Import updates: 16779 0 0 72 16707 163 | # Import withdraws: 18012 0 --- 1355 16657 164 | # Export updates: 55104 18903 24743 --- 11458 165 | # Export withdraws: 9789 --- --- --- 11455 166 | # BGP state: Established 167 | # Neighbor address: 10.132.255.3 168 | # Neighbor AS: 65132 169 | # Neighbor ID: 10.132.255.3 170 | # Neighbor caps: refresh enhanced-refresh restart-able AS4 171 | # Session: internal multihop AS4 172 | # Source address: 10.132.255.12 173 | # Hold timer: 198/240 174 | # Keepalive timer: 13/80 175 | 176 | ################################################################################ 177 | # Parse all fields from bird output into bgp_sessions dict # 178 | ################################################################################ 179 | 180 | bgp_sessions = {} 181 | 182 | # Simple fields with only one values 183 | simple_fields = [ 'Preference', 'Input filter', 'Output filter', 'BGP state', 'Neighbor address', 'Neighbor AS', 184 | 'Neighbor ID', 'Source address', 'Hold timer', 'Keepalive timer', 'Last error' ] 185 | 186 | # More "complex" fields 187 | fields = { 188 | 'Routes' : { 189 | 're' : re.compile (r'Routes:\s+(\d+) imported, ((\d+) filtered, )?(\d+) exported, (\d+) preferred'), 190 | 'groups' : [ 1, 4, 5 ], 191 | 'mangle_dict' : { 192 | 'Routes imported' : 1, 193 | 'Routes exported' : 4, 194 | 'Routes preferred' : 5, 195 | } 196 | }, 
197 | 198 | 'Neighbor caps' : { 199 | 're' : re.compile (r'Neighbor caps:\s+(.+)$'), 200 | 'groups' : [ 1 ], 201 | 'list' : True, 202 | 'split' : lambda x: x.split (), 203 | }, 204 | 205 | 'Session' : { 206 | 're' : re.compile (r'Session:\s+(.+)$'), 207 | 'groups' : [ 1 ], 208 | 'list' : True, 209 | 'split' : lambda x: x.split (), 210 | }, 211 | } 212 | 213 | # Generate entries for simple fields 214 | for field in simple_fields: 215 | fields[field] = { 216 | 're' : re.compile (r'^\s*%s:\s+(.+)$' % field), 217 | 'groups' : [ 1 ], 218 | } 219 | 220 | 221 | proto_re = re.compile (r'^([0-9a-zA-Z_.-]+)\s+BGP\s+') # XXX 222 | ignore_re = re.compile (r'^(BIRD [0-9.]+ ready.|name\s+proto\s+table\s+.*)?$') 223 | 224 | 225 | # Parse session list 226 | protocol = None 227 | proto_dict = None 228 | for line in protocols.readlines (): 229 | line = line.strip () 230 | 231 | # Python3 glue 232 | if sys.version_info >= (3, 0): 233 | line = str (line, encoding='utf-8') 234 | 235 | # Preamble or empty string 236 | if ignore_re.search (line): 237 | protocol = None 238 | proto_dict = None 239 | continue 240 | 241 | # Start of a new protocol 242 | match = proto_re.search (line) 243 | if match: 244 | protocol = match.group (1) 245 | bgp_sessions[protocol] = {} 246 | proto_dict = bgp_sessions[protocol] 247 | continue 248 | 249 | # Ignore any non-BGP protocols, empty lines, etc. 250 | if protocol == None: 251 | continue 252 | 253 | # Parse and store any interesting lines / fields 254 | for field, config in fields.items (): 255 | match = config['re'].search (line) 256 | if not match: 257 | continue 258 | 259 | # Get values from match 260 | values = [] 261 | for group in config['groups']: 262 | values.append (match.group (group)) 263 | 264 | # Store entries separately? 265 | mangle_dict = config.get ('mangle_dict', None) 266 | if mangle_dict: 267 | for entry, group in mangle_dict.items (): 268 | proto_dict[entry] = match.group (group) 269 | 270 | # Store as list? 
271 | if config.get ('list', False) == True: 272 | proto_dict[field] = config['split'] (match.group (1)) 273 | 274 | # Store as string 275 | else: 276 | proto_dict[field] = " ".join (values) 277 | 278 | 279 | ################################################################################ 280 | # Check the status quo # 281 | ################################################################################ 282 | 283 | up = [] 284 | down = [] 285 | ret_code = 0 286 | 287 | # for performance data 288 | total_routes_imported = 0 289 | total_routes_exported = 0 290 | 291 | down_by_proto = { 292 | 'ibgp' : [], 293 | 'ebgp' : [] 294 | } 295 | 296 | proto_str = { 297 | 'ibgp' : 'iBGP', 298 | 'ebgp' : 'eBGP' 299 | } 300 | 301 | sessions_up = {} 302 | 303 | for protoname, config in sorted (bgp_sessions.items ()): 304 | session_args = config.get ('Session', []) 305 | 306 | # Check if user gave us a remote ASN as local AS 307 | if ('external' in session_args) and (config['Neighbor AS'] == args.asn): 308 | print ("ERROR: Session %s is eBGP but has our ASN! The given local ASN seems wrong!" % protoname) 309 | ret_code = 3 310 | 311 | if ('internal' in session_args) and (config['Neighbor AS'] != args.asn): 312 | print ("ERROR: Session %s is iBGP but does not have our ASN! The given local ASN seems wrong!" 
% protoname) 313 | ret_code = 3 314 | 315 | # Determine session type 316 | session_type = "ibgp" 317 | if ('external' in session_args) or (config['Neighbor AS'] != args.asn): 318 | session_type = "ebgp" 319 | remote_as = "I" if session_type == "ibgp" else config.get ('Neighbor AS') 320 | session_desc = "%s/%s" % (protoname, remote_as) 321 | 322 | # Skip iBGP/eBGP sessions when not asked to check them, but check for specific session, if given 323 | if (args.ibgp != True and (('internal' in session_args) or (config['Neighbor AS'] == args.asn))) or \ 324 | (args.ebgp != True and (('external' in session_args) or (config['Neighbor AS'] != args.asn))): 325 | if not args.session: 326 | continue 327 | 328 | expected = "iBGP" if args.ibgp else "eBGP" 329 | print ("ERROR: Session %s is %s but %s was expected!" % (args.session, proto_str[session_type], expected)) 330 | ret_code = 2 331 | 332 | bgp_state = config['BGP state'] 333 | if bgp_state == 'Established': 334 | up.append (session_desc) 335 | sessions_up[session_desc] = config['Routes'] 336 | 337 | # Session disable and we don't care 338 | elif bgp_state == 'Down' and args.disabled_ok: 339 | up.append (session_desc + " (Disabled)") 340 | 341 | # Session down but in session_down_ok* list 342 | elif protoname in sessions_down_ok: 343 | up.append (session_desc + " (Down/OK)") 344 | 345 | # Something's broken 346 | else: 347 | last_error = 'Disabled' if bgp_state == 'Down' else config.get ('Last error', 'unknown') 348 | session_desc += " (%s)" % last_error 349 | 350 | down.append (session_desc) 351 | down_by_proto[session_type].append (session_desc) 352 | 353 | 354 | # Check down iBGP / eBGP sessions limits 355 | for proto, sessions in down_by_proto.items (): 356 | down_sessions = len (sessions) 357 | if down_sessions == 0: 358 | continue 359 | 360 | for level in [ 'warn', 'crit' ]: 361 | limits = getattr (args, "%s_%s" % (proto, level)).split (":") 362 | code, code_name = session_down_codes[level] 363 | 364 | # Check if 
number of down sessions is within warning or critical limits 365 | if (limits[0] == '' or down_sessions >= int (limits[0])) and \ 366 | (limits[1] == '' or down_sessions <= int (limits[1])): 367 | if ret_code < code: 368 | ret_code = code 369 | 370 | # Check routes for up sessions 371 | for session, routes in sessions_up.items (): 372 | session_info = {} 373 | session_info['routes_imported'], session_info['routes_exported'], session_info['routes_preferred'] = routes.split (' ') 374 | 375 | total_routes_imported += int(session_info['routes_imported']) 376 | total_routes_exported += int(session_info['routes_exported']) 377 | 378 | for r_type in route_codes.keys(): 379 | for level in [ 'crit', 'warn' ]: 380 | try: 381 | limits = getattr (args, "%s_%s" % (r_type, level)).split (":") 382 | except: 383 | pass 384 | else: 385 | code, code_name = session_down_codes[level] 386 | if (limits[0] == '' or int(session_info[r_type]) >= int (limits[0])) and \ 387 | (limits[1] == '' or int(session_info[r_type]) <= int (limits[1])): 388 | if ret_code < code: 389 | ret_code = code 390 | print("%s Routes: %s with %s route(s) is %s" % (route_codes[r_type],session,session_info[r_type],code_name)) 391 | break 392 | 393 | # Special handling for session given by name 394 | if args.session: 395 | # Check is given session name was found 396 | if len (bgp_sessions) == 0: 397 | print ("ERROR: Given session %s not present in configuration!" 
% args.session) 398 | sys.exit (2) 399 | 400 | if len (down) > 0: 401 | print ("DOWN: %s" % ", ".join (down)) 402 | 403 | if len (up) > 0: 404 | print ("OK: %s" % ", ".join (up)) 405 | 406 | if args.show_performance_data: 407 | print('| num_sessions=%i total_routes_imported=%i total_routes_exported=%i' % (len(bgp_sessions), total_routes_imported, total_routes_exported)) 408 | 409 | sys.exit (ret_code) 410 | -------------------------------------------------------------------------------- /check_bird2_bgp/check_bird2_bgp.command.conf: -------------------------------------------------------------------------------- 1 | object CheckCommand "bird2_bgp" { 2 | import "plugin-check-command" 3 | 4 | command = [ "/usr/bin/sudo", PluginDir + "/check_bird2_bgp" ] 5 | 6 | arguments = { 7 | "--proto" = "$proto$" # IP protocol version to check 8 | "--asn" = "$asn$" # Local AS number 9 | "--ibgp" = { # Check iBGP sessions 10 | set_if = "$ibgp$" 11 | } 12 | "--ibgp_warn" = "$ibgp_warn$" # Warning interval for down iBGP sessions 13 | "--ibgp_crit" = "$ibgp_crit$" # Critical interval for down iBGP sessions 14 | "--ebgp" = { # Check eBGP sessions 15 | set_if = "$ebgp$" 16 | } 17 | "--ebgp_warn" = "$ebgp_warn$" # Warning interval for down eBGP sessions 18 | "--ebgp_crit" = "$ebgp_crit$" # Critical interval for down eBGP sessions 19 | "--disabled_ok" = { # Treat sessions disabled in bird as OK. 20 | set_if = "$disabled_ok$" 21 | } 22 | "--show_performance_data" = { # enable output of performance data if requested 23 | set_if = "$show_performance_data$" 24 | } 25 | "--sessions_down_ok" = "$sessions_down_ok$" 26 | # List of sessions which are OK to be down. (Space separated list) 27 | "--sessions_down_ok_file" = "$sessions_down_ok_file$" 28 | # List of sessions which are OK to be down. 
(One per line) 29 | "--ignore_missing_file" = { # Ignore a possible non-existent file given as --sessions_down_ok_file 30 | set_if = "$ignore_missing_file$" 31 | } 32 | "--session" = "$session$" # Only check for session with given name 33 | "--routes_imported_warn" = "$routes_imported_warn$" # Warning interval for imported routes 34 | "--routes_imported_crit" = "$routes_imported_crit$" # Critical interval for imported routes 35 | "--routes_exported_warn" = "$routes_exported_warn$" # Warning interval for exported routes 36 | "--routes_exported_crit" = "$routes_exported_crit$" # Critical interval for exported routes 37 | "--routes_preferred_warn" = "$routes_preferred_warn$" # Warning interval for preferred routes 38 | "--routes_preferred_crit" = "$routes_preferred_crit$" # Critical interval for preferred routes 39 | } 40 | 41 | vars.proto = "4" 42 | vars.ibgp_warn = "1:1" 43 | vars.ibgp_crit = "2:" 44 | vars.ebgp_warn = "1:1" 45 | vars.ebgp_crit = "2:" 46 | } 47 | -------------------------------------------------------------------------------- /check_bird2_bgp/check_bird2_bgp.service.conf: -------------------------------------------------------------------------------- 1 | # 2 | # bird2 iBGP 3 | apply Service "bird2_ibgp4" { 4 | import "generic-service" 5 | 6 | check_command = "bird2_bgp" 7 | 8 | if (host.name != NodeName) { 9 | command_endpoint = host.name 10 | } 11 | 12 | vars.ibgp = true 13 | vars.ibgp_warn = "1:1" 14 | vars.ibgp_crit = "2:" 15 | vars.asn = 65132 16 | vars.proto = "4" 17 | vars.sessions_down_ok_file = "/etc/icinga2/ffho-conf.d/bird2_ibgp_sessions_down_ok.txt" 18 | vars.ignore_missing_file = true 19 | 20 | assign where host.address && host.vars.os == "Linux" && "router" in host.vars.roles 21 | } 22 | 23 | apply Service "bird2_ibgp6" { 24 | import "generic-service" 25 | 26 | check_command = "bird2_bgp" 27 | 28 | if (host.name != NodeName) { 29 | command_endpoint = host.name 30 | } 31 | 32 | vars.ibgp = true 33 | vars.ibgp_warn = "1:1" 34 |
vars.ibgp_crit = "2:" 35 | vars.asn = 65132 36 | vars.proto = "6" 37 | vars.sessions_down_ok_file = "/etc/icinga2/ffho-conf.d/bird2_ibgp_sessions_down_ok.txt" 38 | vars.ignore_missing_file = true 39 | 40 | assign where host.address && host.vars.os == "Linux" && "router" in host.vars.roles 41 | } 42 | 43 | 44 | # 45 | # bird2 eBGP 46 | apply Service "bird2_ebgp4" { 47 | import "generic-service" 48 | 49 | check_command = "bird2_bgp" 50 | 51 | if (host.name != NodeName) { 52 | command_endpoint = host.name 53 | } 54 | 55 | vars.ebgp = true 56 | vars.asn = 65132 57 | vars.proto = "4" 58 | 59 | assign where host.address && host.vars.os == "Linux" && "ffrl-exit" in host.vars.roles 60 | } 61 | 62 | apply Service "bird2_ebgp6" { 63 | import "generic-service" 64 | 65 | check_command = "bird2_bgp" 66 | 67 | if (host.name != NodeName) { 68 | command_endpoint = host.name 69 | } 70 | 71 | vars.ebgp = true 72 | vars.asn = 65132 73 | vars.proto = "6" 74 | 75 | assign where host.address && host.vars.os == "Linux" && "ffrl-exit" in host.vars.roles 76 | } 77 | 78 | 79 | ################################################################################ 80 | # Session-based checking # 81 | ################################################################################ 82 | 83 | apply Service "bird2_ibgp_" for (key => session in host.vars.ibgp_peers) { 84 | import "generic-service" 85 | 86 | check_command = "bird2_bgp" 87 | 88 | if (host.name != NodeName) { 89 | command_endpoint = host.name 90 | } 91 | 92 | vars.ibgp = true 93 | vars.ibgp_warn = "1:1" 94 | vars.ibgp_crit = "2:" 95 | vars.asn = 65049 96 | vars.sessions_down_ok_file = "/etc/bird/ibgp_sessions_to_ignore.txt" 97 | vars.ignore_missing_file = true 98 | vars.session = session 99 | 100 | vars.grafana_graph_disable = true 101 | vars.show_performance_data = true 102 | 103 | assign where host.address && "routereflector" in host.vars.groups 104 | } 105 | 106 | apply Service "bird2_ebgp_" for (key => session in host.vars.ebgp_peers) 
{ 107 | import "generic-service" 108 | 109 | check_command = "bird2_bgp" 110 | 111 | if (host.name != NodeName) { 112 | command_endpoint = host.name 113 | } 114 | 115 | vars.ebgp = true 116 | vars.ebgp_warn = "1:1" 117 | vars.ebgp_crit = "2:" 118 | vars.asn = 65049 119 | vars.sessions_down_ok_file = "/etc/bird/ebgp_sessions_to_ignore.txt" 120 | vars.ignore_missing_file = true 121 | vars.session = session 122 | 123 | vars.grafana_graph_disable = true 124 | vars.show_performance_data = true 125 | 126 | assign where host.address && "routereflector" in host.vars.groups 127 | } 128 | -------------------------------------------------------------------------------- /check_bird_bgp/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | $ ./check_bird_bgp -h 3 | usage: check_bird_bgp [-h] [--proto {4,6}] --asn ASN [--ibgp] [--ibgp_w RANGE] 4 | [--ibgp_c RANGE] [--ebgp] [--ebgp_w RANGE] 5 | [--ebgp_c RANGE] [--disabled_ok] 6 | [--sessions_down_ok LIST] 7 | [--sessions_down_ok_file FILENAME] 8 | [--ignore_missing_file] 9 | 10 | check bird iBGP sessions 11 | 12 | optional arguments: 13 | -h, --help show this help message and exit 14 | --proto {4,6}, -p {4,6} 15 | IP protocol version to check 16 | --asn ASN, -A ASN Local AS number 17 | --ibgp, -i Check iBGP sessions 18 | --ibgp_w RANGE Warning interval for down iBGP sessions 19 | --ibgp_c RANGE Critical interval for down iBGP sessions 20 | --ebgp, -e Check eBGP sessions 21 | --ebgp_w RANGE Warning interval for down eBGP sessions 22 | --ebgp_c RANGE Critical interval for down eBGP sessions 23 | --disabled_ok Treat sessions disabled in bird as OK. 24 | --sessions_down_ok LIST 25 | List of sessions which are OK to be down. Provide a 26 | space separated list. 27 | --sessions_down_ok_file FILENAME 28 | List of sessions which are OK to be down. Provide one 29 | interfaces per line. 
30 | --ignore_missing_file 31 | Ignore a possible non-existent file given as 32 | --interfaces_down_ok_file 33 | ``` 34 | -------------------------------------------------------------------------------- /check_bird_bgp/check_bird_bgp: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # 3 | # Check state of BGP sessions in Bird 1.x Internet Routing Daemon 4 | # 5 | # Maximilian Wilhelm 6 | # -- Thu 13 Apr 2017 12:04:13 PM CEST 7 | # 8 | 9 | import argparse 10 | import os 11 | import re 12 | import subprocess 13 | import sys 14 | 15 | 16 | def read_sessions_from_file (file_path, missing_ok): 17 | sessions = [] 18 | 19 | # If we shouldn't care, we won't care if it's not there. 20 | if not os.path.isfile (file_path) and missing_ok: 21 | return sessions 22 | 23 | try: 24 | with open (args.sessions_down_ok_file, 'r') as ido_fh: 25 | for session in ido_fh.readlines (): 26 | if not session.startswith ('#'): 27 | sessions.append (session.strip ()) 28 | 29 | except IOError as err: 30 | errno, strerror = err.args 31 | print ("Failed to read sessions_down_ok from '%s': %s" % (args.sessions_down_ok_file, strerror)) 32 | sys.exit (1) 33 | 34 | return sessions 35 | 36 | def validate_range_arg (arg_name): 37 | value = getattr (args, arg_name) 38 | if not value: 39 | return None 40 | 41 | # Check if a RANGE was given 42 | limits = value.split (':') 43 | if len (limits) != 2: 44 | return "Error: Invalid value for --%s, expected RANGE: %s" % (arg_name, value) 45 | 46 | # Try to validate range, on limit might be empty 47 | try: 48 | # Try to parse range values to integers if present 49 | a = None 50 | b = None 51 | if (limits[0] != ''): 52 | a = int (limits[0]) 53 | if (limits[1] != ''): 54 | b = int (limits[1]) 55 | 56 | # Validate range if both values were given 57 | if (a != None and b != None and a > b): 58 | return "Error: Invalid value for --%s, invalid RANGE: %s" % (arg_name, value) 59 | except ValueError: 60 | return 
"Error: Expected numeric values in RANGE for --%s: %s" % (arg_name, value) 61 | 62 | 63 | ################################################################################ 64 | # Argument parsing and basic input validation # 65 | ################################################################################ 66 | 67 | parser = argparse.ArgumentParser (description = 'check bird iBGP sessions') 68 | 69 | parser.add_argument ('--proto', '-p', help = 'IP protocol version to check', default = '4', choices = ['4', '6']) 70 | parser.add_argument ('--asn', '-A', help = "Local AS number", required = True) 71 | parser.add_argument ('--ibgp', '-i', help = "Check iBGP sessions", action = 'store_true') 72 | parser.add_argument ('--ibgp_warn', '--ibgp_w', help = "Warning interval for down iBGP sessions", default = "1:1", metavar = "RANGE") 73 | parser.add_argument ('--ibgp_crit', '--ibgp_c', help = "Critical interval for down iBGP sessions", default = "2:", metavar = "RANGE") 74 | parser.add_argument ('--ebgp', '-e', help = "Check eBGP sessions", action = 'store_true') 75 | parser.add_argument ('--ebgp_warn', '--ebgp_w', help = "Warning interval for down eBGP sessions", default = "1:1", metavar = "RANGE") 76 | parser.add_argument ('--ebgp_crit', '--ebgp_c', help = "Critical interval for down eBGP sessions", default = "2:", metavar = "RANGE") 77 | parser.add_argument ('--disabled_ok', help = "Treat sessions disabled in bird as OK.", action = 'store_true') 78 | parser.add_argument ('--sessions_down_ok', metavar = "LIST", help = "List of sessions which are OK to be down. Provide a space separated list.") 79 | parser.add_argument ('--sessions_down_ok_file', metavar = "FILENAME", help = "List of sessions which are OK to be down. 
Provide one interfaces per line.") 80 | parser.add_argument ('--ignore_missing_file', help = "Ignore a possible non-existent file given as --interfaces_down_ok_file", action = 'store_true') 81 | parser.add_argument ('--session', help = "Only check for session with given name.") 82 | parser.add_argument ('--show_performance_data', help='Whether to show performance data or not', action='store_true') 83 | parser.add_argument ('--routes_imported_warn', help = "Warning interval for imported routes", metavar = "RANGE") 84 | parser.add_argument ('--routes_imported_crit', help = "Critical interval for imported routes", metavar = "RANGE") 85 | parser.add_argument ('--routes_exported_warn', help = "Warning interval for exported routes", metavar = "RANGE") 86 | parser.add_argument ('--routes_exported_crit', help = "Critical interval for exported routes", metavar = "RANGE") 87 | parser.add_argument ('--routes_preferred_warn', help = "Warning interval for preferred routes", metavar = "RANGE") 88 | parser.add_argument ('--routes_preferred_crit', help = "Critical interval for preferred routes", metavar = "RANGE") 89 | 90 | args = parser.parse_args () 91 | 92 | if not args.ibgp and not args.ebgp: 93 | print ("Error: You have to enable at least one of iBGP and eBGP checking.\n", file=sys.stderr) 94 | parser.print_help () 95 | sys.exit (3) 96 | 97 | if args.session and args.ibgp and args.ebgp: 98 | print ("Error: A single session can't be iBGP and eBGP at the same time!") 99 | parser.print_help () 100 | sys.exit (3) 101 | 102 | # Validate limit arguments 103 | for item in ('ibgp', 'ebgp', 'routes_imported', 'routes_exported', 'routes_preferred'): 104 | for severity in ('warn', 'crit'): 105 | msg = validate_range_arg ("%s_%s" % (item, severity)) 106 | if msg: 107 | print (msg) 108 | sys.exit (3) 109 | 110 | 111 | session_down_codes = { 112 | 'warn' : [ 1, 'WARNING' ], 113 | 'crit' : [ 2, 'CRITICAL'], 114 | } 115 | route_codes = { 116 | 'routes_exported' : 'Exported', 117 | 
'routes_imported' : 'Imported', 118 | 'routes_preferred' : 'Preferred', 119 | } 120 | 121 | # Are some sessions ok being down? 122 | sessions_down_ok = [] 123 | if args.sessions_down_ok: 124 | sessions_down_ok = args.sessions_down_ok.split () 125 | 126 | if args.sessions_down_ok_file: 127 | sessions_down_ok.extend (read_sessions_from_file (args.sessions_down_ok_file, args.ignore_missing_file)) 128 | 129 | ################################################################################ 130 | # Query BGP protocols from bird # 131 | ################################################################################ 132 | cmds = { 133 | '4' : '/usr/sbin/birdc', 134 | '6' : '/usr/sbin/birdc6', 135 | } 136 | 137 | # Check for one specific session only 138 | if args.session: 139 | cmd = [ "/usr/bin/sudo", cmds[args.proto], "show protocol all %s" % args.session ] 140 | # Check for all sessions and filter later 141 | else: 142 | cmd = [ "/usr/bin/sudo", cmds[args.proto], "show protocols all" ] 143 | 144 | try: 145 | protocols = subprocess.Popen (cmd, bufsize = 4194304, stdout = subprocess.PIPE).stdout 146 | 147 | # cmd exited with non-zero code 148 | except subprocess.CalledProcessError as c: 149 | print ("Failed to run %s: %s" % (" ".join (cmd), c.output)) 150 | sys.exit (1) 151 | 152 | # This should not have happend. 
153 | except Exception as e: 154 | print ("Unknown error while running %s: %s" % (" ".join (cmd), str (e))) 155 | sys.exit (3) 156 | 157 | 158 | # cr03_in_ffho_net BGP master up 2017-04-06 Established 159 | # Preference: 100 160 | # Input filter: ibgp_in 161 | # Output filter: ibgp_out 162 | # Routes: 38 imported, 3 exported, 1 preferred 163 | # OR 164 | # Routes: 1 imported, 0 filtered, 1 exported, 0 preferred 165 | # Route change stats: received rejected filtered ignored accepted 166 | # Import updates: 16779 0 0 72 16707 167 | # Import withdraws: 18012 0 --- 1355 16657 168 | # Export updates: 55104 18903 24743 --- 11458 169 | # Export withdraws: 9789 --- --- --- 11455 170 | # BGP state: Established 171 | # Neighbor address: 10.132.255.3 172 | # Neighbor AS: 65132 173 | # Neighbor ID: 10.132.255.3 174 | # Neighbor caps: refresh enhanced-refresh restart-able AS4 175 | # Session: internal multihop AS4 176 | # Source address: 10.132.255.12 177 | # Hold timer: 198/240 178 | # Keepalive timer: 13/80 179 | 180 | ################################################################################ 181 | # Parse all fields from bird output into bgp_sessions dict # 182 | ################################################################################ 183 | 184 | bgp_sessions = {} 185 | 186 | # Simple fields with only one values 187 | simple_fields = [ 'Preference', 'Input filter', 'Output filter', 'BGP state', 'Neighbor address', 'Neighbor AS', 188 | 'Neighbor ID', 'Source address', 'Hold timer', 'Keepalive timer', 'Last error' ] 189 | 190 | # More "complex" fields 191 | fields = { 192 | 'Routes' : { 193 | 're' : re.compile (r'Routes:\s+(\d+) imported, ((\d+) filtered, )?(\d+) exported, (\d+) preferred'), 194 | 'groups' : [ 1, 4, 5 ], 195 | 'mangle_dict' : { 196 | 'Routes imported' : 1, 197 | 'Routes exported' : 4, 198 | 'Routes preferred' : 5, 199 | } 200 | }, 201 | 202 | 'Neighbor caps' : { 203 | 're' : re.compile (r'Neighbor caps:\s+(.+)$'), 204 | 'groups' : [ 1 ], 205 | 
'list' : True, 206 | 'split' : lambda x: x.split (), 207 | }, 208 | 209 | 'Session' : { 210 | 're' : re.compile (r'Session:\s+(.+)$'), 211 | 'groups' : [ 1 ], 212 | 'list' : True, 213 | 'split' : lambda x: x.split (), 214 | }, 215 | } 216 | 217 | # Generate entries for simple fields 218 | for field in simple_fields: 219 | fields[field] = { 220 | 're' : re.compile (r'^\s*%s:\s+(.+)$' % field), 221 | 'groups' : [ 1 ], 222 | } 223 | 224 | 225 | proto_re = re.compile (r'^([0-9a-zA-Z_.-]+)\s+BGP\s+') # XXX 226 | ignore_re = re.compile (r'^(BIRD [0-9.]+ ready.|name\s+proto\s+table\s+.*)?$') 227 | 228 | 229 | # Parse session list 230 | protocol = None 231 | proto_dict = None 232 | for line in protocols.readlines (): 233 | line = line.strip () 234 | 235 | # Python3 glue 236 | if sys.version_info >= (3, 0): 237 | line = str (line, encoding='utf-8') 238 | 239 | # Preamble or empty string 240 | if ignore_re.search (line): 241 | protocol = None 242 | proto_dict = None 243 | continue 244 | 245 | # Start of a new protocol 246 | match = proto_re.search (line) 247 | if match: 248 | protocol = match.group (1) 249 | bgp_sessions[protocol] = {} 250 | proto_dict = bgp_sessions[protocol] 251 | continue 252 | 253 | # Ignore any non-BGP protocols, empty lines, etc. 254 | if protocol == None: 255 | continue 256 | 257 | # Parse and store any interesting lines / fields 258 | for field, config in fields.items (): 259 | match = config['re'].search (line) 260 | if not match: 261 | continue 262 | 263 | # Get values from match 264 | values = [] 265 | for group in config['groups']: 266 | values.append (match.group (group)) 267 | 268 | # Store entries separately? 269 | mangle_dict = config.get ('mangle_dict', None) 270 | if mangle_dict: 271 | for entry, group in mangle_dict.items (): 272 | proto_dict[entry] = match.group (group) 273 | 274 | # Store as list? 
275 | if config.get ('list', False) == True: 276 | proto_dict[field] = config['split'] (match.group (1)) 277 | 278 | # Store as string 279 | else: 280 | proto_dict[field] = " ".join (values) 281 | 282 | 283 | ################################################################################ 284 | # Check the status quo # 285 | ################################################################################ 286 | 287 | up = [] 288 | down = [] 289 | ret_code = 0 290 | 291 | # for performance data 292 | total_routes_imported = 0 293 | total_routes_exported = 0 294 | 295 | down_by_proto = { 296 | 'ibgp' : [], 297 | 'ebgp' : [] 298 | } 299 | 300 | proto_str = { 301 | 'ibgp' : 'iBGP', 302 | 'ebgp' : 'eBGP' 303 | } 304 | 305 | sessions_up = {} 306 | 307 | for protoname, config in sorted (bgp_sessions.items ()): 308 | session_args = config.get ('Session', []) 309 | 310 | # Check if user gave us a remote ASN as local AS 311 | if ('external' in session_args) and (config['Neighbor AS'] == args.asn): 312 | print ("ERROR: Session %s is eBGP but has our ASN! The given local ASN seems wrong!" % protoname) 313 | ret_code = 3 314 | 315 | if ('internal' in session_args) and (config['Neighbor AS'] != args.asn): 316 | print ("ERROR: Session %s is iBGP but does not have our ASN! The given local ASN seems wrong!" 
% protoname) 317 | ret_code = 3 318 | 319 | # Determine session type 320 | session_type = "ibgp" 321 | if ('external' in session_args) or (config['Neighbor AS'] != args.asn): 322 | session_type = "ebgp" 323 | remote_as = "I" if session_type == "ibgp" else config.get ('Neighbor AS') 324 | session_desc = "%s/%s" % (protoname, remote_as) 325 | 326 | # Skip iBGP/eBGP sessions when not asked to check them, but check for specific session, if given 327 | if (args.ibgp != True and (('internal' in session_args) or (config['Neighbor AS'] == args.asn))) or \ 328 | (args.ebgp != True and (('external' in session_args) or (config['Neighbor AS'] != args.asn))): 329 | if not args.session: 330 | continue 331 | 332 | expected = "iBGP" if args.ibgp else "eBGP" 333 | print ("ERROR: Session %s is %s but %s was expected!" % (args.session, proto_str[session_type], expected)) 334 | ret_code = 2 335 | 336 | bgp_state = config['BGP state'] 337 | if bgp_state == 'Established': 338 | up.append (session_desc) 339 | sessions_up[session_desc] = config['Routes'] 340 | 341 | # Session disable and we don't care 342 | elif bgp_state == 'Down' and args.disabled_ok: 343 | up.append (session_desc + " (Disabled)") 344 | 345 | # Session down but in session_down_ok* list 346 | elif protoname in sessions_down_ok: 347 | up.append (session_desc + " (Down/OK)") 348 | 349 | # Something's broken 350 | else: 351 | last_error = 'Disabled' if bgp_state == 'Down' else config.get ('Last error', 'unknown') 352 | session_desc += " (%s)" % last_error 353 | 354 | down.append (session_desc) 355 | down_by_proto[session_type].append (session_desc) 356 | 357 | 358 | # Check down iBGP / eBGP sessions limits 359 | for proto, sessions in down_by_proto.items (): 360 | down_sessions = len (sessions) 361 | if down_sessions == 0: 362 | continue 363 | 364 | for level in [ 'warn', 'crit' ]: 365 | limits = getattr (args, "%s_%s" % (proto, level)).split (":") 366 | code, code_name = session_down_codes[level] 367 | 368 | # Check if 
number of down sessions is within warning or critical limits 369 | if (limits[0] == '' or down_sessions >= int (limits[0])) and \ 370 | (limits[1] == '' or down_sessions <= int (limits[1])): 371 | if ret_code < code: 372 | ret_code = code 373 | 374 | # Check routes for up sessions 375 | for session, routes in sessions_up.items (): 376 | session_info = {} 377 | session_info['routes_imported'], session_info['routes_exported'], session_info['routes_preferred'] = routes.split (' ') 378 | 379 | total_routes_imported += int(session_info['routes_imported']) 380 | total_routes_exported += int(session_info['routes_exported']) 381 | 382 | for r_type in route_codes.keys(): 383 | for level in [ 'crit', 'warn' ]: 384 | try: 385 | limits = getattr (args, "%s_%s" % (r_type, level)).split (":") 386 | except: 387 | pass 388 | else: 389 | code, code_name = session_down_codes[level] 390 | if (limits[0] == '' or int(session_info[r_type]) >= int (limits[0])) and \ 391 | (limits[1] == '' or int(session_info[r_type]) <= int (limits[1])): 392 | if ret_code < code: 393 | ret_code = code 394 | print("%s Routes: %s with %s route(s) is %s" % (route_codes[r_type],session,session_info[r_type],code_name)) 395 | break 396 | 397 | # Special handling for session given by name 398 | if args.session: 399 | # Check is given session name was found 400 | if len (bgp_sessions) == 0: 401 | print ("ERROR: Given session %s not present in configuration!" 
% args.session) 402 | sys.exit (2) 403 | 404 | if len (down) > 0: 405 | print ("DOWN: %s" % ", ".join (down)) 406 | 407 | if len (up) > 0: 408 | print ("OK: %s" % ", ".join (up)) 409 | 410 | if args.show_performance_data: 411 | print('| num_sessions=%i total_routes_imported=%i total_routes_exported=%i' % (len(bgp_sessions), total_routes_imported, total_routes_exported)) 412 | 413 | sys.exit (ret_code) 414 | -------------------------------------------------------------------------------- /check_bird_bgp/check_bird_bgp.command.conf: -------------------------------------------------------------------------------- 1 | object CheckCommand "bird_bgp" { 2 | import "plugin-check-command" 3 | 4 | command = [ "/usr/bin/sudo", PluginDir + "/check_bird_bgp" ] 5 | 6 | arguments = { 7 | "--proto" = "$proto$" # IP protocol version to check 8 | "--asn" = "$asn$" # Local AS number 9 | "--ibgp" = { # Check iBGP sessions 10 | set_if = "$ibgp$" 11 | } 12 | "--ibgp_warn" = "$ibgp_warn$" # Warning interval for down iBGP sessions 13 | "--ibgp_crit" = "$ibgp_crit$" # Critical interval for down iBGP sessions 14 | "--ebgp" = { # Check eBGP sessions 15 | set_if = "$ebgp$" 16 | } 17 | "--ebgp_warn" = "$ebgp_warn$" # Warning interval for down eBGP sessions 18 | "--ebgp_crit" = "$ebgp_crit$" # Critical interval for down eBGP sessions 19 | "--disabled_ok" = { # Treat sessions disabled in bird as OK. 20 | set_if = "$disabled_ok$" 21 | } 22 | "--show_performance_data" = { # enable output of performance data if requested 23 | set_if = "$show_performance_data$" 24 | } 25 | "--sessions_down_ok" = "$sessions_down_ok$" 26 | # List of sessions which are OK to be down. (Space separated list) 27 | "--sessions_down_ok_file" = "$sessions_down_ok_file$" 28 | # List of sessions which are OK to be down. 
(One per line) 29 | "--ignore_missing_file" = { # Ignore a possible non-existent file given as --sessions_down_ok_file 30 | set_if = "$ignore_missing_file$" 31 | } 32 | "--session" = "$session$" # Only check for session with given name 33 | "--routes_imported_warn" = "$routes_imported_warn$" # Warning interval for imported routes 34 | "--routes_imported_crit" = "$routes_imported_crit$" # Critical interval for imported routes 35 | "--routes_exported_warn" = "$routes_exported_warn$" # Warning interval for exported routes 36 | "--routes_exported_crit" = "$routes_exported_crit$" # Critical interval for exported routes 37 | "--routes_preferred_warn" = "$routes_preferred_warn$" # Warning interval for preferred routes 38 | "--routes_preferred_crit" = "$routes_preferred_crit$" # Critical interval for preferred routes 39 | } 40 | 41 | vars.proto = "4" 42 | vars.ibgp_warn = "1:1" 43 | vars.ibgp_crit = "2:" 44 | vars.ebgp_warn = "1:1" 45 | vars.ebgp_crit = "2:" 46 | } 47 | -------------------------------------------------------------------------------- /check_bird_bgp/check_bird_bgp.service.conf: -------------------------------------------------------------------------------- 1 | # 2 | # bird iBGP 3 | apply Service "bird_ibgp4" { 4 | import "generic-service" 5 | 6 | check_command = "bird_bgp" 7 | 8 | if (host.name != NodeName) { 9 | command_endpoint = host.name 10 | } 11 | 12 | vars.ibgp = true 13 | vars.ibgp_warn = "1:1" 14 | vars.ibgp_crit = "2:" 15 | vars.asn = 65132 16 | vars.proto = "4" 17 | vars.sessions_down_ok_file = "/etc/icinga2/ffho-conf.d/bird_ibgp_sessions_down_ok.txt" 18 | vars.ignore_missing_file = true 19 | 20 | assign where host.address && host.vars.os == "Linux" && "router" in host.vars.roles 21 | } 22 | 23 | apply Service "bird_ibgp6" { 24 | import "generic-service" 25 | 26 | check_command = "bird_bgp" 27 | 28 | if (host.name != NodeName) { 29 | command_endpoint = host.name 30 | } 31 | 32 | vars.ibgp = true 33 | vars.ibgp_warn = "1:1" 34 | vars.ibgp_crit = 
"2:" 35 | vars.asn = 65132 36 | vars.proto = "6" 37 | vars.sessions_down_ok_file = "/etc/icinga2/ffho-conf.d/bird_ibgp_sessions_down_ok.txt" 38 | vars.ignore_missing_file = true 39 | 40 | assign where host.address && host.vars.os == "Linux" && "router" in host.vars.roles 41 | } 42 | 43 | 44 | # 45 | # bird eBGP 46 | apply Service "bird_ebgp4" { 47 | import "generic-service" 48 | 49 | check_command = "bird_bgp" 50 | 51 | if (host.name != NodeName) { 52 | command_endpoint = host.name 53 | } 54 | 55 | vars.ebgp = true 56 | vars.asn = 65132 57 | vars.proto = "4" 58 | 59 | assign where host.address && host.vars.os == "Linux" && "ffrl-exit" in host.vars.roles 60 | } 61 | 62 | apply Service "bird_ebgp6" { 63 | import "generic-service" 64 | 65 | check_command = "bird_bgp" 66 | 67 | if (host.name != NodeName) { 68 | command_endpoint = host.name 69 | } 70 | 71 | vars.ebgp = true 72 | vars.asn = 65132 73 | vars.proto = "6" 74 | 75 | assign where host.address && host.vars.os == "Linux" && "ffrl-exit" in host.vars.roles 76 | } 77 | 78 | 79 | ################################################################################ 80 | # Session-based checking # 81 | ################################################################################ 82 | 83 | apply Service "bird_ibgp4_" for (key => session in host.vars.bgp_peers4) { 84 | import "generic-service" 85 | 86 | check_command = "bird_bgp" 87 | 88 | if (host.name != NodeName) { 89 | command_endpoint = host.name 90 | } 91 | 92 | vars.ibgp = true 93 | vars.ibgp_warn = "1:1" 94 | vars.ibgp_crit = "2:" 95 | vars.asn = 65049 96 | vars.proto = "4" 97 | vars.sessions_down_ok_file = "/etc/bird/ibgp_sessions_to_ignore.txt" 98 | vars.ignore_missing_file = true 99 | vars.session = session 100 | 101 | vars.grafana_graph_disable = true 102 | vars.show_performance_data = true 103 | 104 | assign where host.address && "routereflector" in host.vars.groups 105 | } 106 | 107 | apply Service "bird_ibgp6_" for (key => session in host.vars.bgp_peers6) { 
108 | import "generic-service" 109 | 110 | check_command = "bird_bgp" 111 | 112 | if (host.name != NodeName) { 113 | command_endpoint = host.name 114 | } 115 | 116 | vars.ibgp = true 117 | vars.ibgp_warn = "1:1" 118 | vars.ibgp_crit = "2:" 119 | vars.asn = 65049 120 | vars.proto = "6" 121 | vars.sessions_down_ok_file = "/etc/bird/ibgp_sessions_to_ignore.txt" 122 | vars.ignore_missing_file = true 123 | vars.session = session 124 | 125 | vars.grafana_graph_disable = true 126 | vars.show_performance_data = true 127 | 128 | assign where host.address && "routereflector" in host.vars.groups 129 | } 130 | -------------------------------------------------------------------------------- /check_bird_ospf/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | $ ./check_bird_ospf -h 3 | usage: check_bird_ospf [-h] [--proto {4,6}] [--protocol PROTOCOL] 4 | [--interfaces_down_ok LIST] 5 | [--interfaces_down_ok_file FILENAME] 6 | [--ignore_missing_file] 7 | 8 | check bird OSPF sessions 9 | 10 | optional arguments: 11 | -h, --help show this help message and exit 12 | --proto {4,6}, -p {4,6} 13 | IP protocol version to check 14 | --protocol PROTOCOL, -P PROTOCOL 15 | Bird OSPF protocol instance name to check 16 | --interfaces_down_ok LIST 17 | List of interfaces which are OK to have no OSPF 18 | neighbor. Provide a space separated list. 19 | --interfaces_down_ok_file FILENAME 20 | List of interfaces which are OK to have no OSPF 21 | neighbor. Provide one interfaces per line. 
def read_interfaces_from_file (file_path, missing_ok):
    """Read interface names, one per line, from file_path.

    Blank lines and lines whose first non-blank character is '#' are skipped.

    Args:
        file_path: Path of the file to read.
        missing_ok: If true, a non-existent file yields an empty list
            instead of an error.

    Returns:
        List of interface names with surrounding whitespace removed.

    Exits the process with code 1 if the file cannot be read.
    """
    interfaces = []

    # If we shouldn't care, we won't care if it's not there.
    if missing_ok and not os.path.isfile (file_path):
        return interfaces

    try:
        # Read the file we were asked to read, not the global CLI argument.
        with open (file_path, 'r') as ido_fh:
            for line in ido_fh:
                iface = line.strip ()

                # Strip first so indented comments and empty lines are skipped
                if iface and not iface.startswith ('#'):
                    interfaces.append (iface)

    except IOError as err:
        # err.strerror may be None for some errors, fall back to str (err)
        print ("Failed to read interfaces_down_ok from '%s': %s" % (file_path, err.strerror or err))
        sys.exit (1)

    return interfaces


def format_peer (ifname, iface_config, peer):
    """Return the display name of an OSPF peer.

    Yields just the interface name for 'ptp' interfaces when --no_ptp_ip
    was given (read from the module level args), "ifname/peer" otherwise.
    """
    if args.no_ptp_ip and iface_config['Type'] == 'ptp':
        return ifname

    return "%s/%s" % (ifname, peer)
# Are some interfaces ok being down?
interfaces_down_ok = []
if args.interfaces_down_ok:
    interfaces_down_ok = args.interfaces_down_ok.split ()

if args.interfaces_down_ok_file:
    interfaces_down_ok.extend (read_interfaces_from_file (args.interfaces_down_ok_file, args.ignore_missing_file))


################################################################################
#                Query OSPF protocol information from bird                     #
################################################################################
cmds = {
    '4' : '/usr/sbin/birdc',
    '6' : '/usr/sbin/birdc6',
}

cmd_interfaces = [ "/usr/bin/sudo", cmds[args.proto], "show ospf interface %s" % args.protocol ]
cmd_neighbors = [ "/usr/bin/sudo", cmds[args.proto], "show ospf neighbors %s" % args.protocol ]


def run_bird_cmd (cmd, what):
    """Run cmd and return its stdout as a list of stripped text lines.

    'what' is only used for the error message. Exits with code 1 if the
    command returned a non-zero exit code.
    """
    proc = subprocess.Popen (cmd, bufsize = 4194304, stdout = subprocess.PIPE)

    # returncode is only set once the process has been waited for, so
    # collect the output first and check the exit code afterwards.
    output, _ = proc.communicate ()

    # Python3 glue
    if sys.version_info >= (3, 0):
        output = str (output, encoding = 'utf-8')

    lines = [line.strip () for line in output.splitlines ()]

    if proc.returncode:
        print ("Failed to get OSPF %s from bird: %s" % (what, " ".join (lines)))
        sys.exit (1)

    return lines


try:
    interface_lines = run_bird_cmd (cmd_interfaces, "interfaces")
    neighbor_lines = run_bird_cmd (cmd_neighbors, "neighbors")

# This should not have happened (e.g. sudo or birdc could not be executed).
# sys.exit () raises SystemExit, which is not caught here.
except Exception as e:
    print ("Unknown error while getting OSPF information from bird: %s" % str (e))
    sys.exit (3)


################################################################################
#                     Parse interfaces and neighbors                           #
################################################################################

interfaces = {}

interface_re = re.compile (r'^Interface (.+) \(')
state_re = re.compile (r'(Type|State): (.+)$')
stub_re = re.compile (r'\(stub\)')

# Parse interfaces
interface = None
for line in interface_lines:
    # Create empty interface hash
    match = interface_re.search (line)
    if match:
        interface = match.group (1)
        interfaces[interface] = {}
        continue

    # Store Type and State attributes (ignore anything before the first interface)
    match = state_re.search (line)
    if match and interface is not None:
        interfaces[interface][match.group (1)] = match.group (2)


# Delete any stub interfaces from our list
for iface in list (interfaces):
    if stub_re.search (interfaces[iface].get ('State', '')):
        del interfaces[iface]


ok = []
broken = []
down = []

neighbor_re = re.compile (r'^([0-9a-fA-F.:]+)\s+(\d+)\s+([\w/-]+)\s+([0-9:]+)\s+([\w.-]+)\s+([\w.:]+)')

# Read and check all neighbor states
for line in neighbor_lines:
    match = neighbor_re.search (line)
    if not match:
        continue

    peer = match.group (1)
    state = match.group (3)
    ifname = match.group (5)

    # A neighbor may be listed on an interface we did not learn (or dropped)
    # above; don't crash on it but track it with an unknown type.
    interface = interfaces.setdefault (ifname, {})
    interface.setdefault ('Type', 'unknown')

    # Mark interfaces as "up" in bird
    interface['up'] = 1

    # State FULL is awesome.
    if 'Full' in state:
        ok.append (format_peer (ifname, interface, peer))

    # In broadcast areas there are only two FULL sessions (to the DR and BDR)
    # all other sessions will be 2-Way/Other which is perfectly fine.
    elif state == "2-Way/Other" and interface['Type'] == "broadcast":
        ok.append (format_peer (ifname, interface, peer))

    # Everything else is considered broken.
    # Likely some ExStart/* etc. pointing to possible MTU troubles.
    else:
        broken.append ("%s:%s" % (format_peer (ifname, interface, peer), state))


# Check for any interfaces which should have (at least) an OSPF peer
# but don't appear in the neighbors list
for iface in interfaces:
    # Interfaces with a neighbor have been reported above already
    if "up" in interfaces[iface]:
        continue

    if iface in interfaces_down_ok:
        ok.append ("%s (Down/OK)" % iface)
    else:
        down.append (iface)


################################################################################
#                             Prepare output                                   #
################################################################################

ret_code = 0

# Any down interfaces?
if len (down) > 0:
    ret_code = 2
    print ("DOWN: %s" % ", ".join (sorted (down)))

# Any broken sessions?
if len (broken) > 0:
    # Issue a warning when there are issues..
    if ret_code < 2:
        ret_code = 1

    print ("BROKEN: %s" % ", ".join (sorted (broken)))

# And the good ones
if len (ok) > 0:
    print ("OK: %s" % ", ".join (sorted (ok)))

sys.exit (ret_code)
#
# bird OSPF
#
# OSPF check for the IPv4 bird instance: vars.proto is not set here, so the
# default of the "bird_ospf" CheckCommand ("4") applies.
# Applied to all Linux hosts which have an address and the "router" role.
apply Service "bird_ospf" {
	import "generic-service"

	check_command = "bird_ospf"

	# Interfaces listed in this file may be without OSPF neighbor;
	# the file is optional (ignore_missing_file).
	vars.interfaces_down_ok_file = "/etc/icinga2/ffho-conf.d/bird_ospf_interfaces_down_ok.txt"
	vars.ignore_missing_file = true

	# Set the command endpoint to the checked host unless it is the local node
	if (host.name != NodeName) {
		command_endpoint = host.name
	}

	assign where host.address && host.vars.os == "Linux" && "router" in host.vars.roles
}

# Same check with vars.proto = "6" (the plugin then queries birdc6).
apply Service "bird_ospf6" {
	import "generic-service"

	check_command = "bird_ospf"
	vars.proto = "6"

	# Interfaces listed in this file may be without OSPF neighbor;
	# the file is optional (ignore_missing_file).
	vars.interfaces_down_ok_file = "/etc/icinga2/ffho-conf.d/bird_ospf_interfaces_down_ok.txt"
	vars.ignore_missing_file = true

	# Set the command endpoint to the checked host unless it is the local node
	if (host.name != NodeName) {
		command_endpoint = host.name
	}

	assign where host.address && host.vars.os == "Linux" && "router" in host.vars.roles
}
#!/usr/bin/python3
#
# Nagios plugin to check netfilter conntrack size
#
# Maximilian Wilhelm
#  -- Fri 11 Mar 2016 08:56:08 PM CET
#

import argparse
import os.path
import sys

# Map of --no-conntrack choices to plugin exit codes
ret_map = {
    'ok' : 0,
    'warn' : 1,
    'crit' : 2,
    'unkn' : 3,
}


def read_int (path, missing_code = 0):
    """Read an integer from the file at path.

    Returns -1 if the content is not an integer. If the file cannot be
    read, a message is printed and the process exits with missing_code.
    """
    try:
        with open (path, 'r') as fh:
            return int (fh.read ())
    except ValueError:
        return -1
    except IOError:
        print ("conntrack seems not to be loaded.")
        sys.exit (missing_code)


def evaluate_usage (num_entries, max_entries, warn, crit):
    """Compare the conntrack table usage against the thresholds.

    Args:
        num_entries: Current number of conntrack entries.
        max_entries: Maximum number of conntrack entries.
        warn: Warning threshold in percent.
        crit: Critical threshold in percent.

    Returns:
        Tuple (exit code, output line). The output line carries the
        performance data unless the values could not be evaluated.
    """
    # read_int () yields -1 for unparsable values and a maximum of 0 would
    # lead to a division by zero, so bail out as UNKNOWN in these cases.
    if num_entries < 0 or max_entries <= 0:
        return (3, "Unable to determine conntrack usage (count: %d, max: %d)" % (num_entries, max_entries))

    usage = num_entries / max_entries * 100

    # Calculate Performace Data Metrics
    warn_entries = int (max_entries / 100 * warn)
    crit_entries = int (max_entries / 100 * crit)

    perf_string = "'count'=%d;%d;%d" % (num_entries, warn_entries, crit_entries)

    if usage >= crit:
        code = 2
        msg = "Conntrack pool usage over %s%%: %d (%d / %d)" % (crit, usage, num_entries, max_entries)

    elif usage >= warn:
        code = 1
        msg = "Conntrack pool usage over %s%%: %d (%d/ %d)" % (warn, usage, num_entries, max_entries)

    else:
        code = 0
        msg = "Conntrack pool usage as at %d%% (%d / %d)" % (usage, num_entries, max_entries)

    return (code, "%s | %s" % (msg, perf_string))


def main ():
    """Parse the command line, read the counters and report the result."""
    parser = argparse.ArgumentParser (description = 'check netfilter conntrack table size')

    parser.add_argument ('--warn', '-w', help = "Warning conntrack table usage (percent)", default = "70", type = int)
    parser.add_argument ('--crit', '-c', help = "Critical conntrack table usage (percent)", default = "85", type = int)
    parser.add_argument ('--no-conntrack', help = "Return code when no conntrack is loaded.", default = "ok", choices = [ "ok", "warn", "crit", "unkn" ])

    args = parser.parse_args ()

    missing_code = ret_map[args.no_conntrack]
    num_entries = read_int ("/proc/sys/net/netfilter/nf_conntrack_count", missing_code)
    max_entries = read_int ("/proc/sys/net/netfilter/nf_conntrack_max", missing_code)

    code, output = evaluate_usage (num_entries, max_entries, args.warn, args.crit)

    print (output)
    sys.exit (code)


if __name__ == '__main__':
    main ()
def check_zone (zone):
    """Compare the SOA serial of zone on the reference and the replica NS.

    Uses the module level resolvers reference_res / replica_res and args.

    Returns a dict with the keys 'state' (plugin exit code, UNKNOWN unless
    the comparison could be carried out or an answer was not authoritative),
    'diff' (left empty here) and 'errors' (description of the problem, if any).
    """
    result = { 'state' : UNKNOWN, 'diff' : '', 'errors' : '' }

    # Only the serial comparison is handled here
    if args.check_mode != 'serial':
        return result

    # Ask the reference NS first, it has to answer authoritatively
    try:
        ref_answer = reference_res.query (zone, 'SOA')

        if 'AA' not in to_text (ref_answer.response.flags):
            result['state'] = CRITICAL
            result['errors'] = "Got non-authoritive answer from reference NS: %s" % args.reference_ns
            return result
    except Exception as exc:
        result['errors'] = "Error while checking reference NS %s: %s" % (args.reference_ns, exc)
        return result

    # Now the same for the NS to be checked
    try:
        replica_answer = replica_res.query (zone, 'SOA')

        if 'AA' not in to_text (replica_answer.response.flags):
            result['state'] = CRITICAL
            result['errors'] = "Got non-authoritive answer from replica NS: %s" % args.replica_ns
            return result
    except Exception as exc:
        result['errors'] = "Error while checking replica NS %s: %s" % (args.replica_ns, exc)
        return result

    # The serial is the 7th whitespace separated field of the textual SOA RRset
    try:
        serials = [str (answer.response.answer[0]).split ()[6] for answer in (ref_answer, replica_answer)]
    except (AttributeError, IndexError) as exc:
        result['errors'] = exc
        return result

    ref_serial, replica_serial = serials

    if ref_serial != replica_serial:
        result['state'] = CRITICAL
        result['errors'] = "Serial mismatch: %s vs. %s" % (ref_serial, replica_serial)
    else:
        result['state'] = OK

    return result
#
# Check DNS sync
#
# Runs check_dns_sync, which compares the given zones on the replica NS
# against the reference NS.
object CheckCommand "dns_sync" {
	import "plugin-check-command"

	command = [ PluginDir + "/check_dns_sync" ]

	arguments = {
		"--reference-ns" = "$reference_ns$"
		"--replica-ns" = "$replica_ns$"
		"--timeout" = {
			set_if = "$timeout$"
			value = "$timeout$"
		}

		# The plugin defines this option as --check-mode (with a dash),
		# --check_mode would be rejected by its argument parser.
		# No set_if here: set_if needs a numeric/boolean value, but the
		# mode is a string ("serial" / "axfr").
		"--check-mode" = "$check_mode$"

		# Zone names are positional arguments and have to come last
		"zones" = {
			value = "$zones$"
			skip_key = true
			order = 99
		}
	}

	vars.reference_ns = ""			# IP of reference NS Server
	vars.replica_ns = ""			# IP of replica NS Server
	vars.timeout = ""			# Timeout for DNS operations
	vars.check_mode = "serial"		# Check mode: serial or axfr
	vars.zones = []				# List of zone names to be checked
}
"10.132.in-addr.arpa", 12 | "30.172.in-addr.arpa", 13 | ] 14 | 15 | assign where host.address && "dns-auth" in host.vars.roles 16 | ignore where "dns-server-master" in host.vars.roles 17 | } 18 | -------------------------------------------------------------------------------- /check_ifupdown2/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | $ ./check_ifupdown2 -h 3 | usage: check_ifupdown2 [-h] --ok_string OK_STRING 4 | 5 | Check interface configuration. 6 | 7 | optional arguments: 8 | -h, --help show this help message and exit 9 | --ok_string OK_STRING 10 | Ifupdown success string 11 | ``` 12 | -------------------------------------------------------------------------------- /check_ifupdown2/check_ifupdown2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Check state of interfaces configured with ifupdown2 4 | # 5 | # Maximilian Wilhelm 6 | # -- Fri, 14 Apr 2017 20:05:45 +0200 7 | # 8 | 9 | import argparse 10 | import re 11 | import subprocess 12 | import sys 13 | 14 | parser = argparse.ArgumentParser (description = 'Check interface configuration.') 15 | parser.add_argument ("--ok_string", help = "Ifupdown success string", required = True) 16 | args = parser.parse_args () 17 | 18 | cmd = [ "/usr/bin/sudo", "/sbin/ifquery", "-c", "-a" ] 19 | 20 | 21 | try: 22 | ifquery = subprocess.Popen (cmd, bufsize = 4194304, stdout = subprocess.PIPE, stderr = subprocess.PIPE).stdout 23 | 24 | # cmd exited with non-zero code 25 | except subprocess.CalledProcessError as c: 26 | print "Failed to run %s: %s" % (" ".join (cmd), c.output) 27 | sys.exit (1) 28 | 29 | # This should not have happend. 
################################################################################
#      Parse all entries from ifquery output into interfaces lists             #
################################################################################

interface_re = re.compile (r'^iface (\S+)\s+\[(.+)\]$')
ignore_re = re.compile (r'^(auto .*)?$')
line_re = re.compile (r'^\s+(\S+)\s+(.+)\s+\[(.+)\]$')


def _record_interface (interface, interface_dict, interfaces_ok, interfaces_err):
    """File a completely parsed interface into the ok or the error list."""
    if interface_dict['ok']:
        interfaces_ok.append (interface)
        return

    # Everything besides the 'ok' flag is an attribute with a bad status
    errors = ",".join (sorted (key for key in interface_dict if key != 'ok'))
    if errors == "":
        errors = "DOWN"

    interfaces_err.append ("%s: %s" % (interface, errors))


def parse_ifquery_output (lines, ok_string):
    """Parse the output lines of 'ifquery -c -a'.

    Args:
        lines: Iterable of output lines (text or UTF-8 encoded bytes).
        ok_string: Status string which marks an interface / attribute as fine.

    Returns:
        Tuple (interfaces_ok, interfaces_err): list of names of interfaces
        which are fine and list of "name: attr,attr" strings ("name: DOWN"
        if no attribute is flagged) for those which are not.
    """
    interfaces_ok = []
    interfaces_err = []

    interface = None
    interface_dict = None

    for line in lines:
        # Popen hands out bytes on Python 3
        if not isinstance (line, str):
            line = line.decode ('utf-8')

        line = line.rstrip ()

        # Preamble or empty string, the latter ends an interface stanza
        if ignore_re.search (line):
            if interface_dict:
                _record_interface (interface, interface_dict, interfaces_ok, interfaces_err)

            interface = None
            interface_dict = None
            continue

        # Start of a new interface
        match = interface_re.search (line)
        if match:
            # Don't lose the previous interface if there was no separator line
            if interface_dict:
                _record_interface (interface, interface_dict, interfaces_ok, interfaces_err)

            interface = match.group (1)
            interface_dict = {
                'ok' : match.group (2) == ok_string,
            }
            continue

        # Ignore anything when we're out an interface scope
        if interface is None:
            continue

        # Parse and store any interesting lines / fields
        match = line_re.search (line)
        if not match:
            continue

        attr = match.group (1)
        value = match.group (2)
        status = match.group (3)

        if status != ok_string:
            interface_dict['ok'] = False
            interface_dict[attr] = value

    # The output may end without a trailing empty line
    if interface_dict:
        _record_interface (interface, interface_dict, interfaces_ok, interfaces_err)

    return (interfaces_ok, interfaces_err)


interfaces_ok, interfaces_err = parse_ifquery_output (ifquery.readlines (), args.ok_string)

# print with one parenthesized argument works the same on Python 2 and 3
ret_code = 0
if len (interfaces_err) > 0:
    print ("ERR: %s" % "; ".join (interfaces_err))
    ret_code = 2

if len (interfaces_ok) > 0:
    print ("OK: %s" % ", ".join (interfaces_ok))

sys.exit (ret_code)
def check_events (data):
    """Evaluate the event lists returned by the events API endpoint.

    Only the first event of each event list is looked at and event lists
    whose event carries a resolve_time other than 0 are skipped.

    Args:
        data: Decoded JSON answer, a dict with the key 'eventlists'.

    Returns:
        Tuple (ret_code, msg, perf_data): worst state found, one line per
        unresolved event (or "No active events") and an empty perf data list.
    """
    # Severities which influence the check state. 'emergency' is assumed to
    # be the level above 'critical' -- TODO confirm against the PAPI docs.
    severity_states = {
        'warning' : WARNING,
        'critical' : CRITICAL,
        'emergency' : CRITICAL,
    }

    ret_code = OK
    msg = ""
    perf_data = []

    for evl in data['eventlists']:
        # Nothing to evaluate in an empty event list
        if not evl['events']:
            continue

        event = evl['events'][0]

        # Event has been resolved, go on
        if event['resolve_time'] != 0:
            continue

        # Format message like "(Critical|Warning) on (Node NN|Cluster): $msg"
        sev = event['severity']
        scope = "node %s" % event['lnn'] if event['lnn'] != 0 else "cluster"
        msg += "%s on %s: %s\n" % (sev.capitalize (), scope, event['message'])

        # Store worst severity
        ret_code = max (ret_code, severity_states.get (sev, OK))

    # Keep the messages of unresolved events even if none of them raised
    # the state, so they don't get hidden behind "No active events".
    if msg == "":
        msg = "No active events"

    return (ret_code, msg, perf_data)
% name 113 | 114 | perf_data.append ("'%s-%s'=%sB;%d;%d;0;%s" % ( 115 | name, 116 | storage_type, 117 | usage, 118 | warn, 119 | crit, 120 | size 121 | )) 122 | 123 | if ret_code == OK: 124 | msg = "Pools OK" 125 | 126 | return (ret_code, msg, perf_data) 127 | 128 | 129 | def check_synciq (data): 130 | ret_code = OK 131 | msg = "" 132 | perf_data = [] 133 | errors = "" 134 | 135 | policies = 0 136 | 137 | for policy in data['policies']: 138 | if policy['enabled'] == 'false': 139 | continue 140 | 141 | policies += 1 142 | 143 | # Gather some relevant attributes 144 | last_job_state = policy['last_job_state'] 145 | last_started = convert_time (policy['last_started']) 146 | last_success = convert_time (policy['last_success']) 147 | next_run = convert_time (policy['next_run']) 148 | 149 | # Append to msg or errors depending on state 150 | newmsg = "Policy %s %s, policy was last started %s, last successful run was %s, next run is scheduled for %s\n" % ( 151 | policy['name'], 152 | last_job_state, 153 | last_started, 154 | last_success, 155 | next_run) 156 | 157 | if last_job_state in ("failed"): 158 | ret_code = CRITICAL 159 | last_job_state = last_job_state.upper () 160 | errors += newmsg 161 | else: 162 | msg += newmsg 163 | 164 | 165 | # No enabled policy configured at all? 166 | if policies == 0 and ret_code < 1: 167 | ret_code = 1 168 | msg = "No enabled policy configured, is this what you want?" 
################################################################################
#                                Configuration                                 #
################################################################################

# Directory holding the trusted CA certificates
tls_cacert_dir = "/etc/ssl/certs"

# Maps every check name selectable on the command line to the API path to
# query and the function evaluating the response.
checks = {
    'events' : dict (
        api_path = "/platform/7/event/eventlists",
        function = check_events,
    ),

    'storagepools' : dict (
        api_path = "/platform/3/storagepool/nodepools",
        function = check_storagepools,
    ),

    'synciq' : dict (
        api_path = '/platform/7/sync/policies',
        function = check_synciq,
    ),
}

################################################################################
#                         Parse command line argument                          #
################################################################################

parser = argparse.ArgumentParser ()

# One sub-command per check, exactly one has to be given
subparsers = parser.add_subparsers (
    dest = 'check',
    required = True,
    description = "Choose one item to check",
    title = "Checks",
)

# Check for events/alerts
parser_events = subparsers.add_parser ('events', help = 'Check events')

# Check storage pools
parser_stpl = subparsers.add_parser ('storagepools', help = 'Check storagepools')
parser_stpl.add_argument (
    '--ignore-unprovisioned',
    action = 'store_true',
    help = "Ignore unprovisioned pools instead reporting them with 0 values.",
)

# Check SyncIQ status
parser_siq = subparsers.add_parser ('synciq', help = 'Check SyncIQ')

# Arguments for all checks, registered after the sub-commands so they follow
# the check name on the command line
for positional, description in (
    ('fqdn', "FQDN of Isilon PAPI endpoint"),
    ('username', "Username for PAPI login"),
    ('password', "Password for PAPI login"),
):
    parser.add_argument (positional, help = description)

args = parser.parse_args ()


# Format URL and prepare auth data
selected = checks[args.check]
url = "https://%s:8080/%s" % (args.fqdn, selected['api_path'].lstrip ('/'))
auth = (args.username, args.password)
data = query_api (url, auth)

# Run check
ret_code, msg, perf_data = selected['function'] (data)

# Prefix the message with the textual state
for code, prefix in (
    (OK, "OK: "),
    (WARNING, "Warning: "),
    (CRITICAL, "Critical: "),
    (UNKNOWN, "Unknown: "),
):
    if ret_code == code:
        msg = prefix + msg

# Perf data goes behind a pipe, in stable (sorted) order
if perf_data:
    msg = "%s| %s" % (msg, " ".join (sorted (perf_data)))

print (msg.rstrip ())
sys.exit (ret_code)
# CheckCommand wrapping the check_isilon plugin.
#
# The plugin is called as
#   check_isilon <check> [check specific options] <fqdn> <username> <password>
# All values are passed positionally (skip_key), the "order" attributes define
# their position on the command line.
object CheckCommand "isilon" {
    import "plugin-check-command"

    command = [ PluginDir + "/check_isilon" ]

    arguments = {
        # The check to perform
        "--check" = {
            value = "$check$"
            skip_key = true
            required = true
            order = -5
        }

        # Optional args for storagepools check
        #
        # Has to follow the check name directly: the option belongs to the
        # "storagepools" sub-command of the plugin and is rejected as
        # unrecognized when it shows up behind the positional parameters.
        "--ignore-unprovisioned" = {
            set_if = "$ignore-unprovisioned$"
            order = -4
        }

        # Global required parameters
        "--fqdn" = {
            value = "$fqdn$"
            skip_key = true
            required = true
            order = -3
        }

        "--username" = {
            value = "$username$"
            skip_key = true
            required = true
            order = -2
        }

        "--password" = {
            value = "$password$"
            skip_key = true
            required = true
            order = -1
        }
    }
}
#
# Isilon
#
# One service per check offered by the "isilon" CheckCommand.
# UsernameIsilonPAPI / PasswordIsilonPAPI are not defined in this file --
# presumably global constants, confirm where they are set.
#

# Active events/alerts, applied to every host with vars.storage == "Isilon"
apply Service "check_isilon_events" {
    import "generic-service"

    check_command = "isilon"

    vars.check = "events"
    # The API endpoint is addressed as "admin.<host name>"
    vars.fqdn = "admin." + host.name
    vars.username = UsernameIsilonPAPI
    vars.password = PasswordIsilonPAPI

    assign where host.vars.storage == "Isilon"
}

# Storage pool fill levels, applied to every host with vars.storage == "Isilon"
apply Service "check_isilon_storagepools" {
    import "generic-service"

    check_command = "isilon"

    vars.check = "storagepools"
    vars.fqdn = "admin." + host.name
    vars.username = UsernameIsilonPAPI
    vars.password = PasswordIsilonPAPI

    assign where host.vars.storage == "Isilon"
}

# SyncIQ policy state, only applied to hosts which additionally have
# vars.storage_role == "primary"
apply Service "check_isilon_synciq" {
    import "generic-service"

    check_command = "isilon"

    vars.check = "synciq"
    vars.fqdn = "admin." + host.name
    vars.username = UsernameIsilonPAPI
    vars.password = PasswordIsilonPAPI

    assign where host.vars.storage == "Isilon" && host.vars.storage_role == "primary"
}