├── LICENSE
├── README.md
├── check_rancher2.sh
└── icinga2
    ├── command_check_rancher2.conf
    └── example_service_checks.conf


/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   <signature of Ty Coon>, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # check_rancher2
 2 | 
 3 | Monitoring plugin to check Kubernetes container environments in Rancher 2.x
 4 | 
 5 | This is the public repository for development. 
 6 | 
 7 | The latest release and documentation can be found here:
 8 | 
 9 | https://www.claudiokuenzler.com/monitoring-plugins/check_rancher2.php
10 | 
11 | The check_rancher2 monitoring plugin is sponsored by Infiniroot (Hosted Rancher Kubernetes in Switzerland) -> www.infiniroot.com
12 | 


--------------------------------------------------------------------------------
/check_rancher2.sh:
--------------------------------------------------------------------------------
   1 | #!/bin/bash 
   2 | ##########################################################################################
   3 | # Script/Plugin: check_rancher2.sh                                                       #
   4 | # Author:        Claudio Kuenzler                                                        #
   5 | # Official repo: https://github.com/Napsty/check_rancher2                                #
   6 | # Documentation: https://www.claudiokuenzler.com/monitoring-plugins/check_rancher2.php   #
   7 | # Purpose:       Monitor Rancher 2.x Kubernetes clusters and their containers            #
   8 | # Description:   Checks status of resources within the Rancher managed Kubernetes        #
   9 | #                cluster(s) using Rancher 2.x API                                        #
  10 | #                                                                                        #
  11 | # License :      GNU General Public Licence (GPL) http://www.gnu.org/                    #
  12 | # This program is free software; you can redistribute it and/or modify it under the      #
  13 | # terms of the GNU General Public License as published by the Free Software Foundation;  #
  14 | # either version 2 of the License, or (at your option) any later version.                #
  15 | # This program is distributed in the hope that it will be useful, but WITHOUT ANY        #
  16 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A        #
  17 | # PARTICULAR PURPOSE.  See the GNU General Public License for more details.              #
  18 | # You should have received a copy of the GNU General Public License along with this      #
  19 | # program; if not, see <https://www.gnu.org/licenses/>.                                  #
  20 | #                                                                                        #
  21 | # Copyright 2018-2023 Claudio Kuenzler                                                   #
  22 | # Copyright 2020 Matthias Kneer                                                          #
  23 | # Copyright 2021-2023 Steffen Eichler                                                    #
  24 | # Copyright 2021 lopf                                                                    #
  25 | #                                                                                        #
  26 | # History:                                                                               #
  27 | # 20180629 alpha Started programming of script                                           #
  28 | # 20180713 beta1 Public release in repository                                            #
  29 | # 20180803 beta2 Check for "type", echo project name in "all workload" check, too        #
  30 | # 20180806 beta3 Fix important bug in for loop in workload check, check for 'paused'     #
  31 | # 20180906 beta4 Catch cluster not found and zero workloads in workload check            #
  32 | # 20180906 beta5 Fix paused check (type 'object' has no elements to extract (arg 5)      #
  33 | # 20180921 beta6 Added pod(s) check within a project                                     #
  34 | # 20180926 beta7 Handle a workflow in status 'updating' as warning, not critical         #
  35 | # 20181107 beta8 Missing pod check type in help, documentation completed                 #
  36 | # 20181109 1.0.0 Do not alert for succeeded pods                                         #
  37 | # 20190308 1.1.0 Added node(s) check                                                     #
  38 | # 20190903 1.1.1 Detect invalid hostname (non-API hostname)                              #
  39 | # 20190903 1.2.0 Allow self-signed certificates (-s)                                     #
  40 | # 20190913 1.2.1 Detect additional redirect (308)                                        #
  41 | # 20200129 1.2.2 Fix typos in workload perfdata (#11) and single cluster health (#12)    #
  42 | # 20200523 1.2.3 Handle 403 forbidden error (#15)                                        #
  43 | # 20200617 1.3.0 Added ignore parameter (-i)                                             #
  44 | # 20210210 1.4.0 Checking specific workloads and pods inside a namespace                 #
  45 | # 20210413 1.5.0 Plugin now uses jq instead of jshon, fix cluster error check (#19)      #
  46 | # 20210504 1.6.0 Add usage performance data on single cluster check, fix project check   #
  47 | # 20210824 1.6.1 Fix cluster and project not found error (#24)                           #
  48 | # 20211021 1.7.0 Check for additional node (pressure) conditions (#27)                   #
  49 | # 20211201 1.7.1 Fix cluster state detection (#26)                                       #
  50 | # 20220610 1.8.0 More performance data, long parameters, other improvements (#31)        #
  51 | # 20220729 1.9.0 Output improvements (#32), show workload namespace (#33)                #
  52 | # 20220909 1.10.0 Fix ComponentStatus (#35), show K8s version in single cluster check    #
  53 | # 20220909 1.10.0 Allow ignoring statuses on workload checks (#29)                       #
  54 | # 20230110 1.11.0 Allow ignoring workload names, provisioning cluster not critical (#39) #
  55 | # 20230202 1.12.0 Add local-certs check type                                             #
  56 | # 20231208 1.12.1 Use 'command -v' instead of 'which' for required command check         #
  57 | ##########################################################################################
  58 | # (Pre-)Define some fixed variables
  59 | STATE_OK=0              # define the exit code if status is OK
  60 | STATE_WARNING=1         # define the exit code if status is Warning
  61 | STATE_CRITICAL=2        # define the exit code if status is Critical
  62 | STATE_UNKNOWN=3         # define the exit code if status is Unknown
  63 | export PATH=/usr/local/bin:/usr/bin:/bin:$PATH # Set path
  64 | proto=http		# Protocol to use, default is http, can be overwritten with -S parameter
  65 | version=1.12.1
  66 | ##########################################################################################
  67 | # functions
  68 | 
  69 | # https://kubernetes.io/docs/reference/kubernetes-api/common-definitions/quantity/
  70 | # convert memory to smallest possible value byte depending on unit
  71 | function convertMemory()
  72 | {
  73 |   local memory_count=$1
  74 |   local memory_unit=$2
  75 |   
  76 |   if [[ ${memory_unit} == "Ei" ]]; then
  77 |     memory=$(( ${memory_count} * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 ))
  78 |   elif [[ ${memory_unit} == "E" ]]; then
  79 |     memory=$(( ${memory_count} * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 ))
  80 |   elif [[ ${memory_unit} == "Pi" ]]; then
  81 |     memory=$(( ${memory_count} * 1024 * 1024 * 1024 * 1024 * 1024 ))
  82 |   elif [[ ${memory_unit} == "P" ]]; then
  83 |     memory=$(( ${memory_count} * 1000 * 1000 * 1000 * 1000 * 1000 ))
  84 |   elif [[ ${memory_unit} == "Ti" ]]; then
  85 |     memory=$(( ${memory_count} * 1024 * 1024 * 1024 * 1024 ))
  86 |   elif [[ ${memory_unit} == "T" ]]; then
  87 |     memory=$(( ${memory_count} * 1000 * 1000 * 1000 * 1000 ))
  88 |   elif [[ ${memory_unit} == "Gi" ]]; then
  89 |     memory=$(( ${memory_count} * 1024 * 1024 * 1024 ))
  90 |   elif [[ ${memory_unit} == "G" ]]; then
  91 |     memory=$(( ${memory_count} * 1000 * 1000 * 1000 ))
  92 |   elif [[ ${memory_unit} == "Mi" ]]; then
  93 |     memory=$(( ${memory_count} * 1024 * 1024 ))
  94 |   elif [[ ${memory_unit} == "M" ]]; then
  95 |     memory=$(( ${memory_count} * 1000 * 1000 ))
  96 |   elif [[ ${memory_unit} == "Ki" ]]; then
  97 |     memory=$(( ${memory_count} * 1024 ))
  98 |   elif [[ ${memory_unit} == "k" ]]; then
  99 |     memory=$(( ${memory_count} * 1000 ))
 100 |   elif [[ ${memory_unit} == "m" ]]; then
 101 |     memory=$(( ${memory_count} / 1000 ))
 102 |   elif [[ ${memory_unit} == "" ]]; then
 103 |     memory=$(( ${memory_count} ))
 104 |   else
 105 |     echo "UNKNOWN: unexpected memory unit (${memory_unit})."
 106 |     exit ${STATE_UNKNOWN}
 107 |   fi
 108 | 
 109 |   printf $memory
 110 | }
 111 | 
 112 | # convert cpu to smallest possible value (m = milli CPU) depending on unit
 113 | function convertCpu()
 114 | {
 115 |   local cpu_count=$1
 116 |   local cpu_unit=$2
 117 |   
 118 |   # m = milli CPU
 119 |   if [[ ${cpu_unit} == "m" ]]; then
 120 |     cpu=${cpu_count}
 121 |   # no unit means full cpu
 122 |   elif [[ ${cpu_unit} == "" ]]; then
 123 |     cpu=$(( ${cpu_count} * 1000 ))
 124 |   else
 125 |     echo "UNKNOWN: unexpected cpu unit (${cpu_unit})."
 126 |     exit ${STATE_UNKNOWN}
 127 |   fi
 128 | 
 129 |   printf $cpu
 130 | }
 131 | 
 132 | # convert pod to smallest possible value (one pod) depending on unit
 133 | function convertPods()
 134 | {
 135 |   local pods_count=$1
 136 |   local pods_unit=$2
 137 | 
 138 |   # k = 1000 pods
 139 |   if [[ ${pods_unit} == "k" ]]; then
 140 |     pods=$(( ${pods_count} * 1000 ))
 141 |   # no unit
 142 |   elif [[ ${pods_unit} == "" ]]; then
 143 |     pods=${pods_count}
 144 |   else
 145 |     echo "UNKNOWN: unexpected pods unit (${pods_unit})."
 146 |     exit ${STATE_UNKNOWN}
 147 |   fi
 148 | 
 149 |   printf $pods
 150 | }
 151 | 
 152 | # We all need help from time to time
 153 | usage ()
 154 | {
 155 | printf "check_rancher2 v ${version} (c) 2018-2023 Claudio Kuenzler and contributers (published under GPLv2)
 156 | Usage: $0 -H Rancher2Address -U user-token -P password [-S] -t checktype [-c cluster] [-p project] [-n namespace] [-w workload] [-o pod]
 157 | 
 158 | Options:
 159 | \t[ -H | --apihost ] Address of Rancher 2 API (e.g. rancher.example.com)
 160 | \t[ -U | --apiuser ] API username (Access Key)
 161 | \t[ -P | --apipass ] API password (Secret Key)
 162 | \t[ -S | --secure  ] Use https instead of http
 163 | \t[ -s | --selfsigned ] Allow self-signed certificates
 164 | \t[ -t | --type ] Check type (see list below for available check types)
 165 | \t[ -c | --clustername ] Cluster name (for specific cluster check)
 166 | \t[ -p | --projectname ] Project name (for specific project check, needed for workload checks)
 167 | \t[ -n | --namespacename ] Namespace name (needed for specific workload or pod checks)
 168 | \t[ -w | --workloadname ] Workload name (for specific workload check)
 169 | \t[ -o | --podname ] Pod name (for specific pod check, this makes only sense if you use static pods)
 170 | \t[ -i | --ignore ] Comma-separated list of status(es) to ignore (node and workload check types), list of workload name(s) to ignore (workload check type) or certificate to ignore (local-certs check type)
 171 | \t[ --cpu-warn ] Exit with WARNING status if more than PERCENT of cpu capacity is used (supported check types: node, cluster)
 172 | \t[ --cpu-crit ] Exit with CRITICAL status if more than PERCENT of cpu capacity is used (supported check types: node, cluster)
 173 | \t[ --memory-warn ] Exit with WARNING status if more than PERCENT of mem capacity is used (supported check types: node, cluster)
 174 | \t[ --memory-crit ] Exit with CRITICAL status if more than PERCENT of mem capacity is used (supported check types: node, cluster)
 175 | \t[ --pods-warn ] Exit with WARNING status if more than PERCENT of pod capacity is used (supported check types: node, cluster)
 176 | \t[ --pods-crit ] Exit with CRITICAL status if more than PERCENT of pod capacity is used (supported check types: node, cluster)
 177 | \t[ --cert-warn ] Warning threshold in days to warn before a certificate expires (supported check types: local-certs)
 178 | \t[ -h  | --help ] Help. I need somebody. Help. Not just anybody. Heeeeeelp!
 179 | 
 180 | Check Types:
 181 | \tinfo -> Informs about available clusters and projects and their API ID's. These ID's are needed for specific checks.
 182 | \tcluster -> Checks the current status of all clusters or of a specific cluster (defined with -c clusterid)
 183 | \tnode -> Checks the current status of nodes in all clusters or of nodes in a specific cluster (defined with -c clusterid)
 184 | \tproject -> Checks the current status of all projects or of a specific project (defined with -p projectid)
 185 | \tworkload -> Checks the current status of all or a specific (-w workloadname) workload within a project (-p projectid must be set!)
 186 | \tpod -> Checks the current status of all or a specific (-o podname -n namespace) pod within a project (-p projectid must be set!)
 187 | \tlocal-certs -> Checks the current status of all internal Rancher certificates (e.g. rancher-webhook) in local cluster under the System project (namespace: cattle-system)
 188 | "
 189 | exit ${STATE_UNKNOWN}
 190 | }
 191 | #########################################################################
 192 | # Check for necessary commands: the exit status of "command -v" tells whether the command can be found in PATH
 193 | for cmd in jq curl; do
 194 |  if ! command -v "${cmd}" >/dev/null 2>&1; then
 195 |    echo "UNKNOWN: ${cmd} does not exist, please check if command exists and PATH is correct"
 196 |    exit ${STATE_UNKNOWN}
 197 |  fi
 198 | done
 199 | #########################################################################
 200 | PARSED_ARGUMENTS=$(getopt -a -n check_rancher2 -o H:U:P:t:c:p:n:w:o:Ssi:h --long apihost:,apiuser:,apipass:,type:,clustername:,projectname:,namespacename:,workloadname:,podname:,secure,selfsigned,ignore:,cpu-warn:,cpu-crit:,memory-warn:,memory-crit:,pods-warn:,pods-crit:,cert-warn:,help -- "$@")
 201 | VALID_ARGUMENTS=$?  # exit status of getopt: non-zero when an option is unknown or lacks its argument
 202 | if [ "$VALID_ARGUMENTS" != "0" ]; then
 203 |   usage  # prints the help text and exits
 204 | fi
 205 | #########################################################################
 206 | # Walk through the option list normalised by getopt and store every value in its variable
 207 | eval set -- "${PARSED_ARGUMENTS}"
 208 | while true; do
 209 |   case "${1}" in
 210 |     -h|--help)           usage ;;
 211 |     --)                  shift; break ;;
 212 |     -S|--secure)         proto=https;         shift ;;
 213 |     -s|--selfsigned)     selfsigned="-k";     shift ;;
 214 |     -H|--apihost)        apihost="$2";        shift 2 ;;
 215 |     -U|--apiuser)        apiuser="$2";        shift 2 ;;
 216 |     -P|--apipass)        apipass="$2";        shift 2 ;;
 217 |     -t|--type)           type="$2";           shift 2 ;;
 218 |     -c|--clustername)    clustername="$2";    shift 2 ;;
 219 |     -p|--projectname)    projectname="$2";    shift 2 ;;
 220 |     -n|--namespacename)  namespacename="$2";  shift 2 ;;
 221 |     -w|--workloadname)   workloadname="$2";   shift 2 ;;
 222 |     -o|--podname)        podname="$2";        shift 2 ;;
 223 |     -i|--ignore)         ignore="$2";         shift 2 ;;
 224 |     --cpu-warn)          cpu_warn="$2";       shift 2 ;;
 225 |     --cpu-crit)          cpu_crit="$2";       shift 2 ;;
 226 |     --memory-warn)       memory_warn="$2";    shift 2 ;;
 227 |     --memory-crit)       memory_crit="$2";    shift 2 ;;
 228 |     --pods-warn)         pods_warn="$2";      shift 2 ;;
 229 |     --pods-crit)         pods_crit="$2";      shift 2 ;;
 230 |     --cert-warn)         cert_warn="$2";      shift 2 ;;
 231 |     *) echo "Unexpected option: $1 - this should not happen. Please consult --help for valid options."
 232 |        usage ;;
 233 |   esac
 234 | done
 235 | #########################################################################
 236 | # Did user obey to usage? The mandatory values must be non-empty (quoted, so a value with spaces or glob characters is tested as one word)
 237 | if [ -z "$apihost" ]; then
 238 |   echo -e "CHECK_RANCHER2 UNKNOWN - Missing Rancher 2.x API host address"
 239 |   exit ${STATE_UNKNOWN}
 240 | fi
 241 | 
 242 | if [ -z "$apiuser" ]; then
 243 |   echo -e "CHECK_RANCHER2 UNKNOWN - Missing API user"
 244 |   exit ${STATE_UNKNOWN}
 245 | fi
 246 | 
 247 | if [ -z "$apipass" ]; then
 248 |   echo -e "CHECK_RANCHER2 UNKNOWN - Missing API password"
 249 |   exit ${STATE_UNKNOWN}
 250 | fi
 251 | 
 252 | if [ -z "$type" ]; then
 253 |   echo -e "CHECK_RANCHER2 UNKNOWN - Missing check type"
 254 |   exit ${STATE_UNKNOWN}
 255 | fi
 256 | # Threshold sanity checks. An unset threshold is evaluated as 0 by -gt, so a warn value without its crit value is rejected here too
 257 | if [[ "$cpu_warn" -gt "$cpu_crit" ]]; then
 258 |   echo -e "CHECK_RANCHER2 UNKNOWN - cpu-warn should be lower than cpu-crit"
 259 |   exit ${STATE_UNKNOWN}
 260 | fi
 261 | 
 262 | if [[ "$memory_warn" -gt "$memory_crit" ]]; then
 263 |   echo -e "CHECK_RANCHER2 UNKNOWN - memory-warn should be lower than memory-crit"
 264 |   exit ${STATE_UNKNOWN}
 265 | fi
 266 | 
 267 | if [[ "$pods_warn" -gt "$pods_crit" ]]; then
 268 |   echo -e "CHECK_RANCHER2 UNKNOWN - pods-warn should be lower than pods-crit"
 269 |   exit ${STATE_UNKNOWN}
 270 | fi
 271 | 
 272 | #########################################################################
 273 | # Base communication check
 274 | apicheck=$(curl -s ${selfsigned} -o /dev/null -w "%{http_code}" -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project")
 275 | 
 276 | # Detect failures
 277 | if [[ $apicheck = 000 ]]; then
 278 |   echo -e "CHECK_RANCHER2 UNKNOWN - Invalid host address detected: ${apihost}. Use valid IP or DNS name on which the Rancher 2 API is accessible."
 279 |   exit ${STATE_UNKNOWN}
 280 | elif [[ $apicheck = 301 ]]; then
 281 |   echo -e "CHECK_RANCHER2 UNKNOWN - Redirect detected. Maybe http to https? Use -S parameter."
 282 |   exit ${STATE_UNKNOWN}
 283 | elif [[ $apicheck = 302 ]]; then
 284 |   echo -e "CHECK_RANCHER2 UNKNOWN - Redirect detected. Maybe http to https? Use -S parameter."
 285 |   exit ${STATE_UNKNOWN}
 286 | elif [[ $apicheck = 308 ]]; then
 287 |   echo -e "CHECK_RANCHER2 UNKNOWN - Redirect detected. Maybe http to https? Use -S parameter."
 288 |   exit ${STATE_UNKNOWN}
 289 | elif [[ $apicheck = 401 ]]; then
 290 |   echo -e "CHECK_RANCHER2 WARNING - Authentication failed"
 291 |   exit ${STATE_WARNING}
 292 | elif [[ $apicheck = 403 ]]; then
 293 |   echo -e "CHECK_RANCHER2 CRITICAL - Access to API forbidden"
 294 |   exit ${STATE_CRITICAL}
 295 | elif [[ $apicheck -gt 499 ]]; then
 296 |   echo -e "CHECK_RANCHER2 CRITICAL - API Returned HTTP $apicheck error"
 297 |   exit ${STATE_CRITICAL}
 298 | fi
 299 | 
 300 | #########################################################################
 301 | # Do the checks
 302 | case ${type} in 
 303 | 
 304 | # --- info --- #
 305 | info)
 306 | # List every cluster and project together with its API id
 307 | api_out_clusters=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/clusters")
 308 | declare -a cluster_ids=( $(jq -r '.data[].id' <<< "$api_out_clusters") )
 309 | declare -a cluster_names=( $(jq -r '.data[].name' <<< "$api_out_clusters") )
 310 | 
 311 | api_out_project=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project")
 312 | declare -a project_ids=( $(jq -r '.data[].id' <<< "$api_out_project") )
 313 | declare -a project_names=( $(jq -r '.data[].name' <<< "$api_out_project") )
 314 | 
 315 | #echo ${cluster_ids[*]}     # Enable for debugging
 316 | #echo ${cluster_names[*]}   # Enable for debugging
 317 | #echo ${project_ids[*]}     # Enable for debugging
 318 | #echo ${project_names[*]}   # Enable for debugging
 319 | 
 320 | # Pair every id with the name stored at the same position of the name list,
 321 | # giving entries of the form "<id> alias <name> -"
 322 | for idx in "${!cluster_ids[@]}"; do
 323 |   pretty_clusters[$idx]="${cluster_ids[$idx]} alias ${cluster_names[$idx]} -"
 324 | done
 325 | 
 326 | for idx in "${!project_ids[@]}"; do
 327 |   pretty_projects[$idx]="${project_ids[$idx]} alias ${project_names[$idx]} -"
 328 | done
 329 | 
 330 | # Status text first, the counters after the pipe
 331 | echo "CHECK_RANCHER2 OK - Found ${#cluster_ids[*]} clusters: ${pretty_clusters[*]} and ${#project_ids[*]} projects: ${pretty_projects[*]}|'clusters'=${#cluster_ids[*]};;;; 'projects'=${#project_ids[*]};;;;"
 332 | exit ${STATE_OK}
 333 | ;;
 334 | 
 335 | # --- cluster status check --- #
 336 | cluster)
 337 | if [[ -z $clustername ]]; then
 338 | 
 339 | # Check status of all clusters
 340 |   api_out_clusters=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/clusters")
 341 |   declare -a cluster_ids=( $(echo "$api_out_clusters" | jq -r '.data[].id') )
 342 |   # Every finding is appended as its own array element, so no finding can overwrite one recorded earlier
 343 |   declare -a componenterrors=() componentwarnings=() clustererrors=() clusterwarnings=()
 344 |   for cluster in ${cluster_ids[*]}; do
 345 |     # Cut this cluster's object out of the listing once; --arg hands the id to jq as data, not as filter text
 346 |     cluster_json=$(echo "$api_out_clusters" | jq --arg id "${cluster}" '.data[] | select(.id == $id)')
 347 |     clusteralias=$(echo "$cluster_json" | jq -r '.name')
 348 |     clusterstate=$(echo "$cluster_json" | jq -r '.state')
 349 |     declare -a component=( $(echo "$cluster_json" | jq -r '.componentStatuses[]?.name') )
 350 |     declare -a healthstatus=( $(echo "$cluster_json" | jq -r '.componentStatuses[]?.conditions[].status') )
 351 | 
 352 |     # provisioning is reported as warning, every other non-active state as error
 353 |     if [[ "${clusterstate}" = "provisioning" ]]; then
 354 |         componentwarnings+=("cluster ${clusteralias} is in ${clusterstate} state -")
 355 |         clusterwarnings+=("${cluster}")
 356 |     elif [[ "${clusterstate}" != "active" ]]; then
 357 |         componenterrors+=("cluster ${clusteralias} is in ${clusterstate} state -")
 358 |         clustererrors+=("${cluster}")
 359 |     fi
 360 | 
 361 |     # component[] and healthstatus[] are read as parallel lists; c is the position in both
 362 |     c=0
 363 |     for status in ${healthstatus[*]}; do
 364 |       if [[ ${status} != True ]]; then
 365 |         componenterrors+=("${component[$c]} in cluster ${clusteralias} is not healthy -")
 366 |         clustererrors+=("${cluster}")
 367 |       fi
 368 |       c=$(( c + 1 ))
 369 |     done
 370 |   done
 371 | 
 372 |   clustererrorcount=$(echo ${clustererrors[*]} | tr ' ' '\n' | sort -u | tr '\n' ' ' | wc -w)
 373 |   clusterwarningcount=$(echo ${clusterwarnings[*]} | tr ' ' '\n' | sort -u | tr '\n' ' ' | wc -w)
 374 | 
 375 |   if [[ ${#componenterrors[*]} -gt 0 ]]; then
 376 |     echo "CHECK_RANCHER2 CRITICAL - ${componenterrors[*]}|'clusters_total'=${#cluster_ids[*]};;;; 'clusters_errors'=${clustererrorcount};;;; 'clusters_warning'=${clusterwarningcount};;;;"
 377 |     exit ${STATE_CRITICAL}
 378 |   elif [[ ${#componentwarnings[*]} -gt 0 ]]; then
 379 |     echo "CHECK_RANCHER2 WARNING - ${componentwarnings[*]}|'clusters_total'=${#cluster_ids[*]};;;; 'clusters_errors'=${clustererrorcount};;;; 'clusters_warning'=${clusterwarningcount};;;;"
 380 |     exit ${STATE_WARNING}
 381 |   else
 382 |     echo "CHECK_RANCHER2 OK - All clusters (${#cluster_ids[*]}) are healthy|'clusters_total'=${#cluster_ids[*]};;;; 'clusters_errors'=${#componenterrors[*]};;;; 'clusters_warning'=${clusterwarningcount};;;;"
 383 |     exit ${STATE_OK}
 384 |   fi
 385 | 
 386 | else
 387 |  
 388 | # Check status of a single cluster
 389 |   api_out_single_cluster=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/clusters/${clustername}")
 390 | 
 391 |   # Check if that given cluster name exists
 392 |   if [[ -n $(echo "$api_out_single_cluster" | grep -i "NotFound") ]]; then
 393 |     echo "CHECK_RANCHER2 CRITICAL - Cluster $clustername not found. Hint: Use '-t info' to identify cluster and project names."
 394 |     exit ${STATE_CRITICAL}
 395 |   fi
 396 | 
 397 |   clusteralias=$(echo "$api_out_single_cluster" | jq -r '.name')
 398 |   clusterstate=$(echo "$api_out_single_cluster" | jq -r '.state')
 399 |   k8sversion=$(echo "$api_out_single_cluster" | jq -r '.version.gitVersion')
 400 |   declare -a component=( $(echo "$api_out_single_cluster" | jq -r '.componentStatuses[]?.name') )
 401 |   declare -a healthstatus=( $(echo "$api_out_single_cluster" | jq -r '.componentStatuses[]?.conditions[].status') )
 402 | 
 403 |   # capacity
 404 |   declare -a capacity_cpu=( $(echo "$api_out_single_cluster" | jq -r '.capacity.cpu') )
 405 |   declare -a capacity_memory=( $(echo "$api_out_single_cluster" | jq -r '.capacity.memory') )
 406 |   declare -a capacity_pods=( $(echo "$api_out_single_cluster" | jq -r '.capacity.pods') )
 407 | 
 408 |   # requested
 409 |   declare -a requested_cpu=( $(echo "$api_out_single_cluster" | jq -r '.requested.cpu') )
 410 |   declare -a requested_memory=( $(echo "$api_out_single_cluster" | jq -r '.requested.memory') )
 411 |   declare -a requested_pods=( $(echo "$api_out_single_cluster" | jq -r '.requested.pods') )
 412 | 
 413 |   # split capacity_cpu (leading digits removed -> unit, trailing letters removed -> count)
 414 |   capacity_cpu_unit=( $(echo "${capacity_cpu}" | sed 's/^[0-9]*//g') )
 415 |   capacity_cpu_count=( $(echo "${capacity_cpu}" | sed 's/[a-zA-Z]*$//g') )
 416 | 
 417 |   # convert capacity_cpu depending on unit
 418 |   capacity_cpu=$(convertCpu ${capacity_cpu_count} ${capacity_cpu_unit})
 419 | 
 420 |   # split capacity_memory
 421 |   declare -a capacity_memory_unit=( $(echo "${capacity_memory}" | sed 's/^[0-9]*//g') )
 422 |   declare -a capacity_memory_count=( $(echo "${capacity_memory}" | sed 's/[a-zA-Z]*$//g') )
 423 | 
 424 |   # convert capacity_memory depending on unit
 425 |   capacity_memory=$(convertMemory ${capacity_memory_count} ${capacity_memory_unit})
 426 | 
 427 |   # split capacity_pods
 428 |   declare -a capacity_pods_unit=( $(echo "${capacity_pods}" | sed 's/^[0-9]*//g') )
 429 |   declare -a capacity_pods_count=( $(echo "${capacity_pods}" | sed 's/[a-zA-Z]*$//g') )
 430 | 
 431 |   # convert capacity_pods depending on unit
 432 |   capacity_pods=$(convertPods ${capacity_pods_count} ${capacity_pods_unit})
 433 | 
 434 |   # split requested_cpu
 435 |   requested_cpu_unit=( $(echo "${requested_cpu}" | sed 's/^[0-9]*//g') )
 436 |   requested_cpu_count=( $(echo "${requested_cpu}" | sed 's/[a-zA-Z]*$//g') )
 437 | 
 438 |   # convert requested_cpu depending on unit
 439 |   requested_cpu=$(convertCpu ${requested_cpu_count} ${requested_cpu_unit})
 440 | 
 441 |   # split requested_memory
 442 |   declare -a requested_memory_unit=( $(echo "${requested_memory}" | sed 's/^[0-9]*//g') )
 443 |   declare -a requested_memory_count=( $(echo "${requested_memory}" | sed 's/[a-zA-Z]*$//g') )
 444 | 
 445 |   # convert requested_memory depending on unit
 446 |   requested_memory=$(convertMemory ${requested_memory_count} ${requested_memory_unit})
 447 | 
 448 |   # split requested_pods
 449 |   declare -a requested_pods_unit=( $(echo "${requested_pods}" | sed 's/^[0-9]*//g') )
 450 |   declare -a requested_pods_count=( $(echo "${requested_pods}" | sed 's/[a-zA-Z]*$//g') )
 451 | 
 452 |   # convert requested_pods depending on unit
 453 |   requested_pods=$(convertPods ${requested_pods_count} ${requested_pods_unit})
 454 | 
 455 |   # Component problems are collected as array elements, so ${#componenterrors[*]} is their real number
 456 |   declare -a componenterrors=()
 457 |   if [[ "${clusterstate}" != "active" ]]; then
 458 |     componenterrors+=("cluster ${clusteralias} is in ${clusterstate} state -")
 459 |   fi
 460 | 
 461 |   # component[] and healthstatus[] are read as parallel lists; c is the position in both
 462 |   c=0
 463 |   for status in ${healthstatus[*]}; do
 464 |     if [[ ${status} != True ]]; then
 465 |       componenterrors+=("${component[$c]} is not healthy -")
 466 |     fi
 467 |     c=$(( c + 1 ))
 468 |   done
 469 | 
 470 |   # usage in percent (integer arithmetic)
 471 |   usage_cpu=$(( 100 * $requested_cpu/$capacity_cpu ))
 472 |   usage_memory=$(( 100 * $requested_memory/$capacity_memory ))
 473 |   usage_pods=$(( 100 * $requested_pods/$capacity_pods ))
 474 | 
 475 |   # threshold checks: a threshold that was not given is skipped (an empty value would be compared as 0);
 476 |   # exceeding *-crit is an error, exceeding only *-warn is a warning, as documented in usage
 477 |   resourceerrors=""
 478 |   resourcewarnings=""
 479 |   if [[ -n "$cpu_crit" && "$usage_cpu" -gt "$cpu_crit" ]]; then
 480 |     resourceerrors+="CPU usage ${usage_cpu}% > threshold of ${cpu_crit}% "
 481 |   elif [[ -n "$cpu_warn" && "$usage_cpu" -gt "$cpu_warn" ]]; then
 482 |     resourcewarnings+="CPU usage ${usage_cpu}% > threshold of ${cpu_warn}% "
 483 |   fi
 484 |   if [[ -n "$memory_crit" && "$usage_memory" -gt "$memory_crit" ]]; then
 485 |     resourceerrors+="MEMORY usage ${usage_memory}% > threshold of ${memory_crit}% "
 486 |   elif [[ -n "$memory_warn" && "$usage_memory" -gt "$memory_warn" ]]; then
 487 |     resourcewarnings+="MEMORY usage ${usage_memory}% > threshold of ${memory_warn}% "
 488 |   fi
 489 |   if [[ -n "$pods_crit" && "$usage_pods" -gt "$pods_crit" ]]; then
 490 |     resourceerrors+="PODS Usage ${usage_pods} > threshold of ${pods_crit} "
 491 |   elif [[ -n "$pods_warn" && "$usage_pods" -gt "$pods_warn" ]]; then
 492 |     resourcewarnings+="PODS Usage ${usage_pods} > threshold of ${pods_warn} "
 493 |   fi
 494 | 
 495 |   perf_output="'component_errors'=${#componenterrors[*]};;;; 'cpu'=${requested_cpu};;;;${capacity_cpu} 'memory'=${requested_memory}B;;;0;${capacity_memory} 'pods'=${requested_pods};;;;${capacity_pods} 'usage_cpu'=${usage_cpu}%;${cpu_warn};${cpu_crit};0;100 'usage_memory'=${usage_memory}%;${memory_warn};${memory_crit};0;100 'usage_pods'=${usage_pods}%;${pods_warn};${pods_crit};0;100"
 496 | 
 497 |   if [[ ${#componenterrors[*]} -gt 0 && -n "${resourceerrors}${resourcewarnings}" ]]; then
 498 |     echo "CHECK_RANCHER2 CRITICAL - Cluster $clusteralias has resource problems and component errors: ${resourceerrors}${resourcewarnings} ${componenterrors[*]}|'cluster_healthy'=0;;;; ${perf_output}"
 499 |     exit ${STATE_CRITICAL}
 500 |   elif [[ ${#componenterrors[*]} -gt 0 ]]; then
 501 |     echo "CHECK_RANCHER2 CRITICAL - Cluster $clusteralias: ${componenterrors[*]}|'cluster_healthy'=0;;;; ${perf_output}"
 502 |     exit ${STATE_CRITICAL}
 503 |   elif [[ -n "${resourceerrors}" ]]; then
 504 |     echo "CHECK_RANCHER2 CRITICAL - Cluster $clusteralias has resource problems: ${resourceerrors}${resourcewarnings}|'cluster_healthy'=0;;;; ${perf_output}"
 505 |     exit ${STATE_CRITICAL}
 506 |   elif [[ -n "${resourcewarnings}" ]]; then
 507 |     echo "CHECK_RANCHER2 WARNING - Cluster $clusteralias has resource problems: ${resourcewarnings}|'cluster_healthy'=0;;;; ${perf_output}"
 508 |     exit ${STATE_WARNING}
 509 |   else
 510 |     echo "CHECK_RANCHER2 OK - Cluster $clusteralias ($k8sversion) is healthy|'cluster_healthy'=1;;;; ${perf_output}"
 511 |     exit ${STATE_OK}
 512 |   fi
 513 | 
 514 | fi
 515 | ;;
 516 | 
 517 | # --- node status check --- #
 518 | node)
 519 | if [[ -z $clustername ]]; then
 520 | 
 521 | # Check status of all nodes in all clusters
 522 |   api_out_nodes=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/nodes")
 523 |   declare -a node_names=( $(echo "$api_out_nodes" | jq -r '.data[].nodeName') )
 524 |   declare -a node_status=( $(echo "$api_out_nodes" | jq -r '.data[].state') )
 525 |   declare -a node_cluster_member=( $(echo "$api_out_nodes" | jq -r '.data[].clusterId') )
 526 |   # 1-based positions (within .data, i.e. matching node_names) of the nodes whose condition is in the bad state. The position
 527 |   # is taken from the node's own index, so a node lacking a condition type cannot shift the positions of the nodes after it.
 528 |   declare -a node_diskpressure=( $(echo "$api_out_nodes" | jq -r '.data | to_entries[] | select([.value.conditions[]? | select(.type=="DiskPressure" and .status=="True")] | length > 0) | .key + 1') )
 529 |   declare -a node_memorypressure=( $(echo "$api_out_nodes" | jq -r '.data | to_entries[] | select([.value.conditions[]? | select(.type=="MemoryPressure" and .status=="True")] | length > 0) | .key + 1') )
 530 |   declare -a node_kubeletready=( $(echo "$api_out_nodes" | jq -r '.data | to_entries[] | select([.value.conditions[]? | select(.type=="Ready" and .status=="False")] | length > 0) | .key + 1') )
 531 |   declare -a node_network=( $(echo "$api_out_nodes" | jq -r '.data | to_entries[] | select([.value.conditions[]? | select(.type=="NetworkUnavailable" and .status=="True")] | length > 0) | .key + 1') )
 532 |   # node capacity
 533 |   declare -a node_capacity_cpu=( $(echo "$api_out_nodes" | jq -r '.data[].capacity.cpu' ) )
 534 |   declare -a node_capacity_memory=( $(echo "$api_out_nodes" | jq -r '.data[].capacity.memory' ) )
 535 |   declare -a node_capacity_pods=( $(echo "$api_out_nodes" | jq -r '.data[].capacity.pods' ) )
 536 |   # node requested
 537 |   declare -a node_requested_cpu=( $(echo "$api_out_nodes" | jq -r '.data[].requested.cpu' ) )
 538 |   declare -a node_requested_memory=( $(echo "$api_out_nodes" | jq -r '.data[].requested.memory' ) )
 539 |   declare -a node_requested_pods=( $(echo "$api_out_nodes" | jq -r '.data[].requested.pods' ) )
 540 | 
 541 | 
 542 |   # Check node status (user controlled)
 543 |   i=0
 544 |   for node in ${node_names[*]}; do
 545 |     for status in ${node_status[$i]}; do
 546 |       if [[ ${status} != active ]]; then
 547 |         if [[ -n $(echo ${ignore} | grep -i ${status}) ]]; then
 548 |           nodeignored[$i]="${node} in cluster ${node_cluster_member[$i]} is ${node_status[$i]} but ignored \n"
 549 |         else
 550 |           nodeerrors[$i]="${node} in cluster ${node_cluster_member[$i]} is ${node_status[$i]} \n"
 551 |         fi
 552 |       fi
 553 |     done
 554 |   let i++
 555 |   done
 556 | 
 557 |   # Handle node pressure situations and other conditions (Kubernetes controlled); each list holds 1-based node positions
 558 |   # disk pressure
 559 |   for n in ${node_diskpressure[*]}; do
 560 |     hostid=$(( n - 1 ))
 561 |     member=${node_cluster_member[$hostid]}
 562 |     nodeerrors+=("${node_names[$hostid]} in cluster ${member} has Disk Pressure \n")
 563 |   done
 564 | 
 565 |   # memory pressure
 566 |   for n in ${node_memorypressure[*]}; do
 567 |     hostid=$(( n - 1 ))
 568 |     member=${node_cluster_member[$hostid]}
 569 |     nodeerrors+=("${node_names[$hostid]} in cluster ${member} has Memory Pressure \n")
 570 |   done
 571 | 
 572 |   # kubelet not ready
 573 |   for n in ${node_kubeletready[*]}; do
 574 |     hostid=$(( n - 1 ))
 575 |     member=${node_cluster_member[$hostid]}
 576 |     nodeerrors+=("Kubelet on node ${node_names[$hostid]} in cluster ${member} is not ready \n")
 577 |   done
 578 | 
 579 |   # network unavailable
 580 |   for n in ${node_network[*]}; do
 581 |     hostid=$(( n - 1 ))
 582 |     member=${node_cluster_member[$hostid]}
 583 |     nodeerrors+=("Network on node ${node_names[$hostid]} in cluster ${member} is unavailable \n")
 584 |   done
 585 | 
 586 |   # calculate total capacities: sum the converted per-node values of node_capacity_cpu / _memory / _pods
 587 |   nodes_capacity_cpu_total=0
 588 |   for capacity_cpu in ${node_capacity_cpu[@]}; do
 589 |     # split capacity_cpu: leading digits removed -> unit, trailing letters removed -> count
 590 |     capacity_cpu_unit=( $(echo "${capacity_cpu}" | sed 's/^[0-9]*//g') )
 591 |     capacity_cpu_count=( $(echo "${capacity_cpu}" | sed 's/[a-zA-Z]*$//g') )
 592 | 
 593 |     # convert capacity_cpu depending on unit (the loop variable is overwritten with the converted value)
 594 |     capacity_cpu=$(convertCpu ${capacity_cpu_count} ${capacity_cpu_unit})
 595 | 
 596 |     let nodes_capacity_cpu_total+=$capacity_cpu
 597 |   done
 598 | 
 599 |   nodes_capacity_memory_total=0
 600 |   for capacity_memory in ${node_capacity_memory[@]}; do
 601 |     # split capacity_memory into unit and count (same sed patterns as for cpu)
 602 |     capacity_memory_unit=( $(echo "${capacity_memory}" | sed 's/^[0-9]*//g') )
 603 |     capacity_memory_count=( $(echo "${capacity_memory}" | sed 's/[a-zA-Z]*$//g') )
 604 | 
 605 |     # convert capacity_memory depending on unit
 606 |     capacity_memory=$(convertMemory ${capacity_memory_count} ${capacity_memory_unit})
 607 | 
 608 |     let nodes_capacity_memory_total+=$capacity_memory
 609 |   done
 610 | 
 611 |   nodes_capacity_pods_total=0
 612 |   for capacity_pods in ${node_capacity_pods[@]}; do
 613 |     # split capacity_pods into unit and count
 614 |     capacity_pods_unit=( $(echo "${capacity_pods}" | sed 's/^[0-9]*//g') )
 615 |     capacity_pods_count=( $(echo "${capacity_pods}" | sed 's/[a-zA-Z]*$//g') )
 616 | 
 617 |     # convert capacity_pods depending on unit
 618 |     capacity_pods=$(convertPods ${capacity_pods_count} ${capacity_pods_unit})
 619 |      
 620 |     let nodes_capacity_pods_total+=$capacity_pods
 621 |   done
 622 | 
 623 |   # calculate total requested: same procedure for node_requested_cpu / _memory / _pods
 624 |   nodes_requested_cpu_total=0
 625 |   for requested_cpu in ${node_requested_cpu[@]}; do
 626 |     # split requested_cpu into unit and count
 627 |     requested_cpu_unit=( $(echo "${requested_cpu}" | sed 's/^[0-9]*//g') )
 628 |     requested_cpu_count=( $(echo "${requested_cpu}" | sed 's/[a-zA-Z]*$//g') )
 629 | 
 630 |     # convert requested_cpu depending on unit
 631 |     requested_cpu=$(convertCpu ${requested_cpu_count} ${requested_cpu_unit})
 632 | 
 633 |     let nodes_requested_cpu_total+=$requested_cpu
 634 |   done
 635 | 
 636 |   nodes_requested_memory_total=0
 637 |   for requested_memory in ${node_requested_memory[@]}; do
 638 |     # split requested_memory into unit and count
 639 |     requested_memory_unit=( $(echo "${requested_memory}" | sed 's/^[0-9]*//g') )
 640 |     requested_memory_count=( $(echo "${requested_memory}" | sed 's/[a-zA-Z]*$//g') )
 641 | 
 642 |     # convert requested_memory depending on unit
 643 |     requested_memory=$(convertMemory ${requested_memory_count} ${requested_memory_unit})
 644 | 
 645 |     let nodes_requested_memory_total+=$requested_memory
 646 |   done
 647 | 
 648 |   nodes_requested_pods_total=0
 649 |   for requested_pods in ${node_requested_pods[@]}; do
 650 |     # split requested_pods into unit and count
 651 |     requested_pods_unit=( $(echo "${requested_pods}" | sed 's/^[0-9]*//g') )
 652 |     requested_pods_count=( $(echo "${requested_pods}" | sed 's/[a-zA-Z]*$//g') )
 653 | 
 654 |     # convert requested_pods depending on unit
 655 |     requested_pods=$(convertPods ${requested_pods_count} ${requested_pods_unit})
 656 | 
 657 |     let nodes_requested_pods_total+=$requested_pods
 658 |   done
 659 | 
 660 |   perf_output="'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;; 'node_ignored'=${#nodeignored[*]};;;; 'nodes_cpu_total'=${nodes_requested_cpu_total};;;0;${nodes_capacity_cpu_total} 'nodes_memory_total'=${nodes_requested_memory_total}B;;;0;${nodes_capacity_memory_total} 'nodes_pods_total'=${nodes_requested_pods_total};;;0;${nodes_capacity_pods_total}"
 661 |   rc=${STATE_OK}
 662 |   if [[ ${#nodeerrors[*]} -gt 0 ]]; then
 663 |     summary="CRITICAL - ${#nodeerrors[*]} abnormal node states: ${nodeerrors[*]}${nodeignored[*]}"
 664 |     rc=${STATE_CRITICAL}
 665 |   elif [[ ${#nodeignored[*]} -gt 0 ]]; then
 666 |     summary="OK - All nodes OK - Info: ${#nodeignored[*]} node errors ignored: ${nodeerrors[*]}${nodeignored[*]}"
 667 |   else
 668 |     summary="OK - All ${#node_names[*]} nodes are active"
 669 |   fi
 670 |   echo "CHECK_RANCHER2 ${summary}|${perf_output}"
 671 |   exit ${rc}
 672 | 
 673 | else
 674 | 
 675 | # Check status of all nodes in a specific cluster
 676 |   api_out_nodes=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/nodes/?clusterId=${clustername}")
 677 | 
 678 |   # Check if that given cluster name exists (done first, so an error response is never parsed as a node list)
 679 |   if [[ -n $(echo "$api_out_nodes" | grep -i "NotFound") ]]; then
 680 |     echo "CHECK_RANCHER2 CRITICAL - Cluster $clustername not found. Hint: Use '-t info' to identify cluster and project names."; exit ${STATE_CRITICAL}
 681 |   fi
 682 | 
 683 |   declare -a node_names=( $(echo "$api_out_nodes" | jq -r '.data[].nodeName') )
 684 |   declare -a node_status=( $(echo "$api_out_nodes" | jq -r '.data[].state') )
 685 |   declare -a node_cluster_member=( $(echo "$api_out_nodes" | jq -r '.data[].clusterId') )  # referenced by the node messages of this branch
 686 |   # 1-based positions (matching node_names) of nodes with a bad condition; taken from each node's own index in .data
 687 |   declare -a node_diskpressure=( $(echo "$api_out_nodes" | jq -r '.data | to_entries[] | select([.value.conditions[]? | select(.type=="DiskPressure" and .status=="True")] | length > 0) | .key + 1') )
 688 |   declare -a node_memorypressure=( $(echo "$api_out_nodes" | jq -r '.data | to_entries[] | select([.value.conditions[]? | select(.type=="MemoryPressure" and .status=="True")] | length > 0) | .key + 1') )
 689 |   declare -a node_kubeletready=( $(echo "$api_out_nodes" | jq -r '.data | to_entries[] | select([.value.conditions[]? | select(.type=="Ready" and .status=="False")] | length > 0) | .key + 1') )
 690 |   declare -a node_network=( $(echo "$api_out_nodes" | jq -r '.data | to_entries[] | select([.value.conditions[]? | select(.type=="NetworkUnavailable" and .status=="True")] | length > 0) | .key + 1') )
 691 |   # node capacity
 692 |   declare -a node_capacity_cpu=( $(echo "$api_out_nodes" | jq -r '.data[].capacity.cpu' ) )
 693 |   declare -a node_capacity_memory=( $(echo "$api_out_nodes" | jq -r '.data[].capacity.memory' ) )
 694 |   declare -a node_capacity_pods=( $(echo "$api_out_nodes" | jq -r '.data[].capacity.pods' ) )
 695 |   # node requested
 696 |   declare -a node_requested_cpu=( $(echo "$api_out_nodes" | jq -r '.data[].requested.cpu' ) )
 697 |   declare -a node_requested_memory=( $(echo "$api_out_nodes" | jq -r '.data[].requested.memory' ) )
 698 |   declare -a node_requested_pods=( $(echo "$api_out_nodes" | jq -r '.data[].requested.pods' ) )
 699 | 
 700 |   # Check node status (user controlled); all nodes were requested for ${clustername}, so that id names the cluster in both messages
 701 |   i=0
 702 |   for node in ${node_names[*]}; do
 703 |     for status in ${node_status[$i]}; do
 704 |       if [[ ${status} != active ]]; then
 705 |         if [[ -n $(echo ${ignore} | grep -i ${status}) ]]; then
 706 |           nodeignored[$i]="${node} in cluster ${clustername} is ${node_status[$i]} but ignored \n"
 707 |         else
 708 |           nodeerrors[$i]="${node} in cluster ${clustername} is ${node_status[$i]} \n"
 709 |         fi
 710 |       fi
 711 |     done
 712 |   let i++
 713 |   done
 714 |     
 715 |   # check capacities per node 
 716 |   i=0
 717 |   for node in ${node_names[*]}; do
 718 |     # split node_capacity_cpu
 719 |     node_capacity_cpu_unit=( $(echo "${node_capacity_cpu[$i]}" | sed 's/^[0-9]*//g') )
 720 |     node_capacity_cpu_count=( $(echo "${node_capacity_cpu[$i]}" | sed 's/[a-zA-Z]*$//g') )
 721 | 
 722 |     # convert node_capacity_cpu depding on unit
 723 |     capacity_cpu=$(convertCpu ${node_capacity_cpu_count} ${node_capacity_cpu_unit})
 724 | 
 725 |     # split node_capacity_memory
 726 |     node_capacity_memory_unit=( $(echo "${node_capacity_memory[$i]}" | sed 's/^[0-9]*//g') )
 727 |     node_capacity_memory_count=( $(echo "${node_capacity_memory[$i]}" | sed 's/[a-zA-Z]*$//g') )
 728 | 
 729 |     # convert node_capacity_memory depnding on unit
 730 |     capacity_memory=$(convertMemory ${node_capacity_memory_count} ${node_capacity_memory_unit})
 731 | 
 732 |     # split node_capacity_pods
 733 |     node_capacity_pods_unit=( $(echo "${node_capacity_pods[$i]}" | sed 's/^[0-9]*//g') )
 734 |     node_capacity_pods_count=( $(echo "${node_capacity_pods[$i]}" | sed 's/[a-zA-Z]*$//g') )
 735 | 
 736 |     # convert node_capacity_pods depending on unit
 737 |     capacity_pods=$(convertPods ${node_capacity_pods_count} ${node_capacity_pods_unit})
 738 | 
 739 |     # split node_requested_cpu
 740 |     node_requested_cpu_unit=( $(echo "${node_requested_cpu[$i]}" | sed 's/^[0-9]*//g') )
 741 |     node_requested_cpu_count=( $(echo "${node_requested_cpu[$i]}" | sed 's/[a-zA-Z]*$//g') )
 742 | 
 743 |     # convert node_requested_cpu depending on unit 
 744 |     requested_cpu=$(convertCpu ${node_requested_cpu_count} ${node_requested_cpu_unit})
 745 | 
 746 |     # split node_requested_memory
 747 |     node_requested_memory_unit=( $(echo "${node_requested_memory[$i]}" | sed 's/^[0-9]*//g') )
 748 |     node_requested_memory_count=( $(echo "${node_requested_memory[$i]}" | sed 's/[a-zA-Z]*$//g') )
 749 | 
 750 |     # convert node_requested_memory depending on unit
 751 |     requested_memory=$(convertMemory ${node_requested_memory_count} ${node_requested_memory_unit})
 752 | 
 753 |     # split node_requested_pods
 754 |     node_requested_pods_unit=( $(echo "${node_requested_pods[$i]}" | sed 's/^[0-9]*//g') )
 755 |     node_requested_pods_count=( $(echo "${node_requested_pods[$i]}" | sed 's/[a-zA-Z]*$//g') )
 756 | 
 757 |     # convert node_requested_pods depending on unit
 758 |     requested_pods=$(convertPods ${node_requested_pods_count} ${node_requested_pods_unit})
 759 | 
 760 |     # usage
 761 |     usage_cpu=$(( 100 * $requested_cpu/$capacity_cpu ))
 762 |     usage_memory=$(( 100 * $requested_memory/$capacity_memory ))
 763 |     usage_pods=$(( 100 * $requested_pods/$capacity_pods ))
 764 | 
 765 |   node_perf_output+="${node}_cpu=${requested_cpu};;;0;${capacity_cpu} ${node}_memory=${requested_memory}B;;;0;${capacity_memory} ${node}_pods=${requested_pods};;;0;${capacity_pods} "
 766 | 
 767 |   # threshold checks
 768 |   # usage_cpu, usage_memory and usage_pods are the integer percentages computed
 769 |   # above. Every limit is optional: a limit that was not passed is skipped
 770 |   # instead of being compared as an empty string, which the numeric test would
 771 |   # evaluate as 0 and report as exceeded for every node with non-zero usage.
 772 |   # Crit is tested first; warn is only reported when crit did not trigger.
 773 |   # Both kinds of hits are appended to resourceerrors.
 774 |   # cpu
 775 |   if [[ -n "$cpu_crit" && "$usage_cpu" -gt "$cpu_crit" ]]; then
 776 |     resourceerrors+="${node} - CPU usage ${usage_cpu} higher than crit threshold of ${cpu_crit} \n"
 777 |   elif [[ -n "$cpu_warn" && "$usage_cpu" -gt "$cpu_warn" ]]; then
 778 |     resourceerrors+="${node} - CPU usage ${usage_cpu} higher than warn threshold of ${cpu_warn} \n"
 779 |   fi
 780 | 
 781 |   # memory
 782 |   if [[ -n "$memory_crit" && "$usage_memory" -gt "$memory_crit" ]]; then
 783 |     resourceerrors+="${node} - MEMORY usage ${usage_memory} higher than crit threshold of ${memory_crit} \n"
 784 |   elif [[ -n "$memory_warn" && "$usage_memory" -gt "$memory_warn" ]]; then
 785 |     resourceerrors+="${node} - MEMORY usage ${usage_memory} higher than warn threshold of ${memory_warn} \n"
 786 |   fi
 787 | 
 788 |   # pods
 789 |   if [[ -n "$pods_crit" && "$usage_pods" -gt "$pods_crit" ]]; then
 790 |     resourceerrors+="${node} - PODS Usage ${usage_pods} higher than crit threshold of ${pods_crit} \n"
 791 |   elif [[ -n "$pods_warn" && "$usage_pods" -gt "$pods_warn" ]]; then
 792 |     resourceerrors+="${node} - PODS Usage ${usage_pods} higher than warn threshold of ${pods_warn} \n"
 793 |   fi
 794 | 
 795 |   let i++
 796 |   done
 797 | 
 798 |   # Handle node pressure situations and other conditions (Kubernetes controlled)
 799 |   if [[ ${#node_diskpressure[*]} -gt 0 ]]; then
 800 |     for n in ${node_diskpressure[*]}; do
 801 |       hostid=$(( $n - 1 ))
 802 |       nodeerrors+=("${node_names[$hostid]} in cluster ${node_cluster_member[$hostid]} has Disk Pressure \n")
 803 |     done
 804 |   fi
 805 | 
 806 |   if [[ ${#node_memorypressure[*]} -gt 0 ]]; then
 807 |     for n in ${node_memorypressure[*]}; do
 808 |       hostid=$(( $n - 1 ))
 809 |       nodeerrors+=("${node_names[$hostid]} in cluster ${node_cluster_member[$hostid]} has Memory Pressure \n")
 810 |     done
 811 |   fi
 812 | 
 813 |   if [[ ${#node_kubeletready[*]} -gt 0 ]]; then
 814 |     for n in ${node_kubeletready[*]}; do
 815 |       hostid=$(( $n - 1 ))
 816 |       nodeerrors+=("Kubelet on node ${node_names[$hostid]} in cluster ${node_cluster_member[$hostid]} is not ready \n")
 817 |     done
 818 |   fi
 819 | 
 820 |   if [[ ${#node_network[*]} -gt 0 ]]; then
 821 |     for n in ${node_network[*]}; do
 822 |       hostid=$(( $n - 1 ))
 823 |       nodeerrors+=("Network on node ${node_names[$hostid]} in cluster ${node_cluster_member[$hostid]} is unavailable \n")
 824 |     done
 825 |   fi
 826 | 
 827 |   # calculate total capacities
 828 |   nodes_capacity_cpu_total=0
 829 |   for capacity_cpu in ${node_capacity_cpu[@]}; do
 830 |     # split capacity_cpu
 831 |     capacity_cpu_unit=( $(echo "${capacity_cpu}" | sed 's/^[0-9]*//g') )
 832 |     capacity_cpu_count=( $(echo "${capacity_cpu}" | sed 's/[a-zA-Z]*$//g') )
 833 |     
 834 |     # convert capacity_cpu depending on unit
 835 |     capacity_cpu=$(convertCpu ${capacity_cpu_count} ${capacity_cpu_unit})
 836 |     
 837 |     let nodes_capacity_cpu_total+=$capacity_cpu
 838 |   done
 839 | 
 840 |   nodes_capacity_memory_total=0
 841 |   for capacity_memory in ${node_capacity_memory[@]}; do
 842 |     # split capacity_memory
 843 |     capacity_memory_unit=( $(echo "${capacity_memory}" | sed 's/^[0-9]*//g') )
 844 |     capacity_memory_count=( $(echo "${capacity_memory}" | sed 's/[a-zA-Z]*$//g') )
 845 |     
 846 |     # convert capacity_memory depending on unit
 847 |     capacity_memory=$(convertMemory ${capacity_memory_count} ${capacity_memory_unit})
 848 |     
 849 |     let nodes_capacity_memory_total+=$capacity_memory
 850 |   done
 851 | 
 852 |   nodes_capacity_pods_total=0
 853 |   for capacity_pods in ${node_capacity_pods[@]}; do
 854 |     # split capacity_pods
 855 |     capacity_pods_unit=( $(echo "${capacity_pods}" | sed 's/^[0-9]*//g') )
 856 |     capacity_pods_count=( $(echo "${capacity_pods}" | sed 's/[a-zA-Z]*$//g') )
 857 |     
 858 |     # convert capacity_pods depending on unit
 859 |     capacity_pods=$(convertPods ${capacity_pods_count} ${capacity_pods_unit})
 860 |     
 861 |     let nodes_capacity_pods_total+=$capacity_pods
 862 |   done
 863 | 
 864 |   # calculate total requested
 865 |   nodes_requested_cpu_total=0
 866 |   for requested_cpu in ${node_requested_cpu[@]}; do
 867 |     # split requested_cpu
 868 |     requested_cpu_unit=( $(echo "${requested_cpu}" | sed 's/^[0-9]*//g') )
 869 |     requested_cpu_count=( $(echo "${requested_cpu}" | sed 's/[a-zA-Z]*$//g') )
 870 |     
 871 |     # convert requested_cpu depending on unit
 872 |     requested_cpu=$(convertCpu ${requested_cpu_count} ${requested_cpu_unit})
 873 |     
 874 |     let nodes_requested_cpu_total+=$requested_cpu
 875 |   done
 876 | 
 877 |   nodes_requested_memory_total=0
 878 |   for requested_memory in ${node_requested_memory[@]}; do
 879 |     # split requested_memory
 880 |     requested_memory_unit=( $(echo "${requested_memory}" | sed 's/^[0-9]*//g') )
 881 |     requested_memory_count=( $(echo "${requested_memory}" | sed 's/[a-zA-Z]*$//g') )
 882 |     
 883 |     # convert requested_memory depending on unit
 884 |     requested_memory=$(convertMemory ${requested_memory_count} ${requested_memory_unit})
 885 | 
 886 |     let nodes_requested_memory_total+=$requested_memory
 887 |   done
 888 | 
 889 |   nodes_requested_pods_total=0
 890 |   for requested_pods in ${node_requested_pods[@]}; do
 891 |     # split requested_pods
 892 |     requested_pods_unit=( $(echo "${requested_pods}" | sed 's/^[0-9]*//g') )
 893 |     requested_pods_count=( $(echo "${requested_pods}" | sed 's/[a-zA-Z]*$//g') )
 894 | 
 895 |     # convert requested_pods depending on unit
 896 |     requested_pods=$(convertPods ${requested_pods_count} ${requested_pods_unit})
 897 | 
 898 |     let nodes_requested_pods_total+=$requested_pods
 899 |   done
 900 | 
 901 |   perf_output="'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;; 'node_ignored'=${#nodeignored[*]};;;; 'nodes_cpu_total'=${nodes_requested_cpu_total};;;0;${nodes_capacity_cpu_total} 'nodes_memory_total'=${nodes_requested_memory_total}B;;;0;${nodes_capacity_memory_total} 'nodes_pods_total'=${nodes_requested_pods_total};;;0;${nodes_capacity_pods_total} ${node_perf_output}"
 902 | 
 903 |   if [[ ${#nodeerrors[*]} -gt 0 && ! -z ${resourceerrors} ]]; then
 904 |     echo "CHECK_RANCHER2 CRITICAL - ${#nodeerrors[*]} abnormal node states and resource problems: ${nodeerrors[*]}${resourceerrors}${nodeignored[*]}|${perf_output}"
 905 |     exit ${STATE_CRITICAL}
 906 |   elif [[ ${#nodeerrors[*]} -gt 0 ]]; then
 907 |     echo "CHECK_RANCHER2 CRITICAL - ${#nodeerrors[*]} abnormal node states: ${nodeerrors[*]}${resourceerrors}${nodeignored[*]}|${perf_output}"
 908 |     exit ${STATE_CRITICAL}
 909 |   elif [[ ! -z ${resourceerrors} ]]; then
 910 |     echo "CHECK_RANCHER2 CRITICAL - Nodes with resource problems: ${nodeerrors[*]}${resourceerrors}${nodeignored[*]}|${perf_output}"
 911 |     exit ${STATE_CRITICAL}
 912 |   elif [[ ${#nodeignored[*]} -gt 0 ]]; then
 913 |     echo "CHECK_RANCHER2 OK - All nodes OK - Info: ${nodeignored[*]}|${perf_output}"
 914 |     exit ${STATE_OK}
 915 |   else
 916 |     echo "CHECK_RANCHER2 OK - All ${#node_names[*]} nodes are active|${perf_output}"
 917 |     exit ${STATE_OK}
 918 |   fi
 919 | 
 920 | fi
 921 | ;;
 922 | 
 923 | 
 924 | # --- project status check --- #
 925 | project)
 926 | if [[ -n $projectname ]]; then
 927 | 
 928 |   # Single project: fetch the project given with -p directly by its id
 929 |   single_project_json=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project/${projectname}")
 930 | 
 931 |   # A response containing "NotFound" (any letter case) is reported as an unknown project
 932 |   if grep -qi "NotFound" <<< "$single_project_json"; then
 933 |     echo "CHECK_RANCHER2 CRITICAL - Project $projectname not found. Hint: Use '-t info' to identify cluster and project names."
 934 |     exit ${STATE_CRITICAL}
 935 |   fi
 936 | 
 937 |   healthstatus=$(jq -r '.state' <<< "$single_project_json")
 938 | 
 939 |   # Only the state "active" is reported as OK
 940 |   if [[ ${healthstatus} == active ]]; then
 941 |     echo "CHECK_RANCHER2 OK - Project $projectname is active|'project_active'=1;;;; 'project_error'=0;;;;"
 942 |     exit ${STATE_OK}
 943 |   fi
 944 |   echo "CHECK_RANCHER2 CRITICAL - Project $projectname is not active|'project_active'=0;;;; 'project_error'=1;;;;"
 945 |   exit ${STATE_CRITICAL}
 946 | 
 947 | else
 948 | 
 949 |   # All projects: fetch the whole project list; the arrays below are read by the same index
 950 |   all_projects_json=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project")
 951 |   declare -a project_ids=( $(jq -r '.data[].id' <<< "$all_projects_json") )
 952 |   declare -a project_names=( $(jq -r '.data[].name' <<< "$all_projects_json") )
 953 |   declare -a cluster_ids=( $(jq -r '.data[].clusterId' <<< "$all_projects_json") )
 954 |   declare -a healthstatus=( $(jq -r '.data[].state' <<< "$all_projects_json") )
 955 | 
 956 |   # Collect one message for every project whose state is not "active"
 957 |   for idx in "${!project_ids[@]}"; do
 958 |     [[ ${healthstatus[$idx]} == "active" ]] && continue
 959 |     projecterrors[$idx]="${project_ids[$idx]} in cluster ${cluster_ids[$idx]} is not healthy (state = ${healthstatus[$idx]})"
 960 |   done
 961 | 
 962 |   # The performance data is the same for both outcomes
 963 |   projects_perfdata="'projects_total'=${#project_ids[*]};;;; 'project_errors'=${#projecterrors[*]};;;;"
 964 |   if [[ ${#projecterrors[*]} -eq 0 ]]; then
 965 |     echo "CHECK_RANCHER2 OK - All projects (${#project_ids[*]}) are healthy|${projects_perfdata}"
 966 |     exit ${STATE_OK}
 967 |   fi
 968 |   echo "CHECK_RANCHER2 CRITICAL - ${projecterrors[*]}|${projects_perfdata}"
 969 |   exit ${STATE_CRITICAL}
 970 | 
 971 | fi
 972 | ;;
 973 | 
 974 | # --- workload status check (requires project)--- #
 975 | service)  # no check is run for this type; it only tells the user to use -t workload
 976 |   printf '%s\n' "CHECK_RANCHER2 UNKNOWN - In Rancher 2 services are called workloads. Use -t workload."
 977 |   exit ${STATE_UNKNOWN}
 978 | ;;
 979 | 
 980 | workload)
 981 | if [[ -z $projectname ]]; then
 982 |   echo -e "CHECK_RANCHER2 UNKNOWN - To check workloads you must also define the project (-p). This will check all workloads within the given project. To check a specific workload, define it with -w."
 983 |   exit ${STATE_UNKNOWN}
 984 | fi
 985 | 
 986 | if [[ -z $workloadname ]]; then
 987 | 
 988 | # Check status of all workloads within a project (project must be given)
 989 |   api_out_workloads=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project/${projectname}/workloads")
 990 | 
 991 |   if [[ -n $(echo "$api_out_workloads" | grep -i "ClusterUnavailable") ]]; then
 992 |     clustername=$(echo ${projectname} | awk -F':' '{print $1}')
 993 |     echo "CHECK_RANCHER2 CRITICAL - Cluster $clustername not found. Hint: Use '-t info' to identify cluster and project names."
 994 |     exit ${STATE_CRITICAL}
 995 |   fi
 996 |   # Name, state and paused flag of every workload; the three arrays are read by the same index
 997 |   declare -a workload_names=( $(echo "$api_out_workloads" | jq -r '.data[].name') )
 998 |   declare -a healthstatus=( $(echo "$api_out_workloads" | jq -r '.data[].state') )
 999 |   declare -a pausedstatus=( $(echo "$api_out_workloads" | jq -r '.data[].paused') )
1000 | 
1001 |   # We rather WARN than silently return OK for zero workloads (count the elements, not the first name's length)
1002 |   if [[ ${#workload_names[*]} -eq 0 ]]; then
1003 |     echo "CHECK_RANCHER2 WARNING - No workloads found in project ${projectname}."
1004 |     exit ${STATE_WARNING}
1005 |   fi
1006 |   # Sort every workload into ignored / warning (updating) / error (any other non-active state)
1007 |   i=0
1008 |   for workload in ${workload_names[*]}; do
1009 |     for status in ${healthstatus[$i]}; do
1010 |       if [[ "${ignore}" =~ "${workload}" ]]; then
1011 |         workloadignored[$i]="Workload ${workload} is ignored -"
1012 |         continue
1013 |       fi
1014 |       if [[ ${status} = updating ]]; then
1015 |         if [[ -n $(echo ${ignore} | grep -i ${status}) ]]; then
1016 |           workloadignored[$i]="Workload ${workload} is ${status} but ignored -"
1017 |         else
1018 |           workloadwarnings[$i]="Workload ${workload} is ${status} -"
1019 |         fi
1020 |       elif [[ ${status} != active ]]; then
1021 |         if [[ -n $(echo ${ignore} | grep -i ${status}) ]]; then
1022 |           workloadignored[$i]="Workload ${workload} is ${status} but ignored -"
1023 |         else
1024 |           workloaderrors[$i]="Workload ${workload} is ${status} -"
1025 |         fi
1026 |       fi
1027 |     done
1028 |     for paused in ${pausedstatus[$i]}; do
1029 |       if [[ ${paused} = true ]]; then
1030 |         workloadpaused[$i]="${workload} "
1031 |       fi
1032 |     done
1033 |     let i++
1034 |   done
1035 | 
1036 |   if [[ ${#workloadignored[*]} -gt 0 ]]; then
1037 |     ignoreoutput="- ${workloadignored[*]}"
1038 |   fi
1039 |   # Errors take precedence over warnings; paused workloads are only named in the OK text
1040 |   if [[ ${#workloaderrors[*]} -gt 0 ]]; then
1041 |     echo  "CHECK_RANCHER2 CRITICAL - ${#workloaderrors[*]} workload(s) in error state: ${workloaderrors[*]} ${ignoreoutput}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;; 'workloads_ignored'=${#workloadignored[*]};;;;"
1042 |     exit ${STATE_CRITICAL}
1043 |   elif [[ ${#workloadwarnings[*]} -gt 0 ]]; then
1044 |     echo "CHECK_RANCHER2 WARNING - ${#workloadwarnings[*]} workload(s) in warning state: ${workloadwarnings[*]} ${ignoreoutput}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;; 'workloads_ignored'=${#workloadignored[*]};;;;"
1045 |     exit ${STATE_WARNING}
1046 |   else
1047 |     if [[ ${#workloadpaused[*]} -gt 0 ]]; then
1048 |       echo "CHECK_RANCHER2 OK - All workloads (${#workload_names[*]}) in project ${projectname} are healthy/active ( Note: ${#workloadpaused[*]} workloads currently paused: ${workloadpaused[*]}) ${ignoreoutput}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;; 'workloads_ignored'=${#workloadignored[*]};;;;"
1049 |     else
1050 |       echo "CHECK_RANCHER2 OK - All workloads (${#workload_names[*]}) in project ${projectname} are healthy/active ${ignoreoutput}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;; 'workloads_ignored'=${#workloadignored[*]};;;;"
1051 |     fi
1052 |     exit ${STATE_OK}
1053 |   fi
1054 | 
1055 | else
1056 |  
1057 | # Check status of a single workload
1058 |   if [[ -n $namespacename && $namespacename != "" ]]; then
1059 |     nsappend="&namespaceId=$namespacename"
1060 |     nsoutputappend="in namespace $namespacename "
1061 |   fi
1062 |   # Look the workload up by name, optionally narrowed to one namespace (-n)
1063 |   api_out_single_workload=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project/${projectname}/workloads/?name=${workloadname}${nsappend}")
1064 | 
1065 |   if [[ -n $(echo "$api_out_single_workload" | grep -i "ClusterUnavailable") ]]; then
1066 |     clustername=$(echo ${projectname} | awk -F':' '{print $1}')
1067 |     echo "CHECK_RANCHER2 CRITICAL - Cluster $clustername not found. Hint: Use '-t info' to identify cluster and project names."; exit ${STATE_CRITICAL}
1068 |   fi
1069 | 
1070 |   # Check if that given workload exists (a response without "containers" is treated as not found)
1071 |   if [[ -z $(echo "$api_out_single_workload" | grep -i "containers") ]]; then
1072 |     echo "CHECK_RANCHER2 CRITICAL - Workload $workloadname ${nsoutputappend}not found."; exit ${STATE_CRITICAL}
1073 |   fi
1074 | 
1075 |   # Check if there are multiple workloads with the same name; the exit code matches the UNKNOWN text
1076 |   workloadcount=$(echo "$api_out_single_workload" | jq -r '.data[].id' | wc -l)
1077 |   if [[ $workloadcount -gt 1 ]]; then
1078 |     echo "CHECK_RANCHER2 UNKNOWN - Identical workload names detected in multiple namespaces. To check a specific workload you must also define the namespace (-n)."
1079 |     exit ${STATE_UNKNOWN}
1080 |   fi
1081 | 
1082 |   healthstatus=$(echo "$api_out_single_workload" | jq -r '.data[].state')
1083 |   # An ignored "updating" state is reported as OK and therefore also exits with STATE_OK
1084 |   if [[ ${healthstatus} = updating ]]; then
1085 |     if [[ -n $(echo ${ignore} | grep -i ${healthstatus}) ]]; then
1086 |       echo "CHECK_RANCHER2 OK - Workload $workloadname ${nsoutputappend}is ${healthstatus} but ignored|'workload_active'=0;;;; 'workload_error'=0;;;; 'workload_warning'=1;;;; 'workload_ignored'=1;;;;"
1087 |       exit ${STATE_OK}
1088 |     else
1089 |       echo "CHECK_RANCHER2 WARNING - Workload $workloadname ${nsoutputappend}is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=0;;;; 'workload_warning'=1;;;; 'workload_ignored'=0;;;;"
1090 |       exit ${STATE_WARNING}
1091 |     fi
1092 |   elif [[ ${healthstatus} != active ]]; then
1093 |     if [[ -n $(echo ${ignore} | grep -i ${healthstatus}) ]]; then
1094 |       echo "CHECK_RANCHER2 CRITICAL - Workload $workloadname ${nsoutputappend}is ${healthstatus} but ignored|'workload_active'=0;;;; 'workload_error'=1;;;; 'workload_warning'=0;;;; 'workload_ignored'=1;;;;"
1095 |       exit ${STATE_CRITICAL}  # NOTE(review): an ignored state still exits CRITICAL here, unlike the all-workloads branch - confirm intent
1096 |     else
1097 |       echo "CHECK_RANCHER2 CRITICAL - Workload $workloadname ${nsoutputappend}is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=1;;;; 'workload_warning'=0;;;; 'workload_ignored'=0;;;;"
1098 |       exit ${STATE_CRITICAL}
1099 |     fi
1100 |   else
1101 |     echo "CHECK_RANCHER2 OK - Workload $workloadname ${nsoutputappend}is active|'workload_active'=1;;;; 'workload_error'=0;;;; 'workload_warning'=0;;;; 'workload_ignored'=0;;;;"
1102 |     exit ${STATE_OK}
1103 |   fi
1104 |   
1105 | fi
1106 | ;;
1107 | 
1108 | # --- pod status check (requires project) --- #
1109 | pod)
1110 | if [[ -z $projectname ]]; then
1111 |   echo -e "CHECK_RANCHER2 UNKNOWN - To check pods you must also define the project (-p). This will check all pods within the given project. To check a specific pod, define it with -o podname and -n namespace."
1112 |   exit ${STATE_UNKNOWN}
1113 | fi
1114 | 
1115 | if [[ -z $podname ]]; then
1116 | 
1117 | # Check status of all pods within a project (project must be given), optionally limited to one namespace (-n)
1118 |   if [[ -n $namespacename && $namespacename != "" ]]; then
1119 |     nsappend="?namespaceId=$namespacename"
1120 |     outputappend="and namespace $namespacename "
1121 |   fi
1122 | 
1123 |   api_out_pods=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project/${projectname}/pods${nsappend}")
1124 | 
1125 |   if [[ -n $(echo "$api_out_pods" | grep -i "ClusterUnavailable") ]]; then
1126 |     clustername=$(echo ${projectname} | awk -F':' '{print $1}')
1127 |     echo "CHECK_RANCHER2 CRITICAL - Cluster $clustername not found. Hint: Use '-t info' to identify cluster and project names."
1128 |     exit ${STATE_CRITICAL}
1129 |   fi
1130 |   # Name and state of every pod; both arrays are read by the same index
1131 |   declare -a pod_names=( $(echo "$api_out_pods" | jq -r '.data[].name') )
1132 |   declare -a healthstatus=( $(echo "$api_out_pods" | jq -r '.data[].state') )
1133 | 
1134 |   # We rather WARN than silently return OK for zero pods (count the elements, not the first name's length)
1135 |   if [[ ${#pod_names[*]} -eq 0 ]]; then
1136 |     echo "CHECK_RANCHER2 WARNING - No pods found in project ${projectname}."
1137 |     exit ${STATE_WARNING}
1138 |   fi
1139 |   # Every state other than running or succeeded is recorded as an error
1140 |   i=0
1141 |   for pod in ${pod_names[*]}; do
1142 |     for status in ${healthstatus[$i]}; do
1143 |       if [[ ${status} != running && ${status} != succeeded ]]; then
1144 |         poderrors[$i]="Pod ${pod} is ${status}\n"
1145 |       fi
1146 |     done
1147 |     let i++
1148 |   done
1149 | 
1150 |   if [[ ${#poderrors[*]} -gt 0 ]]; then
1151 |     echo "CHECK_RANCHER2 CRITICAL - ${#poderrors[*]} pod(s) in project ${projectname} ${outputappend}in abnormal state: ${poderrors[*]}|'pods_total'=${#pod_names[*]};;;; 'pods_errors'=${#poderrors[*]};;;;"
1152 |     exit ${STATE_CRITICAL}
1153 |   else
1154 |     echo "CHECK_RANCHER2 OK - All pods (${#pod_names[*]}) in project ${projectname} ${outputappend}are running|'pods_total'=${#pod_names[*]};;;; 'pods_errors'=${#poderrors[*]};;;;"
1155 |     exit ${STATE_OK}
1156 |   fi
1157 | 
1158 | else
1159 | # Check status of a single pod (requires project and namespace)
1160 | # Note: This only makes sense when you create static pods!
1161 |   if [[ -z $namespacename ]]; then
1162 |     echo -e "CHECK_RANCHER2 UNKNOWN - To check a single pod you must also define the namespace (-n)."
1163 |     exit ${STATE_UNKNOWN}
1164 |   fi
1165 |   
1166 |   api_out_single_pod=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project/${projectname}/pods/${namespacename}:${podname}")
1167 | 
1168 |   if [[ -n $(echo "$api_out_single_pod" | grep -i "ClusterUnavailable") ]]; then
1169 |     clustername=$(echo ${projectname} | awk -F':' '{print $1}')
1170 |     echo "CHECK_RANCHER2 CRITICAL - Cluster $clustername not found. Hint: Use '-t info' to identify cluster and project names."; exit ${STATE_CRITICAL}
1171 |   fi
1172 | 
1173 |   # Check if that given pod exists (a response without "containers" is treated as not found)
1174 |   if [[ -z $(echo "$api_out_single_pod" | grep -i "containers") ]]; then
1175 |     echo "CHECK_RANCHER2 CRITICAL - Pod $podname not found. Verify project (-p) and pod (-o) names."; exit ${STATE_CRITICAL}
1176 |   fi
1177 | 
1178 |   healthstatus=$(echo "$api_out_single_pod" | jq -r '.state')
1179 |   # A single pod is only OK in state running; every other state is CRITICAL
1180 |   if [[ ${healthstatus} != running ]]; then
1181 |     echo "CHECK_RANCHER2 CRITICAL - Pod $podname is ${healthstatus}|'pod_active'=0;;;; 'pod_error'=1;;;;"
1182 |     exit ${STATE_CRITICAL}
1183 |   else
1184 |     echo "CHECK_RANCHER2 OK - Pod $podname is running|'pod_active'=1;;;; 'pod_error'=0;;;;"
1185 |     exit ${STATE_OK}
1186 |   fi
1187 | 
1188 | fi
1189 | ;;
1190 | 
1191 | # --- local-certs --- #
1192 | local-certs)
1193 | rightnow=$(date +%s)
1194 | if [[ ${cert_warn} -gt 0 ]]; then warning=$(( rightnow + cert_warn * 86400 )); fi
1195 | projectid=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/cluster/local/projects" | jq -r '.data[] | select(.name == "System").id')
1196 | # Certificates are read from namespace cattle-system of the local cluster's "System" project
1197 | api_out_certs=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/projects/${projectid}/namespacedcertificates?namespaceId=cattle-system")
1198 | declare -a cert_names=( $(echo "$api_out_certs" | jq -r '.data[] | select(.type == "namespacedCertificate").name') )
1199 | declare -a cert_expiry=( $(echo "$api_out_certs" | jq -r '.data[] | select(.type == "namespacedCertificate").expiresAt') )
1200 | # rightnow, warning and expiry are Unix timestamps; warning is only set when --cert-warn is greater than 0
1201 | #echo ${cert_names[*]}     # Enable for debugging
1202 | #echo ${cert_expiry[*]}    # Enable for debugging
1203 | 
1204 | # Walk both arrays by index so every name stays paired with its expiry date, also after a skipped entry
1205 | for i in "${!cert_expiry[@]}"; do
1206 |   # -i may hold a comma-separated list; a certificate is skipped when its exact name is listed
1207 |   if [[ -n ${ignore} ]] && tr ',' '\n' <<< "${ignore}" | grep -Fxq -- "${cert_names[${i}]}"; then
1208 |     cert_ignored[${i}]="${cert_names[${i}]}"
1209 |     continue
1210 |   fi
1211 |   expiry=$(date --date="${cert_expiry[${i}]}" +%s)
1212 |   if [[ ${rightnow} -gt ${expiry} ]]; then
1213 |     diff=$(( (rightnow - expiry) / 86400 ))
1214 |     cert_expired[${i}]="${cert_names[${i}]} expired ${diff} days ago -"
1215 |   elif [[ ${warning} -gt ${expiry} ]]; then
1216 |     # Days left until expiry (not the distance to the warning threshold)
1217 |     diff=$(( (expiry - rightnow) / 86400 ))
1218 |     cert_warning[${i}]="${cert_names[${i}]} will expire in ${diff} days -"
1219 |   fi
1220 | done
1221 | 
1222 | if [[ ${#cert_ignored[*]} -gt 0 ]]; then
1223 |   ignoreoutput="- ${#cert_ignored[*]} certificate(s) ignored: ${cert_ignored[*]}"
1224 | fi
1225 | 
1226 | if [[ ${#cert_expired[*]} -gt 0 ]]; then
1227 |   echo "CHECK_RANCHER2 CRITICAL - ${#cert_expired[*]} certificate(s) expired (${cert_expired[*]}) ${ignoreoutput}|'total_certs'=${#cert_names[*]};;;; 'expired_certs'=${#cert_expired[*]};;;; 'warning_certs'=${#cert_warning[*]};;;; 'ignored_certs'=${#cert_ignored[*]};;;;"
1228 |   exit ${STATE_CRITICAL}
1229 | elif [[ ${#cert_warning[*]} -gt 0 ]]; then
1230 |   echo "CHECK_RANCHER2 WARNING - ${#cert_warning[*]} certificate(s) will expire soon (${cert_warning[*]}) ${ignoreoutput}|'total_certs'=${#cert_names[*]};;;; 'expired_certs'=${#cert_expired[*]};;;; 'warning_certs'=${#cert_warning[*]};;;; 'ignored_certs'=${#cert_ignored[*]};;;;"
1231 |   exit ${STATE_WARNING}
1232 | else
1233 |   echo "CHECK_RANCHER2 OK - All ${#cert_names[*]} certificates are valid ${ignoreoutput}|'total_certs'=${#cert_names[*]};;;; 'expired_certs'=${#cert_expired[*]};;;; 'warning_certs'=${#cert_warning[*]};;;; 'ignored_certs'=${#cert_ignored[*]};;;;"
1234 |   exit ${STATE_OK}
1235 | fi
1236 | 
1237 | ;;
1238 | 
1239 | esac
1240 | echo "UNKNOWN: should never reach this part"
1241 | exit ${STATE_UNKNOWN}
1242 | 


--------------------------------------------------------------------------------
/icinga2/command_check_rancher2.conf:
--------------------------------------------------------------------------------
 1 | # check_rancher2 command definition: maps the rancher2_* custom variables to the plugin's command line options
 2 | object CheckCommand "check_rancher2" {
 3 |   import "plugin-check-command"
 4 | 
 5 |   command = [ "/usr/lib/nagios/plugins/check_rancher2.sh" ]
 6 | 
 7 |   arguments = {
 8 |     "-H" = {
 9 |       description = "Address of Rancher 2 API (e.g. rancher.example.com)"
10 |       value = "$rancher2_address$"
11 |     }
12 |     "-U" = {
13 |       description = "API username (Access Key)"
14 |       value = "$rancher2_username$"
15 |     }
16 |     "-P" = {
17 |       description = "API password (Secret Key)"
18 |       value = "$rancher2_password$"
19 |     }
20 |     "-S" = {
21 |       description = "Use https instead of http"
22 |       set_if = "$rancher2_ssl$"
23 |     }
24 |     "-s" = {
25 |       description = "Allow self-signed certificates"
26 |       set_if = "$rancher2_selfsigned_cert$"
27 |     }
28 |     "-t" = {
29 |       description = "Check type (see list below for available check types)"
30 |       value = "$rancher2_type$"
31 |     }
32 |     "-c" = {
33 |       description = "Cluster name (for specific cluster check)"
34 |       value = "$rancher2_cluster$"
35 |     }
36 |     "-p" = {
37 |       description = "Project name (for specific project check, needed for workload checks)"
38 |       value = "$rancher2_project$"
39 |     }
40 |     "-n" = {
41 |       description = "Namespace name (needed for specific pod checks)"
42 |       value = "$rancher2_namespace$"
43 |     }
44 |     "-w" = {
45 |       description = "Workload name (for specific workload check)"
46 |       value = "$rancher2_workload$"
47 |     }
48 |     "-o" = {
49 |       description = "Pod name (for specific pod check, this makes only sense if you use static pods)"
50 |       value = "$rancher2_pod$"
51 |     }
52 |     "-i" = {
53 |       description = "Comma-separated list of names or status(es) to ignore (supported in node, workload and local-certs check types)"
54 |       value = "$rancher2_ignore$"
55 |     }
56 |     "--cpu-warn" = {
57 |       description = "Exit with WARNING status if more than PERCENT of cpu capacity is used (currently only supported in cluster specific node and cluster check type)"
58 |       value = "$rancher2_cpu_warn$"
59 |     }
60 |     "--cpu-crit" = {
61 |       description = "Exit with CRITICAL status if more than PERCENT of cpu capacity is used (currently only supported in cluster specific node and cluster check type)"
62 |       value = "$rancher2_cpu_crit$"
63 |     }
64 |     "--memory-warn" = {
65 |       description = "Exit with WARNING status if more than PERCENT of mem capacity is used (currently only supported in cluster specific node and cluster check type)"
66 |       value = "$rancher2_memory_warn$"
67 |     }
68 |     "--memory-crit" = {
69 |       description = "Exit with CRITICAL status if more than PERCENT of mem capacity is used (currently only supported in cluster specific node and cluster check type)"
70 |       value = "$rancher2_memory_crit$"
71 |     }
72 |     "--pods-warn" = {
73 |       description = "Exit with WARNING status if more than PERCENT of pod capacity is used (currently only supported in cluster specific node and cluster check type)"
74 |       value = "$rancher2_pods_warn$"
75 |     }
76 |     "--pods-crit" = {
77 |       description = "Exit with CRITICAL status if more than PERCENT of pod capacity is used (currently only supported in cluster specific node and cluster check type)"
78 |       value = "$rancher2_pods_crit$"
79 |     }
80 |     "--cert-warn" = {
81 |       description = "Warning threshold in days to warn before a certificate expires (supported check types: local-certs)"
82 |       value = "$rancher2_cert_warn$"
83 |     }
84 |     "-h" = {
85 |       description = "Help. I need somebody. Help. Not just anybody. Heeeeeelp!"
86 |       set_if = "$rancher2_help$"
87 |     }
88 |   }
89 | 
90 |   vars.rancher2_address = "$address$"  # default for -H: the address of the host the service is attached to
91 |   # If you only run one Rancher2, you can define the API access (and the -S / -s switches) here, too:
92 |   #vars.rancher2_username = "token-XXXXX"
93 |   #vars.rancher2_password = "iWahca3ohngeiReedeingaiiWahca3ohngeiReedeingai432k1dda"
94 |   #vars.rancher2_ssl = true
95 |   #vars.rancher2_selfsigned_cert = false
96 | }
97 | 


--------------------------------------------------------------------------------
/icinga2/example_service_checks.conf:
--------------------------------------------------------------------------------
  1 | ###############################################################################
  2 | # HOST DEFINITION
  3 | ###############################################################################
  4 | object Host "my-rancher2-host" {
  5 |   import "generic-host"
  6 |   address = "rancher2.example.com"  # example value; the check_rancher2 command passes it as -H through its default vars.rancher2_address = "$address$"
  7 | }
  8 | ###############################################################################
  9 | # SERVICE DEFINITIONS
 10 | ###############################################################################
 11 | # Informational check: just shows some info about the discovered clusters and projects
 12 | object Service "Rancher2 Info" {
 13 |   import "generic-service"
 14 |   host_name = "my-rancher2-host"
 15 |   check_command = "check_rancher2"
 16 |   vars.rancher2_username = "token-XXXXX"  # placeholder API access key (-U)
 17 |   vars.rancher2_password = "iWahca3ohngeiReedeingaiiWahca3ohngeiReedeingai432k1dda"  # placeholder API secret key (-P)
 18 |   vars.rancher2_ssl = true  # adds -S (https instead of http)
 19 |   vars.rancher2_type = "info"  # check type (-t)
 20 | }
 21 | 
 22 | # Check all available/found clusters for their health
 23 | object Service "Rancher2 All Clusters" {
 24 |   import "generic-service"
 25 |   host_name = "my-rancher2-host"
 26 |   check_command = "check_rancher2"
 27 |   vars.rancher2_username = "token-XXXXX"
 28 |   vars.rancher2_password = "iWahca3ohngeiReedeingaiiWahca3ohngeiReedeingai432k1dda"
 29 |   vars.rancher2_ssl = true
 30 |   vars.rancher2_type = "cluster"  # check type (-t); no rancher2_cluster is set in this example
 31 | }
 32 | 
 33 | # Check a single cluster for its health
 34 | object Service "Rancher2 Cluster Test" {
 35 |   import "generic-service"
 36 |   host_name = "my-rancher2-host"
 37 |   check_command = "check_rancher2"
 38 |   vars.rancher2_username = "token-XXXXX"
 39 |   vars.rancher2_password = "iWahca3ohngeiReedeingaiiWahca3ohngeiReedeingai432k1dda"
 40 |   vars.rancher2_ssl = true
 41 |   vars.rancher2_type = "cluster"  # check type (-t)
 42 |   vars.rancher2_cluster = "c-4kd22"  # example cluster, passed as -c
 43 | }
 44 | 
 45 | # Check nodes in all clusters for their status but ignore cordoned and drained nodes
 46 | object Service "Rancher2 Nodes" {
 47 |   import "generic-service"
 48 |   host_name = "my-rancher2-host"
 49 |   check_command = "check_rancher2"
 50 |   vars.rancher2_username = "token-XXXXX"
 51 |   vars.rancher2_password = "iWahca3ohngeiReedeingaiiWahca3ohngeiReedeingai432k1dda"
 52 |   vars.rancher2_ssl = true
 53 |   vars.rancher2_type = "node"
 54 |   vars.rancher2_ignore = "cordoned,drained"  # must be rancher2_ignore: that is the variable the check_rancher2 CheckCommand passes as -i
 55 | }
 56 | 
 57 | # Health check covering every available (discovered) project across all clusters
 58 | object Service "Rancher2 All Projects" {
 59 |   import "generic-service"
 60 |   host_name = "my-rancher2-host"
 61 |   check_command = "check_rancher2"
 62 |   vars.rancher2_type = "project"
 63 |   vars.rancher2_ssl = true
 64 |   vars.rancher2_username = "token-XXXXX"
 65 |   vars.rancher2_password = "iWahca3ohngeiReedeingaiiWahca3ohngeiReedeingai432k1dda"
 66 | }
 67 | 
 68 | # Health check limited to one project, selected via vars.rancher2_project
 69 | object Service "Rancher2 Project Test" {
 70 |   import "generic-service"
 71 |   host_name = "my-rancher2-host"
 72 |   check_command = "check_rancher2"
 73 |   vars.rancher2_type = "project"
 74 |   vars.rancher2_project = "c-4kd22:p-44gjh"
 75 |   vars.rancher2_ssl = true
 76 |   vars.rancher2_username = "token-XXXXX"
 77 |   vars.rancher2_password = "iWahca3ohngeiReedeingaiiWahca3ohngeiReedeingai432k1dda"
 78 | }
 79 | 
 80 | # Check every workload inside the project given in vars.rancher2_project
 81 | object Service "Rancher2 Workloads in Project Test" {
 82 |   import "generic-service"
 83 |   host_name = "my-rancher2-host"
 84 |   check_command = "check_rancher2"
 85 |   vars.rancher2_type = "workload"
 86 |   vars.rancher2_project = "c-4kd22:p-44gjh"
 87 |   vars.rancher2_ssl = true
 88 |   vars.rancher2_username = "token-XXXXX"
 89 |   vars.rancher2_password = "iWahca3ohngeiReedeingaiiWahca3ohngeiReedeingai432k1dda"
 90 | }
 91 | 
 92 | # Check one named workload (vars.rancher2_workload) inside the given project
 93 | object Service "Rancher2 Workload Web in Project Test" {
 94 |   import "generic-service"
 95 |   host_name = "my-rancher2-host"
 96 |   check_command = "check_rancher2"
 97 |   vars.rancher2_type = "workload"
 98 |   vars.rancher2_project = "c-4kd22:p-44gjh"
 99 |   vars.rancher2_workload = "Web"
100 |   vars.rancher2_ssl = true
101 |   vars.rancher2_username = "token-XXXXX"
102 |   vars.rancher2_password = "iWahca3ohngeiReedeingaiiWahca3ohngeiReedeingai432k1dda"
103 | }
104 | 
105 | 
106 | 


--------------------------------------------------------------------------------