├── .dockerignore ├── .flake8 ├── .gitignore ├── .idea └── workspace.xml ├── Dockerfile ├── Jenkinsfile ├── LICENSE ├── Makefile ├── README.md ├── base ├── __init__.py ├── config.py ├── daemon_thread.py ├── timed_threads.py ├── watcher_thread.py └── web_api.py ├── build.sh ├── check_kubernetesd ├── config-dev.ini ├── documentation ├── deployment_yed.graphml ├── deployment_yed.png ├── logo.svg └── template │ └── custom_service_kubernetes.html ├── k8sobjects ├── __init__.py ├── component.py ├── container.py ├── daemonset.py ├── deployment.py ├── ingress.py ├── k8sobject.py ├── k8sresourcemanager.py ├── node.py ├── pod.py ├── pvc.py ├── secret.py ├── service.py └── statefulset.py ├── kubernetes ├── incluster │ ├── 01_monitoring-user.yaml │ └── 02_deployment_with_incluster_config.yaml └── token │ ├── 01_monitoring-user.yaml │ ├── 02_ingress-apiserver.yaml │ ├── 03_service-apiserver.yaml │ └── 04_deployment_with_token_config.yaml ├── mypy.ini ├── requirements.txt ├── template ├── create_template_documentation ├── custom_service_kubernetes.xml └── transform.xsl └── tests └── unit ├── __init__.py ├── resources └── test.ini ├── test_config_loading.py └── test_k8sobject.py /.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | Jenkinsfile 3 | build.sh 4 | *.swp 5 | .git 6 | .gitignore 7 | kubernetes/* 8 | venv/* 9 | config_* 10 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | per-file-ignores = __init__.py:F401,F403 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | \#* 3 | lib/__pycache__/* 4 | *.pyc 5 | .git 6 | .idea 7 | .pytest_cache 8 | *.swp 9 | venv/* 10 | *.log 11 | /config*.py 12 | /config*.ini 13 | 
.idea 14 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 15 | 16 | 22 | 23 | 24 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 37 | { 38 | "associatedIndex": 1 39 | } 40 | 41 | 42 | 43 | 46 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 105 | 106 | 107 | 140 | 141 | 142 | 166 | 167 | 168 | 192 | 193 | 194 | 208 | 209 | 210 | 224 | 225 | 226 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 1619798041708 271 | 318 | 319 | 320 | 321 | 323 | 324 | 333 | 334 | 335 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.0a3 2 | LABEL maintainer="ms-github@256bit.org" 3 | LABEL Description="zabbix-kubernetes - efficent kubernetes monitoring for zabbix" 4 | 5 | MAINTAINER operations@vico-research.com 6 | 7 | ENV K8S_API_HOST "" 8 | ENV K8S_API_TOKEN "" 9 | ENV ZABBIX_SERVER "zabbix" 10 | ENV ZABBIX_HOST "k8s" 11 | ENV CRYPTOGRAPHY_DONT_BUILD_RUST "1" 12 | 13 | COPY --chown=nobody:users requirements.txt /app/requirements.txt 14 | 15 | RUN apt-get update -y && \ 16 | apt-get install libffi-dev libffi7 libssl-dev bash screen ncdu -y && \ 17 | pip3 install --upgrade pip && \ 18 | pip3 install -r /app/requirements.txt && \ 19 | apt-get upgrade -y && \ 20 | apt-get dist-upgrade -y && \ 21 | apt-get remove base libssl-dev libffi-dev gcc -y && \ 22 | apt-get autoremove -y && \ 23 | rm -rf /var/lib/apt/lists/* /root/.cache 24 | 25 | COPY --chown=nobody:users base /app/base 26 | COPY --chown=nobody:users 
k8sobjects /app/k8sobjects 27 | COPY --chown=nobody:users check_kubernetesd /app/check_kubernetesd 28 | 29 | USER nobody 30 | WORKDIR /app 31 | 32 | ENTRYPOINT [ "/app/check_kubernetesd" ] 33 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | #!groovy 2 | 3 | pipeline { 4 | agent any 5 | parameters { 6 | booleanParam(defaultValue: false, description: 'Create release', name: 'RELEASE') 7 | } 8 | 9 | triggers { 10 | cron('@daily') 11 | pollSCM('H/15 * * * *') 12 | } 13 | 14 | options{ 15 | buildDiscarder(logRotator(artifactDaysToKeepStr: '10', artifactNumToKeepStr: '10', daysToKeepStr: '3', numToKeepStr: '20')) 16 | disableConcurrentBuilds() 17 | } 18 | 19 | stages { 20 | stage('Prepare') { 21 | when { 22 | environment name: 'RELEASE', value: 'true' 23 | } 24 | steps { 25 | ansiColor('xterm') { 26 | sh 'git fetch --tags' 27 | sh './build.sh cleanup' 28 | } 29 | } 30 | } 31 | stage('Build and Test') { 32 | steps { 33 | ansiColor('xterm') { 34 | sh 'git fetch --tags' 35 | sh "./build.sh default vicoconsulting" 36 | } 37 | } 38 | } 39 | stage('Release') { 40 | when { 41 | environment name: 'RELEASE', value: 'true' 42 | } 43 | steps { 44 | ansiColor('xterm') { 45 | sh './build.sh publish_image vicoconsulting' 46 | } 47 | } 48 | } 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 
8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 
42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. 
The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 
102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL = bash 2 | 3 | activate = source venv/bin/activate 4 | python = python3 5 | dockerhub_repo = scoopex666 6 | 7 | all: deps 8 | .PHONY: all 9 | 10 | deps: venv/bin/activate 11 | .PHONY: deps 12 | 13 | 14 | venv/bin/activate: requirements.txt 15 | ${python} -m venv venv 16 | @# TODO: installation of wheel solves a pip install error, we have to check if that is needed permamently 17 | @# because it seems to be a packaging issue 18 | ${activate} && \ 19 | pip install wheel && \ 20 | pip install -r requirements.txt 21 | 22 | clean: 23 | rm -rf venv 24 | .PHONY: clean 25 | 26 | check: 27 | @# run sequentially so the output is easier to read 28 | ${MAKE} --no-print-directory lint 29 | ${MAKE} --no-print-directory type-check 30 | ${MAKE} --no-print-directory test 31 | .PHONY: check 32 | 33 | 34 | lint: deps 35 | ${activate} && ${python} -m flake8 base k8sobjects 36 | .PHONY: lint 37 | 38 | type-check: deps 39 | ${activate} && ${python} -m mypy --no-color-output --pretty base k8sobjects 40 | .PHONY: type-check 41 | 42 | test: deps 43 | ${activate} && ${python} -m pytest tests 44 | .PHONY: test 45 | 46 | run: deps 47 | # refresh token kubeconfig azure access token until kubernetes lib can handle this 48 | kubectl get nodes >/dev/null 2>&1 49 | ${activate} && ${python} check_kubernetesd config_flip-dev.ini 50 | .PHONY: run 51 | 52 | doc: 53 | cd template && 
./create_template_documentation 54 | .PHONY: doc 55 | 56 | docker: 57 | ./build.sh default ${dockerhub_repo} 58 | .PHONY: docker 59 | 60 | publish: docker 61 | ./build.sh publish_image ${dockerhub_repo} 62 | .PHONY: publish 63 | 64 | release: test doc 65 | [ `git status --porcelain=v1 2>/dev/null | wc -l` -le 0 ] 66 | git commit -a template/custom_service_kubernetes.xml 67 | git push 68 | git push --tags 69 | ${MAKE} --no-print-directory publish 70 | .PHONY: release 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![K8SZabbix Logo](documentation/logo.svg) 2 | 3 | 4 | k8s-zabbix 5 | ================= 6 | 7 | This project provides kubernetes monitoring capabilities for zabbix using mainly the kubernetes watch api mechanism. 8 | Additionally, it provides capabilities to submit the same data to a system management solution by REST. 9 | 10 | New Kubernetes entities are submitted as [low level discovery](https://www.zabbix.com/documentation/current/manual/discovery/low_level_discovery) 11 | items in the moment of their creation (i.e. a new deployment). Kubernetes events (i.e. a pod restart) are submitted in moment of their occurrence. 12 | 13 | This tool aggregates status information of entities in some cases to the managing entity to improve the practical usage with zabbix 14 | (example: aggregation of the pod statuses to the deployment which manages the pods) 15 | Disappearing entities will be deleted by zabbix using the "Keep lost resources period" setting in LLC. 16 | 17 | Optionally this tool can submit kubernetes entities to a webservice in a unaggregated manner. 18 | This might be a very useful thing if you have left the GitOps paradigm behind and built a fully fledged management system for your infrastructure. 
19 | 20 | The solution currently supervises the following types of Kubernetes entities: 21 | 22 | * apiserver : Check and discover apiservers 23 | * components : Check and discover health of k8s components (etcd, controller-manager, scheduler etc.) 24 | * nodes: Check and discover active nodes 25 | * pods: Check pods for restarts 26 | * statefulsets: Check and discover statefulsets 27 | * daemonset: Check and discover daemonsets 28 | * deployments: Check and discover deployments 29 | * daemonsets: Check and discover daemonsets readiness 30 | * replicasets: Check and discover replicasets readiness 31 | * tls: Check tls secrets expiration dates 32 | 33 | For details or a overview of the monitored kubernetes attributes, have a look at the [documentation](http://htmlpreview.github.io/?https://github.com/zabbix-tooling/k8s-zabbix/blob/master/documentation/template/custom_service_kubernetes.html) 34 | 35 | The current docker image is published at https://hub.docker.com/repository/docker/scoopex666/k8s-zabbix/ 36 | 37 | Architecture Details 38 | ===================== 39 | 40 | ![Deployment Diagram](documentation/deployment_yed.png) 41 | 42 | Behavior of the system: 43 | 44 | * k8s-zabbix queries the kubernetes api service for several types of k8s entities (see above) 45 | * discovered data is stored in a internal cache of k8s-zabbix 46 | * new k8s entities are sent to zabbix or optionally to a configurable webservice 47 | * if a k8s entity disappears, zabbix and/or optionally a configurable webservice are notified 48 | * if k8s entities appear/disappear the zabbix discover for low level disovery is updated 49 | * known entities (discovery and data) will be sent to zabbix and/or the webservice in a configurable schedule 50 | * sentry can optionally used as error tracking system 51 | 52 | 53 | Testing and development 54 | ======================= 55 | 56 | * Install the needed components, see [Dockerfile](./Dockerfile) 57 | * Clone Repo and install dependencies 58 | ``` 59 | 
git clone git@github.com:zabbix-tooling/k8s-zabbix.git 60 | virtualenv -p python3 venv 61 | source venv/bin/activate 62 | pip3 install -r requirements.txt 63 | ``` 64 | * Create monitoring account 65 | ``` 66 | kubectl apply -f kubernetes/monitoring-user.yaml 67 | ``` 68 | * Gather API Key 69 | ``` 70 | kubectl get secrets -n monitoring 71 | kubectl describe secret -n monitoring 72 | ``` 73 | * Test 74 | ``` 75 | source venv/bin/activate 76 | cp config_default.py configd_c1.py 77 | # edit to appropriate values for your setup 78 | vim configd_c1 79 | ./check_kubernetesd configd_c1 80 | ``` 81 | * Test in docker (IS ESSENTIAL FOR PUBLISH) 82 | ``` 83 | ./build.sh default 84 | ``` 85 | * Create release 86 | ``` 87 | git tag NEW_TAG 88 | git push --tags 89 | make publish 90 | ``` 91 | Production Deployment 92 | ===================== 93 | 94 | * Clone Repo and install dependencies 95 | ``` 96 | git clone git@github.com:zabbix-tooling/k8s-zabbix.git 97 | ``` 98 | * Clone Repo and install dependencies 99 | ``` 100 | ./build.sh default 101 | MY_PRIVATE_REGISTRY="docker-registry.foo.bar" 102 | docker tag k8s-zabbix:latest $MY_PRIVATE_REGISTRY:k8s-zabbix:latest 103 | docker push $MY_PRIVATE_REGISTRY:k8s-zabbix:latest 104 | ``` 105 | * Get API Key 106 | ``` 107 | kubectl get secrets -n monitoring 108 | kubectl describe secret -n monitoring 109 | ``` 110 | * Create monitoring account and api service 111 | ``` 112 | kubectl apply -f kubernetes/service-apiserver.yaml 113 | kubectl apply -f kubernetes/monitoring-user.yaml 114 | ``` 115 | * Configure a ingress for that service with valid ssl certificate for high available access to the kubernetes API
116 | (otherwise set SSL\_VERIFY to "False") 117 | ``` 118 | vi kubernetes/ingress-apiserver.yaml 119 | kubectl apply -f kubernetes/ingress-apiserver.yaml 120 | ``` 121 | * Zabbix Configuration 122 | * Import the monitoring template [zabbix template](template/custom_service_kubernetes.xml) to zabbix : Configuration → Templates → Import 123 | * Create a virtual monitoring host for your kubernetes cluster
124 | (i.e. "k8s-prod-001", name should match the ZABBIX\_HOST in the deployment.yaml of the next step) 125 | * Assign the template to that host 126 | * Create and apply deployment 127 | (adapt the configuration values for your environment) 128 | ``` 129 | vi kubernetes/deployment.yaml 130 | ``` 131 | * Adapt values corresponding to your cluster setup, use ENV Variables defined in config_default.py 132 | ``` 133 | kubectl apply -f kubernetes/deployment.yaml 134 | ``` 135 | * Check proper function 136 | * Review the logs of the pod 137 | ``` 138 | kubectl logs -n monitoring k8s-zabbix... -f 139 | ``` 140 | * Review latest data in zabbix 141 | * "Monitoring" → "Latest data" → "Add Hosts": i.e. "k8s-prod-001" 142 | * Enable Option "Show items without data" → Button "Apply" 143 | 144 | Unix Signals 145 | ============ 146 | 147 | Unix signals are useful for debugging: 148 | 149 | * *SIGQUIT*: Dumps the stacktraces of all threads and terminates the daemon 150 | * *SIGUSR1*: Display an overview of data held in CheckKubernetesDaemon.data and CheckKubernetesDaemon.discovery_sent 151 | * *SIGUSR2*: Display detailed data held in CheckKubernetesDaemon.data and CheckKubernetesDaemon.discovery_sent 152 | 153 | 154 | Commandline arguments 155 | ===================== 156 | 157 | ``` 158 | $ ./check_kubernetesd -h 159 | usage: check_kubernetesd [-h] [--show_effective_config] [--show_ini] [--disable_colors] ini_file 160 | 161 | Zabbix monitoring daemon for kubernetes 162 | 163 | positional arguments: 164 | ini_file optional, use a additional inifile for configuration (environment variables take precedence) or execute a binary with a fully qualified file path 165 | 166 | optional arguments: 167 | -h, --help show this help message and exit 168 | --show_effective_config 169 | display the final config as environment variablesbased env variables and ini file parameters 170 | --show_ini show variables as ini files parameters instead of environment variables 171 | --disable_colors
disable colors in logoutput 172 | 173 | ``` 174 | 175 | Authors 176 | ======= 177 | 178 | - Amin Dandache 179 | - Marc Schoechlin 180 | 181 | This project is based on prior work of [https://github.com/posuch/zabbix-kubernetes-1](https://github.com/posuch/zabbix-kubernetes-1) 182 | 183 | Licence 184 | ======= 185 | 186 | see "[LICENSE](./LICENSE)" file 187 | -------------------------------------------------------------------------------- /base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zabbix-tooling/k8s-zabbix/bb5e256133c7723cae4b740d9da9d869019804ca/base/__init__.py -------------------------------------------------------------------------------- /base/config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | from configparser import ConfigParser 5 | from dataclasses import dataclass, field 6 | from enum import Enum 7 | from itertools import chain 8 | 9 | 10 | def str2bool(v: str | bool) -> bool: 11 | if isinstance(v, bool): 12 | return v 13 | return v.lower() in ("yes", "true", "t", "1") 14 | 15 | 16 | class ClusterAccessConfigType(Enum): 17 | KUBECONFIG = "kubeconfig" 18 | INCLUSTER = "incluster" 19 | TOKEN = "token" 20 | 21 | 22 | logger = logging.getLogger(__file__) 23 | 24 | 25 | @dataclass(order=True) 26 | class Configuration: 27 | k8s_config_type: ClusterAccessConfigType = ClusterAccessConfigType.INCLUSTER 28 | k8s_api_host: str = 'https://example.kube-apiserver.com' 29 | k8s_api_token: str = '' 30 | k8s_api_stream_timeout_seconds: int = 240 31 | k8s_api_request_timeout_seconds: int = 240 32 | verify_ssl: bool = True 33 | debug: bool = False 34 | debug_k8s_events: bool = False 35 | namespace_exclude_re: str = "" 36 | resources_exclude: list[str] = field(default_factory=lambda: []) 37 | 38 | sentry_enabled: bool = False 39 | sentry_dsn: str = "" 40 | 41 | zabbix_server: str = 
'example.zabbix-server.com' 42 | zabbix_resources_exclude: list[str] = field(default_factory=lambda: []) 43 | zabbix_host: str = 'k8s-example-host' 44 | zabbix_debug: bool = False 45 | zabbix_single_debug: bool = False 46 | zabbix_dry_run: bool = False 47 | 48 | web_api_enable: bool = False 49 | web_api_resources_exclude: list[str] = field( 50 | default_factory=lambda: ["daemonsets", "components", "services"]) 51 | web_api_verify_ssl: bool = True 52 | web_api_host: str = "https://example.api.com/api/v1/k8s" 53 | web_api_token: str = "" 54 | web_api_cluster: str = 'k8s-test-cluster' 55 | 56 | discovery_interval_fast: int = 60 * 15 57 | resend_data_interval_fast: int = 60 * 2 58 | 59 | discovery_interval_slow: int = 60 * 60 * 2 60 | resend_data_interval_slow: int = 60 * 30 61 | 62 | def _convert_to_type(self, field_name: str, 63 | value: str | list[str] | bool | int | ClusterAccessConfigType) -> \ 64 | str | list[str] | bool | int | ClusterAccessConfigType: 65 | 66 | if not isinstance(value, str): 67 | return value 68 | 69 | if isinstance(getattr(self, field_name), str): 70 | return str(value) 71 | elif isinstance(getattr(self, field_name), bool): 72 | return str2bool(value) 73 | elif isinstance(getattr(self, field_name), int): 74 | return int(value) 75 | elif isinstance(getattr(self, field_name), list): 76 | return re.split(r"[\s,]+", value.strip()) 77 | elif isinstance(getattr(self, field_name), ClusterAccessConfigType): 78 | return ClusterAccessConfigType(value) 79 | else: 80 | raise ValueError(f"type not implemented {getattr(self, field_name)} {value}") 81 | 82 | def load_config_file(self, file_name: str) -> None: 83 | if not os.path.isfile(file_name): 84 | raise ValueError(f"file {file_name} does not exist") 85 | 86 | config_ini = ConfigParser(inline_comment_prefixes="#") 87 | 88 | # fake a "top" section because configparser wants mandatory sections 89 | with open(file_name) as lines_io: 90 | lines = chain(["[top]"], lines_io.readlines()) 91 | 
config_ini.read_file(lines) 92 | 93 | for field_name in self.__dataclass_fields__: 94 | if field_name not in config_ini["top"]: 95 | continue 96 | 97 | value = config_ini["top"][field_name] 98 | setattr(self, field_name, self._convert_to_type(field_name, value)) 99 | 100 | def load_from_environment_variables(self) -> None: 101 | for field_name in self.__dataclass_fields__: 102 | if field_name.upper() in os.environ and os.environ[field_name.upper()] != "": 103 | print("setting %s by environment variable %s" % (field_name, field_name.upper())) 104 | setattr(self, field_name, self._convert_to_type(field_name, os.environ[field_name.upper()])) 105 | 106 | def show_effective_config(self, show_as_ini_variables: bool = False) -> None: 107 | name_len = 0 108 | value_len = 0 109 | for field_name in self.__dataclass_fields__: 110 | name_len = max(name_len, len(field_name)) 111 | value_len = max(value_len, len(str(getattr(self, field_name)))) 112 | 113 | format_string = f"** %-{name_len + 2}s %-{value_len}s **" 114 | print("*" * (name_len + value_len + 9)) 115 | print(format_string % ("EFFECTIVE CONFIG", "")) 116 | print(format_string % ("", "")) 117 | for field_name in self.__dataclass_fields__: 118 | field_name_show = field_name 119 | if not show_as_ini_variables: 120 | field_name_show = field_name.upper() 121 | print(format_string % (field_name_show, getattr(self, field_name))) 122 | print(format_string % ("", "")) 123 | print("*" * (name_len + value_len + 9)) 124 | -------------------------------------------------------------------------------- /base/daemon_thread.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | import signal 4 | import sys 5 | import threading 6 | import time 7 | from dataclasses import dataclass 8 | from datetime import datetime, timedelta 9 | from pprint import pformat 10 | 11 | import kubernetes 12 | from kubernetes import client, watch 13 | from kubernetes import config as 
kube_config 14 | from kubernetes.client import ApiClient, CoreV1Api, AppsV1Api, ApiextensionsV1Api 15 | from pyzabbix import ZabbixMetric, ZabbixSender, ZabbixResponse 16 | 17 | from base.config import Configuration, ClusterAccessConfigType 18 | from base.timed_threads import TimedThread 19 | from base.watcher_thread import WatcherThread 20 | from k8sobjects.k8sobject import K8sObject 21 | from k8sobjects.k8sresourcemanager import K8sResourceManager 22 | from k8sobjects.pvc import get_pvc_volumes_for_all_nodes 23 | from .web_api import WebApi 24 | 25 | exit_flag = threading.Event() 26 | 27 | 28 | @dataclass 29 | class DryResult: 30 | failed: int = 0 31 | processed: int = 0 32 | 33 | 34 | def get_data_timeout_datetime() -> datetime: 35 | return datetime.now() - timedelta(minutes=1) 36 | 37 | 38 | def get_discovery_timeout_datetime() -> datetime: 39 | return datetime.now() - timedelta(hours=1) 40 | 41 | 42 | class KubernetesApi: 43 | __shared_state = dict(core_v1=None, 44 | apps_v1=None, 45 | extensions_v1=None) 46 | 47 | def __init__(self, api_client: ApiClient): 48 | self.__dict__ = self.__shared_state 49 | if not getattr(self, 'core_v1', None): 50 | self.core_v1 = client.CoreV1Api(api_client) 51 | if not getattr(self, 'apps_v1', None): 52 | self.apps_v1 = client.AppsV1Api(api_client) 53 | if not getattr(self, 'extensions_v1', None): 54 | self.extensions_v1 = client.ApiextensionsV1Api(api_client) 55 | 56 | 57 | class CheckKubernetesDaemon: 58 | data: dict[str, K8sResourceManager] = {} 59 | discovery_sent: dict[str, datetime] = {} 60 | thread_lock = threading.Lock() 61 | 62 | def __init__(self, config: Configuration, 63 | resources: list[str], 64 | discovery_interval: int, data_resend_interval: int, 65 | ): 66 | self.manage_threads: list[TimedThread | WatcherThread] = [] 67 | self.config = config 68 | self.logger = logging.getLogger(__file__) 69 | self.discovery_interval = int(discovery_interval) 70 | self.data_resend_interval = int(data_resend_interval) 71 | 72 | 
self.api_zabbix_interval = 60 73 | self.rate_limit_seconds = 30 74 | 75 | if config.k8s_config_type is ClusterAccessConfigType.INCLUSTER: 76 | kube_config.load_incluster_config() 77 | self.api_client = client.ApiClient() 78 | elif config.k8s_config_type is ClusterAccessConfigType.KUBECONFIG: 79 | kube_config.load_kube_config() 80 | self.api_client = kube_config.new_client_from_config() 81 | elif config.k8s_config_type is ClusterAccessConfigType.TOKEN: 82 | self.api_configuration = client.Configuration() 83 | self.api_configuration.host = config.k8s_api_host 84 | self.api_configuration.verify_ssl = config.verify_ssl 85 | self.api_configuration.api_key = {"authorization": "Bearer " + config.k8s_api_token} 86 | self.api_client = client.ApiClient(self.api_configuration) 87 | else: 88 | self.logger.fatal(f"k8s_config_type = {config.k8s_config_type} is not implemented") 89 | sys.exit(1) 90 | 91 | self.logger.info(f"Initialized cluster access for {config.k8s_config_type}") 92 | # K8S API 93 | self.debug_k8s_events = False 94 | self.core_v1 = KubernetesApi(self.api_client).core_v1 95 | self.apps_v1 = KubernetesApi(self.api_client).apps_v1 96 | self.extensions_v1 = KubernetesApi(self.api_client).extensions_v1 97 | 98 | self.zabbix_sender = ZabbixSender(zabbix_server=config.zabbix_server) 99 | self.zabbix_resources = CheckKubernetesDaemon.exclude_resources(resources, 100 | self.config.zabbix_resources_exclude) 101 | self.zabbix_host = config.zabbix_host 102 | self.zabbix_debug = config.zabbix_debug 103 | self.zabbix_single_debug = config.zabbix_single_debug 104 | self.zabbix_dry_run = config.zabbix_dry_run 105 | 106 | self.web_api = None 107 | self.web_api_enable = config.web_api_enable 108 | self.web_api_resources = CheckKubernetesDaemon.exclude_resources(resources, 109 | self.config.web_api_resources_exclude) 110 | 111 | self.web_api_host = config.web_api_host 112 | self.web_api_token = config.web_api_token 113 | self.web_api_cluster = config.web_api_cluster 114 | 
self.web_api_verify_ssl = config.web_api_verify_ssl 115 | 116 | self.resources = CheckKubernetesDaemon.exclude_resources(resources, self.config.resources_exclude) 117 | 118 | self.logger.info(f"Init K8S-ZABBIX Watcher for resources: {','.join(self.resources)}") 119 | self.logger.info(f"Zabbix Host: {self.zabbix_host} / Zabbix Proxy or Server: {config.zabbix_server}") 120 | if self.web_api_enable: 121 | self.logger.info(f"WEB Api Host {self.web_api_host} with resources {','.join(self.web_api_resources)}") 122 | 123 | @staticmethod 124 | def exclude_resources(available_types: list[str], excluded_types: list[str]) -> list[str]: 125 | result = [] 126 | for k8s_type_available in available_types: 127 | if k8s_type_available not in excluded_types: 128 | result.append(k8s_type_available) 129 | return result 130 | 131 | def handler(self, signum: int, *args: str) -> None: 132 | if signum in [signal.SIGTERM]: 133 | self.logger.info('Signal handler called with signal %s... stopping (max %s seconds)' % (signum, 3)) 134 | exit_flag.set() 135 | for thread in self.manage_threads: 136 | thread.join(timeout=3) 137 | self.logger.info('All threads exited... 
exit check_kubernetesd') 138 | sys.exit(0) 139 | elif signum in [signal.SIGUSR1]: 140 | self.logger.info('=== Listing count of data hold in CheckKubernetesDaemon.data ===') 141 | with self.thread_lock: 142 | for r, d in self.data.items(): 143 | for obj_name, obj_d in d.objects.items(): 144 | self.logger.info( 145 | f"resource={r}, last_sent_zabbix={obj_d.last_sent_zabbix}, " + 146 | f"last_sent_web={obj_d.last_sent_web}" 147 | ) 148 | for resource_discovered, resource_discovered_time in self.discovery_sent.items(): 149 | self.logger.info( 150 | f"resource={resource_discovered}, last_discovery_sent={resource_discovered_time}") 151 | elif signum in [signal.SIGUSR2]: 152 | self.logger.info('=== Listing all data hold in CheckKubernetesDaemon.data ===') 153 | with self.thread_lock: 154 | for r, d in self.data.items(): 155 | for obj_name, obj_d in d.objects.items(): 156 | data_print = pformat(obj_d.data, indent=2) 157 | self.logger.info(f"resource={r}, object_name={obj_name}, object_data={data_print}") 158 | 159 | def run(self) -> None: 160 | self.start_data_threads() 161 | self.start_api_info_threads() 162 | self.start_loop_send_discovery_threads() 163 | self.start_resend_threads() 164 | 165 | def start_data_threads(self) -> None: 166 | thread: WatcherThread | TimedThread 167 | for resource in self.resources: 168 | with self.thread_lock: 169 | self.data.setdefault(resource, K8sResourceManager(resource, zabbix_host=self.zabbix_host)) 170 | if resource == 'pods': 171 | self.data.setdefault('containers', K8sResourceManager('containers')) 172 | 173 | # watcher threads 174 | if resource == 'containers': 175 | pass 176 | elif resource == 'components': 177 | thread = TimedThread(resource, self.data_resend_interval, exit_flag, 178 | daemon_object=self, daemon_method='watch_data') 179 | self.manage_threads.append(thread) 180 | thread.start() 181 | elif resource == 'pvcs': 182 | thread = TimedThread(resource, self.data_resend_interval, exit_flag, 183 | daemon_object=self, 
daemon_method='watch_data') 184 | self.manage_threads.append(thread) 185 | thread.start() 186 | # additional looping data threads 187 | elif resource == 'services': 188 | thread = TimedThread(resource, self.data_resend_interval, exit_flag, 189 | daemon_object=self, daemon_method='report_global_data_zabbix', 190 | delay_first_run_seconds=self.discovery_interval + 5) 191 | self.manage_threads.append(thread) 192 | thread.start() 193 | elif resource == 'containers': 194 | thread = TimedThread(resource, self.data_resend_interval, exit_flag, 195 | daemon_object=self, daemon_method='report_global_data_zabbix', 196 | delay_first_run_seconds=self.discovery_interval + 5) 197 | self.manage_threads.append(thread) 198 | thread.start() 199 | else: 200 | thread = WatcherThread(resource, exit_flag, 201 | daemon_object=self, daemon_method='watch_data') 202 | self.manage_threads.append(thread) 203 | thread.start() 204 | 205 | def start_api_info_threads(self) -> None: 206 | if 'nodes' not in self.resources: 207 | # only send api heartbeat once 208 | return 209 | 210 | thread = TimedThread('api_heartbeat', self.api_zabbix_interval, exit_flag, 211 | daemon_object=self, daemon_method='send_heartbeat_info') 212 | self.manage_threads.append(thread) 213 | thread.start() 214 | 215 | def start_loop_send_discovery_threads(self) -> None: 216 | for resource in self.resources: 217 | send_discovery_thread = TimedThread(resource, self.discovery_interval, exit_flag, 218 | daemon_object=self, daemon_method='send_zabbix_discovery', 219 | delay_first_run=True, 220 | delay_first_run_seconds=30) 221 | self.manage_threads.append(send_discovery_thread) 222 | send_discovery_thread.start() 223 | 224 | def start_resend_threads(self) -> None: 225 | for resource in self.resources: 226 | resend_thread = TimedThread(resource, self.data_resend_interval, exit_flag, 227 | daemon_object=self, daemon_method='resend_data', 228 | delay_first_run=True, 229 | delay_first_run_seconds=60, 230 | ) 231 | 
    def get_api_for_resource(self, resource: str) -> CoreV1Api | AppsV1Api | ApiextensionsV1Api:
        """Map a resource name to the kubernetes API object serving it.

        Raises AttributeError for unknown resources.
        NOTE(review): 'tls' is handled in watch_data (via the secrets stream)
        but has no mapping here; 'ingresses' maps to ApiextensionsV1Api although
        list_ingress_for_all_namespaces belongs to the networking API — confirm
        both against the kubernetes client version in use.
        """
        if resource in ['nodes', 'components', 'secrets', 'pods', 'services', 'pvcs']:
            api = self.core_v1
        elif resource in ['deployments', 'daemonsets', 'statefulsets']:
            api = self.apps_v1
        elif resource in ['ingresses']:
            api = self.extensions_v1
        else:
            raise AttributeError('No valid resource found: %s' % resource)
        return api

    def get_web_api(self) -> WebApi:
        """Lazily create and cache the WebApi client on first use."""
        if not hasattr(self, '_web_api'):
            self._web_api = WebApi(self.web_api_host, self.web_api_token, verify_ssl=self.web_api_verify_ssl)
        return self._web_api

    def watch_data(self, resource: str) -> None:
        """Continuously feed resource events into the cache.

        For watchable resources this runs an endless loop of bounded watch
        streams (each ends after k8s_api_stream_timeout_seconds and is
        restarted); 'components' and 'pvcs' cannot be watched and are fetched
        once per call followed by a sleep — for those this method is driven by
        a TimedThread, so each invocation also sleeps data_resend_interval
        before returning into the next scheduled run.
        """
        api = self.get_api_for_resource(resource)
        stream_named_arguments = {"timeout_seconds": self.config.k8s_api_stream_timeout_seconds}
        request_named_arguments = {"_request_timeout": self.config.k8s_api_request_timeout_seconds}
        self.logger.info(
            "Watching for resource >>>%s<<< with a stream duration of %ss or request_timeout of %ss" % (
                resource,
                self.config.k8s_api_stream_timeout_seconds,
                self.config.k8s_api_request_timeout_seconds)
        )
        while True:
            # a fresh Watch object per pass; unused by the poll-only branches
            w = watch.Watch()
            if resource == 'nodes':
                for obj in w.stream(api.list_node, **stream_named_arguments):
                    self.watch_event_handler(resource, obj)
            elif resource == 'deployments':
                for obj in w.stream(api.list_deployment_for_all_namespaces, **stream_named_arguments):
                    self.watch_event_handler(resource, obj)
            elif resource == 'daemonsets':
                for obj in w.stream(api.list_daemon_set_for_all_namespaces, **stream_named_arguments):
                    self.watch_event_handler(resource, obj)
            elif resource == 'statefulsets':
                for obj in w.stream(api.list_stateful_set_for_all_namespaces, **stream_named_arguments):
                    self.watch_event_handler(resource, obj)
            elif resource == 'components':
                # The api does not support watching on component status
                with self.thread_lock:
                    for obj in api.list_component_status(watch=False, **request_named_arguments).to_dict().get('items'):
                        self.data[resource].add_obj_from_data(obj)
                time.sleep(self.data_resend_interval)
            elif resource == 'pvcs':
                # PVC usage is computed per node; no watch support either
                pvc_volumes = get_pvc_volumes_for_all_nodes(api=api,
                                                            timeout=self.config.k8s_api_request_timeout_seconds,
                                                            namespace_exclude_re=self.config.namespace_exclude_re,
                                                            resource_manager=self.data[resource])
                with self.thread_lock:
                    for obj in pvc_volumes:
                        self.data[resource].add_obj(obj)
                time.sleep(self.data_resend_interval)
            elif resource == 'ingresses':
                for obj in w.stream(api.list_ingress_for_all_namespaces, **stream_named_arguments):
                    self.watch_event_handler(resource, obj)
            elif resource == 'tls':
                # tls certificate data is derived from secrets
                for obj in w.stream(api.list_secret_for_all_namespaces, **stream_named_arguments):
                    self.watch_event_handler(resource, obj)
            elif resource == 'pods':
                for obj in w.stream(api.list_pod_for_all_namespaces, **stream_named_arguments):
                    self.watch_event_handler(resource, obj)
            elif resource == 'services':
                for obj in w.stream(api.list_service_for_all_namespaces, **stream_named_arguments):
                    self.watch_event_handler(resource, obj)
            else:
                self.logger.error("No watch handling for resource %s" % resource)
                time.sleep(60)
            self.logger.debug("Watch/fetch completed for resource >>>%s<<<, restarting" % resource)

    def watch_event_handler(self, resource: str, event: dict) -> None:
        """Process one watch event (ADDED/MODIFIED/DELETED) for *resource*.

        Updates the shared cache under thread_lock and forwards changed objects
        to zabbix/web. Namespaces matching namespace_exclude_re are skipped.
        """
        obj = event['object'].to_dict()
        event_type = event['type']
        name = obj['metadata']['name']
        namespace = str(obj['metadata']['namespace'])

        if self.config.namespace_exclude_re and re.match(self.config.namespace_exclude_re, namespace):
            self.logger.debug(f"skip namespace {namespace}")
            return

        if self.debug_k8s_events:
            self.logger.info(f"{event_type} [{resource}]: {namespace}/{name} : >>>{pformat(obj, indent=2)}<<<")
        else:
            self.logger.debug(f"{event_type} [{resource}]: {namespace}/{name}")

        with self.thread_lock:
            if not self.data[resource].resource_class:
                self.logger.error('Could not add watch_event_handler! No resource_class for "%s"' % resource)
                return

        # ADDED and MODIFIED are handled identically: upsert + send if dirty.
        if event_type.lower() == 'added':
            with self.thread_lock:
                resourced_obj = self.data[resource].add_obj_from_data(obj)

            if resourced_obj and (resourced_obj.is_dirty_zabbix or resourced_obj.is_dirty_web):
                self.send_object(resource, resourced_obj, event_type,
                                 send_zabbix_data=resourced_obj.is_dirty_zabbix,
                                 send_web=resourced_obj.is_dirty_web)
        elif event_type.lower() == 'modified':
            with self.thread_lock:
                resourced_obj = self.data[resource].add_obj_from_data(obj)
            if resourced_obj and (resourced_obj.is_dirty_zabbix or resourced_obj.is_dirty_web):
                self.send_object(resource, resourced_obj, event_type,
                                 send_zabbix_data=resourced_obj.is_dirty_zabbix,
                                 send_web=resourced_obj.is_dirty_web)
        elif event_type.lower() == 'deleted':
            with self.thread_lock:
                resourced_obj = self.data[resource].del_obj(obj)
            if resourced_obj:
                self.delete_object(resource, resourced_obj)
        else:
            self.logger.info('event type "%s" not implemented' % event_type)
    def report_global_data_zabbix(self, resource: str) -> None:
        """ aggregate and report information for some speciality in resources """
        # Nothing is sent before the discovery for this resource went out,
        # otherwise zabbix would drop items for unknown discovered entities.
        if resource not in self.discovery_sent:
            self.logger.debug('skipping report_global_data_zabbix for %s, discovery not send yet!' % resource)
            return

        data_to_send = list()

        if resource == 'services':
            # count all services and the subset exposed through an ingress
            num_services = 0
            num_ingress_services = 0
            with self.thread_lock:
                for obj_uid, resourced_obj in self.data[resource].objects.items():
                    num_services += 1
                    if resourced_obj.resource_data['is_ingress']:
                        num_ingress_services += 1

            data_to_send.append(
                ZabbixMetric(self.zabbix_host, 'check_kubernetes[get,services,num_services]',
                             str(num_services)))
            data_to_send.append(
                ZabbixMetric(self.zabbix_host, 'check_kubernetes[get,services,num_ingress_services]',
                             str(num_ingress_services)))
            self.send_data_to_zabbix(resource, None, data_to_send)

        # TODO: the 'containers' branch (aggregating pod container status per
        # namespace/pod-base-name into zabbix metrics) was disabled here and
        # should be rewritten later.

    def resend_data(self, resource: str) -> None:
        """Periodically resend cached data for *resource* to zabbix and the web API.

        Runs entirely under thread_lock. Zabbix items are only delivered once
        the discovery for the resource has been sent; web objects are sent as
        ADDED/MODIFIED depending on their submission state.
        """
        with self.thread_lock:
            try:
                metrics = list()
                if resource not in self.data or len(self.data[resource].objects) == 0:
                    self.logger.warning("no resource data available for %s , stop delivery" % resource)
                    return

                # Zabbix
                for obj_uid, obj in self.data[resource].objects.items():
                    zabbix_send = False
                    if resource in self.discovery_sent:
                        # discovery done: always refresh every object
                        zabbix_send = True
                    elif obj.last_sent_zabbix < (datetime.now() - timedelta(seconds=self.data_resend_interval)):
                        self.logger.debug("resend zabbix : %s - %s/%s data because its outdated" % (
                            resource, obj.name_space, obj.name))
                        zabbix_send = True
                    if zabbix_send:
                        metrics += obj.get_zabbix_metrics()
                        obj.last_sent_zabbix = datetime.now()
                        obj.is_dirty_zabbix = False
                if len(metrics) > 0:
                    if resource not in self.discovery_sent:
                        # NOTE(review): 'obj' here is the last object of the loop
                        # above — the log line names an arbitrary object, not the
                        # whole batch; confirm this is intended.
                        self.logger.debug(
                            'skipping resend_data zabbix , discovery for %s - %s/%s not sent yet!' % (
                                resource, obj.name_space, obj.name))
                    else:
                        self.send_data_to_zabbix(resource, metrics=metrics)

                # Web
                for obj_uid, obj in self.data[resource].objects.items():
                    if obj.is_dirty_web:
                        if obj.is_unsubmitted_web():
                            self.send_to_web_api(resource, obj, 'ADDED')
                        else:
                            self.send_to_web_api(resource, obj, 'MODIFIED')
                    else:
                        if obj.is_unsubmitted_web():
                            self.send_to_web_api(resource, obj, 'ADDED')
                        elif obj.last_sent_web < (datetime.now() - timedelta(seconds=self.data_resend_interval)):
                            self.send_to_web_api(resource, obj, 'MODIFIED')
                            self.logger.debug("resend web : %s/%s data because its outdated" % (resource, obj.name))
                    obj.last_sent_web = datetime.now()
                    obj.is_dirty_web = False
            except RuntimeError as e:
                # e.g. "dictionary changed size during iteration" from concurrent updates
                self.logger.warning(str(e))

    def delete_object(self, resource_type: str, resourced_obj: K8sObject) -> None:
        """Propagate the deletion of an object to the web API.

        # TODO: trigger zabbix discovery, srsly?
        """
        self.send_to_web_api(resource_type, resourced_obj, "deleted")

    def send_zabbix_discovery(self, resource: str) -> None:
        """Aggregate discovery data for all cached objects of *resource* and send
        it to zabbix as one low-level-discovery metric; records the send time in
        discovery_sent (which unblocks data delivery for the resource)."""
        self.logger.info(f"send_zabbix_discovery: {resource}")
        with self.thread_lock:
            if resource not in self.data:
                self.logger.warning('send_zabbix_discovery: resource "%s" not in self.data... skipping!' % resource)
                return

            data = list()
            for obj_uid, obj in self.data[resource].objects.items():
                data += obj.get_zabbix_discovery_data()

            if data:
                # NOTE(review): uses the loop's last 'obj' to wrap the combined
                # data — works only because all objects share the same class.
                metric = obj.get_discovery_for_zabbix(data)
                self.logger.debug('send_zabbix_discovery: resource "%s": %s' % (resource, metric))
                self.send_discovery_to_zabbix(resource, metric=metric)
            else:
                self.logger.warning('send_zabbix_discovery: resource "%s" has no discovery data' % resource)

            self.discovery_sent[resource] = datetime.now()
    def send_object(self, resource: str, resourced_obj: K8sObject,
                    event_type: str, send_zabbix_data: bool = False,
                    send_web: bool = False) -> None:
        """Send a single changed object to zabbix and/or the web API.

        Per-object sends are rate limited (rate_limit_seconds); a suppressed
        send leaves the object dirty so the periodic resend picks it up later.
        """
        with self.thread_lock:
            if send_zabbix_data:
                if resourced_obj.last_sent_zabbix < datetime.now() - timedelta(seconds=self.rate_limit_seconds):
                    self.send_data_to_zabbix(resource, obj=resourced_obj)
                    resourced_obj.last_sent_zabbix = datetime.now()
                    resourced_obj.is_dirty_zabbix = False
                else:
                    self.logger.debug('obj >>>type: %s, name: %s/%s<<< not sending to zabbix! rate limited (%is)' % (
                        resource, resourced_obj.name_space, resourced_obj.name, self.rate_limit_seconds))
                    resourced_obj.is_dirty_zabbix = True

            if send_web:
                if resourced_obj.last_sent_web < datetime.now() - timedelta(seconds=self.rate_limit_seconds):
                    self.send_to_web_api(resource, resourced_obj, event_type)
                    resourced_obj.last_sent_web = datetime.now()
                    # NOTE(review): web dirty flag is only cleared when no zabbix
                    # send was requested in the same call — confirm this coupling
                    # between the two channels is intended.
                    if resourced_obj.is_dirty_web is True and not send_zabbix_data:
                        # only set dirty False if send_to_web_api worked
                        resourced_obj.is_dirty_web = False
                else:
                    self.logger.debug('obj >>>type: %s, name: %s/%s<<< not sending to web! rate limited (%is)' % (
                        resource, resourced_obj.name_space, resourced_obj.name, self.rate_limit_seconds))
                    resourced_obj.is_dirty_web = True

    def send_heartbeat_info(self, resource: str) -> None:
        """Send the API-alive heartbeat (current unix timestamp) to zabbix.

        NOTE(review): key uses 'check_kubernetesd[...]' while the service
        aggregation uses 'check_kubernetes[...]' — verify against the zabbix
        template which spelling each item expects.
        """
        result = self.send_to_zabbix([
            ZabbixMetric(self.zabbix_host, 'check_kubernetesd[discover,api]', str(int(time.time())))
        ])
        if result.failed > 0:
            self.logger.error(f"{resource} failed to send heartbeat to zabbix")
        else:
            self.logger.debug(f"{resource} successfully sent heartbeat to zabbix ")

    def send_to_zabbix(self, metrics: list[ZabbixMetric]) -> ZabbixResponse | DryResult:
        """Low-level send of prepared metrics to zabbix.

        In dry-run mode nothing is sent and an empty DryResult is returned;
        send exceptions are logged and reported as a DryResult with failed=1
        instead of being raised.
        """
        if self.zabbix_dry_run:
            result = DryResult()
        else:
            try:
                result = self.zabbix_sender.send(metrics)
            except Exception as e:
                self.logger.error(e)
                result = DryResult()
                result.failed = 1
                result.processed = 0

        if self.zabbix_debug:
            if len(metrics) > 1:
                self.logger.info('===> Sending to zabbix: >>>\n%s\n<<<' % pformat(metrics, indent=2))
            else:
                self.logger.info('===> Sending to zabbix: >>>%s<<<' % metrics)
        return result

    def send_discovery_to_zabbix(self, resource: str, metric: ZabbixMetric | None = None,
                                 obj: K8sObject | None = None) -> None:
        """Send low-level discovery for *resource* to zabbix.

        Either a single *obj* (its discovery data is computed here) or a
        prebuilt aggregate *metric* must be given; resources excluded from
        zabbix delivery are skipped with a warning.
        """
        if resource not in self.zabbix_resources:
            self.logger.warning(
                f'resource {resource} ist not activated, active resources are : {",".join(self.zabbix_resources)}')
            return

        if obj:
            # NOTE(review): metric is normally None in this branch; confirm
            # get_discovery_for_zabbix() supports being called this way.
            discovery_data = obj.get_discovery_for_zabbix(metric)
            if not discovery_data:
                self.logger.warning('No discovery_data for obj %s, not sending to zabbix!' % obj.uid)
                return

            discovery_key = 'check_kubernetesd[discover,' + resource + ']'
            result = self.send_to_zabbix([ZabbixMetric(host=self.zabbix_host, key=discovery_key, value=discovery_data)])
            if result.failed > 0:
                self.logger.error("failed to sent zabbix discovery: %s : >>>%s<<<" % (discovery_key, discovery_data))
            elif self.zabbix_debug:
                self.logger.info("successfully sent zabbix discovery: %s >>>>%s<<<" % (discovery_key, discovery_data))
        elif metric:
            result = self.send_to_zabbix([metric])
            if result.failed > 0:
                self.logger.error("failed to sent mass zabbix discovery: >>>%s<<<" % metric)
            elif self.zabbix_debug:
                self.logger.info("successfully sent mass zabbix discovery: >>>%s<<<" % metric)
        else:
            self.logger.warning('No obj or metrics found for send_discovery_to_zabbix [%s]' % resource)

    def send_data_to_zabbix(self, resource: str, obj: K8sObject | None = None,
                            metrics: list[ZabbixMetric] | None = None) -> None:
        """Send item data for *resource* to zabbix.

        Metrics may be passed directly or derived from *obj*; resources
        excluded from zabbix delivery are silently skipped. With
        zabbix_single_debug each metric is sent (and logged) individually.
        """
        if metrics is None:
            metrics = list()
        if resource not in self.zabbix_resources:
            return

        if obj and len(metrics) == 0:
            metrics = obj.get_zabbix_metrics()

        if len(metrics) == 0 and obj:
            self.logger.debug('No zabbix metrics to send for %s: %s' % (obj.uid, metrics))
            return
        elif len(metrics) == 0:
            self.logger.debug('No zabbix metrics or no obj found for [%s]' % resource)
            return

        if self.zabbix_single_debug:
            # one send per metric for precise error attribution
            for metric in metrics:
                result = self.send_to_zabbix([metric])
                if result.failed > 0:
                    self.logger.error("failed to sent zabbix items: %s", metric)
                else:
                    self.logger.info("successfully sent zabbix items: %s", metric)
        else:
            result = self.send_to_zabbix(metrics)
            if result.failed > 0:
                self.logger.error("failed to sent %s zabbix items, processed %s items [%s: %s]"
                                  % (result.failed, result.processed, resource, obj.name if obj else 'metrics'))
                self.logger.debug(metrics)
            else:
                self.logger.debug("successfully sent %s zabbix items [%s: %s]" % (
                    len(metrics), resource, obj.name if obj else 'metrics'))
resource, obj.name if obj else 'metrics')) 600 | self.logger.debug(metrics) 601 | else: 602 | self.logger.debug("successfully sent %s zabbix items [%s: %s]" % ( 603 | len(metrics), resource, obj.name if obj else 'metrics')) 604 | 605 | def send_to_web_api(self, resource: str, obj: K8sObject, action: str) -> None: 606 | if resource not in self.web_api_resources: 607 | return 608 | 609 | if self.web_api_enable: 610 | api = self.get_web_api() 611 | data_to_send = obj.resource_data 612 | data_to_send['cluster'] = self.web_api_cluster 613 | 614 | api.send_data(resource, data_to_send, action) 615 | else: 616 | self.logger.debug("suppressing submission of %s %s/%s" % (resource, obj.name_space, obj.name)) 617 | -------------------------------------------------------------------------------- /base/timed_threads.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import threading 3 | import time 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | if TYPE_CHECKING: 8 | from base.daemon_thread import CheckKubernetesDaemon 9 | 10 | 11 | class TimedThread(threading.Thread): 12 | stop_thread = False 13 | restart_thread = False 14 | daemon = True 15 | 16 | # TODO: change default of delay_first_run_seconds to 120 seconds 17 | def __init__(self, resource: str, interval: int, 18 | exit_flag: threading.Event, 19 | daemon_object: 'CheckKubernetesDaemon', 20 | daemon_method: str, 21 | delay_first_run: bool = False, 22 | delay_first_run_seconds: int = 60): 23 | self.cycle_interval_seconds = interval 24 | self.exit_flag = exit_flag 25 | self.resource = resource 26 | self.daemon_object = daemon_object 27 | self.daemon_method = daemon_method 28 | self.delay_first_run = delay_first_run 29 | self.delay_first_run_seconds = delay_first_run_seconds 30 | threading.Thread.__init__(self, target=self.run) 31 | self.logger = logging.getLogger(__file__) 32 | 33 | def stop(self) -> None: 34 | self.logger.info('OK: Thread "' + self.resource + '" is 
stopping"') 35 | self.stop_thread = True 36 | 37 | def run(self) -> None: 38 | # manage first run 39 | if self.delay_first_run: 40 | self.logger.info( 41 | '%s -> %s | delaying first run by %is [interval %is]' % 42 | (self.resource, self.daemon_method, self.delay_first_run_seconds, 43 | self.cycle_interval_seconds) 44 | ) 45 | time.sleep(self.delay_first_run_seconds) 46 | try: 47 | self.run_requests(first_run=True) 48 | except Exception as e: 49 | self.logger.exception(e) 50 | 51 | # manage timed runs 52 | while not self.exit_flag.wait(self.cycle_interval_seconds): 53 | try: 54 | self.run_requests() 55 | except Exception as e: 56 | self.logger.exception(e) 57 | self.logger.warning( 58 | 'looprun failed on timed thread %s.%s [interval %is]\nback off ... retrying in %s seconds' % 59 | (self.resource, self.daemon_method, self.cycle_interval_seconds, self.cycle_interval_seconds) 60 | ) 61 | time.sleep(self.cycle_interval_seconds) 62 | 63 | self.logger.info('terminating looprun thread %s.%s' % (self.resource, self.daemon_method)) 64 | 65 | def run_requests(self, first_run: bool = False) -> None: 66 | if first_run: 67 | self.logger.debug('first looprun on timed thread %s.%s [interval %is]' % 68 | (self.resource, self.daemon_method, self.cycle_interval_seconds)) 69 | getattr(self.daemon_object, self.daemon_method)(self.resource) 70 | self.logger.debug('first looprun complete on timed thread %s.%s [interval %is]' % 71 | (self.resource, self.daemon_method, self.cycle_interval_seconds)) 72 | else: 73 | self.logger.debug('looprun on timed thread %s.%s [interval %is]' % 74 | (self.resource, self.daemon_method, self.cycle_interval_seconds)) 75 | getattr(self.daemon_object, self.daemon_method)(self.resource) 76 | self.logger.debug('looprun complete on timed thread %s.%s [interval %is]' % 77 | (self.resource, self.daemon_method, self.cycle_interval_seconds)) 78 | -------------------------------------------------------------------------------- /base/watcher_thread.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import threading 3 | 4 | from urllib3.exceptions import ProtocolError 5 | 6 | from typing import TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from base.daemon_thread import CheckKubernetesDaemon 10 | 11 | 12 | class WatcherThread(threading.Thread): 13 | stop_thread = False 14 | restart_thread = False 15 | daemon = True 16 | 17 | def __init__(self, resource: str, exit_flag: threading.Event, 18 | daemon_object: 'CheckKubernetesDaemon', 19 | daemon_method: str): 20 | self.exit_flag = exit_flag 21 | self.resource = resource 22 | self.daemon_object = daemon_object 23 | self.daemon_method = daemon_method 24 | threading.Thread.__init__(self, target=self.run) 25 | self.logger = logging.getLogger(__file__) 26 | 27 | def stop(self) -> None: 28 | self.logger.info('OK: Thread "' + self.resource + '" is stopping"') 29 | self.stop_thread = True 30 | 31 | def run(self) -> None: 32 | self.logger.info('[start thread|watch] %s -> %s' % (self.resource, self.daemon_method)) 33 | try: 34 | getattr(self.daemon_object, self.daemon_method)(self.resource) 35 | except (ProtocolError, ConnectionError) as e: 36 | self.logger.error(e) 37 | self.restart_thread = True 38 | -------------------------------------------------------------------------------- /base/web_api.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import logging 3 | 4 | from k8sobjects.k8sobject import K8S_RESOURCES 5 | 6 | logger = logging.getLogger(__file__) 7 | 8 | 9 | class WebApi: 10 | def __init__(self, api_host: str, api_token: str, verify_ssl: bool = True): 11 | self.api_host = api_host 12 | self.api_token = api_token 13 | self.verify_ssl = verify_ssl 14 | 15 | url = self.get_url() 16 | r = requests.head(url) 17 | if r.status_code in [301, 302]: 18 | self.api_host = r.headers['location'] 19 | 20 | def get_headers(self): 21 | return { 22 | 'Authorization': 
self.api_token, 23 | 'User-Agent': 'k8s-zabbix agent', 24 | } 25 | 26 | def get_url(self, resource=None, path_append=""): 27 | api_resource = None 28 | if resource: 29 | api_resource = K8S_RESOURCES[resource] 30 | 31 | url = self.api_host 32 | if not url.endswith('/'): 33 | url += '/' 34 | 35 | if not api_resource: 36 | return url 37 | return url + api_resource + '/' + path_append 38 | 39 | def send_data(self, resource: str, data: dict[str, str], action: str) -> None: 40 | path_append = "" 41 | if action.lower() == 'added': 42 | func = requests.post 43 | elif action.lower() == 'modified': 44 | func = requests.put 45 | elif action.lower() == 'deleted': 46 | func = requests.delete 47 | if 'name_space' in data and data["name_space"]: 48 | path_append = "%s/%s/%s/" % ( 49 | data["cluster"], 50 | data["name_space"], 51 | data["name"], 52 | ) 53 | else: 54 | path_append = "%s/%s/" % ( 55 | data["cluster"], 56 | data["name"], 57 | ) 58 | data = {} 59 | else: 60 | return 61 | 62 | url = self.get_url(resource, path_append) 63 | 64 | # empty variables are NOT sent! 
65 | r = func(url, 66 | data=data, 67 | headers=self.get_headers(), 68 | verify=self.verify_ssl, 69 | allow_redirects=True) 70 | 71 | if r.status_code > 399: 72 | logger.warning('%s [%s] %s sended %s but failed data >>>%s<<< (%s)' % ( 73 | self.api_host, r.status_code, url, resource, data, action)) 74 | logger.warning(r.text) 75 | else: 76 | logger.debug('%s [%s] %s sucessfully sended %s >>>%s<<< (%s)' % ( 77 | self.api_host, r.status_code, url, resource, data, action)) 78 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #################################################################### 4 | ## Helpers 5 | 6 | notice(){ 7 | echo -e "\e[1;32m$1\e[0m" 8 | } 9 | 10 | # Parameter: 11 | # 1: cmd 12 | # Execute simple shell command, exit if errorcode of shell command != 0 13 | exec_cmd(){ 14 | local CMD="$1" 15 | echo "+ $CMD" 16 | eval "$CMD 2>&1" 17 | local RET="$?" 
get_env(){
    # environment passed to test containers
    echo -n "--env ZABBIX_SERVER=localhost "
    echo -n "--env ZABBIX_HOST=localhost "
    echo -n "--env K8S_CONFIG_TYPE=token "
}
####################################################################
## MAIN

DEFAULT_PHASES="build_image cleanup docu test_container"

VERSION="${VERSION:-$(git describe --abbrev=0 --tags)}"
TIMESTAMP="$(date --date="today" "+%Y%m%d%H%M%S")"

DOCKER_SQUASH="${DOCKER_SQUASH:-true}"

DELAY="35"

BDIR="$(dirname $(readlink -f $0))"
cd $BDIR || exit 1

# PHASES
build_image(){
    if [ -z "$VERSION" ];then
        echo "ERROR: no git release tag available"
        exit 1
    fi
    if [ "$DOCKER_SQUASH" == "true" ];then
        SQUASH_OPT="--squash"
        notice "Squashing of image is enabled, you can disable that by 'export DOCKER_SQUASH=false'"
    else
        SQUASH_OPT=""
    fi

    exec_cmd "docker build $SQUASH_OPT -t ${IMAGE_BASE} -f Dockerfile ."
    SIZE="$(docker inspect $IMAGE_BASE --format='{{.Size}}')"
    notice "Image size $(( $SIZE / 1024 / 1024 ))MB"
}

test_container(){
    IDENT="${IMAGE_NAME}_test"
    # remove leftovers from previous runs (best effort)
    docker kill $IDENT &> /dev/null
    docker rm $IDENT &> /dev/null
    exec_cmd "docker run --rm $(get_env) -d --name $IDENT ${IMAGE_BASE} --disable_colors"
    sleep 10
    echo "====== DOCKER LOGS"
    docker logs --until=50s $IDENT
    echo "=================="
    exec_cmd "docker ps |grep $IDENT"
    exec_cmd "docker kill $IDENT"
}


inspect(){
    IDENT="${IMAGE_NAME}_test"
    exec_cmd "docker run -ti --rm $(get_env) --name $IDENT ${IMAGE_BASE} /bin/sh"
}


cleanup(){
    exec_cmd "rm -rf /tmp/${IMAGE_NAME}*"
    exec_cmd "docker rmi ${IMAGE_NAME} --force"
}

docu(){
    exec_cmd "template/create_template_documentation"
}

publish_image(){
    # NOTE: the freshly built image already carries the ${VERSION} tag
    # (the former self-referencing "docker tag X X" was a no-op and was
    # removed, together with an unused TIMESTAMP recomputation)
    exec_cmd "docker push ${IMAGE_REPO}/${IMAGE_NAME}:${VERSION}"
    exec_cmd "docker tag ${IMAGE_REPO}/${IMAGE_NAME}:${VERSION} ${IMAGE_REPO}/${IMAGE_NAME}:latest"
    exec_cmd "docker push ${IMAGE_REPO}/${IMAGE_NAME}:latest"
}
" 104 | echo 105 | notice "AVAILABLE PHASES:" 106 | echo " - default" 107 | echo " ($DEFAULT_PHASES)" 108 | echo " - inspect" 109 | for PHASE in $DEFAULT_PHASES; do 110 | echo " - $PHASE " 111 | done 112 | echo " - publish_image (optional)" 113 | echo " - inspect (optional)" 114 | } 115 | 116 | 117 | if [ ${#@} -lt 2 ];then 118 | display_hint 119 | exit 2 120 | fi 121 | 122 | IMAGE_REPO="${@: -1}" 123 | if type $IMAGE_REPO &>/dev/null;then 124 | echo "ERROR: last param is not the dockerhub repo" 125 | exit 1 126 | fi 127 | 128 | PHASES="" 129 | for arg in "${@:1:$(( ${#@} - 1 ))}"; do 130 | if [ "$arg" = "default" ];then 131 | PHASES="$PHASES $DEFAULT_PHASES" 132 | else 133 | PHASES="$PHASES $arg" 134 | fi 135 | done 136 | 137 | IMAGE_NAME="k8s-zabbix" 138 | IMAGE_BASE="${IMAGE_REPO}/${IMAGE_NAME}:${VERSION}" 139 | 140 | for PHASE in $PHASES; 141 | do 142 | if ( type "$PHASE" >/dev/null 2>&1 );then 143 | notice "INFO: PHASE >>>$PHASE<<< for $IMAGE_BASE" 144 | $PHASE 145 | else 146 | notice "ERROR: no such phase : $PHASE" 147 | exit 1 148 | fi 149 | done 150 | 151 | #SIZE="$(docker inspect $IMAGE_BASE --format='{{.Size}}')" 152 | #notice "Image size $(( $SIZE / 1024 / 1024 ))MB" 153 | notice "SUCESSFULLY COMPLETED" 154 | -------------------------------------------------------------------------------- /check_kubernetesd: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ kubernetes zabbix monitoring daemon 3 | - tries to read config from file (host, port, token) 4 | - sends data to zabbix 5 | - sends data to inventory REST-API 6 | """ 7 | import argparse 8 | import faulthandler 9 | import logging 10 | import os 11 | import signal 12 | import sys 13 | from typing import List 14 | 15 | import coloredlogs as coloredlogs 16 | import sentry_sdk 17 | import time 18 | 19 | from base.daemon_thread import CheckKubernetesDaemon 20 | from base.config import Configuration 21 | 22 | KNOWN_ACTIONS = ['discover', 
formatter_string = '%(asctime)s - %(threadName)s : {%(name)s:%(lineno)d} : %(levelname)s : %(message)s'
formatter = logging.Formatter(formatter_string)
stream = logging.StreamHandler(sys.stdout)
stream.setFormatter(formatter)

logger = logging.getLogger(__file__)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Zabbix monitoring daemon for kubernetes'
    )

    parser.add_argument('--show_effective_config', action='store_true',
                        help="display the final config as environment variables" +
                             "based env variables and ini file parameters")
    parser.add_argument('--show_ini', action='store_true',
                        help="show variables as ini files parameters instead of environment variables")
    parser.add_argument('--disable_colors', action='store_true', help="disable colors in logoutput")
    parser.add_argument('ini_file', nargs="?", type=str,
                        help="optional, use a additional inifile for configuration " +
                             "(environment variables take precedence) or execute a " +
                             "binary with a fully qualified file path")
    args = parser.parse_args()

    coloredlogs.install(fmt=formatter_string, isatty=(not args.disable_colors))

    config = Configuration()

    if args.ini_file:
        # NOTE(review): executing arbitrary /bin/ paths supplied on the CLI is a
        # questionable escape hatch - confirm whether it is still needed
        if args.ini_file.startswith("/bin/"):
            os.system(args.ini_file)
            sys.exit(1)
        else:
            try:
                config.load_config_file(args.ini_file)
            except Exception as e:
                logger.fatal(e)
                sys.exit(1)

    config.load_from_environment_variables()
    if args.show_effective_config:
        config.show_effective_config(show_as_ini_variables=args.show_ini)

    if config.sentry_enabled:
        logger.info("starting with sentry DSN %s" % config.sentry_dsn)
        config.sentry_enabled = True  # normalize truthy config value to a real bool
        if not config.sentry_dsn:
            # FIX: this branch claimed "ABORTING!" but previously fell through and
            # initialized sentry with an empty DSN - abort for real now
            print("sentry enabled but no DSN set: '%s'! ABORTING!" % config.sentry_dsn)
            sys.exit(1)
        sentry_sdk.init(config.sentry_dsn)

    if config.zabbix_debug:
        logger.info("starting with zabbix debug")
        config.zabbix_debug = True
        log = logging.getLogger('pyzabbix')
        log.setLevel(logging.DEBUG)
    else:
        log = logging.getLogger('pyzabbix')
        log.setLevel(logging.INFO)

    if config.debug:
        stream.setLevel(logging.DEBUG)
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)
    logger.addHandler(stream)

    daemons: List[CheckKubernetesDaemon] = list()

    # management daemon: node handling on the slow intervals; it also owns the
    # signal handler below
    mgmt_daemon = CheckKubernetesDaemon(config,
                                        ['nodes'],
                                        config.discovery_interval_slow, config.resend_data_interval_slow)
    daemons.append(mgmt_daemon)

    daemons.append(
        CheckKubernetesDaemon(config,
                              ['components', 'pvcs'],
                              config.discovery_interval_slow, config.resend_data_interval_fast))

    daemons.append(
        CheckKubernetesDaemon(config,
                              ['deployments', 'statefulsets', "daemonsets", "pods"],
                              config.discovery_interval_slow, config.resend_data_interval_slow))

    if config.debug_k8s_events:
        for daemon in daemons:
            daemon.debug_k8s_events = True


    # SIGNAL processing
    def _signal_handler(signum, *args):
        """Forward USR1/USR2 to the management daemon's handler."""
        mgmt_daemon.handler(signum)


    def stacktraces_and_terminate(signum, frame):
        """Dump a traceback of every thread, then terminate (bound to SIGQUIT)."""
        print("#" * 80)
        print("# Threadump")
        faulthandler.dump_traceback()
        print("#")
        print("#" * 80)
        sys.exit(1)


    signal.signal(signal.SIGQUIT, stacktraces_and_terminate)
    signal.signal(signal.SIGUSR1, _signal_handler)
    signal.signal(signal.SIGUSR2, _signal_handler)
daemon.run() 138 | while True: 139 | time.sleep(10) 140 | except KeyboardInterrupt: 141 | logger.info("got SIGINT, shutting down") 142 | for daemon in daemons: 143 | daemon.handler(signal.SIGTERM) 144 | sys.exit(1) 145 | -------------------------------------------------------------------------------- /config-dev.ini: -------------------------------------------------------------------------------- 1 | ############################################################## 2 | ### Development Settings 3 | ### 4 | ### Useful for debugging and bugfixing 5 | 6 | k8s_config_type = kubeconfig 7 | 8 | verify_ssl = False 9 | 10 | # Add more debug output 11 | debug = False 12 | debug_k8s_events = False 13 | 14 | # This excludes all ressources, remove the one you want to debug 15 | resources_exclude = components, statefulsets, daemonsets, nodes, services, pvcs, deployments 16 | #resources_exclude = 17 | namespace_exclude_re = ^\d\d\d\d$ 18 | 19 | sentry_enabled = False 20 | sentry_dsn = "" 21 | 22 | zabbix_server = example.zabbix-server.com 23 | # This allows you to prevent k8s to send metric to zabbix, but collects the metrics 24 | # (This is useful for debugging) 25 | #zabbix_resources_exclude = components, statefulsets, daemonsets, nodes 26 | zabbix_resources_exclude = 27 | zabbix_host = k8s-example-host 28 | zabbix_debug = True 29 | zabbix_single_debug = False 30 | zabbix_dry_run = True 31 | 32 | web_api_enable = False 33 | web_api_resources_exclude = daemonsets, components, services, statefulsets 34 | web_api_verify_ssl = True 35 | web_api_host = https://example.api.com/api/v1/k8s 36 | web_api_token = 37 | web_api_cluster = k8s-test-cluster 38 | 39 | discovery_interval_fast = 6 40 | resend_data_interval_fast = 12 41 | 42 | discovery_interval_slow = 12 43 | resend_data_interval_slow = 24 44 | -------------------------------------------------------------------------------- /documentation/deployment_yed.graphml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | true 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | {"version":"2.0.0","theme":{"name":"light","version":"1.0.0"},"layout":"layout-hierarchic","config":{"p_useDrawingAsSketch":false,"p_selectedElementsIncrementally":false,"p_nodeToNodeDistance":30,"p_automaticEdgeGroupingEnabled":false,"p_considerNodeLabels":true,"p_edgeLabeling":1,"p_orientation":0,"p_edgeRouting":0}} 58 | 59 | 60 | 0 61 | 62 | 63 | 64 | 65 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 1 89 | 90 | 91 | 92 | 93 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 2 117 | 118 | 119 | 120 | 121 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 3 145 | 146 | 147 | 148 | 149 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 4 174 | 175 | 176 | 177 | 178 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 5 204 | 205 | 206 | 207 | 208 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 6 241 | 242 | 243 | 244 | 245 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 7 270 | 271 | 272 | 273 | 274 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 
| 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | -------------------------------------------------------------------------------- /documentation/deployment_yed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zabbix-tooling/k8s-zabbix/bb5e256133c7723cae4b740d9da9d869019804ca/documentation/deployment_yed.png -------------------------------------------------------------------------------- /documentation/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 43 | 45 | 46 | 48 | image/svg+xml 49 | 51 | 52 | 53 | 54 | 55 | 60 | 64 | 66 | 74 | 83 | 84 | 85 | 88 | 93 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /documentation/template/custom_service_kubernetes.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 33 | 34 |

Macro Overview

35 | 36 | The following macros can be overloaded on host level. 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 |
NameDefault
{$CONFIG_NAME}default
{$DISK_USAGE_ABOVE_1TB_MINFREE_GBYTES_ALARM}50
{$DISK_USAGE_ABOVE_1TB_MINFREE_GBYTES_WARN}50
{$DISK_USAGE_PERCENT_ALARM}97
{$DISK_USAGE_PERCENT_WARN}95
{$DISK_USAGE_REMAINING_DAYS_WARN}5
{$POD_RESTART_AVERAGE_LIMIT_30M}2
{$POD_RESTART_WARN_LIMIT_30M}0
{$TLS_MIN_VALID_DAYS}35
69 |

Static Elements

70 |

Trigger Overview

71 | 72 | 73 | 74 | 75 |
NameDescriptionPriorityExpressionDependencies
76 |

Graph Overview

77 | 78 | 79 | 80 | 81 | 82 | 84 | 85 |
NameElements
Servicescheck_kubernetes[get,services,num_ingress_services]
check_kubernetes[get,services,num_services]
83 |
86 |

Item Overview

87 | 88 | 89 | 90 | 91 | 92 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
TRAPCluster API Monitoring Heartbeatcheck_kubernetesd[discover,api]This item just receives a periodic epoch timestamp from k8s-zabbix. 93 | This is used to ensure that k8s-zabbix is still active.014d0
TRAPCluster Items Sendcheck_kubernetesd[get,items]014d0d
TRAPNumber of ingress servicescheck_kubernetes[get,services,num_ingress_services]07d
TRAPNumber of servicescheck_kubernetes[get,services,num_services]07d
105 |

Discovery rule "Custom - Service - Kubernetes - Components"

106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 |
NameValue
NameCustom - Service - Kubernetes - Components
Keycheck_kubernetesd[discover,components]
TypeTRAP
Delay0
123 |

Trigger Overview

124 | 125 | 126 | 127 | 128 |
NameDescriptionPriorityExpressionDependencies
129 |

Graph Overview

130 | 131 | 132 | 133 | 134 |
NameElements
135 |

Item Overview

136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
TRAPComponent {#NAME} - available_statuscheck_kubernetesd[get,components,{#NAME},available_status]00
144 |

Discovery rule "Custom - Service - Kubernetes - Containers"

145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 |
NameValue
NameCustom - Service - Kubernetes - Containers
Keycheck_kubernetesd[discover,containers]
TypeTRAP
Delay0
162 |

Trigger Overview

163 | 164 | 165 | 166 | 167 |
NameDescriptionPriorityExpressionDependencies
168 |

Graph Overview

169 | 170 | 171 | 172 | 173 | 174 | 176 | 177 |
NameElements
Pod {#NAMESPACE} / {#NAME} - Launch Statisticscheck_kubernetesd[get,containers,{#NAMESPACE},{#NAME},{#CONTAINER},ready]
check_kubernetesd[get,containers,{#NAMESPACE},{#NAME},{#CONTAINER},not_ready]
check_kubernetesd[get,containers,{#NAMESPACE},{#NAME},{#CONTAINER},restart_count]
175 |
178 |

Item Overview

179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
TRAP{#NAMESPACE} / {#NAME} / {#CONTAINER} - not_readycheck_kubernetesd[get,containers,{#NAMESPACE},{#NAME},{#CONTAINER},not_ready]014d
TRAP{#NAMESPACE} / {#NAME} / {#CONTAINER} - readycheck_kubernetesd[get,containers,{#NAMESPACE},{#NAME},{#CONTAINER},ready]014d
TRAP{#NAMESPACE} / {#NAME} / {#CONTAINER} - restart_countcheck_kubernetesd[get,containers,{#NAMESPACE},{#NAME},{#CONTAINER},restart_count]014d
TRAP{#NAMESPACE} / {#NAME} / {#CONTAINER} - statuscheck_kubernetesd[get,containers,{#NAMESPACE},{#NAME},{#CONTAINER},status]014d0
196 |

Discovery rule "Custom - Service - Kubernetes - Daemonsets"

197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 |
NameValue
NameCustom - Service - Kubernetes - Daemonsets
Keycheck_kubernetesd[discover,daemonsets]
TypeTRAP
Delay0
214 |

Trigger Overview

215 | 216 | 217 | 218 | 219 | 220 | 222 | 223 |
NameDescriptionPriorityExpressionDependencies
Daemonset {#NAMESPACE} / {#NAME} - available_status failed: {ITEM.LASTVALUE1}Raise an alarm if the available status is not true and no deployment is currently in progress. 221 | (the generation has not changed)Averagefind(/Custom - Service - Kubernetes/check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},available_status],,"like","OK")<>1 and (last(/Custom - Service - Kubernetes/check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},observed_generation]) = last(/Custom - Service - Kubernetes/check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},observed_generation]))
224 |

Graph Overview

225 | 226 | 227 | 228 | 229 | 230 | 232 | 233 | 234 | 236 | 237 |
NameElements
Daemonset {#NAMESPACE} / {#NAME} - Misccheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},collision_count]
check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},observed_generation]
231 |
Daemonset {#NAMESPACE} / {#NAME} - Replicascheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},number_unavailable]
check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},updated_number_scheduled]
check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},current_number_scheduled]
check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},desired_number_scheduled]
check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},number_available]
check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},number_misscheduled]
check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},number_ready]
check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},number_unavailable]
check_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},updated_number_scheduled]
235 |
238 |

Item Overview

239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
TRAPDS {#NAMESPACE} / {#NAME} - available_statuscheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},available_status]05d0
TRAPDS {#NAMESPACE} / {#NAME} - collision_countcheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},collision_count]03d
TRAPDS {#NAMESPACE} / {#NAME} - current_number_scheduledcheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},current_number_scheduled]03d
TRAPDS {#NAMESPACE} / {#NAME} - desired_number_scheduledcheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},desired_number_scheduled]03d
TRAPDS {#NAMESPACE} / {#NAME} - number_availablecheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},number_available]03d
TRAPDS {#NAMESPACE} / {#NAME} - number_misscheduledcheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},number_misscheduled]03d
TRAPDS {#NAMESPACE} / {#NAME} - number_readycheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},number_ready]03d
TRAPDS {#NAMESPACE} / {#NAME} - number_unavailablecheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},number_unavailable]03d
TRAPDS {#NAMESPACE} / {#NAME} - observed_generationcheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},observed_generation]03d
TRAPDS {#NAMESPACE} / {#NAME} - updated_number_scheduledcheck_kubernetesd[get,daemonsets,{#NAMESPACE},{#NAME},updated_number_scheduled]03d
274 |

Discovery rule "Custom - Service - Kubernetes - Deployments"

275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 |
NameValue
NameCustom - Service - Kubernetes - Deployments
Keycheck_kubernetesd[discover,deployments]
TypeTRAP
Delay0
292 |

Trigger Overview

293 | 294 | 295 | 296 | 297 | 298 | 300 | 301 |
NameDescriptionPriorityExpressionDependencies
Deployment {#NAMESPACE} / {#NAME} - available_status failed: {ITEM.LASTVALUE1}Raise an alarm if the available status is not true and no deployment is currently in progress. 299 | (the generation has not changed)Averagefind(/Custom - Service - Kubernetes/check_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},available_status],,"like","OK")<>1 and (last(/Custom - Service - Kubernetes/check_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},observed_generation]) = last(/Custom - Service - Kubernetes/check_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},observed_generation]))
302 |

Graph Overview

303 | 304 | 305 | 306 | 307 | 308 | 310 | 311 | 312 | 314 | 315 |
NameElements
Deployment {#NAMESPACE} / {#NAME} - Misccheck_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},collision_count]
check_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},observed_generation]
309 |
Deployment {#NAMESPACE} / {#NAME} - Replicascheck_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},ready_replicas]
check_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},replicas]
check_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},unavailable_replicas]
313 |
316 |

Item Overview

317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
TRAP{#NAMESPACE} / {#NAME} - available_replicascheck_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},available_replicas]03d
TRAP{#NAMESPACE} / {#NAME} - available_statuscheck_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},available_status]05d0
TRAP{#NAMESPACE} / {#NAME} - collision_countcheck_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},collision_count]03d
TRAP{#NAMESPACE} / {#NAME} - observed_generationcheck_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},observed_generation]03d
TRAP{#NAMESPACE} / {#NAME} - ready_replicascheck_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},ready_replicas]03d
TRAP{#NAMESPACE} / {#NAME} - replicascheck_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},replicas]03d
TRAP{#NAMESPACE} / {#NAME} - unavailable_replicascheck_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},unavailable_replicas]03d
TRAP{#NAMESPACE} / {#NAME} - updated_replicascheck_kubernetesd[get,deployments,{#NAMESPACE},{#NAME},updated_replicas]03d
346 |

Discovery rule "Custom - Service - Kubernetes - Nodes"

347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 |
NameValue
NameCustom - Service - Kubernetes - Nodes
Keycheck_kubernetesd[discover,nodes]
TypeTRAP
Delay0
364 |

Trigger Overview

365 | 366 | 367 | 368 | 369 |
NameDescriptionPriorityExpressionDependencies
370 |

Graph Overview

371 | 372 | 373 | 374 | 375 | 376 | 378 | 379 | 380 | 382 | 383 | 384 | 386 | 387 | 388 | 390 | 391 |
NameElements
Node {#NAME} - CPUscheck_kubernetesd[get,nodes,{#NAME},allocatable.cpu]
check_kubernetesd[get,nodes,{#NAME},capacity.cpu]
377 |
Node {#NAME} - Ephemeral-Storagecheck_kubernetesd[get,nodes,{#NAME},allocatable.ephemeral-storage]
check_kubernetesd[get,nodes,{#NAME},capacity.ephemeral-storage]
381 |
Node {#NAME} - Memorycheck_kubernetesd[get,nodes,{#NAME},allocatable.memory]
check_kubernetesd[get,nodes,{#NAME},capacity.memory]
385 |
Node {#NAME} - Podscheck_kubernetesd[get,nodes,{#NAME},allocatable.pods]
check_kubernetesd[get,nodes,{#NAME},capacity.pods]
389 |
392 |

Item Overview

393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
TRAP{#NAME} - allocatable.cpuscheck_kubernetesd[get,nodes,{#NAME},allocatable.cpu]0
TRAP{#NAME} - allocatable.ephemeral-storagecheck_kubernetesd[get,nodes,{#NAME},allocatable.ephemeral-storage]0
TRAP{#NAME} - allocatable.memorycheck_kubernetesd[get,nodes,{#NAME},allocatable.memory]0
TRAP{#NAME} - allocatable.podscheck_kubernetesd[get,nodes,{#NAME},allocatable.pods]0
TRAP{#NAME} - available_statuscheck_kubernetesd[get,nodes,{#NAME},available_status]00
TRAP{#NAME} - capacity.cpuscheck_kubernetesd[get,nodes,{#NAME},capacity.cpu]0
TRAP{#NAME} - capacity.ephemeral-storagecheck_kubernetesd[get,nodes,{#NAME},capacity.ephemeral-storage]0
TRAP{#NAME} - capacity.memorycheck_kubernetesd[get,nodes,{#NAME},capacity.memory]0
TRAP{#NAME} - capacity.podscheck_kubernetesd[get,nodes,{#NAME},capacity.pods]0
TRAP{#NAME} - condition_status_failedcheck_kubernetesd[get,nodes,{#NAME},condition_status_failed]00
428 |

Discovery rule "Custom - Service - Kubernetes - PVCs"

429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 |
NameValue
NameCustom - Service - Kubernetes - PVCs
Keycheck_kubernetesd[discover,pvcs]
TypeTRAP
Delay0
446 |

Trigger Overview

447 | 448 | 449 | 450 | 451 |
NameDescriptionPriorityExpressionDependencies
452 |

Graph Overview

453 | 454 | 455 | 456 | 457 | 458 | 460 | 461 | 462 | 464 | 465 |
NameElements
PV {#NAMESPACE} / {#NAME} iNodescheck_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},inodesUsed]
check_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},inodes]
check_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},inodesUsedPercentage]
459 |
PV {#NAMESPACE} / {#NAME} Spacecheck_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},usedBytes]
check_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},capacityBytes]
check_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},usedBytesPercentage]
463 |
466 |

Item Overview

467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
CALCULATEDPV {#NAMESPACE} / {#NAME} - capacityBytesRemainingTime7dcheck_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},capacityBytesRemainingTime7d]The remaining time until volume reached 100% space usage based on the growth of the last 7 days.1h7d90d
TRAPPV {#NAMESPACE} / {#NAME} - capacityBytescheck_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},capacityBytes]07d
CALCULATEDPV {#NAMESPACE} / {#NAME} - inodesRemainingTime7dcheck_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},inodesRemainingTime7d]The remaining time until volume reached 100% inode usage based on the growth of the last 7 days.7d90d
TRAPPV {#NAMESPACE} / {#NAME} - inodesUsedPercentagecheck_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},inodesUsedPercentage]07d
TRAPPV {#NAMESPACE} / {#NAME} - inodesUsedcheck_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},inodesUsed]07d
TRAPPV {#NAMESPACE} / {#NAME} - inodescheck_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},inodes]07d
TRAPPV {#NAMESPACE} / {#NAME} - Nodenamecheck_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},nodename]014d0
TRAPPV {#NAMESPACE} / {#NAME} - usedBytesPercentagecheck_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},usedBytesPercentage]07d
TRAPPV {#NAMESPACE} / {#NAME} - usedBytescheck_kubernetesd[get,pvcs,{#NAMESPACE},{#NAME},usedBytes]07d
499 |

Discovery rule "Custom - Service - Kubernetes - Services"

500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 |
NameValue
NameCustom - Service - Kubernetes - Services
Keycheck_kubernetesd[discover,services]
TypeTRAP
Delay0
517 |

Trigger Overview

518 | 519 | 520 | 521 | 522 |
NameDescriptionPriorityExpressionDependencies
523 |

Graph Overview

524 | 525 | 526 | 527 | 528 |
NameElements
529 |

Item Overview

530 | 531 | 532 | 533 | 534 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
535 |

Discovery rule "Custom - Service - Kubernetes - Statefulsets"

536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 |
NameValue
NameCustom - Service - Kubernetes - Statefulsets
Keycheck_kubernetesd[discover,statefulsets]
TypeTRAP
Delay0
553 |

Trigger Overview

554 | 555 | 556 | 557 | 558 | 559 | 561 | 562 |
NameDescriptionPriorityExpressionDependencies
Statefulset {#NAMESPACE} / {#NAME} - available_status failed: {ITEM.LASTVALUE1}Raise an alarm if the available status is not true and no deployment is currently in progress. 560 | (the generation has not changed)Averagefind(/Custom - Service - Kubernetes/check_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},available_status],,"like","OK")<>1 and (last(/Custom - Service - Kubernetes/check_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},observed_generation]) = last(/Custom - Service - Kubernetes/check_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},observed_generation]))
563 |

Graph Overview

564 | 565 | 566 | 567 | 568 | 569 | 571 | 572 | 573 | 575 | 576 |
NameElements
Statefulset {#NAMESPACE} / {#NAME} - Misccheck_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},collision_count]
check_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},observed_generation]
570 |
Statefulset {#NAMESPACE} / {#NAME} - Replicascheck_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},ready_replicas]
check_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},replicas]
574 |
577 |

Item Overview

578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
TRAPSTS {#NAMESPACE} / {#NAME} - available_statuscheck_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},available_status]05d0
TRAPSTS {#NAMESPACE} / {#NAME} - collision_countcheck_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},collision_count]03d
TRAPSTS {#NAMESPACE} / {#NAME} - current_replicascheck_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},current_replicas]03d
TRAPSTS {#NAMESPACE} / {#NAME} - current_revisioncheck_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},current_revision]05d0
TRAPSTS {#NAMESPACE} / {#NAME} - observed_generationcheck_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},observed_generation]03d
TRAPSTS {#NAMESPACE} / {#NAME} - ready_replicascheck_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},ready_replicas]03d
TRAPSTS {#NAMESPACE} / {#NAME} - replicascheck_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},replicas]03d
TRAPSTS {#NAMESPACE} / {#NAME} - updated_replicascheck_kubernetesd[get,statefulsets,{#NAMESPACE},{#NAME},updated_replicas]03d
607 |

Discovery rule "Custom - Service - Kubernetes - TLS"

608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 |
NameValue
NameCustom - Service - Kubernetes - TLS
Keycheck_kubernetesd[discover,tls]
TypeTRAP
Delay0
625 |

Trigger Overview

626 | 627 | 628 | 629 | 630 |
NameDescriptionPriorityExpressionDependencies
631 |

Graph Overview

632 | 633 | 634 | 635 | 636 |
NameElements
637 |

Item Overview

638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
TRAPTLS {#NAMESPACE} / {#NAME} - tls valid_dayscheck_kubernetesd[get,tls,{#NAMESPACE},{#NAME},valid_days]03d
646 | 647 | 648 | -------------------------------------------------------------------------------- /k8sobjects/__init__.py: -------------------------------------------------------------------------------- 1 | from .deployment import * 2 | from .daemonset import * 3 | from .ingress import * 4 | from .node import * 5 | from .pvc import * 6 | from .pod import * 7 | from .service import * 8 | from .statefulset import * 9 | from .secret import * 10 | from .component import * 11 | from .container import * 12 | -------------------------------------------------------------------------------- /k8sobjects/component.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from pyzabbix import ZabbixMetric 4 | 5 | from .k8sobject import K8sObject 6 | 7 | logger = logging.getLogger(__file__) 8 | 9 | 10 | class Component(K8sObject): 11 | object_type = 'component' 12 | 13 | @property 14 | def resource_data(self): 15 | data = super().resource_data 16 | 17 | failed_conds = [] 18 | 19 | # exclude 20 | if self.name in ["controller-manager", "scheduler"]: 21 | # faked, unfortinately k8s prpject broke these checks https://github.com/kubernetes/kubernetes/issues/19570 22 | data['available_status'] = 'OK: faked' 23 | elif self.data['conditions']: 24 | available_conds = [x for x in self.data['conditions'] if x['type'].lower() == "healthy"] 25 | if available_conds: 26 | for cond in available_conds: 27 | if cond['status'] != 'True': 28 | failed_conds.append(cond['type']) 29 | 30 | if len(failed_conds) > 0: 31 | data['available_status'] = 'ERROR: ' + (','.join(failed_conds)) 32 | else: 33 | data['available_status'] = 'OK' 34 | else: 35 | data['available_status'] = 'OK' 36 | 37 | return data 38 | 39 | def get_zabbix_metrics(self): 40 | data_to_send = list() 41 | 42 | data_to_send.append(ZabbixMetric( 43 | self.zabbix_host, 44 | 'check_kubernetesd[get,components,%s,available_status]' % self.name, 45 | 
self.resource_data['available_status'])) 46 | 47 | return data_to_send 48 | -------------------------------------------------------------------------------- /k8sobjects/container.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from pyzabbix import ZabbixMetric 4 | 5 | logger = logging.getLogger(__file__) 6 | 7 | 8 | def get_container_zabbix_metrics(zabbix_host: str, name_space: str, 9 | pod_base_name: str, container_name: str, 10 | data: dict[str, str]) -> list[ZabbixMetric]: 11 | return [ZabbixMetric( 12 | zabbix_host, 'check_kubernetesd[get,containers,%s,%s,%s,ready]' % (name_space, pod_base_name, container_name), 13 | data["ready"], 14 | ), ZabbixMetric( 15 | zabbix_host, 16 | 'check_kubernetesd[get,containers,%s,%s,%s,not_ready]' % (name_space, pod_base_name, container_name), 17 | data["not_ready"], 18 | ), ZabbixMetric( 19 | zabbix_host, 20 | 'check_kubernetesd[get,containers,%s,%s,%s,restart_count]' % (name_space, pod_base_name, container_name), 21 | data["restart_count"], 22 | ), ZabbixMetric( 23 | zabbix_host, 'check_kubernetesd[get,containers,%s,%s,%s,status]' % (name_space, pod_base_name, container_name), 24 | data["status"], 25 | )] 26 | -------------------------------------------------------------------------------- /k8sobjects/daemonset.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from pyzabbix import ZabbixMetric 4 | 5 | from .k8sobject import K8sObject, transform_value 6 | 7 | logger = logging.getLogger(__file__) 8 | 9 | 10 | # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#daemonset-v1-apps 11 | # same as statefulset 12 | # 'status': { 'collision_count': None, X 13 | # 'conditions': None, X 14 | # 'current_number_scheduled': 8, 15 | # 'desired_number_scheduled': 8, 16 | # 'number_available': 8, 17 | # 'number_misscheduled': 0, 18 | # 'number_ready': 8, 19 | # 'number_unavailable': None, 20 | # 
'observed_generation': 8, 21 | # 'updated_number_scheduled': 8}} 22 | 23 | class Daemonset(K8sObject): 24 | object_type = 'daemonset' 25 | 26 | @property 27 | def resource_data(self): 28 | data = super().resource_data 29 | 30 | for status_type in self.data['status']: 31 | if status_type in ['conditions']: 32 | continue 33 | data.update({status_type: transform_value(self.data['status'][status_type])}) 34 | 35 | failed_conds = [] 36 | if self.data['status']['conditions']: 37 | available_conds = [x for x in self.data['status']['conditions'] if x['type'].lower() == "available"] 38 | if available_conds: 39 | for cond in available_conds: 40 | if cond['status'] != 'True': 41 | failed_conds.append(cond['type']) 42 | 43 | if len(failed_conds) > 0: 44 | data['available_status'] = 'ERROR: ' + (','.join(failed_conds)) 45 | else: 46 | data['available_status'] = 'OK' 47 | else: 48 | data['available_status'] = 'OK' 49 | 50 | return data 51 | 52 | def get_zabbix_metrics(self): 53 | data_to_send = [] 54 | 55 | for status_type in self.data['status']: 56 | if status_type in ['conditions', 'update_revision']: 57 | continue 58 | 59 | data_to_send.append(ZabbixMetric( 60 | self.zabbix_host, 61 | 'check_kubernetesd[get,daemonsets,%s,%s,%s]' % (self.name_space, self.name, status_type), 62 | transform_value(self.resource_data[status_type])) 63 | ) 64 | 65 | data_to_send.append(ZabbixMetric( 66 | self.zabbix_host, 67 | 'check_kubernetesd[get,daemonsets,%s,%s,available_status]' % (self.name_space, self.name), 68 | self.resource_data['available_status'])) 69 | 70 | return data_to_send 71 | -------------------------------------------------------------------------------- /k8sobjects/deployment.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from pyzabbix import ZabbixMetric 4 | from .k8sobject import K8sObject, transform_value 5 | 6 | logger = logging.getLogger(__file__) 7 | 8 | 9 | class Deployment(K8sObject): 10 | object_type = 
'deployment' 11 | 12 | @property 13 | def resource_data(self): 14 | data = super().resource_data 15 | 16 | for status_type in self.data['status']: 17 | if status_type == 'conditions': 18 | continue 19 | data.update({status_type: transform_value(self.data['status'][status_type])}) 20 | 21 | failed_conds = [] 22 | if self.data['status']['conditions']: 23 | available_conds = [x for x in self.data['status']['conditions'] if x['type'].lower() == "available"] 24 | if available_conds: 25 | for cond in available_conds: 26 | if cond['status'] != 'True': 27 | failed_conds.append(cond['type']) 28 | 29 | if len(failed_conds) > 0: 30 | data['available_status'] = 'ERROR: ' + (','.join(failed_conds)) 31 | else: 32 | data['available_status'] = 'OK' 33 | else: 34 | data['available_status'] = 'OK' 35 | return data 36 | 37 | def get_zabbix_metrics(self): 38 | data_to_send = [] 39 | 40 | for status_type in self.data['status']: 41 | if status_type == 'conditions': 42 | continue 43 | 44 | data_to_send.append(ZabbixMetric( 45 | self.zabbix_host, 46 | 'check_kubernetesd[get,deployments,%s,%s,%s]' % (self.name_space, self.name, status_type), 47 | transform_value(self.resource_data[status_type])) 48 | ) 49 | 50 | data_to_send.append(ZabbixMetric( 51 | self.zabbix_host, 52 | 'check_kubernetesd[get,deployments,%s,%s,available_status]' % (self.name_space, self.name), 53 | self.resource_data['available_status'])) 54 | 55 | return data_to_send 56 | -------------------------------------------------------------------------------- /k8sobjects/ingress.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .k8sobject import K8sObject 4 | 5 | logger = logging.getLogger(__file__) 6 | 7 | 8 | class Ingress(K8sObject): 9 | object_type = 'ingress' 10 | 11 | @property 12 | def resource_data(self): 13 | data = super().resource_data 14 | return data 15 | 16 | def get_zabbix_metrics(self): 17 | data = self.resource_data 18 | return data 19 | 
-------------------------------------------------------------------------------- /k8sobjects/k8sobject.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import hashlib 3 | import json 4 | import logging 5 | import re 6 | 7 | from typing import TYPE_CHECKING, TypedDict 8 | 9 | if TYPE_CHECKING: 10 | from k8sobjects.k8sresourcemanager import K8sResourceManager 11 | 12 | from pyzabbix import ZabbixMetric 13 | 14 | logger = logging.getLogger(__file__) 15 | 16 | K8S_RESOURCES = dict( 17 | nodes='node', 18 | components='component', 19 | services='service', 20 | deployments='deployment', 21 | statefulsets='statefulset', 22 | daemonsets='daemonset', 23 | pods='pod', 24 | containers='container', 25 | secrets='secret', 26 | ingresses='ingress', 27 | pvcs='pvc' 28 | ) 29 | 30 | INITIAL_DATE = datetime.datetime(2000, 1, 1, 0, 0) 31 | 32 | 33 | def json_encoder(obj: object) -> str: 34 | if isinstance(obj, (datetime.date, datetime.datetime)): 35 | return obj.isoformat() 36 | raise TypeError(f"custom json_encoder: unable to encode {type(obj)}") 37 | 38 | 39 | def transform_value(value: str) -> str: 40 | if value is None: 41 | return "0" 42 | m = re.match(r'^(\d+)(Ki)$', str(value)) 43 | if m: 44 | if m.group(2) == "Ki": 45 | return str(int(float(m.group(1)) * 1024)) 46 | 47 | m = re.match(r'^(\d+)(m)$', str(value)) 48 | if m: 49 | if m.group(2) == "m": 50 | return str(float(m.group(1)) / 1000) 51 | 52 | if value is None: 53 | return "0" 54 | 55 | return value 56 | 57 | 58 | def slugit(name_space: str, name: str, maxlen: int) -> str: 59 | if name_space: 60 | slug = name_space + '/' + name 61 | else: 62 | slug = name 63 | 64 | if len(slug) <= maxlen: 65 | return slug 66 | 67 | prefix_pos = int((maxlen / 2) - 1) 68 | suffix_pos = len(slug) - int(maxlen / 2) - 2 69 | return slug[:prefix_pos] + "~" + slug[suffix_pos:] 70 | 71 | 72 | class MetadataObjectType(TypedDict): 73 | name: str 74 | namespace: str 75 | generate_name: str 
| None 76 | owner_references: list[dict[str, str]] 77 | 78 | 79 | class ObjectDataType(TypedDict): 80 | metadata: MetadataObjectType 81 | item: dict[str, dict] 82 | 83 | 84 | def calculate_checksum_for_dict(data: ObjectDataType) -> str: 85 | json_str = json.dumps( 86 | data, 87 | sort_keys=True, 88 | default=json_encoder, 89 | indent=2 90 | ) 91 | checksum = hashlib.md5(json_str.encode('utf-8')).hexdigest() 92 | return checksum 93 | 94 | 95 | class K8sObject: 96 | """Holds the resource data""" 97 | object_type: str = "UNDEFINED" 98 | 99 | def __init__(self, obj_data: ObjectDataType, resource: str, manager: 'K8sResourceManager'): 100 | """Get the resource data from the k8s api""" 101 | self.is_dirty_zabbix = True 102 | self.is_dirty_web = True 103 | self.last_sent_zabbix_discovery = INITIAL_DATE 104 | self.last_sent_zabbix = INITIAL_DATE 105 | self.last_sent_web = INITIAL_DATE 106 | self.resource = resource 107 | self.data = obj_data 108 | self.data_checksum = calculate_checksum_for_dict(obj_data) 109 | self.manager = manager 110 | self.zabbix_host = self.manager.zabbix_host 111 | 112 | def __str__(self) -> str: 113 | return self.uid 114 | 115 | @property 116 | def resource_data(self) -> dict[str, str]: 117 | """ customized values for k8s objects """ 118 | if self.name_space is None: 119 | raise RuntimeError("name_space is None for %s" % self.name) 120 | return dict( 121 | name=self.name, 122 | name_space=self.name_space 123 | ) 124 | 125 | @property 126 | def uid(self) -> str: 127 | if not hasattr(self, 'object_type'): 128 | raise AttributeError('No object_type set! Dont use K8sObject itself!') 129 | elif not self.name: 130 | raise AttributeError('No name set for K8sObject.uid! 
[%s] name_space: %s, name: %s' 131 | % (self.object_type, self.name_space, self.name)) 132 | 133 | if self.name_space: 134 | return self.object_type + '_' + self.name_space + '_' + self.name 135 | return self.object_type + '_' + self.name 136 | 137 | @property 138 | def name(self) -> str: 139 | """The name of the object""" 140 | if 'metadata' in self.data and 'name' in self.data['metadata']: 141 | return self.data['metadata']['name'] 142 | else: 143 | raise Exception(f'Could not find name in metadata for resource {self.resource}') 144 | 145 | @property 146 | def name_space(self) -> str | None: 147 | from .node import Node 148 | from .component import Component 149 | if isinstance(self, Node) or isinstance(self, Component): 150 | return None 151 | 152 | name_space = self.data.get('metadata', {}).get('namespace') 153 | if not name_space: 154 | raise Exception('Could not find name_space for obj [%s] %s' % (self.resource, self.name)) 155 | return name_space 156 | 157 | def is_unsubmitted_web(self) -> bool: 158 | return self.last_sent_web == INITIAL_DATE 159 | 160 | def is_unsubmitted_zabbix(self) -> bool: 161 | return self.last_sent_zabbix == INITIAL_DATE 162 | 163 | def is_unsubmitted_zabbix_discovery(self) -> bool: 164 | return self.last_sent_zabbix_discovery == datetime.datetime(2000, 1, 1, 0, 0) 165 | 166 | def get_zabbix_discovery_data(self) -> list[dict[str, str]]: 167 | return [{ 168 | "{#NAME}": self.name, 169 | "{#NAMESPACE}": self.name_space or "None", 170 | "{#SLUG}": slugit(self.name_space or "None", self.name, 40), 171 | }] 172 | 173 | def get_discovery_for_zabbix(self, discovery_data: list[dict[str, str]] | None) -> ZabbixMetric: 174 | if discovery_data is None: 175 | discovery_data = self.get_zabbix_discovery_data() 176 | 177 | return ZabbixMetric( 178 | self.zabbix_host, 179 | 'check_kubernetesd[discover,%s]' % self.resource, 180 | json.dumps({ 181 | 'data': discovery_data, 182 | }) 183 | ) 184 | 185 | def get_zabbix_metrics(self) -> list[ZabbixMetric]: 
186 | logger.fatal(f"get_zabbix_metrics: not implemented for {self.object_type}") 187 | return [] 188 | -------------------------------------------------------------------------------- /k8sobjects/k8sresourcemanager.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import logging 3 | 4 | from k8sobjects.k8sobject import K8S_RESOURCES, K8sObject 5 | 6 | logger = logging.getLogger(__file__) 7 | 8 | 9 | class K8sResourceManager: 10 | def __init__(self, resource: str, zabbix_host: str | None = None): 11 | self.resource = resource 12 | self.zabbix_host = zabbix_host 13 | 14 | self.objects: dict[str, K8sObject] = dict() 15 | self.containers: dict = dict() # containers only used for pods 16 | 17 | mod = importlib.import_module('k8sobjects') 18 | class_label = K8S_RESOURCES[resource] 19 | self.resource_class = getattr(mod, class_label.capitalize(), None) 20 | logger.info(f"Creating new resource manager for resource {resource} with class {self.resource_class}") 21 | 22 | def add_obj_from_data(self, data: dict) -> K8sObject | None: 23 | if not self.resource_class: 24 | logger.error('No Resource Class found for "%s"' % self.resource) 25 | return None 26 | 27 | try: 28 | new_obj = self.resource_class(data, self.resource, manager=self) 29 | return self.add_obj(new_obj) 30 | except Exception as e: 31 | logger.fatal(f"Unable to add object by data : {e} - >>><{data}<<") 32 | return None 33 | 34 | def add_obj(self, new_obj: K8sObject) -> K8sObject | None: 35 | 36 | if new_obj.uid not in self.objects: 37 | # new object 38 | self.objects[new_obj.uid] = new_obj 39 | elif self.objects[new_obj.uid].data_checksum != new_obj.data_checksum: 40 | # existing object with modified data 41 | new_obj.last_sent_zabbix_discovery = self.objects[new_obj.uid].last_sent_zabbix_discovery 42 | new_obj.last_sent_zabbix = self.objects[new_obj.uid].last_sent_zabbix 43 | new_obj.last_sent_web = self.objects[new_obj.uid].last_sent_web 44 | 
new_obj.is_dirty_web = True 45 | new_obj.is_dirty_zabbix = True 46 | self.objects[new_obj.uid] = new_obj 47 | 48 | # return created or updated object 49 | return self.objects[new_obj.uid] 50 | 51 | def del_obj(self, obj: K8sObject) -> K8sObject | None: 52 | if not self.resource_class: 53 | logger.error('No Resource Class found for "%s"' % self.resource) 54 | return None 55 | 56 | resourced_obj = self.resource_class(obj, self.resource, manager=self) 57 | if resourced_obj.uid in self.objects: 58 | del self.objects[resourced_obj.uid] 59 | return resourced_obj 60 | -------------------------------------------------------------------------------- /k8sobjects/node.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import cachetools.func 4 | from kubernetes.client import CoreV1Api 5 | from pyzabbix import ZabbixMetric 6 | 7 | from .k8sobject import K8sObject, transform_value 8 | 9 | logger = logging.getLogger(__file__) 10 | 11 | 12 | # TODO: remove after refactoring 13 | @cachetools.func.ttl_cache(maxsize=1, ttl=60 * 10) 14 | def get_node_names(api: CoreV1Api) -> list[str]: 15 | ret = api.list_node(watch=False) 16 | node_names = [] 17 | for item in ret.items: 18 | node_names.append(item.metadata.name) 19 | return node_names 20 | 21 | 22 | class Node(K8sObject): 23 | object_type = 'node' 24 | 25 | MONITOR_VALUES = ['allocatable.cpu', 26 | 'allocatable.ephemeral-storage', 27 | 'allocatable.memory', 28 | 'allocatable.pods', 29 | 'capacity.cpu', 30 | 'capacity.ephemeral-storage', 31 | 'capacity.memory', 32 | 'capacity.pods'] 33 | 34 | @property 35 | def resource_data(self): 36 | data = super().resource_data 37 | 38 | failed_conds = [] 39 | data['condition_ready'] = False 40 | for cond in self.data['status']['conditions']: 41 | if cond['type'].lower() == "ready" and cond['status'] == 'True': 42 | data['condition_ready'] = True 43 | else: 44 | if cond['status'] == 'True': 45 | failed_conds.append(cond['type']) 46 | 47 
| data['failed_conds'] = failed_conds 48 | 49 | for monitor_value in self.MONITOR_VALUES: 50 | current_indirection = self.data['status'] 51 | for key in monitor_value.split("."): 52 | current_indirection = current_indirection[key] 53 | 54 | data[monitor_value] = transform_value(current_indirection) 55 | 56 | return data 57 | 58 | def get_zabbix_metrics(self): 59 | data_to_send = list() 60 | data = self.resource_data 61 | 62 | data_to_send.append( 63 | ZabbixMetric(self.zabbix_host, 'check_kubernetesd[get,nodes,' + self.name + ',available_status]', 64 | 'not available' if data['condition_ready'] is not True else 'OK')) 65 | data_to_send.append( 66 | ZabbixMetric(self.zabbix_host, 'check_kubernetesd[get,nodes,' + self.name + ',condition_status_failed]', 67 | data['failed_conds'] if len(data['failed_conds']) > 0 else 'OK')) 68 | for monitor_value in self.MONITOR_VALUES: 69 | data_to_send.append(ZabbixMetric( 70 | self.zabbix_host, 'check_kubernetesd[get,nodes,%s,%s]' % (self.name, monitor_value), 71 | transform_value(data[monitor_value])) 72 | ) 73 | return data_to_send 74 | -------------------------------------------------------------------------------- /k8sobjects/pod.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | 4 | from k8sobjects import K8sObject 5 | 6 | logger = logging.getLogger(__file__) 7 | 8 | 9 | class Pod(K8sObject): 10 | object_type = 'pod' 11 | kind = None 12 | 13 | @property 14 | def name(self) -> str: 15 | if 'metadata' not in self.data and 'name' in self.data['metadata']: 16 | raise Exception(f'Could not find name in metadata for resource {self.resource}') 17 | 18 | if "owner_references" in self.data['metadata']: 19 | for owner_refs in self.data['metadata']['owner_references']: 20 | self.kind = owner_refs['kind'] 21 | 22 | generate_name = self.data['metadata']['generate_name'] 23 | if generate_name is not None: 24 | match self.kind: 25 | case "Job": 26 | name = 
re.sub(r'-\d+-$', '', generate_name) 27 | case "ReplicaSet": 28 | name = re.sub(r'-[a-f0-9]{4,}-$', '', generate_name) 29 | case _: 30 | name = re.sub(r'-$', '', generate_name) 31 | 32 | return name 33 | 34 | def get_zabbix_discovery_data(self) -> list[dict[str, str]]: 35 | data = super().get_zabbix_discovery_data() 36 | if self.kind is not None: 37 | data[0]['{#KIND}'] = self.kind 38 | return data 39 | 40 | @property 41 | def resource_data(self) -> dict[str, str]: 42 | data = super().resource_data 43 | return data 44 | 45 | def get_zabbix_metrics(self): 46 | # TODO: Temporary 47 | # data = self.resource_data 48 | data_to_send = list() 49 | return data_to_send 50 | 51 | # class Pod(K8sObject): 52 | # object_type = 'pod' 53 | # 54 | # @property 55 | # def base_name(self): 56 | # for container in self.data['spec']['containers']: 57 | # if container['name'] in self.name: 58 | # return container['name'] 59 | # return self.name 60 | # 61 | # @property 62 | # def containers(self): 63 | # containers = {} 64 | # for container in self.data['spec']['containers']: 65 | # containers.setdefault(container['name'], 0) 66 | # containers[container['name']] += 1 67 | # return containers 68 | # 69 | # @property 70 | # def resource_data(self): 71 | # data = super().resource_data 72 | # data['containers'] = json.dumps(self.containers) 73 | # container_status = dict() 74 | # data['ready'] = True 75 | # pod_data = { 76 | # "restart_count": 0, 77 | # "ready": 0, 78 | # "not_ready": 0, 79 | # "status": "OK", 80 | # } 81 | # 82 | # if "container_statuses" in self.data['status'] and self.data['status']['container_statuses']: 83 | # for container in self.data['status']['container_statuses']: 84 | # status_values = [] 85 | # container_name = container['name'] 86 | # 87 | # # this pod data 88 | # if container_name not in container_status: 89 | # container_status[container_name] = { 90 | # "restart_count": 0, 91 | # "ready": 0, 92 | # "not_ready": 0, 93 | # "status": "OK", 94 | # } 95 | # 
container_status[container_name]['restart_count'] += container['restart_count'] 96 | # pod_data['restart_count'] += container['restart_count'] 97 | # 98 | # if container['ready'] is True: 99 | # container_status[container_name]['ready'] += 1 100 | # pod_data['ready'] += 1 101 | # else: 102 | # container_status[container_name]['not_ready'] += 1 103 | # pod_data['not_ready'] += 1 104 | # 105 | # if container["state"] and len(container["state"]) > 0: 106 | # for status, container_data in container["state"].items(): 107 | # if container_data and status != "running": 108 | # status_values.append(status) 109 | # 110 | # if len(status_values) > 0: 111 | # container_status[container_name]['status'] = 'ERROR: ' + (','.join(status_values)) 112 | # pod_data['status'] = container_status[container_name]['status'] 113 | # data['ready'] = False 114 | # 115 | # data['container_status'] = json.dumps(container_status) 116 | # data['pod_data'] = json.dumps(pod_data) 117 | # return data 118 | # 119 | # def get_zabbix_discovery_data(self): 120 | # data = list() 121 | # for container in self.containers: 122 | # data += [{ 123 | # "{#NAMESPACE}": self.name_space, 124 | # "{#NAME}": self.base_name, 125 | # "{#CONTAINER}": container, 126 | # }] 127 | # return data 128 | # 129 | # def get_discovery_for_zabbix(self, discovery_data=None): 130 | # if discovery_data is None: 131 | # discovery_data = self.get_zabbix_discovery_data() 132 | # 133 | # return ZabbixMetric( 134 | # self.zabbix_host, 135 | # 'check_kubernetesd[discover,containers]', 136 | # json.dumps({ 137 | # 'data': discovery_data, 138 | # }) 139 | # ) 140 | # 141 | # def get_zabbix_metrics(self): 142 | # data = self.resource_data 143 | # data_to_send = list() 144 | # 145 | # # if 'status' not in data: 146 | # # logger.error(data) 147 | # # 148 | # # for k, v in pod_data.items(): 149 | # # data_to_send.append(ZabbixMetric( 150 | # # self.zabbix_host, 'check_kubernetesd[get,pods,%s,%s,%s]' % (self.name_space, self.name, k), 151 | # 
def _get_pvc_data_for_node(api: CoreV1Api, node: str, pvc_volumes: list[K8sObject], timeout_seconds: int,
                           namespace_exclude_re: str,
                           resource_manager: K8sResourceManager) -> list[K8sObject]:
    """Query the kubelet stats summary of *node* and collect PVC-backed volumes.

    The generated kubernetes client has no typed wrapper for the node-proxy
    endpoint ``/api/v1/nodes/{node}/proxy/stats/summary``, so the request is
    issued through the low-level ``api_client.call_api`` machinery.

    :param api: CoreV1Api whose api_client performs the raw HTTP call
    :param node: name of the node whose kubelet is queried
    :param pvc_volumes: accumulator; discovered ``Pvc`` objects are appended
    :param timeout_seconds: request timeout for the proxy call
    :param namespace_exclude_re: regex of namespaces to skip (may be empty)
    :param resource_manager: manager handed to every created ``Pvc``
    :return: the (extended) accumulator list
    """
    query_params: list[str] = []
    form_params: list[str] = []
    header_params = {}
    body_params = None
    local_var_files: dict[str, str] = {}
    header_params['Accept'] = api.api_client.select_header_accept(
        ['application/json', 'application/yaml', 'application/vnd.kubernetes.protobuf', 'application/json;stream=watch',
         'application/vnd.kubernetes.protobuf;stream=watch'])  # noqa: E501

    auth_settings = ['BearerToken']  # noqa: E501
    path_params = {'node': node}
    logger.debug(f"Getting pvc infos for node {node}")
    ret = api.api_client.call_api(
        '/api/v1/nodes/{node}/proxy/stats/summary',
        'GET',
        path_params,
        query_params,
        header_params,
        body=body_params,
        post_params=form_params,
        files=local_var_files,
        response_type='str',  # noqa: E501
        auth_settings=auth_settings,
        async_req=False,
        _return_http_data_only=True,
        _preload_content=False,
        _request_timeout=timeout_seconds,
        collection_formats={}
    )

    loaded_json = json.loads(ret.data)

    # A node may legitimately report no pods at all; .get keeps the scan going
    # instead of raising KeyError.
    for item in loaded_json.get('pods', []):
        if "volume" not in item:
            continue
        pvc_volumes = _process_volume(item=item, namespace_exclude_re=namespace_exclude_re, node=node,
                                      pvc_volumes=pvc_volumes,
                                      resource_manager=resource_manager)
    return pvc_volumes


def _process_volume(item: dict, namespace_exclude_re: str, node: str,
                    pvc_volumes: list[K8sObject],
                    resource_manager: K8sResourceManager) -> list[K8sObject]:
    """Convert every PVC-referencing volume of one pod stats entry to a Pvc.

    Mutates each volume dict in place: adds ``nodename`` and the two usage
    percentages, then drops keys that are not submitted to zabbix.

    :param item: one entry of the kubelet summary's ``pods`` list
    :param namespace_exclude_re: regex of namespaces to skip (may be empty)
    :param node: node name recorded on each volume as ``nodename``
    :param pvc_volumes: accumulator; new ``Pvc`` objects are appended
    :param resource_manager: manager handed to every created ``Pvc``
    :return: the (extended) accumulator list
    """
    for volume in item['volume']:
        if 'pvcRef' not in volume:
            continue

        namespace = volume['pvcRef']['namespace']
        name = volume['pvcRef']['name']

        if namespace_exclude_re and re.match(namespace_exclude_re, namespace):
            continue

        # Duplicates are only reported, never dropped (original behavior kept).
        if any(v.name_space == namespace and v.name == name for v in pvc_volumes):
            logger.warning(f"pvc already exists {namespace} / {name}")

        metadata: MetadataObjectType = MetadataObjectType(name=name,
                                                          namespace=namespace,
                                                          generate_name=None,
                                                          owner_references=list())

        volume['nodename'] = node
        # Guard against a kubelet reporting zero capacity/inodes: the former
        # unguarded division raised ZeroDivisionError and aborted the whole
        # node scan. 0.0 is reported for such degenerate volumes instead.
        capacity_bytes = volume['capacityBytes']
        volume['usedBytesPercentage'] = \
            (volume['usedBytes'] / capacity_bytes * 100) if capacity_bytes else 0.0

        inodes_total = volume['inodes']
        volume['inodesUsedPercentage'] = \
            (volume['inodesUsed'] / inodes_total * 100) if inodes_total else 0.0

        # Not forwarded to zabbix; pop(..., None) tolerates missing keys.
        for key in ['name', 'pvcRef', 'time', 'availableBytes', 'inodesFree']:
            volume.pop(key, None)

        data: ObjectDataType = ObjectDataType(metadata=metadata, item=volume)
        pvc = Pvc(obj_data=data, resource="pvcs", manager=resource_manager)
        pvc_volumes.append(pvc)

    return pvc_volumes


def get_pvc_volumes_for_all_nodes(api: CoreV1Api, timeout: int, namespace_exclude_re: str,
                                  resource_manager: K8sResourceManager) -> list[K8sObject]:
    """Collect PVC usage data from the kubelet of every node in the cluster.

    :param api: CoreV1Api used both for node discovery and the proxy calls
    :param timeout: per-node request timeout in seconds
    :param namespace_exclude_re: regex of namespaces to skip (may be empty)
    :param resource_manager: manager handed to every created ``Pvc``
    :return: one ``Pvc`` object per PVC-backed volume found on any node
    """
    pvc_volumes: list[K8sObject] = list()
    for node in get_node_names(api):
        pvc_volumes = _get_pvc_data_for_node(api=api, node=node,
                                             pvc_volumes=pvc_volumes,
                                             timeout_seconds=timeout,
                                             namespace_exclude_re=namespace_exclude_re,
                                             resource_manager=resource_manager,
                                             )
    return pvc_volumes
class Pvc(K8sObject):
    """A PersistentVolumeClaim's usage stats as scraped from the kubelet
    summary API (see pvc helper functions in this module)."""

    object_type = 'pvc'

    @property
    def resource_data(self):
        # No pvc-specific enrichment; kept for interface symmetry with the
        # other K8sObject subclasses.
        data = super().resource_data
        return data

    def get_zabbix_metrics(self):
        """Return one ZabbixMetric per remaining stats key of this volume
        (usedBytes, capacityBytes, the computed percentages, nodename, ...)."""
        data_to_send = list()
        for key, value in self.data['item'].items():
            data_to_send.append(
                ZabbixMetric(
                    self.zabbix_host,
                    f'check_kubernetesd[get,pvcs,{self.name_space},{self.name},{key}]', value
                ))

        return data_to_send


class Secret(K8sObject):
    """A kubernetes Secret; TLS secrets additionally report the remaining
    certificate validity in days."""

    object_type = 'secret'

    @property
    def resource_data(self):
        """Base data plus ``valid_days`` when the secret carries a tls.crt."""
        data = super().resource_data

        if 'data' not in self.data or not self.data['data']:
            # BUG FIX: the original passed self.data as an extra positional
            # argument without a format placeholder, which breaks log-record
            # formatting; use lazy %-style arguments instead.
            logger.debug('No data for tls_cert "%s/%s": %s', self.name_space, self.name, self.data)
            return data

        if "tls.crt" not in self.data["data"]:
            return data

        base64_decode = base64.b64decode(self.data["data"]["tls.crt"])
        cert = x509.load_pem_x509_certificate(base64_decode, default_backend())
        # not_valid_after is deprecated in cryptography >= 42 (pinned in
        # requirements.txt); the _utc accessor is tz-aware, so compare against
        # an aware "now". The resulting day count is unchanged.
        data['valid_days'] = (cert.not_valid_after_utc
                              - datetime.datetime.now(datetime.timezone.utc)).days
        return data

    def get_zabbix_metrics(self):
        """Return the valid_days metric, or nothing for non-TLS secrets."""
        data = self.resource_data
        data_to_send = list()
        if 'valid_days' not in data:
            return data_to_send

        data_to_send.append(ZabbixMetric(
            self.zabbix_host, 'check_kubernetesd[get,secret,' + self.name_space + ',' + self.name + ',valid_days]',
            data['valid_days'])
        )
        return data_to_send

    def get_zabbix_discovery_data(self):
        """Only TLS secrets (carrying a tls.crt key) take part in discovery."""
        # .get avoids a KeyError for secrets that have no 'data' section at all
        secret_data = self.data.get("data")
        if secret_data is not None and "tls.crt" in dict(secret_data):
            return super().get_zabbix_discovery_data()
        return ''
class Service(K8sObject):
    """A kubernetes Service."""

    object_type = 'service'

    @property
    def resource_data(self):
        """Base data plus ``is_ingress``: True when the service has at least a
        load-balancer ingress entry in its status."""
        data = super().resource_data
        data['is_ingress'] = False
        if self.data["status"]["load_balancer"]["ingress"] is not None:
            data['is_ingress'] = True
        return data

    def get_zabbix_metrics(self):
        # NOTE(review): unlike the sibling classes this returns the raw data
        # dict rather than a list of ZabbixMetric; callers appear to depend on
        # that, so the contract is deliberately kept as-is.
        data = self.resource_data
        return data


class Statefulset(K8sObject):
    """A kubernetes StatefulSet: forwards every status counter and an
    aggregated ``available_status`` derived from the status conditions."""

    object_type = 'statefulset'

    @property
    def resource_data(self):
        """Base data plus one entry per status field (conditions excluded) and
        an ``available_status`` of 'OK' or 'ERROR: <failed condition types>'."""
        data = super().resource_data

        for status_type in self.data['status']:
            if status_type == 'conditions':
                continue
            data.update({status_type: transform_value(self.data['status'][status_type])})

        failed_conds = []
        # .get: a status without a 'conditions' key must not raise KeyError —
        # it simply means nothing failed.
        if self.data['status'].get('conditions'):
            # NOTE(review): an "Available" condition type is a Deployment
            # concept; statefulset statuses may never match — confirm intent.
            available_conds = [x for x in self.data['status']['conditions'] if x['type'].lower() == "available"]
            if available_conds:
                for cond in available_conds:
                    if cond['status'] != 'True':
                        failed_conds.append(cond['type'])

                if len(failed_conds) > 0:
                    data['available_status'] = 'ERROR: ' + (','.join(failed_conds))
                else:
                    data['available_status'] = 'OK'
            else:
                data['available_status'] = 'OK'
        else:
            data['available_status'] = 'OK'

        return data

    def get_zabbix_metrics(self):
        """Return one ZabbixMetric per status field plus available_status."""
        data_to_send = []
        # Hoisted: resource_data is a property that rebuilds the whole dict on
        # every access; the original re-evaluated it once per status key.
        resource_data = self.resource_data

        for status_type in self.data['status']:
            if status_type in ['conditions', 'update_revision']:
                continue

            data_to_send.append(ZabbixMetric(
                self.zabbix_host,
                'check_kubernetesd[get,statefulsets,%s,%s,%s]' % (self.name_space, self.name, status_type),
                transform_value(resource_data[status_type]))
            )

        data_to_send.append(ZabbixMetric(
            self.zabbix_host,
            'check_kubernetesd[get,statefulsets,%s,%s,available_status]' % (self.name_space, self.name),
            resource_data['available_status']))

        return data_to_send
else: 33 | data['available_status'] = 'OK' 34 | else: 35 | data['available_status'] = 'OK' 36 | 37 | return data 38 | 39 | def get_zabbix_metrics(self): 40 | data_to_send = [] 41 | 42 | for status_type in self.data['status']: 43 | if status_type in ['conditions', 'update_revision']: 44 | continue 45 | 46 | data_to_send.append(ZabbixMetric( 47 | self.zabbix_host, 48 | 'check_kubernetesd[get,statefulsets,%s,%s,%s]' % (self.name_space, self.name, status_type), 49 | transform_value(self.resource_data[status_type])) 50 | ) 51 | 52 | data_to_send.append(ZabbixMetric( 53 | self.zabbix_host, 54 | 'check_kubernetesd[get,statefulsets,%s,%s,available_status]' % (self.name_space, self.name), 55 | self.resource_data['available_status'])) 56 | 57 | return data_to_send 58 | -------------------------------------------------------------------------------- /kubernetes/incluster/01_monitoring-user.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: monitoring 6 | 7 | --- 8 | apiVersion: v1 9 | kind: ServiceAccount 10 | metadata: 11 | name: monitoring 12 | namespace: monitoring 13 | 14 | --- 15 | kind: ClusterRole 16 | apiVersion: rbac.authorization.k8s.io/v1 17 | metadata: 18 | name: monitoring 19 | namespace: monitoring 20 | rules: 21 | - apiGroups: 22 | - "" 23 | resources: 24 | - pods 25 | - nodes 26 | - nodes/proxy 27 | - services 28 | - componentstatuses 29 | - secrets 30 | verbs: 31 | - get 32 | - list 33 | - watch 34 | - apiGroups: 35 | - extensions 36 | - apps 37 | resources: 38 | - deployments 39 | - replicasets 40 | - daemonsets 41 | verbs: 42 | - get 43 | - list 44 | - watch 45 | 46 | --- 47 | apiVersion: rbac.authorization.k8s.io/v1 48 | kind: ClusterRoleBinding 49 | metadata: 50 | name: monitoring 51 | roleRef: 52 | apiGroup: rbac.authorization.k8s.io 53 | kind: ClusterRole 54 | name: monitoring 55 | subjects: 56 | - kind: ServiceAccount 57 | name: monitoring 58 | 
namespace: monitoring 59 | 60 | -------------------------------------------------------------------------------- /kubernetes/incluster/02_deployment_with_incluster_config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: zabbix-kubernetes 5 | namespace: monitoring 6 | spec: 7 | selector: 8 | matchLabels: 9 | app: zabbix-kubernetes 10 | replicas: 1 11 | template: 12 | metadata: 13 | labels: 14 | app: zabbix-kubernetes 15 | spec: 16 | containers: 17 | - name: k8s-zabbix 18 | image: scoopex666/k8s-zabbix:latest 19 | args: [ "--show_effective_config" ] 20 | # review config_default.py for additionaol configuration values (convert the names to uppercase) 21 | env: 22 | - name: "ZABBIX_SERVER" 23 | value: "zabbix.api.foo.bar:10080" 24 | # the abstract/virtual host which is configured in zabbix to hold and alert k8s zabbix 25 | - name: "ZABBIX_HOST" 26 | value: "k8s-prod-001" 27 | # Enable the optional management api status submission 28 | - name: "WEB_API_ENABLE" 29 | value: "False" 30 | # The base uri of the anagement api status submission 31 | - name: "WEB_API_HOST" 32 | value: "https://example.api.com/api/v1/k8s" 33 | # The security token for management api status submission 34 | - name: "WEB_API_TOKEN" 35 | value: "17812110692887024374221963068327794883098412835131004" 36 | # The name of the k8s cluster in the management system 37 | - name: "WEB_API_CLUSTER" 38 | value: "k8s-test-cluster" 39 | imagePullPolicy: Always 40 | resources: 41 | requests: 42 | memory: "256Mi" 43 | cpu: "250m" 44 | limits: 45 | memory: "256Mi" 46 | cpu: "250m" 47 | serviceAccount: monitoring 48 | serviceAccountName: monitoring 49 | 50 | -------------------------------------------------------------------------------- /kubernetes/token/01_monitoring-user.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | 
metadata: 5 | name: monitoring 6 | 7 | --- 8 | apiVersion: v1 9 | kind: ServiceAccount 10 | metadata: 11 | name: monitoring 12 | namespace: monitoring 13 | 14 | --- 15 | kind: ClusterRole 16 | apiVersion: rbac.authorization.k8s.io/v1 17 | metadata: 18 | name: monitoring 19 | namespace: monitoring 20 | rules: 21 | - apiGroups: 22 | - "" 23 | resources: 24 | - pods 25 | - nodes 26 | - nodes/proxy 27 | - services 28 | - componentstatuses 29 | - secrets 30 | verbs: 31 | - get 32 | - list 33 | - watch 34 | - apiGroups: 35 | - extensions 36 | - apps 37 | resources: 38 | - deployments 39 | - replicasets 40 | - daemonsets 41 | verbs: 42 | - get 43 | - list 44 | - watch 45 | 46 | --- 47 | apiVersion: rbac.authorization.k8s.io/v1 48 | kind: ClusterRoleBinding 49 | metadata: 50 | name: monitoring 51 | roleRef: 52 | apiGroup: rbac.authorization.k8s.io 53 | kind: ClusterRole 54 | name: monitoring 55 | subjects: 56 | - kind: ServiceAccount 57 | name: monitoring 58 | namespace: monitoring 59 | 60 | -------------------------------------------------------------------------------- /kubernetes/token/02_ingress-apiserver.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1beta1 2 | kind: Ingress 3 | metadata: 4 | name: ingress-apiserver 5 | namespace: kube-system 6 | annotations: 7 | nginx.ingress.kubernetes.io/secure-backends: "true" 8 | nginx.ingress.kubernetes.io/backend-protocol: "HTTPS" 9 | kubernetes.io/ingress.class: "nginx-ingress-internal" 10 | nginx.ingress.kubernetes.io/proxy-connect-timeout: "30" 11 | nginx.ingress.kubernetes.io/proxy-read-timeout: "300" 12 | nginx.ingress.kubernetes.io/proxy-send-timeout: "300" 13 | spec: 14 | tls: 15 | # Ideally use a offical tls certificate or set VERIFY_SSL to False in the deployment environment variables 16 | - secretName: tls-global 17 | hosts: 18 | - "k8s-test-api.foo.bar" 19 | backend: 20 | serviceName: kube-apiserver 21 | servicePort: 443 22 | rules: 23 | - 
host: "k8s-test-api.foo.bar" 24 | http: 25 | paths: 26 | - path: / 27 | backend: 28 | serviceName: kube-apiserver 29 | servicePort: 443 30 | 31 | -------------------------------------------------------------------------------- /kubernetes/token/03_service-apiserver.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | k8s-app: kube-apiserver 6 | kubernetes.io/cluster-service: "true" 7 | name: kube-apiserver 8 | namespace: kube-system 9 | spec: 10 | ports: 11 | - name: ssl 12 | port: 443 13 | protocol: TCP 14 | targetPort: 6443 15 | selector: 16 | component: kube-apiserver 17 | -------------------------------------------------------------------------------- /kubernetes/token/04_deployment_with_token_config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: zabbix-kubernetes 5 | namespace: monitoring 6 | spec: 7 | selector: 8 | matchLabels: 9 | app: zabbix-kubernetes 10 | replicas: 1 11 | template: 12 | metadata: 13 | labels: 14 | app: zabbix-kubernetes 15 | spec: 16 | containers: 17 | - name: k8s-zabbix 18 | # add your 19 | #image: your-repo.foo.bar:k8s-zabbix:latest 20 | image: scoopex666/k8s-zabbix:latest 21 | args: [ "config_default" ] 22 | # review config_default.py for additionaol configuration values (convert the names to uppercase) 23 | env: 24 | # the ingress hostname where you can reach the k8s api 25 | - name: "K8S_API_HOST" 26 | value: "http://k8s-api.foo.bar/" 27 | # if you ignore the advice of the installation instructions to create a ingress with a offical certificate and instead to connect to a controller, you should set this to "False" 28 | - name: "VERIFY_SSL" 29 | value: "True" 30 | # the k8s api token, where you can fetch information from the k8s cluster 31 | - name: "K8S_API_TOKEN" 32 | value: 
"FAKE-4uo7ahn0HaireePhohmieCix5eecoox6luv9ahngaiwieweem1wooxie4ooch1Meingeetai7Esh9toiQuaith1uphae2thoonae9yoo0ye0eiy2Ien2iecaNgiudut3kohjahfeilooShaewoch3ibeezieTas8seijaiwed3ei0ShuGo5nooqua5Yaixieghaihiaquahvae6Oowiqu8Yahko4in6Nievai7rohghohbiThei5gai5Ohqu0bo5ahphah4uuquohfaif6rae0ahCheijeSahkae2Voox8seivo5ohg6niSh3thuk8seemo4Eixir8eb0miejeeSeem3aen5noovohchoop1weikieliith0eaPoJohkee8nou4Voja9eiyoh4oenie3reehohriichool6baegeeghien5uibiemooghoRahshaibea8Foo5zaiT5kood9quae6Naiw4hohvei6fae9goyei4yafeesh8Eiciepha1ahseZ7Eesho3oa0yiicieyaiShoh8eivi7kahv8nae5eivohZ8ierengaepoh1fiezo2Oth1boy6jeichah6eiDeiquun5Ach4chithui2Gei7ohchu1uephekalichahzeigh1ohV9aceyah1phahohsoo6eisieJohw7uk4Zo5Iedai8iW9ut4beePh1bai3Oogh7Aighi6uiPhapoo4loo2eth4el0Ieng7ZeeV0aiXiJaigh5AhlabieBir3Aegoh5aey8coRah2Aehee3shutiej1johy6mieXahxiePe" 33 | # the zabbix server/proxy where measurements and status information should be delivered 34 | - name: "ZABBIX_SERVER" 35 | value: "zabbix.api.foo.bar:10080" 36 | # the abstract/virtual host which is configured in zabbix to hold and alert k8s zabbix 37 | - name: "ZABBIX_HOST" 38 | value: "k8s-prod-001" 39 | # Enable the optional management api status submission 40 | - name: "WEB_API_ENABLE" 41 | value: "False" 42 | # The base uri of the anagement api status submission 43 | - name: "WEB_API_HOST" 44 | value: "https://example.api.com/api/v1/k8s" 45 | # The security token for management api status submission 46 | - name: "WEB_API_TOKEN" 47 | value: "17812110692887024374221963068327794883098412835131004" 48 | # The name of the k8s cluster in the management system 49 | - name: "WEB_API_CLUSTER" 50 | value: "k8s-test-cluster" 51 | imagePullPolicy: Always 52 | resources: 53 | requests: 54 | memory: "256Mi" 55 | cpu: "250m" 56 | limits: 57 | memory: "256Mi" 58 | cpu: "250m" 59 | serviceAccount: monitoring 60 | serviceAccountName: monitoring 61 | 62 | -------------------------------------------------------------------------------- /mypy.ini: 
-------------------------------------------------------------------------------- 1 | [mypy] 2 | #strict = true 3 | 4 | [mypy-sh,cachetools.*,pyzabbix.*,pause.*,coloredlogs.*,urllib3.*,requests,kubernetes.*] 5 | ignore_missing_imports = True 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | kubernetes==27.2.0 2 | cryptography==42.0.4 3 | types-cryptography==3.3.23.2 4 | py-zabbix==1.1.7 5 | sentry-sdk==1.29.0 6 | adal==1.2.7 7 | urllib3==2.2.0 8 | pytest==7.4.0 9 | mypy==1.4.1 10 | flake8==6.1.0 11 | coloredlogs==15.0.1 12 | -------------------------------------------------------------------------------- /template/create_template_documentation: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | BDIR="$(dirname $(readlink -f $0))" 5 | cd $BDIR || exit 1 6 | 7 | if ( !(type xalan >/dev/null 2>&1) );then 8 | echo "INFO: skipping documentation generation because xalan is not installed" 9 | echo " (apt-get install xalan)" 10 | exit 0 11 | fi 12 | 13 | rm -rf documentation 14 | mkdir documentation 15 | 16 | for i in custom*.xml; do 17 | echo "=> $i" 18 | DOCUFILE="../documentation/template/${i%%.xml}.html" 19 | DOCUFILE="$(echo $DOCUFILE|tr ' ' '_')" 20 | xalan -in "$i" -out $DOCUFILE -xsl transform.xsl; 21 | done 22 | 23 | -------------------------------------------------------------------------------- /template/transform.xsl: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 36 | 37 |

Macro Overview

38 | 39 | The following macros can be overloaded on host level. 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 |
NameDefault
52 | 53 | 54 |

Static Elements

55 |

Trigger Overview

56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 82 | 83 | 84 |
NameDescriptionPriorityExpressionDependencies
InformationWarningAverageHighDisasterNOT CLASSIFIED 78 | 79 |

80 |
81 |
85 | 86 |

Graph Overview

87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 98 | 99 | 100 |
NameElements
96 |
97 |
101 | 102 | 103 |

Item Overview

104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
126 | 127 | 128 |

Discovery rule ""

129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 |
NameValue
Name
Key
Type
Delay
143 | 144 | 145 |

Trigger Overview

146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 172 | 173 | 174 |
NameDescriptionPriorityExpressionDependencies
InformationWarningAverageHighDisasterNOT CLASSIFIED 168 | 169 |

170 |
171 |
175 | 176 |

Graph Overview

177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 188 | 189 | 190 |
NameElements
186 |
187 |
191 | 192 | 193 |

Item Overview

194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 |
TypeNameKeyDescriptionInterval (sec)History DaysTrend Days
216 |
217 | 218 | 219 | 220 |
221 |
def test_load_config():
    """Configuration must merge values from an INI file and then from the
    environment, coercing each value to the type of its declared default."""
    cfg = Configuration()
    cfg.load_config_file(f"{RESOURCES_DIR}/test.ini")
    cfg.load_from_environment_variables()
    # values set in tests/unit/resources/test.ini: 'True' -> bool, '12' -> int
    assert (cfg.debug is True)
    assert (cfg.discovery_interval_fast == 12)
    # the comma-separated exclude list must be split into individual entries
    assert ("jacco" in cfg.zabbix_resources_exclude)
    assert ("wacco" in cfg.zabbix_resources_exclude)
    print("")
    cfg.show_effective_config()


def test_transform_value():
    """transform_value normalizes kubernetes quantity strings: milli-units
    ('m') become a decimal value, 'Ki' suffixes become plain byte counts."""
    assert(transform_value("7820m") == "7.82")
    assert (transform_value("512Ki") == "524288")