├── .codeclimate.yml ├── .coveragerc ├── .dockerignore ├── .github └── workflows │ └── tests.yml ├── .gitignore ├── .travis.yml ├── DEVLOPEMENT.md ├── LICENSE ├── Pipfile ├── README.rst ├── check_docker ├── __init__.py ├── check_docker.py └── check_swarm.py ├── pyproject.toml ├── release_process.md ├── run_isolated_tests.sh ├── run_package_tests.sh ├── testing_tools ├── Dockerfile └── vagrant │ ├── Vagrantfile │ ├── bats_fixtures.bash │ └── tests.bats ├── tests ├── __init__.py ├── test_check_docker.py ├── test_check_swarm.py └── test_version.py └── tox.ini /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | languages: 2 | Ruby: true 3 | JavaScript: true 4 | PHP: true 5 | Python: true 6 | exclude_paths: 7 | - "check_docker/tests/*" 8 | - "tests/*" 9 | plugins: 10 | radon: 11 | enabled: true 12 | sonar-python: 13 | enabled: true -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | include = 3 | check_docker/check_*.py 4 | omit = 5 | tests/* 6 | */__init__.py 7 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | * 3 | !testing_tools 4 | !dev_requirements.txt -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Run tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python_version: [3.6, 3.7, 3.8] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python 20 | uses: actions/setup-python@v2 21 | with: 22 | 
python-version: ${{ matrix.python_version }} 23 | - name: Install dependencies 24 | run: | 25 | python -V 26 | printenv 27 | python -m pip install --upgrade pip 28 | pip install flake8 pytest coverage pyfakefs pytest-cov 29 | - name: Lint with flake8 30 | run: | 31 | # stop the build if there are Python syntax errors or undefined names 32 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 33 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 34 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 35 | - name: Test with pytest 36 | run: | 37 | pytest --cov=check_docker --cov-fail-under 90 --cov-report term --cov-report html 38 | - uses: actions/upload-artifact@v2 39 | with: 40 | name: coverage_report 41 | path: htmlcov 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python template 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | MANIFEST 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask instance folder 59 | instance/ 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # IPython Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | 92 | .idea 93 | 94 | 95 | ## File-based project format: 96 | *.iws 97 | 98 | ## Plugin-specific files: 99 | 100 | # IntelliJ 101 | /out/ 102 | 103 | 104 | # JIRA plugin 105 | atlassian-ide-plugin.xml 106 | 107 | # Crashlytics plugin (for Android Studio and IntelliJ) 108 | com_crashlytics_export_strings.xml 109 | crashlytics.properties 110 | crashlytics-build.properties 111 | fabric.properties 112 | 113 | /coverage 114 | /.pytest_cache/ 115 | .pytest_cache/ 116 | config.yml 117 | cr-sess1.json 118 | 119 | 120 | testing_tools/vagrant/.vagrant 121 | !check_docker/ 122 | 123 | .DS_Store -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | - "3.6" 5 | - "3.7" 6 | - "3.8" 7 | install: 8 | - pip install pipenv 9 | - pipenv install 10 | - pipenv install codeclimate-test-reporter 11 | # command to run tests 12 | script: 13 | - py.test --cov=check_docker 14 | - codeclimate-test-reporter || echo "Ignoring Code Climate 
reporter upload failure" 15 | -------------------------------------------------------------------------------- /DEVLOPEMENT.md: -------------------------------------------------------------------------------- 1 | # Development environment setup 2 | 3 | You should have the following installed 4 | 5 | - docker 6 | - python (version >= 3.0) 7 | - pipenv 8 | - vagrant 9 | 10 | Initialize your pipenv 11 | 12 | pipenv install --skip-lock 13 | 14 | # Running the tests 15 | 16 | ## Normal tests 17 | tox and Pytest are used for testing. You can run the tests by running the following from 18 | the root of the project 19 | 20 | tox 21 | 22 | ## Isolated tests 23 | Sometimes test cases can interact with Docker on the development machine making 24 | it hard to determine the cause of a test success or failure. To address this 25 | you can use the `run_isolated_tests.sh` script to run pytest inside an 26 | environment isolated from any network. Additionally this isolated test will 27 | run the unit tests on multiple versions of python so you can validate your 28 | changes are not python version specific. 29 | 30 | ./run_isolated_tests.sh 31 | 32 | ## Package tests 33 | These tests verify that, after being created, the package can be installed and 34 | runs successfully (not just passes unit tests). To do this a test environment is set up in vagrant. 35 | 36 | ./run_package_tests.sh 37 | 38 | # Coverage report 39 | The aim is to keep coverage above 90% on the actual checks 40 | (check_docker.py and check_swarm.py). To generate a coverage report: 41 | 42 | pipenv run py.test --cov=check_docker/ 43 | 44 | # Tips 45 | When jumping back and forth between normal and isolated tests the `__pycache__` 46 | directories can fall out of sync with your execution environment. When this 47 | happens you see errors like `ImportError: No module named 'check_docker'`. The 48 | fix is simple, just remove all the `__pycache__` directories in the project. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 
33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. 
To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [packages] 7 | tox = '*' 8 | tox-pyenv = '*' 9 | pytest = '*' 10 | pytest-random-order = '*' 11 | coverage = '>4.0,<4.4' 12 | pyfakefs = '*' 13 | pytest-cov = '<2.6' 14 | poetry = "*" 15 | 16 | [requires] 17 | python_version = "3.8" 18 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | |Build Status| |Code Climate| |Test Coverage| |Downloads| 2 | 3 | 4 | ============ 5 | check_docker 6 | ============ 7 | 8 | Nagios/NRPE compatible plugins for checking docker based services. Currently there are two nagios checks 9 | 10 | - **check_docker** which checks docker container health 11 | - **check_swarm** which checks health of swarm nodes and services 12 | 13 | With **check_docker** can use it to check and alert on 14 | 15 | - memory consumption in absolute units (bytes, kb, mb, gb) and as a percentage (0-100%) 16 | of the container limit. 17 | - CPU usages as a percentage (0-100%) of container limit. 18 | - automatic restarts performed by the docker daemon 19 | - container status, i.e. is it running? 20 | - container health checks are passing? 21 | - uptime, i.e. is it able to stay running for a long enough time? 
22 | - the presence of a container or containers matching specified names 23 | - image version, does the running image match that in the remote registry? 24 | - image age, when was the image built the last time? 25 | 26 | With **check_swarm** you can alert 27 | 28 | - if a node is not joined to a docker swarm 29 | - if a service is running in a swarm 30 | 31 | These checks can communicate with a local docker daemon socket file (default) or with local 32 | or remote docker daemons using secure and non-secure TCP connections. 33 | 34 | These plugins require python 3. It is tested on 3.5 and greater but may work on older 35 | versions of 3. 36 | 37 | Installation 38 | ----------------- 39 | 40 | With pip 41 | :: 42 | 43 | pip3 install check_docker 44 | --or-- 45 | pip install check_docker 46 | 47 | With curl 48 | :: 49 | 50 | curl -o /usr/local/bin/check_docker https://raw.githubusercontent.com/timdaman/check_docker/master/check_docker/check_docker.py 51 | curl -o /usr/local/bin/check_swarm https://raw.githubusercontent.com/timdaman/check_docker/master/check_docker/check_swarm.py 52 | chmod a+rx /usr/local/bin/check_docker /usr/local/bin/check_swarm 53 | 54 | With wget 55 | :: 56 | 57 | wget -O /usr/local/bin/check_docker https://raw.githubusercontent.com/timdaman/check_docker/master/check_docker/check_docker.py 58 | wget -O /usr/local/bin/check_swarm https://raw.githubusercontent.com/timdaman/check_docker/master/check_docker/check_swarm.py 59 | chmod a+rx /usr/local/bin/check_docker /usr/local/bin/check_swarm 60 | 61 | 62 | check_docker Usage 63 | ------------------ 64 | 65 | :: 66 | 67 | usage: check_docker.py [-h] 68 | [--connection [//docker.socket|:] 69 | | --secure-connection [:]] 70 | [--binary_units | --decimal_units] [--timeout TIMEOUT] 71 | [--containers CONTAINERS [CONTAINERS ...]] [--present] 72 | [--threads THREADS] [--cpu WARN:CRIT] 73 | [--memory WARN:CRIT:UNITS] [--status STATUS] [--health] 74 | [--uptime WARN:CRIT] [--image-age WARN:CRIT] [--version] 
75 | [--insecure-registries INSECURE_REGISTRIES [INSECURE_REGISTRIES ...]] 76 | [--restarts WARN:CRIT] [--no-ok] [--no-performance] [-V] 77 | 78 | Check docker containers. 79 | 80 | optional arguments: 81 | -h, --help show this help message and exit 82 | --connection [//docker.socket|:] 83 | Where to find docker daemon socket. (default: 84 | /var/run/docker.sock) 85 | --secure-connection [:] 86 | Where to find TLS protected docker daemon socket. 87 | --binary_units Use a base of 1024 when doing calculations of KB, MB, 88 | GB, & TB (This is default) 89 | --decimal_units Use a base of 1000 when doing calculations of KB, MB, 90 | GB, & TB 91 | --timeout TIMEOUT Connection timeout in seconds. (default: 10.0) 92 | --containers CONTAINERS [CONTAINERS ...] 93 | One or more RegEx that match the names of the 94 | container(s) to check. If omitted all containers are 95 | checked. (default: ['all']) 96 | --present Modifies --containers so that each RegEx must match at 97 | least one container. 98 | --threads THREADS This + 1 is the maximum number of concurent 99 | threads/network connections. (default: 10) 100 | --cpu WARN:CRIT Check cpu usage percentage taking into account any 101 | limits. Valid values are 0 - 100. 102 | --memory WARN:CRIT:UNITS 103 | Check memory usage taking into account any limits. 104 | Valid values for units are %,B,KB,MB,GB. 105 | --status STATUS Desired container status (running, exited, etc). 106 | --health Check container's health check status 107 | --uptime WARN:CRIT Minimum container uptime in seconds. Use when 108 | infrequent crashes are tolerated. 109 | --image-age WARN:CRIT Maximum image age in days. 110 | --version Check if the running images are the same version as 111 | those in the registry. Useful for finding stale 112 | images. Does not support login. 113 | --insecure-registries INSECURE_REGISTRIES [INSECURE_REGISTRIES ...] 114 | List of registries to connect to with http(no TLS). 
115 | Useful when using "--version" with images from 116 | insecure registries. 117 | --restarts WARN:CRIT Container restart thresholds. 118 | --no-ok Make output terse suppressing OK messages. If all 119 | checks are OK return a single OK. 120 | --no-performance Suppress performance data. Reduces output when 121 | performance data is not being used. 122 | -V show program's version number and exit 123 | 124 | 125 | check_swarm Usage 126 | ----------------- 127 | 128 | :: 129 | 130 | usage: check_swarm.py [-h] 131 | [--connection [//docker.socket|:] 132 | | --secure-connection [:]] 133 | [--timeout TIMEOUT] 134 | (--swarm | --service SERVICE [SERVICE ...] | --ignore_paused) 135 | [-V] 136 | 137 | Check docker swarm. 138 | 139 | optional arguments: 140 | -h, --help show this help message and exit 141 | --connection [//docker.socket|:] 142 | Where to find docker daemon socket. (default: 143 | /var/run/docker.sock) 144 | --secure-connection [:] 145 | Where to find TLS protected docker daemon socket. 146 | --timeout TIMEOUT Connection timeout in seconds. (default: 10.0) 147 | --swarm Check swarm status 148 | --service SERVICE [SERVICE ...] 149 | One or more RegEx that match the names of the 150 | services(s) to check. 151 | --ignore_paused Don't require global services to be running on paused nodes 152 | -V show program's version number and exit 153 | 154 | Gotchas 155 | ------- 156 | 157 | - When using check_docker with older versions of docker (I have seen 1.4 and 1.5) –status only supports ‘running’, ‘restarting’, and ‘paused’. 158 | - When using check_docker, if no container is specified, all containers are checked. Some containers may return critcal status if the selected check(s) require a running container. 159 | - When using check_docker, --present cannot be used without --containers to indicate what to check the presence of. 160 | 161 | .. 
|Build Status| image:: https://travis-ci.org/timdaman/check_docker.svg?branch=master 162 | :target: https://travis-ci.org/timdaman/check_docker 163 | .. |Code Climate| image:: https://codeclimate.com/github/timdaman/check_docker/badges/gpa.svg 164 | :target: https://codeclimate.com/github/timdaman/check_docker 165 | .. |Test Coverage| image:: https://codeclimate.com/github/timdaman/check_docker/badges/coverage.svg 166 | :target: https://codeclimate.com/github/timdaman/check_docker/coverage 167 | .. |Downloads| image:: http://pepy.tech/badge/check-docker 168 | :target: http://pepy.tech/count/check-docker 169 | -------------------------------------------------------------------------------- /check_docker/__init__.py: -------------------------------------------------------------------------------- 1 | """Nagios/NRPE compatible plugins for checking docker based services""" 2 | __version__ = "2.2.2" -------------------------------------------------------------------------------- /check_docker/check_docker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # logging.basicConfig(level=logging.DEBUG) 3 | import argparse 4 | import json 5 | import logging 6 | import math 7 | import os 8 | import re 9 | import socket 10 | import stat 11 | import traceback 12 | from collections import deque, namedtuple, UserDict, defaultdict 13 | from concurrent import futures 14 | from datetime import datetime, timezone 15 | from functools import lru_cache 16 | from http.client import HTTPConnection 17 | from sys import argv 18 | from urllib import request 19 | from urllib.error import HTTPError, URLError 20 | from urllib.request import AbstractHTTPHandler, HTTPHandler, HTTPSHandler, OpenerDirector, HTTPRedirectHandler, \ 21 | Request, HTTPBasicAuthHandler 22 | 23 | logger = logging.getLogger() 24 | __author__ = 'Tim Laurence' 25 | __copyright__ = "Copyright 2019" 26 | __credits__ = ['Tim Laurence'] 27 | __license__ = "GPL" 28 
__version__ = "2.2.2"

'''
nrpe compatible check for docker containers.

Requires Python 3

Note: I really would have preferred to have used requests for all the network connections but that would have added a
dependency.
'''

DEFAULT_SOCKET = '/var/run/docker.sock'
DEFAULT_TIMEOUT = 10.0
DEFAULT_PORT = 2375
DEFAULT_MEMORY_UNITS = 'B'
DEFAULT_HEADERS = [('Accept', 'application/vnd.docker.distribution.manifest.v2+json')]
DEFAULT_PUBLIC_REGISTRY = 'registry-1.docker.io'

# The second value is the power to raise the base to.
UNIT_ADJUSTMENTS_TEMPLATE = {
    '%': 0,
    'B': 0,
    'KB': 1,
    'MB': 2,
    'GB': 3,
    'TB': 4
}
unit_adjustments = None

# Reduce message to a single OK unless a checks fail.
no_ok = False

# Suppress performance data reporting
no_performance = False

# Nagios plugin return codes
OK_RC = 0
WARNING_RC = 1
CRITICAL_RC = 2
UNKNOWN_RC = 3

# These hold the final results
rc = -1
messages = []
performance_data = []

ImageName = namedtuple('ImageName', "registry name tag full_name")


class ThresholdSpec(UserDict):
    """warn/crit/units triple usable both as a dict and via attribute access."""

    def __init__(self, warn, crit, units=''):
        super().__init__(warn=warn, crit=crit, units=units)

    def __getattr__(self, item):
        # Expose the three known keys as attributes (spec.warn, spec.crit, spec.units).
        if item in ('warn', 'crit', 'units'):
            return self.data[item]
        # BUGFIX: neither UserDict nor object defines __getattr__, so the previous
        # 'return super().__getattr__(item)' raised a confusing AttributeError about
        # 'super' itself. Raise the conventional AttributeError for the missing name;
        # callers catching AttributeError are unaffected.
        raise AttributeError(item)


# How much threading can we do? We are generally not CPU bound so I am using this a worse case cap
DEFAULT_PARALLELISM = 10

# Holds list of all threads
threads = []

# This is used during testing
DISABLE_THREADING = False


# Hacked up urllib to handle sockets
#############################################################################################
# Docker runs a http connection over a socket. http.client knows how to deal with these
# but lacks some niceties. Urllib wraps that and makes up for some of the deficiencies but
# cannot fix the fact http.client can't read from socket files. In order to take advantage of
# urllib and http.client's capabilities the class below tweaks HttpConnection and passes it
# to urllib registering for socket:// connections

# This is all side effect so excluding coverage
class SocketFileHandler(AbstractHTTPHandler):
    """urllib handler that routes requests to a unix domain socket file."""

    class SocketFileToHttpConnectionAdaptor(HTTPConnection):  # pragma: no cover
        def __init__(self, socket_file, timeout=DEFAULT_TIMEOUT):
            # host/port are meaningless for a socket file; connect() supplies the socket.
            super().__init__(host='', port=0, timeout=timeout)
            self.socket_file = socket_file

        def connect(self):
            self.sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM, proto=0, fileno=None)
            self.sock.settimeout(self.timeout)
            self.sock.connect(self.socket_file)

    def socket_open(self, req):
        # Selector looks like '<socket path>:<http path>'; split once so the http
        # path may itself contain no further colons before the first one.
        socket_file, path = req.selector.split(':', 1)
        req.host = socket_file
        req.selector = path
        return self.do_open(self.SocketFileToHttpConnectionAdaptor, req)


# Tokens are not cached because I expect the callers to cache the responses
class Oauth2TokenAuthHandler(HTTPBasicAuthHandler):
    """Handles the registry's OAuth2 'Bearer' challenge (401 + WWW-Authenticate)."""

    # Counts 401s per URL so a bad token can't cause an infinite retry loop.
    auth_failure_tracker = defaultdict(int)

    def http_response(self, request, response):
        code = response.code

        www_authenticate_header = response.headers.get('www-authenticate', None)
        if code == 401 and www_authenticate_header:
            scheme = www_authenticate_header.split()[0]
            if scheme.lower() == 'bearer':
                return self.process_oauth2(request, response, www_authenticate_header)

        return response

    https_response = http_response

    @staticmethod
    def _get_outh2_token(www_authenticate_header):
        """Request a bearer token from the auth service named in the challenge header."""
        # BUGFIX: restored the '(?P<key>' named group lost in this source dump; the
        # regex must yield (key, value) 2-tuples for dict() to accept findall's output.
        auth_fields = dict(re.findall(r"""(?:(?P<key>[^ ,=]+)="([^"]+)")""", www_authenticate_header))

        auth_url = "{realm}?scope={scope}&service={service}".format(
            realm=auth_fields['realm'],
            scope=auth_fields['scope'],
            service=auth_fields['service'],
        )
        token_request = Request(auth_url)
        token_request.add_header("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
        token_response = request.urlopen(token_request)
        return process_urllib_response(token_response)['token']

    def process_oauth2(self, request, response, www_authenticate_header):
        """Fetch a token for the challenge and replay the request once with it."""

        # This keeps infinite auth loops from happening
        full_url = request.full_url
        self.auth_failure_tracker[full_url] += 1
        if self.auth_failure_tracker[full_url] > 1:
            raise HTTPError(full_url, 401, "Stopping Oauth2 failure loop for {}".format(full_url),
                            response.headers, response)

        auth_token = self._get_outh2_token(www_authenticate_header)

        request.add_unredirected_header('Authorization', 'Bearer ' + auth_token)
        return self.parent.open(request, timeout=request.timeout)


# Shared opener used for every GET in this module: plain http(s), redirects,
# unix-socket files, and registry OAuth2 challenges.
better_urllib_get = OpenerDirector()
better_urllib_get.addheaders = DEFAULT_HEADERS.copy()
better_urllib_get.add_handler(HTTPHandler())
better_urllib_get.add_handler(HTTPSHandler())
better_urllib_get.add_handler(HTTPRedirectHandler())
better_urllib_get.add_handler(SocketFileHandler())
better_urllib_get.add_handler(Oauth2TokenAuthHandler())


class RegistryError(Exception):
    """Raised when a registry manifest request returns a non-200 status."""

    def __init__(self, response):
        self.response_obj = response


# Util functions
#############################################################################################
def parse_thresholds(spec, include_units=True, units_required=True):
    """
    Given a spec string break it up into ':' separated chunks. Convert strings to ints as it makes sense

    :param spec: The threshold specification being parsed
    :param include_units: Specifies that units should be processed and returned if present
    :param units_required: Mark spec as invalid if the units are missing.
    :return: A ThresholdSpec holding warn, crit, and units (if included and present)
    :raises ValueError: on blank fields, missing required units, or extra fields
    """
    parts = deque(spec.split(':'))
    if not all(parts):
        raise ValueError("Blanks are not allowed in a threshold specification: {}".format(spec))

    # Warn
    warn = int(parts.popleft())
    # Crit
    crit = int(parts.popleft())

    units = ''
    if include_units:
        if len(parts):
            # units
            units = parts.popleft()
        elif units_required:
            raise ValueError("Missing units in {}".format(spec))

    if len(parts) != 0:
        raise ValueError("Too many threshold specifiers in {}".format(spec))

    return ThresholdSpec(warn=warn, crit=crit, units=units)


def pretty_time(seconds):
    """Break a duration in seconds into human-readable parts.

    e.g. 90061 -> ['1d', '1h', '1min', '1s']. A unit only appears when the
    remaining duration strictly exceeds it; seconds are always included.
    """
    remainder = seconds
    result = []
    if remainder > 24 * 60 * 60:
        days, remainder = divmod(remainder, 24 * 60 * 60)
        result.append("{}d".format(int(days)))
    if remainder > 60 * 60:
        hours, remainder = divmod(remainder, 60 * 60)
        result.append("{}h".format(int(hours)))
    if remainder > 60:
        minutes, remainder = divmod(remainder, 60)
        result.append("{}min".format(int(minutes)))
    result.append("{}s".format(int(remainder)))
    return result


def evaluate_numeric_thresholds(container, value, thresholds, name, short_name,
                                min=None, max=None, greater_than=True):
    """Record perfdata and an OK/WARNING/CRITICAL message for a numeric check.

    Appends a Nagios perfdata string ('<container>_<short>=<value><units>;warn;crit[;min[;max]]')
    to the module-level performance_data list, then compares value against
    thresholds.crit / thresholds.warn ('>=' when greater_than, '<=' otherwise)
    and calls critical()/warning()/ok() accordingly.

    Note: 'min'/'max' shadow the builtins; kept for keyword-argument compatibility.
    """
    rounder = lambda x: round(x, 2)

    INTEGER_UNITS = ['B', '%', '']

    # Some units don't have decimal places
    rounded_value = int(value) if thresholds.units in INTEGER_UNITS else rounder(value)

    perf_string = "{container}_{short_name}={value}{units};{warn};{crit}".format(
        container=container,
        short_name=short_name,
        value=rounded_value,
        **thresholds)
    if min is not None:
        rounded_min = math.floor(min) if thresholds.units in INTEGER_UNITS else rounder(min)
        perf_string += ';{}'.format(rounded_min)
    if max is not None:
        rounded_max = math.ceil(max) if thresholds.units in INTEGER_UNITS else rounder(max)
        perf_string += ';{}'.format(rounded_max)

    global performance_data
    performance_data.append(perf_string)

    if thresholds.units == 's':
        nice_time = ' '.join(pretty_time(rounded_value)[:2])
        results_str = "{} {} is {}".format(container, name, nice_time)
    else:
        results_str = "{} {} is {}{}".format(container, name, rounded_value, thresholds.units)

    if greater_than:
        comparator = lambda value, threshold: value >= threshold
    else:
        comparator = lambda value, threshold: value <= threshold

    if comparator(value, thresholds.crit):
        critical(results_str)
    elif comparator(value, thresholds.warn):
        warning(results_str)
    else:
        ok(results_str)


@lru_cache(maxsize=None)
def get_url(url):
    """GET a url through the shared opener; return (parsed JSON body, HTTP status).

    Results are memoized for the life of the process. Uses the module-level
    'timeout' global, which is set from CLI args later in this file.
    """
    logger.debug("get_url: {}".format(url))
    response = better_urllib_get.open(url, timeout=timeout)
    logger.debug("get_url: {} {}".format(url, response.status))
    return process_urllib_response(response), response.status


def process_urllib_response(response):
    """Read a urllib response, decode it as UTF-8, and parse it as JSON."""
    response_bytes = response.read()
    body = response_bytes.decode('utf-8')
    # logger.debug("BODY: {}".format(body))
    return json.loads(body)


def get_container_info(name):
    """Return the daemon's inspect JSON for one container."""
    content, _ = get_url(daemon + '/containers/{container}/json'.format(container=name))
    return content


def get_image_info(name):
    """Return the daemon's inspect JSON for one image."""
    content, _ = get_url(daemon + '/images/{image}/json'.format(image=name))
    return content


def get_state(container):
    """Return the 'State' sub-document of a container's inspect JSON."""
    return get_container_info(container)['State']


def get_stats(container):
    """Return a single (non-streaming) stats sample for a container."""
    content, _ = get_url(daemon + '/containers/{container}/stats?stream=0'.format(container=container))
    return content


def get_ps_name(name_list):
    # Pick the name that starts with a '/' but doesn't contain a '/' and return that value
    for name in name_list:
        if '/' not in name[1:] and name[0] == '/':
            return name[1:]
    else:
        raise NameError("Error when trying to identify 'ps' name in {}".format(name_list))


def get_containers(names, require_present):
    """Resolve the --containers arguments against the daemon's container list.

    'all' selects every container. Otherwise each entry is treated as an
    anchored regex; when require_present is true a regex that matches nothing
    records a CRITICAL result.
    """
    containers_list, _ = get_url(daemon + '/containers/json?all=1')

    all_container_names = set(get_ps_name(x['Names']) for x in containers_list)

    if 'all' in names:
        return all_container_names

    filtered = set()
    for matcher in names:
        found = False
        for candidate in all_container_names:
            if re.match("^{}$".format(matcher), candidate):
                filtered.add(candidate)
                found = True
        # If we don't find a container that matches our regex
        if require_present and not found:
            critical("No containers match {}".format(matcher))

    return filtered


def get_container_image_id(container):
    # find registry and tag
    inspection = get_container_info(container)
    return inspection['Image']


def get_container_image_urls(container):
    """Return the RepoTags list of the image a container was created from."""
    inspection = get_container_info(container)
    image_id = inspection['Image']
    image_info = get_image_info(image_id)
    return image_info['RepoTags']


def normalize_image_name_to_manifest_url(image_name, insecure_registries):
    """Build the v2 manifest URL for an image; http for insecure registries, else https.

    :return: (manifest url, registry host)
    """
    parsed_url = parse_image_name(image_name)

    lower_insecure = [reg.lower() for reg in insecure_registries]

    # Registry query url
    scheme = 'http' if parsed_url.registry.lower() in lower_insecure else 'https'
    url = '{scheme}://{registry}/v2/{image_name}/manifests/{image_tag}'.format(scheme=scheme,
                                                                               registry=parsed_url.registry,
                                                                               image_name=parsed_url.name,
                                                                               image_tag=parsed_url.tag)
    return url, parsed_url.registry


# Auth servers seem picky about being hit too hard. Can't figure out why. ;)
# As result it is best to single thread this check
# This is based on https://docs.docker.com/registry/spec/auth/token/#requesting-a-token
def get_digest_from_registry(url):
    """Fetch an image manifest and return its config digest (None if absent).

    :raises RegistryError: when the registry responds with a non-200 status
    """
    logger.debug("get_digest_from_registry")
    # query registry
    # TODO: Handle logging in if needed
    registry_info, status_code = get_url(url=url)

    if status_code != 200:
        raise RegistryError(response=registry_info)
    return registry_info['config'].get('digest', None)


def set_rc(new_rc):
    # Nagios return codes escalate: keep the worst (highest) one seen so far.
    global rc
    rc = new_rc if new_rc > rc else rc


def ok(message):
    set_rc(OK_RC)
    messages.append('OK: ' + message)


def warning(message):
    set_rc(WARNING_RC)
    messages.append('WARNING: ' + message)


def critical(message):
    set_rc(CRITICAL_RC)
    messages.append('CRITICAL: ' + message)


def unknown(message):
    set_rc(UNKNOWN_RC)
    messages.append('UNKNOWN: ' + message)


def require_running(name):
    """Decorator factory: run the check only if the container is 'running'.

    'name' labels the check in the CRITICAL message emitted when the
    container is in any other state.
    """

    def inner_decorator(func):
        def wrapper(container, *args, **kwargs):
            container_state = get_state(container)
            state = normalize_state(container_state)
            if state.lower() == "running":
                func(container, *args, **kwargs)
            else:
                # container is not running, can't perform check
                # BUGFIX: removed a stray trailing '"' from the message.
                critical('{container} is not "running", cannot check {check}'.format(container=container,
                                                                                     check=name))

        return wrapper

    return inner_decorator
| def multithread_execution(disable_threading=DISABLE_THREADING): 425 | def inner_decorator(func): 426 | def wrapper(container, *args, **kwargs): 427 | if DISABLE_THREADING: 428 | func(container, *args, **kwargs) 429 | else: 430 | threads.append(parallel_executor.submit(func, container, *args, **kwargs)) 431 | 432 | return wrapper 433 | 434 | return inner_decorator 435 | 436 | 437 | def singlethread_execution(disable_threading=DISABLE_THREADING): 438 | def inner_decorator(func): 439 | def wrapper(container, *args, **kwargs): 440 | if DISABLE_THREADING: 441 | func(container, *args, **kwargs) 442 | else: 443 | threads.append(serial_executor.submit(func, container, *args, **kwargs)) 444 | 445 | return wrapper 446 | 447 | return inner_decorator 448 | 449 | 450 | def parse_image_name(image_name): 451 | """ 452 | Parses image names into their constituent parts. 453 | :param image_name: 454 | :return: ImageName 455 | """ 456 | 457 | # These are based on information found here 458 | # https://docs.docker.com/engine/reference/commandline/tag/#extended-description 459 | # https://github.com/docker/distribution/blob/master/reference/regexp.go 460 | host_segment_re = '[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?' 
461 | hostname_re = r'({host_segment}\.)+{host_segment}'.format(host_segment=host_segment_re) 462 | registry_re = r'((?P({hostname_re}(:\d+)?|{host_segment_re}:\d+))/)'.format( 463 | host_segment_re=host_segment_re, hostname_re=hostname_re) 464 | name_component_ends_re = '[a-z0-9]' 465 | name_component_middle_re = '[a-z0-9._-]' # Ignoring spec limit of two _ 466 | name_component_re = '({end}{middle}*{end}|{end})'.format(end=name_component_ends_re, 467 | middle=name_component_middle_re) 468 | image_name_re = "(?P({name_component}/)*{name_component})".format(name_component=name_component_re) 469 | image_tag_re = '(?P[a-zA-Z0-9_][a-zA-Z0-9_.-]*)' 470 | full_re = '^{registry}?{image_name}(:{image_tag})?$'.format(registry=registry_re, image_name=image_name_re, 471 | image_tag=image_tag_re) 472 | parsed = re.match(full_re, image_name) 473 | 474 | registry = parsed.group('registry') if parsed.group('registry') else DEFAULT_PUBLIC_REGISTRY 475 | 476 | image_name = parsed.group('image_name') 477 | image_name = image_name if '/' in image_name or registry != DEFAULT_PUBLIC_REGISTRY else 'library/' + image_name 478 | 479 | image_tag = parsed.group('image_tag') 480 | image_tag = image_tag if image_tag else 'latest' 481 | 482 | full_image_name = "{registry}/{image_name}:{image_tag}".format( 483 | registry=registry, 484 | image_name=image_name, 485 | image_tag=image_tag) 486 | 487 | return ImageName(registry=registry, name=image_name, tag=image_tag, full_name=full_image_name) 488 | 489 | 490 | def normalize_state(status_info): 491 | # Ugh, docker used to report state in as silly way then they figured out how to do it better. 492 | # This tries the simpler new way and if that doesn't work fails back to the old way 493 | 494 | # On new docker engines the status holds whatever the current state is, running, stopped, paused, etc. 
495 | if "Status" in status_info: 496 | return status_info['Status'] 497 | 498 | status = 'Exited' 499 | if status_info["Restarting"]: 500 | status = 'Restarting' 501 | elif status_info["Paused"]: 502 | status = 'Paused' 503 | elif status_info["Dead"]: 504 | status = 'Dead' 505 | elif status_info["Running"]: 506 | return "Running" 507 | return status 508 | 509 | 510 | # Checks 511 | ############################################################################################# 512 | 513 | @multithread_execution() 514 | @require_running(name='memory') 515 | def check_memory(container, thresholds): 516 | if not thresholds.units in unit_adjustments: 517 | unknown("Memory units must be one of {}".format(list(unit_adjustments.keys()))) 518 | return 519 | 520 | inspection = get_stats(container) 521 | 522 | # Subtracting cache to match what `docker stats` does. 523 | adjusted_usage = inspection['memory_stats']['usage'] - inspection['memory_stats']['stats']['total_cache'] 524 | if thresholds.units == '%': 525 | max = 100 526 | usage = int(100 * adjusted_usage / inspection['memory_stats']['limit']) 527 | else: 528 | max = inspection['memory_stats']['limit'] / unit_adjustments[thresholds.units] 529 | usage = adjusted_usage / unit_adjustments[thresholds.units] 530 | 531 | evaluate_numeric_thresholds(container=container, value=usage, thresholds=thresholds, name='memory', 532 | short_name='mem', min=0, max=max) 533 | 534 | 535 | @multithread_execution() 536 | def check_status(container, desired_state): 537 | normized_desired_state = desired_state.lower() 538 | normalized_state = normalize_state(get_state(container)).lower() 539 | if normized_desired_state != normalized_state: 540 | critical("{} state is not {}".format(container, desired_state)) 541 | return 542 | ok("{} status is {}".format(container, desired_state)) 543 | 544 | 545 | @multithread_execution() 546 | @require_running('health') 547 | def check_health(container): 548 | state = get_state(container) 549 | if "Health" 
in state and "Status" in state["Health"]: 550 | health = state["Health"]["Status"] 551 | message = "{} is {}".format(container, health) 552 | if health == 'healthy': 553 | ok(message) 554 | elif health == 'unhealthy': 555 | critical(message) 556 | else: 557 | unknown(message) 558 | else: 559 | unknown('{} has no health check data'.format(container)) 560 | 561 | 562 | @multithread_execution() 563 | @require_running('uptime') 564 | def check_uptime(container, thresholds): 565 | inspection = get_container_info(container)['State']['StartedAt'] 566 | only_secs = inspection[0:19] 567 | start = datetime.strptime(only_secs, "%Y-%m-%dT%H:%M:%S") 568 | start = start.replace(tzinfo=timezone.utc) 569 | now = datetime.now(timezone.utc) 570 | uptime = (now - start).total_seconds() 571 | 572 | graph_padding = 2 573 | thresholds.units = 's' 574 | evaluate_numeric_thresholds(container=container, value=uptime, thresholds=thresholds, name='uptime', 575 | short_name='up', min=0, max=graph_padding, greater_than=False) 576 | 577 | 578 | @multithread_execution() 579 | def check_image_age(container, thresholds): 580 | container_image = get_container_info(container)['Image'] 581 | image_created = get_image_info(container_image)['Created'] 582 | only_secs = image_created[0:19] 583 | start = datetime.strptime(only_secs, "%Y-%m-%dT%H:%M:%S") 584 | start = start.replace(tzinfo=timezone.utc) 585 | now = datetime.now(timezone.utc) 586 | image_age = (now - start).days 587 | 588 | graph_padding = 2 589 | thresholds.units = 'd' 590 | evaluate_numeric_thresholds(container=container, value=image_age, thresholds=thresholds, name='image_age', 591 | short_name='age', min=0, max=graph_padding, greater_than=True) 592 | 593 | 594 | @multithread_execution() 595 | @require_running('restarts') 596 | def check_restarts(container, thresholds): 597 | inspection = get_container_info(container) 598 | 599 | restarts = int(inspection['RestartCount']) 600 | graph_padding = 2 601 | 
evaluate_numeric_thresholds(container=container, value=restarts, thresholds=thresholds, name='restarts', 602 | short_name='re', min=0, max=graph_padding) 603 | 604 | 605 | @singlethread_execution() 606 | def check_version(container, insecure_registries): 607 | image_id = get_container_image_id(container) 608 | logger.debug("Local container image ID: {}".format(image_id)) 609 | if image_id is None: 610 | unknown('Checksum missing for "{}", try doing a pull'.format(container)) 611 | return 612 | 613 | image_urls = get_container_image_urls(container=container) 614 | if len(image_urls) > 1: 615 | unknown('"{}" has multiple tags/names. Unsure which one to use to check the version.'.format(container)) 616 | return 617 | elif len(image_urls) == 0: 618 | unknown('"{}" has last no repository tag. Is this anywhere else?'.format(container)) 619 | return 620 | 621 | url, registry = normalize_image_name_to_manifest_url(image_urls[0], insecure_registries) 622 | logger.debug("Looking up image digest here {}".format(url)) 623 | try: 624 | registry_hash = get_digest_from_registry(url) 625 | except URLError as e: 626 | if hasattr(e.reason, 'reason') and e.reason.reason == 'UNKNOWN_PROTOCOL': 627 | unknown( 628 | "TLS error connecting to registry {} for {}, should you use the '--insecure-registry' flag?" 
\ 629 | .format(registry, container)) 630 | return 631 | elif hasattr(e.reason, 'strerror') and e.reason.strerror == 'nodename nor servname provided, or not known': 632 | unknown( 633 | "Cannot reach registry for {} at {}".format(container, url)) 634 | return 635 | else: 636 | raise e 637 | except RegistryError as e: 638 | unknown("Cannot check version, couldn't retrieve digest for {} while checking {}.".format(container, url)) 639 | return 640 | logger.debug("Image digests, local={} remote={}".format(image_id, registry_hash)) 641 | if registry_hash == image_id: 642 | ok("{}'s version matches registry".format(container)) 643 | return 644 | critical("{}'s version does not match registry".format(container)) 645 | 646 | 647 | def calculate_cpu_capacity_precentage(info, stats): 648 | host_config = info['HostConfig'] 649 | 650 | if 'online_cpus' in stats['cpu_stats']: 651 | num_cpus = stats['cpu_stats']['online_cpus'] 652 | else: 653 | num_cpus = len(stats['cpu_stats']['cpu_usage']['percpu_usage']) 654 | 655 | # Identify limit system being used 656 | # --cpus 657 | if 'NanoCpus' in host_config and host_config['NanoCpus'] != 0: 658 | period = 1000000000 659 | quota = host_config['NanoCpus'] 660 | # --cpu-quota 661 | elif 'CpuQuota' in host_config and host_config['CpuQuota'] != 0: 662 | period = 100000 if host_config['CpuPeriod'] == 0 else host_config['CpuPeriod'] 663 | quota = host_config['CpuQuota'] 664 | # unlimited 665 | else: 666 | period = 1 667 | quota = num_cpus 668 | 669 | if period * num_cpus < quota: 670 | # This handles the case where the quota is actually bigger than amount available by all the cpus. 
671 | available_limit_ratio = 1 672 | else: 673 | available_limit_ratio = (period * num_cpus) / quota 674 | 675 | cpu_delta = stats['cpu_stats']['cpu_usage']['total_usage'] - stats['precpu_stats']['cpu_usage']['total_usage'] 676 | system_delta = stats['cpu_stats']['system_cpu_usage'] - stats['precpu_stats']['system_cpu_usage'] 677 | usage = (cpu_delta / system_delta) * available_limit_ratio 678 | usage = round(usage * 100, 0) 679 | return usage 680 | 681 | 682 | @multithread_execution() 683 | @require_running('cpu') 684 | def check_cpu(container, thresholds): 685 | info = get_container_info(container) 686 | 687 | stats = get_stats(container=container) 688 | 689 | usage = calculate_cpu_capacity_precentage(info=info, stats=stats) 690 | 691 | max = 100 692 | thresholds.units = '%' 693 | evaluate_numeric_thresholds(container=container, value=usage, thresholds=thresholds, name='cpu', short_name='cpu', 694 | min=0, max=max) 695 | 696 | 697 | def process_args(args): 698 | parser = argparse.ArgumentParser(description='Check docker containers.') 699 | 700 | # Connect to local socket or ip address 701 | connection_group = parser.add_mutually_exclusive_group() 702 | connection_group.add_argument('--connection', 703 | dest='connection', 704 | action='store', 705 | default=DEFAULT_SOCKET, 706 | type=str, 707 | metavar='[//docker.socket|:]', 708 | help='Where to find docker daemon socket. 
(default: %(default)s)') 709 | 710 | connection_group.add_argument('--secure-connection', 711 | dest='secure_connection', 712 | action='store', 713 | type=str, 714 | metavar='[:]', 715 | help='Where to find TLS protected docker daemon socket.') 716 | 717 | base_group = parser.add_mutually_exclusive_group() 718 | base_group.add_argument('--binary_units', 719 | dest='units_base', 720 | action='store_const', 721 | const=1024, 722 | help='Use a base of 1024 when doing calculations of KB, MB, GB, & TB (This is default)') 723 | 724 | base_group.add_argument('--decimal_units', 725 | dest='units_base', 726 | action='store_const', 727 | const=1000, 728 | help='Use a base of 1000 when doing calculations of KB, MB, GB, & TB') 729 | parser.set_defaults(units_base=1024) 730 | 731 | # Connection timeout 732 | parser.add_argument('--timeout', 733 | dest='timeout', 734 | action='store', 735 | type=float, 736 | default=DEFAULT_TIMEOUT, 737 | help='Connection timeout in seconds. (default: %(default)s)') 738 | 739 | # Container name 740 | parser.add_argument('--containers', 741 | dest='containers', 742 | action='store', 743 | nargs='+', 744 | type=str, 745 | default=['all'], 746 | help='One or more RegEx that match the names of the container(s) to check. If omitted all containers are checked. (default: %(default)s)') 747 | 748 | # Container name 749 | parser.add_argument('--present', 750 | dest='present', 751 | default=False, 752 | action='store_true', 753 | help='Modifies --containers so that each RegEx must match at least one container.') 754 | 755 | # Threads 756 | parser.add_argument('--threads', 757 | dest='threads', 758 | default=DEFAULT_PARALLELISM, 759 | action='store', 760 | type=int, 761 | help='This + 1 is the maximum number of concurent threads/network connections. 
(default: %(default)s)') 762 | 763 | # CPU 764 | parser.add_argument('--cpu', 765 | dest='cpu', 766 | action='store', 767 | type=str, 768 | metavar='WARN:CRIT', 769 | help='Check cpu usage percentage taking into account any limits.') 770 | 771 | # Memory 772 | parser.add_argument('--memory', 773 | dest='memory', 774 | action='store', 775 | type=str, 776 | metavar='WARN:CRIT:UNITS', 777 | help='Check memory usage taking into account any limits. Valid values for units are %%,B,KB,MB,GB.') 778 | 779 | # State 780 | parser.add_argument('--status', 781 | dest='status', 782 | action='store', 783 | type=str, 784 | help='Desired container status (running, exited, etc).') 785 | 786 | # Health 787 | parser.add_argument('--health', 788 | dest='health', 789 | default=None, 790 | action='store_true', 791 | help="Check container's health check status") 792 | 793 | # Age 794 | parser.add_argument('--uptime', 795 | dest='uptime', 796 | action='store', 797 | type=str, 798 | metavar='WARN:CRIT', 799 | help='Minimum container uptime in seconds. Use when infrequent crashes are tolerated.') 800 | 801 | # Image Age 802 | parser.add_argument('--image-age', 803 | dest='image_age', 804 | action='store', 805 | type=str, 806 | metavar='WARN:CRIT', 807 | help='Maximum image age in days.') 808 | 809 | # Version 810 | parser.add_argument('--version', 811 | dest='version', 812 | default=None, 813 | action='store_true', 814 | help='Check if the running images are the same version as those in the registry. Useful for finding stale images. Does not support login.') 815 | 816 | # Version 817 | parser.add_argument('--insecure-registries', 818 | dest='insecure_registries', 819 | action='store', 820 | nargs='+', 821 | type=str, 822 | default=[], 823 | help='List of registries to connect to with http(no TLS). 
Useful when using "--version" with images from insecure registries.') 824 | 825 | # Restart 826 | parser.add_argument('--restarts', 827 | dest='restarts', 828 | action='store', 829 | type=str, 830 | metavar='WARN:CRIT', 831 | help='Container restart thresholds.') 832 | 833 | # no-ok 834 | parser.add_argument('--no-ok', 835 | dest='no_ok', 836 | action='store_true', 837 | help='Make output terse suppressing OK messages. If all checks are OK return a single OK.') 838 | 839 | # no-performance 840 | parser.add_argument('--no-performance', 841 | dest='no_performance', 842 | action='store_true', 843 | help='Suppress performance data. Reduces output when performance data is not being used.') 844 | 845 | parser.add_argument('-V', action='version', version='%(prog)s {}'.format(__version__)) 846 | 847 | if len(args) == 0: 848 | parser.print_help() 849 | 850 | parsed_args = parser.parse_args(args=args) 851 | 852 | global timeout 853 | timeout = parsed_args.timeout 854 | 855 | global daemon 856 | global connection_type 857 | if parsed_args.secure_connection: 858 | daemon = 'https://' + parsed_args.secure_connection 859 | connection_type = 'https' 860 | elif parsed_args.connection: 861 | if parsed_args.connection[0] == '/': 862 | daemon = 'socket://' + parsed_args.connection + ':' 863 | connection_type = 'socket' 864 | else: 865 | daemon = 'http://' + parsed_args.connection 866 | connection_type = 'http' 867 | 868 | return parsed_args 869 | 870 | 871 | def no_checks_present(parsed_args): 872 | # Look for all functions whose name starts with 'check_' 873 | checks = [key[6:] for key in globals().keys() if key.startswith('check_')] 874 | # Act like --present is a check though it is not implemented like one 875 | return all(getattr(parsed_args, check) is None for check in checks) and not parsed_args.present 876 | 877 | 878 | def socketfile_permissions_failure(parsed_args): 879 | if connection_type == 'socket': 880 | return not (os.path.exists(parsed_args.connection) 881 | and 
stat.S_ISSOCK(os.stat(parsed_args.connection).st_mode) 882 | and os.access(parsed_args.connection, os.R_OK) 883 | and os.access(parsed_args.connection, os.W_OK)) 884 | else: 885 | return False 886 | 887 | 888 | def print_results(): 889 | if no_ok: 890 | # Remove all the "OK"s 891 | filtered_messages = [message for message in messages if not message.startswith('OK: ')] 892 | if len(filtered_messages) == 0: 893 | messages_concat = 'OK' 894 | else: 895 | messages_concat = '; '.join(filtered_messages) 896 | 897 | else: 898 | messages_concat = '; '.join(messages) 899 | 900 | if no_performance or len(performance_data) == 0: 901 | print(messages_concat) 902 | else: 903 | perfdata_concat = ' '.join(performance_data) 904 | print(messages_concat + '|' + perfdata_concat) 905 | 906 | 907 | def perform_checks(raw_args): 908 | args = process_args(raw_args) 909 | 910 | global parallel_executor 911 | parallel_executor = futures.ThreadPoolExecutor(max_workers=args.threads) 912 | global serial_executor 913 | serial_executor = futures.ThreadPoolExecutor(max_workers=1) 914 | 915 | global unit_adjustments 916 | unit_adjustments = {key: args.units_base ** value for key, value in UNIT_ADJUSTMENTS_TEMPLATE.items()} 917 | 918 | global no_ok 919 | no_ok = args.no_ok 920 | 921 | global no_performance 922 | no_performance = args.no_ok 923 | 924 | if socketfile_permissions_failure(args): 925 | unknown("Cannot access docker socket file. 
User ID={}, socket file={}".format(os.getuid(), args.connection)) 926 | return 927 | 928 | if args.containers == ["all"] and args.present: 929 | unknown("You can not use --present without --containers") 930 | return 931 | 932 | if no_checks_present(args): 933 | unknown("No checks specified.") 934 | return 935 | 936 | # Here is where all the work happens 937 | ############################################################################################# 938 | containers = get_containers(args.containers, args.present) 939 | 940 | if len(containers) == 0 and not args.present: 941 | unknown("No containers names found matching criteria") 942 | return 943 | 944 | for container in containers: 945 | 946 | # Check status 947 | if args.status: 948 | check_status(container, args.status) 949 | 950 | # Check version 951 | if args.version: 952 | check_version(container, args.insecure_registries) 953 | 954 | # below are checks that require a 'running' status 955 | 956 | # Check status 957 | if args.health: 958 | check_health(container) 959 | 960 | # Check cpu usage 961 | if args.cpu: 962 | check_cpu(container, parse_thresholds(args.cpu, units_required=False)) 963 | 964 | # Check memory usage 965 | if args.memory: 966 | check_memory(container, parse_thresholds(args.memory, units_required=False)) 967 | 968 | # Check uptime 969 | if args.uptime: 970 | check_uptime(container, parse_thresholds(args.uptime, include_units=False)) 971 | 972 | # Check image age 973 | if args.image_age: 974 | check_image_age(container, parse_thresholds(args.image_age, include_units=False)) 975 | 976 | # Check restart count 977 | if args.restarts: 978 | check_restarts(container, parse_thresholds(args.restarts, include_units=False)) 979 | 980 | 981 | def main(): 982 | try: 983 | perform_checks(argv[1:]) 984 | 985 | # get results to let exceptions in threads bubble out 986 | [x.result() for x in futures.as_completed(threads)] 987 | 988 | except Exception as e: 989 | traceback.print_exc() 990 | 
unknown("Exception raised during check': {}".format(repr(e))) 991 | print_results() 992 | exit(rc) 993 | 994 | 995 | if __name__ == '__main__': 996 | main() 997 | -------------------------------------------------------------------------------- /check_docker/check_swarm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import json 4 | import logging 5 | import os 6 | import re 7 | import socket 8 | import stat 9 | import traceback 10 | from functools import lru_cache 11 | from http.client import HTTPConnection 12 | from sys import argv 13 | from urllib.request import AbstractHTTPHandler, HTTPHandler, HTTPSHandler, OpenerDirector 14 | 15 | logger = logging.getLogger() 16 | __author__ = 'Tim Laurence' 17 | __copyright__ = "Copyright 2019" 18 | __credits__ = ['Tim Laurence'] 19 | __license__ = "GPL" 20 | __version__ = "2.2.2" 21 | 22 | ''' 23 | nrpe compatible check for docker swarm 24 | 25 | Requires Python 3 26 | 27 | Note: I really would have preferred to have used requests for all the network connections but that would have added a 28 | dependency. 29 | ''' 30 | 31 | DEFAULT_SOCKET = '/var/run/docker.sock' 32 | DEFAULT_TIMEOUT = 10.0 33 | DEFAULT_PORT = 2375 34 | DEFAULT_HEADERS = [('Accept', 'application/vnd.docker.distribution.manifest.v2+json')] 35 | OK_RC = 0 36 | WARNING_RC = 1 37 | CRITICAL_RC = 2 38 | UNKNOWN_RC = 3 39 | 40 | HTTP_GOOD_CODES = range(200, 299) 41 | 42 | # These hold the final results 43 | rc = -1 44 | messages = [] 45 | 46 | 47 | # Hacked up urllib to handle sockets 48 | ############################################################################################# 49 | # Docker runs a http connection over a socket. http.client is knows how to deal with these 50 | # but lacks some niceties. Urllib wraps that and makes up for some of the deficiencies but 51 | # cannot fix the fact http.client can't read from socket files. 
In order to take advantage of 52 | # urllib and http.client's capabilities the class below tweaks HttpConnection and passes it 53 | # to urllib registering for socket:// connections 54 | 55 | # This is all side effect so excluding coverage 56 | class SocketFileHandler(AbstractHTTPHandler): # pragma: no cover 57 | class SocketFileToHttpConnectionAdaptor(HTTPConnection): 58 | def __init__(self, socket_file, timeout=DEFAULT_TIMEOUT): 59 | super().__init__(host='', port=0, timeout=timeout) 60 | self.socket_file = socket_file 61 | 62 | def connect(self): 63 | self.sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM, proto=0, fileno=None) 64 | self.sock.settimeout(self.timeout) 65 | self.sock.connect(self.socket_file) 66 | 67 | def socket_open(self, req): 68 | socket_file, path = req.selector.split(':', 1) 69 | req.host = socket_file 70 | req.selector = path 71 | return self.do_open(self.SocketFileToHttpConnectionAdaptor, req) 72 | 73 | 74 | better_urllib_get = OpenerDirector() 75 | better_urllib_get.addheaders = DEFAULT_HEADERS.copy() 76 | better_urllib_get.add_handler(HTTPHandler()) 77 | better_urllib_get.add_handler(HTTPSHandler()) 78 | better_urllib_get.add_handler(SocketFileHandler()) 79 | 80 | 81 | # Util functions 82 | ############################################################################################# 83 | 84 | 85 | @lru_cache() 86 | def get_url(url): 87 | response = better_urllib_get.open(url, timeout=timeout) 88 | return process_urllib_response(response), response.status 89 | 90 | 91 | def process_urllib_response(response): 92 | response_bytes = response.read() 93 | body = response_bytes.decode('utf-8') 94 | logger.debug(body) 95 | return json.loads(body) 96 | 97 | 98 | def get_swarm_status(): 99 | content, status = get_url(daemon + '/swarm') 100 | return status 101 | 102 | 103 | def get_service_info(name): 104 | return get_url(daemon + '/services/{service}'.format(service=name)) 105 | 106 | 107 | def get_service_tasks(name): 108 | 
tasks, status = get_url(daemon + '/tasks?filters={{"name":{{"{service}":true}}}}'.format(service=name)) 109 | return tasks 110 | 111 | 112 | def get_nodes(): 113 | return get_url(daemon + '/nodes') 114 | 115 | 116 | def get_services(names): 117 | services_list, status = get_url(daemon + '/services') 118 | if status == 406: 119 | critical("Error checking service status, node is not in swarm mode") 120 | return [] 121 | elif status not in HTTP_GOOD_CODES: 122 | unknown("Could not retrieve service info") 123 | return [] 124 | 125 | all_services_names = set(x['Spec']['Name'] for x in services_list) 126 | if 'all' in names: 127 | return all_services_names 128 | 129 | filtered = set() 130 | not_found = [] 131 | for matcher in names: 132 | found = False 133 | for candidate in all_services_names: 134 | if re.match("^{}$".format(matcher), candidate): 135 | filtered.add(candidate) 136 | found = True 137 | # If we don't find a service that matches out regex 138 | if not found: 139 | not_found.append(matcher) 140 | if len(not_found) > 0: 141 | critical("No services match {}".format(','.join(not_found))) 142 | return filtered 143 | 144 | 145 | def set_rc(new_rc): 146 | global rc 147 | rc = new_rc if new_rc > rc else rc 148 | 149 | 150 | def ok(message): 151 | set_rc(OK_RC) 152 | messages.append('OK: ' + message) 153 | 154 | 155 | def warning(message): 156 | set_rc(WARNING_RC) 157 | messages.append('WARNING: ' + message) 158 | 159 | 160 | def critical(message): 161 | set_rc(CRITICAL_RC) 162 | messages.append('CRITICAL: ' + message) 163 | 164 | 165 | def unknown(message): 166 | set_rc(UNKNOWN_RC) 167 | messages.append('UNKNOWN: ' + message) 168 | 169 | 170 | # Checks 171 | ############################################################################################# 172 | def check_swarm(): 173 | status = get_swarm_status() 174 | process_url_status(status, ok_msg='Node is in a swarm', 175 | critical_msg='Node is not in a swarm', unknown_msg='Error accessing swarm info') 176 | 177 
| 178 | def process_global_service(name, ignore_paused=False): 179 | bad_node_states = {'drain'} 180 | if ignore_paused: 181 | bad_node_states.add('paused') 182 | 183 | # Get all the nodes we care about based on their state 184 | node_list, status = get_nodes() 185 | node_index = set() 186 | for node in node_list: 187 | if node['Spec']['Availability'] in bad_node_states: 188 | continue 189 | node_index.add(node['ID']) 190 | 191 | # If a task is on a targeted node confirm it is running 192 | # Services that are not running are considered bad. This is to prevent services in crash loops from being ignored 193 | # Also note, this ignores conditions where services state they are running on a node not in the index. 194 | service_tasks = get_service_tasks(name) 195 | for task in service_tasks: 196 | if task['Status']['State'] != 'running': 197 | critical('Global service {service} has one or more tasks not running'.format(service=name)) 198 | return 199 | node_index.discard(task['NodeID']) 200 | 201 | if len(node_index) > 0: 202 | critical('Global service {service} has {count} tasks not running'.format(service=name, count=len(node_list))) 203 | 204 | ok('Global service {service} OK'.format(service=name)) 205 | 206 | 207 | def process_replicated_service(name, replicas_desired): 208 | # Services that are not running are considered bad. This is to prevent services in crash loops from being ignored 209 | all_tasks = get_service_tasks(name) 210 | running_tasks = [task for task in all_tasks if task['Status']['State'] == 'running'] 211 | num_tasks = len(running_tasks) 212 | if num_tasks != replicas_desired: 213 | critical('Replicated service {service} has {num_tasks} tasks, {replicas_desired} desired'. 
214 | format(service=name, num_tasks=num_tasks, replicas_desired=replicas_desired)) 215 | else: 216 | ok('Replicated service {service} OK'.format(service=name)) 217 | 218 | 219 | def check_service(name, ignore_paused=False): 220 | # get service mode 221 | service_info, status = get_service_info(name) 222 | mode_info = service_info['Spec']['Mode'] 223 | 224 | # if global ensure one per node 225 | if 'Global' in mode_info: 226 | process_global_service(name=name, ignore_paused=ignore_paused) 227 | # if replicated ensure sufficient number of replicas 228 | elif 'Replicated' in mode_info: 229 | process_replicated_service(name=name, replicas_desired=mode_info['Replicated']['Replicas']) 230 | 231 | 232 | def process_url_status(status, ok_msg=None, critical_msg=None, unknown_msg=None): 233 | if status in HTTP_GOOD_CODES: 234 | return ok(ok_msg) 235 | elif status in [503, 404, 406]: 236 | return critical(critical_msg) 237 | else: 238 | return unknown(unknown_msg) 239 | 240 | 241 | def process_args(args): 242 | parser = argparse.ArgumentParser(description='Check docker swarm.') 243 | 244 | # Connect to local socket or ip address 245 | connection_group = parser.add_mutually_exclusive_group() 246 | connection_group.add_argument('--connection', 247 | dest='connection', 248 | action='store', 249 | default=DEFAULT_SOCKET, 250 | type=str, 251 | metavar='[//docker.socket|:]', 252 | help='Where to find docker daemon socket. (default: %(default)s)') 253 | 254 | connection_group.add_argument('--secure-connection', 255 | dest='secure_connection', 256 | action='store', 257 | type=str, 258 | metavar='[:]', 259 | help='Where to find TLS protected docker daemon socket.') 260 | 261 | # Connection timeout 262 | parser.add_argument('--timeout', 263 | dest='timeout', 264 | action='store', 265 | type=float, 266 | default=DEFAULT_TIMEOUT, 267 | help='Connection timeout in seconds. 
(default: %(default)s)') 268 | 269 | swarm_group = parser.add_mutually_exclusive_group(required=True) 270 | 271 | # Swarm 272 | swarm_group.add_argument('--swarm', 273 | dest='swarm', 274 | default=None, 275 | action='store_true', 276 | help='Check swarm status') 277 | 278 | # Service 279 | swarm_group.add_argument('--service', 280 | dest='service', 281 | action='store', 282 | type=str, 283 | nargs='+', 284 | default=[], 285 | help='One or more RegEx that match the names of the services(s) to check.') 286 | 287 | swarm_group.add_argument('--ignore_paused', 288 | dest='ignore_paused', 289 | action='store_true', 290 | help="Don't require global services to be running on paused nodes") 291 | 292 | parser.add_argument('-V', action='version', version='%(prog)s {}'.format(__version__)) 293 | 294 | if len(args) == 0: 295 | parser.print_help() 296 | 297 | parsed_args = parser.parse_args(args=args) 298 | 299 | global timeout 300 | timeout = parsed_args.timeout 301 | 302 | global daemon 303 | global connection_type 304 | if parsed_args.secure_connection: 305 | daemon = 'https://' + parsed_args.secure_connection 306 | connection_type = 'https' 307 | elif parsed_args.connection: 308 | if parsed_args.connection[0] == '/': 309 | daemon = 'socket://' + parsed_args.connection + ':' 310 | connection_type = 'socket' 311 | else: 312 | daemon = 'http://' + parsed_args.connection 313 | connection_type = 'http' 314 | 315 | return parsed_args 316 | 317 | 318 | def socketfile_permissions_failure(parsed_args): 319 | if connection_type == 'socket': 320 | return not (os.path.exists(parsed_args.connection) 321 | and stat.S_ISSOCK(os.stat(parsed_args.connection).st_mode) 322 | and os.access(parsed_args.connection, os.R_OK) 323 | and os.access(parsed_args.connection, os.W_OK)) 324 | else: 325 | return False 326 | 327 | 328 | def print_results(): 329 | print('; '.join(messages)) 330 | 331 | 332 | def perform_checks(raw_args): 333 | args = process_args(raw_args) 334 | if 
socketfile_permissions_failure(args): 335 | unknown("Cannot access docker socket file. User ID={}, socket file={}".format(os.getuid(), args.connection)) 336 | else: 337 | # Here is where all the work happens 338 | ############################################################################################# 339 | try: 340 | if args.swarm: 341 | check_swarm() 342 | elif args.service: 343 | services = get_services(args.service) 344 | 345 | # Status is set to critical by get_services() if nothing is found for a name 346 | for service in services: 347 | check_service(name=service, ignore_paused=args.ignore_paused) 348 | 349 | except Exception as e: 350 | traceback.print_exc() 351 | unknown("Exception raised during check: {}".format(repr(e))) 352 | 353 | print_results() 354 | 355 | 356 | def main(): 357 | perform_checks(argv[1:]) 358 | exit(rc) 359 | 360 | 361 | if __name__ == '__main__': 362 | main() 363 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["poetry"] 3 | build-backend = "poetry.masonry.api" 4 | 5 | [tool.poetry] 6 | name = "check_docker" 7 | version = "2.2.2" 8 | description = "Nagios/NRPE compatible plugins for checking Docker based services" 9 | license = "GPL-3.0" 10 | authors = ["Tim Laurence "] 11 | readme = "README.rst" 12 | homepage = "https://github.com/timdaman/check_docker" 13 | repository = "https://github.com/timdaman/check_docker" 14 | 15 | classifiers=[ 16 | "Programming Language :: Python", 17 | "Programming Language :: Python :: 3", 18 | "Intended Audience :: System Administrators", 19 | "Environment :: Other Environment", 20 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 21 | "Operating System :: OS Independent", 22 | "Topic :: System :: Networking", 23 | ] 24 | packages = [ 25 | { include = "check_docker" }, 26 | ] 27 | 28 | [tool.poetry.scripts] 29 | 
check_docker = "check_docker.check_docker:main" 30 | check_swarm = "check_docker.check_swarm:main" 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /release_process.md: -------------------------------------------------------------------------------- 1 | 2 | 1. Confirm documentation is updated 3 | - README 4 | - DEV doc 5 | 1. Unit tests pass 6 | 1. Isolated tests pass 7 | 8 | ./run_isolated_tests.sh 9 | 10 | 1. make package 11 | 12 | pipenv run poetry build 13 | 14 | 1. Uninstall check_docker and install package 15 | 16 | pipenv uninstall check_docker && pipenv run pip install dist/check_docker-X.X.X-py2.py3-none-any.whl 17 | 18 | 1. Bats smoke tests pass 19 | 20 | ./run_package_tests.sh 21 | 22 | 1. Push to branch 23 | 1. Confirm doc looks good on github 24 | 1. Travis tests pass 25 | 1. Create and merge PR 26 | 1. Confirm Travis still passes 27 | 1. CodeClimate does not show scary issues (need to modify analyzed branch) 28 | 1. Upload package to test repo 29 | 30 | poetry publish -r pypi -u timdaman -p xxxx 31 | 32 | 1. Check test project page for formatting 33 | 34 | https://test.pypi.org/project/check_docker/ 35 | 36 | 1. Upload package to prod repo 37 | 38 | poetry publish -r prodpypi -u timdaman -p xxxx 39 | 40 | 1. Check project page for formatting 41 | 42 | https://pypi.org/project/check_docker/ 43 | -------------------------------------------------------------------------------- /run_isolated_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eu 3 | 4 | (cd testing_tools && docker build -t check_docker_tests .) 
5 | 6 | docker run --rm -v $PWD:$PWD -w $PWD -ti check_docker_tests tox -------------------------------------------------------------------------------- /run_package_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eux 4 | 5 | cd testing_tools/vagrant 6 | vagrant up 7 | vagrant ssh -c "bats -p /check_docker/testing_tools/vagrant" 8 | vagrant suspend 9 | -------------------------------------------------------------------------------- /testing_tools/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 AS build 2 | ENV DEBIAN_FRONTEND=noninteractive 3 | 4 | ENV PYENV_ROOT="/pyenv" 5 | ENV PATH="$PYENV_ROOT/bin:$PATH" 6 | WORKDIR / 7 | RUN apt update 8 | RUN apt install --no-install-recommends --fix-missing -y build-essential make locales libssl1.1 libssl-dev \ 9 | libffi-dev libbz2-dev libreadline-dev libsqlite3-dev libjpeg-dev zlib1g-dev libxml2-dev libxslt1-dev \ 10 | curl ca-certificates 11 | RUN curl -kL https://github.com/pyenv/pyenv/archive/master.tar.gz | tar -xz \ 12 | && mv pyenv-master /pyenv 13 | RUN echo 3.5.6 3.6.7 3.7.1 | xargs -n 1 -P $(nproc) pyenv install 14 | RUN /pyenv/versions/3.7.1/bin/pip3.7 install setuptools wheel flit tox 15 | 16 | FROM ubuntu:20.04 17 | ENV DEBIAN_FRONTEND=noninteractive 18 | SHELL ["/bin/bash", "-lc"] 19 | ENTRYPOINT ["/bin/bash", "-lc"] 20 | RUN apt update \ 21 | && apt install --no-install-recommends --fix-missing -y git libssl1.1 ca-certificates netbase \ 22 | && apt-get autoremove -y \ 23 | && apt-get clean all \ 24 | && rm -rf /var/lib/apt/lists/* 25 | COPY --from=build /pyenv /pyenv 26 | ENV PYENV_ROOT="/pyenv" 27 | RUN echo 'PATH="/pyenv/bin:$PATH"' >> /etc/profile.d/02-pyenv.sh 28 | RUN echo 'eval "$(pyenv init -)"' >> /etc/profile.d/02-pyenv.sh 29 | RUN echo 'pyenv global 3.5.6 3.6.7 3.7.1' >> /etc/profile.d/02-pyenv.sh 30 | # These are needed for some tests 31 | ENV 
LC_ALL=C.UTF-8 32 | ENV LANG=C.UTF-8 33 | ENV isolated=true -------------------------------------------------------------------------------- /testing_tools/vagrant/Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # All Vagrant configuration is done below. The "2" in Vagrant.configure 5 | # configures the configuration version (we support older styles for 6 | # backwards compatibility). Please don't change it unless you know what 7 | # you're doing. 8 | Vagrant.configure("2") do |config| 9 | # The most common configuration options are documented and commented below. 10 | # For a complete reference, please see the online documentation at 11 | # https://docs.vagrantup.com. 12 | 13 | # Every Vagrant development environment requires a box. You can search for 14 | # boxes at https://atlas.hashicorp.com/search. 15 | config.vm.box = "geerlingguy/ubuntu1604" 16 | config.vm.box_version = "1.2.5" 17 | 18 | # Disable automatic box update checking. If you disable this, then 19 | # boxes will only be checked for updates when the user runs 20 | # `vagrant box outdated`. This is not recommended. 21 | # config.vm.box_check_update = false 22 | 23 | # Create a forwarded port mapping which allows access to a specific port 24 | # within the machine from a port on the host machine. In the example below, 25 | # accessing "localhost:8080" will access port 80 on the guest machine. 26 | # config.vm.network "forwarded_port", guest: 80, host: 8080 27 | 28 | # Create a private network, which allows host-only access to the machine 29 | # using a specific IP. 30 | # config.vm.network "private_network", ip: "192.168.33.10" 31 | 32 | # Create a public network, which generally matched to bridged network. 33 | # Bridged networks make the machine appear as another physical device on 34 | # your network. 35 | # config.vm.network "public_network" 36 | 37 | # Share an additional folder to the guest VM. 
The first argument is 38 | # the path on the host to the actual folder. The second argument is 39 | # the path on the guest to mount the folder. And the optional third 40 | # argument is a set of non-required options. 41 | # config.vm.synced_folder "../data", "/vagrant_data" 42 | 43 | # Provider-specific configuration so you can fine-tune various 44 | # backing providers for Vagrant. These expose provider-specific options. 45 | # Example for VirtualBox: 46 | # 47 | # config.vm.provider "virtualbox" do |vb| 48 | # # Display the VirtualBox GUI when booting the machine 49 | # vb.gui = true 50 | # 51 | # # Customize the amount of memory on the VM: 52 | # vb.memory = "1024" 53 | # end 54 | # 55 | # View the documentation for the provider you are using for more 56 | # information on available options. 57 | 58 | # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies 59 | # such as FTP and Heroku are also available. See the documentation at 60 | # https://docs.vagrantup.com/v2/push/atlas.html for more information. 61 | # config.push.define "atlas" do |push| 62 | # push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME" 63 | # end 64 | 65 | # Enable provisioning with a shell script. Additional provisioners such as 66 | # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the 67 | # documentation for more information about their specific syntax and use. 
68 | config.vm.provision "shell", inline: <<-SHELL 69 | add-apt-repository -y ppa:deadsnakes/ppa 70 | apt update 71 | apt install -y python3.8 python3.8-distutils curl 72 | curl -s https://bootstrap.pypa.io/get-pip.py | python3.8 73 | curl -fsSL https://get.docker.com | sh 74 | usermod -a -G docker vagrant 75 | curl -s -L https://github.com/bats-core/bats-core/archive/master.tar.gz | tar -xzf - 76 | bash bats-core-master/install.sh /usr/local 77 | rm -rf ./bats-core-master 78 | docker swarm init 79 | SHELL 80 | # No FS share to allow any deps to the host 81 | config.vm.synced_folder "../../", "/check_docker", disabled: false, mount_options: ["ro"] 82 | 83 | end 84 | -------------------------------------------------------------------------------- /testing_tools/vagrant/bats_fixtures.bash: 1 | 2 | good_container() { 3 | docker run -d --name good_sleep busybox sleep 1d 4 | } 5 | 6 | bad_container() { 7 | docker run -d --name bad_sleep busybox false 8 | } 9 | 10 | current_container() { 11 | docker pull busybox:latest 12 | docker run -d --name current_container busybox:latest sleep 1d 13 | } 14 | 15 | old_container() { 16 | docker pull busybox:1.28.1 17 | docker tag busybox:1.28.1 busybox:latest 18 | docker rmi busybox:1.28.1 19 | docker run -d --name old_container busybox:latest sleep 1d 20 | } 21 | 22 | 23 | crashing_container() { 24 | docker run -d --name crashes --restart always busybox false 25 | } 26 | 27 | get_check_docker_version() { 28 | pip3 show check_docker 2>/dev/null | sed -n '/^Version: /s/^Version: //p' 29 | } -------------------------------------------------------------------------------- /testing_tools/vagrant/tests.bats: 1 | 2 | if ! id vagrant 3 | then 4 | echo "This is only intended to be run inside a vagrant box!"
>&2 5 | echo "Running it outside may result in data loss" >&2 6 | fi 7 | 8 | NEWEST_SDIST="$(ls -t /check_docker/dist/check_docker-*.tar.gz | head -1)" 9 | NEWEST_WHEEL="$(ls -t /check_docker/dist/check_docker-*.whl | head -1)" 10 | 11 | teardown() 12 | { 13 | docker ps -aq 14 | COUNT=$(docker ps -aq | wc -l) 15 | if [ $COUNT -ne 0 ] 16 | then 17 | docker stop -t 0 $(docker ps -aq) 18 | docker rm -f $(docker ps -aq) 19 | fi 20 | STACKS=$(docker stack ls) 21 | if grep -q TEST_STACK <<<"$STACKS" 22 | then 23 | docker stack rm TEST_STACK 24 | TEST_CONTAINERS_COUNT=$(docker ps | grep TEST_STACK | wc -l) 25 | while [ $TEST_CONTAINERS_COUNT -ne 0 ] 26 | do 27 | sleep 1 28 | TEST_CONTAINERS_COUNT=$(docker ps | grep TEST_STACK | wc -l) 29 | done 30 | 31 | TEST_NETWORK_COUNT=$(docker network ls | grep TEST_STACK | wc -l) 32 | while [ $TEST_NETWORK_COUNT -ne 0 ] 33 | do 34 | sleep 1 35 | TEST_NETWORK_COUNT=$(docker network ls | grep TEST_STACK | wc -l) 36 | done 37 | fi 38 | } 39 | 40 | 41 | load bats_fixtures 42 | 43 | 44 | @test "Confirm check_docker is not in path" { 45 | 46 | # Before we start make sure check_docker is not present 47 | sudo -H pip3.8 uninstall -y check-docker || true 48 | run which check_docker 49 | [ "$status" -eq 1 ] 50 | } 51 | 52 | @test "Confirm 'check-docker' is not installed" { 53 | 54 | # Before we start make sure check_docker is not present 55 | pip3.8 list 2>&1 | grep -ve check-docker 56 | } 57 | 58 | @test "Confirm source package, $NEWEST_SDIST, is installable" { 59 | echo pip3.8 install "$NEWEST_SDIST" 60 | run sudo -H pip3.8 install "$NEWEST_SDIST" 61 | [ "$status" -eq 0 ] 62 | } 63 | 64 | @test "Re-Confirm 'check-docker' is not installed" { 65 | 66 | # This should never error since the previous step ensures package is already present 67 | sudo -H pip3.8 uninstall -y check-docker 68 | # Before we start make sure check_docker is not present 69 | pip3.8 list 2>&1 | grep -ve check-docker 70 | } 71 | 72 | @test "Confirm wheel package, 
$NEWEST_WHEEL, is installable" { 73 | 74 | run sudo -H pip3.8 install "$NEWEST_WHEEL" 75 | [ "$status" -eq 0 ] 76 | } 77 | 78 | @test "Confirm check_docker appears in path" { 79 | run which check_docker 80 | [ "$status" -eq 0 ] 81 | } 82 | 83 | @test "Confirm package is installed" { 84 | pip3.8 list | grep 'check-docker' 85 | } 86 | 87 | # It is normal for this to fail when preparing for a PR. 88 | @test "Confirm package version is not already in PyPi" { 89 | VERSION=$(get_check_docker_version) 90 | REMOTE_HTTP_STATUS=$(curl -LI https://pypi.org/project/check_docker/${VERSION}/ -w "%{http_code}" -o /dev/null -s) 91 | [ "$REMOTE_HTTP_STATUS" == 404 ] 92 | } 93 | 94 | @test "Confirm check_docker version matches package" { 95 | PACKAGE_VERSION=$(get_check_docker_version) 96 | CHECK_VERSION=$(python3.8 -c 'from check_docker import check_docker; print(check_docker.__version__)') 97 | 98 | [ "$PACKAGE_VERSION" == "$CHECK_VERSION" ] 99 | } 100 | 101 | @test "Confirm check_swarm version matches package" { 102 | PACKAGE_VERSION=$(get_check_docker_version) 103 | CHECK_VERSION=$(python3.8 -c 'from check_docker import check_swarm; print(check_swarm.__version__)') 104 | 105 | [ "$PACKAGE_VERSION" == "$CHECK_VERSION" ] 106 | } 107 | 108 | @test "Good status" { 109 | good_container 110 | sleep 1 111 | run check_docker --container good_sleep --status running 112 | echo "$status" 113 | echo $output 114 | [ "$status" -eq 0 ] 115 | } 116 | 117 | @test "Bad status" { 118 | bad_container 119 | run check_docker --container bad_sleep --status running 120 | echo "$status" 121 | echo $output 122 | [ "$status" -eq 2 ] 123 | } 124 | 125 | @test "Current version" { 126 | docker pull busybox 127 | current_container 128 | run check_docker --container current_container --version 129 | echo "$status" 130 | echo $output 131 | [ "$status" -eq 0 ] 132 | } 133 | 134 | @test "Old version" { 135 | old_container 136 | run check_docker --container old_container --version 137 | echo "$status" 138 | echo 
$output 139 | [ "$status" -eq 2 ] 140 | } 141 | 142 | @test "Doesn't crash" { 143 | good_container 144 | sleep 5 145 | run check_docker --container good_sleep --restarts 1:2 146 | echo "$status" 147 | echo $output 148 | [ "$status" -eq 0 ] 149 | } 150 | 151 | @test "Does crash" { 152 | crashing_container 153 | sleep 5 154 | run check_docker --container crashes --restarts 1:2 155 | echo "$status" 156 | echo $output 157 | [ "$status" -eq 2 ] 158 | } 159 | 160 | @test "Checks multiple containers" { 161 | good_container 162 | current_container 163 | run check_docker --container good_sleep current_container --status running 164 | echo "$status" 165 | echo $output 166 | [ "$status" -eq 0 ] 167 | } 168 | 169 | @test "Checks multiple containers regex" { 170 | good_container 171 | current_container 172 | run check_docker --container '.*' --status running 173 | echo "$status" 174 | echo $output 175 | [ "$status" -eq 0 ] 176 | } 177 | 178 | @test "Checks get all containers" { 179 | good_container 180 | current_container 181 | run check_docker --container '.*' --status running 182 | echo "$status" 183 | echo $output 184 | [ "$status" -eq 0 ] 185 | CONTIANERS_IN_CHECK=$(echo $output | tr ';' '\n' | wc -l) 186 | [ "$CONTIANERS_IN_CHECK" -eq 2 ] 187 | 188 | } 189 | 190 | SITE_PACKAGES_DIR=/$(pip3.8 show check_docker | grep '^Location' | cut -d ' ' -f 2)/check_docker 191 | @test "Can check_docker be run when called directly" { 192 | 193 | run python3.8 $SITE_PACKAGES_DIR/check_docker.py --help 194 | [ "$status" -eq 0 ] 195 | } 196 | 197 | @test "Can check_swarm be run when called directly" { 198 | 199 | run python3.8 $SITE_PACKAGES_DIR/check_swarm.py --help 200 | [ "$status" -eq 0 ] 201 | 202 | } 203 | 204 | @test "Confirm replicated service failures are noticed" { 205 | cat <