├── .codeclimate.yml
├── .coveragerc
├── .dockerignore
├── .github
└── workflows
│ └── tests.yml
├── .gitignore
├── .travis.yml
├── DEVLOPEMENT.md
├── LICENSE
├── Pipfile
├── README.rst
├── check_docker
├── __init__.py
├── check_docker.py
└── check_swarm.py
├── pyproject.toml
├── release_process.md
├── run_isolated_tests.sh
├── run_package_tests.sh
├── testing_tools
├── Dockerfile
└── vagrant
│ ├── Vagrantfile
│ ├── bats_fixtures.bash
│ └── tests.bats
├── tests
├── __init__.py
├── test_check_docker.py
├── test_check_swarm.py
└── test_version.py
└── tox.ini
/.codeclimate.yml:
--------------------------------------------------------------------------------
1 | languages:
2 | Ruby: true
3 | JavaScript: true
4 | PHP: true
5 | Python: true
6 | exclude_paths:
7 | - "check_docker/tests/*"
8 | - "tests/*"
9 | plugins:
10 | radon:
11 | enabled: true
12 | sonar-python:
13 | enabled: true
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | include =
3 | check_docker/check_*.py
4 | omit =
5 | tests/*
6 | */__init__.py
7 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | *
3 | !testing_tools
4 | !dev_requirements.txt
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Run tests
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 | pull_request:
7 | branches: [ master ]
8 |
9 | jobs:
10 | build:
11 |
12 | runs-on: ubuntu-latest
13 | strategy:
14 | matrix:
15 | python_version: ["3.6", "3.7", "3.8"]
16 |
17 | steps:
18 | - uses: actions/checkout@v2
19 | - name: Set up Python
20 | uses: actions/setup-python@v2
21 | with:
22 | python-version: ${{ matrix.python_version }}
23 | - name: Install dependencies
24 | run: |
25 | python -V
26 | printenv
27 | python -m pip install --upgrade pip
28 | pip install flake8 pytest coverage pyfakefs pytest-cov
29 | - name: Lint with flake8
30 | run: |
31 | # stop the build if there are Python syntax errors or undefined names
32 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
33 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
34 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
35 | - name: Test with pytest
36 | run: |
37 | pytest --cov=check_docker --cov-fail-under 90 --cov-report term --cov-report html
38 | - uses: actions/upload-artifact@v2
39 | with:
40 | name: coverage_report
41 | path: htmlcov
42 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ### Python template
2 | # Byte-compiled / optimized / DLL files
3 | __pycache__/
4 | *.py[cod]
5 | *$py.class
6 | MANIFEST
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | env/
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *.cover
48 | .hypothesis/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 |
58 | # Flask instance folder
59 | instance/
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # IPython Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # dotenv
80 | .env
81 |
82 | # virtualenv
83 | venv/
84 | ENV/
85 |
86 | # Spyder project settings
87 | .spyderproject
88 |
89 | # Rope project settings
90 | .ropeproject
91 |
92 | .idea
93 |
94 |
95 | ## File-based project format:
96 | *.iws
97 |
98 | ## Plugin-specific files:
99 |
100 | # IntelliJ
101 | /out/
102 |
103 |
104 | # JIRA plugin
105 | atlassian-ide-plugin.xml
106 |
107 | # Crashlytics plugin (for Android Studio and IntelliJ)
108 | com_crashlytics_export_strings.xml
109 | crashlytics.properties
110 | crashlytics-build.properties
111 | fabric.properties
112 |
113 | /coverage
114 | /.pytest_cache/
115 | .pytest_cache/
116 | config.yml
117 | cr-sess1.json
118 |
119 |
120 | testing_tools/vagrant/.vagrant
121 | !check_docker/
122 |
123 | .DS_Store
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "3.5"
4 | - "3.6"
5 | - "3.7"
6 | - "3.8"
7 | install:
8 | - pip install pipenv
9 | - pipenv install
10 | - pipenv install codeclimate-test-reporter
11 | # command to run tests
12 | script:
13 | - py.test --cov=check_docker
14 | - codeclimate-test-reporter || echo "Ignoring Code Climate reporter upload failure"
15 |
--------------------------------------------------------------------------------
/DEVLOPEMENT.md:
--------------------------------------------------------------------------------
1 | # Development environment setup
2 |
3 | You should have the following installed
4 |
5 | - docker
6 | - python (version >= 3.0)
7 | - pipenv
8 | - vagrant
9 |
10 | Initialize your pipenv
11 |
12 | pipenv install --skip-lock
13 |
14 | # Running the tests
15 |
16 | ## Normal tests
17 | tox and Pytest are used for testing. You can run the tests by running the following from
18 | the root of the project
19 |
20 | tox
21 |
22 | ## Isolated tests
23 | Sometimes test cases can interact with Docker on the development machine making
24 | it hard to determine the cause of a test success or failure. To address this
25 | you can use the `run_isolated_tests.sh` script to run pytest inside an
26 | environment isolated from any network. Additionally this isolated test will
27 | run the unit tests on multiple versions of python so you can validate your
28 | changes are not python version specific.
29 |
30 | ./run_isolated_tests.sh
31 |
32 | ## Package tests
33 | These tests verify that, once created, the package can be installed and
34 | runs successfully (not just passes unit tests). To do this a test environment is set up in vagrant.
35 |
36 | ./run_package_tests.sh
37 |
38 | # Coverage report
39 | The aim is to keep coverage above 90% on the actual checks
40 | (check_docker.py and check_swarm.py). To generate a coverage report.
41 |
42 | pipenv run py.test --cov=check_docker/
43 |
44 | # Tips
45 | When jumping back and forth between normal and isolated tests the `__pycache__`
46 | directories can fall out of sync with your execution environment. When this
47 | happens you see errors like `ImportError: No module named 'check_docker'`. The
48 | fix is simple, just remove all the `__pycache__` directories in the project.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | name = "pypi"
3 | url = "https://pypi.org/simple"
4 | verify_ssl = true
5 |
6 | [packages]
tox = "*"
tox-pyenv = "*"
pytest = "*"
pytest-random-order = "*"
coverage = ">4.0,<4.4"
pyfakefs = "*"
pytest-cov = "<2.6"
poetry = "*"
15 |
16 | [requires]
17 | python_version = "3.8"
18 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | |Build Status| |Code Climate| |Test Coverage| |Downloads|
2 |
3 |
4 | ============
5 | check_docker
6 | ============
7 |
8 | Nagios/NRPE compatible plugins for checking docker based services. Currently there are two nagios checks
9 |
10 | - **check_docker** which checks docker container health
11 | - **check_swarm** which checks health of swarm nodes and services
12 |
With **check_docker** you can check and alert on
14 |
15 | - memory consumption in absolute units (bytes, kb, mb, gb) and as a percentage (0-100%)
16 | of the container limit.
17 | - CPU usages as a percentage (0-100%) of container limit.
18 | - automatic restarts performed by the docker daemon
19 | - container status, i.e. is it running?
20 | - container health checks are passing?
21 | - uptime, i.e. is it able to stay running for a long enough time?
22 | - the presence of a container or containers matching specified names
23 | - image version, does the running image match that in the remote registry?
24 | - image age, when was the image built the last time?
25 |
26 | With **check_swarm** you can alert
27 |
28 | - if a node is not joined to a docker swarm
29 | - if a service is running in a swarm
30 |
31 | These checks can communicate with a local docker daemon socket file (default) or with local
32 | or remote docker daemons using secure and non-secure TCP connections.
33 |
34 | These plugins require python 3. It is tested on 3.5 and greater but may work on older
35 | versions of 3.
36 |
37 | Installation
38 | -----------------
39 |
40 | With pip
41 | ::
42 |
43 | pip3 install check_docker
44 | --or--
45 | pip install check_docker
46 |
47 | With curl
48 | ::
49 |
50 | curl -o /usr/local/bin/check_docker https://raw.githubusercontent.com/timdaman/check_docker/master/check_docker/check_docker.py
51 | curl -o /usr/local/bin/check_swarm https://raw.githubusercontent.com/timdaman/check_docker/master/check_docker/check_swarm.py
52 | chmod a+rx /usr/local/bin/check_docker /usr/local/bin/check_swarm
53 |
54 | With wget
55 | ::
56 |
57 | wget -O /usr/local/bin/check_docker https://raw.githubusercontent.com/timdaman/check_docker/master/check_docker/check_docker.py
58 | wget -O /usr/local/bin/check_swarm https://raw.githubusercontent.com/timdaman/check_docker/master/check_docker/check_swarm.py
59 | chmod a+rx /usr/local/bin/check_docker /usr/local/bin/check_swarm
60 |
61 |
62 | check_docker Usage
63 | ------------------
64 |
65 | ::
66 |
67 | usage: check_docker.py [-h]
68 | [--connection [//docker.socket|:]
69 | | --secure-connection [:]]
70 | [--binary_units | --decimal_units] [--timeout TIMEOUT]
71 | [--containers CONTAINERS [CONTAINERS ...]] [--present]
72 | [--threads THREADS] [--cpu WARN:CRIT]
73 | [--memory WARN:CRIT:UNITS] [--status STATUS] [--health]
74 | [--uptime WARN:CRIT] [--image-age WARN:CRIT] [--version]
75 | [--insecure-registries INSECURE_REGISTRIES [INSECURE_REGISTRIES ...]]
76 | [--restarts WARN:CRIT] [--no-ok] [--no-performance] [-V]
77 |
78 | Check docker containers.
79 |
80 | optional arguments:
81 | -h, --help show this help message and exit
82 | --connection [//docker.socket|:]
83 | Where to find docker daemon socket. (default:
84 | /var/run/docker.sock)
85 | --secure-connection [:]
86 | Where to find TLS protected docker daemon socket.
87 | --binary_units Use a base of 1024 when doing calculations of KB, MB,
88 | GB, & TB (This is default)
89 | --decimal_units Use a base of 1000 when doing calculations of KB, MB,
90 | GB, & TB
91 | --timeout TIMEOUT Connection timeout in seconds. (default: 10.0)
92 | --containers CONTAINERS [CONTAINERS ...]
93 | One or more RegEx that match the names of the
94 | container(s) to check. If omitted all containers are
95 | checked. (default: ['all'])
96 | --present Modifies --containers so that each RegEx must match at
97 | least one container.
98 | --threads THREADS This + 1 is the maximum number of concurent
99 | threads/network connections. (default: 10)
100 | --cpu WARN:CRIT Check cpu usage percentage taking into account any
101 | limits. Valid values are 0 - 100.
102 | --memory WARN:CRIT:UNITS
103 | Check memory usage taking into account any limits.
104 | Valid values for units are %,B,KB,MB,GB.
105 | --status STATUS Desired container status (running, exited, etc).
106 | --health Check container's health check status
107 | --uptime WARN:CRIT Minimum container uptime in seconds. Use when
108 | infrequent crashes are tolerated.
109 | --image-age WARN:CRIT Maximum image age in days.
110 | --version Check if the running images are the same version as
111 | those in the registry. Useful for finding stale
112 | images. Does not support login.
113 | --insecure-registries INSECURE_REGISTRIES [INSECURE_REGISTRIES ...]
114 | List of registries to connect to with http(no TLS).
115 | Useful when using "--version" with images from
116 | insecure registries.
117 | --restarts WARN:CRIT Container restart thresholds.
118 | --no-ok Make output terse suppressing OK messages. If all
119 | checks are OK return a single OK.
120 | --no-performance Suppress performance data. Reduces output when
121 | performance data is not being used.
122 | -V show program's version number and exit
123 |
124 |
125 | check_swarm Usage
126 | -----------------
127 |
128 | ::
129 |
130 | usage: check_swarm.py [-h]
131 | [--connection [//docker.socket|:]
132 | | --secure-connection [:]]
133 | [--timeout TIMEOUT]
134 | (--swarm | --service SERVICE [SERVICE ...] | --ignore_paused)
135 | [-V]
136 |
137 | Check docker swarm.
138 |
139 | optional arguments:
140 | -h, --help show this help message and exit
141 | --connection [//docker.socket|:]
142 | Where to find docker daemon socket. (default:
143 | /var/run/docker.sock)
144 | --secure-connection [:]
145 | Where to find TLS protected docker daemon socket.
146 | --timeout TIMEOUT Connection timeout in seconds. (default: 10.0)
147 | --swarm Check swarm status
148 | --service SERVICE [SERVICE ...]
149 | One or more RegEx that match the names of the
150 | services(s) to check.
151 | --ignore_paused Don't require global services to be running on paused nodes
152 | -V show program's version number and exit
153 |
154 | Gotchas
155 | -------
156 |
- When using check_docker with older versions of docker (I have seen 1.4 and 1.5) --status only supports 'running', 'restarting', and 'paused'.
- When using check_docker, if no container is specified, all containers are checked. Some containers may return critical status if the selected check(s) require a running container.
159 | - When using check_docker, --present cannot be used without --containers to indicate what to check the presence of.
160 |
161 | .. |Build Status| image:: https://travis-ci.org/timdaman/check_docker.svg?branch=master
162 | :target: https://travis-ci.org/timdaman/check_docker
163 | .. |Code Climate| image:: https://codeclimate.com/github/timdaman/check_docker/badges/gpa.svg
164 | :target: https://codeclimate.com/github/timdaman/check_docker
165 | .. |Test Coverage| image:: https://codeclimate.com/github/timdaman/check_docker/badges/coverage.svg
166 | :target: https://codeclimate.com/github/timdaman/check_docker/coverage
167 | .. |Downloads| image:: http://pepy.tech/badge/check-docker
168 | :target: http://pepy.tech/count/check-docker
169 |
--------------------------------------------------------------------------------
/check_docker/__init__.py:
--------------------------------------------------------------------------------
"""Nagios/NRPE compatible plugins for checking docker based services"""
# NOTE(review): this version string is duplicated as __version__ in
# check_docker/check_docker.py — keep the two in sync when releasing.
__version__ = "2.2.2"
--------------------------------------------------------------------------------
/check_docker/check_docker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # logging.basicConfig(level=logging.DEBUG)
3 | import argparse
4 | import json
5 | import logging
6 | import math
7 | import os
8 | import re
9 | import socket
10 | import stat
11 | import traceback
12 | from collections import deque, namedtuple, UserDict, defaultdict
13 | from concurrent import futures
14 | from datetime import datetime, timezone
15 | from functools import lru_cache
16 | from http.client import HTTPConnection
17 | from sys import argv
18 | from urllib import request
19 | from urllib.error import HTTPError, URLError
20 | from urllib.request import AbstractHTTPHandler, HTTPHandler, HTTPSHandler, OpenerDirector, HTTPRedirectHandler, \
21 | Request, HTTPBasicAuthHandler
22 |
logger = logging.getLogger()
__author__ = 'Tim Laurence'
__copyright__ = "Copyright 2019"
__credits__ = ['Tim Laurence']
__license__ = "GPL"
# NOTE(review): duplicated in check_docker/__init__.py — keep in sync
__version__ = "2.2.2"

'''
nrpe compatible check for docker containers.

Requires Python 3

Note: I really would have preferred to have used requests for all the network connections but that would have added a
dependency.
'''

# Connection defaults: local docker socket, 10s timeout, plain-TCP port 2375
DEFAULT_SOCKET = '/var/run/docker.sock'
DEFAULT_TIMEOUT = 10.0
DEFAULT_PORT = 2375
DEFAULT_MEMORY_UNITS = 'B'
# Ask registries for v2 manifests when doing image-version checks
DEFAULT_HEADERS = [('Accept', 'application/vnd.docker.distribution.manifest.v2+json')]
DEFAULT_PUBLIC_REGISTRY = 'registry-1.docker.io'

# The second value is the power to raise the base to.
UNIT_ADJUSTMENTS_TEMPLATE = {
    '%': 0,
    'B': 0,
    'KB': 1,
    'MB': 2,
    'GB': 3,
    'TB': 4
}
# Built from the template once the base (1024 vs 1000) is chosen —
# presumably during argument processing; confirm against the caller.
unit_adjustments = None

# Reduce message to a single OK unless a check fails.
no_ok = False

# Suppress performance data reporting
no_performance = False

# Nagios plugin return codes
OK_RC = 0
WARNING_RC = 1
CRITICAL_RC = 2
UNKNOWN_RC = 3

# These hold the final results
rc = -1
messages = []
performance_data = []

# Parsed pieces of an image reference plus the original full name
ImageName = namedtuple('ImageName', "registry name tag full_name")
74 |
75 |
class ThresholdSpec(UserDict):
    """Warn/crit threshold pair plus optional units.

    Behaves as a dict holding the keys 'warn', 'crit' and 'units' while also
    exposing the same three values as attributes (spec.warn, spec.crit,
    spec.units) for convenience.
    """

    def __init__(self, warn, crit, units=''):
        super().__init__(warn=warn, crit=crit, units=units)

    def __getattr__(self, item):
        # Anything other than the three known keys falls through to the
        # default lookup (which raises AttributeError).
        if item not in ('warn', 'crit', 'units'):
            return super().__getattr__(item)
        return self.data[item]
84 |
85 |
# How much threading can we do? Checks are generally network- rather than
# CPU-bound, so this is just a worst-case cap on concurrency.
DEFAULT_PARALLELISM = 10

# Holds list of all worker threads spawned while running checks
threads = []

# This is used during testing (presumably to force synchronous
# execution — confirm against the test suite)
DISABLE_THREADING = False
94 |
95 |
96 | # Hacked up urllib to handle sockets
97 | #############################################################################################
98 | # Docker runs a http connection over a socket. http.client is knows how to deal with these
99 | # but lacks some niceties. Urllib wraps that and makes up for some of the deficiencies but
100 | # cannot fix the fact http.client can't read from socket files. In order to take advantage of
101 | # urllib and http.client's capabilities the class below tweaks HttpConnection and passes it
102 | # to urllib registering for socket:// connections
103 |
104 | # This is all side effect so excluding coverage
class SocketFileHandler(AbstractHTTPHandler):
    """urllib handler that routes 'socket://' URLs over a unix domain socket.

    Docker serves its HTTP API on a socket file, which http.client cannot
    open directly. This adaptor subclasses HTTPConnection and swaps in an
    AF_UNIX socket at connect() time, then registers itself with urllib for
    the socket scheme.
    """

    class SocketFileToHttpConnectionAdaptor(HTTPConnection):  # pragma: no cover
        def __init__(self, socket_file, timeout=DEFAULT_TIMEOUT):
            # host/port are meaningless for unix sockets but required by
            # the HTTPConnection constructor
            super().__init__(host='', port=0, timeout=timeout)
            self.socket_file = socket_file

        def connect(self):
            # Replace the TCP socket with a stream-mode unix socket
            self.sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM, proto=0, fileno=None)
            self.sock.settimeout(self.timeout)
            self.sock.connect(self.socket_file)

    def socket_open(self, req):
        # The selector looks like '<socket path>:<http path>'; make the
        # socket file the "host" and hand the remainder to HTTP as the path.
        socket_file, path = req.selector.split(':', 1)
        req.host = socket_file
        req.selector = path
        return self.do_open(self.SocketFileToHttpConnectionAdaptor, req)
121 |
122 |
123 | # Tokens are not cached because I expect the callers to cache the responses
class Oauth2TokenAuthHandler(HTTPBasicAuthHandler):
    """Handle docker-registry Oauth2 "Bearer" token authentication.

    When a response comes back 401 with a "www-authenticate: Bearer ..."
    challenge, fetch a token from the auth server named in the challenge and
    replay the request once with an Authorization header. Tokens are not
    cached because callers are expected to cache the responses.
    """

    # Per-URL attempt counter, used to break infinite 401/auth loops
    auth_failure_tracker = defaultdict(int)

    def http_response(self, request, response):
        """Intercept Bearer challenges; pass all other responses through."""
        code = response.code

        www_authenticate_header = response.headers.get('www-authenticate', None)
        if code == 401 and www_authenticate_header:
            scheme = www_authenticate_header.split()[0]
            if scheme.lower() == 'bearer':
                return self.process_oauth2(request, response, www_authenticate_header)

        return response

    https_response = http_response

    @staticmethod
    def _get_outh2_token(www_authenticate_header):
        """Request a bearer token from the endpoint named in the challenge.

        Based on https://docs.docker.com/registry/spec/auth/token/
        """
        # Parse the challenge's key="value" fields, e.g.
        #   Bearer realm="https://auth...",service="...",scope="..."
        # Fixed: the original pattern used '(?P[^ ,=]+)' which is missing the
        # group name and is not a valid regular expression.
        auth_fields = dict(re.findall(r'(?:(?P<key>[^ ,=]+)="([^"]+)")', www_authenticate_header))

        auth_url = "{realm}?scope={scope}&service={service}".format(
            realm=auth_fields['realm'],
            scope=auth_fields['scope'],
            service=auth_fields['service'],
        )
        token_request = Request(auth_url)
        token_request.add_header("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
        token_response = request.urlopen(token_request)
        return process_urllib_response(token_response)['token']

    def process_oauth2(self, request, response, www_authenticate_header):
        """Fetch a token for the challenge and replay the request with it."""
        # This keeps infinite auth loops from happening
        full_url = request.full_url
        self.auth_failure_tracker[full_url] += 1
        if self.auth_failure_tracker[full_url] > 1:
            raise HTTPError(full_url, 401, "Stopping Oauth2 failure loop for {}".format(full_url),
                            response.headers, response)

        auth_token = self._get_outh2_token(www_authenticate_header)

        request.add_unredirected_header('Authorization', 'Bearer ' + auth_token)
        return self.parent.open(request, timeout=request.timeout)
167 |
168 |
# Single shared opener used for all GETs: speaks http, https and the
# socket:// scheme above, follows redirects, and answers registry
# Oauth2 bearer challenges.
better_urllib_get = OpenerDirector()
better_urllib_get.addheaders = DEFAULT_HEADERS.copy()
better_urllib_get.add_handler(HTTPHandler())
better_urllib_get.add_handler(HTTPSHandler())
better_urllib_get.add_handler(HTTPRedirectHandler())
better_urllib_get.add_handler(SocketFileHandler())
better_urllib_get.add_handler(Oauth2TokenAuthHandler())
176 |
177 |
class RegistryError(Exception):
    """Raised when a registry manifest request returns a non-200 response."""

    def __init__(self, response):
        # Keep the parsed response payload so callers can inspect the failure
        self.response_obj = response
181 |
182 |
183 | # Util functions
184 | #############################################################################################
def parse_thresholds(spec, include_units=True, units_required=True):
    """Split a ':'-separated threshold spec into warn, crit and units.

    :param spec: Threshold specification string, e.g. "5:10:MB".
    :param include_units: When true, a third field is treated as the units.
    :param units_required: Mark spec as invalid if the units are missing.
    :return: ThresholdSpec holding warn, crit and units ('' if absent).
    :raises ValueError: On blank fields, missing required units, or extra
        trailing fields.
    """
    fields = spec.split(':')
    if not all(fields):
        raise ValueError("Blanks are not allowed in a threshold specification: {}".format(spec))

    # First two fields are always the warn and crit thresholds
    warn, crit = int(fields[0]), int(fields[1])
    remaining = fields[2:]

    units = ''
    if include_units:
        if remaining:
            units = remaining.pop(0)
        elif units_required:
            raise ValueError("Missing units in {}".format(spec))

    if remaining:
        raise ValueError("Too many threshold specifiers in {}".format(spec))

    return ThresholdSpec(warn=warn, crit=crit, units=units)
215 |
216 |
def pretty_time(seconds):
    """Convert a duration in seconds into human-readable unit parts.

    e.g. 90061 -> ['1d', '1h', '1min', '1s']

    :param seconds: Duration in seconds (numeric).
    :return: List of strings, largest unit first; always ends with seconds.
    """
    remainder = seconds
    result = []
    # Use >= so that exactly one day/hour/minute rolls over into the larger
    # unit (with the original '>' a value of 86400 rendered as "24h 0s" and
    # 3600 as "60min 0s").
    if remainder >= 24 * 60 * 60:
        days, remainder = divmod(remainder, 24 * 60 * 60)
        result.append("{}d".format(int(days)))
    if remainder >= 60 * 60:
        hours, remainder = divmod(remainder, 60 * 60)
        result.append("{}h".format(int(hours)))
    if remainder >= 60:
        minutes, remainder = divmod(remainder, 60)
        result.append("{}min".format(int(minutes)))
    result.append("{}s".format(int(remainder)))
    return result
231 |
232 |
def evaluate_numeric_thresholds(container, value, thresholds, name, short_name,
                                min=None, max=None, greater_than=True):
    """Compare a metric against warn/crit thresholds and record the outcome.

    Appends a Nagios perfdata entry to the global `performance_data` list and
    registers an OK/WARNING/CRITICAL message for `container` depending on how
    `value` compares with `thresholds` (direction set by `greater_than`).

    :param container: Container name used in messages and perfdata.
    :param value: Measured metric value.
    :param thresholds: ThresholdSpec with warn, crit and units.
    :param name: Long metric name for the human-readable message.
    :param short_name: Short metric name for the perfdata entry.
    :param min: Optional minimum appended to the perfdata entry.
    :param max: Optional maximum appended to the perfdata entry.
    :param greater_than: When true a high value breaches the threshold;
        when false a low value does.
    """
    # Units without fractional meaning are reported as whole numbers
    INTEGER_UNITS = ['B', '%', '']

    def _round(number):
        return round(number, 2)

    rounded_value = int(value) if thresholds.units in INTEGER_UNITS else _round(value)

    # Perfdata format: <name>=<value><units>;<warn>;<crit>[;<min>[;<max>]]
    perf_string = "{container}_{short_name}={value}{units};{warn};{crit}".format(
        container=container,
        short_name=short_name,
        value=rounded_value,
        **thresholds)
    if min is not None:
        rounded_min = math.floor(min) if thresholds.units in INTEGER_UNITS else _round(min)
        perf_string += ';{}'.format(rounded_min)
    if max is not None:
        rounded_max = math.ceil(max) if thresholds.units in INTEGER_UNITS else _round(max)
        perf_string += ';{}'.format(rounded_max)

    global performance_data
    performance_data.append(perf_string)

    # Seconds get a friendlier "1d 2h" style rendering in the message
    if thresholds.units == 's':
        display_value = ' '.join(pretty_time(rounded_value)[:2])
    else:
        display_value = '{}{}'.format(rounded_value, thresholds.units)
    results_str = "{} {} is {}".format(container, name, display_value)

    breached = (lambda v, limit: v >= limit) if greater_than else (lambda v, limit: v <= limit)

    if breached(value, thresholds.crit):
        critical(results_str)
    elif breached(value, thresholds.warn):
        warning(results_str)
    else:
        ok(results_str)
274 |
275 |
@lru_cache(maxsize=None)
def get_url(url):
    """GET `url` and return a (parsed JSON body, HTTP status) tuple.

    Memoized via lru_cache, so repeated checks hitting the same endpoint
    reuse the first response for the life of the process. Uses the
    module-global `timeout` — assumed to be set before the first call;
    confirm against the argument-processing code.
    """
    logger.debug("get_url: {}".format(url))
    response = better_urllib_get.open(url, timeout=timeout)
    logger.debug("get_url: {} {}".format(url, response.status))
    return process_urllib_response(response), response.status
282 |
283 |
def process_urllib_response(response):
    """Read a urllib response body, decode it as UTF-8 and parse the JSON."""
    raw = response.read()
    # logger.debug("BODY: {}".format(raw))
    return json.loads(raw.decode('utf-8'))
289 |
290 |
def get_container_info(name):
    """Return the docker inspect JSON for the named container."""
    content, _ = get_url(daemon + '/containers/{container}/json'.format(container=name))
    return content
294 |
295 |
def get_image_info(name):
    """Return the docker inspect JSON for the named image (or image ID)."""
    content, _ = get_url(daemon + '/images/{image}/json'.format(image=name))
    return content
299 |
300 |
def get_state(container):
    """Return the 'State' mapping from the container's inspect data."""
    return get_container_info(container)['State']
303 |
304 |
def get_stats(container):
    """Return a one-shot stats snapshot for the container (stream=0)."""
    content, _ = get_url(daemon + '/containers/{container}/stats?stream=0'.format(container=container))
    return content
308 |
309 |
def get_ps_name(name_list):
    """Pick the container's primary ('ps') name from its API Names list.

    The docker API lists names with a leading '/'; the primary name is the
    one containing no further '/' (link aliases look like '/parent/alias').

    :param name_list: List of name strings from the containers endpoint.
    :return: The primary name with the leading '/' stripped.
    :raises NameError: If no name in name_list has the expected shape.
    """
    candidates = (name for name in name_list
                  if '/' not in name[1:] and name[0] == '/')
    ps_name = next(candidates, None)
    if ps_name is None:
        raise NameError("Error when trying to identify 'ps' name in {}".format(name_list))
    return ps_name[1:]
317 |
318 |
def get_containers(names, require_present):
    """Resolve container-name regexes to the set of matching containers.

    :param names: List of regex strings; the literal 'all' selects every
        container.
    :param require_present: When true, record a CRITICAL message for any
        regex that matches no container.
    :return: Set of matching container names.
    """
    containers_list, _ = get_url(daemon + '/containers/json?all=1')

    all_container_names = {get_ps_name(info['Names']) for info in containers_list}

    if 'all' in names:
        return all_container_names

    selected = set()
    for pattern in names:
        # Anchor the user-supplied regex so it must match the whole name
        matched = {candidate for candidate in all_container_names
                   if re.match("^{}$".format(pattern), candidate)}
        selected |= matched
        # If we don't find a container that matches our regex
        if require_present and not matched:
            critical("No containers match {}".format(pattern))

    return selected
339 |
340 |
def get_container_image_id(container):
    """Return the 'Image' field from the container's inspect data.

    NOTE(review): this appears to be the image ID rather than a repo:tag
    reference — confirm against the docker inspect schema.
    """
    inspection = get_container_info(container)
    return inspection['Image']
345 |
346 |
def get_container_image_urls(container):
    """Return the RepoTags list of the image backing `container`."""
    inspection = get_container_info(container)
    image_id = inspection['Image']
    image_info = get_image_info(image_id)
    return image_info['RepoTags']
352 |
353 |
def normalize_image_name_to_manifest_url(image_name, insecure_registries):
    """Build the registry v2 manifest URL for an image reference.

    :param image_name: Image reference string handed to parse_image_name.
    :param insecure_registries: Registries to contact over plain http
        (compared case-insensitively).
    :return: (manifest_url, registry_host) tuple.
    """
    parsed_url = parse_image_name(image_name)

    # Case-insensitive membership test against the insecure-registry list
    use_http = parsed_url.registry.lower() in (reg.lower() for reg in insecure_registries)
    scheme = 'http' if use_http else 'https'

    url = '{scheme}://{registry}/v2/{image_name}/manifests/{image_tag}'.format(
        scheme=scheme,
        registry=parsed_url.registry,
        image_name=parsed_url.name,
        image_tag=parsed_url.tag)
    return url, parsed_url.registry
366 |
367 |
# Auth servers seem picky about being hit too hard. Can't figure out why. ;)
# As result it is best to single thread this check
# This is based on https://docs.docker.com/registry/spec/auth/token/#requesting-a-token
def get_digest_from_registry(url):
    """Fetch the image config digest from a registry v2 manifest URL.

    :raises RegistryError: when the registry does not answer with HTTP 200
    :return: the digest string, or None when the manifest lacks a config digest
    """
    logger.debug("get_digest_from_registry")
    # query registry
    # TODO: Handle logging in if needed
    registry_info, status_code = get_url(url=url)

    if status_code != 200:
        raise RegistryError(response=registry_info)
    return registry_info['config'].get('digest', None)
380 |
381 |
def set_rc(new_rc):
    """Raise the global return code to new_rc when it is more severe; never lower it."""
    global rc
    rc = max(rc, new_rc)
385 |
386 |
def ok(message):
    """Record an OK result: bump rc to at least OK and queue the message for output."""
    set_rc(OK_RC)
    messages.append('OK: ' + message)
390 |
391 |
def warning(message):
    """Record a WARNING result: bump rc to at least WARNING and queue the message."""
    set_rc(WARNING_RC)
    messages.append('WARNING: ' + message)
395 |
396 |
def critical(message):
    """Record a CRITICAL result: bump rc to at least CRITICAL and queue the message."""
    set_rc(CRITICAL_RC)
    messages.append('CRITICAL: ' + message)
400 |
401 |
def unknown(message):
    """Record an UNKNOWN result: bump rc to UNKNOWN and queue the message."""
    set_rc(UNKNOWN_RC)
    messages.append('UNKNOWN: ' + message)
405 |
406 |
def require_running(name):
    """Decorator factory: run the wrapped check only when the container is running.

    When the container is in any other state a CRITICAL message is recorded
    instead of calling the check.

    :param name: human-readable check name used in the CRITICAL message
    """
    def inner_decorator(func):
        def wrapper(container, *args, **kwargs):
            container_state = get_state(container)
            state = normalize_state(container_state)
            if state.lower() == "running":
                func(container, *args, **kwargs)
            else:
                # container is not running, can't perform check
                # Fix: removed a stray trailing double-quote from the message
                critical('{container} is not "running", cannot check {check}'.format(container=container,
                                                                                     check=name))

        return wrapper

    return inner_decorator
422 |
423 |
def multithread_execution(disable_threading=DISABLE_THREADING):
    """Decorator factory: submit the wrapped check to the parallel thread pool.

    Fix: the wrapper previously tested the module-level DISABLE_THREADING
    constant, silently ignoring the ``disable_threading`` argument.

    :param disable_threading: when True, run the check inline instead of submitting it
    """
    def inner_decorator(func):
        def wrapper(container, *args, **kwargs):
            if disable_threading:
                func(container, *args, **kwargs)
            else:
                threads.append(parallel_executor.submit(func, container, *args, **kwargs))

        return wrapper

    return inner_decorator
435 |
436 |
def singlethread_execution(disable_threading=DISABLE_THREADING):
    """Decorator factory: submit the wrapped check to the single-worker executor.

    Used for checks (e.g. registry queries) that must not run concurrently.
    Fix: the wrapper previously tested the module-level DISABLE_THREADING
    constant, silently ignoring the ``disable_threading`` argument.

    :param disable_threading: when True, run the check inline instead of submitting it
    """
    def inner_decorator(func):
        def wrapper(container, *args, **kwargs):
            if disable_threading:
                func(container, *args, **kwargs)
            else:
                threads.append(serial_executor.submit(func, container, *args, **kwargs))

        return wrapper

    return inner_decorator
448 |
449 |
def parse_image_name(image_name):
    """
    Parses image names into their constituent parts.

    Fix: the named-group markers had been stripped to bare ``(?P(`` — an
    invalid pattern — restoring ``(?P<registry>``, ``(?P<image_name>`` and
    ``(?P<image_tag>`` which the ``parsed.group(...)`` calls below require.

    :param image_name: image reference such as 'registry:5000/repo/name:tag'
    :return: ImageName
    """

    # These are based on information found here
    # https://docs.docker.com/engine/reference/commandline/tag/#extended-description
    # https://github.com/docker/distribution/blob/master/reference/regexp.go
    host_segment_re = '[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?'
    hostname_re = r'({host_segment}\.)+{host_segment}'.format(host_segment=host_segment_re)
    registry_re = r'((?P<registry>({hostname_re}(:\d+)?|{host_segment_re}:\d+))/)'.format(
        host_segment_re=host_segment_re, hostname_re=hostname_re)
    name_component_ends_re = '[a-z0-9]'
    name_component_middle_re = '[a-z0-9._-]'  # Ignoring spec limit of two _
    name_component_re = '({end}{middle}*{end}|{end})'.format(end=name_component_ends_re,
                                                             middle=name_component_middle_re)
    image_name_re = "(?P<image_name>({name_component}/)*{name_component})".format(name_component=name_component_re)
    image_tag_re = '(?P<image_tag>[a-zA-Z0-9_][a-zA-Z0-9_.-]*)'
    full_re = '^{registry}?{image_name}(:{image_tag})?$'.format(registry=registry_re, image_name=image_name_re,
                                                                image_tag=image_tag_re)
    parsed = re.match(full_re, image_name)

    registry = parsed.group('registry') if parsed.group('registry') else DEFAULT_PUBLIC_REGISTRY

    # Bare names on the public registry implicitly live under 'library/'
    image_name = parsed.group('image_name')
    image_name = image_name if '/' in image_name or registry != DEFAULT_PUBLIC_REGISTRY else 'library/' + image_name

    image_tag = parsed.group('image_tag')
    image_tag = image_tag if image_tag else 'latest'

    full_image_name = "{registry}/{image_name}:{image_tag}".format(
        registry=registry,
        image_name=image_name,
        image_tag=image_tag)

    return ImageName(registry=registry, name=image_name, tag=image_tag, full_name=full_image_name)
488 |
489 |
def normalize_state(status_info):
    """Map a container's State structure to a single status string.

    Newer engines expose the state directly under 'Status'; older engines are
    reconstructed from individual boolean flags.

    :param status_info: the 'State' mapping from a container inspect
    :return: status string such as 'Running', 'Paused', 'Exited'
    """
    if "Status" in status_info:
        return status_info['Status']

    # Old-style engines: check flags in priority order
    for flag, label in (("Restarting", "Restarting"),
                        ("Paused", "Paused"),
                        ("Dead", "Dead")):
        if status_info[flag]:
            return label
    if status_info["Running"]:
        return "Running"
    return "Exited"
508 |
509 |
510 | # Checks
511 | #############################################################################################
512 |
@multithread_execution()
@require_running(name='memory')
def check_memory(container, thresholds):
    """Check container memory usage (cache excluded, matching `docker stats`) against thresholds.

    :param container: container name
    :param thresholds: thresholds object; .units must be '%' or a unit_adjustments key
    """
    # Idiom fix: 'not x in y' -> 'x not in y'
    if thresholds.units not in unit_adjustments:
        unknown("Memory units must be one of {}".format(list(unit_adjustments.keys())))
        return

    inspection = get_stats(container)

    # Subtracting cache to match what `docker stats` does.
    adjusted_usage = inspection['memory_stats']['usage'] - inspection['memory_stats']['stats']['total_cache']
    if thresholds.units == '%':
        # Renamed local 'max' -> 'max_value' to stop shadowing the builtin
        max_value = 100
        usage = int(100 * adjusted_usage / inspection['memory_stats']['limit'])
    else:
        max_value = inspection['memory_stats']['limit'] / unit_adjustments[thresholds.units]
        usage = adjusted_usage / unit_adjustments[thresholds.units]

    evaluate_numeric_thresholds(container=container, value=usage, thresholds=thresholds, name='memory',
                                short_name='mem', min=0, max=max_value)
533 |
534 |
@multithread_execution()
def check_status(container, desired_state):
    """Record CRITICAL unless the container's normalized state matches desired_state."""
    desired = desired_state.lower()
    actual = normalize_state(get_state(container)).lower()
    if desired != actual:
        critical("{} state is not {}".format(container, desired_state))
        return
    ok("{} status is {}".format(container, desired_state))
543 |
544 |
@multithread_execution()
@require_running('health')
def check_health(container):
    """Report the container's built-in healthcheck status (healthy/unhealthy/other)."""
    state = get_state(container)

    # Guard clause: containers without a healthcheck have no data to evaluate
    if "Health" not in state or "Status" not in state["Health"]:
        unknown('{} has no health check data'.format(container))
        return

    health = state["Health"]["Status"]
    message = "{} is {}".format(container, health)
    if health == 'healthy':
        ok(message)
    elif health == 'unhealthy':
        critical(message)
    else:
        unknown(message)
560 |
561 |
@multithread_execution()
@require_running('uptime')
def check_uptime(container, thresholds):
    """Check how long the container has been up; alerts when uptime is BELOW thresholds."""
    inspection = get_container_info(container)['State']['StartedAt']
    # Keep only 'YYYY-MM-DDTHH:MM:SS', dropping fractional seconds / zone suffix
    only_secs = inspection[0:19]
    start = datetime.strptime(only_secs, "%Y-%m-%dT%H:%M:%S")
    start = start.replace(tzinfo=timezone.utc)  # assumes StartedAt is UTC — TODO confirm
    now = datetime.now(timezone.utc)
    uptime = (now - start).total_seconds()

    graph_padding = 2
    thresholds.units = 's'
    # greater_than=False: low uptime (a recent restart) is the alert condition
    evaluate_numeric_thresholds(container=container, value=uptime, thresholds=thresholds, name='uptime',
                                short_name='up', min=0, max=graph_padding, greater_than=False)
576 |
577 |
@multithread_execution()
def check_image_age(container, thresholds):
    """Check the age in days of the image backing the container against thresholds."""
    container_image = get_container_info(container)['Image']
    image_created = get_image_info(container_image)['Created']
    # Keep only 'YYYY-MM-DDTHH:MM:SS', dropping fractional seconds / zone suffix
    only_secs = image_created[0:19]
    start = datetime.strptime(only_secs, "%Y-%m-%dT%H:%M:%S")
    start = start.replace(tzinfo=timezone.utc)  # assumes Created is UTC — TODO confirm
    now = datetime.now(timezone.utc)
    image_age = (now - start).days

    graph_padding = 2
    thresholds.units = 'd'
    evaluate_numeric_thresholds(container=container, value=image_age, thresholds=thresholds, name='image_age',
                                short_name='age', min=0, max=graph_padding, greater_than=True)
592 |
593 |
@multithread_execution()
@require_running('restarts')
def check_restarts(container, thresholds):
    """Check the container's cumulative restart count against thresholds."""
    inspection = get_container_info(container)

    restarts = int(inspection['RestartCount'])
    graph_padding = 2
    evaluate_numeric_thresholds(container=container, value=restarts, thresholds=thresholds, name='restarts',
                                short_name='re', min=0, max=graph_padding)
603 |
604 |
@singlethread_execution()
def check_version(container, insecure_registries):
    """Compare the local image digest with the registry's digest for the container's image.

    Runs on the serial executor: registry auth endpoints dislike concurrent
    hits (see the note above get_digest_from_registry).

    :param container: container name
    :param insecure_registries: registries to contact over plain http
    """
    image_id = get_container_image_id(container)
    logger.debug("Local container image ID: {}".format(image_id))
    if image_id is None:
        unknown('Checksum missing for "{}", try doing a pull'.format(container))
        return

    # A digest comparison only makes sense when the image maps to exactly one tag
    image_urls = get_container_image_urls(container=container)
    if len(image_urls) > 1:
        unknown('"{}" has multiple tags/names. Unsure which one to use to check the version.'.format(container))
        return
    elif len(image_urls) == 0:
        unknown('"{}" has last no repository tag. Is this anywhere else?'.format(container))
        return

    url, registry = normalize_image_name_to_manifest_url(image_urls[0], insecure_registries)
    logger.debug("Looking up image digest here {}".format(url))
    try:
        registry_hash = get_digest_from_registry(url)
    except URLError as e:
        # A TLS handshake against a plain-http registry surfaces as UNKNOWN_PROTOCOL
        if hasattr(e.reason, 'reason') and e.reason.reason == 'UNKNOWN_PROTOCOL':
            unknown(
                "TLS error connecting to registry {} for {}, should you use the '--insecure-registry' flag?" \
                    .format(registry, container))
            return
        # DNS failure reaching the registry host
        elif hasattr(e.reason, 'strerror') and e.reason.strerror == 'nodename nor servname provided, or not known':
            unknown(
                "Cannot reach registry for {} at {}".format(container, url))
            return
        else:
            raise e
    except RegistryError as e:
        unknown("Cannot check version, couldn't retrieve digest for {} while checking {}.".format(container, url))
        return
    logger.debug("Image digests, local={} remote={}".format(image_id, registry_hash))
    if registry_hash == image_id:
        ok("{}'s version matches registry".format(container))
        return
    critical("{}'s version does not match registry".format(container))
645 |
646 |
def calculate_cpu_capacity_precentage(info, stats):
    """Compute CPU usage as a percentage of the container's allowed capacity.

    Mirrors `docker stats`: usage between the two samples (cpu_stats vs
    precpu_stats) scaled by whichever CPU limit mechanism is configured.

    :param info: container inspect data (for HostConfig limits)
    :param stats: one stats sample containing cpu_stats and precpu_stats
    :return: usage percentage rounded to a whole number (float)
    """
    host_config = info['HostConfig']

    if 'online_cpus' in stats['cpu_stats']:
        num_cpus = stats['cpu_stats']['online_cpus']
    else:
        num_cpus = len(stats['cpu_stats']['cpu_usage']['percpu_usage'])

    # Identify limit system being used
    # --cpus
    if 'NanoCpus' in host_config and host_config['NanoCpus'] != 0:
        period = 1000000000
        quota = host_config['NanoCpus']
    # --cpu-quota
    elif 'CpuQuota' in host_config and host_config['CpuQuota'] != 0:
        period = 100000 if host_config['CpuPeriod'] == 0 else host_config['CpuPeriod']
        quota = host_config['CpuQuota']
    # unlimited
    else:
        period = 1
        quota = num_cpus

    if period * num_cpus < quota:
        # This handles the case where the quota is actually bigger than amount available by all the cpus.
        available_limit_ratio = 1
    else:
        available_limit_ratio = (period * num_cpus) / quota

    cpu_delta = stats['cpu_stats']['cpu_usage']['total_usage'] - stats['precpu_stats']['cpu_usage']['total_usage']
    system_delta = stats['cpu_stats']['system_cpu_usage'] - stats['precpu_stats']['system_cpu_usage']
    # Fix: guard against identical/unordered samples to avoid ZeroDivisionError
    if system_delta <= 0:
        return 0.0
    usage = (cpu_delta / system_delta) * available_limit_ratio
    usage = round(usage * 100, 0)
    return usage
680 |
681 |
@multithread_execution()
@require_running('cpu')
def check_cpu(container, thresholds):
    """Check container CPU usage (percent of allowed capacity) against thresholds."""
    info = get_container_info(container)
    stats = get_stats(container=container)

    usage = calculate_cpu_capacity_precentage(info=info, stats=stats)

    # Renamed local 'max' -> 'upper_bound' to stop shadowing the builtin
    upper_bound = 100
    thresholds.units = '%'
    evaluate_numeric_thresholds(container=container, value=usage, thresholds=thresholds, name='cpu', short_name='cpu',
                                min=0, max=upper_bound)
695 |
696 |
def process_args(args):
    """Parse CLI arguments and set the global timeout/daemon/connection_type state.

    Fix: the metavar strings for --connection/--secure-connection had their
    angle-bracketed placeholders stripped (e.g. '[//docker.socket|:]');
    restored to the intended forms. Also fixed the 'concurent' typo in --threads help.

    :param args: argument list, typically argv[1:]
    :return: parsed argparse namespace
    """
    parser = argparse.ArgumentParser(description='Check docker containers.')

    # Connect to local socket or ip address
    connection_group = parser.add_mutually_exclusive_group()
    connection_group.add_argument('--connection',
                                  dest='connection',
                                  action='store',
                                  default=DEFAULT_SOCKET,
                                  type=str,
                                  metavar='[/<path to>/docker.socket|<ip/host address>:<port>]',
                                  help='Where to find docker daemon socket. (default: %(default)s)')

    connection_group.add_argument('--secure-connection',
                                  dest='secure_connection',
                                  action='store',
                                  type=str,
                                  metavar='[<ip/host address>:<port>]',
                                  help='Where to find TLS protected docker daemon socket.')

    base_group = parser.add_mutually_exclusive_group()
    base_group.add_argument('--binary_units',
                            dest='units_base',
                            action='store_const',
                            const=1024,
                            help='Use a base of 1024 when doing calculations of KB, MB, GB, & TB (This is default)')

    base_group.add_argument('--decimal_units',
                            dest='units_base',
                            action='store_const',
                            const=1000,
                            help='Use a base of 1000 when doing calculations of KB, MB, GB, & TB')
    parser.set_defaults(units_base=1024)

    # Connection timeout
    parser.add_argument('--timeout',
                        dest='timeout',
                        action='store',
                        type=float,
                        default=DEFAULT_TIMEOUT,
                        help='Connection timeout in seconds. (default: %(default)s)')

    # Container name
    parser.add_argument('--containers',
                        dest='containers',
                        action='store',
                        nargs='+',
                        type=str,
                        default=['all'],
                        help='One or more RegEx that match the names of the container(s) to check. If omitted all containers are checked. (default: %(default)s)')

    # Present
    parser.add_argument('--present',
                        dest='present',
                        default=False,
                        action='store_true',
                        help='Modifies --containers so that each RegEx must match at least one container.')

    # Threads
    parser.add_argument('--threads',
                        dest='threads',
                        default=DEFAULT_PARALLELISM,
                        action='store',
                        type=int,
                        help='This + 1 is the maximum number of concurrent threads/network connections. (default: %(default)s)')

    # CPU
    parser.add_argument('--cpu',
                        dest='cpu',
                        action='store',
                        type=str,
                        metavar='WARN:CRIT',
                        help='Check cpu usage percentage taking into account any limits.')

    # Memory
    parser.add_argument('--memory',
                        dest='memory',
                        action='store',
                        type=str,
                        metavar='WARN:CRIT:UNITS',
                        help='Check memory usage taking into account any limits. Valid values for units are %%,B,KB,MB,GB.')

    # Status
    parser.add_argument('--status',
                        dest='status',
                        action='store',
                        type=str,
                        help='Desired container status (running, exited, etc).')

    # Health
    parser.add_argument('--health',
                        dest='health',
                        default=None,
                        action='store_true',
                        help="Check container's health check status")

    # Uptime
    parser.add_argument('--uptime',
                        dest='uptime',
                        action='store',
                        type=str,
                        metavar='WARN:CRIT',
                        help='Minimum container uptime in seconds. Use when infrequent crashes are tolerated.')

    # Image Age
    parser.add_argument('--image-age',
                        dest='image_age',
                        action='store',
                        type=str,
                        metavar='WARN:CRIT',
                        help='Maximum image age in days.')

    # Version
    parser.add_argument('--version',
                        dest='version',
                        default=None,
                        action='store_true',
                        help='Check if the running images are the same version as those in the registry. Useful for finding stale images. Does not support login.')

    # Insecure registries (used with --version)
    parser.add_argument('--insecure-registries',
                        dest='insecure_registries',
                        action='store',
                        nargs='+',
                        type=str,
                        default=[],
                        help='List of registries to connect to with http(no TLS). Useful when using "--version" with images from insecure registries.')

    # Restart
    parser.add_argument('--restarts',
                        dest='restarts',
                        action='store',
                        type=str,
                        metavar='WARN:CRIT',
                        help='Container restart thresholds.')

    # no-ok
    parser.add_argument('--no-ok',
                        dest='no_ok',
                        action='store_true',
                        help='Make output terse suppressing OK messages. If all checks are OK return a single OK.')

    # no-performance
    parser.add_argument('--no-performance',
                        dest='no_performance',
                        action='store_true',
                        help='Suppress performance data. Reduces output when performance data is not being used.')

    parser.add_argument('-V', action='version', version='%(prog)s {}'.format(__version__))

    if len(args) == 0:
        parser.print_help()

    parsed_args = parser.parse_args(args=args)

    global timeout
    timeout = parsed_args.timeout

    global daemon
    global connection_type
    if parsed_args.secure_connection:
        daemon = 'https://' + parsed_args.secure_connection
        connection_type = 'https'
    elif parsed_args.connection:
        # A leading '/' means a unix socket file rather than host:port
        if parsed_args.connection[0] == '/':
            daemon = 'socket://' + parsed_args.connection + ':'
            connection_type = 'socket'
        else:
            daemon = 'http://' + parsed_args.connection
            connection_type = 'http'

    return parsed_args
869 |
870 |
def no_checks_present(parsed_args):
    """Return True when the parsed arguments request no check at all.

    Checks are discovered by scanning module globals for names beginning with
    'check_'; --present is treated as a check even though it is not one.
    """
    check_names = [key[len('check_'):] for key in globals() if key.startswith('check_')]
    nothing_requested = all(getattr(parsed_args, check) is None for check in check_names)
    return nothing_requested and not parsed_args.present
876 |
877 |
def socketfile_permissions_failure(parsed_args):
    """Return True when using a socket connection and the socket file is missing or unusable."""
    if connection_type != 'socket':
        return False
    path = parsed_args.connection
    usable = (os.path.exists(path)
              and stat.S_ISSOCK(os.stat(path).st_mode)
              and os.access(path, os.R_OK)
              and os.access(path, os.W_OK))
    return not usable
886 |
887 |
def print_results():
    """Print accumulated messages (and perfdata when enabled) in Nagios plugin format."""
    if no_ok:
        # Remove all the "OK"s
        filtered_messages = [message for message in messages if not message.startswith('OK: ')]
        if len(filtered_messages) == 0:
            messages_concat = 'OK'
        else:
            messages_concat = '; '.join(filtered_messages)

    else:
        messages_concat = '; '.join(messages)

    if no_performance or len(performance_data) == 0:
        print(messages_concat)
    else:
        # Nagios separates human output from perfdata with '|'
        perfdata_concat = ' '.join(performance_data)
        print(messages_concat + '|' + perfdata_concat)
905 |
906 |
def perform_checks(raw_args):
    """Parse arguments, initialize executors/global settings, and dispatch the requested checks.

    :param raw_args: argument list, typically argv[1:]
    """
    args = process_args(raw_args)

    global parallel_executor
    parallel_executor = futures.ThreadPoolExecutor(max_workers=args.threads)
    global serial_executor
    serial_executor = futures.ThreadPoolExecutor(max_workers=1)

    global unit_adjustments
    unit_adjustments = {key: args.units_base ** value for key, value in UNIT_ADJUSTMENTS_TEMPLATE.items()}

    global no_ok
    no_ok = args.no_ok

    global no_performance
    # Fix: was mistakenly copying args.no_ok, so --no-performance never took effect
    no_performance = args.no_performance

    if socketfile_permissions_failure(args):
        unknown("Cannot access docker socket file. User ID={}, socket file={}".format(os.getuid(), args.connection))
        return

    if args.containers == ["all"] and args.present:
        unknown("You can not use --present without --containers")
        return

    if no_checks_present(args):
        unknown("No checks specified.")
        return

    # Here is where all the work happens
    #############################################################################################
    containers = get_containers(args.containers, args.present)

    if len(containers) == 0 and not args.present:
        unknown("No containers names found matching criteria")
        return

    for container in containers:

        # Check status
        if args.status:
            check_status(container, args.status)

        # Check version
        if args.version:
            check_version(container, args.insecure_registries)

        # below are checks that require a 'running' status

        # Check health
        if args.health:
            check_health(container)

        # Check cpu usage
        if args.cpu:
            check_cpu(container, parse_thresholds(args.cpu, units_required=False))

        # Check memory usage
        if args.memory:
            check_memory(container, parse_thresholds(args.memory, units_required=False))

        # Check uptime
        if args.uptime:
            check_uptime(container, parse_thresholds(args.uptime, include_units=False))

        # Check image age
        if args.image_age:
            check_image_age(container, parse_thresholds(args.image_age, include_units=False))

        # Check restart count
        if args.restarts:
            check_restarts(container, parse_thresholds(args.restarts, include_units=False))
979 |
980 |
def main():
    """CLI entry point: run checks, surface thread exceptions, print results, exit with the Nagios rc."""
    try:
        perform_checks(argv[1:])

        # get results to let exceptions in threads bubble out
        [x.result() for x in futures.as_completed(threads)]

    except Exception as e:
        traceback.print_exc()
        # Fix: removed a stray apostrophe ("check':") from the message
        unknown("Exception raised during check: {}".format(repr(e)))
    print_results()
    exit(rc)
993 |
994 |
# Allow running as a plain script as well as via the console_scripts entry point
if __name__ == '__main__':
    main()
997 |
--------------------------------------------------------------------------------
/check_docker/check_swarm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import argparse
3 | import json
4 | import logging
5 | import os
6 | import re
7 | import socket
8 | import stat
9 | import traceback
10 | from functools import lru_cache
11 | from http.client import HTTPConnection
12 | from sys import argv
13 | from urllib.request import AbstractHTTPHandler, HTTPHandler, HTTPSHandler, OpenerDirector
14 |
logger = logging.getLogger()
__author__ = 'Tim Laurence'
__copyright__ = "Copyright 2019"
__credits__ = ['Tim Laurence']
__license__ = "GPL"
__version__ = "2.2.2"

'''
nrpe compatible check for docker swarm

Requires Python 3

Note: I really would have preferred to have used requests for all the network connections but that would have added a
dependency.
'''

# Connection defaults
DEFAULT_SOCKET = '/var/run/docker.sock'
DEFAULT_TIMEOUT = 10.0
DEFAULT_PORT = 2375
DEFAULT_HEADERS = [('Accept', 'application/vnd.docker.distribution.manifest.v2+json')]
# Nagios return codes
OK_RC = 0
WARNING_RC = 1
CRITICAL_RC = 2
UNKNOWN_RC = 3

# NOTE(review): range() excludes its stop value, so 299 is not treated as "good" — confirm intent
HTTP_GOOD_CODES = range(200, 299)

# These hold the final results
rc = -1
messages = []
45 |
46 |
47 | # Hacked up urllib to handle sockets
48 | #############################################################################################
49 | # Docker runs a http connection over a socket. http.client is knows how to deal with these
50 | # but lacks some niceties. Urllib wraps that and makes up for some of the deficiencies but
51 | # cannot fix the fact http.client can't read from socket files. In order to take advantage of
52 | # urllib and http.client's capabilities the class below tweaks HttpConnection and passes it
53 | # to urllib registering for socket:// connections
54 |
55 | # This is all side effect so excluding coverage
class SocketFileHandler(AbstractHTTPHandler):  # pragma: no cover
    """urllib handler that speaks HTTP over a unix socket file (socket:// URLs)."""

    class SocketFileToHttpConnectionAdaptor(HTTPConnection):
        """HTTPConnection variant that connects to a unix socket file instead of host:port."""

        def __init__(self, socket_file, timeout=DEFAULT_TIMEOUT):
            super().__init__(host='', port=0, timeout=timeout)
            self.socket_file = socket_file

        def connect(self):
            # Replace the usual TCP connect with an AF_UNIX connect to the socket file
            self.sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM, proto=0, fileno=None)
            self.sock.settimeout(self.timeout)
            self.sock.connect(self.socket_file)

    def socket_open(self, req):
        # selector looks like '<socket file>:<request path>' — split on the first ':'
        socket_file, path = req.selector.split(':', 1)
        req.host = socket_file
        req.selector = path
        return self.do_open(self.SocketFileToHttpConnectionAdaptor, req)
72 |
73 |
# Shared GET opener supporting http, https, and socket:// (unix socket) URLs
better_urllib_get = OpenerDirector()
better_urllib_get.addheaders = DEFAULT_HEADERS.copy()
better_urllib_get.add_handler(HTTPHandler())
better_urllib_get.add_handler(HTTPSHandler())
better_urllib_get.add_handler(SocketFileHandler())
79 |
80 |
81 | # Util functions
82 | #############################################################################################
83 |
84 |
@lru_cache()
def get_url(url):
    """GET a JSON url, cached per-url for the lifetime of this run.

    :return: (decoded JSON body, HTTP status code)
    """
    response = better_urllib_get.open(url, timeout=timeout)
    return process_urllib_response(response), response.status
89 |
90 |
def process_urllib_response(response):
    """Read an HTTP response body and decode it as UTF-8 JSON."""
    response_bytes = response.read()
    body = response_bytes.decode('utf-8')
    logger.debug(body)
    return json.loads(body)
96 |
97 |
def get_swarm_status():
    """Return the HTTP status code of the /swarm endpoint (used to detect swarm membership)."""
    content, status = get_url(daemon + '/swarm')
    return status
101 |
102 |
def get_service_info(name):
    """Return (service spec JSON, HTTP status) for a single named service."""
    return get_url(daemon + '/services/{service}'.format(service=name))
105 |
106 |
def get_service_tasks(name):
    """Return the task list for the named service (the HTTP status is discarded)."""
    tasks, status = get_url(daemon + '/tasks?filters={{"name":{{"{service}":true}}}}'.format(service=name))
    return tasks
110 |
111 |
def get_nodes():
    """Return (node list JSON, HTTP status) for all swarm nodes."""
    return get_url(daemon + '/nodes')
114 |
115 |
def get_services(names):
    """Resolve service-name regexes into the set of matching swarm service names.

    :param names: list of regex strings; the literal 'all' selects every service
    :return: set of matching names, or [] when the daemon query fails
    """
    services_list, status = get_url(daemon + '/services')
    if status == 406:
        critical("Error checking service status, node is not in swarm mode")
        return []
    elif status not in HTTP_GOOD_CODES:
        unknown("Could not retrieve service info")
        return []

    all_services_names = {service['Spec']['Name'] for service in services_list}
    if 'all' in names:
        return all_services_names

    matched = set()
    unmatched = []
    for pattern in names:
        # Anchor the user-supplied regex so it must match the whole name
        hits = {candidate for candidate in all_services_names
                if re.match("^{}$".format(pattern), candidate)}
        if hits:
            matched.update(hits)
        else:
            unmatched.append(pattern)
    if len(unmatched) > 0:
        critical("No services match {}".format(','.join(unmatched)))
    return matched
143 |
144 |
def set_rc(new_rc):
    """Raise the global return code to new_rc when it is more severe; never lower it."""
    global rc
    rc = max(rc, new_rc)
148 |
149 |
def ok(message):
    """Record an OK result: bump rc to at least OK and queue the message for output."""
    set_rc(OK_RC)
    messages.append('OK: ' + message)
153 |
154 |
def warning(message):
    """Record a WARNING result: bump rc to at least WARNING and queue the message."""
    set_rc(WARNING_RC)
    messages.append('WARNING: ' + message)
158 |
159 |
def critical(message):
    """Record a CRITICAL result: bump rc to at least CRITICAL and queue the message."""
    set_rc(CRITICAL_RC)
    messages.append('CRITICAL: ' + message)
163 |
164 |
def unknown(message):
    """Record an UNKNOWN result: bump rc to UNKNOWN and queue the message."""
    set_rc(UNKNOWN_RC)
    messages.append('UNKNOWN: ' + message)
168 |
169 |
170 | # Checks
171 | #############################################################################################
def check_swarm():
    """Report whether this node is part of a swarm based on the /swarm endpoint status."""
    status = get_swarm_status()
    process_url_status(status, ok_msg='Node is in a swarm',
                       critical_msg='Node is not in a swarm', unknown_msg='Error accessing swarm info')
176 |
177 |
def process_global_service(name, ignore_paused=False):
    """Check that a global service has a running task on every eligible node.

    :param name: service name
    :param ignore_paused: when True, nodes whose availability is 'paused' are not required to run a task
    """
    bad_node_states = {'drain'}
    if ignore_paused:
        bad_node_states.add('paused')

    # Get all the nodes we care about based on their state
    node_list, status = get_nodes()
    node_index = set()
    for node in node_list:
        if node['Spec']['Availability'] in bad_node_states:
            continue
        node_index.add(node['ID'])

    # If a task is on a targeted node confirm it is running
    # Services that are not running are considered bad. This is to prevent services in crash loops from being ignored
    # Also note, this ignores conditions where services state they are running on a node not in the index.
    service_tasks = get_service_tasks(name)
    for task in service_tasks:
        if task['Status']['State'] != 'running':
            critical('Global service {service} has one or more tasks not running'.format(service=name))
            return
        node_index.discard(task['NodeID'])

    if len(node_index) > 0:
        # Fix: report the number of nodes still missing a task (was len(node_list)),
        # and return so a failing service does not also record an OK message.
        critical('Global service {service} has {count} tasks not running'.format(service=name, count=len(node_index)))
        return

    ok('Global service {service} OK'.format(service=name))
205 |
206 |
def process_replicated_service(name, replicas_desired):
    """Check a replicated service has exactly the desired number of running tasks.

    Non-running tasks are not counted; this keeps services in crash loops from
    being treated as healthy.
    """
    tasks = get_service_tasks(name)
    num_tasks = sum(1 for task in tasks if task['Status']['State'] == 'running')
    if num_tasks == replicas_desired:
        ok('Replicated service {service} OK'.format(service=name))
    else:
        critical('Replicated service {service} has {num_tasks} tasks, {replicas_desired} desired'.
                 format(service=name, num_tasks=num_tasks, replicas_desired=replicas_desired))
217 |
218 |
def check_service(name, ignore_paused=False):
    """Dispatch the appropriate check based on the service's mode.

    NOTE(review): a service whose mode is neither Global nor Replicated is
    silently skipped — confirm whether that should record UNKNOWN instead.
    """
    # get service mode
    service_info, status = get_service_info(name)
    mode_info = service_info['Spec']['Mode']

    # if global ensure one per node
    if 'Global' in mode_info:
        process_global_service(name=name, ignore_paused=ignore_paused)
    # if replicated ensure sufficient number of replicas
    elif 'Replicated' in mode_info:
        process_replicated_service(name=name, replicas_desired=mode_info['Replicated']['Replicas'])
230 |
231 |
def process_url_status(status, ok_msg=None, critical_msg=None, unknown_msg=None):
    """Translate an HTTP status code into an ok/critical/unknown result message."""
    if status in HTTP_GOOD_CODES:
        return ok(ok_msg)
    if status in [503, 404, 406]:
        return critical(critical_msg)
    return unknown(unknown_msg)
239 |
240 |
def process_args(args):
    """Parse CLI arguments and set the global timeout/daemon/connection_type state.

    Fixes: restored the angle-bracketed metavar placeholders that had been
    stripped, and moved --ignore_paused out of the required mutually-exclusive
    swarm/service group — inside that group it could never be combined with
    --service, which is the only place it has any effect.

    :param args: argument list, typically argv[1:]
    :return: parsed argparse namespace
    """
    parser = argparse.ArgumentParser(description='Check docker swarm.')

    # Connect to local socket or ip address
    connection_group = parser.add_mutually_exclusive_group()
    connection_group.add_argument('--connection',
                                  dest='connection',
                                  action='store',
                                  default=DEFAULT_SOCKET,
                                  type=str,
                                  metavar='[/<path to>/docker.socket|<ip/host address>:<port>]',
                                  help='Where to find docker daemon socket. (default: %(default)s)')

    connection_group.add_argument('--secure-connection',
                                  dest='secure_connection',
                                  action='store',
                                  type=str,
                                  metavar='[<ip/host address>:<port>]',
                                  help='Where to find TLS protected docker daemon socket.')

    # Connection timeout
    parser.add_argument('--timeout',
                        dest='timeout',
                        action='store',
                        type=float,
                        default=DEFAULT_TIMEOUT,
                        help='Connection timeout in seconds. (default: %(default)s)')

    swarm_group = parser.add_mutually_exclusive_group(required=True)

    # Swarm
    swarm_group.add_argument('--swarm',
                             dest='swarm',
                             default=None,
                             action='store_true',
                             help='Check swarm status')

    # Service
    swarm_group.add_argument('--service',
                             dest='service',
                             action='store',
                             type=str,
                             nargs='+',
                             default=[],
                             help='One or more RegEx that match the names of the services(s) to check.')

    # Modifier for --service global checks; must be combinable with --service
    parser.add_argument('--ignore_paused',
                        dest='ignore_paused',
                        action='store_true',
                        help="Don't require global services to be running on paused nodes")

    parser.add_argument('-V', action='version', version='%(prog)s {}'.format(__version__))

    if len(args) == 0:
        parser.print_help()

    parsed_args = parser.parse_args(args=args)

    global timeout
    timeout = parsed_args.timeout

    global daemon
    global connection_type
    if parsed_args.secure_connection:
        daemon = 'https://' + parsed_args.secure_connection
        connection_type = 'https'
    elif parsed_args.connection:
        # A leading '/' means a unix socket file rather than host:port
        if parsed_args.connection[0] == '/':
            daemon = 'socket://' + parsed_args.connection + ':'
            connection_type = 'socket'
        else:
            daemon = 'http://' + parsed_args.connection
            connection_type = 'http'

    return parsed_args
316 |
317 |
def socketfile_permissions_failure(parsed_args):
    """Report whether a requested unix-socket connection is unusable.

    Returns True when the configured connection is a socket but the path is
    missing, is not actually a socket, or is not both readable and writable
    by the current user. Non-socket (http/https) connections never fail
    this check.
    """
    if connection_type != 'socket':
        return False
    sock_path = parsed_args.connection
    usable = (os.path.exists(sock_path)
              and stat.S_ISSOCK(os.stat(sock_path).st_mode)
              and os.access(sock_path, os.R_OK)
              and os.access(sock_path, os.W_OK))
    return not usable
326 |
327 |
def print_results():
    # Emit every accumulated check message on a single line, as nagios
    # expects one line of plugin output. `messages` is a module-level list,
    # presumably appended to by the ok()/critical()/unknown() helpers
    # defined earlier in this module — confirm against full file.
    print('; '.join(messages))
330 |
331 |
def perform_checks(raw_args):
    """Parse arguments, run the requested swarm/service checks, print results.

    Any exception raised by a check is caught and converted to an UNKNOWN
    result so the plugin still emits parseable nagios output.

    :param raw_args: raw command-line arguments (argv[1:])
    """
    args = process_args(raw_args)
    # A socket connection that exists but is unreadable/unwritable is an
    # environment problem, not a docker problem — report UNKNOWN.
    if socketfile_permissions_failure(args):
        unknown("Cannot access docker socket file. User ID={}, socket file={}".format(os.getuid(), args.connection))
    else:
        # Here is where all the work happens
        #############################################################################################
        try:
            if args.swarm:
                check_swarm()
            elif args.service:
                services = get_services(args.service)

                # Status is set to critical by get_services() if nothing is found for a name
                for service in services:
                    check_service(name=service, ignore_paused=args.ignore_paused)

        except Exception as e:
            # Traceback goes to stderr for debugging; the nagios-facing
            # message below goes through the normal unknown() path.
            traceback.print_exc()
            unknown("Exception raised during check: {}".format(repr(e)))

    print_results()
354 |
355 |
def main():
    # Console-script entry point: run all checks against the CLI arguments
    # and exit with the accumulated nagios return code (module-level `rc`,
    # presumably updated by the ok/critical/unknown helpers — confirm
    # against full file).
    perform_checks(argv[1:])
    exit(rc)


if __name__ == '__main__':
    main()
363 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["poetry"]
3 | build-backend = "poetry.masonry.api"
4 |
5 | [tool.poetry]
6 | name = "check_docker"
7 | version = "2.2.2"
8 | description = "Nagios/NRPE compatible plugins for checking Docker based services"
9 | license = "GPL-3.0"
10 | authors = ["Tim Laurence "]
11 | readme = "README.rst"
12 | homepage = "https://github.com/timdaman/check_docker"
13 | repository = "https://github.com/timdaman/check_docker"
14 |
15 | classifiers=[
16 | "Programming Language :: Python",
17 | "Programming Language :: Python :: 3",
18 | "Intended Audience :: System Administrators",
19 | "Environment :: Other Environment",
20 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
21 | "Operating System :: OS Independent",
22 | "Topic :: System :: Networking",
23 | ]
24 | packages = [
25 | { include = "check_docker" },
26 | ]
27 |
28 | [tool.poetry.scripts]
29 | check_docker = "check_docker.check_docker:main"
30 | check_swarm = "check_docker.check_swarm:main"
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/release_process.md:
--------------------------------------------------------------------------------
1 |
2 | 1. Confirm documentation is updated
3 | - README
4 | - DEV doc
5 | 1. Unit tests pass
6 | 1. Isolated tests pass
7 |
8 | ./run_isolated_tests.sh
9 |
1. Build the package
11 |
12 | pipenv run poetry build
13 |
14 | 1. Uninstall check_docker and install package
15 |
16 | pipenv uninstall check_docker && pipenv run pip install dist/check_docker-X.X.X-py2.py3-none-any.whl
17 |
18 | 1. Bats smoke tests pass
19 |
20 | ./run_package_tests.sh
21 |
22 | 1. Push to branch
23 | 1. Confirm doc looks good on github
24 | 1. Travis tests pass
25 | 1. Create and merge PR
26 | 1. Confirm Travis still passes
27 | 1. CodeClimate does not show scary issues (need to modify analyzed branch)
28 | 1. Upload package to test repo
29 |
30 | poetry publish -r pypi -u timdaman -p xxxx
31 |
32 | 1. Check test project page for formatting
33 |
34 | https://test.pypi.org/project/check_docker/
35 |
36 | 1. Upload package to prod repo
37 |
38 | poetry publish -r prodpypi -u timdaman -p xxxx
39 |
40 | 1. Check project page for formatting
41 |
42 | https://pypi.org/project/check_docker/
43 |
--------------------------------------------------------------------------------
/run_isolated_tests.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Build the isolated-test image and run the tox suite inside it, so tests
# do not depend on the host's python installations.
set -eu

(cd testing_tools && docker build -t check_docker_tests .)

# Quote $PWD so the volume mapping and workdir survive paths with spaces.
docker run --rm -v "$PWD:$PWD" -w "$PWD" -ti check_docker_tests tox
--------------------------------------------------------------------------------
/run_package_tests.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -eux

# Bring up the test VM, run the bats smoke tests against the built packages
# (repo is mounted read-only at /check_docker inside the guest), then
# suspend the VM to free host resources.
cd testing_tools/vagrant
vagrant up
vagrant ssh -c "bats -p /check_docker/testing_tools/vagrant"
vagrant suspend
9 |
--------------------------------------------------------------------------------
/testing_tools/Dockerfile:
--------------------------------------------------------------------------------
# Stage 1: build the python interpreters under test with pyenv.
FROM ubuntu:20.04 AS build
ENV DEBIAN_FRONTEND=noninteractive

ENV PYENV_ROOT="/pyenv"
ENV PATH="$PYENV_ROOT/bin:$PATH"
WORKDIR /
RUN apt update
RUN apt install --no-install-recommends --fix-missing -y build-essential make locales libssl1.1 libssl-dev \
    libffi-dev libbz2-dev libreadline-dev libsqlite3-dev libjpeg-dev zlib1g-dev libxml2-dev libxslt1-dev \
    curl ca-certificates
# Fetch pyenv over verified TLS; ca-certificates is installed above, so the
# insecure `-k` flag is not needed (and would silently accept MITM'd downloads).
RUN curl -L https://github.com/pyenv/pyenv/archive/master.tar.gz | tar -xz \
    && mv pyenv-master /pyenv
# Build all interpreter versions in parallel, one pyenv install per core.
RUN echo 3.5.6 3.6.7 3.7.1 | xargs -n 1 -P $(nproc) pyenv install
RUN /pyenv/versions/3.7.1/bin/pip3.7 install setuptools wheel flit tox

# Stage 2: slim runtime image with only the prebuilt interpreters.
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive
# Login shells so the /etc/profile.d pyenv setup below is sourced.
SHELL ["/bin/bash", "-lc"]
ENTRYPOINT ["/bin/bash", "-lc"]
RUN apt update \
    && apt install --no-install-recommends --fix-missing -y git libssl1.1 ca-certificates netbase \
    && apt-get autoremove -y \
    && apt-get clean all \
    && rm -rf /var/lib/apt/lists/*
COPY --from=build /pyenv /pyenv
ENV PYENV_ROOT="/pyenv"
RUN echo 'PATH="/pyenv/bin:$PATH"' >> /etc/profile.d/02-pyenv.sh
RUN echo 'eval "$(pyenv init -)"' >> /etc/profile.d/02-pyenv.sh
RUN echo 'pyenv global 3.5.6 3.6.7 3.7.1' >> /etc/profile.d/02-pyenv.sh
# These are needed for some tests
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
ENV isolated=true
--------------------------------------------------------------------------------
/testing_tools/vagrant/Vagrantfile:
--------------------------------------------------------------------------------
1 | # -*- mode: ruby -*-
2 | # vi: set ft=ruby :
3 |
4 | # All Vagrant configuration is done below. The "2" in Vagrant.configure
5 | # configures the configuration version (we support older styles for
6 | # backwards compatibility). Please don't change it unless you know what
7 | # you're doing.
Vagrant.configure("2") do |config|
  # The most common configuration options are documented and commented below.
  # For a complete reference, please see the online documentation at
  # https://docs.vagrantup.com.

  # Every Vagrant development environment requires a box. You can search for
  # boxes at https://atlas.hashicorp.com/search.
  config.vm.box = "geerlingguy/ubuntu1604"
  config.vm.box_version = "1.2.5"

  # Disable automatic box update checking. If you disable this, then
  # boxes will only be checked for updates when the user runs
  # `vagrant box outdated`. This is not recommended.
  # config.vm.box_check_update = false

  # Create a forwarded port mapping which allows access to a specific port
  # within the machine from a port on the host machine. In the example below,
  # accessing "localhost:8080" will access port 80 on the guest machine.
  # config.vm.network "forwarded_port", guest: 80, host: 8080

  # Create a private network, which allows host-only access to the machine
  # using a specific IP.
  # config.vm.network "private_network", ip: "192.168.33.10"

  # Create a public network, which is generally matched to a bridged network.
  # Bridged networks make the machine appear as another physical device on
  # your network.
  # config.vm.network "public_network"

  # Share an additional folder to the guest VM. The first argument is
  # the path on the host to the actual folder. The second argument is
  # the path on the guest to mount the folder. And the optional third
  # argument is a set of non-required options.
  # config.vm.synced_folder "../data", "/vagrant_data"

  # Provider-specific configuration so you can fine-tune various
  # backing providers for Vagrant. These expose provider-specific options.
  # Example for VirtualBox:
  #
  # config.vm.provider "virtualbox" do |vb|
  #   # Display the VirtualBox GUI when booting the machine
  #   vb.gui = true
  #
  #   # Customize the amount of memory on the VM:
  #   vb.memory = "1024"
  # end
  #
  # View the documentation for the provider you are using for more
  # information on available options.

  # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies
  # such as FTP and Heroku are also available. See the documentation at
  # https://docs.vagrantup.com/v2/push/atlas.html for more information.
  # config.push.define "atlas" do |push|
  #   push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME"
  # end

  # Enable provisioning with a shell script. Additional provisioners such as
  # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the
  # documentation for more information about their specific syntax and use.
  config.vm.provision "shell", inline: <<-SHELL
    add-apt-repository -y ppa:deadsnakes/ppa
    apt update
    # python3.8-distutils is required for get-pip.py with deadsnakes builds.
    # (Was "3.8-distutils", which is not a real package and made apt fail.)
    apt install -y python3.8 python3.8-distutils curl
    curl -s https://bootstrap.pypa.io/get-pip.py | python3.8
    curl -fsSL https://get.docker.com | sh
    usermod -a -G docker vagrant
    curl -s -L https://github.com/bats-core/bats-core/archive/master.tar.gz | tar -xzf -
    bash bats-core-master/install.sh /usr/local
    rm -rf ./bats-core-master
    docker swarm init
  SHELL
  # Share the repo read-only so guest tests cannot modify files on the host.
  config.vm.synced_folder "../../", "/check_docker", disabled: false, mount_options: ["ro"]

end
84 |
--------------------------------------------------------------------------------
/testing_tools/vagrant/bats_fixtures.bash:
--------------------------------------------------------------------------------
1 |
# Start a healthy long-running container (sleeps for a day).
good_container() {
    docker run -d --name good_sleep busybox sleep 1d
}

# Start a container whose command exits immediately with failure.
bad_container() {
    docker run -d --name bad_sleep busybox false
}

# Start a container from a freshly pulled, up-to-date busybox:latest.
current_container() {
    docker pull busybox:latest
    docker run -d --name current_container busybox:latest sleep 1d
}

# Make the local busybox:latest tag point at an old release (1.28.1) and
# start a container from it, so image-freshness checks should flag it.
old_container() {
    docker pull busybox:1.28.1
    docker tag busybox:1.28.1 busybox:latest
    docker rmi busybox:1.28.1
    docker run -d --name old_container busybox:latest sleep 1d
}


# Start a container that fails on start and restarts forever.
crashing_container() {
    docker run -d --name crashes --restart always busybox false
}

# Print the installed check_docker package version (empty if not installed).
get_check_docker_version() {
    pip3 show check_docker 2>/dev/null | sed -n '/^Version: /s/^Version: //p'
}
--------------------------------------------------------------------------------
/testing_tools/vagrant/tests.bats:
--------------------------------------------------------------------------------
1 |
2 | if ! id vagrant
3 | then
4 | echo "This is only intended to be run inside a vagrant box!" >&2
5 | echo "Running it outside may result in data loss" >&2
6 | fi
7 |
8 | NEWEST_SDIST="$(ls -t /check_docker/dist/check_docker-*.tar.gz | head -1)"
9 | NEWEST_WHEEL="$(ls -t /check_docker/dist/check_docker-*.whl | head -1)"
10 |
# bats teardown hook: runs after every test. Removes all containers and, if
# the TEST_STACK swarm stack exists, removes it and waits until both its
# containers and its network are fully gone so state doesn't leak between
# tests.
teardown()
{
    docker ps -aq
    COUNT=$(docker ps -aq | wc -l)
    if [ $COUNT -ne 0 ]
    then
        docker stop -t 0 $(docker ps -aq)
        docker rm -f $(docker ps -aq)
    fi
    STACKS=$(docker stack ls)
    if grep -q TEST_STACK <<<"$STACKS"
    then
        docker stack rm TEST_STACK
        # `docker stack rm` is asynchronous: poll until the stack's
        # containers have disappeared...
        TEST_CONTAINERS_COUNT=$(docker ps | grep TEST_STACK | wc -l)
        while [ $TEST_CONTAINERS_COUNT -ne 0 ]
        do
            sleep 1
            TEST_CONTAINERS_COUNT=$(docker ps | grep TEST_STACK | wc -l)
        done

        # ...and until its network has been removed as well.
        TEST_NETWORK_COUNT=$(docker network ls | grep TEST_STACK | wc -l)
        while [ $TEST_NETWORK_COUNT -ne 0 ]
        do
            sleep 1
            TEST_NETWORK_COUNT=$(docker network ls | grep TEST_STACK | wc -l)
        done
    fi
}
39 |
40 |
41 | load bats_fixtures
42 |
43 |
44 | @test "Confirm check_docker is not in path" {
45 |
46 | # Before we start make sure check_docker is not present
47 | sudo -H pip3.8 uninstall -y check-docker || true
48 | run which check_docker
49 | [ "$status" -eq 1 ]
50 | }
51 |
52 | @test "Confirm 'check-docker' is not installed" {
53 |
54 | # Before we start make sure check_docker is not present
55 | pip3.8 list 2>&1 | grep -ve check-docker
56 | }
57 |
58 | @test "Confirm source package, $NEWEST_SDIST, is installable" {
59 | echo pip3.8 install "$NEWEST_SDIST"
60 | run sudo -H pip3.8 install "$NEWEST_SDIST"
61 | [ "$status" -eq 0 ]
62 | }
63 |
64 | @test "Re-Confirm 'check-docker' is not installed" {
65 |
66 | # This should never error since the previous step ensures package is already present
67 | sudo -H pip3.8 uninstall -y check-docker
68 | # Before we start make sure check_docker is not present
69 | pip3.8 list 2>&1 | grep -ve check-docker
70 | }
71 |
72 | @test "Confirm wheel package, $NEWEST_WHEEL, is installable" {
73 |
74 | run sudo -H pip3.8 install "$NEWEST_WHEEL"
75 | [ "$status" -eq 0 ]
76 | }
77 |
78 | @test "Confirm check_docker appears in path" {
79 | run which check_docker
80 | [ "$status" -eq 0 ]
81 | }
82 |
83 | @test "Confirm package is installed" {
84 | pip3.8 list | grep 'check-docker'
85 | }
86 |
87 | # It is normal for this to fail when preparing for a PR.
88 | @test "Confirm package version is not already in PyPi" {
89 | VERSION=$(get_check_docker_version)
90 | REMOTE_HTTP_STATUS=$(curl -LI https://pypi.org/project/check_docker/${VERSION}/ -w "%{http_code}" -o /dev/null -s)
91 | [ "$REMOTE_HTTP_STATUS" == 404 ]
92 | }
93 |
94 | @test "Confirm check_docker version matches package" {
95 | PACKAGE_VERSION=$(get_check_docker_version)
96 | CHECK_VERSION=$(python3.8 -c 'from check_docker import check_docker; print(check_docker.__version__)')
97 |
98 | [ "$PACKAGE_VERSION" == "$CHECK_VERSION" ]
99 | }
100 |
101 | @test "Confirm check_swarm version matches package" {
102 | PACKAGE_VERSION=$(get_check_docker_version)
103 | CHECK_VERSION=$(python3.8 -c 'from check_docker import check_swarm; print(check_swarm.__version__)')
104 |
105 | [ "$PACKAGE_VERSION" == "$CHECK_VERSION" ]
106 | }
107 |
108 | @test "Good status" {
109 | good_container
110 | sleep 1
111 | run check_docker --container good_sleep --status running
112 | echo "$status"
113 | echo $output
114 | [ "$status" -eq 0 ]
115 | }
116 |
117 | @test "Bad status" {
118 | bad_container
119 | run check_docker --container bad_sleep --status running
120 | echo "$status"
121 | echo $output
122 | [ "$status" -eq 2 ]
123 | }
124 |
125 | @test "Current version" {
126 | docker pull busybox
127 | current_container
128 | run check_docker --container current_container --version
129 | echo "$status"
130 | echo $output
131 | [ "$status" -eq 0 ]
132 | }
133 |
134 | @test "Old version" {
135 | old_container
136 | run check_docker --container old_container --version
137 | echo "$status"
138 | echo $output
139 | [ "$status" -eq 2 ]
140 | }
141 |
142 | @test "Doesn't crash" {
143 | good_container
144 | sleep 5
145 | run check_docker --container good_sleep --restarts 1:2
146 | echo "$status"
147 | echo $output
148 | [ "$status" -eq 0 ]
149 | }
150 |
151 | @test "Does crash" {
152 | crashing_container
153 | sleep 5
154 | run check_docker --container crashes --restarts 1:2
155 | echo "$status"
156 | echo $output
157 | [ "$status" -eq 2 ]
158 | }
159 |
160 | @test "Checks multiple containers" {
161 | good_container
162 | current_container
163 | run check_docker --container good_sleep current_container --status running
164 | echo "$status"
165 | echo $output
166 | [ "$status" -eq 0 ]
167 | }
168 |
169 | @test "Checks multiple containers regex" {
170 | good_container
171 | current_container
172 | run check_docker --container '.*' --status running
173 | echo "$status"
174 | echo $output
175 | [ "$status" -eq 0 ]
176 | }
177 |
@test "Checks get all containers" {
    good_container
    current_container
    run check_docker --container '.*' --status running
    echo "$status"
    echo $output
    [ "$status" -eq 0 ]
    # One ';'-separated message per container: expect exactly the two
    # containers started above. (Fixed variable typo: CONTIANERS -> CONTAINERS.)
    CONTAINERS_IN_CHECK=$(echo $output | tr ';' '\n' | wc -l)
    [ "$CONTAINERS_IN_CHECK" -eq 2 ]

}
189 |
190 | SITE_PACKAGES_DIR=/$(pip3.8 show check_docker | grep '^Location' | cut -d ' ' -f 2)/check_docker
191 | @test "Can check_docker be run when called directly" {
192 |
193 | run python3.8 $SITE_PACKAGES_DIR/check_docker.py --help
194 | [ "$status" -eq 0 ]
195 | }
196 |
197 | @test "Can check_swarm be run when called directly" {
198 |
199 | run python3.8 $SITE_PACKAGES_DIR/check_swarm.py --help
200 | [ "$status" -eq 0 ]
201 |
202 | }
203 |
204 | @test "Confirm replicated service failures are noticed" {
205 | cat <