├── .gitignore
├── LICENSE
├── README.md
├── alert.go
├── alert_message.go
├── check_functions.go
├── config.go
├── config_alert.go
├── config_host.go
├── config_probe.go
├── current_fails.go
├── deploy
│   ├── ssh-agent-nosee.sh
│   ├── supervisord
│   │   └── nosee.conf
│   └── systemd
│       └── nosee.service
├── doc
│   └── images
│       ├── howto.txt
│       ├── img_base.png
│       ├── img_base.txt
│       ├── img_general.png
│       ├── img_general.txt
│       ├── img_illu.jpeg
│       └── nosee-influxdb-grafana.png
├── etc
│   ├── alerts.d
│   │   ├── example.txt
│   │   ├── mail_general.toml
│   │   └── nosee-console.toml
│   ├── hosts.d
│   │   ├── example.txt
│   │   └── test.toml
│   ├── nosee.toml
│   ├── probes.d
│   │   ├── apache_modstatus.toml
│   │   ├── backup_daily.toml
│   │   ├── backup_week.toml
│   │   ├── cert_example.toml
│   │   ├── cpu_lms_temp.toml
│   │   ├── cpu_temp.toml
│   │   ├── curl_expect_example.toml
│   │   ├── df.toml
│   │   ├── example.txt
│   │   ├── ifband.toml
│   │   ├── load.toml
│   │   ├── mdstat.toml
│   │   ├── mem.toml
│   │   ├── ping.toml
│   │   ├── port_80.toml
│   │   └── systemd_httpd.toml
│   └── scripts
│       ├── alerts
│       │   ├── nosee-console.sh
│       │   └── test.sh
│       ├── heartbeats
│       │   └── nosee-console.sh
│       ├── loggers
│       │   └── influxdb.sh
│       └── probes
│           ├── apache_modstatus.sh
│           ├── backup.sh
│           ├── cert_check.sh
│           ├── cpu_lms_temp.sh
│           ├── cpu_temp.sh
│           ├── curl.sh
│           ├── curl_expect.sh
│           ├── df.sh
│           ├── ifband.sh
│           ├── load.sh
│           ├── load_win.sh
│           ├── mdstat.sh
│           ├── mem.sh
│           ├── ping.sh
│           ├── port.sh
│           └── systemctl_status.sh
├── go.mod
├── go.sum
├── heartbeat.go
├── host.go
├── log.go
├── loggers.go
├── main.go
├── pid.go
├── probe.go
├── run.go
├── run_alerts.go
├── run_streams.go
├── ssh.go
├── task.go
├── task_result.go
└── tools.go
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled Object files, Static and Dynamic libs (Shared Objects)
2 | *.o
3 | *.a
4 | *.so
5 |
6 | # Folders
7 | _obj
8 | _test
9 |
10 | # Architecture specific extensions/prefixes
11 | *.[568vq]
12 | [568vq].out
13 |
14 | *.cgo1.go
15 | *.cgo2.c
16 | _cgo_defun.c
17 | _cgo_gotypes.go
18 | _cgo_export.*
19 |
20 | _testmain.go
21 |
22 | *.exe
23 | *.test
24 | *.prof
25 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | {one line to give the program's name and a brief idea of what it does.}
635 | Copyright (C) {year} {name of author}
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <http://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | {project} Copyright (C) {year} {fullname}
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Nosee
2 | A nosey, agentless, easy monitoring tool over SSH.
3 |
4 | **Warning: Heavy WIP!**
5 |
6 | What is it?
7 | -----------
8 |
9 | It's an answer for when you find the usual monitoring systems too heavy and complex.
10 |
11 | Nosee uses SSH protocol to execute scripts on monitored systems, checking
12 | for whatever you want. The result is evaluated and Nosee will ring an alert
13 | of your choice if anything is wrong.
14 |
15 | In short : SSH, no agent, simple configuration, usual scripting.
16 |
17 | 
18 |
19 | Currently, Nosee requires bash on monitored hosts. It was successfully
20 | tested with Linux (of course), and also with Windows hosts running Cygwin sshd.
21 |
22 | The Nosee daemon itself can run on virtually any Go-supported platform.
23 |
24 | Show me!
25 | --------
26 |
27 | Here is an alert triggered by a "port connection testing" probe. This alert
28 | is then configured to be sent using `mail` and a HTTP request to Pushover
29 | for realtime mobile device notification.
30 |
31 | 
32 |
33 | You can also have a look at the [Nosee-console](https://github.com/Xfennec/nosee-console)
34 | project, it provides a cool Web monitoring interface.
35 |
36 | How do you build it?
37 | --------------------
38 |
39 | If you have Go installed:
40 |
41 | go get github.com/Xfennec/nosee
42 |
43 | You will then be able to launch the binary located in your Go "bin" directory.
44 | (since Go 1.8, `~/go/bin` if you haven't defined any `$GOPATH`)
45 |
46 | The project is still too young to provide binaries. Later. (and `go get` is so powerful…)
47 |
48 | As a reminder, you can use the `-u` flag to update the project and its dependencies if
49 | you don't want to use `git` for that.
50 |
51 | go get -u github.com/Xfennec/nosee
52 |
53 | How do you use it?
54 | ------------------
55 |
56 | You may have a look at the "template" configuration directory
57 | provided in `$GOPATH/src/github.com/Xfennec/nosee/etc` as a more complete
58 | example or as a base for the following tutorial. (edit `hosts.d/test.toml`
59 | for connection settings and `alerts.d/mail_general.toml` for email address,
60 | at least)
61 |
62 | Here's a general figure of how Nosee works:
63 |
64 | ![Nosee general figure](doc/images/img_general.png)
65 |
66 | ### Small tutorial
67 |
68 | Configuration is mainly done with simple text files using
69 | the [TOML](https://github.com/toml-lang/toml) syntax.
70 |
71 | **Let's monitor CPU temperature of one of our Web servers.**
72 |
73 | ### Step1. Create a *Host* (SSH connection)
74 |
75 | Create a file in the `hosts.d` directory. (ex: `hosts.d/web_myapp.toml`).
76 |
77 | ```toml
78 | name = "MyApp Webserver"
79 | classes = ["linux", "web", "myapp"]
80 |
81 | [network]
82 | host = "192.168.0.100"
83 | port = 22
84 |
85 | [auth]
86 | user = "test5"
87 | password = "test5"
88 | ```
89 |
90 | The `classes` parameter is completely free, you may choose anything that
91 | fits your infrastructure. It will determine what checks will be done on
92 | this host (see below).
93 |
94 | Authentication by password is extremely bad, of course, as is writing down
95 | a password in a configuration file. Nosee supports other (preferred) options
96 | such as passphrases and ssh-agent.
97 |
98 | ### Step2. Create a *Probe*
99 |
100 | Create a file in the `probes.d` directory. (ex: `probes.d/cpu_temp.toml`).
101 |
102 | ```toml
103 | name = "CPU temperature"
104 | targets = ["linux"]
105 |
106 | script = "cpu_temp.sh"
107 |
108 | delay = "1m"
109 |
110 | # Checks
111 |
112 | [[check]]
113 | desc = "critical CPU temperature"
114 | if = "TEMP > 85"
115 | classes = ["critical"]
116 | ```
117 |
118 | The `targets` parameter will match the `classes` of our host. Targets can
119 | be more precise with things like `linux & web`. (both `linux` and `web` classes
120 | must exist in host)
121 |
122 | The `delay` explains that this probe must be run every minute. This is
123 | the lowest delay available.
124 |
125 | Then we have a check. You can have multiple checks in a probe. This check
126 | will look at the `TEMP` value returned by the `cpu_temp.sh`
127 | script (see below) and evaluate the `if` expression. You can have a look
128 | at [govaluate](https://github.com/Knetic/govaluate) for details about
129 | expression's syntax.
130 |
131 | If this expression becomes true, the probe will ring a `critical` alert. Here
132 | again, you are free to use any class of your choice to create your own
133 | error typology. (ex: `["warning", "hardware_guys"]` to ring a specific group
134 | of users in charge of critical failures of the hardware)
135 |
136 | ### Step3. Create a *script* (or use a provided one)
137 |
138 | Scripts are hosted in the `scripts/probes/` directory.
139 |
140 | ```bash
141 | #!/bin/bash
142 |
143 | val=$(cat /sys/class/thermal/thermal_zone0/temp)
144 | temp=$(awk "BEGIN {print $val/1000}")
145 | echo "TEMP:" $temp
146 | ```
147 |
148 | This script will run on monitored hosts (so… stay light). Here, we read
149 | the first thermal zone and divide it by 1000 to get Celsius value.
150 |
151 | Scripts must print `KEY: val` lines to feed checks, as seen above. That's it.
152 |
153 | ### Step4. Create an *Alert*
154 |
155 | Create a file in the `alerts.d` directory. (ex: `alerts.d/mail_julien.toml`).
156 |
157 | ```toml
158 | name = "Mail Julien"
159 |
160 | targets = ["julien", "warning", "critical", "general"]
161 |
162 | command = "mail"
163 |
164 | arguments = [
165 | "-s",
166 | "Nosee: $SUBJECT",
167 | "julien@domain.tld"
168 | ]
169 | ```
170 |
171 | This simple alert will use the usual `mail` command when an alert matches
172 | one (or more) of the given targets. It works exactly the same as classes/targets
173 | for Hosts/Probes to let you create your own vocabulary.
174 | (ex: `"web & production & critical"` is a valid target)
175 |
176 | As you may have seen, some variables are available for arguments, like
177 | the `$SUBJECT` of the alert message.
178 |
179 | There's a special class `general` for very important general messages. At
180 | least one alert must permanently listen to this class.
181 |
182 | ### Step5. Run Nosee!
183 |
184 | cd $GOPATH/bin
185 | ./nosee -l info -c ../src/github.com/Xfennec/nosee/etc/
186 |
187 | You are now ready to burn your Web server CPU to get your alert mail. The `-c`
188 | parameter gives the configuration path, and the `-l` will make Nosee way
189 | more verbose.
190 |
191 | ./nosee help
192 |
193 | … will tell you more about command line arguments.
194 |
195 | Anything else? (WIP)
196 | --------------------
197 |
198 | Oh yes. I want to explain:
199 |
200 | - "threaded" (Goroutines)
201 | - global `nosee.toml` configuration
202 | - SSH runs (group of probes)
203 | - `*` targets
204 | - needed_failures / needed_successes
205 | - defaults
206 | - host overriding of probe's defaults
207 | - use of defaults for probe script arguments
208 | - probe `run_if` condition
209 | - alert scripts
210 | - alert limits
211 | - alert env and stdin
212 | - timeouts
213 | - rescheduling
214 | - GOOD and BAD alerts
215 | - UniqueID for alerts
216 | - configuration "recap/summary" command
217 | - extensive configuration validation (and connection tests)
218 | - alert examples (pushover, SMS, …)
219 | - probe examples!
220 | - check "If" functions (date)
221 | - nosee-alerts.json current alerts
222 | - heartbeat scripts
223 | - systemd / supervisord sample files (see deploy/ directory)
224 | - test subcommand
225 | - loggers / InfluxDB
226 |
227 | ![Nosee, InfluxDB and Grafana](doc/images/nosee-influxdb-grafana.png)
228 |
229 | (example: Nosee → InfluxDB → Grafana)
230 |
231 | What is the future of Nosee? (WIP)
232 | ----------------------------
233 |
234 | - remote Nosee interconnections
235 |
--------------------------------------------------------------------------------
/alert.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "os/exec"
7 | "strings"
8 | "time"
9 | )
10 |
// HourRange holds a Start and an End time of day, each in the form of an int
// array ([0] = hours, [1] = minutes).
type HourRange struct {
	Start [2]int // beginning of the range: [0] = hours, [1] = minutes
	End   [2]int // end of the range: [0] = hours, [1] = minutes
}
16 |
// Alert is the final form of alerts.d files
type Alert struct {
	Name      string      // display name, used in log messages
	Disabled  bool        // NOTE(review): presumably set from the alert file; not read in this file
	Targets   []string    // target expressions matched against message classes ("a & b" requires both)
	Command   string      // command executed when the alert rings
	Arguments []string    // command arguments; variables ($SUBJECT, ...) are expanded before execution
	Hours     []HourRange // hour ranges during which the alert may ring (empty: no hour limit)
	Days      []int       // days of week (time.Weekday values) on which the alert may ring (empty: no day limit)
}
27 |
28 | // Ring will send an AlertMessage using this Alert, executing the
29 | // configured command
30 | func (alert *Alert) Ring(msg *AlertMessage) {
31 | Info.Println("ring: " + alert.Name + ", " + alert.Command /* + " " + strings.Join(alert.Arguments, " ") */)
32 |
33 | varMap := make(map[string]interface{})
34 | varMap["SUBJECT"] = msg.Subject
35 | varMap["TYPE"] = msg.Type.String()
36 | varMap["UNIQUEID"] = msg.UniqueID
37 | varMap["HOST_NAME"] = msg.Hostname
38 | varMap["CLASSES"] = strings.Join(msg.Classes, ",")
39 | varMap["NOSEE_SRV"] = GlobalConfig.Name
40 | varMap["DATETIME"] = msg.DateTime.Format(time.RFC3339)
41 | // "Level" ? (Run, Task, Checks)
42 | // Probe Name, Check Name, Alert Name ?
43 |
44 | var args []string
45 | for _, arg := range alert.Arguments {
46 | expArg := StringExpandVariables(arg, varMap)
47 | args = append(args, expArg)
48 | }
49 |
50 | go func() {
51 | cmd := exec.Command(alert.Command, args...)
52 |
53 | env := os.Environ()
54 | for key, val := range varMap {
55 | env = append(env, fmt.Sprintf("%s=%s", key, InterfaceValueToString(val)))
56 | }
57 | cmd.Env = env
58 |
59 | // we also inject Details thru stdin:
60 | cmd.Stdin = strings.NewReader(msg.Details)
61 |
62 | if cmdOut, err := cmd.CombinedOutput(); err != nil {
63 | if len(msg.Classes) == 1 && msg.Classes[0] == GeneralClass {
64 | Error.Printf("unable to ring an alert to general class! error: %s (%s)\n", err, alert.Command)
65 | return
66 | }
67 |
68 | Warning.Printf("error running alert '%s': %s", alert.Command, err)
69 |
70 | msg.Subject = msg.Subject + " (Fwd)"
71 | prepend := fmt.Sprintf("WARNING: This alert is re-routed to the 'general' class, because\noriginal alert failed with the following error: %s (%s)\nOutput: %s\n\n", err.Error(), alert.Command, string(cmdOut))
72 | msg.Details = prepend + msg.Details
73 | msg.Classes = []string{GeneralClass}
74 | msg.RingAlerts()
75 | }
76 | }()
77 | }
78 |
79 | // Ringable will return true if this Alert is currently able to ring
80 | // (no matching day or hour limit)
81 | func (alert *Alert) Ringable() bool {
82 | now := time.Now()
83 | nowMins := now.Hour()*60 + now.Minute()
84 | nowDay := int(now.Weekday())
85 | hourOk := len(alert.Hours) == 0
86 | for _, hourRange := range alert.Hours {
87 | start := hourRange.Start[0]*60 + hourRange.Start[1]
88 | end := hourRange.End[0]*60 + hourRange.End[1]
89 | if nowMins >= start && nowMins <= end {
90 | hourOk = true
91 | break
92 | }
93 | }
94 | dayOk := len(alert.Days) == 0
95 | for _, day := range alert.Days {
96 | if nowDay == day {
97 | dayOk = true
98 | }
99 | }
100 | return hourOk && dayOk
101 | }
102 |
--------------------------------------------------------------------------------
/alert_message.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "fmt"
6 | "strings"
7 | "time"
8 | )
9 |
// AlertMessageType is the kind of an AlertMessage (see AlertGood and AlertBad)
type AlertMessageType uint8

// AlertMessageType numeric values. Values start at 1 (iota + 1): the zero
// value is not a valid type.
const (
	AlertGood AlertMessageType = iota + 1
	AlertBad
)

// AlertMessageTypeStr stores matching strings, in the same order as the
// AlertMessageType constants (index is the type value minus one)
var AlertMessageTypeStr = [...]string{
	"GOOD",
	"BAD",
}
24 |
// AlertMessage will store the text of the error
type AlertMessage struct {
	Type     AlertMessageType // AlertGood or AlertBad
	Subject  string           // short summary line, prefixed with the type ("[BAD] ...")
	Details  string           // full text of the message
	Classes  []string         // classes matched against Alert targets
	UniqueID string           // unique ID of the failure this message is about
	Hostname string           // name of the concerned host
	DateTime time.Time        // date and time attached to the message
}
35 |
// GeneralClass is a "general" class for very important general messages.
// Messages that no alert is able to deliver are re-routed to this class.
const GeneralClass = "general"
38 |
39 | func (amt AlertMessageType) String() string {
40 | if amt == 0 {
41 | return "INVALID_TYPE"
42 | }
43 | return AlertMessageTypeStr[amt-1]
44 | }
45 |
46 | // AlertMessageCreateForRun creates a new AlertMessage with AlertGood or
47 | // AlertBad type for a Run
48 | func AlertMessageCreateForRun(aType AlertMessageType, run *Run, currentFail *CurrentFail) *AlertMessage {
49 | var message AlertMessage
50 |
51 | message.Subject = fmt.Sprintf("[%s] %s: run error(s)", aType, run.Host.Name)
52 | message.Type = aType
53 | message.UniqueID = currentFail.UniqueID
54 | message.Hostname = run.Host.Name
55 | message.DateTime = run.StartTime
56 |
57 | var details bytes.Buffer
58 |
59 | switch aType {
60 | case AlertBad:
61 | details.WriteString("A least one error occured during a run for this host. (" + run.StartTime.Format("2006-01-02 15:04:05") + ")\n")
62 | details.WriteString("\n")
63 | details.WriteString("Error(s):\n")
64 | for _, err := range run.Errors {
65 | details.WriteString(err.Error() + "\n")
66 | }
67 | case AlertGood:
68 | details.WriteString("No more run errors for this host. (" + run.StartTime.Format("2006-01-02 15:04:05") + ")\n")
69 | }
70 |
71 | details.WriteString("\n")
72 | details.WriteString("Unique failure ID: " + message.UniqueID + "\n")
73 | message.Details = details.String()
74 |
75 | message.Classes = []string{GeneralClass}
76 |
77 | return &message
78 | }
79 |
80 | // AlertMessageCreateForTaskResult creates an AlertGood or AlertBad message for a TaskResult
81 | func AlertMessageCreateForTaskResult(aType AlertMessageType, run *Run, taskResult *TaskResult, currentFail *CurrentFail) *AlertMessage {
82 | var message AlertMessage
83 |
84 | message.Subject = fmt.Sprintf("[%s] %s: %s: task error(s)", aType, run.Host.Name, taskResult.Task.Probe.Name)
85 | message.Type = aType
86 | message.UniqueID = currentFail.UniqueID
87 | message.Hostname = run.Host.Name
88 | message.DateTime = taskResult.StartTime
89 |
90 | var details bytes.Buffer
91 |
92 | switch aType {
93 | case AlertBad:
94 | details.WriteString("A least one error occured during a task for this host. (" + taskResult.StartTime.Format("2006-01-02 15:04:05") + ")\n")
95 | details.WriteString("\n")
96 | details.WriteString("Error(s):\n")
97 | for _, err := range taskResult.Errors {
98 | details.WriteString(err.Error() + "\n")
99 | }
100 | if len(taskResult.Logs) > 0 {
101 | details.WriteString("\n")
102 | details.WriteString("Logs(s):\n")
103 | for _, log := range taskResult.Logs {
104 | details.WriteString(log + "\n")
105 | }
106 | }
107 | case AlertGood:
108 | details.WriteString("No more errors for this task on this host. (" + taskResult.StartTime.Format("2006-01-02 15:04:05") + ")\n")
109 | }
110 |
111 | details.WriteString("\n")
112 | details.WriteString("Unique failure ID: " + message.UniqueID + "\n")
113 | message.Details = details.String()
114 |
115 | message.Classes = []string{GeneralClass}
116 |
117 | return &message
118 | }
119 |
120 | // AlertMessageCreateForCheck creates a AlertGood or AlertBad message for a Check
121 | func AlertMessageCreateForCheck(aType AlertMessageType, run *Run, taskRes *TaskResult, check *Check, currentFail *CurrentFail) *AlertMessage {
122 | var message AlertMessage
123 |
124 | // Host: Check (Task)
125 | message.Subject = fmt.Sprintf("[%s] %s: %s (%s)", aType, run.Host.Name, check.Desc, taskRes.Task.Probe.Name)
126 | message.Type = aType
127 | message.UniqueID = currentFail.UniqueID
128 | message.Hostname = run.Host.Name
129 |
130 | var details bytes.Buffer
131 |
132 | switch aType {
133 | case AlertBad:
134 | details.WriteString("An alert **is** ringing.\n\n")
135 | message.DateTime = currentFail.FailStart
136 | case AlertGood:
137 | details.WriteString("This alert is **no more** ringing.\n\n")
138 | message.DateTime = taskRes.StartTime
139 | }
140 |
141 | details.WriteString("Failure time: " + currentFail.FailStart.Format("2006-01-02 15:04:05") + "\n")
142 | details.WriteString("Last task time: " + taskRes.StartTime.Format("2006-01-02 15:04:05") + "\n")
143 | details.WriteString("Class(es): " + strings.Join(check.Classes, ", ") + "\n")
144 | details.WriteString("Failed condition was: " + check.If.String() + "\n")
145 | details.WriteString("\n")
146 | details.WriteString("Values:\n")
147 | for _, token := range check.If.Vars() {
148 | if IsAllUpper(token) {
149 | details.WriteString("- " + token + ": " + taskRes.Values[token] + "\n")
150 | } else {
151 | val := InterfaceValueToString(taskRes.Task.Probe.Defaults[token])
152 | if _, exists := taskRes.Host.Defaults[token]; exists == true {
153 | val = InterfaceValueToString(taskRes.Host.Defaults[token])
154 | }
155 | details.WriteString("- " + token + ": " + val + "\n")
156 | }
157 | }
158 | details.WriteString("\n")
159 | details.WriteString(fmt.Sprintf("All values for this run (%s):\n", run.Duration))
160 | for _, tr := range run.TaskResults {
161 | details.WriteString(fmt.Sprintf("- %s (%s):\n", tr.Task.Probe.Name, tr.Duration))
162 | for key, val := range tr.Values {
163 | details.WriteString("--- " + key + ": " + val + "\n")
164 | }
165 | }
166 | details.WriteString("\n")
167 | details.WriteString("Unique failure ID: " + message.UniqueID + "\n")
168 | message.Details = details.String()
169 |
170 | message.Classes = check.Classes
171 |
172 | return &message
173 | }
174 |
175 | // Dump prints AlertMessage informations on the screen for debugging purposes
176 | func (msg *AlertMessage) Dump() {
177 | fmt.Printf("---\n")
178 | fmt.Printf("Subject: %s\n", msg.Subject)
179 | fmt.Printf("%s\n---\n", msg.Details)
180 | }
181 |
182 | // RingAlerts will search and ring all alerts for this AlertMessage
183 | func (msg *AlertMessage) RingAlerts() {
184 | ringCount := 0
185 | for _, alert := range globalAlerts {
186 | if msg.MatchAlertTargets(alert) {
187 | if alert.Ringable() {
188 | alert.Ring(msg)
189 | ringCount++
190 | }
191 | }
192 | }
193 |
194 | if ringCount == 0 {
195 | // if class is already "general", we're f*cked :(
196 | if len(msg.Classes) == 1 && msg.Classes[0] == GeneralClass {
197 | Error.Printf("unable to ring an alert : can't match the 'general' class!\n")
198 | return
199 | }
200 |
201 | Warning.Printf("no matching alert for this failure: '%s' with class(es): %s\n", msg.Subject, strings.Join(msg.Classes, ", "))
202 |
203 | // forward the alert to 'general' class:
204 | msg.Subject = msg.Subject + " (Fwd)"
205 | prepend := "WARNING: This alert is re-routed to the 'general' class, because no alert matches its orginial classes (" + strings.Join(msg.Classes, ", ") + ")\n\n"
206 | msg.Details = prepend + msg.Details
207 | msg.Classes = []string{GeneralClass}
208 | msg.RingAlerts()
209 | }
210 | }
211 |
212 | // HasClass returns true if this AlertMessage has this class
213 | func (msg *AlertMessage) HasClass(class string) bool {
214 | if class == "*" {
215 | return true
216 | }
217 |
218 | for _, hClass := range msg.Classes {
219 | if hClass == class {
220 | return true
221 | }
222 | }
223 | return false
224 | }
225 |
226 | // MatchAlertTargets returns true if this AlertMessage matches alert's classes
227 | func (msg *AlertMessage) MatchAlertTargets(alert *Alert) bool {
228 | for _, pTargets := range alert.Targets {
229 | tokens := strings.Split(pTargets, "&")
230 | matched := 0
231 | mustMatch := len(tokens)
232 | for _, token := range tokens {
233 | ttoken := strings.TrimSpace(token)
234 | if msg.HasClass(ttoken) {
235 | matched++
236 | }
237 | }
238 | if matched == mustMatch {
239 | return true
240 | }
241 | }
242 | return false
243 | }
244 |
--------------------------------------------------------------------------------
/check_functions.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "regexp"
6 | "time"
7 |
8 | "github.com/Knetic/govaluate"
9 | )
10 |
// CheckFunctions will hold all custom govaluate functions for Check 'If'
// expressions, indexed by function name. It stays nil until
// CheckFunctionsInit is called.
var CheckFunctions map[string]govaluate.ExpressionFunction
14 |
15 | // CheckFunctionsInit will initialize CheckFunctions global variable
16 | func CheckFunctionsInit() {
17 | CheckFunctions = map[string]govaluate.ExpressionFunction{
18 |
19 | "strlen": func(args ...interface{}) (interface{}, error) {
20 | length := len(args[0].(string))
21 | return (float64)(length), nil
22 | },
23 |
24 | "ping": func(args ...interface{}) (interface{}, error) {
25 | if len(args) > 0 {
26 | return nil, fmt.Errorf("ping function: too much arguments")
27 | }
28 | return (string)("pong"), nil
29 | },
30 |
31 | "date": func(args ...interface{}) (interface{}, error) {
32 | if len(args) != 1 {
33 | return nil, fmt.Errorf("date function: wrong argument count (1 required)")
34 | }
35 | format := args[0].(string)
36 | now := time.Now()
37 | switch format {
38 | case "hour":
39 | return (float64)(now.Hour()), nil
40 | case "minute":
41 | return (float64)(now.Minute()), nil
42 | case "time":
43 | return (float64)((float64)(now.Hour()) + (float64)(now.Minute())/60.0), nil
44 | case "dow", "day-of-week":
45 | // Sunday = 0
46 | return (float64)(now.Weekday()), nil
47 | case "dom", "day-of-month":
48 | return (float64)(now.Day()), nil
49 | case "now":
50 | return (float64)(now.Unix()), nil
51 | }
52 |
53 | if match, _ := regexp.MatchString("^[0-9]{1,2}:[0-9]{2}$", format); match == true {
54 | t, err := alertCheckHour(format)
55 | if err != nil {
56 | return nil, fmt.Errorf("date function: invalid hour '%s': %s", format, err)
57 | }
58 | return (float64)((float64)(t[0]) + (float64)(t[1])/60.0), nil
59 | }
60 |
61 | return nil, fmt.Errorf("date function: invalid format '%s'", format)
62 | },
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/config.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "os"
7 | "path"
8 | "time"
9 |
10 | "github.com/BurntSushi/toml"
11 | )
12 |
// tomlConfig is the raw form of the nosee.toml config file, as decoded by
// the TOML library (see GlobalConfigRead for defaults and validation)
type tomlConfig struct {
	Name            string
	StartTimeSpread Duration `toml:"start_time_spread"`           // can't be more than a minute
	SSHConnTimeWarn Duration `toml:"ssh_connection_time_warn"`    // can't be less than a second
	SSHBlindTrust   bool     `toml:"ssh_blindtrust_fingerprints"` // NOTE(review): presumably disables SSH fingerprint checks — confirm in ssh.go
	SavePath        string   `toml:"save_path"`
	HeartbeatDelay  Duration `toml:"heartbeat_delay"` // can't be less than 5 seconds
}
21 |
// Config is the final form of the nosee.toml config file
type Config struct {
	configPath   string // configuration directory, as given to GlobalConfigRead
	loadDisabled bool   // set to false by GlobalConfigRead
	doConnTest   bool   // set to true by GlobalConfigRead

	Name                   string        // default: ""
	StartTimeSpreadSeconds int           // default: 15
	SSHConnTimeWarn        time.Duration // default: 10 seconds
	SSHBlindTrust          bool          // default: false
	SavePath               string        // default: "./"
	HeartbeatDelay         time.Duration // default: 30 seconds
}
35 |
// GlobalConfig exports the Nosee server configuration (its Name is
// given to alert commands as the NOSEE_SRV variable)
var GlobalConfig *Config
38 |
39 | // GlobalConfigRead reads given file and returns a Config
40 | func GlobalConfigRead(dir, file string) (*Config, error) {
41 | var config Config
42 | var tConfig tomlConfig
43 |
44 | // defaults:
45 | // config.xxx -> default if config file not exists
46 | // tConfig.xxx -> default if parameter's not provided in config file
47 | config.Name = ""
48 | tConfig.Name = ""
49 |
50 | config.StartTimeSpreadSeconds = 15
51 | tConfig.StartTimeSpread.Duration = 15 * time.Second
52 |
53 | config.SSHConnTimeWarn = 10 * time.Second
54 | tConfig.SSHConnTimeWarn.Duration = config.SSHConnTimeWarn
55 |
56 | config.SSHBlindTrust = false
57 | tConfig.SSHBlindTrust = false
58 |
59 | config.SavePath = "./"
60 | tConfig.SavePath = config.SavePath
61 |
62 | config.HeartbeatDelay = 30 * time.Second
63 | tConfig.HeartbeatDelay.Duration = config.HeartbeatDelay
64 |
65 | config.configPath = dir
66 | config.loadDisabled = false
67 | config.doConnTest = true
68 |
69 | if stat, err := os.Stat(config.configPath); err != nil || !stat.Mode().IsDir() {
70 | return nil, fmt.Errorf("configuration directory not found: %s (%s)", err, config.configPath)
71 | }
72 |
73 | configPath := path.Clean(dir + "/" + file)
74 |
75 | if stat, err := os.Stat(configPath); err != nil || !stat.Mode().IsRegular() {
76 | Warning.Printf("no %s file, using defaults\n", configPath)
77 | return &config, nil
78 | }
79 |
80 | if _, err := toml.DecodeFile(configPath, &tConfig); err != nil {
81 | return nil, fmt.Errorf("decoding %s: %s", file, err)
82 | }
83 |
84 | if tConfig.Name != "" {
85 | config.Name = tConfig.Name
86 | }
87 |
88 | if tConfig.StartTimeSpread.Duration > (1 * time.Minute) {
89 | return nil, errors.New("'start_time_spread' can't be more than a minute")
90 | }
91 | config.StartTimeSpreadSeconds = int(tConfig.StartTimeSpread.Duration.Seconds())
92 |
93 | if tConfig.SSHConnTimeWarn.Duration < (1 * time.Second) {
94 | return nil, errors.New("'ssh_connection_time_warn' can't be less than a second")
95 | }
96 | config.SSHConnTimeWarn = tConfig.SSHConnTimeWarn.Duration
97 |
98 | config.SSHBlindTrust = tConfig.SSHBlindTrust
99 |
100 | // should check if writable
101 | config.SavePath = tConfig.SavePath
102 |
103 | if tConfig.HeartbeatDelay.Duration < (5 * time.Second) {
104 | return nil, errors.New("'heartbeat_delay' can't be less than 5 seconds")
105 | }
106 | config.HeartbeatDelay = tConfig.HeartbeatDelay.Duration
107 |
108 | return &config, nil
109 | }
110 |
--------------------------------------------------------------------------------
/config_alert.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "io/ioutil"
7 | "os"
8 | "os/exec"
9 | "path"
10 | "strconv"
11 | "strings"
12 | )
13 |
// tomlAlert is the raw form of an alert definition file, as decoded from
// TOML. tomlAlertToAlert validates it and converts it to an Alert.
type tomlAlert struct {
	Name     string
	Disabled bool
	// Targets lists class expressions ("a & b"), "*" or the general class.
	Targets []string
	// Command is a script name under scripts/alerts/ or a command found in PATH.
	Command   string
	Arguments []string
	// Hours holds ranges written as "8:30 - 12:30".
	Hours []string
	// Days holds week days from 0 to 7 (7 is rewritten to 0 during validation).
	Days []int
}
23 |
// alertCheckHour parses a time of day written as "H:M" (ex: "19:30") and
// returns it as a two element array: index 0 is the hour (0-23), index 1 the
// minute (0-59). An error describes the first problem found.
func alertCheckHour(hour string) ([2]int, error) {
	var parsed [2]int

	fields := strings.Split(hour, ":")
	if len(fields) != 2 {
		return parsed, fmt.Errorf("invalid format '%s' (ex: '19:30')", hour)
	}

	h, err := strconv.Atoi(fields[0])
	parsed[0] = h
	if err != nil {
		return parsed, fmt.Errorf("can't convert '%s' hour to integer: %s", hour, err)
	}

	m, err := strconv.Atoi(fields[1])
	parsed[1] = m
	if err != nil {
		return parsed, fmt.Errorf("can't convert '%s' minute to integer: %s", hour, err)
	}

	// range checks, lower bounds first
	switch {
	case h < 0:
		return parsed, fmt.Errorf("hour can't be less than 0: %s", hour)
	case m < 0:
		return parsed, fmt.Errorf("minute can't be less than 0: %s", hour)
	case h > 23:
		return parsed, fmt.Errorf("hour can't more than 23: %s", hour)
	case m > 59:
		return parsed, fmt.Errorf("minute can't more than 59: %s", hour)
	}

	return parsed, nil
}
56 |
57 | func alertCheckHours(hours []string) ([]HourRange, error) {
58 | var hourRanges []HourRange
59 |
60 | for _, hour := range hours {
61 | var (
62 | hourRange HourRange
63 | err error
64 | )
65 |
66 | rng := strings.Split(hour, "-")
67 | if len(rng) != 2 {
68 | return nil, fmt.Errorf("invalid format '%s' (ex: '8:90 - 19:00')", hour)
69 | }
70 | rng[0] = strings.TrimSpace(rng[0])
71 | rng[1] = strings.TrimSpace(rng[1])
72 |
73 | if hourRange.Start, err = alertCheckHour(rng[0]); err != nil {
74 | return nil, fmt.Errorf("invalid start hour: %s", err)
75 | }
76 | if hourRange.End, err = alertCheckHour(rng[1]); err != nil {
77 | return nil, fmt.Errorf("invalid end hour: %s", err)
78 | }
79 |
80 | start := hourRange.Start[0]*60 + hourRange.Start[1]
81 | end := hourRange.End[0]*60 + hourRange.End[1]
82 | if start >= end {
83 | return nil, fmt.Errorf("end of the hour range (%s) is before its start", hour)
84 | }
85 |
86 | hourRanges = append(hourRanges, hourRange)
87 | }
88 | return hourRanges, nil
89 | }
90 |
// alertCheckAndCleanDays validates week days (0 to 7) and normalizes the
// slice in place: every 7 is rewritten to 0. It stops at the first invalid
// value, leaving already normalized entries modified.
func alertCheckAndCleanDays(days []int) error {
	for i := range days {
		switch d := days[i]; {
		case d < 0:
			return fmt.Errorf("day can't be less than 0: %d", d)
		case d > 7:
			return fmt.Errorf("day can't be more than 7: %d", d)
		case d == 7:
			days[i] = 0
		}
	}
	return nil
}
106 |
107 | func tomlAlertToAlert(tAlert *tomlAlert, config *Config) (*Alert, error) {
108 | var alert Alert
109 |
110 | if tAlert.Disabled == true && config.loadDisabled == false {
111 | return nil, nil
112 | }
113 |
114 | if tAlert.Name == "" {
115 | return nil, errors.New("invalid or missing 'name'")
116 | }
117 | alert.Name = tAlert.Name
118 |
119 | if tAlert.Command == "" {
120 | return nil, errors.New("invalid or missing 'command'")
121 | }
122 |
123 | scriptPath := path.Clean(config.configPath + "/scripts/alerts/" + tAlert.Command)
124 | stat, err := os.Stat(scriptPath)
125 |
126 | if err == nil {
127 | if !stat.Mode().IsRegular() {
128 | return nil, fmt.Errorf("is not a regular 'script' file '%s'", scriptPath)
129 | }
130 | tAlert.Command = scriptPath
131 | } else {
132 | path, errp := exec.LookPath(tAlert.Command)
133 | if errp != nil {
134 | return nil, fmt.Errorf("'%s' command not found in PATH: %s", tAlert.Command, errp)
135 | }
136 | tAlert.Command = path
137 | }
138 |
139 | alert.Command = tAlert.Command
140 |
141 | _, err = ioutil.ReadFile(alert.Command)
142 | if err != nil {
143 | return nil, fmt.Errorf("error reading script file '%s': %s", alert.Command, err)
144 | }
145 |
146 | if tAlert.Targets == nil {
147 | return nil, errors.New("no valid 'targets' parameter found")
148 | }
149 |
150 | if len(tAlert.Targets) == 0 {
151 | return nil, errors.New("empty 'targets'")
152 | }
153 | // explode targets on & and check IsValidTokenName
154 | hasGeneralClass := false
155 | for _, targets := range tAlert.Targets {
156 | if targets == "*" || targets == GeneralClass {
157 | hasGeneralClass = true
158 | continue
159 | }
160 | tokens := strings.Split(targets, "&")
161 | for _, token := range tokens {
162 | ttoken := strings.TrimSpace(token)
163 | if !IsValidTokenName(ttoken) {
164 | return nil, fmt.Errorf("invalid 'target' class name '%s'", ttoken)
165 | }
166 | }
167 | }
168 | alert.Targets = tAlert.Targets
169 |
170 | alert.Arguments = tAlert.Arguments
171 |
172 | hours, err := alertCheckHours(tAlert.Hours)
173 | if err != nil {
174 | return nil, fmt.Errorf("'hours' parameter: %s", err)
175 | }
176 | alert.Hours = hours
177 |
178 | if err := alertCheckAndCleanDays(tAlert.Days); err != nil {
179 | return nil, fmt.Errorf("'days' parameter: %s", err)
180 | }
181 | alert.Days = tAlert.Days
182 |
183 | if hasGeneralClass == true && len(alert.Hours) > 0 && len(alert.Days) > 0 {
184 | return nil, fmt.Errorf("a 'general' (or '*') alert can't have hours/days restrictions, since you may miss alerts")
185 | }
186 |
187 | return &alert, nil
188 | }
189 |
--------------------------------------------------------------------------------
/config_host.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "os"
7 | "time"
8 |
9 | "golang.org/x/crypto/ssh"
10 | )
11 |
// tomlNetwork is the raw [network] section of a host file. tomlHostToHost
// requires a non-empty Host, a non-zero Port and an SSHConnTimeWarn of at
// least one second.
type tomlNetwork struct {
	Host            string
	Port            int
	Ciphers         []string
	SSHConnTimeWarn Duration `toml:"ssh_connection_time_warn"`
}
18 |
// tomlAuth is the raw [auth] section of a host file. tomlHostToHost accepts
// exactly one method: Password, Key (optionally with KeyPassphrase) or
// SSHAgent (whose optional Pubkey is passed to SSHAgent()).
type tomlAuth struct {
	User          string
	Password      string
	Key           string
	KeyPassphrase string `toml:"key_passphrase"`
	SSHAgent      bool   `toml:"ssh_agent"`
	Pubkey        string
}
27 |
// tomlHost is the raw form of a host definition file, as decoded from TOML.
// tomlHostToHost validates it and converts it to a Host.
type tomlHost struct {
	Disabled bool
	Name     string
	Network  tomlNetwork
	Auth     tomlAuth
	Classes  []string
	// Default holds the [[default]] entries of the host file.
	Default []tomlDefault
}
36 |
37 | func tomlHostToHost(tHost *tomlHost, config *Config, filename string) (*Host, error) {
38 | var (
39 | connection Connection
40 | host Host
41 | )
42 |
43 | host.Connection = &connection
44 | host.Filename = filename
45 |
46 | if tHost.Disabled == true && config.loadDisabled == false {
47 | return nil, nil
48 | }
49 | host.Disabled = (tHost.Disabled == true)
50 |
51 | if tHost.Name == "" {
52 | return nil, errors.New("invalid or missing 'name'")
53 | }
54 | host.Name = tHost.Name
55 |
56 | if tHost.Classes == nil {
57 | return nil, errors.New("no valid 'classes' parameter found")
58 | }
59 |
60 | if len(tHost.Classes) == 0 {
61 | return nil, errors.New("empty classes")
62 | }
63 | for _, class := range tHost.Classes {
64 | if !IsValidTokenName(class) {
65 | return nil, fmt.Errorf("invalid class name '%s'", class)
66 | }
67 | }
68 | host.Classes = tHost.Classes
69 |
70 | host.Defaults = make(map[string]interface{})
71 | if err := checkTomlDefault(host.Defaults, tHost.Default); err != nil {
72 | return nil, err
73 | }
74 |
75 | if tHost.Network.Host == "" {
76 | return nil, errors.New("[network] section, invalid or missing 'host'")
77 | }
78 | connection.Host = tHost.Network.Host
79 |
80 | if tHost.Network.Port == 0 {
81 | return nil, errors.New("[network] section, invalid or missing 'port'")
82 | }
83 | connection.Port = tHost.Network.Port
84 |
85 | if tHost.Network.SSHConnTimeWarn.Duration < (1 * time.Second) {
86 | return nil, errors.New("'ssh_connection_time_warn' can't be less than a second")
87 | }
88 | connection.SSHConnTimeWarn = tHost.Network.SSHConnTimeWarn.Duration
89 |
90 | if tHost.Auth.User == "" {
91 | return nil, errors.New("[auth] section, invalid or missing 'user'")
92 | }
93 | connection.User = tHost.Auth.User
94 | connection.Ciphers = tHost.Network.Ciphers
95 |
96 | if tHost.Auth.Key != "" && tHost.Auth.Password != "" {
97 | return nil, errors.New("[auth] section, can't use key and password at the same time (see key_passphrase parameter, perhaps?)")
98 | }
99 | if tHost.Auth.KeyPassphrase != "" && tHost.Auth.Password != "" {
100 | return nil, errors.New("[auth] section, can't use key_passphrase and password at the same time")
101 | }
102 | if tHost.Auth.SSHAgent == true && tHost.Auth.Password != "" {
103 | return nil, errors.New("[auth] section, can't use SSH agent and password at the same time")
104 | }
105 | if tHost.Auth.SSHAgent == true && tHost.Auth.KeyPassphrase != "" {
106 | return nil, errors.New("[auth] section, can't use SSH agent and key_passphrase at the same time")
107 | }
108 | if tHost.Auth.SSHAgent == true && tHost.Auth.Key != "" {
109 | return nil, errors.New("[auth] section, can't use SSH agent and key at the same time (see pubkey parameter, perhaps?)")
110 | }
111 |
112 | if tHost.Auth.Key != "" {
113 | fd, err := os.Open(tHost.Auth.Key)
114 | if err != nil {
115 | return nil, fmt.Errorf("can't access to key '%s': %s", tHost.Auth.Key, err)
116 | }
117 | fd.Close()
118 | }
119 |
120 | // !!! there's many returns following this line, be careful
121 |
122 | if tHost.Auth.Password != "" {
123 | connection.Auths = []ssh.AuthMethod{
124 | ssh.Password(tHost.Auth.Password),
125 | }
126 | return &host, nil
127 | }
128 |
129 | if tHost.Auth.SSHAgent == true {
130 | agent, err := SSHAgent(tHost.Auth.Pubkey)
131 | if err != nil {
132 | return nil, err
133 | }
134 | connection.Auths = []ssh.AuthMethod{
135 | agent,
136 | }
137 | return &host, nil
138 | }
139 |
140 | if tHost.Auth.Key != "" && tHost.Auth.KeyPassphrase == "" {
141 | connection.Auths = []ssh.AuthMethod{
142 | PublicKeyFile(tHost.Auth.Key),
143 | }
144 | return &host, nil
145 | }
146 |
147 | if tHost.Auth.Key != "" && tHost.Auth.KeyPassphrase != "" {
148 | connection.Auths = []ssh.AuthMethod{
149 | PublicKeyFilePassPhrase(tHost.Auth.Key, tHost.Auth.KeyPassphrase),
150 | }
151 | return &host, nil
152 | }
153 |
154 | return nil, errors.New("[auth] section, at least one auth method is needed (password, key or ssh_agent)")
155 | }
156 |
--------------------------------------------------------------------------------
/config_probe.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "io/ioutil"
7 | "os"
8 | "path"
9 | "reflect"
10 | "strings"
11 | "time"
12 |
13 | "github.com/Knetic/govaluate"
14 | )
15 |
// Duration wraps time.Duration so that textual values such as "15s" or "5m"
// found in TOML files can be decoded (see UnmarshalText).
type Duration struct {
	time.Duration
}

// UnmarshalText satisfies the encoding.TextUnmarshaler interface: text is
// parsed with time.ParseDuration and the result stored in the receiver. The
// parse error, if any, is returned unchanged.
func (d *Duration) UnmarshalText(text []byte) error {
	parsed, err := time.ParseDuration(string(text))
	d.Duration = parsed
	return err
}
27 |
// tomlDefault is a raw [[default]] entry (from a probe or a host file): a
// named value. checkTomlDefault only accepts string, integer and float
// values.
type tomlDefault struct {
	Name  string
	Value interface{}
}
32 |
// tomlCheck is a raw [[check]] entry of a probe file. tomlProbeToProbe
// requires Desc, If (an expression) and Classes; NeededFailures defaults to
// 1 and NeededSuccesses defaults to NeededFailures when left at zero.
type tomlCheck struct {
	Desc            string
	If              string
	Classes         []string
	NeededFailures  int `toml:"needed_failures"`
	NeededSuccesses int `toml:"needed_successes"`
}
40 |
// tomlProbe is the raw form of a probe definition file, as decoded from
// TOML. tomlProbeToProbe validates it and converts it to a Probe.
type tomlProbe struct {
	Name     string
	Disabled bool
	// Script is a file name, relative to the scripts/probes/ directory.
	Script  string
	Targets []string
	// Delay must be a whole number of minutes (at least one).
	Delay Duration
	// Timeout falls back to 20 seconds when not set.
	Timeout   Duration
	Arguments string
	Default   []tomlDefault
	Check     []tomlCheck
	// RunIf is an optional expression; it can't reference variables.
	RunIf string `toml:"run_if"`
}
53 |
54 | func checkTomlDefault(pDefaults map[string]interface{}, tDefaults []tomlDefault) error {
55 | for _, tDefault := range tDefaults {
56 |
57 | if tDefault.Name == "" {
58 | return errors.New("[[default]] with invalid or missing 'name'")
59 | }
60 |
61 | if IsAllUpper(tDefault.Name) {
62 | return fmt.Errorf("[[default]] name is invalid (all uppercase): %s", tDefault.Name)
63 | }
64 |
65 | valid := false
66 | switch tDefault.Value.(type) {
67 | case string:
68 | valid = true
69 | case int32:
70 | valid = true
71 | case int64:
72 | valid = true
73 | case float32:
74 | valid = true
75 | case float64:
76 | valid = true
77 | }
78 |
79 | if valid == false {
80 | return fmt.Errorf("[[default]] invalid value type '%s' for '%s'", reflect.TypeOf(tDefault.Value), tDefault.Name)
81 | }
82 |
83 | if _, exists := pDefaults[tDefault.Name]; exists == true {
84 | return fmt.Errorf("Config error: duplicate default name '%s'", tDefault.Name)
85 | }
86 |
87 | pDefaults[tDefault.Name] = tDefault.Value
88 | }
89 | return nil
90 | }
91 |
92 | func tomlProbeToProbe(tProbe *tomlProbe, config *Config, filename string) (*Probe, error) {
93 | var probe Probe
94 |
95 | if tProbe.Disabled == true && config.loadDisabled == false {
96 | return nil, nil
97 | }
98 | probe.Disabled = (tProbe.Disabled == true)
99 |
100 | probe.Filename = filename
101 |
102 | if tProbe.Name == "" {
103 | return nil, errors.New("invalid or missing 'name'")
104 | }
105 | probe.Name = tProbe.Name
106 |
107 | if tProbe.Script == "" {
108 | return nil, errors.New("invalid or missing 'script'")
109 | }
110 |
111 | scriptPath := path.Clean(config.configPath + "/scripts/probes/" + tProbe.Script)
112 | stat, err := os.Stat(scriptPath)
113 |
114 | if err != nil {
115 | return nil, fmt.Errorf("invalid 'script' file '%s': %s", scriptPath, err)
116 | }
117 |
118 | if !stat.Mode().IsRegular() {
119 | return nil, fmt.Errorf("is not a regular 'script' file '%s'", scriptPath)
120 | }
121 | probe.Script = scriptPath
122 |
123 | _, err = ioutil.ReadFile(scriptPath)
124 | if err != nil {
125 | return nil, fmt.Errorf("error reading script file '%s': %s", scriptPath, err)
126 | }
127 |
128 | if tProbe.Targets == nil {
129 | return nil, errors.New("no valid 'targets' parameter found")
130 | }
131 |
132 | if len(tProbe.Targets) == 0 {
133 | return nil, errors.New("empty 'targets'")
134 | }
135 | // explode targets on & and check IsValidTokenName
136 | for _, targets := range tProbe.Targets {
137 | if targets == "*" {
138 | continue
139 | }
140 | tokens := strings.Split(targets, "&")
141 | for _, token := range tokens {
142 | ttoken := strings.TrimSpace(token)
143 | if !IsValidTokenName(ttoken) {
144 | return nil, fmt.Errorf("invalid 'target' class name '%s'", ttoken)
145 | }
146 | }
147 | }
148 | probe.Targets = tProbe.Targets
149 |
150 | if tProbe.Delay.Duration == 0 {
151 | return nil, errors.New("invalid or missing 'delay'")
152 | }
153 |
154 | if tProbe.Delay.Duration < (1 * time.Minute) {
155 | return nil, errors.New("'delay' can't be less than a minute")
156 | }
157 |
158 | minutes := float64(tProbe.Delay.Duration) / float64(time.Minute)
159 | if float64(int(minutes)) != minutes {
160 | return nil, errors.New("'delay' granularity is in minutes (ex: 5m)")
161 | }
162 | probe.Delay = tProbe.Delay.Duration
163 |
164 | if tProbe.Timeout.Duration == 0 {
165 | //~ return nil, errors.New("invalid or missing 'timeout'")
166 | tProbe.Timeout.Duration = 20 * time.Second
167 | }
168 |
169 | if tProbe.Timeout.Duration < (1 * time.Second) {
170 | return nil, errors.New("'timeout' can't be less than 1 second")
171 | }
172 | probe.Timeout = tProbe.Timeout.Duration
173 |
174 | // should warn about dangerous characters? (;& …)
175 | probe.Arguments = tProbe.Arguments
176 |
177 | if tProbe.RunIf != "" {
178 | expr, err := govaluate.NewEvaluableExpressionWithFunctions(tProbe.RunIf, CheckFunctions)
179 | if err != nil {
180 | return nil, fmt.Errorf("invalid 'run_if' expression: %s (\"%s\")", err, tProbe.RunIf)
181 | }
182 | if vars := expr.Vars(); len(vars) > 0 {
183 | return nil, fmt.Errorf("undefined variable(s) in 'run_if' expression: %s", strings.Join(vars, ", "))
184 | }
185 | probe.RunIf = expr
186 | }
187 |
188 | probe.Defaults = make(map[string]interface{})
189 | if err := checkTomlDefault(probe.Defaults, tProbe.Default); err != nil {
190 | return nil, err
191 | }
192 |
193 | for index, tCheck := range tProbe.Check {
194 | var check Check
195 |
196 | check.Index = index
197 |
198 | if tCheck.Desc == "" {
199 | return nil, errors.New("[[check]] with invalid or missing 'desc'")
200 | }
201 | check.Desc = tCheck.Desc
202 |
203 | if tCheck.If == "" {
204 | return nil, errors.New("[[check]] with invalid or missing 'if'")
205 | }
206 | expr, err := govaluate.NewEvaluableExpressionWithFunctions(tCheck.If, CheckFunctions)
207 | if err != nil {
208 | return nil, fmt.Errorf("[[check]] invalid 'if' expression: %s (\"%s\")", err, tCheck.If)
209 | }
210 | check.If = expr
211 |
212 | if tCheck.Classes == nil {
213 | return nil, errors.New("no valid 'classes' parameter found")
214 | }
215 |
216 | if len(tCheck.Classes) == 0 {
217 | return nil, errors.New("empty classes")
218 | }
219 | for _, class := range tCheck.Classes {
220 | if !IsValidTokenName(class) {
221 | return nil, fmt.Errorf("invalid class name '%s'", class)
222 | }
223 | }
224 | check.Classes = tCheck.Classes
225 |
226 | if tCheck.NeededFailures == 0 {
227 | tCheck.NeededFailures = 1
228 | }
229 | check.NeededFailures = tCheck.NeededFailures
230 |
231 | if tCheck.NeededSuccesses == 0 {
232 | tCheck.NeededSuccesses = check.NeededFailures
233 | }
234 | check.NeededSuccesses = tCheck.NeededSuccesses
235 |
236 | probe.Checks = append(probe.Checks, &check)
237 | }
238 |
239 | if miss := probe.MissingDefaults(); len(miss) > 0 {
240 | return nil, fmt.Errorf("missing defaults (used in 'if' expressions or 'arguments' parameter): %s", strings.Join(miss, ", "))
241 | }
242 |
243 | return &probe, nil
244 | }
245 |
--------------------------------------------------------------------------------
/current_fails.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "os"
6 | "path"
7 | "sync"
8 | "time"
9 |
10 | uuid "github.com/satori/go.uuid"
11 | )
12 |
// CurrentFail type holds information about a failure currently detected
// and not resolved yet
type CurrentFail struct {
	FailStart time.Time // set when the fail is created (see CurrentFailGetAndInc)
	FailCount int       // number of failures recorded so far
	OkCount   int       // successes counted since the last failure (reset on each failure)
	UniqueID  string    // UUID generated when the fail is created

	// optional "payload"
	RelatedTask  *Task // for Checks (!!)
	RelatedHost  *Host // for Runs
	RelatedTTask *Task // for Tasks
}
26 |
var (
	// currentFails maps a hash to its ongoing failure. It is created by
	// CurrentFailsCreate and saved to / loaded from disk as JSON.
	currentFails map[string]*CurrentFail
	// currentFailsMutex is locked by the functions that modify currentFails.
	currentFailsMutex sync.Mutex
)
31 |
// statusFile is the name of the JSON file, created in GlobalConfig.SavePath,
// that holds the saved current fails.
const statusFile string = "nosee-fails.json"
33 |
34 | // CurrentFailsCreate initialize the global currentFails variable
35 | func CurrentFailsCreate() {
36 | currentFails = make(map[string]*CurrentFail)
37 | }
38 |
39 | // CurrentFailsSave dumps current alerts to disk
40 | func CurrentFailsSave() {
41 | // doing this in a go routine allows this function to be called
42 | // by functions that are already locking the mutex
43 | go func() {
44 | currentFailsMutex.Lock()
45 | defer currentFailsMutex.Unlock()
46 |
47 | path := path.Clean(GlobalConfig.SavePath + "/" + statusFile)
48 | f, err := os.Create(path)
49 | if err != nil {
50 | Error.Printf("can't save fails in '%s': %s (see save_path param?)", path, err)
51 | return
52 | }
53 | defer f.Close()
54 |
55 | enc := json.NewEncoder(f)
56 | err = enc.Encode(¤tFails)
57 | if err != nil {
58 | Error.Printf("fails json encode: %s", err)
59 | return
60 | }
61 | Info.Printf("current fails successfully saved to '%s'", path)
62 | }()
63 | }
64 |
65 | // CurrentFailsLoad will load from disk previous "fails"
66 | func CurrentFailsLoad() {
67 | currentFailsMutex.Lock()
68 | defer currentFailsMutex.Unlock()
69 |
70 | path := path.Clean(GlobalConfig.SavePath + "/" + statusFile)
71 | f, err := os.Open(path)
72 | if err != nil {
73 | Warning.Printf("can't read previous status: %s, no fails loaded", err)
74 | return
75 | }
76 | defer f.Close()
77 |
78 | dec := json.NewDecoder(f)
79 | err = dec.Decode(¤tFails)
80 | if err != nil {
81 | Error.Printf("'%s' json decode: %s", path, err)
82 | }
83 | Info.Printf("'%s' loaded: %d fail(s)", path, len(currentFails))
84 | }
85 |
// CurrentFailDelete deletes the CurrentFail with the given hash from the
// global currentFails (no-op for an unknown hash), then triggers a save.
func CurrentFailDelete(hash string) {
	currentFailsMutex.Lock()
	defer currentFailsMutex.Unlock()
	delete(currentFails, hash)
	// asynchronous: the save waits in its own goroutine for the mutex
	CurrentFailsSave()
}
93 |
// CurrentFailAdd adds a CurrentFail to the global currentFails using given
// hash (replacing any previous entry stored under that hash), then triggers
// a save. currentFails must have been created first (see CurrentFailsCreate).
func CurrentFailAdd(hash string, failedCheck *CurrentFail) {
	currentFailsMutex.Lock()
	defer currentFailsMutex.Unlock()
	currentFails[hash] = failedCheck
	// asynchronous: the save waits in its own goroutine for the mutex
	CurrentFailsSave()
}
101 |
102 | // CurrentFailInc increments FailCount of the CurrentFail with the given hash
103 | func CurrentFailInc(hash string) {
104 | currentFailsMutex.Lock()
105 | defer currentFailsMutex.Unlock()
106 | currentFails[hash].FailCount++
107 | currentFails[hash].OkCount = 0
108 | CurrentFailsSave()
109 | }
110 |
111 | // CurrentFailDec increments OkCount of the CurrentFail with the given hash
112 | func CurrentFailDec(hash string) {
113 | currentFailsMutex.Lock()
114 | defer currentFailsMutex.Unlock()
115 | currentFails[hash].OkCount++
116 | CurrentFailsSave()
117 | }
118 |
119 | // CurrentFailGetAndInc returns the CurrentFail with the given hash and
120 | // increments its FailCount. The CurrentFail is created if it does not
121 | // already exists.
122 | func CurrentFailGetAndInc(hash string) *CurrentFail {
123 | cf, ok := currentFails[hash]
124 | if !ok {
125 | var cf CurrentFail
126 | uuid := uuid.NewV4()
127 | cf.FailCount = 1
128 | cf.OkCount = 0
129 | cf.FailStart = time.Now()
130 | cf.UniqueID = uuid.String()
131 | CurrentFailAdd(hash, &cf)
132 | return &cf
133 | }
134 |
135 | CurrentFailInc(hash)
136 | return cf
137 | }
138 |
139 | // CurrentFailGetAndDec returns the CurrentFail with the given hash and
140 | // increments its OkCount
141 | func CurrentFailGetAndDec(hash string) *CurrentFail {
142 | cf, ok := currentFails[hash]
143 | if !ok {
144 | return nil
145 | }
146 | CurrentFailDec(hash)
147 | return cf
148 | }
149 |
--------------------------------------------------------------------------------
/deploy/ssh-agent-nosee.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# If you are using SSH keys protected by a passphrase:
# This sample script runs an agent for the current user, creating
# a socket that the nosee service will use.

agent_link="$HOME/.ssh-agent-sock"

# A socket is already there: nothing to do.
if [ -S "$agent_link" ]; then
    echo "Agent is already here."
    exit 0
fi

# Quoted so the agent's output reaches eval as a single, unsplit string.
eval "$(ssh-agent -a "$agent_link")"
ssh-add "$HOME/keys/id_rsa_xxx"
ssh-add "$HOME/keys/id_rsa_yyy"
# ...
18 |
--------------------------------------------------------------------------------
/deploy/supervisord/nosee.conf:
--------------------------------------------------------------------------------
1 | ; Sample supervisord configuration using SSH agent
2 |
3 | [program:nosee]
4 | command=/home/nosee_server/go/bin/nosee --log-level info --log-timestamp
5 | autostart=false
6 | autorestart=false
7 | user=nosee_server
8 | ; See ssh-agent-nosee.sh
9 | environment=SSH_AUTH_SOCK="/home/nosee_server/.ssh-agent-sock",HOME="/home/nosee_server"
10 | redirect_stderr=true
11 | stdout_logfile=/var/log/supervisor/nosee.log
12 | stdout_logfile_maxbytes=50MB
13 |
--------------------------------------------------------------------------------
/deploy/systemd/nosee.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=A nosey, agentless, easy monitoring tool over SSH
3 | After=network-online.target
4 |
5 | [Service]
6 | User={USER}
7 | ExecStart=/home/{USER}/go/bin/nosee -c /home/{USER}/nosee/etc/ --log-level info --log-timestamp
8 | Type=simple
9 | Restart=on-failure
10 | Environment=SSH_AUTH_SOCK=/home/{USER}/.ssh-agent-sock
11 |
12 | [Install]
13 | WantedBy=multi-user.target
14 |
--------------------------------------------------------------------------------
/doc/images/howto.txt:
--------------------------------------------------------------------------------
1 | - Using "DIagrams Through Ascii Art" (ditaa) syntax
2 | https://github.com/stathissideris/ditaa
3 |
4 | - Generated using PlantUML online demo server
5 | http://plantuml.com/
6 |
7 | @startditaa
8 | ...
9 | @endditaa
10 |
--------------------------------------------------------------------------------
/doc/images/img_base.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xfennec/nosee/6dad5bbc946242dd56f53de3c26feb0bb88042e6/doc/images/img_base.png
--------------------------------------------------------------------------------
/doc/images/img_base.txt:
--------------------------------------------------------------------------------
1 | +--------+
2 | | Nosee |
3 | | Daemon |
4 | +--+-+-+-+
5 | | | |
6 | +------------+ | +------------+
7 | | | |
8 | v v v
9 | SSH SSH SSH
10 | +-----------+ +-----+-----+ +-----------+
11 | | Monitored | | Monitored | | Monitored |
12 | | Host | | Host | | Host |
13 | +-----------+ +-----------+ +-----------+
14 |
15 | (Only SSH server is needed on hosts)
16 |
17 |
--------------------------------------------------------------------------------
/doc/images/img_general.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xfennec/nosee/6dad5bbc946242dd56f53de3c26feb0bb88042e6/doc/images/img_general.png
--------------------------------------------------------------------------------
/doc/images/img_general.txt:
--------------------------------------------------------------------------------
1 | hosts.d/ probes.d/ alerts.d/
2 | +------+ +----------------+ +-------+
3 | | | | | | |
4 | | Host +-->+ Probe | +->| Alert |
5 | | | | | | | |
6 | +------+ +--------+-------+ | +-------+
7 | | Script | Check +-+
8 | +----+---+-------+
9 | : ^
10 | | |
11 | | SSH |
12 | +-------+
13 | Remote machine
14 | (monitored)
15 |
--------------------------------------------------------------------------------
/doc/images/img_illu.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xfennec/nosee/6dad5bbc946242dd56f53de3c26feb0bb88042e6/doc/images/img_illu.jpeg
--------------------------------------------------------------------------------
/doc/images/nosee-influxdb-grafana.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xfennec/nosee/6dad5bbc946242dd56f53de3c26feb0bb88042e6/doc/images/nosee-influxdb-grafana.png
--------------------------------------------------------------------------------
/etc/alerts.d/example.txt:
--------------------------------------------------------------------------------
1 | ## Rename this file with ".toml" extension
2 |
3 | name = "My alert"
4 | disabled = false
5 |
6 | targets = ["preprod", "linux & production"]
7 | # to capture all check failures:
8 | targets = ["*"]
9 |
10 | # command in the path or full path of a command
11 | # alert details are sent to stdin, as various env vars (see test.sh)
12 | command = "cmd"
13 | # any script in "scripts/alerts/" directory is available without any path:
14 | #command = "test.sh"
15 |
16 | arguments = [
17 | "arg1",
18 | "arg2: $SUBJECT ($TYPE)",
19 | ]
20 |
21 | # Warning: this part may change. We should probably switch to a more
22 | # generic expression here, like probe's run_if condition
23 | # This alert is only available during...
24 | hours = ["8:30 - 12:30", "14:00 - 18:00"]
25 | # sunday is 0 or 7
26 | days = [1, 2, 3, 4, 5]
27 |
28 |
29 | # Note: alerts listening for special class "general" can't have
30 | # such hour/day limitations
31 |
--------------------------------------------------------------------------------
/etc/alerts.d/mail_general.toml:
--------------------------------------------------------------------------------
1 | name = "Mail general"
2 | disabled = false
3 |
4 | targets = ["general"]
5 |
6 | command = "mail"
7 |
8 | arguments = [
9 | "-s",
10 | "Nosee $NOSEE_SRV GENERAL: $SUBJECT",
11 | "user@domain.tld"
12 | ]
13 |
--------------------------------------------------------------------------------
/etc/alerts.d/nosee-console.toml:
--------------------------------------------------------------------------------
1 | name = "Nosee console"
2 | #disabled = true
3 |
4 | targets = ["*"]
5 |
6 | command = "nosee-console.sh"
7 |
8 | arguments = ["http://localhost:8080/alerts"]
9 |
--------------------------------------------------------------------------------
/etc/hosts.d/example.txt:
--------------------------------------------------------------------------------
1 | ## Rename this file with ".toml" extension
2 |
3 | # "name" is a key for the database. Change it and it'll become another host!
4 | name = "My Host"
5 | classes = ["linux", "http", "testing"]
6 | disabled = false
7 |
8 | [network]
9 | host = "192.168.0.1"
10 | port = 22
11 | # Nosee defaults to sensible ciphers, but you may want to specify older
12 | # ciphers (at your own risk) for compatibility:
13 | #ciphers = ["arcfouraa", "aes128-cbc"]
14 |
15 | [auth]
16 | user = "user"
17 |
18 | # (password) OR (key) OR (key + passphrase) OR (ssh_agent) OR (ssh_agent + pubkey)
19 |
20 | password = "mypassword"
21 |
22 | key = "/home/xxx/.ssh/id_rsa_sample"
23 | key_passphrase = "mypassphrase"
24 |
25 | ssh_agent = true
26 | # If you don't want to test every single key in the agent, give the
27 | # corresponding public key:
28 | #pubkey = "/home/xxx/.ssh/id_rsa_sample.pub"
29 |
30 | # you can override probe defaults for a specific host:
31 | [[default]]
32 | name = "warn_ping_latency"
33 | value = 10
34 |
35 | # or defaults for a probe:
36 | [[default]]
37 | name = "ifband_interface"
38 | value = "enp1s0f0"
39 |
--------------------------------------------------------------------------------
/etc/hosts.d/test.toml:
--------------------------------------------------------------------------------
1 | name = "Development server"
2 | classes = ["linux", "development"]
3 |
4 | [network]
5 | host = "192.168.0.41"
6 | port = 22
7 |
8 | [auth]
9 | user = "deploy"
10 | key = "/home/user/.ssh/id_rsa_devsrv"
11 |
--------------------------------------------------------------------------------
/etc/nosee.toml:
--------------------------------------------------------------------------------
1 | # global configuration for Nosee
2 |
3 | # Nosee server name (useful if you have multiple Nosee servers)
4 | # default: ""
5 | #name="Test"
6 |
7 | # This option helps to ease the global load induced by all SSH connections.
8 | # default: 15s
9 | #start_time_spread = "15s"
10 |
11 | # Maximum connection time for an SSH connection. (will trigger a "general" class alert)
12 | # default: 10s
13 | #ssh_connection_time_warn = "6s"
14 |
15 | # Currently, nosee will look at $HOME/.ssh/known_hosts for host fingerprints,
16 | # unless you set this to true, accepting blindly any fingerprint.
17 | # This is a potential security issue. (MitM attack)
18 | #ssh_blindtrust_fingerprints = false
19 |
20 | # Path to save current fails so Nosee can be restarted without losing status
21 | # (see nosee-fails.json file)
22 | # default: "./"
23 | #save_path = "/home/user/.nosee/"
24 |
25 | # Nosee will regularly execute all "scripts/heartbeats" as a keepalive
26 | # default: 30s
27 | #heartbeat_delay = "5s"
28 |
--------------------------------------------------------------------------------
/etc/probes.d/apache_modstatus.toml:
--------------------------------------------------------------------------------
1 | name = "Apache mod_status"
2 | targets = ["linux & mod_status"]
3 |
4 | script = "apache_modstatus.sh"
5 |
6 | delay = "1m"
7 | timeout = "5s"
8 |
--------------------------------------------------------------------------------
/etc/probes.d/backup_daily.toml:
--------------------------------------------------------------------------------
1 | name = "daily backup"
2 | targets = ["linux & backupd"]
3 | #disabled = true
4 |
5 | script = "backup.sh"
6 | arguments = "$start_file $ok_file"
7 |
8 | delay = "30m"
9 | timeout = "8s"
10 |
11 | ### Default values
12 |
13 | [[default]]
14 | name = "start_file"
15 | value = "/tmp/backup.start"
16 |
17 | [[default]]
18 | name = "ok_file"
19 | value = "/tmp/backup.ok"
20 |
21 | [[default]]
22 | name = "backup_margin_hours"
23 | value = 3
24 |
25 | [[default]]
26 | name = "backup_duration_warn"
27 | value = 5
28 |
29 | ### Checks
30 |
31 | [[check]]
32 | desc = "backup too old"
33 | if = "LAST_OK_HOURS > (24+backup_margin_hours)"
34 | classes = ["critical"]
35 |
36 | [[check]]
37 | desc = "backup too long"
38 | if = "LAST_DURATION_HOURS > backup_duration_warn"
39 | classes = ["warning"]
40 |
--------------------------------------------------------------------------------
/etc/probes.d/backup_week.toml:
--------------------------------------------------------------------------------
1 | name = "backup check"
2 | targets = ["linux & backupw"]
3 | #disabled = true
4 |
5 | script = "backup.sh"
6 | arguments = "$start_file $ok_file"
7 |
8 | # +------+ +-----
9 | # exp: 27h | "" | 27h 27h |
10 | # +------+ +------+ +------+
11 | # | || || | | ||
12 | # +#.--#+#.--#+#.---+-.---+----#+#---#+#.--#+
13 | # enab: ****************-----------------**********
14 | # Thu Fri Sat Sun Mon Tue Wed
15 | # dow: 4 5 6 0 1 2 3
16 |
17 | run_if = """
18 | (date('dow') == 3 || date('dow') == 4 || date('dow') == 5) ||
19 | (date('dow') == 6 && date('time') <= 8) ||
20 | (date('dow') == 2 && date('time') >= 8)
21 | """
22 |
23 | delay = "30m"
24 | timeout = "8s"
25 |
26 | ### Default values
27 |
28 | [[default]]
29 | name = "start_file"
30 | value = "/tmp/backup.start"
31 |
32 | [[default]]
33 | name = "ok_file"
34 | value = "/tmp/backup.ok"
35 |
36 | [[default]]
37 | name = "backup_margin_hours"
38 | value = 3
39 |
40 | [[default]]
41 | name = "backup_duration_warn"
42 | value = 5
43 |
44 | ### Checks
45 |
46 | [[check]]
47 | desc = "backup too old"
48 | if = "LAST_OK_HOURS > (24+backup_margin_hours)"
49 | classes = ["critical"]
50 |
51 | [[check]]
52 | desc = "backup too long"
53 | if = "LAST_DURATION_HOURS > backup_duration_warn"
54 | classes = ["warning"]
55 |
--------------------------------------------------------------------------------
/etc/probes.d/cert_example.toml:
--------------------------------------------------------------------------------
1 | name = "example.com certificate validity"
2 | targets = ["example_com"]
3 | #disabled = true
4 |
5 | script = "cert_check.sh"
6 | arguments = "/etc/pki/tls/certs/example.com.crt 15"
7 |
8 | delay = "60m"
9 | timeout = "5s"
10 |
11 | ### Checks
12 |
13 | [[check]]
14 | desc = "certificate will expire soon"
15 | if = "WILL_EXPIRE != 0"
16 | classes = ["warning"]
17 |
--------------------------------------------------------------------------------
/etc/probes.d/cpu_lms_temp.toml:
--------------------------------------------------------------------------------
1 | name = "CPU lm_sensors temperature"
2 | targets = ["linux & lm_sensors"]
3 |
4 | script = "cpu_lms_temp.sh"
5 |
6 | delay = "1m"
7 | timeout = "5s"
8 |
9 | ### Checks
10 |
11 | [[check]]
12 | desc = "high CPU temperature"
13 | if = "TEMP > HIGH"
14 | classes = ["warning"]
15 | needed_failures = 2
16 |
17 | [[check]]
18 | desc = "critical CPU temperature"
19 | if = "TEMP > CRIT"
20 | classes = ["critical"]
21 |
--------------------------------------------------------------------------------
/etc/probes.d/cpu_temp.toml:
--------------------------------------------------------------------------------
1 | ## Sample probe
2 |
3 | name = "CPU temperature"
4 | targets = ["linux"]
5 | disabled = false
6 |
7 | script = "cpu_temp.sh"
8 | arguments = "0"
9 |
10 | delay = "2m"
11 | timeout = "5s"
12 |
13 | ### Default values
14 | # types: int, float, string
15 |
16 | [[default]]
17 | name = "warn_cpu_temp"
18 | value = 75
19 |
20 | [[default]]
21 | name = "error_cpu_temp"
22 | value = 85
23 |
24 | ### Checks
25 |
26 | [[check]]
27 | desc = "high CPU0 temperature"
28 | if = "TEMP > warn_cpu_temp"
29 | classes = ["warning"]
30 | needed_failures = 2
31 |
32 | [[check]]
33 | desc = "critical CPU0 temperature"
34 | if = "TEMP > error_cpu_temp"
35 | classes = ["critical"]
36 |
--------------------------------------------------------------------------------
/etc/probes.d/curl_expect_example.toml:
--------------------------------------------------------------------------------
1 | name = "example.com Website"
2 | targets = ["linux & example"]
3 |
4 | script = "curl_expect.sh"
5 | arguments = "http://example.com/ 'used for illustrative examples'"
6 |
7 | delay = "5m"
8 | timeout = "20s"
9 |
10 | ### Checks
11 |
12 | [[check]]
13 | desc = "can't find expected content"
14 | if = "FOUND_EXPECTED != 1"
15 | classes = ["critical"]
16 |
--------------------------------------------------------------------------------
/etc/probes.d/df.toml:
--------------------------------------------------------------------------------
1 | name = "disk free"
2 | targets = ["linux"]
3 | #disabled = true
4 |
5 | script = "df.sh"
6 |
7 | delay = "30m"
8 | timeout = "8s"
9 |
10 | ### Default values
11 |
12 | [[default]]
13 | name = "df_warn_perc"
14 | value = 95
15 |
16 | ### Checks
17 |
18 | [[check]]
19 | desc = "disk almost full"
20 | if = "FULLEST_PERC > df_warn_perc"
21 | classes = ["warning"]
22 |
--------------------------------------------------------------------------------
/etc/probes.d/example.txt:
--------------------------------------------------------------------------------
1 | ## Rename this file with ".toml" extension
2 |
3 | name="My Probe"
4 |
5 | script = "script.sh"
6 | disabled = false
7 |
8 | targets = ["linux & test", "windows & test"]
9 | # If you want to match all hosts (all classes):
10 | # targets = ["*"]
11 |
12 | # probe repetition delay (must be a whole number of minutes [not 2m30s, for instance])
13 | # minimum value: 1m
14 | delay = "5m"
15 |
16 | # if the probe takes more than this time, it will trigger an error
17 | # default: 20s
18 | timeout = "30s"
19 |
20 | # check only between 8:00 and 18:00
21 | run_if = "date('time') >= 8 && date('time') <= 18"
22 |
23 | ### Default values (used by checks)
24 | # types: int, float, string
25 | # not "all uppercase" (reserved for probe values)
26 |
27 | [[default]]
28 | name = "value_foo"
29 | value = 0.90
30 |
31 | [[default]]
32 | name = "value_bar"
33 | value = "200 OK"
34 |
35 | ### Checks
36 |
37 | [[check]]
38 | desc = "check description"
39 | if = "VALUE1_FROM_SCRIPT > value_foo"
40 | classes = ["critical"]
41 | # will trigger the alert if the check fails two times (default: 1)
42 | needed_failures = 2
43 | # will delete the "suspicion" if check is OK three times (default: needed_failures)
44 | needed_successes = 3
45 |
46 | [[check]]
47 | desc = "check description"
48 | if = "VALUE1_FROM_SCRIPT+VALUE2_FROM_SCRIPT < value_foo"
49 | classes = ["warning"]
50 |
--------------------------------------------------------------------------------
/etc/probes.d/ifband.toml:
--------------------------------------------------------------------------------
1 | name = "bandwidth"
2 | targets = ["linux & ifband"]
3 | #disabled = true
4 |
5 | script = "ifband.sh"
6 | arguments = "$ifband_interface"
7 |
8 | delay = "1m"
9 | timeout = "5s"
10 |
11 | ### Default values
12 |
13 | [[default]]
14 | name = "ifband_interface"
15 | value = "eth0"
16 |
--------------------------------------------------------------------------------
/etc/probes.d/load.toml:
--------------------------------------------------------------------------------
1 | name = "system load"
2 | targets = ["linux"]
3 | #disabled = true
4 |
5 | script = "load.sh"
6 | arguments = "$load_normal_cmd"
7 |
8 | delay = "1m"
9 | timeout = "8s"
10 |
11 | ### Default values
12 |
13 | [[default]]
14 | name = "load_normal_cmd"
15 | value = "/root/backup.sh"
16 |
17 | [[default]]
18 | name = "load_margin"
19 | value = 0
20 |
21 | ### Checks
22 |
23 | [[check]]
24 | desc = "heavy system load"
25 | if = "LOAD > (CPU_COUNT+load_margin) && PROG_DETECTED == 0"
26 | classes = ["warning"]
27 | needed_failures = 2
28 |
--------------------------------------------------------------------------------
/etc/probes.d/mdstat.toml:
--------------------------------------------------------------------------------
1 | name = "Linux md-raid states"
2 | targets = ["linux"]
3 | #disabled = true
4 |
5 | script = "mdstat.sh"
6 |
7 | delay = "5m"
8 | timeout = "15s"
9 |
10 | ### Checks
11 |
12 | [[check]]
13 | desc = "md-raid failure"
14 | if = "ERR_ARRAYS > 0"
15 | classes = ["critical"]
16 |
--------------------------------------------------------------------------------
/etc/probes.d/mem.toml:
--------------------------------------------------------------------------------
1 | name = "memory (RAM and swap)"
2 |
3 | script = "mem.sh"
4 | disabled = false
5 |
6 | targets = ["linux", "windows"]
7 |
8 | delay = "5m"
9 | # WMI can be veeeery slow :(
10 | timeout = "30s"
11 |
12 | ### Default values
13 | # types: int, float, string
14 | # not "all uppercase" (reserved for probe values)
15 | [[default]]
16 | name = "min_available_ratio"
17 | value = 0.20
18 |
19 | [[default]]
20 | name = "warn_swap_ratio"
21 | value = 0.30
22 |
23 | ### Checks
24 |
25 | [[check]]
26 | desc = "critical available memory ratio"
27 | if = "MEM_AVAILABLE_RATIO < min_available_ratio"
28 | classes = ["critical"]
29 |
30 | [[check]]
31 | desc = "high swap usage ratio"
32 | if = "SWAP_USED_RATIO > warn_swap_ratio"
33 | classes = ["warning"]
34 |
--------------------------------------------------------------------------------
/etc/probes.d/ping.toml:
--------------------------------------------------------------------------------
1 | name = "ping to router"
2 | targets = ["linux"]
3 | #disabled = true
4 |
5 | script = "ping.sh"
6 | arguments = "192.168.0.250"
7 |
8 | delay = "1m"
9 | timeout = "8s"
10 |
11 | ### Default values
12 |
13 | [[default]]
14 | name = "err_ping_loss"
15 | value = 1
16 |
17 | [[default]]
18 | name = "warn_ping_latency"
19 | value = 1
20 |
21 | ### Checks
22 |
23 | [[check]]
24 | desc = "critical ping loss"
25 | if = "LOSS_PERC >= err_ping_loss"
26 | classes = ["critical"]
27 | needed_failures = 2
28 |
29 | [[check]]
30 | desc = "ping latency"
31 | if = "AVG_MS > warn_ping_latency"
32 | classes = ["warning"]
33 | needed_failures = 2
34 |
--------------------------------------------------------------------------------
/etc/probes.d/port_80.toml:
--------------------------------------------------------------------------------
1 | name = "HTTP port"
2 | targets = ["linux & http"]
3 | #disabled = true
4 |
5 | script = "port.sh"
6 | arguments = "80"
7 |
8 | delay = "1m"
9 |
10 | ### Checks
11 |
12 | [[check]]
13 | desc = "port 80 is not open"
14 | if = "OPEN != 1"
15 | classes = ["critical"]
16 |
--------------------------------------------------------------------------------
/etc/probes.d/systemd_httpd.toml:
--------------------------------------------------------------------------------
1 | name = "Apache status (systemd)"
2 | targets = ["linux & systemd & apache"]
3 | #disabled = true
4 |
5 | script = "systemctl_status.sh"
6 | arguments = "httpd.service"
7 |
8 | delay = "1m"
9 | timeout = "5s"
10 |
11 | ### Checks
12 |
13 | [[check]]
14 | desc = "Apache status"
15 | if = "STATUS != 'active'"
16 | classes = ["critical"]
17 |
--------------------------------------------------------------------------------
/etc/scripts/alerts/nosee-console.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Alert script: posts an alert to a nosee console.
4 | # $1 is the console URL (ex: http://localhost:8080/alerts), the alert
5 | # details are read from stdin and the other fields from the variables
6 | # TYPE, SUBJECT, CLASSES, HOST_NAME, NOSEE_SRV, UNIQUEID and DATETIME.
7 | 
8 | console_url="$1"
9 | 
10 | if [ -z "$console_url" ]; then
11 |     echo "ERROR: give nosee console URL (ex: http://localhost:8080/alerts)" >&2
12 |     exit 1
13 | fi
14 | 
15 | DETAILS=$(cat)
16 | 
17 | # any curl failure (including HTTP errors, because of -f) exits with 1
18 | curl -s -f -w "HTTP Code %{http_code}\n" \
19 |     --form-string "type=$TYPE" \
20 |     --form-string "subject=$SUBJECT" \
21 |     --form-string "details=$DETAILS" \
22 |     --form-string "classes=$CLASSES" \
23 |     --form-string "hostname=$HOST_NAME" \
24 |     --form-string "nosee_srv=$NOSEE_SRV" \
25 |     --form-string "uniqueid=$UNIQUEID" \
26 |     --form-string "datetime=$DATETIME" \
27 |     "$console_url" || exit 1
28 | 
--------------------------------------------------------------------------------
/etc/scripts/alerts/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Test script to show all input channels
4 |
5 | file="/tmp/remove_me"
6 |
7 | echo "stdout test"
8 | (>&2 echo "stderr test")
9 |
10 | date > $file
11 | echo "$0" >> $file
12 | echo "$1" >> $file
13 | echo "$2" >> $file
14 | echo "$3" >> $file
15 | echo "$4" >> $file
16 |
17 | echo "$SUBJECT" >> $file
18 | echo $USER >> $file
19 | echo $TYPE >> $file
20 | echo $NOSEE_SRV >> $file
21 |
22 | # stdin is $DETAILS
23 | cat >> $file
24 | echo $HOME >> $file
25 |
--------------------------------------------------------------------------------
/etc/scripts/heartbeats/nosee-console.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # nosee console heartbeat URL
4 | url="http://localhost:8080/heartbeat"
5 |
6 | # NOSEE_SRV, VERSION, DATETIME, STARTTIME, UPTIME
7 |
8 | curl -s -f -w "HTTP Code %{http_code}\n" \
9 | --form-string "uptime=$UPTIME" \
10 | --form-string "server=$NOSEE_SRV" \
11 | --form-string "version=$VERSION" \
12 | "$url"
13 |
14 | if [ $? -ne 0 ]; then
15 | exit 1
16 | fi
17 |
--------------------------------------------------------------------------------
/etc/scripts/loggers/influxdb.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Logger script: converts the probe values read on stdin to InfluxDB
4 | # line protocol and posts them to the "nosee" database of a local
5 | # InfluxDB. The "host" tag is $HOST_FILE without its ".toml" suffix.
6 | 
7 | host=${HOST_FILE%.toml}
8 | 
9 | # input lines look like:
10 | # df.toml;DISK_FULLEST_PERC;27
11 | res=$(awk -v host="$host" -F\; '{
12 |     probe=$1
13 |     key=$2
14 |     val=$3
15 |     sub(/\.toml$/, "", probe)
16 |     measurement=sprintf("%s_%s", probe, key)
17 |     # only a complete number may be written unquoted: anything else
18 |     # (ex: "active", "eth0", "1.2.3") must be sent as a string field
19 |     if (val ~ /^-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?$/)
20 |         printf("%s,host=%s value=%s\n", measurement,host,val)
21 |     else
22 |         printf("%s,host=%s value=\"%s\"\n", measurement,host,val)
23 | }')
24 | 
25 | curl -i -XPOST 'http://localhost:8086/write?db=nosee' --data-binary "$res"
26 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/apache_modstatus.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: Apache requests/s (RPS) and kBytes/s (KBPS), computed from the
4 | # mod_status counters.
5 | #
6 | # Server must have mod_status loaded and configured with something like:
7 | #
8 | # SetHandler server-status
9 | # Order deny,allow
10 | # Deny from all
11 | # Allow from 127.0.0.1 ::1
12 | #
13 | # ExtendedStatus must be set to On (default since Apache 2.3.6)
14 | #
15 | # Counters of the previous call are kept in $stat_file; a rate is the
16 | # difference with the current counter divided by the elapsed seconds.
17 | 
18 | stat_file="$HOME/.apache-modstatus"
19 | NOW=$(date +%s)
20 | 
21 | page=$(curl --silent -f "http://localhost/server-status?auto")
22 | if [ $? -ne 0 ]; then
23 |     (>&2 echo "ERROR: unable to get status (mod_status OK on localhost?)")
24 |     exit 1
25 | fi
26 | 
27 | requests=$(echo "$page" | awk -F ': ' '/^Total Accesses/ {print $2}')
28 | kbytes=$(echo "$page" | awk -F ': ' '/^Total kBytes/ {print $2}')
29 | 
30 | # missing counters (ex: ExtendedStatus is Off) must be a probe error,
31 | # not silently reported as rates of 0
32 | if [ -z "$requests" ] || [ -z "$kbytes" ]; then
33 |     (>&2 echo "ERROR: unable to find counters in status (ExtendedStatus On?)")
34 |     exit 1
35 | fi
36 | 
37 | # defaults for the first call (no stat file yet): rates will be 0
38 | LAST_CALL=$NOW
39 | LAST_REQUESTS=$requests
40 | LAST_KBYTES=$kbytes
41 | if [ -f "$stat_file" ]; then
42 |     . "$stat_file"
43 | fi
44 | 
45 | REQUESTS=$requests
46 | KBYTES=$kbytes
47 | 
48 | time_diff=$(echo $LAST_CALL $NOW | awk '{print ($2 - $1)}')
49 | requests_diff=$(echo $LAST_REQUESTS $REQUESTS | awk '{print ($2 - $1)}')
50 | kbytes_diff=$(echo $LAST_KBYTES $KBYTES | awk '{print ($2 - $1)}')
51 | 
52 | if [ "$time_diff" -eq 0 ]; then
53 |     RPS=0
54 |     KBPS=0
55 | else
56 |     # a negative rate (counters lower than the saved ones) is reported as 0
57 |     RPS=$(echo $requests_diff $time_diff | awk '{t=$1/$2; printf ("%f", (t>0?t:0))}')
58 |     KBPS=$(echo $kbytes_diff $time_diff | awk '{t=$1/$2; printf ("%f", (t>0?t:0))}')
59 | fi
60 | 
61 | # save the current counters for the next call
62 | {
63 |     echo "LAST_CALL=$NOW"
64 |     echo "LAST_REQUESTS=$REQUESTS"
65 |     echo "LAST_KBYTES=$KBYTES"
66 | } > "$stat_file"
67 | 
68 | echo RPS: $RPS
69 | echo KBPS: $KBPS
70 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/backup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: age and duration of the last backup, from two flag files.
4 | # ex: backup.sh /tmp/backup.start /tmp/backup.ok
5 | # Output (hours, from the modification times of the files):
6 | #   LAST_OK_HOURS: time elapsed since the 'ok' file was modified
7 | #   LAST_DURATION_HOURS: 'ok' time minus 'start' time (0 if not positive)
8 | 
9 | start_file="$1"
10 | ok_file="$2"
11 | 
12 | if [ -z "$ok_file" ]; then
13 |     echo "ERROR: give 'start' flag file and 'ok' flag file" >&2
14 |     exit 1
15 | fi
16 | 
17 | if [ ! -f "$start_file" ]; then
18 |     echo "ERROR: can't read start file '$start_file'" >&2
19 |     exit 1
20 | fi
21 | if [ ! -f "$ok_file" ]; then
22 |     echo "ERROR: can't read ok file '$ok_file'" >&2
23 |     exit 1
24 | fi
25 | 
26 | # modification times (seconds since the epoch)
27 | start_tmsp=$(date +%s -r "$start_file")
28 | ok_tmsp=$(date +%s -r "$ok_file")
29 | now=$(date +%s)
30 | 
31 | last_ok_hours=$(echo $ok_tmsp $now | awk '{ print ($2-$1)/60/60 }')
32 | last_duration=$(echo $start_tmsp $ok_tmsp | awk '{
33 |     diff=$2-$1;
34 |     print (diff > 0 ? diff/60/60 : 0)
35 | }')
36 | 
37 | echo "LAST_OK_HOURS:" $last_ok_hours
38 | echo "LAST_DURATION_HOURS:" $last_duration
39 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/cert_check.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: tells if a certificate expires within a given number of days.
4 | # Usage: cert_check.sh /etc/pki/tls/certs/myweb.crt 15
5 | # Output: WILL_EXPIRE is the exit status of "openssl x509 -checkend"
6 | # (0 when the certificate does not expire within this delay)
7 | 
8 | if [ -z "$2" ]; then
9 |     (>&2 echo "ERROR: give certificate path and 'days to expire'")
10 |     (>&2 echo "ERROR: Usage: $0 /etc/pki/tls/certs/myweb.crt 15")
11 |     exit 1
12 | fi
13 | 
14 | cert_path=$1
15 | days_to_expire=$2
16 | 
17 | # an unreadable file also makes openssl exit with a non-zero status,
18 | # which would be reported as an expiring certificate: make it a probe
19 | # error instead
20 | if [ ! -r "$cert_path" ]; then
21 |     (>&2 echo "ERROR: can't read certificate '$cert_path'")
22 |     exit 1
23 | fi
24 | 
25 | # -checkend takes a delay in seconds
26 | seconds=$(($days_to_expire*24*60*60))
27 | 
28 | openssl x509 -checkend "$seconds" -noout -in "$cert_path"
29 | res=$?
30 | 
31 | echo "WILL_EXPIRE:" $res
32 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/cpu_lms_temp.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # this script uses lm_sensors to average the temperature of all CPU cores
4 | # Required sensors output format:
5 | # ...
6 | # Core 0: +33.0°C (high = +82.0°C, crit = +102.0°C)
7 | # Core 1: +32.0°C (high = +82.0°C, crit = +102.0°C)
8 | #
9 | # Output: TEMP (average of the cores), HIGH and CRIT (lowest "high" and
10 | # "crit" limits found, 999 when no core gives them)
11 | # Note: match() with an array argument is a gawk extension.
12 | 
13 | sensors | awk '
14 | BEGIN {
15 |     total = 0
16 |     cores = 0
17 |     high = 999
18 |     crit = 999
19 | }
20 | /^Core/ {
21 |     if (match($0, /\+([0-9.]+)°C.*\+([0-9.]+)°C,.*\+([0-9.]+)°C/, g) > 0) {
22 |         total += g[1]
23 |         high = (g[2] < high ? g[2] : high)
24 |         crit = (g[3] < crit ? g[3] : crit)
25 |         cores++
26 |     } else if (match($0, /\+([0-9.]+)°C/, g) > 0) {
27 |         # temperature without limits
28 |         total += g[1]
29 |         cores++
30 |     }
31 | }
32 | END {
33 |     # no core found (or sensors failed): error instead of dividing by zero
34 |     if (cores == 0) {
35 |         print "ERROR: no Core temperature found in sensors output" > "/dev/stderr"
36 |         exit 1
37 |     }
38 |     printf("TEMP: %f\n", total / cores)
39 |     printf("HIGH: %f\n", high)
40 |     printf("CRIT: %f\n", crit)
41 | }
42 | '
43 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/cpu_temp.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: temperature of a thermal zone, read from /sys/class/thermal.
4 | # $1 is the thermal zone number (ex: 0)
5 | # Output: TEMP (value of the "temp" file divided by 1000)
6 | 
7 | zone="$1"
8 | 
9 | if [ -z "$zone" ]; then
10 |     echo "ERROR: give thermal zone number (ex: 0)" >&2
11 |     exit 1
12 | fi
13 | 
14 | temp_file="/sys/class/thermal/thermal_zone$zone/temp"
15 | 
16 | if [ ! -f "$temp_file" ]; then
17 |     echo "ERROR: invalid path: $temp_file" >&2
18 |     exit 2
19 | fi
20 | 
21 | raw=$(cat "$temp_file")
22 | temp=$(awk "BEGIN {print $raw/1000}")
23 | echo "TEMP:" $temp
24 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/curl.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: prints the content of an URL.
4 | # the URL must display usual "KEY: val\nKEY2: val2" format
5 | 
6 | url="$1"
7 | 
8 | if [ -z "$url" ]; then
9 |     echo "ERROR: give URL" >&2
10 |     exit 1
11 | fi
12 | 
13 | curl --max-time 15 --silent -f "$url"
14 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/curl_expect.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: tells if the page at an URL contains an expected string.
4 | # Output: FOUND_EXPECTED is 1 when at least one line of the page matches
5 | # (the string is used as a grep pattern), 0 otherwise or when curl fails
6 | 
7 | if [ -z "$2" ]; then
8 |     echo "ERROR: give URL and an expected string" >&2
9 |     echo "Usage example: $0 'http://www.perdu.com/' 'Pas de panique'" >&2
10 |     exit 1
11 | fi
12 | 
13 | url=$1
14 | expected=$2
15 | 
16 | found=0
17 | 
18 | if page=$(curl --max-time 15 --silent -f "$url"); then
19 |     # number of matching lines
20 |     matches=$(echo "$page" | grep -c "$expected")
21 |     if [ $matches -gt 0 ]; then
22 |         found=1
23 |     fi
24 | fi
25 | 
26 | echo "FOUND_EXPECTED:" $found
27 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/df.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: usage percentage of the mounted /dev/* filesystems
4 | # (mount points under /mnt/ and /snap/ are ignored).
5 | # Output: FULLEST_PERC (highest percentage) and one DF_<MOUNT>_PERC per
6 | # mount point ("/" gives DF_ROOT_PERC, "/var/lib" gives DF_VAR_LIB_PERC)
7 | 
8 | lines=$(df -kP | grep '^/dev/' | grep -v '[[:space:]]/mnt/' | grep -v '[[:space:]]/snap/')
9 | fullest=$(echo "$lines" | awk '{print $5}' | cut -d% -f1 | sort -n | tail -n1)
10 | 
11 | echo "FULLEST_PERC:" $fullest
12 | 
13 | # "percentage mount-point" pairs
14 | all=$(echo "$lines" | awk '{print $5,$6}')
15 | while read -r line; do
16 |     # no filesystem matched: do not report an empty DF_ROOT_PERC
17 |     if [ -z "$line" ]; then
18 |         continue
19 |     fi
20 |     used_perc=$(echo "$line" | awk '{print $1}' | cut -d% -f1)
21 |     name=$(echo "$line" | awk '{print $2}')
22 |     # every "/" and "-" becomes "_" (leading one removed), so the mount
23 |     # point can be used in the key name
24 |     name=$(echo "$name" | sed -e 's#/#_#g' -e 's/-/_/g' -e 's/^_//')
25 |     if [ -z "$name" ]; then
26 |         name="ROOT"
27 |     fi
28 |     echo "DF_${name^^}_PERC:" $used_perc
29 | done <<< "$all"
30 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/ifband.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: RX/TX bandwidth (RX_KBPS, TX_KBPS) of a network interface.
4 | # Usage: ifband.sh interface-name
5 | # Byte counters of the previous call are kept in $stat_file; a rate is
6 | # the difference with the current counter divided by the elapsed seconds.
7 | 
8 | # check the argument first: every path below is built from it
9 | if [ -z "$1" ]; then
10 |     (>&2 echo "USAGE: $0 interface-name")
11 |     exit 1
12 | fi
13 | 
14 | interface=$1
15 | if_dir="/sys/class/net/$interface/statistics"
16 | stat_file="$HOME/.ifband-$interface"
17 | NOW=$(date +%s)
18 | 
19 | if [ ! -d "$if_dir" ]; then
20 |     (>&2 echo "ERROR: unable to find $interface stats")
21 |     exit 1
22 | fi
23 | 
24 | RX=$(cat "$if_dir/rx_bytes")
25 | TX=$(cat "$if_dir/tx_bytes")
26 | 
27 | # defaults for the first call (no stat file yet): rates will be 0
28 | LAST_CALL=$NOW
29 | LAST_RX=$RX
30 | LAST_TX=$TX
31 | 
32 | if [ -f "$stat_file" ]; then
33 |     . "$stat_file"
34 | fi
35 | 
36 | time_diff=$(echo $LAST_CALL $NOW | awk '{print ($2 - $1)}')
37 | rx_diff=$(echo $LAST_RX $RX | awk '{print ($2 - $1)}')
38 | tx_diff=$(echo $LAST_TX $TX | awk '{print ($2 - $1)}')
39 | 
40 | #echo $time_diff $rx_diff $tx_diff
41 | if [ "$time_diff" -eq 0 ]; then
42 |     RX_KBPS=0
43 |     TX_KBPS=0
44 | else
45 |     RX_KBPS=$(echo $rx_diff $time_diff | awk '{printf ("%i", $1 / $2 / 1024)}')
46 |     TX_KBPS=$(echo $tx_diff $time_diff | awk '{printf ("%i", $1 / $2 / 1024)}')
47 | fi
48 | 
49 | # a negative rate (counters lower than the saved ones) is reported as 0
50 | if [ "$RX_KBPS" -le 0 ]; then
51 |     RX_KBPS=0
52 | fi
53 | if [ "$TX_KBPS" -le 0 ]; then
54 |     TX_KBPS=0
55 | fi
56 | 
57 | # save the current counters for the next call
58 | {
59 |     echo "LAST_CALL=$NOW"
60 |     echo "LAST_RX=$RX"
61 |     echo "LAST_TX=$TX"
62 | } > "$stat_file"
63 | 
64 | echo RX_KBPS: $RX_KBPS
65 | echo TX_KBPS: $TX_KBPS
66 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/load.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # load.sh [prog1] [prog2] [script3] [...]
4 | # will return "PROG_DETECTED: 1" if any
5 | # of the prog/script is found ("my load is high
6 | # but my backup is running, so it's ok")
7 | # Output: LOAD, CPU_COUNT and PROG_DETECTED
8 | 
9 | # CentOS 6/7 have a minimalist PATH on non-login SSH connections
10 | # and 'pidof' is often hosted in /sbin
11 | PATH=$PATH:/sbin
12 | 
13 | if [ -f /proc/loadavg ]; then
14 |     load=$(awk '{print $1}' /proc/loadavg)
15 | else
16 |     # no /proc/loadavg: take the first load average of the uptime output
17 |     load_field=$(LANG=C uptime | awk -F, '{print $(NF-2)}')
18 |     load=$(echo "$load_field" | awk -F: '{print $2}')
19 | fi
20 | 
21 | # look for every given program (no argument: nothing is detected)
22 | detected=0
23 | for prog in "$@"; do
24 |     if pidof -x "$prog" > /dev/null; then
25 |         detected=1
26 |     fi
27 | done
28 | 
29 | echo "LOAD:" $load
30 | echo "CPU_COUNT:" $(nproc)
31 | echo "PROG_DETECTED:" $detected
32 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/load_win.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Probe (Cygwin only): CPU and disk load figures queried with wmic.
3 | if [ $(uname -o) != "Cygwin" ]; then
4 | (>&2 echo "Cygwin needed")
5 | exit 1
6 | fi
7 | # awk 'NR==2' keeps the second line of each wmic output (presumably the value under the column header - TODO confirm)
8 | pql=$(wmic path Win32_PerfFormattedData_PerfOS_System get ProcessorQueueLength | awk 'NR==2')
9 | echo "CPU_QUEUE_LEN:" $pql
10 | 
11 | # select PercentProcessorTime from Win32_PerfFormattedData_PerfOS_Processor where Name = '_Total'
12 | 
13 | #p=$(wmic path Win32_PerfFormattedData_PerfOS_System get PercentProcessorQueueLength | awk 'NR==2')
14 | #echo "CPU_QUEUE_LEN:" $pql
15 | # PercentProcessorTime of the '_Total' processor instance
16 | ppt=$(wmic path Win32_PerfFormattedData_PerfOS_Processor where "Name = '_Total'" get PercentProcessorTime | awk 'NR==2')
17 | echo CPU_PERCENT: $ppt
18 | # loadpercentage reported by "wmic cpu"
19 | lp=$(wmic cpu get loadpercentage | awk 'NR==2')
20 | echo CPU_LOAD_PERCENT: $lp
21 | # PercentDiskTime of the '_Total' physical disk instance
22 | pdt=$(wmic path Win32_PerfFormattedData_PerfDisk_PhysicalDisk where "Name='_Total'" get PercentDiskTime | awk 'NR==2')
23 | echo DISK_PERCENT: $pdt
24 |
--------------------------------------------------------------------------------
/etc/scripts/probes/mdstat.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: counts the lines of /proc/mdstat having a "_" between square
4 | # brackets (presumably the member status of a degraded array, like
5 | # "[U_]" - confirm against the md documentation).
6 | # Output: ERR_ARRAYS
7 | 
8 | mdstat_file="/proc/mdstat"
9 | 
10 | if [ ! -f "$mdstat_file" ]; then
11 |     echo "ERROR: cant find md RAID support ($mdstat_file)" >&2
12 |     exit 1
13 | fi
14 | 
15 | failed_count=$(grep -c '\[.*_.*\]' $mdstat_file)
16 | 
17 | echo "ERR_ARRAYS:" $failed_count
18 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/mem.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Probe: RAM and swap figures (MB and ratios), from /proc/meminfo or, without MemAvailable, from "free -m".
3 | # is MemAvailable supported?
4 | ma_supported=$(grep "MemAvailable:" /proc/meminfo)
5 | # meminfo_fmt NAME: prints the /proc/meminfo value of field NAME divided by 1024 (integer)
6 | function meminfo_fmt() {
7 | val=$(grep "^$1:" /proc/meminfo)
8 | val=$(echo "$val" | awk '{printf("%i\n", $2/1024)}')
9 | echo $val
10 | }
11 | # no MemAvailable field: every value is taken from the columns of "free -m"
12 | if [ -z "$ma_supported" ]; then
13 | mem=$(free -m | grep '^Mem')
14 | swap=$(free -m | grep '^Swap')
15 | 
16 | mem_total_mb=$(echo $mem | cut -d\ -f2)
17 | mem_free_mb=$(echo $mem | cut -d\ -f4)
18 | 
19 | mem_cached_mb=$(echo $mem | cut -d\ -f7)
20 | mem_buffers_mb=$(echo $mem | cut -d\ -f6)
21 | mem_buffcache_mb=$(($mem_cached_mb + $mem_buffers_mb))
22 | 
23 | mem_hardused_mb=$(echo "$mem" | awk '{printf("%.2f\n", $3-$5-$6-$7);}')
24 | mem_hardused_ratio=$(echo $mem_hardused_mb $mem_total_mb | awk '{printf("%.2f", $1/$2);}')
25 | 
26 | mem_available_mb=$(($mem_free_mb + $mem_buffcache_mb))
27 | 
28 | swap_total_mb=$(echo $swap | cut -d\ -f2)
29 | swap_free_mb=$(echo $swap | cut -d\ -f4)
30 | swap_used_mb=$(echo $swap | cut -d\ -f3)
31 | if [ $swap_total_mb -eq 0 ]; then
32 | swap_used_ratio=0
33 | else
34 | swap_used_ratio=$(echo "$swap" | awk '{printf("%.2f\n", $3/$2);}')
35 | fi
36 | else
37 | mem_total_mb=$(meminfo_fmt MemTotal)
38 | mem_available_mb=$(meminfo_fmt MemAvailable)
39 | mem_hardused_mb=$(( $mem_total_mb - $mem_available_mb ))
40 | mem_hardused_ratio=$(echo $mem_hardused_mb $mem_total_mb | awk '{printf("%.2f", $1/$2);}')
41 | mem_buffers_mb=$(meminfo_fmt Buffers)
42 | mem_cached_mb=$(meminfo_fmt Cached)
43 | 
44 | swap_total_mb=$(meminfo_fmt SwapTotal)
45 | swap_free_mb=$(meminfo_fmt SwapFree)
46 | swap_used_mb=$(( $swap_total_mb - $swap_free_mb ))
47 | if [ $swap_total_mb -eq 0 ]; then
48 | swap_used_ratio=0
49 | else
50 | swap_used_ratio=$(echo "$swap_used_mb" "$swap_total_mb" | awk '{printf("%.2f\n", $1/$2);}')
51 | fi
52 | fi
53 | # values computed the same way for both branches (ratios are relative to the total memory)
54 | mem_buffcache_mb=$(($mem_cached_mb + $mem_buffers_mb))
55 | mem_buffcache_ratio=$(echo $mem_total_mb $mem_buffcache_mb\
56 | | awk '{printf("%.2f\n", $2/$1);}')
57 | mem_available_ratio=$(echo $mem_total_mb $mem_available_mb\
58 | | awk '{printf("%.2f\n", $2/$1);}')
59 | # probe output, one "KEY: value" per line
60 | echo "MEM_TOTAL_MB:" $mem_total_mb
61 | echo "MEM_AVAILABLE_MB:" $mem_available_mb
62 | echo "MEM_AVAILABLE_RATIO:" $mem_available_ratio
63 | echo "MEM_HARDUSED_MB:" $mem_hardused_mb
64 | echo "MEM_HARDUSED_RATIO:" $mem_hardused_ratio
65 | echo "MEM_BUFFCACHE_MB:" $mem_buffcache_mb
66 | echo "MEM_BUFFCACHE_RATIO:" $mem_buffcache_ratio
67 | echo "SWAP_TOTAL_MB:" $swap_total_mb
68 | echo "SWAP_FREE_MB:" $swap_free_mb
69 | echo "SWAP_USED_MB:" $swap_used_mb
70 | echo "SWAP_USED_RATIO:" $swap_used_ratio
71 |
--------------------------------------------------------------------------------
/etc/scripts/probes/ping.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: sends 5 pings to $1 and extracts figures from the ping summary.
4 | # Output: LOSS_PERC (from the "packets transmitted" line) and AVG_MS
5 | # (fifth "/"-separated field of the "rtt" line)
6 | 
7 | dest="$1"
8 | 
9 | if [ -z "$dest" ]; then
10 |     echo "ERROR: give IP to test" >&2
11 |     exit 1
12 | fi
13 | 
14 | summary=$(ping -qAc5 "$dest")
15 | 
16 | loss=$(echo "$summary" | sed -r -e '/packets transmitted/!d' -e 's/.* ([0-9]+)%.*/\1/g')
17 | avg=$(echo "$summary" | awk -F/ '/^rtt/ {print $5}')
18 | 
19 | echo LOSS_PERC: $loss
20 | echo AVG_MS: $avg
21 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/port.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: tells if a local port accepts connections (tested with nc -z).
4 | # $1 is the port number (ex: 443)
5 | # Output: OPEN (1 when nc succeeds, 0 otherwise)
6 | 
7 | port=$1
8 | 
9 | if [ -z "$port" ]; then
10 |     echo "ERROR: give port number (ex: 443)" >&2
11 |     exit 1
12 | fi
13 | 
14 | if nc -z localhost $port > /dev/null 2>&1; then
15 |     open=1
16 | else
17 |     open=0
18 | fi
19 | 
20 | echo "OPEN:" $open
21 | 
--------------------------------------------------------------------------------
/etc/scripts/probes/systemctl_status.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Probe: prints what "systemctl is-active" answers for a unit.
4 | # $1 is the unit name (ex: httpd.service)
5 | # Output: STATUS
6 | 
7 | unit="$1"
8 | 
9 | if [ -z "$unit" ]; then
10 |     echo "ERROR: give unit name (ex: httpd.service)" >&2
11 |     exit 1
12 | fi
13 | 
14 | echo "STATUS:" $(systemctl is-active "$unit")
15 | 
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/Xfennec/nosee
2 |
3 | go 1.18
4 |
5 | require (
6 | github.com/BurntSushi/toml v1.2.0
7 | github.com/Knetic/govaluate v3.0.0+incompatible
8 | github.com/fatih/color v1.13.0
9 | github.com/satori/go.uuid v1.2.0
10 | github.com/urfave/cli v1.22.9
11 | golang.org/x/crypto v0.0.0-20220817201139-bc19a97f63c8
12 | )
13 |
14 | require (
15 | github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d // indirect
16 | github.com/mattn/go-colorable v0.1.9 // indirect
17 | github.com/mattn/go-isatty v0.0.14 // indirect
18 | github.com/russross/blackfriday/v2 v2.0.1 // indirect
19 | github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect
20 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c // indirect
21 | )
22 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
2 | github.com/BurntSushi/toml v1.2.0 h1:Rt8g24XnyGTyglgET/PRUNlrUeu9F5L+7FilkXfZgs0=
3 | github.com/BurntSushi/toml v1.2.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
4 | github.com/Knetic/govaluate v3.0.0+incompatible h1:7o6+MAPhYTCF0+fdvoz1xDedhRb4f6s9Tn1Tt7/WTEg=
5 | github.com/Knetic/govaluate v3.0.0+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
6 | github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
7 | github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
8 | github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w=
9 | github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
10 | github.com/mattn/go-colorable v0.1.9 h1:sqDoxXbdeALODt0DAeJCVp38ps9ZogZEAXjus69YV3U=
11 | github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
12 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
13 | github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
14 | github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
15 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
16 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
17 | github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
18 | github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
19 | github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww=
20 | github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
21 | github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
22 | github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
23 | github.com/urfave/cli v1.22.9 h1:cv3/KhXGBGjEXLC4bH0sLuJ9BewaAbpk5oyMOveu4pw=
24 | github.com/urfave/cli v1.22.9/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
25 | golang.org/x/crypto v0.0.0-20220817201139-bc19a97f63c8 h1:GIAS/yBem/gq2MUqgNIzUHW7cJMmx3TGZOrnyYaNQ6c=
26 | golang.org/x/crypto v0.0.0-20220817201139-bc19a97f63c8/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
27 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
28 | golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
29 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I=
30 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
31 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E=
32 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
33 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
34 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
35 |
--------------------------------------------------------------------------------
/heartbeat.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "fmt"
6 | "io/ioutil"
7 | "os"
8 | "os/exec"
9 | "path"
10 | "path/filepath"
11 | "time"
12 | )
13 |
14 | func heartbeatsList(config *Config) ([]string, error) {
15 | hbDirPath := path.Clean(config.configPath + "/scripts/heartbeats/")
16 | stat, err := os.Stat(hbDirPath)
17 |
18 | if err != nil {
19 | return nil, fmt.Errorf("invalid 'heartbeats' directory '%s': %s", hbDirPath, err)
20 | }
21 |
22 | if !stat.Mode().IsDir() {
23 | return nil, fmt.Errorf("is not a directory '%s'", hbDirPath)
24 | }
25 |
26 | scripts, err := filepath.Glob(hbDirPath + "/*")
27 | if err != nil {
28 | return nil, fmt.Errorf("error listing '%s' directory: %s", hbDirPath, err)
29 | }
30 |
31 | for _, scriptPath := range scripts {
32 | stat, err := os.Stat(scriptPath)
33 |
34 | if err != nil {
35 | return nil, fmt.Errorf("invalid 'script' file '%s': %s", scriptPath, err)
36 | }
37 |
38 | if !stat.Mode().IsRegular() {
39 | return nil, fmt.Errorf("is not a regular 'script' file '%s'", scriptPath)
40 | }
41 |
42 | _, err = ioutil.ReadFile(scriptPath)
43 | if err != nil {
44 | return nil, fmt.Errorf("error reading script file '%s': %s", scriptPath, err)
45 | }
46 | }
47 |
48 | return scripts, nil
49 | }
50 |
51 | func heartbeatExecute(script string) {
52 | varMap := make(map[string]interface{})
53 | varMap["NOSEE_SRV"] = GlobalConfig.Name
54 | varMap["VERSION"] = NoseeVersion
55 | varMap["DATETIME"] = time.Now().Format(time.RFC3339)
56 | varMap["STARTTIME"] = appStartTime.Format(time.RFC3339)
57 | varMap["UPTIME"] = (int)(time.Since(appStartTime).Seconds())
58 |
59 | cmd := exec.Command(script)
60 |
61 | env := os.Environ()
62 | for key, val := range varMap {
63 | env = append(env, fmt.Sprintf("%s=%s", key, InterfaceValueToString(val)))
64 | }
65 | cmd.Env = env
66 |
67 | if cmdOut, err := cmd.CombinedOutput(); err != nil {
68 | Warning.Printf("error running heartbeat '%s': %s: %s", script, err, bytes.TrimSpace(cmdOut))
69 | } else {
70 | Trace.Printf("heartbeat '%s' OK: %s", script, bytes.TrimSpace(cmdOut))
71 | }
72 | }
73 |
74 | func heartbeatsExecute(scripts []string) {
75 | for _, script := range scripts {
76 | heartbeatExecute(script)
77 | }
78 | }
79 |
80 | func heartbeatsSchedule(scripts []string, delay time.Duration) {
81 | go func() {
82 | for {
83 | heartbeatsExecute(scripts)
84 | Info.Printf("heartbeat, %d scripts", len(scripts))
85 | // should check total exec duration and compare to delay, here!
86 | time.Sleep(delay)
87 | }
88 | }()
89 | }
90 |
--------------------------------------------------------------------------------
/host.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 | "time"
7 | )
8 |
// Host is the final form of hosts.d files
type Host struct {
	// Name identifies the host (logs, messages, host lookup).
	Name string
	// Filename is accepted as an alternative to Name when looking up a host;
	// presumably the base name of the hosts.d file (verify in tomlHostToHost).
	Filename string
	// Disabled flags a host that is currently disabled.
	Disabled bool
	// Classes are the class names tested by HasClass and MatchProbeTargets.
	Classes []string
	// Connection is used to connect to the host (see TestConnection).
	Connection *Connection
	// Defaults holds host-level values, keyed by name, that are reported as
	// overriding a probe's defaults of the same key.
	Defaults map[string]interface{}
	// Tasks are the tasks Schedule examines on each loop.
	Tasks []*Task
}
19 |
20 | // HasClass returns true if this Host has this class
21 | func (host *Host) HasClass(class string) bool {
22 | if class == "*" {
23 | return true
24 | }
25 |
26 | for _, hClass := range host.Classes {
27 | if hClass == class {
28 | return true
29 | }
30 | }
31 | return false
32 | }
33 |
34 | // MatchProbeTargets returns true if this Host matches probe's classes
35 | func (host *Host) MatchProbeTargets(probe *Probe) bool {
36 | for _, pTargets := range probe.Targets {
37 | tokens := strings.Split(pTargets, "&")
38 | matched := 0
39 | mustMatch := len(tokens)
40 | for _, token := range tokens {
41 | ttoken := strings.TrimSpace(token)
42 | if host.HasClass(ttoken) {
43 | matched++
44 | }
45 | }
46 | if matched == mustMatch {
47 | return true
48 | }
49 | }
50 | return false
51 | }
52 |
53 | // Schedule will loop forever, creating and executing runs for this host
54 | func (host *Host) Schedule() {
55 | for {
56 | start := time.Now()
57 |
58 | var run Run
59 | run.Host = host
60 | run.StartTime = start
61 |
62 | for _, task := range host.Tasks {
63 | if start.After(task.NextRun) || start.Equal(task.NextRun) {
64 | taskable, err := task.Taskable()
65 | if err != nil {
66 | Trace.Printf("Taskable() failed: %s", err)
67 | run.addError(err)
68 | continue
69 | }
70 | if taskable == false {
71 | Info.Printf("host '%s', paused task '%s'\n", host.Name, task.Probe.Name)
72 | continue
73 | }
74 |
75 | task.ReSchedule(start.Add(task.Probe.Delay))
76 | Info.Printf("host '%s', running task '%s'\n", host.Name, task.Probe.Name)
77 | run.Tasks = append(run.Tasks, task)
78 | }
79 | }
80 |
81 | if len(run.Tasks) > 0 {
82 | run.Go()
83 | run.Alerts()
84 | Trace.Printf("currentFails count = %d\n", len(currentFails))
85 | loggersExec(&run)
86 | }
87 | Info.Printf("host '%s', run ended", host.Name)
88 |
89 | end := time.Now()
90 | dur := end.Sub(start)
91 |
92 | if dur < time.Minute {
93 | remains := time.Minute - dur
94 | time.Sleep(remains)
95 | } else {
96 | run.addError(fmt.Errorf("run duration was too long (%s)", run.Duration))
97 | }
98 | Trace.Printf("(loop %s)\n", host.Name)
99 | }
100 | }
101 |
102 | // TestConnection will return nil if connection to the host was successful
103 | func (host *Host) TestConnection() error {
104 |
105 | //const bootstrap = "bash -s --"
106 |
107 | startTime := time.Now()
108 |
109 | channel := make(chan error, 1)
110 | go func() {
111 | if err := host.Connection.Connect(); err != nil {
112 | channel <- err
113 | }
114 | defer host.Connection.Close()
115 | channel <- nil
116 | }()
117 |
118 | connTimeout := host.Connection.SSHConnTimeWarn * 2
119 |
120 | select {
121 | case err := <-channel:
122 | if err != nil {
123 | return err
124 | }
125 | case <-time.After(connTimeout):
126 | return fmt.Errorf("SSH connection timeout (after %s)", connTimeout)
127 | }
128 |
129 | dialDuration := time.Now().Sub(startTime)
130 |
131 | if dialDuration > host.Connection.SSHConnTimeWarn {
132 | return fmt.Errorf("SSH connection time was too long: %s (ssh_connection_time_warn = %s)", dialDuration, host.Connection.SSHConnTimeWarn)
133 | }
134 |
135 | /*if err := run.prepareTestPipes(); err != nil {
136 | return err
137 | }*/
138 |
139 | /*if err := host.TestRun(bootstrap); err != nil {
140 | return err
141 | }*/
142 | Info.Printf("Connection to '%s' OK (%s)", host.Name, dialDuration)
143 |
144 | return nil
145 | }
146 |
--------------------------------------------------------------------------------
/log.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "io"
6 | "io/ioutil"
7 | "log"
8 | "os"
9 |
10 | "github.com/urfave/cli"
11 | )
12 |
// Loggers for trace, info, warning and error severity.
// LogInit creates all four; each one prefixes its lines with its severity
// ("TRACE: ", "INFO: ", "WARNING: ", "ERROR: ").
var (
	Trace   *log.Logger
	Info    *log.Logger
	Warning *log.Logger
	Error   *log.Logger
)
20 |
// writerCreate picks the destination of one log stream.
//
// std is the stream's standard output (ioutil.Discard when the stream is
// muted), fd is the optional log file (nil when there is none) and quiet
// suppresses standard output. A muted stream is always discarded; otherwise
// the result is the file alone (quiet), both the file and std, or std alone,
// and ioutil.Discard when quiet leaves nothing to write to.
func writerCreate(std io.Writer, fd *os.File, quiet bool) io.Writer {
	muted := std == ioutil.Discard
	hasFile := fd != nil

	switch {
	case muted:
		// no log at all for this stream (no std, no file)
		return ioutil.Discard
	case quiet && hasFile:
		return fd
	case quiet:
		return ioutil.Discard
	case hasFile:
		// both
		return io.MultiWriter(fd, std)
	default:
		return std
	}
}
41 |
42 | // LogInit will initialize loggers
43 | func LogInit(ctx *cli.Context) {
44 | var (
45 | traceHandle io.Writer
46 | infoHandle io.Writer
47 | warningHandle io.Writer
48 | errorHandle io.Writer
49 | )
50 |
51 | level := ctx.String("log-level")
52 | file := ctx.String("log-file")
53 | quiet := ctx.Bool("quiet")
54 | timestamp := ctx.Bool("log-timestamp")
55 |
56 | var (
57 | err error
58 | fd *os.File
59 | )
60 | if file != "" {
61 | fd, err = os.OpenFile(file, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0640)
62 | if err != nil {
63 | fmt.Fprintf(os.Stderr, "Unable to create log file '%s' (%s)\n", file, err)
64 | os.Exit(1)
65 | }
66 | } else {
67 | fd = nil
68 | }
69 |
70 | switch level {
71 | case "trace":
72 | traceHandle = writerCreate(os.Stdout, fd, quiet)
73 | infoHandle = writerCreate(os.Stdout, fd, quiet)
74 | warningHandle = writerCreate(os.Stdout, fd, quiet)
75 | errorHandle = writerCreate(os.Stderr, fd, quiet)
76 | case "info":
77 | traceHandle = writerCreate(ioutil.Discard, fd, quiet)
78 | infoHandle = writerCreate(os.Stdout, fd, quiet)
79 | warningHandle = writerCreate(os.Stdout, fd, quiet)
80 | errorHandle = writerCreate(os.Stderr, fd, quiet)
81 | case "warning":
82 | traceHandle = writerCreate(ioutil.Discard, fd, quiet)
83 | infoHandle = writerCreate(ioutil.Discard, fd, quiet)
84 | warningHandle = writerCreate(os.Stdout, fd, quiet)
85 | errorHandle = writerCreate(os.Stderr, fd, quiet)
86 | default:
87 | fmt.Fprintf(os.Stderr, "ERROR: invalid log level '%s'\n", level)
88 | os.Exit(1)
89 | }
90 |
91 | var flags = 0
92 | if timestamp {
93 | flags = log.Ldate | log.Ltime
94 | }
95 |
96 | Trace = log.New(traceHandle,
97 | "TRACE: ",
98 | flags|log.Lshortfile)
99 |
100 | Info = log.New(infoHandle,
101 | "INFO: ",
102 | flags)
103 |
104 | Warning = log.New(warningHandle,
105 | "WARNING: ",
106 | flags)
107 |
108 | Error = log.New(errorHandle,
109 | "ERROR: ",
110 | flags)
111 |
112 | Trace.Println("Log init")
113 | }
114 |
--------------------------------------------------------------------------------
/loggers.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "fmt"
6 | "io/ioutil"
7 | "os"
8 | "os/exec"
9 | "path"
10 | "path/filepath"
11 | "strings"
12 | )
13 |
14 | func loggersList(config *Config) ([]string, error) {
15 | lgDirPath := path.Clean(config.configPath + "/scripts/loggers/")
16 | stat, err := os.Stat(lgDirPath)
17 |
18 | if err != nil {
19 | return nil, fmt.Errorf("invalid 'loggers' directory '%s': %s", lgDirPath, err)
20 | }
21 |
22 | if !stat.Mode().IsDir() {
23 | return nil, fmt.Errorf("is not a directory '%s'", lgDirPath)
24 | }
25 |
26 | scripts, err := filepath.Glob(lgDirPath + "/*")
27 | if err != nil {
28 | return nil, fmt.Errorf("error listing '%s' directory: %s", lgDirPath, err)
29 | }
30 |
31 | for _, scriptPath := range scripts {
32 | stat, err := os.Stat(scriptPath)
33 |
34 | if err != nil {
35 | return nil, fmt.Errorf("invalid 'script' file '%s': %s", scriptPath, err)
36 | }
37 |
38 | if !stat.Mode().IsRegular() {
39 | return nil, fmt.Errorf("is not a regular 'script' file '%s'", scriptPath)
40 | }
41 |
42 | _, err = ioutil.ReadFile(scriptPath)
43 | if err != nil {
44 | return nil, fmt.Errorf("error reading script file '%s': %s", scriptPath, err)
45 | }
46 | }
47 |
48 | return scripts, nil
49 | }
50 |
51 | func loggersExec(run *Run) {
52 | varMap := make(map[string]interface{})
53 | varMap["NOSEE_SRV"] = GlobalConfig.Name
54 | varMap["VERSION"] = NoseeVersion
55 | varMap["HOST_NAME"] = run.Host.Name
56 | varMap["HOST_FILE"] = run.Host.Filename
57 | varMap["CLASSES"] = strings.Join(run.Host.Classes, ",")
58 |
59 | var valuesBuff bytes.Buffer
60 | for _, result := range run.TaskResults {
61 | for key, val := range result.Values {
62 | // df.toml;DISK_FULLEST_PERC;27
63 | str := fmt.Sprintf("%s;%s;%s\n", result.Task.Probe.Filename, key, val)
64 | valuesBuff.WriteString(str)
65 | }
66 | }
67 |
68 | go func() {
69 | for _, script := range globalLogers {
70 | cmd := exec.Command(script)
71 |
72 | // we inject Values thru stdin:
73 | cmd.Stdin = strings.NewReader(valuesBuff.String())
74 |
75 | env := os.Environ()
76 | for key, val := range varMap {
77 | env = append(env, fmt.Sprintf("%s=%s", key, InterfaceValueToString(val)))
78 | }
79 | cmd.Env = env
80 |
81 | if cmdOut, err := cmd.CombinedOutput(); err != nil {
82 | Warning.Printf("error running logger '%s': %s: %s", script, err, bytes.TrimSpace(cmdOut))
83 | } else {
84 | Trace.Printf("logger '%s' OK: %s", script, bytes.TrimSpace(cmdOut))
85 | }
86 | }
87 | }()
88 | }
89 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "fmt"
6 | "math/rand"
7 | "os"
8 | "path"
9 | "path/filepath"
10 | "strings"
11 | "sync"
12 | "time"
13 |
14 | "github.com/BurntSushi/toml"
15 | "github.com/Knetic/govaluate"
16 | "github.com/fatih/color"
17 | "github.com/urfave/cli"
18 | )
19 |
// NoseeVersion in X.Y string format
const NoseeVersion = "0.1"

// myRand is the application's random generator, seeded with the current
// time by main().
var myRand *rand.Rand

// globalAlerts holds the alerts loaded by createHosts().
var globalAlerts []*Alert

// globalLogers holds the logger script paths set by mainDefault() and
// executed by loggersExec().
var globalLogers []string

// appStartTime is the application start time, recorded by main().
var appStartTime time.Time
27 |
// configurationDirList returns the "*.toml" files found directly inside the
// inpath sub-directory of dirPath. An error is returned when that location
// can't be stat'ed, is not a directory or can't be listed.
func configurationDirList(inpath string, dirPath string) ([]string, error) {
	target := path.Clean(dirPath + "/" + inpath)

	info, err := os.Stat(target)
	switch {
	case err != nil:
		return nil, fmt.Errorf("invalid directory '%s': %s", target, err)
	case !info.Mode().IsDir():
		return nil, fmt.Errorf("is not a directory '%s'", target)
	}

	matches, err := filepath.Glob(target + "/*.toml")
	if err != nil {
		return nil, fmt.Errorf("error listing '%s' directory: %s", target, err)
	}
	return matches, nil
}
48 |
49 | func createProbes(ctx *cli.Context, config *Config) ([]*Probe, error) {
50 | probesdFiles, errd := configurationDirList("probes.d", config.configPath)
51 | if errd != nil {
52 | return nil, fmt.Errorf("Error: %s", errd)
53 | }
54 |
55 | var probes []*Probe
56 | pNames := make(map[string]string)
57 |
58 | for _, file := range probesdFiles {
59 | var tProbe tomlProbe
60 |
61 | if _, err := toml.DecodeFile(file, &tProbe); err != nil {
62 | return nil, fmt.Errorf("Error decoding %s: %s", file, err)
63 | }
64 |
65 | _, filename := path.Split(file)
66 | probe, err := tomlProbeToProbe(&tProbe, config, filename)
67 | if err != nil {
68 | return nil, fmt.Errorf("Error using %s: %s", file, err)
69 | }
70 |
71 | if probe != nil {
72 | if f, exists := pNames[probe.Name]; exists == true {
73 | return nil, fmt.Errorf("Config error: duplicate name '%s' (%s, %s)", probe.Name, f, file)
74 | }
75 |
76 | probes = append(probes, probe)
77 | pNames[probe.Name] = file
78 | }
79 | }
80 | Info.Printf("probe count = %d\n", len(probes))
81 | return probes, nil
82 | }
83 |
84 | func createAlerts(ctx *cli.Context, config *Config) ([]*Alert, error) {
85 | alertdFiles, err := configurationDirList("alerts.d", config.configPath)
86 | if err != nil {
87 | return nil, fmt.Errorf("Error: %s", err)
88 | }
89 |
90 | var alerts []*Alert
91 | aNames := make(map[string]string)
92 | for _, file := range alertdFiles {
93 | var tAlert tomlAlert
94 |
95 | if _, err := toml.DecodeFile(file, &tAlert); err != nil {
96 | return nil, fmt.Errorf("Error decoding %s: %s", file, err)
97 | }
98 |
99 | alert, err := tomlAlertToAlert(&tAlert, config)
100 | if err != nil {
101 | return nil, fmt.Errorf("Error using %s: %s", file, err)
102 | }
103 |
104 | if alert != nil {
105 | if f, exists := aNames[alert.Name]; exists == true {
106 | return nil, fmt.Errorf("Config error: duplicate name '%s' (%s, %s)", alert.Name, f, file)
107 | }
108 |
109 | alerts = append(alerts, alert)
110 | aNames[alert.Name] = file
111 | }
112 | }
113 | // = alerts
114 | Info.Printf("alert count = %d\n", len(alerts))
115 |
116 | // check if we have at least one "general" alert receiver
117 | generalReceivers := 0
118 | for _, alert := range alerts {
119 | for _, target := range alert.Targets {
120 | if target == GeneralClass || target == "*" {
121 | generalReceivers++
122 | }
123 | }
124 | }
125 | if generalReceivers == 0 {
126 | return nil, fmt.Errorf("Config error: at least one alert must match the 'general' class")
127 | }
128 | return alerts, nil
129 | }
130 |
131 | func createHosts(ctx *cli.Context, config *Config) ([]*Host, error) {
132 | hostsdFiles, errc := configurationDirList("hosts.d", config.configPath)
133 | if errc != nil {
134 | return nil, fmt.Errorf("Error: %s", errc)
135 | }
136 |
137 | var hosts []*Host
138 | hNames := make(map[string]string)
139 |
140 | for _, file := range hostsdFiles {
141 | var tHost tomlHost
142 |
143 | // defaults
144 | tHost.Network.SSHConnTimeWarn.Duration = config.SSHConnTimeWarn
145 |
146 | if _, err := toml.DecodeFile(file, &tHost); err != nil {
147 | return nil, fmt.Errorf("Error decoding %s: %s", file, err)
148 | }
149 |
150 | _, filename := path.Split(file)
151 | host, err := tomlHostToHost(&tHost, config, filename)
152 | if err != nil {
153 | return nil, fmt.Errorf("Error using %s: %s", file, err)
154 | }
155 |
156 | if host != nil {
157 | if f, exists := hNames[host.Name]; exists == true {
158 | return nil, fmt.Errorf("Config error: duplicate name '%s' (%s, %s)", host.Name, f, file)
159 | }
160 |
161 | hosts = append(hosts, host)
162 | hNames[host.Name] = file
163 | }
164 | }
165 | Info.Printf("host count = %d\n", len(hosts))
166 |
167 | if config.doConnTest == true {
168 | Info.Print("Testing connections…")
169 | errors := make(chan error, len(hosts))
170 | for _, host := range hosts {
171 | go func(host *Host) {
172 | if err := host.TestConnection(); err != nil {
173 | errors <- fmt.Errorf("Error connecting %s: %s", host.Name, err)
174 | } else {
175 | errors <- nil
176 | }
177 | }(host)
178 | }
179 | for i := 0; i < len(hosts); i++ {
180 | select {
181 | case err := <-errors:
182 | if err != nil {
183 | return nil, err
184 | }
185 | }
186 | }
187 | }
188 |
189 | probes, err := createProbes(ctx, config)
190 | if err != nil {
191 | return nil, err
192 | }
193 |
194 | globalAlerts, err = createAlerts(ctx, config)
195 | if err != nil {
196 | return nil, err
197 | }
198 |
199 | // update hosts with tasks
200 | var taskCount int
201 | for _, host := range hosts {
202 | for _, probe := range probes {
203 | if host.MatchProbeTargets(probe) {
204 | var task Task
205 | task.Probe = probe
206 | task.PrevRun = time.Now()
207 | task.NextRun = time.Now()
208 | host.Tasks = append(host.Tasks, &task)
209 | taskCount++
210 | }
211 | }
212 | }
213 | Info.Printf("task count = %d\n", taskCount)
214 |
215 | return hosts, nil
216 | }
217 |
218 | func scheduleHosts(hosts []*Host, config *Config) error {
219 | var hostGroup sync.WaitGroup
220 | for i, host := range hosts {
221 | hostGroup.Add(1)
222 | go func(i int, host *Host) {
223 | defer hostGroup.Done()
224 | if config.StartTimeSpreadSeconds > 0 {
225 | // Sleep here, to ease global load
226 | fact := float32(i) / float32(len(hosts)) * 1000 * float32(config.StartTimeSpreadSeconds)
227 | wait := time.Duration(fact) * time.Millisecond
228 | time.Sleep(wait)
229 | }
230 | host.Schedule()
231 | }(i, host)
232 | }
233 |
234 | hostGroup.Wait()
235 | return fmt.Errorf("QUIT: empty wait group, everyone died :(")
236 | }
237 |
238 | func mainDefault(ctx *cli.Context) error {
239 | LogInit(ctx)
240 |
241 | config, err := GlobalConfigRead(ctx.String("config-path"), "nosee.toml")
242 | if err != nil {
243 | Error.Printf("Config (nosee.toml): %s", err)
244 | return cli.NewExitError("", 1)
245 | }
246 | GlobalConfig = config
247 |
248 | heartbeats, err := heartbeatsList(config)
249 | if err != nil {
250 | Error.Println(err)
251 | return cli.NewExitError("", 2)
252 | }
253 |
254 | globalLogers, err = loggersList(config)
255 | if err != nil {
256 | Error.Println(err)
257 | return cli.NewExitError("", 2)
258 | }
259 |
260 | hosts, err := createHosts(ctx, config)
261 | if err != nil {
262 | Error.Println(err)
263 | return cli.NewExitError("", 10)
264 | }
265 |
266 | CurrentFailsCreate()
267 | CurrentFailsLoad()
268 |
269 | if pidPath := ctx.String("pid-file"); pidPath != "" {
270 | pid, err := NewPIDFile(pidPath)
271 | if err != nil {
272 | return cli.NewExitError(fmt.Errorf("Error with pid file: %s", err), 100)
273 | }
274 | defer pid.Remove()
275 | }
276 |
277 | heartbeatsSchedule(heartbeats, config.HeartbeatDelay)
278 |
279 | if err := scheduleHosts(hosts, config); err != nil {
280 | return cli.NewExitError(err, 1)
281 | }
282 |
283 | return nil
284 | }
285 |
286 | func mainCheck(ctx *cli.Context) error {
287 | LogInit(ctx.Parent())
288 |
289 | fmt.Printf("Checking configuration and connections…\n")
290 |
291 | config, err := GlobalConfigRead(ctx.Parent().String("config-path"), "nosee.toml")
292 | if err != nil {
293 | Error.Printf("Config (nosee.toml): %s", err)
294 | return cli.NewExitError("", 1)
295 | }
296 | GlobalConfig = config
297 |
298 | _, err = heartbeatsList(config)
299 | if err != nil {
300 | Error.Println(err)
301 | return cli.NewExitError("", 2)
302 | }
303 |
304 | _, err = loggersList(config)
305 | if err != nil {
306 | Error.Println(err)
307 | return cli.NewExitError("", 2)
308 | }
309 |
310 | _, err = createHosts(ctx, config)
311 | if err != nil {
312 | Error.Println(err)
313 | return cli.NewExitError("", 10)
314 | }
315 | fmt.Println("OK")
316 | return nil
317 | }
318 |
319 | func mainRecap(ctx *cli.Context) error {
320 | LogInit(ctx.Parent())
321 |
322 | config, err := GlobalConfigRead(ctx.Parent().String("config-path"), "nosee.toml")
323 | if err != nil {
324 | Error.Printf("Config (nosee.toml): %s", err)
325 | return cli.NewExitError("", 1)
326 | }
327 | GlobalConfig = config
328 |
329 | // TODO: should probably display heartbeats/loggers in the recap, then?
330 | _, err = heartbeatsList(config)
331 | if err != nil {
332 | Error.Println(err)
333 | return cli.NewExitError("", 2)
334 | }
335 |
336 | hosts, err := createHosts(ctx, config)
337 | if err != nil {
338 | Error.Println(err)
339 | return cli.NewExitError("", 10)
340 | }
341 |
342 | if ctx.Bool("no-color") == true {
343 | color.NoColor = true
344 | }
345 |
346 | red := color.New(color.FgRed).SprintFunc()
347 | yellow := color.New(color.FgYellow).SprintFunc()
348 | green := color.New(color.FgGreen).SprintFunc()
349 | cyan := color.New(color.FgCyan).SprintFunc()
350 |
351 | for _, host := range hosts {
352 | fmt.Printf("%s: %s\n", cyan("Host"), host.Name)
353 | for _, task := range host.Tasks {
354 | fmt.Printf(" %s: %s (%dm)\n", green("Probe"), task.Probe.Name, int(task.Probe.Delay.Minutes()))
355 | for _, check := range task.Probe.Checks {
356 | fmt.Printf(" %s: %s (%s)\n", yellow("Check"), check.Desc, strings.Join(check.Classes, ", "))
357 | var msg AlertMessage
358 | msg.Classes = check.Classes
359 | alertCount := 0
360 | for _, alert := range globalAlerts {
361 | if msg.MatchAlertTargets(alert) {
362 | alertCount++
363 | fmt.Printf(" %s: %s\n", red("Alert"), alert.Name)
364 | }
365 | }
366 | if alertCount == 0 {
367 | fmt.Println(red(" No valid alert for this check!"))
368 | }
369 | }
370 | }
371 | }
372 |
373 | return nil
374 | }
375 |
376 | func mainExpr(ctx *cli.Context) error {
377 | LogInit(ctx.Parent())
378 | if ctx.NArg() == 0 {
379 | err := fmt.Errorf("Error, you must provide a govaluate expression parameter, see https://github.com/Knetic/govaluate for syntax and features")
380 | return cli.NewExitError(err, 1)
381 | }
382 | exprString := ctx.Args().Get(0)
383 |
384 | expr, err := govaluate.NewEvaluableExpressionWithFunctions(exprString, CheckFunctions)
385 | if err != nil {
386 | return cli.NewExitError(err, 2)
387 | }
388 |
389 | if vars := expr.Vars(); len(vars) > 0 {
390 | errv := fmt.Errorf("Undefined variables: %s", strings.Join(vars, ", "))
391 | return cli.NewExitError(errv, 11)
392 | }
393 |
394 | result, err := expr.Evaluate(nil)
395 | if err != nil {
396 | return cli.NewExitError(err, 3)
397 | }
398 |
399 | fmt.Println(InterfaceValueToString(result))
400 | return nil
401 | }
402 |
403 | func mainTest(ctx *cli.Context) error {
404 | LogInit(ctx.Parent())
405 |
406 | config, err := GlobalConfigRead(ctx.Parent().String("config-path"), "nosee.toml")
407 | if err != nil {
408 | Error.Printf("Config (nosee.toml): %s", err)
409 | return cli.NewExitError("", 1)
410 | }
411 | config.loadDisabled = true // WARNING!
412 | config.doConnTest = false // WARNING!
413 | GlobalConfig = config
414 |
415 | hosts, err := createHosts(ctx, config)
416 | if err != nil {
417 | Error.Println(err)
418 | return cli.NewExitError("", 10)
419 | }
420 |
421 | // createHosts already load probes, but we need the full list
422 | // and not only probes targeting our host
423 | probes, err := createProbes(ctx, config)
424 | if err != nil {
425 | Error.Println(err)
426 | return cli.NewExitError("", 10)
427 | }
428 |
429 | requestedHost := ctx.Args().Get(0)
430 | requestedProbe := ctx.Args().Get(1)
431 |
432 | if requestedHost == "" {
433 | var list bytes.Buffer
434 | for _, host := range hosts {
435 | list.WriteString(fmt.Sprintf("- %s (%s)\n", host.Filename, host.Name))
436 | }
437 | Error.Printf("you must give a host Name or hosts.d/ filename:\n%s", list.String())
438 | return cli.NewExitError("", 1)
439 | }
440 |
441 | if requestedProbe == "" {
442 | var list bytes.Buffer
443 | for _, probe := range probes {
444 | list.WriteString(fmt.Sprintf("- %s (%s)\n", probe.Filename, probe.Name))
445 | }
446 | Error.Printf("you must give a probe Name or probes.d/ filename:\n%s", list.String())
447 | return cli.NewExitError("", 1)
448 | }
449 |
450 | // Locate requested host and probe…
451 | var foundHost *Host
452 | for _, host := range hosts {
453 | if host.Name == requestedHost || host.Filename == requestedHost {
454 | foundHost = host
455 | break
456 | }
457 | }
458 | if foundHost == nil {
459 | Error.Printf("can't find '%s' host", requestedHost)
460 | return cli.NewExitError("", 1)
461 | }
462 |
463 | var foundProbe *Probe
464 | for _, probe := range probes {
465 | if probe.Name == requestedProbe || probe.Filename == requestedProbe {
466 | foundProbe = probe
467 | break
468 | }
469 | }
470 | if foundProbe == nil {
471 | Error.Printf("can't find '%s' probe", requestedProbe)
472 | return cli.NewExitError("", 1)
473 | }
474 |
475 | if ctx.Bool("no-color") == true {
476 | color.NoColor = true
477 | }
478 |
479 | red := color.New(color.FgRed).SprintFunc()
480 | yellow := color.New(color.FgYellow).SprintFunc()
481 | green := color.New(color.FgGreen).SprintFunc()
482 | cyan := color.New(color.FgCyan).SprintFunc()
483 | magenta := color.New(color.FgMagenta).SprintFunc()
484 | magentaS := color.New(color.FgMagenta).Add(color.CrossedOut).SprintFunc()
485 |
486 | _, scriptName := path.Split(foundProbe.Script)
487 | fmt.Printf("Testing: host '%s' with probe '%s' (%s, %s) using script '%s'\n", cyan(foundHost.Name), green(foundProbe.Name), foundHost.Filename, foundProbe.Filename, magenta(scriptName))
488 | if foundHost.Disabled == true {
489 | fmt.Printf("Note: the host '%s' is currently %s\n", red(foundHost.Name), red("disabled"))
490 | }
491 | if foundProbe.Disabled == true {
492 | fmt.Printf("Note: the probe '%s' is currently %s\n", red(foundProbe.Name), red("disabled"))
493 | }
494 | if foundHost.MatchProbeTargets(foundProbe) == false {
495 | fmt.Printf("Note: the probe '%s' does %s match host '%s' (see classes and targets)\n", red(foundProbe.Name), red("not"), red(foundHost.Name))
496 | }
497 |
498 | // print defaults
499 | for key, val := range foundProbe.Defaults {
500 | if _, ok := foundHost.Defaults[key]; ok == true {
501 | fmt.Printf("default: %s = %s -> %s (host override)\n",
502 | magenta(key),
503 | magentaS(InterfaceValueToString(val)),
504 | magenta(foundHost.Defaults[key]))
505 | } else {
506 | fmt.Printf("default: %s = %s\n", magenta(key), magenta(InterfaceValueToString(val)))
507 | }
508 | }
509 |
510 | var run Run
511 | run.StartTime = time.Now()
512 | run.Host = foundHost
513 |
514 | var task Task
515 | task.Probe = foundProbe
516 | task.PrevRun = time.Now()
517 | task.NextRun = time.Now()
518 |
519 | run.Tasks = append(run.Tasks, &task)
520 | run.Go()
521 |
522 | if len(run.Errors) > 0 {
523 | for _, err := range run.Errors {
524 | fmt.Printf("run error: %s\n", red(err))
525 | }
526 | return nil
527 | }
528 |
529 | result := run.TaskResults[0]
530 |
531 | for key, val := range result.Values {
532 | fmt.Printf("value: %s = %s\n", yellow(key), yellow(val))
533 | }
534 |
535 | for _, err := range result.Logs {
536 | fmt.Printf("log: %s\n", cyan(err))
537 | }
538 |
539 | if result.ExitStatus == 0 {
540 | fmt.Printf("script exit status: %s (success)\n", green(result.ExitStatus))
541 | } else {
542 | fmt.Printf("script exit status: %s (error)\n", red(result.ExitStatus))
543 | }
544 | fmt.Printf("script duration: %s (+ ssh dial duration: %s)\n", result.Duration, run.DialDuration)
545 |
546 | if run.totalErrorCount() > 0 {
547 | for _, err := range result.Errors {
548 | fmt.Printf("error: %s\n", red(err))
549 | }
550 | return nil
551 | }
552 |
553 | result.DoChecks()
554 |
555 | // DoChecks may add its own errors
556 | for _, err := range result.Errors {
557 | fmt.Printf("error: %s\n", red(err))
558 | }
559 |
560 | for _, check := range result.SuccessfulChecks {
561 | fmt.Printf("check %s: %s: false (no alert)\n", green("GOOD"), green(check.Desc))
562 | }
563 | for _, check := range result.FailedChecks {
564 | fmt.Printf("check %s: %s: true (alert)\n", red("BAD"), red(check.Desc))
565 | }
566 |
567 | return nil
568 | }
569 |
570 | func main() {
571 | // generic (aka "not cli command specific") inits
572 | source := rand.NewSource(time.Now().UnixNano())
573 | myRand = rand.New(source)
574 | CheckFunctionsInit()
575 | appStartTime = time.Now()
576 |
577 | app := cli.NewApp()
578 | app.Usage = "Nosee: a nosey, agentless, easy monitoring tool over SSH"
579 | app.Version = NoseeVersion
580 |
581 | app.Flags = []cli.Flag{
582 | cli.StringFlag{
583 | Name: "config-path, c",
584 | Value: "/etc/nosee/",
585 | Usage: "configuration directory `PATH`",
586 | EnvVar: "NOSEE_CONFIG",
587 | },
588 | cli.StringFlag{
589 | Name: "log-level, l",
590 | Value: "warning",
591 | Usage: "log `level` verbosity (trace, info, warning)",
592 | },
593 | cli.StringFlag{
594 | Name: "log-file, f",
595 | Usage: "log file to `FILE` (append)",
596 | },
597 | cli.BoolFlag{
598 | Name: "log-timestamp, t",
599 | Usage: "add timestamp to log output",
600 | },
601 | cli.BoolFlag{
602 | Name: "quiet, q",
603 | Usage: "no stdout/err output (except launch errors)",
604 | },
605 | cli.StringFlag{
606 | Name: "pid-file, p",
607 | Usage: "create pid `FILE`",
608 | },
609 | }
610 |
611 | app.Action = mainDefault
612 |
613 | app.Commands = []cli.Command{
614 | {
615 | Name: "check",
616 | Aliases: []string{"c"},
617 | Usage: "Check configuration files and connections",
618 | ArgsUsage: " ",
619 | Action: mainCheck,
620 | },
621 | {
622 | Name: "recap",
623 | Aliases: []string{"r"},
624 | Usage: "Recap configuration",
625 | ArgsUsage: " ",
626 | Action: mainRecap,
627 | Flags: []cli.Flag{
628 | cli.BoolFlag{
629 | Name: "no-color",
630 | Usage: "disable color output ",
631 | },
632 | },
633 | },
634 | {
635 | Name: "expr",
636 | Aliases: []string{"e"},
637 | Usage: "Test 'govaluate' expression (See Checks 'If')",
638 | ArgsUsage: "expression",
639 | Action: mainExpr,
640 | },
641 | {
642 | Name: "test",
643 | Aliases: []string{"t"},
644 | Usage: "Test any Probe on a any Host",
645 | ArgsUsage: "host probe",
646 | Description: "use Name or filename.toml (without path) for host and probe (disabled or not, targeted or not)",
647 | Action: mainTest,
648 | Flags: []cli.Flag{
649 | cli.BoolFlag{
650 | Name: "no-color",
651 | Usage: "disable color output ",
652 | },
653 | },
654 | },
655 | }
656 |
657 | app.Run(os.Args)
658 | }
659 |
--------------------------------------------------------------------------------
/pid.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "io/ioutil"
6 | "os"
7 | "path/filepath"
8 | "strconv"
9 | "strings"
10 | "syscall"
11 | )
12 |
// PIDFile stores the (little) information needed to manage a PID file.
type PIDFile struct {
	// Path is the location of the PID file; NewPIDFile writes the current
	// process ID there and Remove deletes it.
	Path string
}
17 |
18 | func checkPIDFileExists(path string) error {
19 | if pidByte, err := ioutil.ReadFile(path); err == nil {
20 | pidString := strings.TrimSpace(string(pidByte))
21 | if pid, err := strconv.Atoi(pidString); err == nil {
22 | if pidIsRunning(pid) {
23 | return fmt.Errorf("pid file '%s' already exists", path)
24 | }
25 | }
26 | }
27 | return nil
28 | }
29 |
30 | // NewPIDFile create a PIDFile if there no other instance already running
31 | func NewPIDFile(path string) (*PIDFile, error) {
32 | if err := checkPIDFileExists(path); err != nil {
33 | return nil, err
34 | }
35 | if err := os.MkdirAll(filepath.Dir(path), os.FileMode(0755)); err != nil {
36 | return nil, err
37 | }
38 | if err := ioutil.WriteFile(path, []byte(fmt.Sprintf("%d", os.Getpid())), 0644); err != nil {
39 | return nil, err
40 | }
41 |
42 | return &PIDFile{Path: path}, nil
43 | }
44 |
// Remove deletes the PID file located at file.Path and returns the error
// reported by os.Remove, if any.
func (file PIDFile) Remove() error {
	return os.Remove(file.Path)
}
49 |
// pidIsRunning reports whether a process with the given pid exists, by
// sending it signal 0 (no signal is actually delivered, only the error
// checking is performed).
//
// Only a strictly positive pid designates a single process: any other value
// (for instance "0" or a negative number read from a corrupted PID file) is
// reported as not running instead of being handed to the os package, which
// would answer with an unrelated error that used to be mistaken for a live
// process.
//
// Any signalling error other than the two "process is gone" messages below
// is treated as "running" (presumably a permission error on a process owned
// by another user).
func pidIsRunning(pid int) bool {
	if pid <= 0 {
		return false
	}

	process, err := os.FindProcess(pid)
	if err != nil {
		return false
	}

	err = process.Signal(syscall.Signal(0))
	if err == nil {
		return true
	}

	// Depending on the Go version, a missing process is reported either as
	// the raw ESRCH message or as the os package "already finished" error.
	switch err.Error() {
	case "no such process", "os: process already finished":
		return false
	}

	return true
}
68 |
--------------------------------------------------------------------------------
/probe.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "time"
5 |
6 | "github.com/Knetic/govaluate"
7 | )
8 |
// Check holds the final information about a check of a probes.d file.
type Check struct {
	// Index identifies the check inside its probe; together with the host
	// and probe names it forms the hash used to track the check failures.
	Index int
	// Desc is the human readable description printed in logs and test output.
	Desc string
	// If is the expression of the check. NOTE(review): test output suggests
	// that a "true" result means the check failed (alert) — confirm in
	// TaskResult.DoChecks.
	If *govaluate.EvaluableExpression
	// Classes: presumably alert classes targeted by this check — TODO confirm.
	Classes []string
	// NeededFailures is the failure count at which the BAD alert is rung.
	NeededFailures int
	// NeededSuccesses is the success count at which a tracked failure is
	// considered solved.
	NeededSuccesses int
}
18 |
// Probe is the final form of probes.d files.
type Probe struct {
	// Name is the display name of the probe (logs, alerts, hashes).
	Name string
	// Filename is the configuration file the probe comes from.
	Filename string
	// Disabled marks the probe as disabled.
	Disabled bool
	// Script is the path of the local script that is opened and streamed to
	// the remote shell.
	Script string
	// Targets: presumably the host classes this probe applies to (see
	// Host.MatchProbeTargets) — TODO confirm.
	Targets []string
	// Delay: presumably the period between two executions — TODO confirm.
	Delay time.Duration
	// Timeout is the maximum accepted duration of the script; a longer task
	// gets an error.
	Timeout time.Duration
	// Arguments are the script arguments; variables are expanded using
	// Defaults (overridden by the host defaults).
	Arguments string
	// Defaults are the default values of the probe variables.
	Defaults map[string]interface{}
	// Checks are the checks of the probe.
	Checks []*Check
	// RunIf is an optional expression; when nil the task is always taskable.
	RunIf *govaluate.EvaluableExpression
}
33 |
34 | // MissingDefaults return a slice with names of defaults used in Check 'If'
35 | // expressions and Probe script arguments. The slice length is 0 if no
36 | // missing default were found.
37 | func (probe *Probe) MissingDefaults() []string {
38 | missing := make(map[string]bool)
39 |
40 | for _, check := range probe.Checks {
41 | for _, name := range check.If.Vars() {
42 | if IsAllUpper(name) {
43 | continue
44 | }
45 | if _, ok := probe.Defaults[name]; ok != true {
46 | missing[name] = true
47 | }
48 | }
49 | }
50 |
51 | vars := StringFindVariables(probe.Arguments)
52 | for _, name := range vars {
53 | if _, ok := probe.Defaults[name]; ok != true {
54 | missing[name] = true
55 | }
56 | }
57 |
58 | // map to slice:
59 | var missSlice []string
60 | for key := range missing {
61 | missSlice = append(missSlice, key)
62 | }
63 |
64 | return missSlice
65 | }
66 |
--------------------------------------------------------------------------------
/run.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "time"
6 | )
7 |
// Run is a list of Tasks on Host, including task results
type Run struct {
	// Host is the target of the run; its Connection is used by Go.
	Host *Host
	// Tasks are the tasks to execute, in order.
	Tasks []*Task
	// StartTime is (re)set when Go starts.
	StartTime time.Time
	// Duration is the total time spent in Go, set when Go returns.
	Duration time.Duration
	// DialDuration is the time elapsed between StartTime and the end of the
	// SSH connection step.
	DialDuration time.Duration
	// TaskResults receives one entry per task handled by stdinInject.
	TaskResults []*TaskResult
	// Errors are the run-level errors (see addError), as opposed to the
	// errors stored on each TaskResult.
	Errors []error
}
18 |
19 | // Dump prints Run informations on the screen for debugging purposes
20 | func (run *Run) Dump() {
21 | fmt.Printf("-\n")
22 | fmt.Printf("- host: %s\n", run.Host.Name)
23 | fmt.Printf("- %d task(s)\n", len(run.Tasks))
24 | fmt.Printf("- start: %s\n", run.StartTime)
25 | fmt.Printf("- duration: %s\n", run.Duration)
26 | fmt.Printf("- ssh dial duration: %s\n", run.DialDuration)
27 | for _, err := range run.Errors {
28 | fmt.Printf("-e %s\n", err)
29 | }
30 | for _, res := range run.TaskResults {
31 | fmt.Printf("-- task probe: %s\n", res.Task.Probe.Name)
32 | fmt.Printf("-- start time: %s\n", res.StartTime)
33 | fmt.Printf("-- duration: %s\n", res.Duration)
34 | fmt.Printf("-- exit status: %d\n", res.ExitStatus)
35 | fmt.Printf("-- next task run: %s\n", res.Task.NextRun)
36 | for key, val := range res.Values {
37 | fmt.Printf("-v- '%s' = '%s'\n", key, val)
38 | }
39 | for _, err := range res.Errors {
40 | fmt.Printf("-e- %s\n", err)
41 | }
42 | for _, check := range res.FailedChecks {
43 | fmt.Printf("-F- %s\n", check.Desc)
44 | }
45 | for _, log := range res.Logs {
46 | fmt.Printf("-l- %s\n", log)
47 | }
48 | }
49 | }
50 |
// addError logs err at Info level (with the host name) and appends it to the
// run-level error list.
//
// NOTE(review): this is called from the goroutines started by Go and
// preparePipes (stdin injector, stdout/stderr readers) and no locking is
// visible here — confirm whether concurrent appends can happen.
func (run *Run) addError(err error) {
	Info.Printf("Run error: %s (host '%s')", err, run.Host.Name)
	run.Errors = append(run.Errors, err)
}
55 |
56 | func (run *Run) currentTaskResult() *TaskResult {
57 | if len(run.TaskResults) == 0 {
58 | return nil
59 | }
60 | return run.TaskResults[len(run.TaskResults)-1]
61 | }
62 |
63 | func (run *Run) totalErrorCount() int {
64 | total := len(run.Errors)
65 | for _, taskResult := range run.TaskResults {
66 | total += len(taskResult.Errors)
67 | total += len(taskResult.FailedChecks)
68 | }
69 | return total
70 | }
71 |
72 | func (run *Run) totalTaskResultErrorCount() int {
73 | total := 0
74 | for _, taskResult := range run.TaskResults {
75 | total += len(taskResult.Errors)
76 | }
77 | return total
78 | }
79 |
80 | // ReSchedule will force all Run tasks to run on next time step
81 | func (run *Run) ReSchedule() {
82 | for _, task := range run.Tasks {
83 | task.NextRun = task.PrevRun
84 | }
85 | Info.Printf("re-scheduling all tasks for '%s'\n", run.Host.Name)
86 | }
87 |
88 | // ReScheduleFailedTasks will force all Run failed tasks to run on next time step
89 | func (run *Run) ReScheduleFailedTasks() {
90 | for _, task := range run.Tasks {
91 | for _, cf := range currentFails {
92 | if cf.RelatedTask == task || cf.RelatedTTask == task {
93 | task.ReSchedule(time.Now())
94 | Info.Printf("re-scheduling task '%s'\n", task.Probe.Name)
95 | }
96 | }
97 | }
98 | }
99 |
100 | // DoChecks will evaluate checks on every TaskResult of the Run
101 | func (run *Run) DoChecks() {
102 | for _, taskResult := range run.TaskResults {
103 | taskResult.DoChecks()
104 | }
105 | }
106 |
107 | // Go will execute the Run
108 | func (run *Run) Go() {
109 | const bootstrap = "bash -s --"
110 |
111 | timeout := time.Second * 59
112 | timeoutChan := time.After(timeout)
113 |
114 | run.StartTime = time.Now()
115 | defer func() {
116 | run.Duration = time.Now().Sub(run.StartTime)
117 | }()
118 |
119 | if err := run.Host.Connection.Connect(); err != nil {
120 | run.addError(err)
121 | return
122 | }
123 | defer run.Host.Connection.Close()
124 |
125 | run.DialDuration = time.Now().Sub(run.StartTime)
126 | if run.DialDuration > run.Host.Connection.SSHConnTimeWarn {
127 | run.addError(fmt.Errorf("SSH connection time was too long: %s (ssh_connection_time_warn = %s)", run.DialDuration, run.Host.Connection.SSHConnTimeWarn))
128 | return
129 | }
130 |
131 | if err := run.preparePipes(); err != nil {
132 | run.addError(err)
133 | return
134 | }
135 |
136 | ended := make(chan int, 1)
137 |
138 | go func() {
139 | if err := run.Host.Connection.Session.Run(bootstrap); err != nil {
140 | run.addError(err)
141 | }
142 | ended <- 1
143 | }()
144 |
145 | select {
146 | case <-ended:
147 | // nice
148 | case <-timeoutChan:
149 | run.addError(fmt.Errorf("timeout for this run, after %s", timeout))
150 | Trace.Println("run timeout")
151 | }
152 | }
153 |
--------------------------------------------------------------------------------
/run_alerts.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "strconv"
6 | )
7 |
8 | // AlertsForRun creates a currentFail entry for this Run (if not already done)
9 | // and rings corresponding alerts
10 | func (run *Run) AlertsForRun() {
11 | var bbuf bytes.Buffer
12 | bbuf.WriteString(run.Host.Name)
13 | // We now limit to one Fail per host, otherwise we may flood
14 | // the user with Errors (ex: "alert, ssh connection 11s", then the same
15 | // with 11.5s, etc). If there's an issue with a host, you have to fix it
16 | // to get the others (if any left), it makes sense.
17 | /*for _, err := range run.Errors {
18 | bbuf.WriteString(err.Error())
19 | }*/
20 | hash := MD5Hash(bbuf.String())
21 |
22 | currentFail := CurrentFailGetAndInc(hash)
23 | currentFail.RelatedHost = run.Host
24 |
25 | if currentFail.FailCount > 1 {
26 | return
27 | }
28 |
29 | message := AlertMessageCreateForRun(AlertBad, run, currentFail)
30 | message.RingAlerts()
31 | }
32 |
33 | // AlertsForTasks creates currentFail entries for each failed TaskResults
34 | // (if not already done) and rings corresponding alerts
35 | func (run *Run) AlertsForTasks() {
36 | for _, taskRes := range run.TaskResults {
37 | if len(taskRes.Errors) > 0 {
38 | var bbuf bytes.Buffer
39 | bbuf.WriteString(run.Host.Name + taskRes.Task.Probe.Name)
40 | for _, err := range taskRes.Errors {
41 | bbuf.WriteString(err.Error())
42 | }
43 | hash := MD5Hash(bbuf.String())
44 |
45 | currentFail := CurrentFailGetAndInc(hash)
46 | currentFail.RelatedTTask = taskRes.Task
47 | if currentFail.FailCount > 1 {
48 | return
49 | }
50 |
51 | message := AlertMessageCreateForTaskResult(AlertBad, run, taskRes, currentFail)
52 | message.RingAlerts()
53 | }
54 | }
55 | }
56 |
57 | // AlertsForChecks creates currentFail entries for every FailedChecks of
58 | // every TaskResults (if not already done) and rings corresponding alerts
59 | func (run *Run) AlertsForChecks() {
60 | // Failures
61 | for _, taskRes := range run.TaskResults {
62 | for _, check := range taskRes.FailedChecks {
63 | Info.Printf("task '%s', check '%s' failed (%s)\n", taskRes.Task.Probe.Name, check.Desc, run.Host.Name)
64 |
65 | hash := MD5Hash(run.Host.Name + taskRes.Task.Probe.Name + strconv.Itoa(check.Index))
66 | currentFail := CurrentFailGetAndInc(hash)
67 | currentFail.RelatedTask = taskRes.Task
68 | if currentFail.FailCount != check.NeededFailures {
69 | continue // not yet / already done
70 | }
71 |
72 | message := AlertMessageCreateForCheck(AlertBad, run, taskRes, check, currentFail)
73 | message.RingAlerts()
74 | }
75 | }
76 |
77 | // Successes
78 | for _, taskRes := range run.TaskResults {
79 | for _, check := range taskRes.SuccessfulChecks {
80 | hash := MD5Hash(run.Host.Name + taskRes.Task.Probe.Name + strconv.Itoa(check.Index))
81 | // we had a failure for that?
82 | if currentFail := CurrentFailGetAndDec(hash); currentFail != nil {
83 | if currentFail.OkCount == check.NeededSuccesses {
84 | Info.Printf("task '%s', check '%s' is now OK (%s)\n", taskRes.Task.Probe.Name, check.Desc, run.Host.Name)
85 | // send the good news (if the bad one was sent) and delete this currentFail
86 | if currentFail.FailCount >= check.NeededFailures {
87 | message := AlertMessageCreateForCheck(AlertGood, run, taskRes, check, currentFail)
88 | message.RingAlerts()
89 | }
90 | CurrentFailDelete(hash)
91 | }
92 | }
93 | }
94 | }
95 | }
96 |
97 | // Alerts checks for Run failures, Task failures and Check
98 | // failures and call corresponding AlertsFor*() functions
99 | func (run *Run) Alerts() {
100 | run.ClearAnyCurrentTasksFails()
101 |
102 | if run.totalErrorCount() == 0 {
103 | run.ClearAnyCurrentRunFails()
104 | run.DoChecks()
105 | if run.totalTaskResultErrorCount() > 0 {
106 | Info.Printf("found some 'tasks' error(s) (post-checks)\n")
107 | run.AlertsForTasks()
108 | } else {
109 | // ideal path, let's see if there's any check errors ?
110 | run.AlertsForChecks()
111 | }
112 | } else { // run & tasks errors
113 | if len(run.Errors) > 0 {
114 | Info.Printf("found some 'run' error(s)\n")
115 | run.AlertsForRun()
116 | run.ReSchedule()
117 | } else {
118 | Info.Printf("found some 'tasks' error(s)\n")
119 | run.AlertsForTasks()
120 | }
121 | }
122 |
123 | run.ReScheduleFailedTasks()
124 | }
125 |
126 | // ClearAnyCurrentRunFails deletes any currentFail for the Run (same Host)
127 | // and then rings GOOD alerts
128 | func (run *Run) ClearAnyCurrentRunFails() {
129 | for hash, cf := range currentFails {
130 | if cf.RelatedHost == run.Host {
131 | // there was a time when we were only ringing one message
132 | // for the whole host, but it's compliant with UniqueID idea
133 | message := AlertMessageCreateForRun(AlertGood, run, cf)
134 | message.RingAlerts()
135 | CurrentFailDelete(hash)
136 | }
137 | }
138 | }
139 |
140 | // ClearAnyCurrentTasksFails deletes any currentFail for Run Tasks
141 | // and then rings GOOD alerts
142 | func (run *Run) ClearAnyCurrentTasksFails() {
143 | for _, taskRes := range run.TaskResults {
144 | if len(taskRes.Errors) == 0 {
145 | for hash, cf := range currentFails {
146 | if taskRes.Task == cf.RelatedTTask {
147 | message := AlertMessageCreateForTaskResult(AlertGood, run, taskRes, cf)
148 | message.RingAlerts()
149 | CurrentFailDelete(hash)
150 | }
151 | }
152 | }
153 | }
154 | }
155 |
--------------------------------------------------------------------------------
/run_streams.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bufio"
5 | "fmt"
6 | "io"
7 | "os"
8 | "path/filepath"
9 | "strconv"
10 | "strings"
11 | "time"
12 | )
13 |
14 | func (run *Run) readStdout(std io.Reader, exitStatus chan int) {
15 | scanner := bufio.NewScanner(std)
16 |
17 | for scanner.Scan() {
18 | text := scanner.Text()
19 | result := run.currentTaskResult()
20 |
21 | Trace.Printf("stdout=%s (%s)\n", text, run.Host.Name)
22 |
23 | if len(text) > 2 && text[0:2] == "__" {
24 | parts := strings.Split(text, "=")
25 | switch parts[0] {
26 | case "__EXIT":
27 | if len(parts) != 2 {
28 | run.addError(fmt.Errorf("Invalid __EXIT: %s", text))
29 | continue
30 | }
31 | status, err := strconv.Atoi(parts[1])
32 | if err != nil {
33 | run.addError(fmt.Errorf("Invalid __EXIT value: %s", text))
34 | continue
35 | }
36 | Trace.Printf("EXIT detected: %s (status %d, %s)\n", text, status, run.Host.Name)
37 | exitStatus <- status
38 | default:
39 | run.addError(fmt.Errorf("Unknown keyword: %s", text))
40 | }
41 | continue
42 | }
43 |
44 | if len(text) > 1 && text[0:1] == "#" {
45 | result.addLog(text)
46 | continue
47 | }
48 |
49 | sep := strings.Index(text, ":")
50 |
51 | if sep == -1 || sep == 0 {
52 | result.addError(fmt.Errorf("invalid script output: '%s'", text))
53 | continue
54 | }
55 |
56 | paramName := strings.TrimSpace(text[0:sep])
57 | if !IsValidTokenName(paramName) {
58 | result.addError(fmt.Errorf("invalid parameter name: '%s' (not a valid token name): '%s'", paramName, text))
59 | continue
60 | }
61 | if !IsAllUpper(paramName) {
62 | result.addError(fmt.Errorf("invalid parameter name: '%s' (upper case needed): '%s'", paramName, text))
63 | continue
64 | }
65 |
66 | if _, exists := result.Values[paramName]; exists == true {
67 | result.addError(fmt.Errorf("parameter '%s' defined multiple times", paramName))
68 | continue
69 | }
70 |
71 | value := strings.TrimSpace(text[sep+1:])
72 | if len(value) == 0 {
73 | result.addError(fmt.Errorf("empty value for parameter '%s'", paramName))
74 | continue
75 | }
76 |
77 | result.Values[paramName] = value
78 | }
79 |
80 | if err := scanner.Err(); err != nil {
81 | run.addError(fmt.Errorf("Error reading stdout: %s", err))
82 | }
83 | }
84 |
85 | func (run *Run) readStderr(std io.Reader) {
86 | scanner := bufio.NewScanner(std)
87 |
88 | for scanner.Scan() {
89 | text := scanner.Text()
90 | file := filepath.Base(run.currentTaskResult().Task.Probe.Script)
91 | Trace.Printf("stderr=%s\n", text)
92 | run.currentTaskResult().addError(fmt.Errorf("%s, stderr: %s", file, text))
93 | }
94 |
95 | if err := scanner.Err(); err != nil {
96 | run.addError(fmt.Errorf("Error reading stderr: %s", err))
97 | return // !!!
98 | }
99 | }
100 |
101 | // scripts -> ssh
102 | func (run *Run) stdinInject(out io.WriteCloser, exitStatus chan int) {
103 |
104 | defer out.Close()
105 |
106 | // "pkill" dependency or Linux "ps"? (ie: not Cygwin)
107 | _, err := out.Write([]byte("export __MAIN_PID=$$\nfunction __kill_subshells() { pkill -TERM -P $__MAIN_PID cat; }\nexport -f __kill_subshells\n"))
108 | if err != nil {
109 | run.addError(fmt.Errorf("Error writing (setup parent bash): %s", err))
110 | return
111 | }
112 |
113 | for num, task := range run.Tasks {
114 |
115 | var result TaskResult
116 | run.TaskResults = append(run.TaskResults, &result)
117 | result.StartTime = time.Now()
118 | result.Task = task
119 | result.Host = run.Host
120 | result.ExitStatus = -1
121 | result.Values = make(map[string]string)
122 |
123 | var scanner *bufio.Scanner
124 |
125 | file, erro := os.Open(task.Probe.Script)
126 | if erro != nil {
127 | result.addError(fmt.Errorf("Failed to open script: %s", erro))
128 | continue
129 | }
130 | defer file.Close()
131 |
132 | scanner = bufio.NewScanner(file)
133 |
134 | args := task.Probe.Arguments
135 | params := make(map[string]interface{})
136 | for key, val := range task.Probe.Defaults {
137 | params[key] = val
138 | }
139 | // … and let's override defaults with host's ones
140 | for key, val := range run.Host.Defaults {
141 | params[key] = val
142 | }
143 | args = StringExpandVariables(args, params)
144 |
145 | // cat is needed to "focus" stdin only on the child bash
146 | str := fmt.Sprintf("cat | __SCRIPT_ID=%d bash -s -- %s ; echo __EXIT=$?\n", num, args)
147 | Trace.Printf("child(%s)=%s", run.Host.Name, str)
148 |
149 | _, err = out.Write([]byte(str))
150 | if err != nil {
151 | run.addError(fmt.Errorf("Error writing (starting child bash): %s", err))
152 | return
153 | }
154 |
155 | // no newline so we dont change line numbers
156 | _, err = out.Write([]byte("trap __kill_subshells EXIT ; "))
157 | if err != nil {
158 | run.addError(fmt.Errorf("Error writing (init child bash): %s", err))
159 | return
160 | }
161 |
162 | for scanner.Scan() {
163 | text := scanner.Text()
164 | Trace.Printf("stdin=%s (%s)\n", text, run.Host.Name)
165 | _, errw := out.Write([]byte(text + "\n"))
166 | if errw != nil {
167 | run.addError(fmt.Errorf("Error writing: %s", errw))
168 | return
169 | }
170 | }
171 |
172 | Trace.Printf("killing subshell (%s)\n", run.Host.Name)
173 | _, err = out.Write([]byte("__kill_subshells\n"))
174 | if err != nil {
175 | run.addError(fmt.Errorf("Error writing (while killing subshell): %s", err))
176 | return
177 | }
178 |
179 | if err := scanner.Err(); err != nil {
180 | run.addError(fmt.Errorf("Error scanner: %s", err))
181 | return
182 | }
183 |
184 | status := <-exitStatus
185 | result.ExitStatus = status
186 | if status != 0 {
187 | result.addError(fmt.Errorf("detected non-zero exit status: %d", status))
188 | }
189 |
190 | result.Duration = time.Now().Sub(result.StartTime)
191 | if result.Duration > result.Task.Probe.Timeout {
192 | result.addError(fmt.Errorf("task duration was too long (timeout is %s)", result.Task.Probe.Timeout))
193 | }
194 | }
195 | }
196 |
197 | func (run *Run) preparePipes() error {
198 | exitStatus := make(chan int)
199 | session := run.Host.Connection.Session
200 |
201 | stdin, err := session.StdinPipe()
202 | if err != nil {
203 | return fmt.Errorf("Unable to setup stdin for session: %v", err)
204 | }
205 | go run.stdinInject(stdin, exitStatus)
206 |
207 | stdout, err := session.StdoutPipe()
208 | if err != nil {
209 | return fmt.Errorf("Unable to setup stdout for session: %v", err)
210 | }
211 | go run.readStdout(stdout, exitStatus)
212 |
213 | stderr, err := session.StderrPipe()
214 | if err != nil {
215 | return fmt.Errorf("Unable to setup stderr for session: %v", err)
216 | }
217 | go run.readStderr(stderr)
218 |
219 | return nil
220 | }
221 |
--------------------------------------------------------------------------------
/ssh.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bufio"
5 | "bytes"
6 | "crypto/hmac"
7 | "crypto/sha1"
8 | "crypto/x509"
9 | "encoding/base64"
10 | "encoding/pem"
11 | "fmt"
12 | "io"
13 | "io/ioutil"
14 | "net"
15 | "os"
16 | "path/filepath"
17 | "strings"
18 | "time"
19 |
20 | "golang.org/x/crypto/ssh"
21 | "golang.org/x/crypto/ssh/agent"
22 | "golang.org/x/crypto/ssh/knownhosts"
23 | )
24 |
// Connection is the final form of connection informations of hosts.d files
type Connection struct {
	// User is the SSH login name.
	User string
	// Auths are the authentication methods offered to the server.
	Auths []ssh.AuthMethod
	// Host and Port form the address dialed by Connect.
	Host string
	Port int
	// Ciphers, when not empty, replaces the default cipher list.
	Ciphers []string
	// SSHConnTimeWarn is the dial duration above which a Run reports an
	// error.
	SSHConnTimeWarn time.Duration
	// Session and Client are set by Connect and closed by Close.
	Session *ssh.Session
	Client  *ssh.Client
}
36 |
37 | // Close will clone the connection and the session
38 | func (connection *Connection) Close() error {
39 | var (
40 | sessionError error
41 | clientError error
42 | )
43 |
44 | Trace.Printf("SSH closing connection (%s)\n", connection.Host)
45 |
46 | if connection.Session != nil {
47 | sessionError = connection.Session.Close()
48 | }
49 | if connection.Client != nil {
50 | clientError = connection.Client.Close()
51 | }
52 |
53 | if clientError != nil {
54 | return clientError
55 | }
56 |
57 | return sessionError
58 | }
59 |
// knownHostHash computes the hash of hostname the way ssh does for "hashed"
// .ssh/known_hosts files: HMAC-SHA1 of the host name, keyed with the salt.
// Both the salt (salt64) and the result use standard base64. An empty string
// is returned when the salt cannot be decoded.
func knownHostHash(hostname string, salt64 string) string {
	salt, err := base64.StdEncoding.DecodeString(salt64)
	if err != nil {
		return ""
	}

	mac := hmac.New(sha1.New, salt)
	mac.Write([]byte(hostname))

	return base64.StdEncoding.EncodeToString(mac.Sum(nil))
}
74 |
75 | // Implements ssh.HostKeyCallback which is now required due to CVE-2017-3204
76 | // see https://github.com/golang/go/issues/29286 for the ecdsa-sha2-nistp256 part
77 | // ("If ClientConfig.HostKeyAlgorithms is not set, a reasonable default is set for acceptable host key type")
78 | func hostKeyChecker(hostname string, remote net.Addr, key ssh.PublicKey) error {
79 | path := filepath.Join(os.Getenv("HOME"), ".ssh", "known_hosts")
80 | hostKeyCallback, err := knownhosts.New(path)
81 | if err != nil {
82 | return err
83 | }
84 |
85 | err = hostKeyCallback(hostname, remote, key)
86 | if err != nil {
87 | return fmt.Errorf("%s, use ssh client to manually connect to %s (you may have to specify algo: ssh -o HostKeyAlgorithms=ecdsa-sha2-nistp256 …)", err, hostname)
88 | }
89 | return nil
90 | }
91 |
92 | // Old ssh.HostKeyCallback implementation
93 | // We parse $HOME/.ssh/known_hosts and check for a matching key + hostname
94 | // Supported : Hashed hostnames, revoked keys (or any other marker), non-standard ports
95 | // Unsupported yet: patterns (*? wildcards)
96 | // This code is temporary, x/crypto/ssh will probably provide something similar. One day.
97 | func _hostKeyChecker(hostname string, remote net.Addr, key ssh.PublicKey) error {
98 | path := filepath.Join(os.Getenv("HOME"), ".ssh", "known_hosts")
99 | file, err := os.Open(path)
100 | if err != nil {
101 | return fmt.Errorf("opening '%s': %s", path, err)
102 | }
103 | defer file.Close()
104 |
105 | // remove standard port if given, add square brackets for non-standard ones
106 | hp := strings.Split(hostname, ":")
107 | if len(hp) == 2 {
108 | if hp[1] == "22" {
109 | hostname = hp[0]
110 | } else {
111 | hostname = "[" + hp[0] + "]:" + hp[1]
112 | }
113 | }
114 |
115 | scanner := bufio.NewScanner(file)
116 | for scanner.Scan() {
117 | marker, hosts, hostKey, _, _, err := ssh.ParseKnownHosts(scanner.Bytes())
118 | if err == io.EOF {
119 | continue
120 | }
121 | if err != nil {
122 | return fmt.Errorf("parsing '%s': %s", path, err)
123 | }
124 | if marker != "" {
125 | continue // @cert-authority or @revoked
126 | }
127 | fmt.Printf("%s VS %s", key.Marshal(), hostKey.Marshal())
128 | if bytes.Equal(key.Marshal(), hostKey.Marshal()) {
129 | for _, host := range hosts {
130 | if len(host) > 1 && host[0:1] == "|" {
131 | parts := strings.Split(host, "|")
132 | if parts[1] != "1" {
133 | Trace.Printf("'%s': only type 1 is supported for hashed hosts", path)
134 | continue
135 | }
136 | if knownHostHash(hostname, parts[2]) == parts[3] {
137 | Trace.Printf("successfully found a matching key in '%s' for (hashed) '%s'", path, hostname)
138 | return nil
139 | }
140 | } else {
141 | if host == hostname {
142 | Trace.Printf("successfully found a matching key in '%s' for '%s'", path, hostname)
143 | return nil
144 | }
145 | }
146 | }
147 | Info.Printf("searching '%s' in '%s': found a matching key, but not with exact hostname(s): %s (patterns are not supported yet)", hostname, path, strings.Join(hosts, ", "))
148 | }
149 | }
150 |
151 | return fmt.Errorf("can't find matching key in '%s' for '%s' (try 'ssh %s' to add it?)", path, hostname, hostname)
152 | }
153 |
// hostKeyBilndTrustChecker is a ssh.HostKeyCallback that accepts any host
// key without verification (it always returns nil). Connect installs it
// when GlobalConfig.SSHBlindTrust is set.
func hostKeyBilndTrustChecker(hostname string, remote net.Addr, key ssh.PublicKey) error {
	return nil
}
157 |
158 | // Connect will dial SSH server and open a session
159 | func (connection *Connection) Connect() error {
160 | sshConfig := &ssh.ClientConfig{
161 | User: connection.User,
162 | Auth: connection.Auths,
163 | }
164 |
165 | if GlobalConfig.SSHBlindTrust == true {
166 | sshConfig.HostKeyCallback = hostKeyBilndTrustChecker
167 | } else {
168 | sshConfig.HostKeyCallback = hostKeyChecker
169 | }
170 |
171 | if len(connection.Ciphers) > 0 {
172 | sshConfig.Config = ssh.Config{
173 | Ciphers: connection.Ciphers,
174 | }
175 | }
176 |
177 | dial, err := ssh.Dial("tcp", fmt.Sprintf("%s:%d", connection.Host, connection.Port), sshConfig)
178 | Trace.Printf("SSH connection to %s@%s:%d\n", connection.User, connection.Host, connection.Port)
179 | if err != nil {
180 | return fmt.Errorf("Failed to dial: %s", err)
181 | }
182 | connection.Client = dial
183 |
184 | session, err := dial.NewSession()
185 | if err != nil {
186 | return fmt.Errorf("Failed to create session: %s", err)
187 | }
188 | connection.Session = session
189 |
190 | return nil
191 | }
192 |
193 | // PublicKeyFile returns an AuthMethod using a private key file
194 | func PublicKeyFile(file string) ssh.AuthMethod {
195 | buffer, err := ioutil.ReadFile(file)
196 | if err != nil {
197 | return nil
198 | }
199 |
200 | key, err := ssh.ParsePrivateKey(buffer)
201 | if err != nil {
202 | return nil
203 | }
204 | return ssh.PublicKeys(key)
205 | }
206 |
207 | // PublicKeyFilePassPhrase returns an AuthMethod using a private key file
208 | // and a passphrase
209 | func PublicKeyFilePassPhrase(file, passphrase string) ssh.AuthMethod {
210 | buffer, err := ioutil.ReadFile(file)
211 | if err != nil {
212 | return nil
213 | }
214 |
215 | block, _ := pem.Decode(buffer)
216 | private, err := x509.DecryptPEMBlock(block, []byte(passphrase))
217 | if err != nil {
218 | return nil
219 | }
220 | block.Headers = nil
221 | block.Bytes = private
222 | key, err := ssh.ParsePrivateKey(pem.EncodeToMemory(block))
223 | if err != nil {
224 | return nil
225 | }
226 | return ssh.PublicKeys(key)
227 | }
228 |
229 | // SSHAgent returns an AuthMethod using SSH agent connection. The pubkeyFile
230 | // params restricts the AuthMethod to only one key, so it wont spam the
231 | // SSH server if the agent holds multiple keys.
232 | func SSHAgent(pubkeyFile string) (ssh.AuthMethod, error) {
233 | sshAgent, errd := net.Dial("unix", os.Getenv("SSH_AUTH_SOCK"))
234 | if errd == nil {
235 | agent := agent.NewClient(sshAgent)
236 |
237 | // we'll try every key, then
238 | if pubkeyFile == "" {
239 | return ssh.PublicKeysCallback(agent.Signers), nil
240 | }
241 |
242 | agentSigners, err := agent.Signers()
243 | if err != nil {
244 | return nil, fmt.Errorf("requesting SSH agent key/signer list: %s", err)
245 | }
246 |
247 | buffer, err := ioutil.ReadFile(pubkeyFile)
248 | if err != nil {
249 | return nil, fmt.Errorf("reading public key '%s': %s", pubkeyFile, err)
250 | }
251 |
252 | fields := strings.Fields(string(buffer))
253 |
254 | if len(fields) < 3 {
255 | return nil, fmt.Errorf("invalid field count for public key '%s'", pubkeyFile)
256 | }
257 |
258 | buffer2, err := base64.StdEncoding.DecodeString(fields[1])
259 | if err != nil {
260 | return nil, fmt.Errorf("decoding public key '%s': %s", pubkeyFile, err)
261 | }
262 |
263 | key, err := ssh.ParsePublicKey(buffer2)
264 | if err != nil {
265 | return nil, fmt.Errorf("parsing public key '%s': %s", pubkeyFile, err)
266 | }
267 |
268 | for _, potentialSigner := range agentSigners {
269 | if bytes.Compare(key.Marshal(), potentialSigner.PublicKey().Marshal()) == 0 {
270 | Trace.Printf("successfully found %s key in the SSH agent (%s)", pubkeyFile, fields[2])
271 | cb := func() ([]ssh.Signer, error) {
272 | signers := []ssh.Signer{potentialSigner}
273 | return signers, nil
274 | }
275 | return ssh.PublicKeysCallback(cb), nil
276 | }
277 | }
278 | return nil, fmt.Errorf("can't find '%s' key in the SSH agent", pubkeyFile)
279 | }
280 | return nil, fmt.Errorf("SSH agent: %v (check SSH_AUTH_SOCK?)", errd)
281 | }
282 |
--------------------------------------------------------------------------------
/task.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "time"
6 | )
7 |
8 | // Task structure holds (mainly timing) informations about a Task
9 | // next and previous execution
10 | type Task struct {
11 | Probe *Probe
12 | //~ LastRun time.Time
13 | //~ RunCount int
14 | //~ RemainingTicks int
15 | NextRun time.Time
16 | PrevRun time.Time
17 | }
18 |
19 | // ReSchedule is used to schedule another run for this
20 | // task in the future
21 | func (task *Task) ReSchedule(val time.Time) {
22 | task.PrevRun = task.NextRun
23 | task.NextRun = val
24 | }
25 |
26 | // Taskable returns true if the task is currently available (see RunIf expression)
27 | func (task *Task) Taskable() (bool, error) {
28 | // no RunIf condition? taskable, then
29 | if task.Probe.RunIf == nil {
30 | return true, nil
31 | }
32 | res, err := task.Probe.RunIf.Evaluate(nil)
33 | if err != nil {
34 | return false, fmt.Errorf("%s (run_if expression '%s' probe)", err, task.Probe.Name)
35 | }
36 | if _, ok := res.(bool); ok == false {
37 | return false, fmt.Errorf("'run_if' must return a boolean value (probe '%s')", task.Probe.Name)
38 | }
39 | return res.(bool), nil
40 | }
41 |
--------------------------------------------------------------------------------
/task_result.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "regexp"
6 | "strconv"
7 | "time"
8 | )
9 |
10 | // TaskResult holds informations about Task execution and Check results
11 | type TaskResult struct {
12 | Task *Task
13 | Host *Host
14 | Values map[string]string
15 | ExitStatus int
16 | StartTime time.Time
17 | Duration time.Duration
18 | Logs []string // currently, only output # lines
19 | Errors []error
20 | FailedChecks []*Check
21 | SuccessfulChecks []*Check
22 | }
23 |
24 | func (result *TaskResult) addError(err error) {
25 | Info.Printf("TaskResult error: %s (host '%s')", err, result.Host.Name)
26 | result.Errors = append(result.Errors, err)
27 | }
28 |
29 | func (result *TaskResult) addLog(line string) {
30 | Trace.Printf("TaskResult log: %s (host '%s')", line, result.Host.Name)
31 | result.Logs = append(result.Logs, line)
32 | }
33 |
34 | // DoChecks evaluates every Check in the TaskResult and fills
35 | // FailedChecks and SuccessfulChecks arrays
36 | func (result *TaskResult) DoChecks() {
37 | // build parameter map (with values and defaults)
38 | params := make(map[string]interface{})
39 |
40 | for key, val := range result.Values {
41 | var err error
42 | if match, _ := regexp.MatchString("^[0-9]+$", val); match == true {
43 | params[key], err = strconv.Atoi(val)
44 | if err != nil {
45 | result.addError(fmt.Errorf("can't convert '%s' to an int (%s)", val, err))
46 | }
47 | continue
48 | }
49 | if match, _ := regexp.MatchString("^[0-9]+\\.[0-9]+$", val); match == true {
50 | params[key], err = strconv.ParseFloat(val, 64)
51 | if err != nil {
52 | result.addError(fmt.Errorf("can't convert '%s' to a float64 (%s)", val, err))
53 | }
54 | continue
55 | }
56 | // string
57 | params[key] = val
58 | }
59 |
60 | for key, val := range result.Task.Probe.Defaults {
61 | params[key] = val
62 | }
63 |
64 | // … and let's override defaults with host's ones
65 | for key, val := range result.Host.Defaults {
66 | params[key] = val
67 | }
68 |
69 | for _, check := range result.Task.Probe.Checks {
70 | res, err := check.If.Evaluate(params)
71 | Trace.Printf("%s: %t (err: %s)\n", check.Desc, res, err)
72 | if err != nil {
73 | result.addError(fmt.Errorf("%s (expression '%s' in '%s' check)", err, check.If, check.Desc))
74 | continue
75 | }
76 | if _, ok := res.(bool); ok == false {
77 | result.addError(fmt.Errorf("[[check]] 'if' must return a boolean value (expression '%s' in '%s' check)", check.If, check.Desc))
78 | continue
79 | }
80 |
81 | if res == true {
82 | result.FailedChecks = append(result.FailedChecks, check)
83 | } else {
84 | result.SuccessfulChecks = append(result.SuccessfulChecks, check)
85 | }
86 | }
87 | }
88 |
--------------------------------------------------------------------------------
/tools.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "crypto/md5"
5 | "encoding/hex"
6 | "fmt"
7 | "regexp"
8 | "strconv"
9 | "strings"
10 | )
11 |
// stringWordSeparators is a regexp character class matching the characters
// that may directly follow a "$variable" name. The hyphen is escaped so that
// "+", "-" and "=" are three literal characters: an unescaped "+-=" is a
// range that also contains the digits 0-9.
const stringWordSeparators = "[ \t\n,.;:\\(\\)\\[\\]{}'\"/\\\\!\\?<>@#|*+\\-=]"
13 |
// validTokenNameRE matches a non-empty run of ASCII letters, digits and
// underscores. It is compiled once rather than on every call.
var validTokenNameRE = regexp.MustCompile("^[A-Za-z0-9_]+$")

// IsValidTokenName returns true if the argument is non-empty and uses only
// the characters allowed in a token name (ASCII letters, digits, underscore).
func IsValidTokenName(token string) bool {
	return validTokenNameRE.MatchString(token)
}
19 |
// IsAllUpper returns true if upper-casing str leaves it unchanged. Strings
// without any letter (including the empty string) therefore qualify.
func IsAllUpper(str string) bool {
	upper := strings.ToUpper(str)
	return upper == str
}
24 |
// MD5Hash returns the MD5 sum of text as a lowercase hexadecimal string.
func MD5Hash(text string) string {
	sum := md5.Sum([]byte(text))
	return hex.EncodeToString(sum[:])
}
31 |
// InterfaceValueToString renders iv as a string. Supported dynamic types
// are int, int32, int64, float32, float64, string and bool; any other type
// (nil included) yields "INVALID_TYPE".
func InterfaceValueToString(iv interface{}) string {
	switch v := iv.(type) {
	case string:
		return v
	case bool:
		return strconv.FormatBool(v)
	case int:
		return strconv.Itoa(v)
	case int32:
		return strconv.FormatInt(int64(v), 10)
	case int64:
		return strconv.FormatInt(v, 10)
	case float32:
		// fixed six decimals
		return fmt.Sprintf("%f", v)
	case float64:
		// shortest representation that round-trips
		return strconv.FormatFloat(v, 'f', -1, 64)
	default:
		return "INVALID_TYPE"
	}
}
52 |
53 | // StringFindVariables returns a deduplicated slice of all "variables" ($test)
54 | // in the string
55 | func StringFindVariables(str string) []string {
56 | re := regexp.MustCompile("\\$([a-zA-Z0-9_]+)(" + stringWordSeparators + "|$)")
57 | all := re.FindAllStringSubmatch(str, -1)
58 |
59 | // deduplicate using a map
60 | varMap := make(map[string]bool)
61 | for _, v := range all {
62 | varMap[v[1]] = true
63 | }
64 |
65 | // map to slice
66 | res := []string{}
67 | for name := range varMap {
68 | res = append(res, name)
69 | }
70 | return res
71 | }
72 |
73 | // StringExpandVariables expands "variables" ($test, for instance) in str
74 | // and returns a new string
75 | func StringExpandVariables(str string, variables map[string]interface{}) string {
76 | vars := StringFindVariables(str)
77 | for _, v := range vars {
78 | if val, exists := variables[v]; exists == true {
79 | re := regexp.MustCompile("\\$" + v + "(" + stringWordSeparators + "|$)")
80 | str = re.ReplaceAllString(str, InterfaceValueToString(val)+"${1}")
81 | }
82 | }
83 | return str
84 | }
85 |
--------------------------------------------------------------------------------