├── .gitignore
├── LICENSE
├── README.md
├── alert.go
├── alert_message.go
├── check_functions.go
├── config.go
├── config_alert.go
├── config_host.go
├── config_probe.go
├── current_fails.go
├── deploy
    ├── ssh-agent-nosee.sh
    ├── supervisord
    │   └── nosee.conf
    └── systemd
    │   └── nosee.service
├── doc
    └── images
    │   ├── howto.txt
    │   ├── img_base.png
    │   ├── img_base.txt
    │   ├── img_general.png
    │   ├── img_general.txt
    │   ├── img_illu.jpeg
    │   └── nosee-influxdb-grafana.png
├── etc
    ├── alerts.d
    │   ├── example.txt
    │   ├── mail_general.toml
    │   └── nosee-console.toml
    ├── hosts.d
    │   ├── example.txt
    │   └── test.toml
    ├── nosee.toml
    ├── probes.d
    │   ├── apache_modstatus.toml
    │   ├── backup_daily.toml
    │   ├── backup_week.toml
    │   ├── cert_example.toml
    │   ├── cpu_lms_temp.toml
    │   ├── cpu_temp.toml
    │   ├── curl_expect_example.toml
    │   ├── df.toml
    │   ├── example.txt
    │   ├── ifband.toml
    │   ├── load.toml
    │   ├── mdstat.toml
    │   ├── mem.toml
    │   ├── ping.toml
    │   ├── port_80.toml
    │   └── systemd_httpd.toml
    └── scripts
    │   ├── alerts
    │       ├── nosee-console.sh
    │       └── test.sh
    │   ├── heartbeats
    │       └── nosee-console.sh
    │   ├── loggers
    │       └── influxdb.sh
    │   └── probes
    │       ├── apache_modstatus.sh
    │       ├── backup.sh
    │       ├── cert_check.sh
    │       ├── cpu_lms_temp.sh
    │       ├── cpu_temp.sh
    │       ├── curl.sh
    │       ├── curl_expect.sh
    │       ├── df.sh
    │       ├── ifband.sh
    │       ├── load.sh
    │       ├── load_win.sh
    │       ├── mdstat.sh
    │       ├── mem.sh
    │       ├── ping.sh
    │       ├── port.sh
    │       └── systemctl_status.sh
├── go.mod
├── go.sum
├── heartbeat.go
├── host.go
├── log.go
├── loggers.go
├── main.go
├── pid.go
├── probe.go
├── run.go
├── run_alerts.go
├── run_streams.go
├── ssh.go
├── task.go
├── task_result.go
└── tools.go


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects)
 2 | *.o
 3 | *.a
 4 | *.so
 5 | 
 6 | # Folders
 7 | _obj
 8 | _test
 9 | 
10 | # Architecture specific extensions/prefixes
11 | *.[568vq]
12 | [568vq].out
13 | 
14 | *.cgo1.go
15 | *.cgo2.c
16 | _cgo_defun.c
17 | _cgo_gotypes.go
18 | _cgo_export.*
19 | 
20 | _testmain.go
21 | 
22 | *.exe
23 | *.test
24 | *.prof
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     {one line to give the program's name and a brief idea of what it does.}
635 |     Copyright (C) {year}  {name of author}
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     {project}  Copyright (C) {year}  {fullname}
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Nosee
  2 | A nosey, agentless, easy monitoring tool over SSH.
  3 | 
  4 | **Warning: Heavy WIP!**
  5 | 
  6 | What is it?
  7 | -----------
  8 | 
  9 | It's an answer for when you find the usual monitoring systems too heavy and complex.
 10 | 
 11 | Nosee uses SSH protocol to execute scripts on monitored systems, checking
 12 | for whatever you want. The result is evaluated and Nosee will ring an alert
 13 | of your choice if anything is wrong.
 14 | 
 15 | In short : SSH, no agent, simple configuration, usual scripting.
 16 | 
 17 | ![Nosee basic schema](https://raw.github.com/Xfennec/nosee/master/doc/images/img_base.png)
 18 | 
 19 | Currently, Nosee requires bash on monitored hosts. It was successfully
 20 | tested with Linux (of course), and also with Cygwin sshd on Windows hosts.
 21 | 
 22 | The Nosee daemon itself can run on virtually any Go-supported platform.
 23 | 
 24 | Show me!
 25 | --------
 26 | 
 27 | Here is an alert triggered by a "port connection testing" probe. This alert
 28 | is then configured to be sent using `mail` and a HTTP request to Pushover
 29 | for realtime mobile device notification.
 30 | 
 31 | ![Nosee mobile and mail failure notifications](https://raw.github.com/Xfennec/nosee/master/doc/images/img_illu.jpeg)
 32 | 
 33 | You can also have a look at the [Nosee-console](https://github.com/Xfennec/nosee-console)
 34 | project, it provides a cool Web monitoring interface.
 35 | 
 36 | How do you build it?
 37 | --------------------
 38 | 
 39 | If you have Go installed:
 40 | 
 41 | 	go get github.com/Xfennec/nosee
 42 | 
 43 | You will then be able to launch the binary located in your Go "bin" directory.
 44 | (since Go 1.8, `~/go/bin` if you haven't defined any `$GOPATH`)
 45 | 
 46 | The project is still too young to provide binaries. Later. (and `go get` is so powerful…)
 47 | 
 48 | As a reminder, you can use the `-u` flag to update the project and its dependencies  if
 49 | you don't want to use `git` for that.
 50 | 
 51 | 	go get -u github.com/Xfennec/nosee
 52 | 
 53 | How do you use it?
 54 | ------------------
 55 | 
 56 | You may have a look at the "template" configuration directory
 57 | provided in `$GOPATH/src/github.com/Xfennec/nosee/etc` as a more complete
 58 | example or as a base for the following tutorial. (edit `hosts.d/test.toml`
 59 | for connection settings and `alerts.d/mail_general.toml` for email address,
 60 | at least)
 61 | 
 62 | Here's a general figure of how Nosee works:
 63 | 
 64 | ![Nosee general configuration structure](https://raw.github.com/Xfennec/nosee/master/doc/images/img_general.png)
 65 | 
 66 | ### Small tutorial
 67 | 
 68 | Configuration is mainly done through simple text files using
 69 | the [TOML](https://github.com/toml-lang/toml) syntax.
 70 | 
 71 | **Let's monitor CPU temperature of one of our Web servers.**
 72 | 
 73 | ### Step1. Create a *Host* (SSH connection)
 74 | 
 75 | Create a file in the `hosts.d` directory. (ex: `hosts.d/web_myapp.toml`).
 76 | 
 77 | ```toml
 78 | name = "MyApp Webserver"
 79 | classes = ["linux", "web", "myapp"]
 80 | 
 81 | [network]
 82 | host = "192.168.0.100"
 83 | port = 22
 84 | 
 85 | [auth]
 86 | user = "test5"
 87 | password = "test5"
 88 | ```
 89 | 
 90 | The `classes` parameter is completely free, you may choose anything that
 91 | fits your infrastructure. It will determine what checks will be done on
 92 | this host (see below).
 93 | 
 94 | Authentication by password is extremely bad, of course, as is writing down
 95 | a password in a configuration file. Nosee supports other (preferred) options
 96 | such as passphrases and ssh-agent.
 97 | 
 98 | ### Step2. Create a *Probe*
 99 | 
100 | Create a file in the `probes.d` directory. (ex: `probes.d/cpu_temp.toml`).
101 | 
102 | ```toml
103 | name = "CPU temperature"
104 | targets = ["linux"]
105 | 
106 | script = "cpu_temp.sh"
107 | 
108 | delay = "1m"
109 | 
110 | # Checks
111 | 
112 | [[check]]
113 | desc = "critical CPU temperature"
114 | if = "TEMP > 85"
115 | classes = ["critical"]
116 | ```
117 | 
118 | The `targets` parameter will match the `classes` of our host. Targets can
119 | be more precise with things like `linux & web`. (both `linux` and `web` classes
120 | must exist in host)
121 | 
122 | The `delay` explains that this probe must be run every minute. This is
123 | the lowest delay available.
124 | 
125 | Then we have a check. You can have multiple checks in a probe. This check
126 | will look at the `TEMP` value returned by the `cpu_temp.sh`
127 | script (see below) and evaluate the `if` expression. You can have a look
128 | at [govaluate](https://github.com/Knetic/govaluate) for details about
129 | expression's syntax.
130 | 
131 | If this expression becomes true, the probe will ring a `critical` alert. Here
132 | again, you are free to use any class of your choice to create your own
133 | error typology. (ex: `["warning", "hardware_guys"]` to ring a specific group
134 | of users in charge of critical failures of the hardware)
135 | 
136 | ### Step3. Create a *script* (or use a provided one)
137 | 
138 | Scripts are hosted in the `scripts/probes/` directory.
139 | 
140 | ```bash
141 | #!/bin/bash
142 | 
143 | val=$(cat /sys/class/thermal/thermal_zone0/temp)
144 | temp=$(awk "BEGIN {print $val/1000}")
145 | echo "TEMP:" $temp
146 | ```
147 | 
148 | This script will run on monitored hosts (so… stay light). Here, we read
149 | the first thermal zone and divide it by 1000 to get Celsius value.
150 | 
151 | Scripts must print `KEY: val` lines to feed checks, as seen above. That's it.
152 | 
153 | ### Step4. Create an *Alert*
154 | 
155 | Create a file in the `alerts.d` directory. (ex: `alerts.d/mail_julien.toml`).
156 | 
157 | ```toml
158 | name = "Mail Julien"
159 | 
160 | targets = ["julien", "warning", "critical", "general"]
161 | 
162 | command = "mail"
163 | 
164 | arguments = [
165 |     "-s",
166 |     "Nosee: $SUBJECT",
167 |     "julien@domain.tld"
168 | ]
169 | ```
170 | 
171 | This simple alert will use the usual `mail` command when an alert matches
172 | one (or more) of the given targets. It works exactly the same as classes/targets
173 | for Hosts/Probes to let you create your own vocabulary.
174 | (ex: `"web & production & critical"` is a valid target)
175 | 
176 | As you may have seen, some variables are available for arguments, like
177 | the `$SUBJECT` of the alert message.
178 | 
179 | There's a special class `general` for very important general messages. At
180 | least one alert must listen permanently to this class.
181 | 
182 | ### Step5. Run Nosee!
183 | 
184 | 	cd $GOPATH/bin
185 | 	./nosee -l info -c ../src/github.com/Xfennec/nosee/etc/
186 | 
187 | You are now ready to burn your Web server CPU to get your alert mail. The `-c`
188 | parameter gives the configuration path, and the `-l` will make Nosee way
189 | more verbose.
190 | 
191 | 	./nosee help
192 | 
193 | … will tell you more about command line arguments.
194 | 
195 | Anything else? (WIP)
196 | --------------------
197 | 
198 | Oh yes. I want to explain:
199 | 
200 |  - "threaded" (Goroutines)
201 |  - global `nosee.toml` configuration
202 |  - SSH runs (group of probes)
203 |  - `*` targets
204 |  - needed_failures / needed_successes
205 |  - defaults
206 |  - host overriding of probe's defaults
207 |  - use of defaults for probe script arguments
208 |  - probe `run_if` condition
209 |  - alert scripts
210 |  - alert limits
211 |  - alert env and stdin
212 |  - timeouts
213 |  - rescheduling
214 |  - GOOD and BAD alerts
215 |  - UniqueID for alerts
216 |  - configuration "recap/summary" command
217 |  - extensive configuration validation (and connection tests)
218 |  - alert examples (pushover, SMS, …)
219 |  - probe examples!
220 |  - check "If" functions (date)
221 |  - nosee-alerts.json current alerts
222 |  - heartbeat scripts
223 |  - systemd / supervisord sample files (see deploy/ directory)
224 |  - test subcommand
225 |  - loggers / InfluxDB
226 | 
227 | ![Nosee + InfluxDB + Grafana](https://raw.github.com/Xfennec/nosee/master/doc/images/nosee-influxdb-grafana.png)
228 | 
229 | (example: Nosee → InfluxDB → Grafana)
230 | 
231 | What is the future of Nosee? (WIP)
232 | ----------------------------
233 | 
234 |  - remote Nosee interconnections
235 | 


--------------------------------------------------------------------------------
/alert.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"os"
  6 | 	"os/exec"
  7 | 	"strings"
  8 | 	"time"
  9 | )
 10 | 
 11 | // HourRange hold a Start and an End in the form of int arrays ([0] = hours, [1] = minutes)
 12 | type HourRange struct {
 13 | 	Start [2]int
 14 | 	End   [2]int
 15 | }
 16 | 
 17 | // Alert is the final form of alerts.d files
 18 | type Alert struct {
 19 | 	Name      string
 20 | 	Disabled  bool
 21 | 	Targets   []string
 22 | 	Command   string
 23 | 	Arguments []string
 24 | 	Hours     []HourRange
 25 | 	Days      []int
 26 | }
 27 | 
 28 | // Ring will send an AlertMessage using this Alert, executing the
 29 | // configured command
 30 | func (alert *Alert) Ring(msg *AlertMessage) {
 31 | 	Info.Println("ring: " + alert.Name + ", " + alert.Command /* + " " + strings.Join(alert.Arguments, " ") */)
 32 | 
 33 | 	varMap := make(map[string]interface{})
 34 | 	varMap["SUBJECT"] = msg.Subject
 35 | 	varMap["TYPE"] = msg.Type.String()
 36 | 	varMap["UNIQUEID"] = msg.UniqueID
 37 | 	varMap["HOST_NAME"] = msg.Hostname
 38 | 	varMap["CLASSES"] = strings.Join(msg.Classes, ",")
 39 | 	varMap["NOSEE_SRV"] = GlobalConfig.Name
 40 | 	varMap["DATETIME"] = msg.DateTime.Format(time.RFC3339)
 41 | 	// "Level" ? (Run, Task, Checks)
 42 | 	// Probe Name, Check Name, Alert Name ?
 43 | 
 44 | 	var args []string
 45 | 	for _, arg := range alert.Arguments {
 46 | 		expArg := StringExpandVariables(arg, varMap)
 47 | 		args = append(args, expArg)
 48 | 	}
 49 | 
 50 | 	go func() {
 51 | 		cmd := exec.Command(alert.Command, args...)
 52 | 
 53 | 		env := os.Environ()
 54 | 		for key, val := range varMap {
 55 | 			env = append(env, fmt.Sprintf("%s=%s", key, InterfaceValueToString(val)))
 56 | 		}
 57 | 		cmd.Env = env
 58 | 
 59 | 		// we also inject Details thru stdin:
 60 | 		cmd.Stdin = strings.NewReader(msg.Details)
 61 | 
 62 | 		if cmdOut, err := cmd.CombinedOutput(); err != nil {
 63 | 			if len(msg.Classes) == 1 && msg.Classes[0] == GeneralClass {
 64 | 				Error.Printf("unable to ring an alert to general class! error: %s (%s)\n", err, alert.Command)
 65 | 				return
 66 | 			}
 67 | 
 68 | 			Warning.Printf("error running alert '%s': %s", alert.Command, err)
 69 | 
 70 | 			msg.Subject = msg.Subject + " (Fwd)"
 71 | 			prepend := fmt.Sprintf("WARNING: This alert is re-routed to the 'general' class, because\noriginal alert failed with the following error: %s (%s)\nOutput: %s\n\n", err.Error(), alert.Command, string(cmdOut))
 72 | 			msg.Details = prepend + msg.Details
 73 | 			msg.Classes = []string{GeneralClass}
 74 | 			msg.RingAlerts()
 75 | 		}
 76 | 	}()
 77 | }
 78 | 
 79 | // Ringable will return true if this Alert is currently able to ring
 80 | // (no matching day or hour limit)
 81 | func (alert *Alert) Ringable() bool {
 82 | 	now := time.Now()
 83 | 	nowMins := now.Hour()*60 + now.Minute()
 84 | 	nowDay := int(now.Weekday())
 85 | 	hourOk := len(alert.Hours) == 0
 86 | 	for _, hourRange := range alert.Hours {
 87 | 		start := hourRange.Start[0]*60 + hourRange.Start[1]
 88 | 		end := hourRange.End[0]*60 + hourRange.End[1]
 89 | 		if nowMins >= start && nowMins <= end {
 90 | 			hourOk = true
 91 | 			break
 92 | 		}
 93 | 	}
 94 | 	dayOk := len(alert.Days) == 0
 95 | 	for _, day := range alert.Days {
 96 | 		if nowDay == day {
 97 | 			dayOk = true
 98 | 		}
 99 | 	}
100 | 	return hourOk && dayOk
101 | }
102 | 


--------------------------------------------------------------------------------
/alert_message.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"fmt"
  6 | 	"strings"
  7 | 	"time"
  8 | )
  9 | 
 10 | // AlertMessageType definition
 11 | type AlertMessageType uint8
 12 | 
 13 | // AlertMessageType numeric values
 14 | const (
 15 | 	AlertGood AlertMessageType = iota + 1
 16 | 	AlertBad
 17 | )
 18 | 
 19 | // AlertMessageTypeStr stores matching strings
 20 | var AlertMessageTypeStr = [...]string{
 21 | 	"GOOD",
 22 | 	"BAD",
 23 | }
 24 | 
 25 | // AlertMessage will store the text of the error
 26 | type AlertMessage struct {
 27 | 	Type     AlertMessageType
 28 | 	Subject  string
 29 | 	Details  string
 30 | 	Classes  []string
 31 | 	UniqueID string
 32 | 	Hostname string
 33 | 	DateTime time.Time
 34 | }
 35 | 
 36 | // GeneralClass is a "general" class for very important general messages
 37 | const GeneralClass = "general"
 38 | 
 39 | func (amt AlertMessageType) String() string {
 40 | 	if amt == 0 {
 41 | 		return "INVALID_TYPE"
 42 | 	}
 43 | 	return AlertMessageTypeStr[amt-1]
 44 | }
 45 | 
 46 | // AlertMessageCreateForRun creates a new AlertMessage with AlertGood or
 47 | // AlertBad type for a Run
 48 | func AlertMessageCreateForRun(aType AlertMessageType, run *Run, currentFail *CurrentFail) *AlertMessage {
 49 | 	var message AlertMessage
 50 | 
 51 | 	message.Subject = fmt.Sprintf("[%s] %s: run error(s)", aType, run.Host.Name)
 52 | 	message.Type = aType
 53 | 	message.UniqueID = currentFail.UniqueID
 54 | 	message.Hostname = run.Host.Name
 55 | 	message.DateTime = run.StartTime
 56 | 
 57 | 	var details bytes.Buffer
 58 | 
 59 | 	switch aType {
 60 | 	case AlertBad:
 61 | 		details.WriteString("A least one error occured during a run for this host. (" + run.StartTime.Format("2006-01-02 15:04:05") + ")\n")
 62 | 		details.WriteString("\n")
 63 | 		details.WriteString("Error(s):\n")
 64 | 		for _, err := range run.Errors {
 65 | 			details.WriteString(err.Error() + "\n")
 66 | 		}
 67 | 	case AlertGood:
 68 | 		details.WriteString("No more run errors for this host. (" + run.StartTime.Format("2006-01-02 15:04:05") + ")\n")
 69 | 	}
 70 | 
 71 | 	details.WriteString("\n")
 72 | 	details.WriteString("Unique failure ID: " + message.UniqueID + "\n")
 73 | 	message.Details = details.String()
 74 | 
 75 | 	message.Classes = []string{GeneralClass}
 76 | 
 77 | 	return &message
 78 | }
 79 | 
 80 | // AlertMessageCreateForTaskResult creates an AlertGood or AlertBad message for a TaskResult
 81 | func AlertMessageCreateForTaskResult(aType AlertMessageType, run *Run, taskResult *TaskResult, currentFail *CurrentFail) *AlertMessage {
 82 | 	var message AlertMessage
 83 | 
 84 | 	message.Subject = fmt.Sprintf("[%s] %s: %s: task error(s)", aType, run.Host.Name, taskResult.Task.Probe.Name)
 85 | 	message.Type = aType
 86 | 	message.UniqueID = currentFail.UniqueID
 87 | 	message.Hostname = run.Host.Name
 88 | 	message.DateTime = taskResult.StartTime
 89 | 
 90 | 	var details bytes.Buffer
 91 | 
 92 | 	switch aType {
 93 | 	case AlertBad:
 94 | 		details.WriteString("A least one error occured during a task for this host. (" + taskResult.StartTime.Format("2006-01-02 15:04:05") + ")\n")
 95 | 		details.WriteString("\n")
 96 | 		details.WriteString("Error(s):\n")
 97 | 		for _, err := range taskResult.Errors {
 98 | 			details.WriteString(err.Error() + "\n")
 99 | 		}
100 | 		if len(taskResult.Logs) > 0 {
101 | 			details.WriteString("\n")
102 | 			details.WriteString("Logs(s):\n")
103 | 			for _, log := range taskResult.Logs {
104 | 				details.WriteString(log + "\n")
105 | 			}
106 | 		}
107 | 	case AlertGood:
108 | 		details.WriteString("No more errors for this task on this host. (" + taskResult.StartTime.Format("2006-01-02 15:04:05") + ")\n")
109 | 	}
110 | 
111 | 	details.WriteString("\n")
112 | 	details.WriteString("Unique failure ID: " + message.UniqueID + "\n")
113 | 	message.Details = details.String()
114 | 
115 | 	message.Classes = []string{GeneralClass}
116 | 
117 | 	return &message
118 | }
119 | 
120 | // AlertMessageCreateForCheck creates a AlertGood or AlertBad message for a Check
121 | func AlertMessageCreateForCheck(aType AlertMessageType, run *Run, taskRes *TaskResult, check *Check, currentFail *CurrentFail) *AlertMessage {
122 | 	var message AlertMessage
123 | 
124 | 	// Host: Check (Task)
125 | 	message.Subject = fmt.Sprintf("[%s] %s: %s (%s)", aType, run.Host.Name, check.Desc, taskRes.Task.Probe.Name)
126 | 	message.Type = aType
127 | 	message.UniqueID = currentFail.UniqueID
128 | 	message.Hostname = run.Host.Name
129 | 
130 | 	var details bytes.Buffer
131 | 
132 | 	switch aType {
133 | 	case AlertBad:
134 | 		details.WriteString("An alert **is** ringing.\n\n")
135 | 		message.DateTime = currentFail.FailStart
136 | 	case AlertGood:
137 | 		details.WriteString("This alert is **no more** ringing.\n\n")
138 | 		message.DateTime = taskRes.StartTime
139 | 	}
140 | 
141 | 	details.WriteString("Failure time: " + currentFail.FailStart.Format("2006-01-02 15:04:05") + "\n")
142 | 	details.WriteString("Last task time: " + taskRes.StartTime.Format("2006-01-02 15:04:05") + "\n")
143 | 	details.WriteString("Class(es): " + strings.Join(check.Classes, ", ") + "\n")
144 | 	details.WriteString("Failed condition was: " + check.If.String() + "\n")
145 | 	details.WriteString("\n")
146 | 	details.WriteString("Values:\n")
147 | 	for _, token := range check.If.Vars() {
148 | 		if IsAllUpper(token) {
149 | 			details.WriteString("- " + token + ": " + taskRes.Values[token] + "\n")
150 | 		} else {
151 | 			val := InterfaceValueToString(taskRes.Task.Probe.Defaults[token])
152 | 			if _, exists := taskRes.Host.Defaults[token]; exists == true {
153 | 				val = InterfaceValueToString(taskRes.Host.Defaults[token])
154 | 			}
155 | 			details.WriteString("- " + token + ": " + val + "\n")
156 | 		}
157 | 	}
158 | 	details.WriteString("\n")
159 | 	details.WriteString(fmt.Sprintf("All values for this run (%s):\n", run.Duration))
160 | 	for _, tr := range run.TaskResults {
161 | 		details.WriteString(fmt.Sprintf("- %s (%s):\n", tr.Task.Probe.Name, tr.Duration))
162 | 		for key, val := range tr.Values {
163 | 			details.WriteString("--- " + key + ": " + val + "\n")
164 | 		}
165 | 	}
166 | 	details.WriteString("\n")
167 | 	details.WriteString("Unique failure ID: " + message.UniqueID + "\n")
168 | 	message.Details = details.String()
169 | 
170 | 	message.Classes = check.Classes
171 | 
172 | 	return &message
173 | }
174 | 
175 | // Dump prints AlertMessage informations on the screen for debugging purposes
176 | func (msg *AlertMessage) Dump() {
177 | 	fmt.Printf("---\n")
178 | 	fmt.Printf("Subject: %s\n", msg.Subject)
179 | 	fmt.Printf("%s\n---\n", msg.Details)
180 | }
181 | 
182 | // RingAlerts will search and ring all alerts for this AlertMessage
183 | func (msg *AlertMessage) RingAlerts() {
184 | 	ringCount := 0
185 | 	for _, alert := range globalAlerts {
186 | 		if msg.MatchAlertTargets(alert) {
187 | 			if alert.Ringable() {
188 | 				alert.Ring(msg)
189 | 				ringCount++
190 | 			}
191 | 		}
192 | 	}
193 | 
194 | 	if ringCount == 0 {
195 | 		// if class is already "general", we're f*cked :(
196 | 		if len(msg.Classes) == 1 && msg.Classes[0] == GeneralClass {
197 | 			Error.Printf("unable to ring an alert : can't match the 'general' class!\n")
198 | 			return
199 | 		}
200 | 
201 | 		Warning.Printf("no matching alert for this failure: '%s' with class(es): %s\n", msg.Subject, strings.Join(msg.Classes, ", "))
202 | 
203 | 		// forward the alert to 'general' class:
204 | 		msg.Subject = msg.Subject + " (Fwd)"
205 | 		prepend := "WARNING: This alert is re-routed to the 'general' class, because no alert matches its orginial classes (" + strings.Join(msg.Classes, ", ") + ")\n\n"
206 | 		msg.Details = prepend + msg.Details
207 | 		msg.Classes = []string{GeneralClass}
208 | 		msg.RingAlerts()
209 | 	}
210 | }
211 | 
212 | // HasClass returns true if this AlertMessage has this class
213 | func (msg *AlertMessage) HasClass(class string) bool {
214 | 	if class == "*" {
215 | 		return true
216 | 	}
217 | 
218 | 	for _, hClass := range msg.Classes {
219 | 		if hClass == class {
220 | 			return true
221 | 		}
222 | 	}
223 | 	return false
224 | }
225 | 
226 | // MatchAlertTargets returns true if this AlertMessage matches alert's classes
227 | func (msg *AlertMessage) MatchAlertTargets(alert *Alert) bool {
228 | 	for _, pTargets := range alert.Targets {
229 | 		tokens := strings.Split(pTargets, "&")
230 | 		matched := 0
231 | 		mustMatch := len(tokens)
232 | 		for _, token := range tokens {
233 | 			ttoken := strings.TrimSpace(token)
234 | 			if msg.HasClass(ttoken) {
235 | 				matched++
236 | 			}
237 | 		}
238 | 		if matched == mustMatch {
239 | 			return true
240 | 		}
241 | 	}
242 | 	return false
243 | }
244 | 


--------------------------------------------------------------------------------
/check_functions.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"regexp"
 6 | 	"time"
 7 | 
 8 | 	"github.com/Knetic/govaluate"
 9 | )
10 | 
11 | // CheckFunctions will hold all custom govaluate functions for Check 'If'
12 | // expressions
13 | var CheckFunctions map[string]govaluate.ExpressionFunction
14 | 
15 | // CheckFunctionsInit will initialize CheckFunctions global variable
16 | func CheckFunctionsInit() {
17 | 	CheckFunctions = map[string]govaluate.ExpressionFunction{
18 | 
19 | 		"strlen": func(args ...interface{}) (interface{}, error) {
20 | 			length := len(args[0].(string))
21 | 			return (float64)(length), nil
22 | 		},
23 | 
24 | 		"ping": func(args ...interface{}) (interface{}, error) {
25 | 			if len(args) > 0 {
26 | 				return nil, fmt.Errorf("ping function: too much arguments")
27 | 			}
28 | 			return (string)("pong"), nil
29 | 		},
30 | 
31 | 		"date": func(args ...interface{}) (interface{}, error) {
32 | 			if len(args) != 1 {
33 | 				return nil, fmt.Errorf("date function: wrong argument count (1 required)")
34 | 			}
35 | 			format := args[0].(string)
36 | 			now := time.Now()
37 | 			switch format {
38 | 			case "hour":
39 | 				return (float64)(now.Hour()), nil
40 | 			case "minute":
41 | 				return (float64)(now.Minute()), nil
42 | 			case "time":
43 | 				return (float64)((float64)(now.Hour()) + (float64)(now.Minute())/60.0), nil
44 | 			case "dow", "day-of-week":
45 | 				// Sunday = 0
46 | 				return (float64)(now.Weekday()), nil
47 | 			case "dom", "day-of-month":
48 | 				return (float64)(now.Day()), nil
49 | 			case "now":
50 | 				return (float64)(now.Unix()), nil
51 | 			}
52 | 
53 | 			if match, _ := regexp.MatchString("^[0-9]{1,2}:[0-9]{2}$", format); match == true {
54 | 				t, err := alertCheckHour(format)
55 | 				if err != nil {
56 | 					return nil, fmt.Errorf("date function: invalid hour '%s': %s", format, err)
57 | 				}
58 | 				return (float64)((float64)(t[0]) + (float64)(t[1])/60.0), nil
59 | 			}
60 | 
61 | 			return nil, fmt.Errorf("date function: invalid format '%s'", format)
62 | 		},
63 | 	}
64 | }
65 | 


--------------------------------------------------------------------------------
/config.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"fmt"
  6 | 	"os"
  7 | 	"path"
  8 | 	"time"
  9 | 
 10 | 	"github.com/BurntSushi/toml"
 11 | )
 12 | 
// tomlConfig is the decoding target for the nosee.toml file. GlobalConfigRead
// pre-fills it with defaults, decodes the file over it, then validates the
// values before copying them to a Config.
type tomlConfig struct {
	Name            string
	// rejected by GlobalConfigRead when greater than one minute
	StartTimeSpread Duration `toml:"start_time_spread"`
	// rejected by GlobalConfigRead when less than one second
	SSHConnTimeWarn Duration `toml:"ssh_connection_time_warn"`
	SSHBlindTrust   bool     `toml:"ssh_blindtrust_fingerprints"`
	// copied as-is (no check that the path is writable)
	SavePath        string   `toml:"save_path"`
	// rejected by GlobalConfigRead when less than 5 seconds
	HeartbeatDelay  Duration `toml:"heartbeat_delay"`
}
 21 | 
// Config is the final form of the nosee.toml config file
type Config struct {
	// configPath is the configuration directory given to GlobalConfigRead;
	// script paths are built from it (scripts/alerts, scripts/probes)
	configPath   string
	// loadDisabled: when true, hosts/probes/alerts flagged 'disabled' are
	// converted anyway instead of being skipped (false by default)
	loadDisabled bool
	// doConnTest is true by default
	// NOTE(review): presumably toggles a connection test at startup — confirm
	// against its users, it is only initialized here
	doConnTest   bool

	// values below come from nosee.toml (or from the defaults set by
	// GlobalConfigRead when the file or the parameter is missing)

	Name                   string
	// start_time_spread converted to whole seconds (default: 15)
	StartTimeSpreadSeconds int
	// default: 10 seconds
	SSHConnTimeWarn        time.Duration
	// default: false
	SSHBlindTrust          bool
	// default: "./"
	SavePath               string
	// default: 30 seconds
	HeartbeatDelay         time.Duration
}
 35 | 
// GlobalConfig exports the Nosee server configuration
// NOTE(review): nil as declared; presumably assigned at startup with the
// result of GlobalConfigRead — confirm in the caller.
var GlobalConfig *Config
 38 | 
 39 | // GlobalConfigRead reads given file and returns a Config
 40 | func GlobalConfigRead(dir, file string) (*Config, error) {
 41 | 	var config Config
 42 | 	var tConfig tomlConfig
 43 | 
 44 | 	// defaults:
 45 | 	// config.xxx -> default if config file not exists
 46 | 	// tConfig.xxx -> default if parameter's not provided in config file
 47 | 	config.Name = ""
 48 | 	tConfig.Name = ""
 49 | 
 50 | 	config.StartTimeSpreadSeconds = 15
 51 | 	tConfig.StartTimeSpread.Duration = 15 * time.Second
 52 | 
 53 | 	config.SSHConnTimeWarn = 10 * time.Second
 54 | 	tConfig.SSHConnTimeWarn.Duration = config.SSHConnTimeWarn
 55 | 
 56 | 	config.SSHBlindTrust = false
 57 | 	tConfig.SSHBlindTrust = false
 58 | 
 59 | 	config.SavePath = "./"
 60 | 	tConfig.SavePath = config.SavePath
 61 | 
 62 | 	config.HeartbeatDelay = 30 * time.Second
 63 | 	tConfig.HeartbeatDelay.Duration = config.HeartbeatDelay
 64 | 
 65 | 	config.configPath = dir
 66 | 	config.loadDisabled = false
 67 | 	config.doConnTest = true
 68 | 
 69 | 	if stat, err := os.Stat(config.configPath); err != nil || !stat.Mode().IsDir() {
 70 | 		return nil, fmt.Errorf("configuration directory not found: %s (%s)", err, config.configPath)
 71 | 	}
 72 | 
 73 | 	configPath := path.Clean(dir + "/" + file)
 74 | 
 75 | 	if stat, err := os.Stat(configPath); err != nil || !stat.Mode().IsRegular() {
 76 | 		Warning.Printf("no %s file, using defaults\n", configPath)
 77 | 		return &config, nil
 78 | 	}
 79 | 
 80 | 	if _, err := toml.DecodeFile(configPath, &tConfig); err != nil {
 81 | 		return nil, fmt.Errorf("decoding %s: %s", file, err)
 82 | 	}
 83 | 
 84 | 	if tConfig.Name != "" {
 85 | 		config.Name = tConfig.Name
 86 | 	}
 87 | 
 88 | 	if tConfig.StartTimeSpread.Duration > (1 * time.Minute) {
 89 | 		return nil, errors.New("'start_time_spread' can't be more than a minute")
 90 | 	}
 91 | 	config.StartTimeSpreadSeconds = int(tConfig.StartTimeSpread.Duration.Seconds())
 92 | 
 93 | 	if tConfig.SSHConnTimeWarn.Duration < (1 * time.Second) {
 94 | 		return nil, errors.New("'ssh_connection_time_warn' can't be less than a second")
 95 | 	}
 96 | 	config.SSHConnTimeWarn = tConfig.SSHConnTimeWarn.Duration
 97 | 
 98 | 	config.SSHBlindTrust = tConfig.SSHBlindTrust
 99 | 
100 | 	// should check if writable
101 | 	config.SavePath = tConfig.SavePath
102 | 
103 | 	if tConfig.HeartbeatDelay.Duration < (5 * time.Second) {
104 | 		return nil, errors.New("'heartbeat_delay' can't be less than 5 seconds")
105 | 	}
106 | 	config.HeartbeatDelay = tConfig.HeartbeatDelay.Duration
107 | 
108 | 	return &config, nil
109 | }
110 | 


--------------------------------------------------------------------------------
/config_alert.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"fmt"
  6 | 	"io/ioutil"
  7 | 	"os"
  8 | 	"os/exec"
  9 | 	"path"
 10 | 	"strconv"
 11 | 	"strings"
 12 | )
 13 | 
// tomlAlert is the decoding target for an alert TOML file; it is validated
// and converted to an Alert by tomlAlertToAlert.
type tomlAlert struct {
	// required
	Name      string
	// a disabled alert is skipped unless Config.loadDisabled is set
	Disabled  bool
	// required and non-empty; each entry is "*", the general class, or
	// class names joined by '&' (ex: "linux & production")
	Targets   []string
	// required; looked up in <config dir>/scripts/alerts/ first, then in PATH
	Command   string
	Arguments []string
	// optional hour ranges, ex: "8:30 - 19:00" (see alertCheckHours)
	Hours     []string
	// optional days, 0 to 7; 7 is rewritten to 0 (see alertCheckAndCleanDays)
	Days      []int
}
 23 | 
 24 | func alertCheckHour(hour string) ([2]int, error) {
 25 | 	var err error
 26 | 	var res [2]int
 27 | 
 28 | 	parts := strings.Split(hour, ":")
 29 | 	if len(parts) != 2 {
 30 | 		return res, fmt.Errorf("invalid format '%s' (ex: '19:30')", hour)
 31 | 	}
 32 | 	res[0], err = strconv.Atoi(parts[0])
 33 | 	if err != nil {
 34 | 		return res, fmt.Errorf("can't convert '%s' hour to integer: %s", hour, err)
 35 | 	}
 36 | 	res[1], err = strconv.Atoi(parts[1])
 37 | 	if err != nil {
 38 | 		return res, fmt.Errorf("can't convert '%s' minute to integer: %s", hour, err)
 39 | 	}
 40 | 
 41 | 	if res[0] < 0 {
 42 | 		return res, fmt.Errorf("hour can't be less than 0: %s", hour)
 43 | 	}
 44 | 	if res[1] < 0 {
 45 | 		return res, fmt.Errorf("minute can't be less than 0: %s", hour)
 46 | 	}
 47 | 	if res[0] > 23 {
 48 | 		return res, fmt.Errorf("hour can't more than 23: %s", hour)
 49 | 	}
 50 | 	if res[1] > 59 {
 51 | 		return res, fmt.Errorf("minute can't more than 59: %s", hour)
 52 | 	}
 53 | 
 54 | 	return res, nil
 55 | }
 56 | 
 57 | func alertCheckHours(hours []string) ([]HourRange, error) {
 58 | 	var hourRanges []HourRange
 59 | 
 60 | 	for _, hour := range hours {
 61 | 		var (
 62 | 			hourRange HourRange
 63 | 			err       error
 64 | 		)
 65 | 
 66 | 		rng := strings.Split(hour, "-")
 67 | 		if len(rng) != 2 {
 68 | 			return nil, fmt.Errorf("invalid format '%s' (ex: '8:90 - 19:00')", hour)
 69 | 		}
 70 | 		rng[0] = strings.TrimSpace(rng[0])
 71 | 		rng[1] = strings.TrimSpace(rng[1])
 72 | 
 73 | 		if hourRange.Start, err = alertCheckHour(rng[0]); err != nil {
 74 | 			return nil, fmt.Errorf("invalid start hour: %s", err)
 75 | 		}
 76 | 		if hourRange.End, err = alertCheckHour(rng[1]); err != nil {
 77 | 			return nil, fmt.Errorf("invalid end hour: %s", err)
 78 | 		}
 79 | 
 80 | 		start := hourRange.Start[0]*60 + hourRange.Start[1]
 81 | 		end := hourRange.End[0]*60 + hourRange.End[1]
 82 | 		if start >= end {
 83 | 			return nil, fmt.Errorf("end of the hour range (%s) is before its start", hour)
 84 | 		}
 85 | 
 86 | 		hourRanges = append(hourRanges, hourRange)
 87 | 	}
 88 | 	return hourRanges, nil
 89 | }
 90 | 
 91 | func alertCheckAndCleanDays(days []int) error {
 92 | 	for key, day := range days {
 93 | 		if day < 0 {
 94 | 			return fmt.Errorf("day can't be less than 0: %d", day)
 95 | 		}
 96 | 		if day > 7 {
 97 | 			return fmt.Errorf("day can't be more than 7: %d", day)
 98 | 		}
 99 | 
100 | 		if day == 7 {
101 | 			days[key] = 0
102 | 		}
103 | 	}
104 | 	return nil
105 | }
106 | 
107 | func tomlAlertToAlert(tAlert *tomlAlert, config *Config) (*Alert, error) {
108 | 	var alert Alert
109 | 
110 | 	if tAlert.Disabled == true && config.loadDisabled == false {
111 | 		return nil, nil
112 | 	}
113 | 
114 | 	if tAlert.Name == "" {
115 | 		return nil, errors.New("invalid or missing 'name'")
116 | 	}
117 | 	alert.Name = tAlert.Name
118 | 
119 | 	if tAlert.Command == "" {
120 | 		return nil, errors.New("invalid or missing 'command'")
121 | 	}
122 | 
123 | 	scriptPath := path.Clean(config.configPath + "/scripts/alerts/" + tAlert.Command)
124 | 	stat, err := os.Stat(scriptPath)
125 | 
126 | 	if err == nil {
127 | 		if !stat.Mode().IsRegular() {
128 | 			return nil, fmt.Errorf("is not a regular 'script' file '%s'", scriptPath)
129 | 		}
130 | 		tAlert.Command = scriptPath
131 | 	} else {
132 | 		path, errp := exec.LookPath(tAlert.Command)
133 | 		if errp != nil {
134 | 			return nil, fmt.Errorf("'%s' command not found in PATH: %s", tAlert.Command, errp)
135 | 		}
136 | 		tAlert.Command = path
137 | 	}
138 | 
139 | 	alert.Command = tAlert.Command
140 | 
141 | 	_, err = ioutil.ReadFile(alert.Command)
142 | 	if err != nil {
143 | 		return nil, fmt.Errorf("error reading script file '%s': %s", alert.Command, err)
144 | 	}
145 | 
146 | 	if tAlert.Targets == nil {
147 | 		return nil, errors.New("no valid 'targets' parameter found")
148 | 	}
149 | 
150 | 	if len(tAlert.Targets) == 0 {
151 | 		return nil, errors.New("empty 'targets'")
152 | 	}
153 | 	// explode targets on & and check IsValidTokenName
154 | 	hasGeneralClass := false
155 | 	for _, targets := range tAlert.Targets {
156 | 		if targets == "*" || targets == GeneralClass {
157 | 			hasGeneralClass = true
158 | 			continue
159 | 		}
160 | 		tokens := strings.Split(targets, "&")
161 | 		for _, token := range tokens {
162 | 			ttoken := strings.TrimSpace(token)
163 | 			if !IsValidTokenName(ttoken) {
164 | 				return nil, fmt.Errorf("invalid 'target' class name '%s'", ttoken)
165 | 			}
166 | 		}
167 | 	}
168 | 	alert.Targets = tAlert.Targets
169 | 
170 | 	alert.Arguments = tAlert.Arguments
171 | 
172 | 	hours, err := alertCheckHours(tAlert.Hours)
173 | 	if err != nil {
174 | 		return nil, fmt.Errorf("'hours' parameter: %s", err)
175 | 	}
176 | 	alert.Hours = hours
177 | 
178 | 	if err := alertCheckAndCleanDays(tAlert.Days); err != nil {
179 | 		return nil, fmt.Errorf("'days' parameter: %s", err)
180 | 	}
181 | 	alert.Days = tAlert.Days
182 | 
183 | 	if hasGeneralClass == true && len(alert.Hours) > 0 && len(alert.Days) > 0 {
184 | 		return nil, fmt.Errorf("a 'general' (or '*') alert can't have hours/days restrictions, since you may miss alerts")
185 | 	}
186 | 
187 | 	return &alert, nil
188 | }
189 | 


--------------------------------------------------------------------------------
/config_host.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"fmt"
  6 | 	"os"
  7 | 	"time"
  8 | 
  9 | 	"golang.org/x/crypto/ssh"
 10 | )
 11 | 
// tomlNetwork is the decoding target for the [network] section of a host
// file; see tomlHostToHost for the validation rules.
type tomlNetwork struct {
	// required (non-empty)
	Host            string
	// required (0 is refused)
	Port            int
	// copied as-is to the connection
	Ciphers         []string
	// refused when less than one second
	// NOTE(review): a missing value decodes to 0 and would be refused too;
	// presumably the caller sets a default before decoding — confirm
	SSHConnTimeWarn Duration `toml:"ssh_connection_time_warn"`
}
 18 | 
// tomlAuth is the decoding target for the [auth] section of a host file.
// Exactly one method is selected by tomlHostToHost: password, SSH agent, or
// key file (with or without passphrase); incompatible mixes are refused.
type tomlAuth struct {
	// required (non-empty)
	User          string
	// can't be combined with Key, KeyPassphrase or SSHAgent
	Password      string
	// path of a private key file; it must be openable
	Key           string
	// passphrase for Key; can't be combined with Password or SSHAgent
	KeyPassphrase string `toml:"key_passphrase"`
	// use an SSH agent; can't be combined with Password, Key or KeyPassphrase
	SSHAgent      bool   `toml:"ssh_agent"`
	// only used with SSHAgent: given as argument to SSHAgent()
	Pubkey        string
}
 27 | 
// tomlHost is the decoding target for a host TOML file; it is validated and
// converted to a Host by tomlHostToHost.
type tomlHost struct {
	// a disabled host is skipped unless Config.loadDisabled is set
	Disabled bool
	// required (non-empty)
	Name     string
	Network  tomlNetwork
	Auth     tomlAuth
	// required and non-empty; every entry must pass IsValidTokenName
	Classes  []string
	// [[default]] entries, checked by checkTomlDefault
	Default  []tomlDefault
}
 36 | 
 37 | func tomlHostToHost(tHost *tomlHost, config *Config, filename string) (*Host, error) {
 38 | 	var (
 39 | 		connection Connection
 40 | 		host       Host
 41 | 	)
 42 | 
 43 | 	host.Connection = &connection
 44 | 	host.Filename = filename
 45 | 
 46 | 	if tHost.Disabled == true && config.loadDisabled == false {
 47 | 		return nil, nil
 48 | 	}
 49 | 	host.Disabled = (tHost.Disabled == true)
 50 | 
 51 | 	if tHost.Name == "" {
 52 | 		return nil, errors.New("invalid or missing 'name'")
 53 | 	}
 54 | 	host.Name = tHost.Name
 55 | 
 56 | 	if tHost.Classes == nil {
 57 | 		return nil, errors.New("no valid 'classes' parameter found")
 58 | 	}
 59 | 
 60 | 	if len(tHost.Classes) == 0 {
 61 | 		return nil, errors.New("empty classes")
 62 | 	}
 63 | 	for _, class := range tHost.Classes {
 64 | 		if !IsValidTokenName(class) {
 65 | 			return nil, fmt.Errorf("invalid class name '%s'", class)
 66 | 		}
 67 | 	}
 68 | 	host.Classes = tHost.Classes
 69 | 
 70 | 	host.Defaults = make(map[string]interface{})
 71 | 	if err := checkTomlDefault(host.Defaults, tHost.Default); err != nil {
 72 | 		return nil, err
 73 | 	}
 74 | 
 75 | 	if tHost.Network.Host == "" {
 76 | 		return nil, errors.New("[network] section, invalid or missing 'host'")
 77 | 	}
 78 | 	connection.Host = tHost.Network.Host
 79 | 
 80 | 	if tHost.Network.Port == 0 {
 81 | 		return nil, errors.New("[network] section, invalid or missing 'port'")
 82 | 	}
 83 | 	connection.Port = tHost.Network.Port
 84 | 
 85 | 	if tHost.Network.SSHConnTimeWarn.Duration < (1 * time.Second) {
 86 | 		return nil, errors.New("'ssh_connection_time_warn' can't be less than a second")
 87 | 	}
 88 | 	connection.SSHConnTimeWarn = tHost.Network.SSHConnTimeWarn.Duration
 89 | 
 90 | 	if tHost.Auth.User == "" {
 91 | 		return nil, errors.New("[auth] section, invalid or missing 'user'")
 92 | 	}
 93 | 	connection.User = tHost.Auth.User
 94 | 	connection.Ciphers = tHost.Network.Ciphers
 95 | 
 96 | 	if tHost.Auth.Key != "" && tHost.Auth.Password != "" {
 97 | 		return nil, errors.New("[auth] section, can't use key and password at the same time (see key_passphrase parameter, perhaps?)")
 98 | 	}
 99 | 	if tHost.Auth.KeyPassphrase != "" && tHost.Auth.Password != "" {
100 | 		return nil, errors.New("[auth] section, can't use key_passphrase and password at the same time")
101 | 	}
102 | 	if tHost.Auth.SSHAgent == true && tHost.Auth.Password != "" {
103 | 		return nil, errors.New("[auth] section, can't use SSH agent and password at the same time")
104 | 	}
105 | 	if tHost.Auth.SSHAgent == true && tHost.Auth.KeyPassphrase != "" {
106 | 		return nil, errors.New("[auth] section, can't use SSH agent and key_passphrase at the same time")
107 | 	}
108 | 	if tHost.Auth.SSHAgent == true && tHost.Auth.Key != "" {
109 | 		return nil, errors.New("[auth] section, can't use SSH agent and key at the same time (see pubkey parameter, perhaps?)")
110 | 	}
111 | 
112 | 	if tHost.Auth.Key != "" {
113 | 		fd, err := os.Open(tHost.Auth.Key)
114 | 		if err != nil {
115 | 			return nil, fmt.Errorf("can't access to key '%s': %s", tHost.Auth.Key, err)
116 | 		}
117 | 		fd.Close()
118 | 	}
119 | 
120 | 	// !!! there's many returns following this line, be careful
121 | 
122 | 	if tHost.Auth.Password != "" {
123 | 		connection.Auths = []ssh.AuthMethod{
124 | 			ssh.Password(tHost.Auth.Password),
125 | 		}
126 | 		return &host, nil
127 | 	}
128 | 
129 | 	if tHost.Auth.SSHAgent == true {
130 | 		agent, err := SSHAgent(tHost.Auth.Pubkey)
131 | 		if err != nil {
132 | 			return nil, err
133 | 		}
134 | 		connection.Auths = []ssh.AuthMethod{
135 | 			agent,
136 | 		}
137 | 		return &host, nil
138 | 	}
139 | 
140 | 	if tHost.Auth.Key != "" && tHost.Auth.KeyPassphrase == "" {
141 | 		connection.Auths = []ssh.AuthMethod{
142 | 			PublicKeyFile(tHost.Auth.Key),
143 | 		}
144 | 		return &host, nil
145 | 	}
146 | 
147 | 	if tHost.Auth.Key != "" && tHost.Auth.KeyPassphrase != "" {
148 | 		connection.Auths = []ssh.AuthMethod{
149 | 			PublicKeyFilePassPhrase(tHost.Auth.Key, tHost.Auth.KeyPassphrase),
150 | 		}
151 | 		return &host, nil
152 | 	}
153 | 
154 | 	return nil, errors.New("[auth] section, at least one auth method is needed (password, key or ssh_agent)")
155 | }
156 | 


--------------------------------------------------------------------------------
/config_probe.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"fmt"
  6 | 	"io/ioutil"
  7 | 	"os"
  8 | 	"path"
  9 | 	"reflect"
 10 | 	"strings"
 11 | 	"time"
 12 | 
 13 | 	"github.com/Knetic/govaluate"
 14 | )
 15 | 
 16 | // Duration hides time.Duration for TOML file reading (see UnmarshalText)
 17 | type Duration struct {
 18 | 	time.Duration
 19 | }
 20 | 
 21 | // UnmarshalText is needed to satisfy the encoding.TextUnmarshaler interface
 22 | func (d *Duration) UnmarshalText(text []byte) error {
 23 | 	var err error
 24 | 	d.Duration, err = time.ParseDuration(string(text))
 25 | 	return err
 26 | }
 27 | 
// tomlDefault is the decoding target for a [[default]] entry: a named value.
// checkTomlDefault refuses empty or all-uppercase names and only accepts
// string, integer and float values.
type tomlDefault struct {
	Name  string
	Value interface{}
}
 32 | 
// tomlCheck is the decoding target for a [[check]] entry of a probe file;
// see tomlProbeToProbe for the validation rules.
type tomlCheck struct {
	// required (non-empty)
	Desc            string
	// required: govaluate expression (CheckFunctions are available)
	If              string
	// required and non-empty; every entry must pass IsValidTokenName
	Classes         []string
	// 0 (or missing) means 1
	NeededFailures  int `toml:"needed_failures"`
	// 0 (or missing) means "same as needed_failures"
	NeededSuccesses int `toml:"needed_successes"`
}
 40 | 
// tomlProbe is the decoding target for a probe TOML file; it is validated
// and converted to a Probe by tomlProbeToProbe.
type tomlProbe struct {
	// required (non-empty)
	Name      string
	// a disabled probe is skipped unless Config.loadDisabled is set
	Disabled  bool
	// required: file name inside <config dir>/scripts/probes/
	Script    string
	// required and non-empty; each entry is "*" or class names joined by '&'
	Targets   []string
	// required: at least one minute, and a whole number of minutes
	Delay     Duration
	// 0 (or missing) means 20 seconds; can't be less than one second
	Timeout   Duration
	// copied as-is to the Probe
	Arguments string
	// [[default]] entries, checked by checkTomlDefault
	Default   []tomlDefault
	// [[check]] entries
	Check     []tomlCheck
	// optional govaluate expression; it can't reference any variable
	RunIf     string `toml:"run_if"`
}
 53 | 
 54 | func checkTomlDefault(pDefaults map[string]interface{}, tDefaults []tomlDefault) error {
 55 | 	for _, tDefault := range tDefaults {
 56 | 
 57 | 		if tDefault.Name == "" {
 58 | 			return errors.New("[[default]] with invalid or missing 'name'")
 59 | 		}
 60 | 
 61 | 		if IsAllUpper(tDefault.Name) {
 62 | 			return fmt.Errorf("[[default]] name is invalid (all uppercase): %s", tDefault.Name)
 63 | 		}
 64 | 
 65 | 		valid := false
 66 | 		switch tDefault.Value.(type) {
 67 | 		case string:
 68 | 			valid = true
 69 | 		case int32:
 70 | 			valid = true
 71 | 		case int64:
 72 | 			valid = true
 73 | 		case float32:
 74 | 			valid = true
 75 | 		case float64:
 76 | 			valid = true
 77 | 		}
 78 | 
 79 | 		if valid == false {
 80 | 			return fmt.Errorf("[[default]] invalid value type '%s' for '%s'", reflect.TypeOf(tDefault.Value), tDefault.Name)
 81 | 		}
 82 | 
 83 | 		if _, exists := pDefaults[tDefault.Name]; exists == true {
 84 | 			return fmt.Errorf("Config error: duplicate default name '%s'", tDefault.Name)
 85 | 		}
 86 | 
 87 | 		pDefaults[tDefault.Name] = tDefault.Value
 88 | 	}
 89 | 	return nil
 90 | }
 91 | 
 92 | func tomlProbeToProbe(tProbe *tomlProbe, config *Config, filename string) (*Probe, error) {
 93 | 	var probe Probe
 94 | 
 95 | 	if tProbe.Disabled == true && config.loadDisabled == false {
 96 | 		return nil, nil
 97 | 	}
 98 | 	probe.Disabled = (tProbe.Disabled == true)
 99 | 
100 | 	probe.Filename = filename
101 | 
102 | 	if tProbe.Name == "" {
103 | 		return nil, errors.New("invalid or missing 'name'")
104 | 	}
105 | 	probe.Name = tProbe.Name
106 | 
107 | 	if tProbe.Script == "" {
108 | 		return nil, errors.New("invalid or missing 'script'")
109 | 	}
110 | 
111 | 	scriptPath := path.Clean(config.configPath + "/scripts/probes/" + tProbe.Script)
112 | 	stat, err := os.Stat(scriptPath)
113 | 
114 | 	if err != nil {
115 | 		return nil, fmt.Errorf("invalid 'script' file '%s': %s", scriptPath, err)
116 | 	}
117 | 
118 | 	if !stat.Mode().IsRegular() {
119 | 		return nil, fmt.Errorf("is not a regular 'script' file '%s'", scriptPath)
120 | 	}
121 | 	probe.Script = scriptPath
122 | 
123 | 	_, err = ioutil.ReadFile(scriptPath)
124 | 	if err != nil {
125 | 		return nil, fmt.Errorf("error reading script file '%s': %s", scriptPath, err)
126 | 	}
127 | 
128 | 	if tProbe.Targets == nil {
129 | 		return nil, errors.New("no valid 'targets' parameter found")
130 | 	}
131 | 
132 | 	if len(tProbe.Targets) == 0 {
133 | 		return nil, errors.New("empty 'targets'")
134 | 	}
135 | 	// explode targets on & and check IsValidTokenName
136 | 	for _, targets := range tProbe.Targets {
137 | 		if targets == "*" {
138 | 			continue
139 | 		}
140 | 		tokens := strings.Split(targets, "&")
141 | 		for _, token := range tokens {
142 | 			ttoken := strings.TrimSpace(token)
143 | 			if !IsValidTokenName(ttoken) {
144 | 				return nil, fmt.Errorf("invalid 'target' class name '%s'", ttoken)
145 | 			}
146 | 		}
147 | 	}
148 | 	probe.Targets = tProbe.Targets
149 | 
150 | 	if tProbe.Delay.Duration == 0 {
151 | 		return nil, errors.New("invalid or missing 'delay'")
152 | 	}
153 | 
154 | 	if tProbe.Delay.Duration < (1 * time.Minute) {
155 | 		return nil, errors.New("'delay' can't be less than a minute")
156 | 	}
157 | 
158 | 	minutes := float64(tProbe.Delay.Duration) / float64(time.Minute)
159 | 	if float64(int(minutes)) != minutes {
160 | 		return nil, errors.New("'delay' granularity is in minutes (ex: 5m)")
161 | 	}
162 | 	probe.Delay = tProbe.Delay.Duration
163 | 
164 | 	if tProbe.Timeout.Duration == 0 {
165 | 		//~ return nil, errors.New("invalid or missing 'timeout'")
166 | 		tProbe.Timeout.Duration = 20 * time.Second
167 | 	}
168 | 
169 | 	if tProbe.Timeout.Duration < (1 * time.Second) {
170 | 		return nil, errors.New("'timeout' can't be less than 1 second")
171 | 	}
172 | 	probe.Timeout = tProbe.Timeout.Duration
173 | 
174 | 	// should warn about dangerous characters? (;& …)
175 | 	probe.Arguments = tProbe.Arguments
176 | 
177 | 	if tProbe.RunIf != "" {
178 | 		expr, err := govaluate.NewEvaluableExpressionWithFunctions(tProbe.RunIf, CheckFunctions)
179 | 		if err != nil {
180 | 			return nil, fmt.Errorf("invalid 'run_if' expression: %s (\"%s\")", err, tProbe.RunIf)
181 | 		}
182 | 		if vars := expr.Vars(); len(vars) > 0 {
183 | 			return nil, fmt.Errorf("undefined variable(s) in 'run_if' expression: %s", strings.Join(vars, ", "))
184 | 		}
185 | 		probe.RunIf = expr
186 | 	}
187 | 
188 | 	probe.Defaults = make(map[string]interface{})
189 | 	if err := checkTomlDefault(probe.Defaults, tProbe.Default); err != nil {
190 | 		return nil, err
191 | 	}
192 | 
193 | 	for index, tCheck := range tProbe.Check {
194 | 		var check Check
195 | 
196 | 		check.Index = index
197 | 
198 | 		if tCheck.Desc == "" {
199 | 			return nil, errors.New("[[check]] with invalid or missing 'desc'")
200 | 		}
201 | 		check.Desc = tCheck.Desc
202 | 
203 | 		if tCheck.If == "" {
204 | 			return nil, errors.New("[[check]] with invalid or missing 'if'")
205 | 		}
206 | 		expr, err := govaluate.NewEvaluableExpressionWithFunctions(tCheck.If, CheckFunctions)
207 | 		if err != nil {
208 | 			return nil, fmt.Errorf("[[check]] invalid 'if' expression: %s (\"%s\")", err, tCheck.If)
209 | 		}
210 | 		check.If = expr
211 | 
212 | 		if tCheck.Classes == nil {
213 | 			return nil, errors.New("no valid 'classes' parameter found")
214 | 		}
215 | 
216 | 		if len(tCheck.Classes) == 0 {
217 | 			return nil, errors.New("empty classes")
218 | 		}
219 | 		for _, class := range tCheck.Classes {
220 | 			if !IsValidTokenName(class) {
221 | 				return nil, fmt.Errorf("invalid class name '%s'", class)
222 | 			}
223 | 		}
224 | 		check.Classes = tCheck.Classes
225 | 
226 | 		if tCheck.NeededFailures == 0 {
227 | 			tCheck.NeededFailures = 1
228 | 		}
229 | 		check.NeededFailures = tCheck.NeededFailures
230 | 
231 | 		if tCheck.NeededSuccesses == 0 {
232 | 			tCheck.NeededSuccesses = check.NeededFailures
233 | 		}
234 | 		check.NeededSuccesses = tCheck.NeededSuccesses
235 | 
236 | 		probe.Checks = append(probe.Checks, &check)
237 | 	}
238 | 
239 | 	if miss := probe.MissingDefaults(); len(miss) > 0 {
240 | 		return nil, fmt.Errorf("missing defaults (used in 'if' expressions or 'arguments' parameter): %s", strings.Join(miss, ", "))
241 | 	}
242 | 
243 | 	return &probe, nil
244 | }
245 | 


--------------------------------------------------------------------------------
/current_fails.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"os"
  6 | 	"path"
  7 | 	"sync"
  8 | 	"time"
  9 | 
 10 | 	uuid "github.com/satori/go.uuid"
 11 | )
 12 | 
// CurrentFail type hold informations about a failure currently detected
// and not resolved yet
//
// Values are stored in the currentFails map and are JSON-encoded (every
// field, payload included) by CurrentFailsSave.
type CurrentFail struct {
	// time of the first failure (set when the CurrentFail is created)
	FailStart time.Time
	// starts at 1, incremented by CurrentFailInc
	FailCount int
	// incremented by CurrentFailDec, reset to 0 by CurrentFailInc
	OkCount   int
	// string form of a UUID (v4) generated when the CurrentFail is created
	UniqueID  string

	// optional "payload"
	// NOTE(review): none of these is set in this file; presumably filled by
	// the code that detects the failure — confirm against callers
	RelatedTask  *Task // for Checks (!!)
	RelatedHost  *Host // for Runs
	RelatedTTask *Task // for Tasks
}
 26 | 
var (
	// currentFails holds the unresolved failures, indexed by hash; nil until
	// CurrentFailsCreate is called
	currentFails      map[string]*CurrentFail
	// currentFailsMutex is locked by the functions that save, load or modify
	// currentFails
	currentFailsMutex sync.Mutex
)

// statusFile is the name of the file, inside GlobalConfig.SavePath, used by
// CurrentFailsSave and CurrentFailsLoad
const statusFile string = "nosee-fails.json"
 33 | 
 34 | // CurrentFailsCreate initialize the global currentFails variable
 35 | func CurrentFailsCreate() {
 36 | 	currentFails = make(map[string]*CurrentFail)
 37 | }
 38 | 
 39 | // CurrentFailsSave dumps current alerts to disk
 40 | func CurrentFailsSave() {
 41 | 	// doing this in a go routine allows this function to be called
 42 | 	// by functions that are already locking the mutex
 43 | 	go func() {
 44 | 		currentFailsMutex.Lock()
 45 | 		defer currentFailsMutex.Unlock()
 46 | 
 47 | 		path := path.Clean(GlobalConfig.SavePath + "/" + statusFile)
 48 | 		f, err := os.Create(path)
 49 | 		if err != nil {
 50 | 			Error.Printf("can't save fails in '%s': %s (see save_path param?)", path, err)
 51 | 			return
 52 | 		}
 53 | 		defer f.Close()
 54 | 
 55 | 		enc := json.NewEncoder(f)
 56 | 		err = enc.Encode(&currentFails)
 57 | 		if err != nil {
 58 | 			Error.Printf("fails json encode: %s", err)
 59 | 			return
 60 | 		}
 61 | 		Info.Printf("current fails successfully saved to '%s'", path)
 62 | 	}()
 63 | }
 64 | 
 65 | // CurrentFailsLoad will load from disk previous "fails"
 66 | func CurrentFailsLoad() {
 67 | 	currentFailsMutex.Lock()
 68 | 	defer currentFailsMutex.Unlock()
 69 | 
 70 | 	path := path.Clean(GlobalConfig.SavePath + "/" + statusFile)
 71 | 	f, err := os.Open(path)
 72 | 	if err != nil {
 73 | 		Warning.Printf("can't read previous status: %s, no fails loaded", err)
 74 | 		return
 75 | 	}
 76 | 	defer f.Close()
 77 | 
 78 | 	dec := json.NewDecoder(f)
 79 | 	err = dec.Decode(&currentFails)
 80 | 	if err != nil {
 81 | 		Error.Printf("'%s' json decode: %s", path, err)
 82 | 	}
 83 | 	Info.Printf("'%s' loaded: %d fail(s)", path, len(currentFails))
 84 | }
 85 | 
 86 | // CurrentFailDelete deleted the CurrentFail with the given hash of the global currentFails
 87 | func CurrentFailDelete(hash string) {
 88 | 	currentFailsMutex.Lock()
 89 | 	defer currentFailsMutex.Unlock()
 90 | 	delete(currentFails, hash)
 91 | 	CurrentFailsSave()
 92 | }
 93 | 
 94 | // CurrentFailAdd adds a CurrentFail to the global currentFails using given hash
 95 | func CurrentFailAdd(hash string, failedCheck *CurrentFail) {
 96 | 	currentFailsMutex.Lock()
 97 | 	defer currentFailsMutex.Unlock()
 98 | 	currentFails[hash] = failedCheck
 99 | 	CurrentFailsSave()
100 | }
101 | 
102 | // CurrentFailInc increments FailCount of the CurrentFail with the given hash
103 | func CurrentFailInc(hash string) {
104 | 	currentFailsMutex.Lock()
105 | 	defer currentFailsMutex.Unlock()
106 | 	currentFails[hash].FailCount++
107 | 	currentFails[hash].OkCount = 0
108 | 	CurrentFailsSave()
109 | }
110 | 
111 | // CurrentFailDec increments OkCount of the CurrentFail with the given hash
112 | func CurrentFailDec(hash string) {
113 | 	currentFailsMutex.Lock()
114 | 	defer currentFailsMutex.Unlock()
115 | 	currentFails[hash].OkCount++
116 | 	CurrentFailsSave()
117 | }
118 | 
119 | // CurrentFailGetAndInc returns the CurrentFail with the given hash and
120 | // increments its FailCount. The CurrentFail is created if it does not
121 | // already exists.
122 | func CurrentFailGetAndInc(hash string) *CurrentFail {
123 | 	cf, ok := currentFails[hash]
124 | 	if !ok {
125 | 		var cf CurrentFail
126 | 		uuid := uuid.NewV4()
127 | 		cf.FailCount = 1
128 | 		cf.OkCount = 0
129 | 		cf.FailStart = time.Now()
130 | 		cf.UniqueID = uuid.String()
131 | 		CurrentFailAdd(hash, &cf)
132 | 		return &cf
133 | 	}
134 | 
135 | 	CurrentFailInc(hash)
136 | 	return cf
137 | }
138 | 
139 | // CurrentFailGetAndDec returns the CurrentFail with the given hash and
140 | // increments its OkCount
141 | func CurrentFailGetAndDec(hash string) *CurrentFail {
142 | 	cf, ok := currentFails[hash]
143 | 	if !ok {
144 | 		return nil
145 | 	}
146 | 	CurrentFailDec(hash)
147 | 	return cf
148 | }
149 | 


--------------------------------------------------------------------------------
/deploy/ssh-agent-nosee.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # If you are using SSH keys with private passphrase:
 4 | # This sample script runs an agent for the current user, creating
 5 | # a socket that the nosee service will use.
 6 | 
 7 | agent_link="$HOME/.ssh-agent-sock"
 8 | 
 9 | if [ -S "$agent_link" ]; then
10 |     echo "Agent is already here."
11 |     exit 0
12 | fi
13 | 
14 | eval $(ssh-agent -a "$agent_link")
15 | ssh-add "$HOME/keys/id_rsa_xxx"
16 | ssh-add "$HOME/keys/id_rsa_yyy"
17 | # ...
18 | 


--------------------------------------------------------------------------------
/deploy/supervisord/nosee.conf:
--------------------------------------------------------------------------------
 1 | ; Sample supervisord configuration using SSH agent
 2 | 
 3 | [program:nosee]
 4 | command=/home/nosee_server/go/bin/nosee --log-level info --log-timestamp
 5 | autostart=false
 6 | autorestart=false
 7 | user=nosee_server
 8 | ; See ssh-agent-nosee.sh
 9 | environment=SSH_AUTH_SOCK="/home/nosee_server/.ssh-agent-sock",HOME="/home/nosee_server"
10 | redirect_stderr=true
11 | stdout_logfile=/var/log/supervisor/nosee.log
12 | stdout_logfile_maxbytes=50MB
13 | 


--------------------------------------------------------------------------------
/deploy/systemd/nosee.service:
--------------------------------------------------------------------------------
 1 | [Unit]
 2 | Description=A nosey, agentless, easy monitoring tool over SSH
 3 | After=network-online.target
 4 | 
 5 | [Service]
 6 | User={USER}
 7 | ExecStart=/home/{USER}/go/bin/nosee -c /home/{USER}/nosee/etc/ --log-level info --log-timestamp
 8 | Type=simple
 9 | Restart=on-failure
10 | Environment=SSH_AUTH_SOCK=/home/{USER}/.ssh-agent-sock
11 | 
12 | [Install]
13 | WantedBy=multi-user.target
14 | 


--------------------------------------------------------------------------------
/doc/images/howto.txt:
--------------------------------------------------------------------------------
 1 | - Using "DIagrams Through Ascii Art" (ditaa) syntax
 2 | https://github.com/stathissideris/ditaa
 3 | 
 4 | - Generated using PlantUML online demo server
 5 | http://plantuml.com/
 6 | 
 7 | @startditaa
 8 | ...
 9 | @endditaa
10 | 


--------------------------------------------------------------------------------
/doc/images/img_base.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xfennec/nosee/6dad5bbc946242dd56f53de3c26feb0bb88042e6/doc/images/img_base.png


--------------------------------------------------------------------------------
/doc/images/img_base.txt:
--------------------------------------------------------------------------------
 1 |                 +--------+
 2 |                 | Nosee  |
 3 |                 | Daemon |
 4 |                 +--+-+-+-+
 5 |                    | | |
 6 |       +------------+ | +------------+
 7 |       |              |              |
 8 |       v              v              v
 9 |      SSH            SSH            SSH
10 | +-----------+  +-----+-----+  +-----------+
11 | | Monitored |  | Monitored |  | Monitored |
12 | |    Host   |  |    Host   |  |    Host   |
13 | +-----------+  +-----------+  +-----------+
14 | 
15 |     (Only SSH server is needed on hosts)
16 | 
17 | 


--------------------------------------------------------------------------------
/doc/images/img_general.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xfennec/nosee/6dad5bbc946242dd56f53de3c26feb0bb88042e6/doc/images/img_general.png


--------------------------------------------------------------------------------
/doc/images/img_general.txt:
--------------------------------------------------------------------------------
 1 | hosts.d/        probes.d/        alerts.d/
 2 | +------+   +----------------+    +-------+
 3 | |      |   |                |    |       |
 4 | | Host +-->+     Probe      | +->| Alert |
 5 | |      |   |                | |  |       |
 6 | +------+   +--------+-------+ |  +-------+
 7 |            | Script | Check +-+
 8 |            +----+---+-------+
 9 |                 :       ^
10 |                 |       |
11 |                 |  SSH  |
12 |                 +-------+
13 |               Remote machine
14 |                (monitored)
15 | 


--------------------------------------------------------------------------------
/doc/images/img_illu.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xfennec/nosee/6dad5bbc946242dd56f53de3c26feb0bb88042e6/doc/images/img_illu.jpeg


--------------------------------------------------------------------------------
/doc/images/nosee-influxdb-grafana.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xfennec/nosee/6dad5bbc946242dd56f53de3c26feb0bb88042e6/doc/images/nosee-influxdb-grafana.png


--------------------------------------------------------------------------------
/etc/alerts.d/example.txt:
--------------------------------------------------------------------------------
 1 | ## Rename this file with ".toml" extension
 2 | 
 3 | name = "My alert"
 4 | disabled = false
 5 | 
 6 | targets = ["preprod", "linux & production"]
 7 | # to capture all check failures:
 8 | #targets = ["*"]
 9 | 
10 | # command in the path or full path of a command
11 | # alert details are sent to stdin, as various env vars (see test.sh)
12 | command = "cmd"
13 | # any script in "scripts/alerts/" directory is available without any path:
14 | #command = "test.sh"
15 | 
16 | arguments = [
17 |     "arg1",
18 |     "arg2: $SUBJECT ($TYPE)",
19 | ]
20 | 
21 | # Warning: this part may change. We should probably switch to a more
22 | # generic expression here, like probe's run_if condition
23 | # This alert is only available during...
24 | hours = ["8:30 - 12:30", "14:00 - 18:00"]
25 | # sunday is 0 or 7
26 | days = [1, 2, 3, 4, 5]
27 | 
28 | 
29 | # Note: alerts listening for special class "general" can't have
30 | # such hour/day limitations
31 | 


--------------------------------------------------------------------------------
/etc/alerts.d/mail_general.toml:
--------------------------------------------------------------------------------
 1 | name = "Mail general"
 2 | disabled = false
 3 | 
 4 | targets = ["general"]
 5 | 
 6 | command = "mail"
 7 | 
 8 | arguments = [
 9 |     "-s",
10 |     "Nosee $NOSEE_SRV GENERAL: $SUBJECT",
11 |     "user@domain.tld"
12 | ]
13 | 


--------------------------------------------------------------------------------
/etc/alerts.d/nosee-console.toml:
--------------------------------------------------------------------------------
1 | name = "Nosee console"
2 | #disabled = true
3 | 
4 | targets = ["*"]
5 | 
6 | command = "nosee-console.sh"
7 | 
8 | arguments = ["http://localhost:8080/alerts"]
9 | 


--------------------------------------------------------------------------------
/etc/hosts.d/example.txt:
--------------------------------------------------------------------------------
 1 | ## Rename this file with ".toml" extension
 2 | 
 3 | # "name" is a key for the database. Change it and it'll become another host!
 4 | name = "My Host"
 5 | classes = ["linux", "http", "testing"]
 6 | disabled = false
 7 | 
 8 | [network]
 9 | host = "192.168.0.1"
10 | port = 22
11 | # Nosee defaults to sensible ciphers, but you may want to specify older
12 | # ciphers (at your own risk) for compatibility:
13 | #ciphers = ["arcfour256", "aes128-cbc"]
14 | 
15 | [auth]
16 | user = "user"
17 | 
18 | # (password) OR (key) OR (key + passphrase) OR (ssh_agent) OR (ssh_agent + key)
19 | 
20 | password = "mypassword"
21 | 
22 | key = "/home/xxx/.ssh/id_rsa_sample"
23 | key_passphrase = "mypassphrase"
24 | 
25 | ssh_agent = true
26 | # If you don't want to test every single key in the agent, give the
27 | # corresponding public key:
28 | #pubkey = "/home/xxx/.ssh/id_rsa_sample.pub"
29 | 
30 | # you can override probe defaults for a specific host:
31 | [[default]]
32 | name = "warn_ping_latency"
33 | value = 10
34 | 
35 | # or defaults for a probe:
36 | [[default]]
37 | name = "ifband_interface"
38 | value = "enp1s0f0"
39 | 


--------------------------------------------------------------------------------
/etc/hosts.d/test.toml:
--------------------------------------------------------------------------------
 1 | name = "Development server"
 2 | classes = ["linux", "development"]
 3 | 
 4 | [network]
 5 | host = "192.168.0.41"
 6 | port = 22
 7 | 
 8 | [auth]
 9 | user = "deploy"
10 | key = "/home/user/.ssh/id_rsa_devsrv"
11 | 


--------------------------------------------------------------------------------
/etc/nosee.toml:
--------------------------------------------------------------------------------
 1 | # global configuration for Nosee
 2 | 
 3 | # Nosee server name (useful if you have multiple Nosee servers)
 4 | # default: ""
 5 | #name="Test"
 6 | 
 7 | # This option helps to ease the global load induced by all SSH connections.
 8 | # default: 15s
 9 | #start_time_spread = "15s"
10 | 
11 | # Maximum connection time for an SSH connection. (will trigger a "general" class alert)
12 | # default: 10s
13 | #ssh_connection_time_warn = "6s"
14 | 
15 | # Currently, nosee will look at $HOME/.ssh/known_hosts for host fingerprints,
16 | # unless you set this to true, accepting blindly any fingerprint.
17 | # This is a potential security issue. (MitM attack)
18 | #ssh_blindtrust_fingerprints = false
19 | 
20 | # Path to save current fails so Nosee can be restarted without losing status
21 | # (see nosee-fails.json file)
22 | # default: "./"
23 | #save_path = "/home/user/.nosee/"
24 | 
25 | # Nosee will regularly execute all "scripts/heartbeats" as a keepalive
26 | # default: 30s
27 | #heartbeat_delay = "5s"
28 | 


--------------------------------------------------------------------------------
/etc/probes.d/apache_modstatus.toml:
--------------------------------------------------------------------------------
1 | name = "Apache mod_status"
2 | targets = ["linux & mod_status"]
3 | 
4 | script = "apache_modstatus.sh"
5 | 
6 | delay = "1m"
7 | timeout = "5s"
8 | 


--------------------------------------------------------------------------------
/etc/probes.d/backup_daily.toml:
--------------------------------------------------------------------------------
 1 | name = "daily backup"
 2 | targets = ["linux & backupd"]
 3 | #disabled = true
 4 | 
 5 | script = "backup.sh"
 6 | arguments = "$start_file $ok_file"
 7 | 
 8 | delay = "30m"
 9 | timeout = "8s"
10 | 
11 | ### Default values
12 | 
13 | [[default]]
14 | name = "start_file"
15 | value = "/tmp/backup.start"
16 | 
17 | [[default]]
18 | name = "ok_file"
19 | value = "/tmp/backup.ok"
20 | 
21 | [[default]]
22 | name = "backup_margin_hours"
23 | value = 3
24 | 
25 | [[default]]
26 | name = "backup_duration_warn"
27 | value = 5
28 | 
29 | ### Checks
30 | 
31 | [[check]]
32 | desc = "backup too old"
33 | if = "LAST_OK_HOURS > (24+backup_margin_hours)"
34 | classes = ["critical"]
35 | 
36 | [[check]]
37 | desc = "backup too long"
38 | if = "LAST_DURATION_HOURS > backup_duration_warn"
39 | classes = ["warning"]
40 | 


--------------------------------------------------------------------------------
/etc/probes.d/backup_week.toml:
--------------------------------------------------------------------------------
 1 | name = "backup check"
 2 | targets = ["linux & backupw"]
 3 | #disabled = true
 4 | 
 5 | script = "backup.sh"
 6 | arguments = "$start_file $ok_file"
 7 | 
 8 | #              +------+                      +-----
 9 | # exp:     27h |  ""  | 27h              27h |
10 | #        +------+    +------+          +------+
11 | #        |     ||    ||     |          |     ||
12 | #       +#.--#+#.--#+#.---+-.---+----#+#---#+#.--#+
13 | # enab: ****************-----------------**********
14 | #         Thu   Fri   Sat   Sun   Mon   Tue   Wed
15 | # dow:     4     5     6     0     1     2     3
16 | 
17 | run_if = """
18 |     (date('dow') == 3 || date('dow') == 4 || date('dow') == 5) ||
19 |     (date('dow') == 6 && date('time') <= 8) ||
20 |     (date('dow') == 2 && date('time') >= 8)
21 | """
22 | 
23 | delay = "30m"
24 | timeout = "8s"
25 | 
26 | ### Default values
27 | 
28 | [[default]]
29 | name = "start_file"
30 | value = "/tmp/backup.start"
31 | 
32 | [[default]]
33 | name = "ok_file"
34 | value = "/tmp/backup.ok"
35 | 
36 | [[default]]
37 | name = "backup_margin_hours"
38 | value = 3
39 | 
40 | [[default]]
41 | name = "backup_duration_warn"
42 | value = 5
43 | 
44 | ### Checks
45 | 
46 | [[check]]
47 | desc = "backup too old"
48 | if = "LAST_OK_HOURS > (24+backup_margin_hours)"
49 | classes = ["critical"]
50 | 
51 | [[check]]
52 | desc = "backup too long"
53 | if = "LAST_DURATION_HOURS > backup_duration_warn"
54 | classes = ["warning"]
55 | 


--------------------------------------------------------------------------------
/etc/probes.d/cert_example.toml:
--------------------------------------------------------------------------------
 1 | name = "example.com certificate validity"
 2 | targets = ["example_com"]
 3 | #disabled = true
 4 | 
 5 | script = "cert_check.sh"
 6 | arguments = "/etc/pki/tls/certs/example.com.crt 15"
 7 | 
 8 | delay = "60m"
 9 | timeout = "5s"
10 | 
11 | ### Checks
12 | 
13 | [[check]]
14 | desc = "certificate will expire soon"
15 | if = "WILL_EXPIRE != 0"
16 | classes = ["warning"]
17 | 


--------------------------------------------------------------------------------
/etc/probes.d/cpu_lms_temp.toml:
--------------------------------------------------------------------------------
 1 | name = "CPU lm_sensors temperature"
 2 | targets = ["linux & lm_sensors"]
 3 | 
 4 | script = "cpu_lms_temp.sh"
 5 | 
 6 | delay = "1m"
 7 | timeout = "5s"
 8 | 
 9 | ### Checks
10 | 
11 | [[check]]
12 | desc = "high CPU temperature"
13 | if = "TEMP > HIGH"
14 | classes = ["warning"]
15 | needed_failures = 2
16 | 
17 | [[check]]
18 | desc = "critical CPU temperature"
19 | if = "TEMP > CRIT"
20 | classes = ["critical"]
21 | 


--------------------------------------------------------------------------------
/etc/probes.d/cpu_temp.toml:
--------------------------------------------------------------------------------
 1 | ## Sample probe
 2 | 
 3 | name = "CPU temperature"
 4 | targets = ["linux"]
 5 | disabled = false
 6 | 
 7 | script = "cpu_temp.sh"
 8 | arguments = "0"
 9 | 
10 | delay = "2m"
11 | timeout = "5s"
12 | 
13 | ### Default values
14 | # types: int, float, string
15 | 
16 | [[default]]
17 | name = "warn_cpu_temp"
18 | value = 75
19 | 
20 | [[default]]
21 | name = "error_cpu_temp"
22 | value = 85
23 | 
24 | ### Checks
25 | 
26 | [[check]]
27 | desc = "high CPU0 temperature"
28 | if = "TEMP > warn_cpu_temp"
29 | classes = ["warning"]
30 | needed_failures = 2
31 | 
32 | [[check]]
33 | desc = "critical CPU0 temperature"
34 | if = "TEMP > error_cpu_temp"
35 | classes = ["critical"]
36 | 


--------------------------------------------------------------------------------
/etc/probes.d/curl_expect_example.toml:
--------------------------------------------------------------------------------
 1 | name = "example.com Website"
 2 | targets = ["linux & example"]
 3 | 
 4 | script = "curl_expect.sh"
 5 | arguments = "http://example.com/ 'used for illustrative examples'"
 6 | 
 7 | delay = "5m"
 8 | timeout = "20s"
 9 | 
10 | ### Checks
11 | 
12 | [[check]]
13 | desc = "can't find expected content"
14 | if = "FOUND_EXPECTED != 1"
15 | classes = ["critical"]
16 | 


--------------------------------------------------------------------------------
/etc/probes.d/df.toml:
--------------------------------------------------------------------------------
 1 | name = "disk free"
 2 | targets = ["linux"]
 3 | #disabled = true
 4 | 
 5 | script = "df.sh"
 6 | 
 7 | delay = "30m"
 8 | timeout = "8s"
 9 | 
10 | ### Default values
11 | 
12 | [[default]]
13 | name = "df_warn_perc"
14 | value = 95
15 | 
16 | ### Checks
17 | 
18 | [[check]]
19 | desc = "disk almost full"
20 | if = "FULLEST_PERC > df_warn_perc"
21 | classes = ["warning"]
22 | 


--------------------------------------------------------------------------------
/etc/probes.d/example.txt:
--------------------------------------------------------------------------------
 1 | ## Rename this file with ".toml" extension
 2 | 
 3 | name="My Probe"
 4 | 
 5 | script = "script.sh"
 6 | disabled = false
 7 | 
 8 | targets = ["linux & test", "windows & test"]
 9 | # If you want to match all hosts (all classes):
10 | # targets = ["*"]
11 | 
12 | # probe repetition delay (must be a whole number of minutes [not 2m30, for instance])
13 | # minimum value: 1m
14 | delay = "5m"
15 | 
16 | # if the probe takes more than this time, it will trigger an error
17 | # default: 20s
18 | timeout = "30s"
19 | 
20 | # check only between 8:00 and 18:00
21 | run_if = "date('time') >= 8 && date('time') <= 18"
22 | 
23 | ### Default values (used by checks)
24 | # types: int, float, string
25 | # not "all uppercase" (reserved for probe values)
26 | 
27 | [[default]]
28 | name = "value_foo"
29 | value = 0.90
30 | 
31 | [[default]]
32 | name = "value_bar"
33 | value = "200 OK"
34 | 
35 | ### Checks
36 | 
37 | [[check]]
38 | desc = "check description"
39 | if = "VALUE1_FROM_SCRIPT > value_foo"
40 | classes = ["critical"]
41 | # will trigger the alert once the check has failed two times (default: 1)
42 | needed_failures = 2
43 | # will delete the "suspicion" if check is OK three times (default: needed_failures)
44 | needed_successes = 3
45 | 
46 | [[check]]
47 | desc = "check description"
48 | if = "VALUE1_FROM_SCRIPT+VALUE2_FROM_SCRIPT < value_foo"
49 | classes = ["warning"]
50 | 


--------------------------------------------------------------------------------
/etc/probes.d/ifband.toml:
--------------------------------------------------------------------------------
 1 | name = "bandwidth"
 2 | targets = ["linux & ifband"]
 3 | #disabled = true
 4 | 
 5 | script = "ifband.sh"
 6 | arguments = "$ifband_interface"
 7 | 
 8 | delay = "1m"
 9 | timeout = "5s"
10 | 
11 | ### Default values
12 | 
13 | [[default]]
14 | name = "ifband_interface"
15 | value = "eth0"
16 | 


--------------------------------------------------------------------------------
/etc/probes.d/load.toml:
--------------------------------------------------------------------------------
 1 | name = "system load"
 2 | targets = ["linux"]
 3 | #disabled = true
 4 | 
 5 | script = "load.sh"
 6 | arguments = "$load_normal_cmd"
 7 | 
 8 | delay = "1m"
 9 | timeout = "8s"
10 | 
11 | ### Default values
12 | 
13 | [[default]]
14 | name = "load_normal_cmd"
15 | value = "/root/backup.sh"
16 | 
17 | [[default]]
18 | name = "load_margin"
19 | value = 0
20 | 
21 | ### Checks
22 | 
23 | [[check]]
24 | desc = "heavy system load"
25 | if = "LOAD > (CPU_COUNT+load_margin) && PROG_DETECTED == 0"
26 | classes = ["warning"]
27 | needed_failures = 2
28 | 


--------------------------------------------------------------------------------
/etc/probes.d/mdstat.toml:
--------------------------------------------------------------------------------
 1 | name = "Linux md-raid states"
 2 | targets = ["linux"]
 3 | #disabled = true
 4 | 
 5 | script = "mdstat.sh"
 6 | 
 7 | delay = "5m"
 8 | timeout = "15s"
 9 | 
10 | ### Checks
11 | 
12 | [[check]]
13 | desc = "md-raid failure"
14 | if = "ERR_ARRAYS > 0"
15 | classes = ["critical"]
16 | 


--------------------------------------------------------------------------------
/etc/probes.d/mem.toml:
--------------------------------------------------------------------------------
 1 | name = "memory (RAM and swap)"
 2 | 
 3 | script = "mem.sh"
 4 | disabled = false
 5 | 
 6 | targets = ["linux", "windows"]
 7 | 
 8 | delay = "5m"
 9 | # WMI can be veeeery slow :(
10 | timeout = "30s"
11 | 
12 | ### Default values
13 | # types: int, float, string
14 | # not "all uppercase" (reserved for probe values)
15 | [[default]]
16 | name = "min_available_ratio"
17 | value = 0.20
18 | 
19 | [[default]]
20 | name = "warn_swap_ratio"
21 | value = 0.30
22 | 
23 | ### Checks
24 | 
25 | [[check]]
26 | desc = "critical available memory ratio"
27 | if = "MEM_AVAILABLE_RATIO < min_available_ratio"
28 | classes = ["critical"]
29 | 
30 | [[check]]
31 | desc = "high swap usage ratio"
32 | if = "SWAP_USED_RATIO > warn_swap_ratio"
33 | classes = ["warning"]
34 | 


--------------------------------------------------------------------------------
/etc/probes.d/ping.toml:
--------------------------------------------------------------------------------
 1 | name = "ping to router"
 2 | targets = ["linux"]
 3 | #disabled = true
 4 | 
 5 | script = "ping.sh"
 6 | arguments = "192.168.0.250"
 7 | 
 8 | delay = "1m"
 9 | timeout = "8s"
10 | 
11 | ### Default values
12 | 
13 | [[default]]
14 | name = "err_ping_loss"
15 | value = 1
16 | 
17 | [[default]]
18 | name = "warn_ping_latency"
19 | value = 1
20 | 
21 | ### Checks
22 | 
23 | [[check]]
24 | desc = "critical ping loss"
25 | if = "LOSS_PERC >= err_ping_loss"
26 | classes = ["critical"]
27 | needed_failures = 2
28 | 
29 | [[check]]
30 | desc = "ping latency"
31 | if = "AVG_MS > warn_ping_latency"
32 | classes = ["warning"]
33 | needed_failures = 2
34 | 


--------------------------------------------------------------------------------
/etc/probes.d/port_80.toml:
--------------------------------------------------------------------------------
 1 | name = "HTTP port"
 2 | targets = ["linux & http"]
 3 | #disabled = true
 4 | 
 5 | script = "port.sh"
 6 | arguments = "80"
 7 | 
 8 | delay = "1m"
 9 | 
10 | ### Checks
11 | 
12 | [[check]]
13 | desc = "port 80 is not open"
14 | if = "OPEN != 1"
15 | classes = ["critical"]
16 | 


--------------------------------------------------------------------------------
/etc/probes.d/systemd_httpd.toml:
--------------------------------------------------------------------------------
 1 | name = "Apache status (systemd)"
 2 | targets = ["linux & systemd & apache"]
 3 | #disabled = true
 4 | 
 5 | script = "systemctl_status.sh"
 6 | arguments = "httpd.service"
 7 | 
 8 | delay = "1m"
 9 | timeout = "5s"
10 | 
11 | ### Checks
12 | 
13 | [[check]]
14 | desc = "Apache status"
15 | if = "STATUS != 'active'"
16 | classes = ["critical"]
17 | 


--------------------------------------------------------------------------------
/etc/scripts/alerts/nosee-console.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Alert script: POSTs one alert to a nosee-console instance.
 4 | # $1 is the console URL, stdin carries the alert details and the other
 5 | # fields are read from environment variables (TYPE, SUBJECT, CLASSES, ...).
 6 | 
 7 | console_url="$1"
 8 | 
 9 | if [ -z "$console_url" ]; then
10 |     echo "ERROR: give nosee console URL (ex: http://localhost:8080/alerts)" >&2
11 |     exit 1
12 | fi
13 | 
14 | details=$(cat)
15 | 
16 | form_fields=(
17 |     --form-string "type=$TYPE"
18 |     --form-string "subject=$SUBJECT"
19 |     --form-string "details=$details"
20 |     --form-string "classes=$CLASSES"
21 |     --form-string "hostname=$HOST_NAME"
22 |     --form-string "nosee_srv=$NOSEE_SRV"
23 |     --form-string "uniqueid=$UNIQUEID"
24 |     --form-string "datetime=$DATETIME"
25 | )
26 | 
27 | # -f makes curl return a non-zero status on HTTP errors; relay it as exit 1
28 | curl -s -f -w "HTTP Code %{http_code}\n" "${form_fields[@]}" "$console_url" || exit 1
29 | 


--------------------------------------------------------------------------------
/etc/scripts/alerts/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Test script to show all input channels
 4 | 
 5 | file="/tmp/remove_me"
 6 | 
 7 | echo "stdout test"
 8 | (>&2 echo "stderr test")
 9 | 
10 | date > $file
11 | echo "$0" >> $file
12 | echo "$1" >> $file
13 | echo "$2" >> $file
14 | echo "$3" >> $file
15 | echo "$4" >> $file
16 | 
17 | echo "$SUBJECT" >> $file
18 | echo $USER >> $file
19 | echo $TYPE >> $file
20 | echo $NOSEE_SRV >> $file
21 | 
22 | # stdin is $DETAILS
23 | cat >> $file
24 | echo $HOME >> $file
25 | 


--------------------------------------------------------------------------------
/etc/scripts/heartbeats/nosee-console.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # nosee console heartbeat URL
 4 | url="http://localhost:8080/heartbeat"
 5 | 
 6 | # NOSEE_SRV, VERSION, DATETIME, STARTTIME, UPTIME
 7 | 
 8 | curl -s -f -w "HTTP Code %{http_code}\n" \
 9 |     --form-string "uptime=$UPTIME" \
10 |     --form-string "server=$NOSEE_SRV" \
11 |     --form-string "version=$VERSION" \
12 |     "$url"
13 | 
14 | if [ $? -ne 0 ]; then
15 |     exit 1
16 | fi
17 | 


--------------------------------------------------------------------------------
/etc/scripts/loggers/influxdb.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Logger script: turns the probe values read on stdin into InfluxDB line
 4 | # protocol and POSTs them to the "nosee" database of a local InfluxDB.
 5 | # HOST_FILE comes from the environment; its ".toml" suffix is stripped to
 6 | # build the "host" tag.
 7 | 
 8 | host=${HOST_FILE%.toml}
 9 | 
10 | # input lines look like:
11 | # df.toml;DISK_FULLEST_PERC;27
12 | res=$(awk -v host="$host" -F\; '{
13 |     probe=$1
14 |     key=$2
15 |     val=$3
16 |     sub(/\.toml$/, "", probe)
17 |     measurement=sprintf("%s_%s", probe, key)
18 |     # Only a value that is a number from start to end is sent unquoted.
19 |     # Anything else (e.g. "active", "eth0") must be a quoted string field,
20 |     # even when it happens to contain a digit or a dot.
21 |     if (val ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$/)
22 |         printf("%s,host=%s value=%s\n", measurement,host,val)
23 |     else
24 |         printf("%s,host=%s value=\"%s\"\n", measurement,host,val)
25 | }')
26 | 
27 | curl -i -XPOST 'http://localhost:8086/write?db=nosee' --data-binary "$res"
28 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/apache_modstatus.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Probe: Apache request and traffic rates, computed from two successive
 4 | # readings of the mod_status counters. The previous reading is kept in
 5 | # $stat_file between runs.
 6 | #
 7 | # Server must have mod_status loaded and configured with something like:
 8 | #<Location /server-status>
 9 | #    SetHandler server-status
10 | #    Order deny,allow
11 | #    Deny from all
12 | #    Allow from 127.0.0.1 ::1
13 | #</Location>
14 | 
15 | # ExtendedStatus must be set to On (default since Apache 2.3.6)
16 | 
17 | stat_file="$HOME/.apache-modstatus"
18 | NOW=$(date +%s)
19 | 
20 | page=$(curl --silent -f "http://localhost/server-status?auto")
21 | if [ $? -ne 0 ]; then
22 |     (>&2 echo "ERROR: unable to get status (mod_status OK on localhost?)")
23 |     exit 1
24 | fi
25 | 
26 | requests=$(echo "$page" | grep '^Total Accesses' | awk -F ': ' '{print $2}')
27 | kbytes=$(echo "$page" | grep '^Total kBytes' | awk -F ': ' '{print $2}')
28 | 
29 | # Defaults used on the first run (no state file yet): the time delta is
30 | # then 0 and both rates are reported as 0.
31 | LAST_CALL=$NOW
32 | LAST_REQUESTS=$requests
33 | LAST_KBYTES=$kbytes
34 | if [ -f "$stat_file" ]; then
35 |     . "$stat_file"
36 | fi
37 | 
38 | REQUESTS=$requests
39 | KBYTES=$kbytes
40 | 
41 | time_diff=$(echo $LAST_CALL $NOW | awk '{print ($2 - $1)}')
42 | requests_diff=$(echo $LAST_REQUESTS $REQUESTS | awk '{print ($2 - $1)}')
43 | kbytes_diff=$(echo $LAST_KBYTES $KBYTES | awk '{print ($2 - $1)}')
44 | 
45 | if [ "$time_diff" -eq 0 ]; then
46 |     RPS=0
47 |     KBPS=0
48 | else
49 |     # negative rates are clamped to 0 (presumably a counter reset)
50 |     RPS=$(echo $requests_diff $time_diff | awk '{t=$1/$2; printf ("%f", (t>0?t:0))}')
51 |     KBPS=$(echo $kbytes_diff $time_diff | awk '{t=$1/$2; printf ("%f", (t>0?t:0))}')
52 | fi
53 | 
54 | # Save this reading for the next run, in one write.
55 | {
56 |     echo "LAST_CALL=$NOW"
57 |     echo "LAST_REQUESTS=$REQUESTS"
58 |     echo "LAST_KBYTES=$KBYTES"
59 | } > "$stat_file"
60 | 
61 | echo RPS: $RPS
62 | echo KBPS: $KBPS
63 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/backup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # ex: backup.sh /tmp/backup.start /tmp/backup.ok
 4 | 
 5 | if [ -z "$2" ]; then
 6 |     (>&2 echo "ERROR: give 'start' flag file and 'ok' flag file")
 7 |     exit 1
 8 | fi
 9 | 
10 | start_file="$1"
11 | ok_file="$2"
12 | 
13 | if [ ! -f "$start_file" ]; then
14 |     (>&2 echo "ERROR: can't read start file '$start_file'")
15 |     exit 1
16 | fi
17 | if [ ! -f "$ok_file" ]; then
18 |     (>&2 echo "ERROR: can't read ok file '$ok_file'")
19 |     exit 1
20 | fi
21 | 
22 | ok_tmsp=$(date +%s -r "$ok_file")
23 | start_tmsp=$(date +%s -r "$start_file")
24 | now=$(date +%s)
25 | 
26 | last_ok_hours=$(echo $ok_tmsp $now | awk '{ diff=$2-$1; print diff/60/60 }')
27 | last_duration=$(echo $start_tmsp $ok_tmsp | awk '{
28 |     diff=$2-$1;
29 |     if (diff > 0)
30 | 	print diff/60/60
31 |     else
32 | 	print 0
33 | }')
34 | 
35 | echo "LAST_OK_HOURS:" $last_ok_hours
36 | echo "LAST_DURATION_HOURS:" $last_duration
37 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/cert_check.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Probe: tells whether a certificate expires within the given number of days.
 4 | # Output: WILL_EXPIRE is the exit status of "openssl x509 -checkend"
 5 | # (0 when the certificate is still valid after that delay).
 6 | 
 7 | if [ -z "$2" ]; then
 8 |     (>&2 echo "ERROR: give certificate path and 'days to expire'")
 9 |     (>&2 echo "ERROR: Usage: $0 /etc/pki/tls/certs/myweb.crt 15")
10 |     exit 1
11 | fi
12 | 
13 | cert_path=$1
14 | days_to_expire=$2
15 | 
16 | # openssl also exits non-zero when it cannot read the file: fail loudly
17 | # instead of reporting a missing certificate as "will expire".
18 | if [ ! -r "$cert_path" ]; then
19 |     (>&2 echo "ERROR: can't read certificate '$cert_path'")
20 |     exit 1
21 | fi
22 | 
23 | # -checkend expects a delay in seconds
24 | seconds=$((days_to_expire*24*60*60))
25 | 
26 | openssl x509 -checkend "$seconds" -noout -in "$cert_path"
27 | res=$?
28 | 
29 | echo "WILL_EXPIRE:" $res
30 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/cpu_lms_temp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This script uses lm_sensors to average the temperature of all CPU cores.
 4 | # HIGH and CRIT are the lowest "high" and "crit" limits seen on any core
 5 | # (999 when sensors does not print them).
 6 | # Required sensors output format:
 7 | # ...
 8 | # Core 0:         +33.0°C  (high = +82.0°C, crit = +102.0°C)
 9 | # Core 1:         +32.0°C  (high = +82.0°C, crit = +102.0°C)
10 | 
11 | # NOTE: match() with a third (array) argument is a gawk extension.
12 | sensors | awk '
13 | BEGIN {
14 |     total = 0
15 |     cores = 0
16 |     high = 999
17 |     crit = 999
18 | }
19 | /^Core/ {
20 |     if (match($0, /\+([0-9.]+)°C.*\+([0-9.]+)°C,.*\+([0-9.]+)°C/, g) > 0) {
21 |         total += g[1]
22 |         high = (g[2] < high ? g[2] : high)
23 |         crit = (g[3] < crit ? g[3] : crit)
24 |         cores++
25 |     } else if (match($0, /\+([0-9.]+)°C/, g) > 0) {
26 |         # core line without limits: only the temperature is usable
27 |         total += g[1]
28 |         cores++
29 |     }
30 | }
31 | END {
32 |     # No usable "Core" line: the average would be a division by zero.
33 |     if (cores == 0) {
34 |         print "ERROR: no CPU core temperature found in sensors output" > "/dev/stderr"
35 |         exit 1
36 |     }
37 |     printf("TEMP: %f\n", total / cores)
38 |     printf("HIGH: %f\n", high)
39 |     printf("CRIT: %f\n", crit)
40 | }
41 | '
42 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/cpu_temp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Probe: temperature of one thermal zone, read from sysfs.
 4 | # $1 is the thermal zone number; TEMP is the raw value divided by 1000.
 5 | 
 6 | if [ -z "$1" ]; then
 7 |     (>&2 echo "ERROR: give thermal zone number (ex: 0)")
 8 |     exit 1
 9 | fi
10 | 
11 | file="/sys/class/thermal/thermal_zone$1/temp"
12 | 
13 | if [ ! -f "$file" ]; then
14 |     (>&2 echo "ERROR: invalid path: $file")
15 |     exit 2
16 | fi
17 | 
18 | val=$(cat "$file")
19 | # The value is handed to awk as data (-v), never pasted into the program
20 | # text: an empty or odd file content cannot turn into awk code.
21 | temp=$(awk -v raw="$val" 'BEGIN {print raw/1000}')
22 | echo "TEMP:" $temp
23 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/curl.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Probe: fetches a URL and prints its body as-is.
 4 | # the URL must display usual "KEY: val\nKEY2: val2" format
 5 | 
 6 | url="$1"
 7 | 
 8 | if [ -z "$url" ]; then
 9 |     echo "ERROR: give URL" >&2
10 |     exit 1
11 | fi
12 | 
13 | curl --max-time 15 --silent -f "$url"
14 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/curl_expect.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Probe: fetches a URL and reports whether the page contains a pattern.
 4 | # FOUND_EXPECTED is 1 when at least one line matches, 0 otherwise
 5 | # (including when the download itself fails).
 6 | 
 7 | if [ -z "$2" ]; then
 8 |     echo "ERROR: give URL and an expected string" >&2
 9 |     echo "Usage example: $0 'http://www.perdu.com/' 'Pas de panique'" >&2
10 |     exit 1
11 | fi
12 | 
13 | url=$1
14 | expected=$2
15 | 
16 | found=0
17 | 
18 | if page=$(curl --max-time 15 --silent -f "$url"); then
19 |     # grep -q succeeds as soon as one line matches
20 |     if echo "$page" | grep -q "$expected"; then
21 |         found=1
22 |     fi
23 | fi
24 | 
25 | echo "FOUND_EXPECTED:" $found
26 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/df.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Probe: disk usage of the /dev/* filesystems reported by df
 4 | # (mount points under /mnt/ and /snap/ are ignored).
 5 | # Output: FULLEST_PERC (highest use%) and one DF_<MOUNT>_PERC per filesystem.
 6 | 
 7 | lines=$(df -kP | grep '^/dev/' | grep -v '[[:space:]]/mnt/' | grep -v '[[:space:]]/snap/')
 8 | 
 9 | # Nothing to report: stop here rather than print keys without values.
10 | if [ -z "$lines" ]; then
11 |     (>&2 echo "ERROR: no /dev/ filesystem found in df output")
12 |     exit 1
13 | fi
14 | 
15 | fullest=$(echo "$lines" | awk '{print $5}' | cut -d% -f1 | sort -n | tail -n1)
16 | 
17 | echo "FULLEST_PERC:" $fullest
18 | 
19 | # one "use% mount-point" pair per line
20 | all=$(echo "$lines" | awk '{print $5,$6}')
21 | while read -r perc mount; do
22 |     used=${perc%\%}
23 |     # Build the key from the mount point: every "/" and every "-" becomes
24 |     # "_" (not only the first "-"), then the leading "_" is dropped.
25 |     name=$(echo "$mount" | sed -e 's#[/-]#_#g' -e 's/^_//')
26 |     if [ -z "$name" ]; then
27 |         # the root filesystem ("/") is left with an empty name
28 |         name="ROOT"
29 |     fi
30 |     echo "DF_${name^^}_PERC:" $used
31 | done <<< "$all"
32 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/ifband.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Probe: average RX/TX bandwidth (KB/s) of a network interface since the
 4 | # previous run. The previous byte counters are kept in $stat_file.
 5 | 
 6 | interface=$1
 7 | 
 8 | # Validate the argument before building any path from it.
 9 | if [ -z "$interface" ]; then
10 |     (>&2 echo "USAGE: $0 interface-name")
11 |     exit 1
12 | fi
13 | 
14 | if_dir="/sys/class/net/$interface/statistics"
15 | stat_file="$HOME/.ifband-$interface"
16 | NOW=$(date +%s)
17 | 
18 | if [ ! -d "$if_dir" ]; then
19 |     (>&2 echo "ERROR: unable to find $interface stats")
20 |     exit 1
21 | fi
22 | 
23 | # Defaults used on the first run (no state file yet): the time delta is
24 | # then 0 and both rates are reported as 0.
25 | LAST_CALL=$NOW
26 | LAST_RX=$(cat "$if_dir/rx_bytes")
27 | LAST_TX=$(cat "$if_dir/tx_bytes")
28 | 
29 | if [ -f "$stat_file" ]; then
30 |     . "$stat_file"
31 | fi
32 | 
33 | RX=$(cat "$if_dir/rx_bytes")
34 | TX=$(cat "$if_dir/tx_bytes")
35 | 
36 | time_diff=$(echo $LAST_CALL $NOW | awk '{print ($2 - $1)}')
37 | rx_diff=$(echo $LAST_RX $RX | awk '{print ($2 - $1)}')
38 | tx_diff=$(echo $LAST_TX $TX | awk '{print ($2 - $1)}')
39 | 
40 | #echo $time_diff $rx_diff $tx_diff
41 | if [ "$time_diff" -eq 0 ]; then
42 |     RX_KBPS=0
43 |     TX_KBPS=0
44 | else
45 |     RX_KBPS=$(echo $rx_diff $time_diff | awk '{printf ("%i", $1 / $2 / 1024)}')
46 |     TX_KBPS=$(echo $tx_diff $time_diff | awk '{printf ("%i", $1 / $2 / 1024)}')
47 | fi
48 | 
49 | # negative rates are clamped to 0 (presumably a counter reset)
50 | if [ "$RX_KBPS" -le 0 ]; then
51 |     RX_KBPS=0
52 | fi
53 | if [ "$TX_KBPS" -le 0 ]; then
54 |     TX_KBPS=0
55 | fi
56 | 
57 | # Save this reading for the next run, in one write.
58 | {
59 |     echo "LAST_CALL=$NOW"
60 |     echo "LAST_RX=$RX"
61 |     echo "LAST_TX=$TX"
62 | } > "$stat_file"
63 | 
64 | echo RX_KBPS: $RX_KBPS
65 | echo TX_KBPS: $TX_KBPS
66 | 
67 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/load.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # load.sh [prog1] [prog2] [script3] [...]
 4 | # will return "PROG_DETECTED: 1" if any
 5 | # of the prog/script is found ("my load is high
 6 | # but my backup is running, so it's ok")
 7 | 
 8 | # CentOS 6/7 have a minimalist PATH on non-login SSH connections
 9 | # and 'pidof' is often hosted in /sbin
10 | PATH=$PATH:/sbin
11 | 
12 | if [ -f /proc/loadavg ]; then
13 |     load=$(awk '{print $1}' /proc/loadavg)
14 | else
15 |     # no /proc/loadavg: take the 1-minute figure from uptime instead
16 |     load_field=$(LANG=C uptime | awk -F, '{print $(NF-2)}')
17 |     load=$(echo "$load_field" | awk -F: '{print $2}')
18 | fi
19 | 
20 | detected=0
21 | # Every argument is a program name to look for; empty ones are skipped.
22 | # (The former "[ -n $2 ]" guard was unquoted, hence always true.)
23 | for prog in "$@"; do
24 |     [ -n "$prog" ] || continue
25 |     if pidof -x "$prog" > /dev/null; then
26 |         detected=1
27 |     fi
28 | done
29 | 
30 | echo "LOAD:" $load
31 | echo "CPU_COUNT:" $(nproc)
32 | echo "PROG_DETECTED:" $detected
33 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/load_win.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $(uname -o) != "Cygwin" ]; then
 4 |     (>&2 echo "Cygwin needed")
 5 |     exit 1
 6 | fi
 7 | 
 8 | pql=$(wmic path Win32_PerfFormattedData_PerfOS_System get ProcessorQueueLength | awk 'NR==2')
 9 | echo "CPU_QUEUE_LEN:" $pql
10 | 
11 | # select PercentProcessorTime from Win32_PerfFormattedData_PerfOS_Processor where Name = '_Total'
12 | 
13 | #p=$(wmic path Win32_PerfFormattedData_PerfOS_System get PercentProcessorQueueLength | awk 'NR==2')
14 | #echo "CPU_QUEUE_LEN:" $pql
15 | 
16 | ppt=$(wmic path Win32_PerfFormattedData_PerfOS_Processor where "Name = '_Total'" get PercentProcessorTime | awk 'NR==2')
17 | echo CPU_PERCENT: $ppt
18 | 
19 | lp=$(wmic cpu get loadpercentage | awk 'NR==2')
20 | echo CPU_LOAD_PERCENT: $lp
21 | 
22 | pdt=$(wmic path Win32_PerfFormattedData_PerfDisk_PhysicalDisk where "Name='_Total'" get PercentDiskTime | awk 'NR==2')
23 | echo DISK_PERCENT: $pdt
24 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/mdstat.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | mdstat="/proc/mdstat"
 4 | 
 5 | if [ ! -f "$mdstat" ]; then
 6 |     (>&2 echo "ERROR: cant find md RAID support ($mdstat)")
 7 |     exit 1
 8 | fi
 9 | 
10 | fcount=$(grep -c "\[.*_.*\]" $mdstat)
11 | 
12 | echo "ERR_ARRAYS:" $fcount
13 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/mem.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Memory probe: prints MEM_* and SWAP_* "KEY: value" lines (sizes in MB,
# ratios as fractions of the matching total).
# Two data sources are supported: /proc/meminfo when it exposes a
# MemAvailable line, otherwise the output of 'free -m'.

# is MemAvailable supported?
# (non-empty when /proc/meminfo has a MemAvailable line)
ma_supported=$(grep "MemAvailable:" /proc/meminfo)

# meminfo_fmt KEY
# Echoes the value of the "KEY:" line of /proc/meminfo divided by 1024 and
# truncated to an integer.
# NOTE(review): /proc/meminfo values are presumably in kB, making the
# result MB — confirm.
function meminfo_fmt() {
    val=$(grep "^$1:" /proc/meminfo)
    val=$(echo "$val" | awk '{printf("%i\n", $2/1024)}')
    echo $val
}

if [ -z "$ma_supported" ]; then
    # Fallback: parse 'free -m'. Values are picked by column position.
    # NOTE(review): the positions used below assume the
    # "total used free shared buffers cached" layout — confirm on targets.
    mem=$(free -m | grep '^Mem')
    swap=$(free -m | grep '^Swap')

    # unquoted $mem collapses runs of blanks so 'cut' can split on one space
    mem_total_mb=$(echo $mem | cut -d\  -f2)
    mem_free_mb=$(echo $mem | cut -d\  -f4)

    mem_cached_mb=$(echo $mem | cut -d\  -f7)
    mem_buffers_mb=$(echo $mem | cut -d\  -f6)
    mem_buffcache_mb=$(($mem_cached_mb + $mem_buffers_mb))

    # column 3 minus columns 5, 6 and 7 of the Mem line
    mem_hardused_mb=$(echo "$mem" | awk '{printf("%.2f\n", $3-$5-$6-$7);}')
    mem_hardused_ratio=$(echo $mem_hardused_mb $mem_total_mb | awk '{printf("%.2f", $1/$2);}')

    # without MemAvailable, "available" is taken as free + buffers + cached
    mem_available_mb=$(($mem_free_mb + $mem_buffcache_mb))

    swap_total_mb=$(echo $swap | cut -d\  -f2)
    swap_free_mb=$(echo $swap | cut -d\  -f4)
    swap_used_mb=$(echo $swap | cut -d\  -f3)
    # avoid dividing by zero when there is no swap
    if [ $swap_total_mb -eq 0 ]; then
        swap_used_ratio=0
    else
        swap_used_ratio=$(echo "$swap" | awk '{printf("%.2f\n", $3/$2);}')
    fi
else
    # Preferred source: /proc/meminfo with MemAvailable.
    mem_total_mb=$(meminfo_fmt MemTotal)
    mem_available_mb=$(meminfo_fmt MemAvailable)
    # "hard used" is everything that is not reported as available
    mem_hardused_mb=$(( $mem_total_mb - $mem_available_mb ))
    mem_hardused_ratio=$(echo $mem_hardused_mb $mem_total_mb | awk '{printf("%.2f", $1/$2);}')
    mem_buffers_mb=$(meminfo_fmt Buffers)
    mem_cached_mb=$(meminfo_fmt Cached)

    swap_total_mb=$(meminfo_fmt SwapTotal)
    swap_free_mb=$(meminfo_fmt SwapFree)
    swap_used_mb=$(( $swap_total_mb - $swap_free_mb ))
    # avoid dividing by zero when there is no swap
    if [ $swap_total_mb -eq 0 ]; then
        swap_used_ratio=0
    else
        swap_used_ratio=$(echo "$swap_used_mb" "$swap_total_mb" | awk '{printf("%.2f\n", $1/$2);}')
    fi
fi

# Common to both sources: buffers+cache total, and ratios against total memory.
mem_buffcache_mb=$(($mem_cached_mb + $mem_buffers_mb))
mem_buffcache_ratio=$(echo $mem_total_mb $mem_buffcache_mb\
    | awk '{printf("%.2f\n", $2/$1);}')
mem_available_ratio=$(echo $mem_total_mb $mem_available_mb\
    | awk '{printf("%.2f\n", $2/$1);}')

echo "MEM_TOTAL_MB:" $mem_total_mb
echo "MEM_AVAILABLE_MB:" $mem_available_mb
echo "MEM_AVAILABLE_RATIO:" $mem_available_ratio
echo "MEM_HARDUSED_MB:" $mem_hardused_mb
echo "MEM_HARDUSED_RATIO:" $mem_hardused_ratio
echo "MEM_BUFFCACHE_MB:" $mem_buffcache_mb
echo "MEM_BUFFCACHE_RATIO:" $mem_buffcache_ratio
echo "SWAP_TOTAL_MB:" $swap_total_mb
echo "SWAP_FREE_MB:" $swap_free_mb
echo "SWAP_USED_MB:" $swap_used_mb
echo "SWAP_USED_RATIO:" $swap_used_ratio
71 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/ping.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]; then
 4 |     (>&2 echo "ERROR: give IP to test")
 5 |     exit 1
 6 | fi
 7 | dest=$1
 8 | 
 9 | res=$(ping -qAc5 "$dest")
10 | 
11 | loss=$(echo "$res" | grep "packets transmitted" | sed -r 's/.* ([0-9]+)%.*/\1/g')
12 | avg=$(echo "$res" | grep "^rtt" | awk -F/ '{print $5}')
13 | 
14 | echo LOSS_PERC: $loss
15 | echo AVG_MS: $avg
16 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/port.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]; then
 4 |     (>&2 echo "ERROR: give port number (ex: 443)")
 5 |     exit 1
 6 | fi
 7 | 
 8 | nc -z localhost $1 > /dev/null 2>&1
 9 | res=$?
10 | 
11 | open=0
12 | if [ $res -eq 0 ]; then
13 |     open=1
14 | fi
15 | 
16 | echo "OPEN:" $open
17 | 


--------------------------------------------------------------------------------
/etc/scripts/probes/systemctl_status.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]; then
 4 |     (>&2 echo "ERROR: give unit name (ex: httpd.service)")
 5 |     exit 1
 6 | fi
 7 | 
 8 | 
 9 | status=$(systemctl is-active "$1")
10 | echo "STATUS:" $status
11 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/Xfennec/nosee
 2 | 
 3 | go 1.18
 4 | 
 5 | require (
 6 | 	github.com/BurntSushi/toml v1.2.0
 7 | 	github.com/Knetic/govaluate v3.0.0+incompatible
 8 | 	github.com/fatih/color v1.13.0
 9 | 	github.com/satori/go.uuid v1.2.0
10 | 	github.com/urfave/cli v1.22.9
11 | 	golang.org/x/crypto v0.0.0-20220817201139-bc19a97f63c8
12 | )
13 | 
14 | require (
15 | 	github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d // indirect
16 | 	github.com/mattn/go-colorable v0.1.9 // indirect
17 | 	github.com/mattn/go-isatty v0.0.14 // indirect
18 | 	github.com/russross/blackfriday/v2 v2.0.1 // indirect
19 | 	github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect
20 | 	golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c // indirect
21 | )
22 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 2 | github.com/BurntSushi/toml v1.2.0 h1:Rt8g24XnyGTyglgET/PRUNlrUeu9F5L+7FilkXfZgs0=
 3 | github.com/BurntSushi/toml v1.2.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
 4 | github.com/Knetic/govaluate v3.0.0+incompatible h1:7o6+MAPhYTCF0+fdvoz1xDedhRb4f6s9Tn1Tt7/WTEg=
 5 | github.com/Knetic/govaluate v3.0.0+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
 6 | github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
 7 | github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 8 | github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w=
 9 | github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
10 | github.com/mattn/go-colorable v0.1.9 h1:sqDoxXbdeALODt0DAeJCVp38ps9ZogZEAXjus69YV3U=
11 | github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
12 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
13 | github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
14 | github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
15 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
16 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
17 | github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
18 | github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
19 | github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww=
20 | github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
21 | github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
22 | github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
23 | github.com/urfave/cli v1.22.9 h1:cv3/KhXGBGjEXLC4bH0sLuJ9BewaAbpk5oyMOveu4pw=
24 | github.com/urfave/cli v1.22.9/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
25 | golang.org/x/crypto v0.0.0-20220817201139-bc19a97f63c8 h1:GIAS/yBem/gq2MUqgNIzUHW7cJMmx3TGZOrnyYaNQ6c=
26 | golang.org/x/crypto v0.0.0-20220817201139-bc19a97f63c8/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
27 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
28 | golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
29 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I=
30 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
31 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E=
32 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
33 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
34 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
35 | 


--------------------------------------------------------------------------------
/heartbeat.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"fmt"
 6 | 	"io/ioutil"
 7 | 	"os"
 8 | 	"os/exec"
 9 | 	"path"
10 | 	"path/filepath"
11 | 	"time"
12 | )
13 | 
14 | func heartbeatsList(config *Config) ([]string, error) {
15 | 	hbDirPath := path.Clean(config.configPath + "/scripts/heartbeats/")
16 | 	stat, err := os.Stat(hbDirPath)
17 | 
18 | 	if err != nil {
19 | 		return nil, fmt.Errorf("invalid 'heartbeats' directory '%s': %s", hbDirPath, err)
20 | 	}
21 | 
22 | 	if !stat.Mode().IsDir() {
23 | 		return nil, fmt.Errorf("is not a directory '%s'", hbDirPath)
24 | 	}
25 | 
26 | 	scripts, err := filepath.Glob(hbDirPath + "/*")
27 | 	if err != nil {
28 | 		return nil, fmt.Errorf("error listing '%s' directory: %s", hbDirPath, err)
29 | 	}
30 | 
31 | 	for _, scriptPath := range scripts {
32 | 		stat, err := os.Stat(scriptPath)
33 | 
34 | 		if err != nil {
35 | 			return nil, fmt.Errorf("invalid 'script' file '%s': %s", scriptPath, err)
36 | 		}
37 | 
38 | 		if !stat.Mode().IsRegular() {
39 | 			return nil, fmt.Errorf("is not a regular 'script' file '%s'", scriptPath)
40 | 		}
41 | 
42 | 		_, err = ioutil.ReadFile(scriptPath)
43 | 		if err != nil {
44 | 			return nil, fmt.Errorf("error reading script file '%s': %s", scriptPath, err)
45 | 		}
46 | 	}
47 | 
48 | 	return scripts, nil
49 | }
50 | 
51 | func heartbeatExecute(script string) {
52 | 	varMap := make(map[string]interface{})
53 | 	varMap["NOSEE_SRV"] = GlobalConfig.Name
54 | 	varMap["VERSION"] = NoseeVersion
55 | 	varMap["DATETIME"] = time.Now().Format(time.RFC3339)
56 | 	varMap["STARTTIME"] = appStartTime.Format(time.RFC3339)
57 | 	varMap["UPTIME"] = (int)(time.Since(appStartTime).Seconds())
58 | 
59 | 	cmd := exec.Command(script)
60 | 
61 | 	env := os.Environ()
62 | 	for key, val := range varMap {
63 | 		env = append(env, fmt.Sprintf("%s=%s", key, InterfaceValueToString(val)))
64 | 	}
65 | 	cmd.Env = env
66 | 
67 | 	if cmdOut, err := cmd.CombinedOutput(); err != nil {
68 | 		Warning.Printf("error running heartbeat '%s': %s: %s", script, err, bytes.TrimSpace(cmdOut))
69 | 	} else {
70 | 		Trace.Printf("heartbeat '%s' OK: %s", script, bytes.TrimSpace(cmdOut))
71 | 	}
72 | }
73 | 
74 | func heartbeatsExecute(scripts []string) {
75 | 	for _, script := range scripts {
76 | 		heartbeatExecute(script)
77 | 	}
78 | }
79 | 
80 | func heartbeatsSchedule(scripts []string, delay time.Duration) {
81 | 	go func() {
82 | 		for {
83 | 			heartbeatsExecute(scripts)
84 | 			Info.Printf("heartbeat, %d scripts", len(scripts))
85 | 			// should check total exec duration and compare to delay, here!
86 | 			time.Sleep(delay)
87 | 		}
88 | 	}()
89 | }
90 | 


--------------------------------------------------------------------------------
/host.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"strings"
  6 | 	"time"
  7 | )
  8 | 
// Host is the final form of hosts.d files
type Host struct {
	// Name identifies the host in logs; createHosts rejects duplicates.
	Name       string
	// Filename is exported to logger scripts as HOST_FILE.
	// NOTE(review): presumably the base name of the hosts.d file — confirm
	// in tomlHostToHost.
	Filename   string
	// NOTE(review): Disabled is not read in this part of the code — confirm
	// where it is honored.
	Disabled   bool
	// Classes are the labels tested by HasClass / MatchProbeTargets.
	Classes    []string
	// Connection is what TestConnection connects and closes.
	Connection *Connection
	// NOTE(review): Defaults is not read in this part of the code —
	// presumably per-host default values; confirm.
	Defaults   map[string]interface{}
	// Tasks is filled by createHosts (one per matching probe) and walked
	// by Schedule.
	Tasks      []*Task
}
 19 | 
 20 | // HasClass returns true if this Host has this class
 21 | func (host *Host) HasClass(class string) bool {
 22 | 	if class == "*" {
 23 | 		return true
 24 | 	}
 25 | 
 26 | 	for _, hClass := range host.Classes {
 27 | 		if hClass == class {
 28 | 			return true
 29 | 		}
 30 | 	}
 31 | 	return false
 32 | }
 33 | 
 34 | // MatchProbeTargets returns true if this Host matches probe's classes
 35 | func (host *Host) MatchProbeTargets(probe *Probe) bool {
 36 | 	for _, pTargets := range probe.Targets {
 37 | 		tokens := strings.Split(pTargets, "&")
 38 | 		matched := 0
 39 | 		mustMatch := len(tokens)
 40 | 		for _, token := range tokens {
 41 | 			ttoken := strings.TrimSpace(token)
 42 | 			if host.HasClass(ttoken) {
 43 | 				matched++
 44 | 			}
 45 | 		}
 46 | 		if matched == mustMatch {
 47 | 			return true
 48 | 		}
 49 | 	}
 50 | 	return false
 51 | }
 52 | 
 53 | // Schedule will loop forever, creating and executing runs for this host
 54 | func (host *Host) Schedule() {
 55 | 	for {
 56 | 		start := time.Now()
 57 | 
 58 | 		var run Run
 59 | 		run.Host = host
 60 | 		run.StartTime = start
 61 | 
 62 | 		for _, task := range host.Tasks {
 63 | 			if start.After(task.NextRun) || start.Equal(task.NextRun) {
 64 | 				taskable, err := task.Taskable()
 65 | 				if err != nil {
 66 | 					Trace.Printf("Taskable() failed: %s", err)
 67 | 					run.addError(err)
 68 | 					continue
 69 | 				}
 70 | 				if taskable == false {
 71 | 					Info.Printf("host '%s', paused task '%s'\n", host.Name, task.Probe.Name)
 72 | 					continue
 73 | 				}
 74 | 
 75 | 				task.ReSchedule(start.Add(task.Probe.Delay))
 76 | 				Info.Printf("host '%s', running task '%s'\n", host.Name, task.Probe.Name)
 77 | 				run.Tasks = append(run.Tasks, task)
 78 | 			}
 79 | 		}
 80 | 
 81 | 		if len(run.Tasks) > 0 {
 82 | 			run.Go()
 83 | 			run.Alerts()
 84 | 			Trace.Printf("currentFails count = %d\n", len(currentFails))
 85 | 			loggersExec(&run)
 86 | 		}
 87 | 		Info.Printf("host '%s', run ended", host.Name)
 88 | 
 89 | 		end := time.Now()
 90 | 		dur := end.Sub(start)
 91 | 
 92 | 		if dur < time.Minute {
 93 | 			remains := time.Minute - dur
 94 | 			time.Sleep(remains)
 95 | 		} else {
 96 | 			run.addError(fmt.Errorf("run duration was too long (%s)", run.Duration))
 97 | 		}
 98 | 		Trace.Printf("(loop %s)\n", host.Name)
 99 | 	}
100 | }
101 | 
102 | // TestConnection will return nil if connection to the host was successful
103 | func (host *Host) TestConnection() error {
104 | 
105 | 	//const bootstrap = "bash -s --"
106 | 
107 | 	startTime := time.Now()
108 | 
109 | 	channel := make(chan error, 1)
110 | 	go func() {
111 | 		if err := host.Connection.Connect(); err != nil {
112 | 			channel <- err
113 | 		}
114 | 		defer host.Connection.Close()
115 | 		channel <- nil
116 | 	}()
117 | 
118 | 	connTimeout := host.Connection.SSHConnTimeWarn * 2
119 | 
120 | 	select {
121 | 	case err := <-channel:
122 | 		if err != nil {
123 | 			return err
124 | 		}
125 | 	case <-time.After(connTimeout):
126 | 		return fmt.Errorf("SSH connection timeout (after %s)", connTimeout)
127 | 	}
128 | 
129 | 	dialDuration := time.Now().Sub(startTime)
130 | 
131 | 	if dialDuration > host.Connection.SSHConnTimeWarn {
132 | 		return fmt.Errorf("SSH connection time was too long: %s (ssh_connection_time_warn = %s)", dialDuration, host.Connection.SSHConnTimeWarn)
133 | 	}
134 | 
135 | 	/*if err := run.prepareTestPipes(); err != nil {
136 | 		return err
137 | 	}*/
138 | 
139 | 	/*if err := host.TestRun(bootstrap); err != nil {
140 | 		return err
141 | 	}*/
142 | 	Info.Printf("Connection to '%s' OK (%s)", host.Name, dialDuration)
143 | 
144 | 	return nil
145 | }
146 | 


--------------------------------------------------------------------------------
/log.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"io"
  6 | 	"io/ioutil"
  7 | 	"log"
  8 | 	"os"
  9 | 
 10 | 	"github.com/urfave/cli"
 11 | )
 12 | 
// Loggers for trace, info, warning and error severity.
// They are declared here and assigned by LogInit.
var (
	// Trace is created with the "TRACE: " prefix and log.Lshortfile.
	Trace   *log.Logger
	// Info is created with the "INFO: " prefix.
	Info    *log.Logger
	// Warning is created with the "WARNING: " prefix.
	Warning *log.Logger
	// Error is created with the "ERROR: " prefix; its console output goes
	// to os.Stderr.
	Error   *log.Logger
)
 20 | 
 21 | func writerCreate(std io.Writer, fd *os.File, quiet bool) io.Writer {
 22 | 	if quiet {
 23 | 		if fd != nil {
 24 | 			if std != ioutil.Discard {
 25 | 				return fd
 26 | 			}
 27 | 		}
 28 | 		return ioutil.Discard
 29 | 	}
 30 | 
 31 | 	// no log at all for this stream (no std, no file)
 32 | 	if std == ioutil.Discard {
 33 | 		return ioutil.Discard
 34 | 	}
 35 | 	// both
 36 | 	if fd != nil {
 37 | 		return io.MultiWriter(fd, std)
 38 | 	}
 39 | 	return std
 40 | }
 41 | 
 42 | // LogInit will initialize loggers
 43 | func LogInit(ctx *cli.Context) {
 44 | 	var (
 45 | 		traceHandle   io.Writer
 46 | 		infoHandle    io.Writer
 47 | 		warningHandle io.Writer
 48 | 		errorHandle   io.Writer
 49 | 	)
 50 | 
 51 | 	level := ctx.String("log-level")
 52 | 	file := ctx.String("log-file")
 53 | 	quiet := ctx.Bool("quiet")
 54 | 	timestamp := ctx.Bool("log-timestamp")
 55 | 
 56 | 	var (
 57 | 		err error
 58 | 		fd  *os.File
 59 | 	)
 60 | 	if file != "" {
 61 | 		fd, err = os.OpenFile(file, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0640)
 62 | 		if err != nil {
 63 | 			fmt.Fprintf(os.Stderr, "Unable to create log file '%s' (%s)\n", file, err)
 64 | 			os.Exit(1)
 65 | 		}
 66 | 	} else {
 67 | 		fd = nil
 68 | 	}
 69 | 
 70 | 	switch level {
 71 | 	case "trace":
 72 | 		traceHandle = writerCreate(os.Stdout, fd, quiet)
 73 | 		infoHandle = writerCreate(os.Stdout, fd, quiet)
 74 | 		warningHandle = writerCreate(os.Stdout, fd, quiet)
 75 | 		errorHandle = writerCreate(os.Stderr, fd, quiet)
 76 | 	case "info":
 77 | 		traceHandle = writerCreate(ioutil.Discard, fd, quiet)
 78 | 		infoHandle = writerCreate(os.Stdout, fd, quiet)
 79 | 		warningHandle = writerCreate(os.Stdout, fd, quiet)
 80 | 		errorHandle = writerCreate(os.Stderr, fd, quiet)
 81 | 	case "warning":
 82 | 		traceHandle = writerCreate(ioutil.Discard, fd, quiet)
 83 | 		infoHandle = writerCreate(ioutil.Discard, fd, quiet)
 84 | 		warningHandle = writerCreate(os.Stdout, fd, quiet)
 85 | 		errorHandle = writerCreate(os.Stderr, fd, quiet)
 86 | 	default:
 87 | 		fmt.Fprintf(os.Stderr, "ERROR: invalid log level '%s'\n", level)
 88 | 		os.Exit(1)
 89 | 	}
 90 | 
 91 | 	var flags = 0
 92 | 	if timestamp {
 93 | 		flags = log.Ldate | log.Ltime
 94 | 	}
 95 | 
 96 | 	Trace = log.New(traceHandle,
 97 | 		"TRACE: ",
 98 | 		flags|log.Lshortfile)
 99 | 
100 | 	Info = log.New(infoHandle,
101 | 		"INFO: ",
102 | 		flags)
103 | 
104 | 	Warning = log.New(warningHandle,
105 | 		"WARNING: ",
106 | 		flags)
107 | 
108 | 	Error = log.New(errorHandle,
109 | 		"ERROR: ",
110 | 		flags)
111 | 
112 | 	Trace.Println("Log init")
113 | }
114 | 


--------------------------------------------------------------------------------
/loggers.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"fmt"
 6 | 	"io/ioutil"
 7 | 	"os"
 8 | 	"os/exec"
 9 | 	"path"
10 | 	"path/filepath"
11 | 	"strings"
12 | )
13 | 
14 | func loggersList(config *Config) ([]string, error) {
15 | 	lgDirPath := path.Clean(config.configPath + "/scripts/loggers/")
16 | 	stat, err := os.Stat(lgDirPath)
17 | 
18 | 	if err != nil {
19 | 		return nil, fmt.Errorf("invalid 'loggers' directory '%s': %s", lgDirPath, err)
20 | 	}
21 | 
22 | 	if !stat.Mode().IsDir() {
23 | 		return nil, fmt.Errorf("is not a directory '%s'", lgDirPath)
24 | 	}
25 | 
26 | 	scripts, err := filepath.Glob(lgDirPath + "/*")
27 | 	if err != nil {
28 | 		return nil, fmt.Errorf("error listing '%s' directory: %s", lgDirPath, err)
29 | 	}
30 | 
31 | 	for _, scriptPath := range scripts {
32 | 		stat, err := os.Stat(scriptPath)
33 | 
34 | 		if err != nil {
35 | 			return nil, fmt.Errorf("invalid 'script' file '%s': %s", scriptPath, err)
36 | 		}
37 | 
38 | 		if !stat.Mode().IsRegular() {
39 | 			return nil, fmt.Errorf("is not a regular 'script' file '%s'", scriptPath)
40 | 		}
41 | 
42 | 		_, err = ioutil.ReadFile(scriptPath)
43 | 		if err != nil {
44 | 			return nil, fmt.Errorf("error reading script file '%s': %s", scriptPath, err)
45 | 		}
46 | 	}
47 | 
48 | 	return scripts, nil
49 | }
50 | 
51 | func loggersExec(run *Run) {
52 | 	varMap := make(map[string]interface{})
53 | 	varMap["NOSEE_SRV"] = GlobalConfig.Name
54 | 	varMap["VERSION"] = NoseeVersion
55 | 	varMap["HOST_NAME"] = run.Host.Name
56 | 	varMap["HOST_FILE"] = run.Host.Filename
57 | 	varMap["CLASSES"] = strings.Join(run.Host.Classes, ",")
58 | 
59 | 	var valuesBuff bytes.Buffer
60 | 	for _, result := range run.TaskResults {
61 | 		for key, val := range result.Values {
62 | 			// df.toml;DISK_FULLEST_PERC;27
63 | 			str := fmt.Sprintf("%s;%s;%s\n", result.Task.Probe.Filename, key, val)
64 | 			valuesBuff.WriteString(str)
65 | 		}
66 | 	}
67 | 
68 | 	go func() {
69 | 		for _, script := range globalLogers {
70 | 			cmd := exec.Command(script)
71 | 
72 | 			// we inject Values thru stdin:
73 | 			cmd.Stdin = strings.NewReader(valuesBuff.String())
74 | 
75 | 			env := os.Environ()
76 | 			for key, val := range varMap {
77 | 				env = append(env, fmt.Sprintf("%s=%s", key, InterfaceValueToString(val)))
78 | 			}
79 | 			cmd.Env = env
80 | 
81 | 			if cmdOut, err := cmd.CombinedOutput(); err != nil {
82 | 				Warning.Printf("error running logger '%s': %s: %s", script, err, bytes.TrimSpace(cmdOut))
83 | 			} else {
84 | 				Trace.Printf("logger '%s' OK: %s", script, bytes.TrimSpace(cmdOut))
85 | 			}
86 | 		}
87 | 	}()
88 | }
89 | 


--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"fmt"
  6 | 	"math/rand"
  7 | 	"os"
  8 | 	"path"
  9 | 	"path/filepath"
 10 | 	"strings"
 11 | 	"sync"
 12 | 	"time"
 13 | 
 14 | 	"github.com/BurntSushi/toml"
 15 | 	"github.com/Knetic/govaluate"
 16 | 	"github.com/fatih/color"
 17 | 	"github.com/urfave/cli"
 18 | )
 19 | 
// NoseeVersion in X.Y string format
const NoseeVersion = "0.1"

// NOTE(review): myRand is not referenced in the visible part of this file —
// confirm where it is seeded and used.
var myRand *rand.Rand

// globalAlerts holds the alerts returned by createAlerts; it is assigned
// in createHosts.
var globalAlerts []*Alert

// globalLogers holds the logger script paths returned by loggersList (set
// in mainDefault); loggersExec runs each of them.
var globalLogers []string

// appStartTime is what heartbeats report as STARTTIME and measure UPTIME
// from.
var appStartTime time.Time
 27 | 
 28 | func configurationDirList(inpath string, dirPath string) ([]string, error) {
 29 | 	configPath := path.Clean(dirPath + "/" + inpath)
 30 | 
 31 | 	stat, err := os.Stat(configPath)
 32 | 
 33 | 	if err != nil {
 34 | 		return nil, fmt.Errorf("invalid directory '%s': %s", configPath, err)
 35 | 	}
 36 | 
 37 | 	if !stat.Mode().IsDir() {
 38 | 		return nil, fmt.Errorf("is not a directory '%s'", configPath)
 39 | 	}
 40 | 
 41 | 	list, err := filepath.Glob(configPath + "/*.toml")
 42 | 	if err != nil {
 43 | 		return nil, fmt.Errorf("error listing '%s' directory: %s", configPath, err)
 44 | 	}
 45 | 
 46 | 	return list, nil
 47 | }
 48 | 
 49 | func createProbes(ctx *cli.Context, config *Config) ([]*Probe, error) {
 50 | 	probesdFiles, errd := configurationDirList("probes.d", config.configPath)
 51 | 	if errd != nil {
 52 | 		return nil, fmt.Errorf("Error: %s", errd)
 53 | 	}
 54 | 
 55 | 	var probes []*Probe
 56 | 	pNames := make(map[string]string)
 57 | 
 58 | 	for _, file := range probesdFiles {
 59 | 		var tProbe tomlProbe
 60 | 
 61 | 		if _, err := toml.DecodeFile(file, &tProbe); err != nil {
 62 | 			return nil, fmt.Errorf("Error decoding %s: %s", file, err)
 63 | 		}
 64 | 
 65 | 		_, filename := path.Split(file)
 66 | 		probe, err := tomlProbeToProbe(&tProbe, config, filename)
 67 | 		if err != nil {
 68 | 			return nil, fmt.Errorf("Error using %s: %s", file, err)
 69 | 		}
 70 | 
 71 | 		if probe != nil {
 72 | 			if f, exists := pNames[probe.Name]; exists == true {
 73 | 				return nil, fmt.Errorf("Config error: duplicate name '%s' (%s, %s)", probe.Name, f, file)
 74 | 			}
 75 | 
 76 | 			probes = append(probes, probe)
 77 | 			pNames[probe.Name] = file
 78 | 		}
 79 | 	}
 80 | 	Info.Printf("probe count = %d\n", len(probes))
 81 | 	return probes, nil
 82 | }
 83 | 
 84 | func createAlerts(ctx *cli.Context, config *Config) ([]*Alert, error) {
 85 | 	alertdFiles, err := configurationDirList("alerts.d", config.configPath)
 86 | 	if err != nil {
 87 | 		return nil, fmt.Errorf("Error: %s", err)
 88 | 	}
 89 | 
 90 | 	var alerts []*Alert
 91 | 	aNames := make(map[string]string)
 92 | 	for _, file := range alertdFiles {
 93 | 		var tAlert tomlAlert
 94 | 
 95 | 		if _, err := toml.DecodeFile(file, &tAlert); err != nil {
 96 | 			return nil, fmt.Errorf("Error decoding %s: %s", file, err)
 97 | 		}
 98 | 
 99 | 		alert, err := tomlAlertToAlert(&tAlert, config)
100 | 		if err != nil {
101 | 			return nil, fmt.Errorf("Error using %s: %s", file, err)
102 | 		}
103 | 
104 | 		if alert != nil {
105 | 			if f, exists := aNames[alert.Name]; exists == true {
106 | 				return nil, fmt.Errorf("Config error: duplicate name '%s' (%s, %s)", alert.Name, f, file)
107 | 			}
108 | 
109 | 			alerts = append(alerts, alert)
110 | 			aNames[alert.Name] = file
111 | 		}
112 | 	}
113 | 	//  = alerts
114 | 	Info.Printf("alert count = %d\n", len(alerts))
115 | 
116 | 	// check if we have at least one "general" alert receiver
117 | 	generalReceivers := 0
118 | 	for _, alert := range alerts {
119 | 		for _, target := range alert.Targets {
120 | 			if target == GeneralClass || target == "*" {
121 | 				generalReceivers++
122 | 			}
123 | 		}
124 | 	}
125 | 	if generalReceivers == 0 {
126 | 		return nil, fmt.Errorf("Config error: at least one alert must match the 'general' class")
127 | 	}
128 | 	return alerts, nil
129 | }
130 | 
131 | func createHosts(ctx *cli.Context, config *Config) ([]*Host, error) {
132 | 	hostsdFiles, errc := configurationDirList("hosts.d", config.configPath)
133 | 	if errc != nil {
134 | 		return nil, fmt.Errorf("Error: %s", errc)
135 | 	}
136 | 
137 | 	var hosts []*Host
138 | 	hNames := make(map[string]string)
139 | 
140 | 	for _, file := range hostsdFiles {
141 | 		var tHost tomlHost
142 | 
143 | 		// defaults
144 | 		tHost.Network.SSHConnTimeWarn.Duration = config.SSHConnTimeWarn
145 | 
146 | 		if _, err := toml.DecodeFile(file, &tHost); err != nil {
147 | 			return nil, fmt.Errorf("Error decoding %s: %s", file, err)
148 | 		}
149 | 
150 | 		_, filename := path.Split(file)
151 | 		host, err := tomlHostToHost(&tHost, config, filename)
152 | 		if err != nil {
153 | 			return nil, fmt.Errorf("Error using %s: %s", file, err)
154 | 		}
155 | 
156 | 		if host != nil {
157 | 			if f, exists := hNames[host.Name]; exists == true {
158 | 				return nil, fmt.Errorf("Config error: duplicate name '%s' (%s, %s)", host.Name, f, file)
159 | 			}
160 | 
161 | 			hosts = append(hosts, host)
162 | 			hNames[host.Name] = file
163 | 		}
164 | 	}
165 | 	Info.Printf("host count = %d\n", len(hosts))
166 | 
167 | 	if config.doConnTest == true {
168 | 		Info.Print("Testing connections…")
169 | 		errors := make(chan error, len(hosts))
170 | 		for _, host := range hosts {
171 | 			go func(host *Host) {
172 | 				if err := host.TestConnection(); err != nil {
173 | 					errors <- fmt.Errorf("Error connecting %s: %s", host.Name, err)
174 | 				} else {
175 | 					errors <- nil
176 | 				}
177 | 			}(host)
178 | 		}
179 | 		for i := 0; i < len(hosts); i++ {
180 | 			select {
181 | 			case err := <-errors:
182 | 				if err != nil {
183 | 					return nil, err
184 | 				}
185 | 			}
186 | 		}
187 | 	}
188 | 
189 | 	probes, err := createProbes(ctx, config)
190 | 	if err != nil {
191 | 		return nil, err
192 | 	}
193 | 
194 | 	globalAlerts, err = createAlerts(ctx, config)
195 | 	if err != nil {
196 | 		return nil, err
197 | 	}
198 | 
199 | 	// update hosts with tasks
200 | 	var taskCount int
201 | 	for _, host := range hosts {
202 | 		for _, probe := range probes {
203 | 			if host.MatchProbeTargets(probe) {
204 | 				var task Task
205 | 				task.Probe = probe
206 | 				task.PrevRun = time.Now()
207 | 				task.NextRun = time.Now()
208 | 				host.Tasks = append(host.Tasks, &task)
209 | 				taskCount++
210 | 			}
211 | 		}
212 | 	}
213 | 	Info.Printf("task count = %d\n", taskCount)
214 | 
215 | 	return hosts, nil
216 | }
217 | 
218 | func scheduleHosts(hosts []*Host, config *Config) error {
219 | 	var hostGroup sync.WaitGroup
220 | 	for i, host := range hosts {
221 | 		hostGroup.Add(1)
222 | 		go func(i int, host *Host) {
223 | 			defer hostGroup.Done()
224 | 			if config.StartTimeSpreadSeconds > 0 {
225 | 				// Sleep here, to ease global load
226 | 				fact := float32(i) / float32(len(hosts)) * 1000 * float32(config.StartTimeSpreadSeconds)
227 | 				wait := time.Duration(fact) * time.Millisecond
228 | 				time.Sleep(wait)
229 | 			}
230 | 			host.Schedule()
231 | 		}(i, host)
232 | 	}
233 | 
234 | 	hostGroup.Wait()
235 | 	return fmt.Errorf("QUIT: empty wait group, everyone died :(")
236 | }
237 | 
238 | func mainDefault(ctx *cli.Context) error {
239 | 	LogInit(ctx)
240 | 
241 | 	config, err := GlobalConfigRead(ctx.String("config-path"), "nosee.toml")
242 | 	if err != nil {
243 | 		Error.Printf("Config (nosee.toml): %s", err)
244 | 		return cli.NewExitError("", 1)
245 | 	}
246 | 	GlobalConfig = config
247 | 
248 | 	heartbeats, err := heartbeatsList(config)
249 | 	if err != nil {
250 | 		Error.Println(err)
251 | 		return cli.NewExitError("", 2)
252 | 	}
253 | 
254 | 	globalLogers, err = loggersList(config)
255 | 	if err != nil {
256 | 		Error.Println(err)
257 | 		return cli.NewExitError("", 2)
258 | 	}
259 | 
260 | 	hosts, err := createHosts(ctx, config)
261 | 	if err != nil {
262 | 		Error.Println(err)
263 | 		return cli.NewExitError("", 10)
264 | 	}
265 | 
266 | 	CurrentFailsCreate()
267 | 	CurrentFailsLoad()
268 | 
269 | 	if pidPath := ctx.String("pid-file"); pidPath != "" {
270 | 		pid, err := NewPIDFile(pidPath)
271 | 		if err != nil {
272 | 			return cli.NewExitError(fmt.Errorf("Error with pid file: %s", err), 100)
273 | 		}
274 | 		defer pid.Remove()
275 | 	}
276 | 
277 | 	heartbeatsSchedule(heartbeats, config.HeartbeatDelay)
278 | 
279 | 	if err := scheduleHosts(hosts, config); err != nil {
280 | 		return cli.NewExitError(err, 1)
281 | 	}
282 | 
283 | 	return nil
284 | }
285 | 
286 | func mainCheck(ctx *cli.Context) error {
287 | 	LogInit(ctx.Parent())
288 | 
289 | 	fmt.Printf("Checking configuration and connections…\n")
290 | 
291 | 	config, err := GlobalConfigRead(ctx.Parent().String("config-path"), "nosee.toml")
292 | 	if err != nil {
293 | 		Error.Printf("Config (nosee.toml): %s", err)
294 | 		return cli.NewExitError("", 1)
295 | 	}
296 | 	GlobalConfig = config
297 | 
298 | 	_, err = heartbeatsList(config)
299 | 	if err != nil {
300 | 		Error.Println(err)
301 | 		return cli.NewExitError("", 2)
302 | 	}
303 | 
304 | 	_, err = loggersList(config)
305 | 	if err != nil {
306 | 		Error.Println(err)
307 | 		return cli.NewExitError("", 2)
308 | 	}
309 | 
310 | 	_, err = createHosts(ctx, config)
311 | 	if err != nil {
312 | 		Error.Println(err)
313 | 		return cli.NewExitError("", 10)
314 | 	}
315 | 	fmt.Println("OK")
316 | 	return nil
317 | }
318 | 
319 | func mainRecap(ctx *cli.Context) error {
320 | 	LogInit(ctx.Parent())
321 | 
322 | 	config, err := GlobalConfigRead(ctx.Parent().String("config-path"), "nosee.toml")
323 | 	if err != nil {
324 | 		Error.Printf("Config (nosee.toml): %s", err)
325 | 		return cli.NewExitError("", 1)
326 | 	}
327 | 	GlobalConfig = config
328 | 
329 | 	// TODO: should probably display heartbeats/loggers in the recap, then?
330 | 	_, err = heartbeatsList(config)
331 | 	if err != nil {
332 | 		Error.Println(err)
333 | 		return cli.NewExitError("", 2)
334 | 	}
335 | 
336 | 	hosts, err := createHosts(ctx, config)
337 | 	if err != nil {
338 | 		Error.Println(err)
339 | 		return cli.NewExitError("", 10)
340 | 	}
341 | 
342 | 	if ctx.Bool("no-color") == true {
343 | 		color.NoColor = true
344 | 	}
345 | 
346 | 	red := color.New(color.FgRed).SprintFunc()
347 | 	yellow := color.New(color.FgYellow).SprintFunc()
348 | 	green := color.New(color.FgGreen).SprintFunc()
349 | 	cyan := color.New(color.FgCyan).SprintFunc()
350 | 
351 | 	for _, host := range hosts {
352 | 		fmt.Printf("%s: %s\n", cyan("Host"), host.Name)
353 | 		for _, task := range host.Tasks {
354 | 			fmt.Printf("  %s: %s (%dm)\n", green("Probe"), task.Probe.Name, int(task.Probe.Delay.Minutes()))
355 | 			for _, check := range task.Probe.Checks {
356 | 				fmt.Printf("    %s: %s (%s)\n", yellow("Check"), check.Desc, strings.Join(check.Classes, ", "))
357 | 				var msg AlertMessage
358 | 				msg.Classes = check.Classes
359 | 				alertCount := 0
360 | 				for _, alert := range globalAlerts {
361 | 					if msg.MatchAlertTargets(alert) {
362 | 						alertCount++
363 | 						fmt.Printf("      %s: %s\n", red("Alert"), alert.Name)
364 | 					}
365 | 				}
366 | 				if alertCount == 0 {
367 | 					fmt.Println(red("      No valid alert for this check!"))
368 | 				}
369 | 			}
370 | 		}
371 | 	}
372 | 
373 | 	return nil
374 | }
375 | 
376 | func mainExpr(ctx *cli.Context) error {
377 | 	LogInit(ctx.Parent())
378 | 	if ctx.NArg() == 0 {
379 | 		err := fmt.Errorf("Error, you must provide a govaluate expression parameter, see https://github.com/Knetic/govaluate for syntax and features")
380 | 		return cli.NewExitError(err, 1)
381 | 	}
382 | 	exprString := ctx.Args().Get(0)
383 | 
384 | 	expr, err := govaluate.NewEvaluableExpressionWithFunctions(exprString, CheckFunctions)
385 | 	if err != nil {
386 | 		return cli.NewExitError(err, 2)
387 | 	}
388 | 
389 | 	if vars := expr.Vars(); len(vars) > 0 {
390 | 		errv := fmt.Errorf("Undefined variables: %s", strings.Join(vars, ", "))
391 | 		return cli.NewExitError(errv, 11)
392 | 	}
393 | 
394 | 	result, err := expr.Evaluate(nil)
395 | 	if err != nil {
396 | 		return cli.NewExitError(err, 3)
397 | 	}
398 | 
399 | 	fmt.Println(InterfaceValueToString(result))
400 | 	return nil
401 | }
402 | 
403 | func mainTest(ctx *cli.Context) error {
404 | 	LogInit(ctx.Parent())
405 | 
406 | 	config, err := GlobalConfigRead(ctx.Parent().String("config-path"), "nosee.toml")
407 | 	if err != nil {
408 | 		Error.Printf("Config (nosee.toml): %s", err)
409 | 		return cli.NewExitError("", 1)
410 | 	}
411 | 	config.loadDisabled = true // WARNING!
412 | 	config.doConnTest = false  // WARNING!
413 | 	GlobalConfig = config
414 | 
415 | 	hosts, err := createHosts(ctx, config)
416 | 	if err != nil {
417 | 		Error.Println(err)
418 | 		return cli.NewExitError("", 10)
419 | 	}
420 | 
421 | 	// createHosts already load probes, but we need the full list
422 | 	// and not only probes targeting our host
423 | 	probes, err := createProbes(ctx, config)
424 | 	if err != nil {
425 | 		Error.Println(err)
426 | 		return cli.NewExitError("", 10)
427 | 	}
428 | 
429 | 	requestedHost := ctx.Args().Get(0)
430 | 	requestedProbe := ctx.Args().Get(1)
431 | 
432 | 	if requestedHost == "" {
433 | 		var list bytes.Buffer
434 | 		for _, host := range hosts {
435 | 			list.WriteString(fmt.Sprintf("- %s (%s)\n", host.Filename, host.Name))
436 | 		}
437 | 		Error.Printf("you must give a host Name or hosts.d/ filename:\n%s", list.String())
438 | 		return cli.NewExitError("", 1)
439 | 	}
440 | 
441 | 	if requestedProbe == "" {
442 | 		var list bytes.Buffer
443 | 		for _, probe := range probes {
444 | 			list.WriteString(fmt.Sprintf("- %s (%s)\n", probe.Filename, probe.Name))
445 | 		}
446 | 		Error.Printf("you must give a probe Name or probes.d/ filename:\n%s", list.String())
447 | 		return cli.NewExitError("", 1)
448 | 	}
449 | 
450 | 	// Locate requested host and probe…
451 | 	var foundHost *Host
452 | 	for _, host := range hosts {
453 | 		if host.Name == requestedHost || host.Filename == requestedHost {
454 | 			foundHost = host
455 | 			break
456 | 		}
457 | 	}
458 | 	if foundHost == nil {
459 | 		Error.Printf("can't find '%s' host", requestedHost)
460 | 		return cli.NewExitError("", 1)
461 | 	}
462 | 
463 | 	var foundProbe *Probe
464 | 	for _, probe := range probes {
465 | 		if probe.Name == requestedProbe || probe.Filename == requestedProbe {
466 | 			foundProbe = probe
467 | 			break
468 | 		}
469 | 	}
470 | 	if foundProbe == nil {
471 | 		Error.Printf("can't find '%s' probe", requestedProbe)
472 | 		return cli.NewExitError("", 1)
473 | 	}
474 | 
475 | 	if ctx.Bool("no-color") == true {
476 | 		color.NoColor = true
477 | 	}
478 | 
479 | 	red := color.New(color.FgRed).SprintFunc()
480 | 	yellow := color.New(color.FgYellow).SprintFunc()
481 | 	green := color.New(color.FgGreen).SprintFunc()
482 | 	cyan := color.New(color.FgCyan).SprintFunc()
483 | 	magenta := color.New(color.FgMagenta).SprintFunc()
484 | 	magentaS := color.New(color.FgMagenta).Add(color.CrossedOut).SprintFunc()
485 | 
486 | 	_, scriptName := path.Split(foundProbe.Script)
487 | 	fmt.Printf("Testing: host '%s' with probe '%s' (%s, %s) using script '%s'\n", cyan(foundHost.Name), green(foundProbe.Name), foundHost.Filename, foundProbe.Filename, magenta(scriptName))
488 | 	if foundHost.Disabled == true {
489 | 		fmt.Printf("Note: the host '%s' is currently %s\n", red(foundHost.Name), red("disabled"))
490 | 	}
491 | 	if foundProbe.Disabled == true {
492 | 		fmt.Printf("Note: the probe '%s' is currently %s\n", red(foundProbe.Name), red("disabled"))
493 | 	}
494 | 	if foundHost.MatchProbeTargets(foundProbe) == false {
495 | 		fmt.Printf("Note: the probe '%s' does %s match host '%s' (see classes and targets)\n", red(foundProbe.Name), red("not"), red(foundHost.Name))
496 | 	}
497 | 
498 | 	// print defaults
499 | 	for key, val := range foundProbe.Defaults {
500 | 		if _, ok := foundHost.Defaults[key]; ok == true {
501 | 			fmt.Printf("default: %s = %s -> %s (host override)\n",
502 | 				magenta(key),
503 | 				magentaS(InterfaceValueToString(val)),
504 | 				magenta(foundHost.Defaults[key]))
505 | 		} else {
506 | 			fmt.Printf("default: %s = %s\n", magenta(key), magenta(InterfaceValueToString(val)))
507 | 		}
508 | 	}
509 | 
510 | 	var run Run
511 | 	run.StartTime = time.Now()
512 | 	run.Host = foundHost
513 | 
514 | 	var task Task
515 | 	task.Probe = foundProbe
516 | 	task.PrevRun = time.Now()
517 | 	task.NextRun = time.Now()
518 | 
519 | 	run.Tasks = append(run.Tasks, &task)
520 | 	run.Go()
521 | 
522 | 	if len(run.Errors) > 0 {
523 | 		for _, err := range run.Errors {
524 | 			fmt.Printf("run error: %s\n", red(err))
525 | 		}
526 | 		return nil
527 | 	}
528 | 
529 | 	result := run.TaskResults[0]
530 | 
531 | 	for key, val := range result.Values {
532 | 		fmt.Printf("value: %s = %s\n", yellow(key), yellow(val))
533 | 	}
534 | 
535 | 	for _, err := range result.Logs {
536 | 		fmt.Printf("log: %s\n", cyan(err))
537 | 	}
538 | 
539 | 	if result.ExitStatus == 0 {
540 | 		fmt.Printf("script exit status: %s (success)\n", green(result.ExitStatus))
541 | 	} else {
542 | 		fmt.Printf("script exit status: %s (error)\n", red(result.ExitStatus))
543 | 	}
544 | 	fmt.Printf("script duration: %s (+ ssh dial duration: %s)\n", result.Duration, run.DialDuration)
545 | 
546 | 	if run.totalErrorCount() > 0 {
547 | 		for _, err := range result.Errors {
548 | 			fmt.Printf("error: %s\n", red(err))
549 | 		}
550 | 		return nil
551 | 	}
552 | 
553 | 	result.DoChecks()
554 | 
555 | 	// DoChecks may add its own errors
556 | 	for _, err := range result.Errors {
557 | 		fmt.Printf("error: %s\n", red(err))
558 | 	}
559 | 
560 | 	for _, check := range result.SuccessfulChecks {
561 | 		fmt.Printf("check %s: %s: false (no alert)\n", green("GOOD"), green(check.Desc))
562 | 	}
563 | 	for _, check := range result.FailedChecks {
564 | 		fmt.Printf("check %s: %s: true (alert)\n", red("BAD"), red(check.Desc))
565 | 	}
566 | 
567 | 	return nil
568 | }
569 | 
570 | func main() {
571 | 	// generic (aka "not cli command specific") inits
572 | 	source := rand.NewSource(time.Now().UnixNano())
573 | 	myRand = rand.New(source)
574 | 	CheckFunctionsInit()
575 | 	appStartTime = time.Now()
576 | 
577 | 	app := cli.NewApp()
578 | 	app.Usage = "Nosee: a nosey, agentless, easy monitoring tool over SSH"
579 | 	app.Version = NoseeVersion
580 | 
581 | 	app.Flags = []cli.Flag{
582 | 		cli.StringFlag{
583 | 			Name:   "config-path, c",
584 | 			Value:  "/etc/nosee/",
585 | 			Usage:  "configuration directory `PATH`",
586 | 			EnvVar: "NOSEE_CONFIG",
587 | 		},
588 | 		cli.StringFlag{
589 | 			Name:  "log-level, l",
590 | 			Value: "warning",
591 | 			Usage: "log `level` verbosity (trace, info, warning)",
592 | 		},
593 | 		cli.StringFlag{
594 | 			Name:  "log-file, f",
595 | 			Usage: "log file to `FILE` (append)",
596 | 		},
597 | 		cli.BoolFlag{
598 | 			Name:  "log-timestamp, t",
599 | 			Usage: "add timestamp to log output",
600 | 		},
601 | 		cli.BoolFlag{
602 | 			Name:  "quiet, q",
603 | 			Usage: "no stdout/err output (except launch errors)",
604 | 		},
605 | 		cli.StringFlag{
606 | 			Name:  "pid-file, p",
607 | 			Usage: "create pid `FILE`",
608 | 		},
609 | 	}
610 | 
611 | 	app.Action = mainDefault
612 | 
613 | 	app.Commands = []cli.Command{
614 | 		{
615 | 			Name:      "check",
616 | 			Aliases:   []string{"c"},
617 | 			Usage:     "Check configuration files and connections",
618 | 			ArgsUsage: " ",
619 | 			Action:    mainCheck,
620 | 		},
621 | 		{
622 | 			Name:      "recap",
623 | 			Aliases:   []string{"r"},
624 | 			Usage:     "Recap configuration",
625 | 			ArgsUsage: " ",
626 | 			Action:    mainRecap,
627 | 			Flags: []cli.Flag{
628 | 				cli.BoolFlag{
629 | 					Name:  "no-color",
630 | 					Usage: "disable color output ",
631 | 				},
632 | 			},
633 | 		},
634 | 		{
635 | 			Name:      "expr",
636 | 			Aliases:   []string{"e"},
637 | 			Usage:     "Test 'govaluate' expression (See Checks 'If')",
638 | 			ArgsUsage: "expression",
639 | 			Action:    mainExpr,
640 | 		},
641 | 		{
642 | 			Name:        "test",
643 | 			Aliases:     []string{"t"},
644 | 			Usage:       "Test any Probe on a any Host",
645 | 			ArgsUsage:   "host probe",
646 | 			Description: "use Name or filename.toml (without path) for host and probe (disabled or not, targeted or not)",
647 | 			Action:      mainTest,
648 | 			Flags: []cli.Flag{
649 | 				cli.BoolFlag{
650 | 					Name:  "no-color",
651 | 					Usage: "disable color output ",
652 | 				},
653 | 			},
654 | 		},
655 | 	}
656 | 
657 | 	app.Run(os.Args)
658 | }
659 | 


--------------------------------------------------------------------------------
/pid.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"io/ioutil"
 6 | 	"os"
 7 | 	"path/filepath"
 8 | 	"strconv"
 9 | 	"strings"
10 | 	"syscall"
11 | )
12 | 
// PIDFile describes a PID file on disk.
type PIDFile struct {
	// Path is the location of the PID file.
	Path string
}
17 | 
18 | func checkPIDFileExists(path string) error {
19 | 	if pidByte, err := ioutil.ReadFile(path); err == nil {
20 | 		pidString := strings.TrimSpace(string(pidByte))
21 | 		if pid, err := strconv.Atoi(pidString); err == nil {
22 | 			if pidIsRunning(pid) {
23 | 				return fmt.Errorf("pid file '%s' already exists", path)
24 | 			}
25 | 		}
26 | 	}
27 | 	return nil
28 | }
29 | 
30 | // NewPIDFile create a PIDFile if there no other instance already running
31 | func NewPIDFile(path string) (*PIDFile, error) {
32 | 	if err := checkPIDFileExists(path); err != nil {
33 | 		return nil, err
34 | 	}
35 | 	if err := os.MkdirAll(filepath.Dir(path), os.FileMode(0755)); err != nil {
36 | 		return nil, err
37 | 	}
38 | 	if err := ioutil.WriteFile(path, []byte(fmt.Sprintf("%d", os.Getpid())), 0644); err != nil {
39 | 		return nil, err
40 | 	}
41 | 
42 | 	return &PIDFile{Path: path}, nil
43 | }
44 | 
45 | // Remove deletes the PIDFile
46 | func (file PIDFile) Remove() error {
47 | 	return os.Remove(file.Path)
48 | }
49 | 
// pidIsRunning reports whether a process with the given pid seems to exist.
//
// It sends signal 0 to the process and returns false only when the process
// cannot be found ("no such process") or has already finished. Any other
// outcome, including other signalling errors, is treated as "running".
// PIDs lower than or equal to 0 never identify a single process and are
// reported as not running.
func pidIsRunning(pid int) bool {
	// Without this guard, signalling such a pid yields an error that matches
	// neither message below, so a PID file holding "0" or a negative number
	// would be seen as a running instance.
	if pid <= 0 {
		return false
	}

	process, err := os.FindProcess(pid)
	if err != nil {
		return false
	}

	// Signal 0 is used as a probe; only the returned error matters here.
	err = process.Signal(syscall.Signal(0))
	if err == nil {
		return true
	}

	switch err.Error() {
	case "no such process", "os: process already finished":
		return false
	}
	return true
}
68 | 


--------------------------------------------------------------------------------
/probe.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"time"
 5 | 
 6 | 	"github.com/Knetic/govaluate"
 7 | )
 8 | 
 9 | // Check holds final informations about a check of a probes.d file
10 | type Check struct {
11 | 	Index           int
12 | 	Desc            string
13 | 	If              *govaluate.EvaluableExpression
14 | 	Classes         []string
15 | 	NeededFailures  int
16 | 	NeededSuccesses int
17 | }
18 | 
19 | // Probe is the final form of probes.d files
20 | type Probe struct {
21 | 	Name      string
22 | 	Filename  string
23 | 	Disabled  bool
24 | 	Script    string
25 | 	Targets   []string
26 | 	Delay     time.Duration
27 | 	Timeout   time.Duration
28 | 	Arguments string
29 | 	Defaults  map[string]interface{}
30 | 	Checks    []*Check
31 | 	RunIf     *govaluate.EvaluableExpression
32 | }
33 | 
34 | // MissingDefaults return a slice with names of defaults used in Check 'If'
35 | // expressions and Probe script arguments. The slice length is 0 if no
36 | // missing default were found.
37 | func (probe *Probe) MissingDefaults() []string {
38 | 	missing := make(map[string]bool)
39 | 
40 | 	for _, check := range probe.Checks {
41 | 		for _, name := range check.If.Vars() {
42 | 			if IsAllUpper(name) {
43 | 				continue
44 | 			}
45 | 			if _, ok := probe.Defaults[name]; ok != true {
46 | 				missing[name] = true
47 | 			}
48 | 		}
49 | 	}
50 | 
51 | 	vars := StringFindVariables(probe.Arguments)
52 | 	for _, name := range vars {
53 | 		if _, ok := probe.Defaults[name]; ok != true {
54 | 			missing[name] = true
55 | 		}
56 | 	}
57 | 
58 | 	// map to slice:
59 | 	var missSlice []string
60 | 	for key := range missing {
61 | 		missSlice = append(missSlice, key)
62 | 	}
63 | 
64 | 	return missSlice
65 | }
66 | 


--------------------------------------------------------------------------------
/run.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"time"
  6 | )
  7 | 
  8 | // Run is a list of Tasks on Host, including task results
  9 | type Run struct {
 10 | 	Host         *Host
 11 | 	Tasks        []*Task
 12 | 	StartTime    time.Time
 13 | 	Duration     time.Duration
 14 | 	DialDuration time.Duration
 15 | 	TaskResults  []*TaskResult
 16 | 	Errors       []error
 17 | }
 18 | 
 19 | // Dump prints Run informations on the screen for debugging purposes
 20 | func (run *Run) Dump() {
 21 | 	fmt.Printf("-\n")
 22 | 	fmt.Printf("- host: %s\n", run.Host.Name)
 23 | 	fmt.Printf("- %d task(s)\n", len(run.Tasks))
 24 | 	fmt.Printf("- start: %s\n", run.StartTime)
 25 | 	fmt.Printf("- duration: %s\n", run.Duration)
 26 | 	fmt.Printf("- ssh dial duration: %s\n", run.DialDuration)
 27 | 	for _, err := range run.Errors {
 28 | 		fmt.Printf("-e %s\n", err)
 29 | 	}
 30 | 	for _, res := range run.TaskResults {
 31 | 		fmt.Printf("-- task probe: %s\n", res.Task.Probe.Name)
 32 | 		fmt.Printf("-- start time: %s\n", res.StartTime)
 33 | 		fmt.Printf("-- duration: %s\n", res.Duration)
 34 | 		fmt.Printf("-- exit status: %d\n", res.ExitStatus)
 35 | 		fmt.Printf("-- next task run: %s\n", res.Task.NextRun)
 36 | 		for key, val := range res.Values {
 37 | 			fmt.Printf("-v- '%s' = '%s'\n", key, val)
 38 | 		}
 39 | 		for _, err := range res.Errors {
 40 | 			fmt.Printf("-e- %s\n", err)
 41 | 		}
 42 | 		for _, check := range res.FailedChecks {
 43 | 			fmt.Printf("-F- %s\n", check.Desc)
 44 | 		}
 45 | 		for _, log := range res.Logs {
 46 | 			fmt.Printf("-l- %s\n", log)
 47 | 		}
 48 | 	}
 49 | }
 50 | 
 51 | func (run *Run) addError(err error) {
 52 | 	Info.Printf("Run error: %s (host '%s')", err, run.Host.Name)
 53 | 	run.Errors = append(run.Errors, err)
 54 | }
 55 | 
 56 | func (run *Run) currentTaskResult() *TaskResult {
 57 | 	if len(run.TaskResults) == 0 {
 58 | 		return nil
 59 | 	}
 60 | 	return run.TaskResults[len(run.TaskResults)-1]
 61 | }
 62 | 
 63 | func (run *Run) totalErrorCount() int {
 64 | 	total := len(run.Errors)
 65 | 	for _, taskResult := range run.TaskResults {
 66 | 		total += len(taskResult.Errors)
 67 | 		total += len(taskResult.FailedChecks)
 68 | 	}
 69 | 	return total
 70 | }
 71 | 
 72 | func (run *Run) totalTaskResultErrorCount() int {
 73 | 	total := 0
 74 | 	for _, taskResult := range run.TaskResults {
 75 | 		total += len(taskResult.Errors)
 76 | 	}
 77 | 	return total
 78 | }
 79 | 
 80 | // ReSchedule will force all Run tasks to run on next time step
 81 | func (run *Run) ReSchedule() {
 82 | 	for _, task := range run.Tasks {
 83 | 		task.NextRun = task.PrevRun
 84 | 	}
 85 | 	Info.Printf("re-scheduling all tasks for '%s'\n", run.Host.Name)
 86 | }
 87 | 
 88 | // ReScheduleFailedTasks will force all Run failed tasks to run on next time step
 89 | func (run *Run) ReScheduleFailedTasks() {
 90 | 	for _, task := range run.Tasks {
 91 | 		for _, cf := range currentFails {
 92 | 			if cf.RelatedTask == task || cf.RelatedTTask == task {
 93 | 				task.ReSchedule(time.Now())
 94 | 				Info.Printf("re-scheduling task '%s'\n", task.Probe.Name)
 95 | 			}
 96 | 		}
 97 | 	}
 98 | }
 99 | 
100 | // DoChecks will evaluate checks on every TaskResult of the Run
101 | func (run *Run) DoChecks() {
102 | 	for _, taskResult := range run.TaskResults {
103 | 		taskResult.DoChecks()
104 | 	}
105 | }
106 | 
107 | // Go will execute the Run
108 | func (run *Run) Go() {
109 | 	const bootstrap = "bash -s --"
110 | 
111 | 	timeout := time.Second * 59
112 | 	timeoutChan := time.After(timeout)
113 | 
114 | 	run.StartTime = time.Now()
115 | 	defer func() {
116 | 		run.Duration = time.Now().Sub(run.StartTime)
117 | 	}()
118 | 
119 | 	if err := run.Host.Connection.Connect(); err != nil {
120 | 		run.addError(err)
121 | 		return
122 | 	}
123 | 	defer run.Host.Connection.Close()
124 | 
125 | 	run.DialDuration = time.Now().Sub(run.StartTime)
126 | 	if run.DialDuration > run.Host.Connection.SSHConnTimeWarn {
127 | 		run.addError(fmt.Errorf("SSH connection time was too long: %s (ssh_connection_time_warn = %s)", run.DialDuration, run.Host.Connection.SSHConnTimeWarn))
128 | 		return
129 | 	}
130 | 
131 | 	if err := run.preparePipes(); err != nil {
132 | 		run.addError(err)
133 | 		return
134 | 	}
135 | 
136 | 	ended := make(chan int, 1)
137 | 
138 | 	go func() {
139 | 		if err := run.Host.Connection.Session.Run(bootstrap); err != nil {
140 | 			run.addError(err)
141 | 		}
142 | 		ended <- 1
143 | 	}()
144 | 
145 | 	select {
146 | 	case <-ended:
147 | 		// nice
148 | 	case <-timeoutChan:
149 | 		run.addError(fmt.Errorf("timeout for this run, after %s", timeout))
150 | 		Trace.Println("run timeout")
151 | 	}
152 | }
153 | 


--------------------------------------------------------------------------------
/run_alerts.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"strconv"
  6 | )
  7 | 
  8 | // AlertsForRun creates a currentFail entry for this Run (if not already done)
  9 | // and rings corresponding alerts
 10 | func (run *Run) AlertsForRun() {
 11 | 	var bbuf bytes.Buffer
 12 | 	bbuf.WriteString(run.Host.Name)
 13 | 	// We now limit to one Fail per host, otherwise we may flood
 14 | 	// the user with Errors (ex: "alert, ssh connection 11s", then the same
 15 | 	// with 11.5s, etc). If there's an issue with a host, you have to fix it
 16 | 	// to get the others (if any left), it makes sense.
 17 | 	/*for _, err := range run.Errors {
 18 | 		bbuf.WriteString(err.Error())
 19 | 	}*/
 20 | 	hash := MD5Hash(bbuf.String())
 21 | 
 22 | 	currentFail := CurrentFailGetAndInc(hash)
 23 | 	currentFail.RelatedHost = run.Host
 24 | 
 25 | 	if currentFail.FailCount > 1 {
 26 | 		return
 27 | 	}
 28 | 
 29 | 	message := AlertMessageCreateForRun(AlertBad, run, currentFail)
 30 | 	message.RingAlerts()
 31 | }
 32 | 
 33 | // AlertsForTasks creates currentFail entries for each failed TaskResults
 34 | // (if not already done) and rings corresponding alerts
 35 | func (run *Run) AlertsForTasks() {
 36 | 	for _, taskRes := range run.TaskResults {
 37 | 		if len(taskRes.Errors) > 0 {
 38 | 			var bbuf bytes.Buffer
 39 | 			bbuf.WriteString(run.Host.Name + taskRes.Task.Probe.Name)
 40 | 			for _, err := range taskRes.Errors {
 41 | 				bbuf.WriteString(err.Error())
 42 | 			}
 43 | 			hash := MD5Hash(bbuf.String())
 44 | 
 45 | 			currentFail := CurrentFailGetAndInc(hash)
 46 | 			currentFail.RelatedTTask = taskRes.Task
 47 | 			if currentFail.FailCount > 1 {
 48 | 				return
 49 | 			}
 50 | 
 51 | 			message := AlertMessageCreateForTaskResult(AlertBad, run, taskRes, currentFail)
 52 | 			message.RingAlerts()
 53 | 		}
 54 | 	}
 55 | }
 56 | 
 57 | // AlertsForChecks creates currentFail entries for every FailedChecks of
 58 | // every TaskResults (if not already done) and rings corresponding alerts
 59 | func (run *Run) AlertsForChecks() {
 60 | 	// Failures
 61 | 	for _, taskRes := range run.TaskResults {
 62 | 		for _, check := range taskRes.FailedChecks {
 63 | 			Info.Printf("task '%s', check '%s' failed (%s)\n", taskRes.Task.Probe.Name, check.Desc, run.Host.Name)
 64 | 
 65 | 			hash := MD5Hash(run.Host.Name + taskRes.Task.Probe.Name + strconv.Itoa(check.Index))
 66 | 			currentFail := CurrentFailGetAndInc(hash)
 67 | 			currentFail.RelatedTask = taskRes.Task
 68 | 			if currentFail.FailCount != check.NeededFailures {
 69 | 				continue // not yet / already done
 70 | 			}
 71 | 
 72 | 			message := AlertMessageCreateForCheck(AlertBad, run, taskRes, check, currentFail)
 73 | 			message.RingAlerts()
 74 | 		}
 75 | 	}
 76 | 
 77 | 	// Successes
 78 | 	for _, taskRes := range run.TaskResults {
 79 | 		for _, check := range taskRes.SuccessfulChecks {
 80 | 			hash := MD5Hash(run.Host.Name + taskRes.Task.Probe.Name + strconv.Itoa(check.Index))
 81 | 			// we had a failure for that?
 82 | 			if currentFail := CurrentFailGetAndDec(hash); currentFail != nil {
 83 | 				if currentFail.OkCount == check.NeededSuccesses {
 84 | 					Info.Printf("task '%s', check '%s' is now OK (%s)\n", taskRes.Task.Probe.Name, check.Desc, run.Host.Name)
 85 | 					// send the good news (if the bad one was sent) and delete this currentFail
 86 | 					if currentFail.FailCount >= check.NeededFailures {
 87 | 						message := AlertMessageCreateForCheck(AlertGood, run, taskRes, check, currentFail)
 88 | 						message.RingAlerts()
 89 | 					}
 90 | 					CurrentFailDelete(hash)
 91 | 				}
 92 | 			}
 93 | 		}
 94 | 	}
 95 | }
 96 | 
 97 | // Alerts checks for Run failures, Task failures and Check
 98 | // failures and call corresponding AlertsFor*() functions
 99 | func (run *Run) Alerts() {
100 | 	run.ClearAnyCurrentTasksFails()
101 | 
102 | 	if run.totalErrorCount() == 0 {
103 | 		run.ClearAnyCurrentRunFails()
104 | 		run.DoChecks()
105 | 		if run.totalTaskResultErrorCount() > 0 {
106 | 			Info.Printf("found some 'tasks' error(s) (post-checks)\n")
107 | 			run.AlertsForTasks()
108 | 		} else {
109 | 			// ideal path, let's see if there's any check errors ?
110 | 			run.AlertsForChecks()
111 | 		}
112 | 	} else { // run & tasks errors
113 | 		if len(run.Errors) > 0 {
114 | 			Info.Printf("found some 'run' error(s)\n")
115 | 			run.AlertsForRun()
116 | 			run.ReSchedule()
117 | 		} else {
118 | 			Info.Printf("found some 'tasks' error(s)\n")
119 | 			run.AlertsForTasks()
120 | 		}
121 | 	}
122 | 
123 | 	run.ReScheduleFailedTasks()
124 | }
125 | 
126 | // ClearAnyCurrentRunFails deletes any currentFail for the Run (same Host)
127 | // and then rings GOOD alerts
128 | func (run *Run) ClearAnyCurrentRunFails() {
129 | 	for hash, cf := range currentFails {
130 | 		if cf.RelatedHost == run.Host {
131 | 			// there was a time when we were only ringing one message
132 | 			// for the whole host, but it's compliant with UniqueID idea
133 | 			message := AlertMessageCreateForRun(AlertGood, run, cf)
134 | 			message.RingAlerts()
135 | 			CurrentFailDelete(hash)
136 | 		}
137 | 	}
138 | }
139 | 
140 | // ClearAnyCurrentTasksFails deletes any currentFail for Run Tasks
141 | // and then rings GOOD alerts
142 | func (run *Run) ClearAnyCurrentTasksFails() {
143 | 	for _, taskRes := range run.TaskResults {
144 | 		if len(taskRes.Errors) == 0 {
145 | 			for hash, cf := range currentFails {
146 | 				if taskRes.Task == cf.RelatedTTask {
147 | 					message := AlertMessageCreateForTaskResult(AlertGood, run, taskRes, cf)
148 | 					message.RingAlerts()
149 | 					CurrentFailDelete(hash)
150 | 				}
151 | 			}
152 | 		}
153 | 	}
154 | }
155 | 


--------------------------------------------------------------------------------
/run_streams.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"fmt"
  6 | 	"io"
  7 | 	"os"
  8 | 	"path/filepath"
  9 | 	"strconv"
 10 | 	"strings"
 11 | 	"time"
 12 | )
 13 | 
 14 | func (run *Run) readStdout(std io.Reader, exitStatus chan int) {
 15 | 	scanner := bufio.NewScanner(std)
 16 | 
 17 | 	for scanner.Scan() {
 18 | 		text := scanner.Text()
 19 | 		result := run.currentTaskResult()
 20 | 
 21 | 		Trace.Printf("stdout=%s (%s)\n", text, run.Host.Name)
 22 | 
 23 | 		if len(text) > 2 && text[0:2] == "__" {
 24 | 			parts := strings.Split(text, "=")
 25 | 			switch parts[0] {
 26 | 			case "__EXIT":
 27 | 				if len(parts) != 2 {
 28 | 					run.addError(fmt.Errorf("Invalid __EXIT: %s", text))
 29 | 					continue
 30 | 				}
 31 | 				status, err := strconv.Atoi(parts[1])
 32 | 				if err != nil {
 33 | 					run.addError(fmt.Errorf("Invalid __EXIT value: %s", text))
 34 | 					continue
 35 | 				}
 36 | 				Trace.Printf("EXIT detected: %s (status %d, %s)\n", text, status, run.Host.Name)
 37 | 				exitStatus <- status
 38 | 			default:
 39 | 				run.addError(fmt.Errorf("Unknown keyword: %s", text))
 40 | 			}
 41 | 			continue
 42 | 		}
 43 | 
 44 | 		if len(text) > 1 && text[0:1] == "#" {
 45 | 			result.addLog(text)
 46 | 			continue
 47 | 		}
 48 | 
 49 | 		sep := strings.Index(text, ":")
 50 | 
 51 | 		if sep == -1 || sep == 0 {
 52 | 			result.addError(fmt.Errorf("invalid script output: '%s'", text))
 53 | 			continue
 54 | 		}
 55 | 
 56 | 		paramName := strings.TrimSpace(text[0:sep])
 57 | 		if !IsValidTokenName(paramName) {
 58 | 			result.addError(fmt.Errorf("invalid parameter name: '%s' (not a valid token name): '%s'", paramName, text))
 59 | 			continue
 60 | 		}
 61 | 		if !IsAllUpper(paramName) {
 62 | 			result.addError(fmt.Errorf("invalid parameter name: '%s' (upper case needed): '%s'", paramName, text))
 63 | 			continue
 64 | 		}
 65 | 
 66 | 		if _, exists := result.Values[paramName]; exists == true {
 67 | 			result.addError(fmt.Errorf("parameter '%s' defined multiple times", paramName))
 68 | 			continue
 69 | 		}
 70 | 
 71 | 		value := strings.TrimSpace(text[sep+1:])
 72 | 		if len(value) == 0 {
 73 | 			result.addError(fmt.Errorf("empty value for parameter '%s'", paramName))
 74 | 			continue
 75 | 		}
 76 | 
 77 | 		result.Values[paramName] = value
 78 | 	}
 79 | 
 80 | 	if err := scanner.Err(); err != nil {
 81 | 		run.addError(fmt.Errorf("Error reading stdout: %s", err))
 82 | 	}
 83 | }
 84 | 
 85 | func (run *Run) readStderr(std io.Reader) {
 86 | 	scanner := bufio.NewScanner(std)
 87 | 
 88 | 	for scanner.Scan() {
 89 | 		text := scanner.Text()
 90 | 		file := filepath.Base(run.currentTaskResult().Task.Probe.Script)
 91 | 		Trace.Printf("stderr=%s\n", text)
 92 | 		run.currentTaskResult().addError(fmt.Errorf("%s, stderr: %s", file, text))
 93 | 	}
 94 | 
 95 | 	if err := scanner.Err(); err != nil {
 96 | 		run.addError(fmt.Errorf("Error reading stderr: %s", err))
 97 | 		return // !!!
 98 | 	}
 99 | }
100 | 
101 | // scripts -> ssh
102 | func (run *Run) stdinInject(out io.WriteCloser, exitStatus chan int) {
103 | 
104 | 	defer out.Close()
105 | 
106 | 	// "pkill" dependency or Linux "ps"? (ie: not Cygwin)
107 | 	_, err := out.Write([]byte("export __MAIN_PID=$$\nfunction __kill_subshells() { pkill -TERM -P $__MAIN_PID cat; }\nexport -f __kill_subshells\n"))
108 | 	if err != nil {
109 | 		run.addError(fmt.Errorf("Error writing (setup parent bash): %s", err))
110 | 		return
111 | 	}
112 | 
113 | 	for num, task := range run.Tasks {
114 | 
115 | 		var result TaskResult
116 | 		run.TaskResults = append(run.TaskResults, &result)
117 | 		result.StartTime = time.Now()
118 | 		result.Task = task
119 | 		result.Host = run.Host
120 | 		result.ExitStatus = -1
121 | 		result.Values = make(map[string]string)
122 | 
123 | 		var scanner *bufio.Scanner
124 | 
125 | 		file, erro := os.Open(task.Probe.Script)
126 | 		if erro != nil {
127 | 			result.addError(fmt.Errorf("Failed to open script: %s", erro))
128 | 			continue
129 | 		}
130 | 		defer file.Close()
131 | 
132 | 		scanner = bufio.NewScanner(file)
133 | 
134 | 		args := task.Probe.Arguments
135 | 		params := make(map[string]interface{})
136 | 		for key, val := range task.Probe.Defaults {
137 | 			params[key] = val
138 | 		}
139 | 		// … and let's override defaults with host's ones
140 | 		for key, val := range run.Host.Defaults {
141 | 			params[key] = val
142 | 		}
143 | 		args = StringExpandVariables(args, params)
144 | 
145 | 		// cat is needed to "focus" stdin only on the child bash
146 | 		str := fmt.Sprintf("cat | __SCRIPT_ID=%d bash -s -- %s ; echo __EXIT=$?\n", num, args)
147 | 		Trace.Printf("child(%s)=%s", run.Host.Name, str)
148 | 
149 | 		_, err = out.Write([]byte(str))
150 | 		if err != nil {
151 | 			run.addError(fmt.Errorf("Error writing (starting child bash): %s", err))
152 | 			return
153 | 		}
154 | 
155 | 		// no newline so we dont change line numbers
156 | 		_, err = out.Write([]byte("trap __kill_subshells EXIT ; "))
157 | 		if err != nil {
158 | 			run.addError(fmt.Errorf("Error writing (init child bash): %s", err))
159 | 			return
160 | 		}
161 | 
162 | 		for scanner.Scan() {
163 | 			text := scanner.Text()
164 | 			Trace.Printf("stdin=%s (%s)\n", text, run.Host.Name)
165 | 			_, errw := out.Write([]byte(text + "\n"))
166 | 			if errw != nil {
167 | 				run.addError(fmt.Errorf("Error writing: %s", errw))
168 | 				return
169 | 			}
170 | 		}
171 | 
172 | 		Trace.Printf("killing subshell (%s)\n", run.Host.Name)
173 | 		_, err = out.Write([]byte("__kill_subshells\n"))
174 | 		if err != nil {
175 | 			run.addError(fmt.Errorf("Error writing (while killing subshell): %s", err))
176 | 			return
177 | 		}
178 | 
179 | 		if err := scanner.Err(); err != nil {
180 | 			run.addError(fmt.Errorf("Error scanner: %s", err))
181 | 			return
182 | 		}
183 | 
184 | 		status := <-exitStatus
185 | 		result.ExitStatus = status
186 | 		if status != 0 {
187 | 			result.addError(fmt.Errorf("detected non-zero exit status: %d", status))
188 | 		}
189 | 
190 | 		result.Duration = time.Now().Sub(result.StartTime)
191 | 		if result.Duration > result.Task.Probe.Timeout {
192 | 			result.addError(fmt.Errorf("task duration was too long (timeout is %s)", result.Task.Probe.Timeout))
193 | 		}
194 | 	}
195 | }
196 | 
197 | func (run *Run) preparePipes() error {
198 | 	exitStatus := make(chan int)
199 | 	session := run.Host.Connection.Session
200 | 
201 | 	stdin, err := session.StdinPipe()
202 | 	if err != nil {
203 | 		return fmt.Errorf("Unable to setup stdin for session: %v", err)
204 | 	}
205 | 	go run.stdinInject(stdin, exitStatus)
206 | 
207 | 	stdout, err := session.StdoutPipe()
208 | 	if err != nil {
209 | 		return fmt.Errorf("Unable to setup stdout for session: %v", err)
210 | 	}
211 | 	go run.readStdout(stdout, exitStatus)
212 | 
213 | 	stderr, err := session.StderrPipe()
214 | 	if err != nil {
215 | 		return fmt.Errorf("Unable to setup stderr for session: %v", err)
216 | 	}
217 | 	go run.readStderr(stderr)
218 | 
219 | 	return nil
220 | }
221 | 


--------------------------------------------------------------------------------
/ssh.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"bytes"
  6 | 	"crypto/hmac"
  7 | 	"crypto/sha1"
  8 | 	"crypto/x509"
  9 | 	"encoding/base64"
 10 | 	"encoding/pem"
 11 | 	"fmt"
 12 | 	"io"
 13 | 	"io/ioutil"
 14 | 	"net"
 15 | 	"os"
 16 | 	"path/filepath"
 17 | 	"strings"
 18 | 	"time"
 19 | 
 20 | 	"golang.org/x/crypto/ssh"
 21 | 	"golang.org/x/crypto/ssh/agent"
 22 | 	"golang.org/x/crypto/ssh/knownhosts"
 23 | )
 24 | 
// Connection is the final form of connection informations of hosts.d files.
// It holds both the parameters used to dial the SSH server and, once Connect
// succeeded, the live client and session.
type Connection struct {
	// User is the remote login name given to the SSH client configuration.
	User            string
	// Auths lists the SSH authentication methods offered to the server.
	Auths           []ssh.AuthMethod
	// Host and Port form the "host:port" TCP address dialed by Connect.
	Host            string
	Port            int
	// Ciphers, when not empty, replaces the default SSH cipher list.
	Ciphers         []string
	// SSHConnTimeWarn is not used in this file; presumably a connection
	// duration threshold above which a warning is raised (confirm in callers).
	SSHConnTimeWarn time.Duration
	// Session and Client are set by Connect and released by Close.
	Session         *ssh.Session
	Client          *ssh.Client
}
 36 | 
 37 | // Close will clone the connection and the session
 38 | func (connection *Connection) Close() error {
 39 | 	var (
 40 | 		sessionError error
 41 | 		clientError  error
 42 | 	)
 43 | 
 44 | 	Trace.Printf("SSH closing connection (%s)\n", connection.Host)
 45 | 
 46 | 	if connection.Session != nil {
 47 | 		sessionError = connection.Session.Close()
 48 | 	}
 49 | 	if connection.Client != nil {
 50 | 		clientError = connection.Client.Close()
 51 | 	}
 52 | 
 53 | 	if clientError != nil {
 54 | 		return clientError
 55 | 	}
 56 | 
 57 | 	return sessionError
 58 | }
 59 | 
 60 | // knownHostHash hash hostname using salt64 like ssh is
 61 | // doing for "hashed" .ssh/known_hosts files
 62 | func knownHostHash(hostname string, salt64 string) string {
 63 | 	buffer, err := base64.StdEncoding.DecodeString(salt64)
 64 | 	if err != nil {
 65 | 		return ""
 66 | 	}
 67 | 	h := hmac.New(sha1.New, buffer)
 68 | 	h.Write([]byte(hostname))
 69 | 	res := h.Sum(nil)
 70 | 
 71 | 	hash := base64.StdEncoding.EncodeToString(res)
 72 | 	return hash
 73 | }
 74 | 
 75 | // Implements ssh.HostKeyCallback which is now required due to CVE-2017-3204
 76 | // see https://github.com/golang/go/issues/29286 for the ecdsa-sha2-nistp256 part
 77 | // ("If ClientConfig.HostKeyAlgorithms is not set, a reasonable default is set for acceptable host key type")
 78 | func hostKeyChecker(hostname string, remote net.Addr, key ssh.PublicKey) error {
 79 | 	path := filepath.Join(os.Getenv("HOME"), ".ssh", "known_hosts")
 80 | 	hostKeyCallback, err := knownhosts.New(path)
 81 | 	if err != nil {
 82 | 		return err
 83 | 	}
 84 | 
 85 | 	err = hostKeyCallback(hostname, remote, key)
 86 | 	if err != nil {
 87 | 		return fmt.Errorf("%s, use ssh client to manually connect to %s (you may have to specify algo: ssh -o HostKeyAlgorithms=ecdsa-sha2-nistp256 …)", err, hostname)
 88 | 	}
 89 | 	return nil
 90 | }
 91 | 
 92 | // Old ssh.HostKeyCallback implementation
 93 | // We parse $HOME/.ssh/known_hosts and check for a matching key + hostname
 94 | // Supported : Hashed hostnames, revoked keys (or any other marker), non-standard ports
 95 | // Unsupported yet: patterns (*? wildcards)
 96 | // This code is temporary, x/crypto/ssh will probably provide something similar. One day.
 97 | func _hostKeyChecker(hostname string, remote net.Addr, key ssh.PublicKey) error {
 98 | 	path := filepath.Join(os.Getenv("HOME"), ".ssh", "known_hosts")
 99 | 	file, err := os.Open(path)
100 | 	if err != nil {
101 | 		return fmt.Errorf("opening '%s': %s", path, err)
102 | 	}
103 | 	defer file.Close()
104 | 
105 | 	// remove standard port if given, add square brackets for non-standard ones
106 | 	hp := strings.Split(hostname, ":")
107 | 	if len(hp) == 2 {
108 | 		if hp[1] == "22" {
109 | 			hostname = hp[0]
110 | 		} else {
111 | 			hostname = "[" + hp[0] + "]:" + hp[1]
112 | 		}
113 | 	}
114 | 
115 | 	scanner := bufio.NewScanner(file)
116 | 	for scanner.Scan() {
117 | 		marker, hosts, hostKey, _, _, err := ssh.ParseKnownHosts(scanner.Bytes())
118 | 		if err == io.EOF {
119 | 			continue
120 | 		}
121 | 		if err != nil {
122 | 			return fmt.Errorf("parsing '%s': %s", path, err)
123 | 		}
124 | 		if marker != "" {
125 | 			continue // @cert-authority or @revoked
126 | 		}
127 | 		fmt.Printf("%s VS %s", key.Marshal(), hostKey.Marshal())
128 | 		if bytes.Equal(key.Marshal(), hostKey.Marshal()) {
129 | 			for _, host := range hosts {
130 | 				if len(host) > 1 && host[0:1] == "|" {
131 | 					parts := strings.Split(host, "|")
132 | 					if parts[1] != "1" {
133 | 						Trace.Printf("'%s': only type 1 is supported for hashed hosts", path)
134 | 						continue
135 | 					}
136 | 					if knownHostHash(hostname, parts[2]) == parts[3] {
137 | 						Trace.Printf("successfully found a matching key in '%s' for (hashed) '%s'", path, hostname)
138 | 						return nil
139 | 					}
140 | 				} else {
141 | 					if host == hostname {
142 | 						Trace.Printf("successfully found a matching key in '%s' for '%s'", path, hostname)
143 | 						return nil
144 | 					}
145 | 				}
146 | 			}
147 | 			Info.Printf("searching '%s' in '%s': found a matching key, but not with exact hostname(s): %s (patterns are not supported yet)", hostname, path, strings.Join(hosts, ", "))
148 | 		}
149 | 	}
150 | 
151 | 	return fmt.Errorf("can't find matching key in '%s' for '%s' (try 'ssh %s' to add it?)", path, hostname, hostname)
152 | }
153 | 
154 | func hostKeyBilndTrustChecker(hostname string, remote net.Addr, key ssh.PublicKey) error {
155 | 	return nil
156 | }
157 | 
158 | // Connect will dial SSH server and open a session
159 | func (connection *Connection) Connect() error {
160 | 	sshConfig := &ssh.ClientConfig{
161 | 		User: connection.User,
162 | 		Auth: connection.Auths,
163 | 	}
164 | 
165 | 	if GlobalConfig.SSHBlindTrust == true {
166 | 		sshConfig.HostKeyCallback = hostKeyBilndTrustChecker
167 | 	} else {
168 | 		sshConfig.HostKeyCallback = hostKeyChecker
169 | 	}
170 | 
171 | 	if len(connection.Ciphers) > 0 {
172 | 		sshConfig.Config = ssh.Config{
173 | 			Ciphers: connection.Ciphers,
174 | 		}
175 | 	}
176 | 
177 | 	dial, err := ssh.Dial("tcp", fmt.Sprintf("%s:%d", connection.Host, connection.Port), sshConfig)
178 | 	Trace.Printf("SSH connection to %s@%s:%d\n", connection.User, connection.Host, connection.Port)
179 | 	if err != nil {
180 | 		return fmt.Errorf("Failed to dial: %s", err)
181 | 	}
182 | 	connection.Client = dial
183 | 
184 | 	session, err := dial.NewSession()
185 | 	if err != nil {
186 | 		return fmt.Errorf("Failed to create session: %s", err)
187 | 	}
188 | 	connection.Session = session
189 | 
190 | 	return nil
191 | }
192 | 
193 | // PublicKeyFile returns an AuthMethod using a private key file
194 | func PublicKeyFile(file string) ssh.AuthMethod {
195 | 	buffer, err := ioutil.ReadFile(file)
196 | 	if err != nil {
197 | 		return nil
198 | 	}
199 | 
200 | 	key, err := ssh.ParsePrivateKey(buffer)
201 | 	if err != nil {
202 | 		return nil
203 | 	}
204 | 	return ssh.PublicKeys(key)
205 | }
206 | 
207 | // PublicKeyFilePassPhrase returns an AuthMethod using a private key file
208 | // and a passphrase
209 | func PublicKeyFilePassPhrase(file, passphrase string) ssh.AuthMethod {
210 | 	buffer, err := ioutil.ReadFile(file)
211 | 	if err != nil {
212 | 		return nil
213 | 	}
214 | 
215 | 	block, _ := pem.Decode(buffer)
216 | 	private, err := x509.DecryptPEMBlock(block, []byte(passphrase))
217 | 	if err != nil {
218 | 		return nil
219 | 	}
220 | 	block.Headers = nil
221 | 	block.Bytes = private
222 | 	key, err := ssh.ParsePrivateKey(pem.EncodeToMemory(block))
223 | 	if err != nil {
224 | 		return nil
225 | 	}
226 | 	return ssh.PublicKeys(key)
227 | }
228 | 
229 | // SSHAgent returns an AuthMethod using SSH agent connection. The pubkeyFile
230 | // params restricts the AuthMethod to only one key, so it wont spam the
231 | // SSH server if the agent holds multiple keys.
232 | func SSHAgent(pubkeyFile string) (ssh.AuthMethod, error) {
233 | 	sshAgent, errd := net.Dial("unix", os.Getenv("SSH_AUTH_SOCK"))
234 | 	if errd == nil {
235 | 		agent := agent.NewClient(sshAgent)
236 | 
237 | 		// we'll try every key, then
238 | 		if pubkeyFile == "" {
239 | 			return ssh.PublicKeysCallback(agent.Signers), nil
240 | 		}
241 | 
242 | 		agentSigners, err := agent.Signers()
243 | 		if err != nil {
244 | 			return nil, fmt.Errorf("requesting SSH agent key/signer list: %s", err)
245 | 		}
246 | 
247 | 		buffer, err := ioutil.ReadFile(pubkeyFile)
248 | 		if err != nil {
249 | 			return nil, fmt.Errorf("reading public key '%s': %s", pubkeyFile, err)
250 | 		}
251 | 
252 | 		fields := strings.Fields(string(buffer))
253 | 
254 | 		if len(fields) < 3 {
255 | 			return nil, fmt.Errorf("invalid field count for public key '%s'", pubkeyFile)
256 | 		}
257 | 
258 | 		buffer2, err := base64.StdEncoding.DecodeString(fields[1])
259 | 		if err != nil {
260 | 			return nil, fmt.Errorf("decoding public key '%s': %s", pubkeyFile, err)
261 | 		}
262 | 
263 | 		key, err := ssh.ParsePublicKey(buffer2)
264 | 		if err != nil {
265 | 			return nil, fmt.Errorf("parsing public key '%s': %s", pubkeyFile, err)
266 | 		}
267 | 
268 | 		for _, potentialSigner := range agentSigners {
269 | 			if bytes.Compare(key.Marshal(), potentialSigner.PublicKey().Marshal()) == 0 {
270 | 				Trace.Printf("successfully found %s key in the SSH agent (%s)", pubkeyFile, fields[2])
271 | 				cb := func() ([]ssh.Signer, error) {
272 | 					signers := []ssh.Signer{potentialSigner}
273 | 					return signers, nil
274 | 				}
275 | 				return ssh.PublicKeysCallback(cb), nil
276 | 			}
277 | 		}
278 | 		return nil, fmt.Errorf("can't find '%s' key in the SSH agent", pubkeyFile)
279 | 	}
280 | 	return nil, fmt.Errorf("SSH agent: %v (check SSH_AUTH_SOCK?)", errd)
281 | }
282 | 


--------------------------------------------------------------------------------
/task.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"time"
 6 | )
 7 | 
// Task structure holds (mainly timing) informations about a Task
// next and previous execution
type Task struct {
	// Probe is the probe definition this task executes (script, arguments,
	// RunIf condition, …).
	Probe *Probe
	//~ LastRun        time.Time
	//~ RunCount       int
	//~ RemainingTicks int
	// NextRun is the time given to the last ReSchedule call.
	NextRun time.Time
	// PrevRun is the value NextRun had before the last ReSchedule call.
	PrevRun time.Time
}
18 | 
19 | // ReSchedule is used to schedule another run for this
20 | // task in the future
21 | func (task *Task) ReSchedule(val time.Time) {
22 | 	task.PrevRun = task.NextRun
23 | 	task.NextRun = val
24 | }
25 | 
26 | // Taskable returns true if the task is currently available (see RunIf expression)
27 | func (task *Task) Taskable() (bool, error) {
28 | 	// no RunIf condition? taskable, then
29 | 	if task.Probe.RunIf == nil {
30 | 		return true, nil
31 | 	}
32 | 	res, err := task.Probe.RunIf.Evaluate(nil)
33 | 	if err != nil {
34 | 		return false, fmt.Errorf("%s (run_if expression '%s' probe)", err, task.Probe.Name)
35 | 	}
36 | 	if _, ok := res.(bool); ok == false {
37 | 		return false, fmt.Errorf("'run_if' must return a boolean value (probe '%s')", task.Probe.Name)
38 | 	}
39 | 	return res.(bool), nil
40 | }
41 | 


--------------------------------------------------------------------------------
/task_result.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"regexp"
 6 | 	"strconv"
 7 | 	"time"
 8 | )
 9 | 
// TaskResult holds informations about Task execution and Check results
type TaskResult struct {
	// Task and Host tell which task was executed, and on which host.
	Task             *Task
	Host             *Host
	// Values are the raw (string) values of the task; DoChecks converts
	// them to int / float64 when they look like numbers.
	Values           map[string]string
	// ExitStatus is the exit status of the script (stdinInject sets it to -1
	// until a real status is received).
	ExitStatus       int
	// StartTime and Duration give the timing of the execution.
	StartTime        time.Time
	Duration         time.Duration
	Logs             []string // currently, only output # lines
	// Errors collects every error recorded using addError.
	Errors           []error
	// FailedChecks are the checks whose 'if' expression returned true,
	// SuccessfulChecks the ones where it returned false (see DoChecks).
	FailedChecks     []*Check
	SuccessfulChecks []*Check
}
23 | 
24 | func (result *TaskResult) addError(err error) {
25 | 	Info.Printf("TaskResult error: %s (host '%s')", err, result.Host.Name)
26 | 	result.Errors = append(result.Errors, err)
27 | }
28 | 
29 | func (result *TaskResult) addLog(line string) {
30 | 	Trace.Printf("TaskResult log: %s (host '%s')", line, result.Host.Name)
31 | 	result.Logs = append(result.Logs, line)
32 | }
33 | 
34 | // DoChecks evaluates every Check in the TaskResult and fills
35 | // FailedChecks and SuccessfulChecks arrays
36 | func (result *TaskResult) DoChecks() {
37 | 	// build parameter map (with values and defaults)
38 | 	params := make(map[string]interface{})
39 | 
40 | 	for key, val := range result.Values {
41 | 		var err error
42 | 		if match, _ := regexp.MatchString("^[0-9]+$", val); match == true {
43 | 			params[key], err = strconv.Atoi(val)
44 | 			if err != nil {
45 | 				result.addError(fmt.Errorf("can't convert '%s' to an int (%s)", val, err))
46 | 			}
47 | 			continue
48 | 		}
49 | 		if match, _ := regexp.MatchString("^[0-9]+\\.[0-9]+$", val); match == true {
50 | 			params[key], err = strconv.ParseFloat(val, 64)
51 | 			if err != nil {
52 | 				result.addError(fmt.Errorf("can't convert '%s' to a float64 (%s)", val, err))
53 | 			}
54 | 			continue
55 | 		}
56 | 		// string
57 | 		params[key] = val
58 | 	}
59 | 
60 | 	for key, val := range result.Task.Probe.Defaults {
61 | 		params[key] = val
62 | 	}
63 | 
64 | 	// … and let's override defaults with host's ones
65 | 	for key, val := range result.Host.Defaults {
66 | 		params[key] = val
67 | 	}
68 | 
69 | 	for _, check := range result.Task.Probe.Checks {
70 | 		res, err := check.If.Evaluate(params)
71 | 		Trace.Printf("%s: %t (err: %s)\n", check.Desc, res, err)
72 | 		if err != nil {
73 | 			result.addError(fmt.Errorf("%s (expression '%s' in '%s' check)", err, check.If, check.Desc))
74 | 			continue
75 | 		}
76 | 		if _, ok := res.(bool); ok == false {
77 | 			result.addError(fmt.Errorf("[[check]] 'if' must return a boolean value (expression '%s' in '%s' check)", check.If, check.Desc))
78 | 			continue
79 | 		}
80 | 
81 | 		if res == true {
82 | 			result.FailedChecks = append(result.FailedChecks, check)
83 | 		} else {
84 | 			result.SuccessfulChecks = append(result.SuccessfulChecks, check)
85 | 		}
86 | 	}
87 | }
88 | 


--------------------------------------------------------------------------------
/tools.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"crypto/md5"
 5 | 	"encoding/hex"
 6 | 	"fmt"
 7 | 	"regexp"
 8 | 	"strconv"
 9 | 	"strings"
10 | )
11 | 
// stringWordSeparators is a regexp character class matching the characters
// allowed right after a "$variable" name. The dash must stay the last
// character of the class: placed between '+' and '=' it is read as a range
// (which includes digits), so "$foo" would also match inside "$foo1".
const stringWordSeparators = "[ \t\n,.;:\\(\\)\\[\\]{}'\"/\\\\!\\?<>@#|*+=-]"
13 | 
// IsValidTokenName returns true if argument is not empty and uses only
// allowed chars for a token (ASCII letters, digits and underscore)
func IsValidTokenName(token string) bool {
	if token == "" {
		return false
	}
	for i := 0; i < len(token); i++ {
		c := token[i]
		switch {
		case c >= 'A' && c <= 'Z':
		case c >= 'a' && c <= 'z':
		case c >= '0' && c <= '9':
		case c == '_':
		default:
			return false
		}
	}
	return true
}
19 | 
// IsAllUpper returns true if string is all uppercase, ie: converting it to
// uppercase does not change it (so strings without any letter are "all
// uppercase" too)
func IsAllUpper(str string) bool {
	upper := strings.ToUpper(str)
	return upper == str
}
24 | 
// MD5Hash will hash input text and return the MD5 sum as a lowercase
// hexadecimal string
func MD5Hash(text string) string {
	sum := md5.Sum([]byte(text))
	return hex.EncodeToString(sum[:])
}
31 | 
// InterfaceValueToString converts most interface types to string: signed
// integers (int, int32, int64), floats, strings and booleans. The string
// "INVALID_TYPE" is returned for any other type (nil included).
func InterfaceValueToString(iv interface{}) string {
	switch value := iv.(type) {
	case string:
		return value
	case bool:
		return strconv.FormatBool(value)
	case int:
		return strconv.Itoa(value)
	case int32:
		return strconv.FormatInt(int64(value), 10)
	case int64:
		return strconv.FormatInt(value, 10)
	case float32:
		// fixed 6 decimals, unlike float64 (shortest representation)
		return fmt.Sprintf("%f", value)
	case float64:
		return strconv.FormatFloat(value, 'f', -1, 64)
	default:
		return "INVALID_TYPE"
	}
}
52 | 
53 | // StringFindVariables returns a deduplicated slice of all "variables" ($test)
54 | // in the string
55 | func StringFindVariables(str string) []string {
56 | 	re := regexp.MustCompile("\\$([a-zA-Z0-9_]+)(" + stringWordSeparators + "|$)")
57 | 	all := re.FindAllStringSubmatch(str, -1)
58 | 
59 | 	// deduplicate using a map
60 | 	varMap := make(map[string]bool)
61 | 	for _, v := range all {
62 | 		varMap[v[1]] = true
63 | 	}
64 | 
65 | 	// map to slice
66 | 	res := []string{}
67 | 	for name := range varMap {
68 | 		res = append(res, name)
69 | 	}
70 | 	return res
71 | }
72 | 
73 | // StringExpandVariables expands "variables" ($test, for instance) in str
74 | // and returns a new string
75 | func StringExpandVariables(str string, variables map[string]interface{}) string {
76 | 	vars := StringFindVariables(str)
77 | 	for _, v := range vars {
78 | 		if val, exists := variables[v]; exists == true {
79 | 			re := regexp.MustCompile("\\$" + v + "(" + stringWordSeparators + "|$)")
80 | 			str = re.ReplaceAllString(str, InterfaceValueToString(val)+"${1}")
81 | 		}
82 | 	}
83 | 	return str
84 | }
85 | 


--------------------------------------------------------------------------------