├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── kube-alerts-rc.yml.sample ├── src └── kubernetes-alerts │ ├── api-client.go │ ├── email-notifier.go │ ├── heapster.go │ ├── kubernetes-alerts.go │ ├── kubernetes.go │ ├── kv-client.go │ ├── node-checker.go │ ├── notifier.go │ ├── slack-notifier.go │ └── util.go └── vendor └── manifest /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | /kubernetes-alerts 27 | /pkg 28 | /bin 29 | /tar 30 | vendor/src 31 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - tip 4 | install: 5 | - go get github.com/constabulary/gb/... 
6 | script: 7 | - make clean deps build 8 | before_deploy: 9 | - GOARCH=arm GOOS=linux make deps build 10 | - GOARCH=386 GOOS=linux make deps build 11 | - GOARCH=amd64 GOOS=linux make deps build 12 | 13 | deploy: 14 | provider: releases 15 | api_key: 16 | secure: "nt8Ny2r5lL5sF6osYMfVtSsS7TfRDWPynYtFiZqaDpvMMLpwg/HkBSeJye4yV4LB3/g31ahgJdq4AJEwlAAa/2t7YpkQTiBNSIW7Qsk+KjtlU0bXkZAivAfWuyWfACIThzfz7561TFoLolsZL8ZR8ezv+bNXLSZabWRENDps2OwQGEctp5tJvgNy7xyBCdhfEZhFA30Ot+NisS+xAF+HDgprgbdmFIJOHVvTl95AcHyEVquyjWwpzw8ZEp6C9Vn6WwWDpRb4+mmDxsGhIq1ehXlwvaeTh/mDQ8i/XAOqoulAHVPUNQzmWSkKCrBJVQoQs7vFnn88ZxeIWxohTEyusvTNiseNyWHVB2dXDieUB9aNW/YZGGAT5bBSEabMr9xruOkNkxv3JjeEfzNvwbLX+E8KY83yTx4/RhpWMEMdgBbmLOspOgPPdYJmn+iSlyNc7s+5bXxHsCfJSgiej/+A5XjB+lugLnCQcoyVq60CkrgS1H7itRXWGbR4SxJiEZnvgfZL29mQ6udF8tKvq6v4bJAL3eg+VX1M4wYfQZ892wzKqLSXafHdBfsRi1qT8pvyELoD4fZM/5pEhVAvcj7CrpKEpeQ8PQ4pdu1Gsmkvz+CRM5c1e8u0dQyH+Z9OVzUuvYx2oEdPK8FIz92a6iQMk7TZq8wvHB9QHQM54O463QY=" 17 | file: tar/* 18 | file_glob: true 19 | on: 20 | repo: AcalephStorage/kubernetes-alerts 21 | tags: true 22 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.5 2 | MAINTAINER Acaleph 3 | 4 | ADD . /kube-alerts 5 | RUN go get github.com/constabulary/gb/... 6 | 7 | WORKDIR /kube-alerts 8 | RUN gb vendor restore 9 | RUN gb build 10 | 11 | EXPOSE 9000 12 | CMD [] 13 | ENTRYPOINT [ "/kube-alerts/bin/kubernetes-alerts" ] 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | APP_NAME = kube-alerts 2 | 3 | all: clean format deps build 4 | 5 | clean: 6 | @echo "--> Cleaning build" 7 | @rm -rf ./bin ./tar ./pkg 8 | 9 | format: 10 | @echo "--> Formatting source code" 11 | @go fmt ./... 12 | 13 | deps: 14 | @echo "--> Getting dependencies" 15 | @gb vendor restore 16 | 17 | # test: format 18 | # @echo "--> Testing application" 19 | # @gb test ... 20 | 21 | build: format 22 | @echo "--> Building all application" 23 | @gb build ... 
24 | @mkdir -p bin/`go env GOOS`/`go env GOARCH` 25 | @mkdir -p tar 26 | @if [ -e bin/kubernetes-alerts-`go env GOOS`-`go env GOARCH` ]; then mv bin/kubernetes-alerts-`go env GOOS`-`go env GOARCH` bin/`go env GOOS`/`go env GOARCH`/${APP_NAME}; fi; 27 | @if [ -e bin/kubernetes-alerts ]; then mv bin/kubernetes-alerts bin/`go env GOOS`/`go env GOARCH`/${APP_NAME}; fi; 28 | @tar cfz tar/${APP_NAME}-`go env GOOS`-`go env GOARCH`.tgz -C bin/`go env GOOS`/`go env GOARCH` ${APP_NAME} 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | kube-alerts 2 | =========== 3 | 4 | [![Build Status](https://travis-ci.org/AcalephStorage/kubernetes-alerts.svg?branch=develop)](https://travis-ci.org/AcalephStorage/kubernetes-alerts) 5 | 6 | Monitor Kubernetes and send alert notifications to Email, Slack etc. 7 | 8 | This follows a similar approach to [consul-alerts](https://github.com/AcalephStorage/consul-alerts). 9 | 10 | ## Requirement 11 | 12 | 1. Kubernetes 13 | 2. Heapster 14 | 3. Etcd 15 | 16 | Releases 17 | -------- 18 | 19 | Binaries are [here](https://github.com/AcalephStorage/kubernetes-alerts/releases) and docker images [here](https://quay.io/repository/acaleph/kube-alerts). 20 | 21 | Build 22 | ----- 23 | 24 | To build from source, clone the repo: 25 | 26 | ``` 27 | $ git clone https://github.com/AcalephStorage/kubernetes-alerts.git 28 | $ cd kubernetes-alerts 29 | ``` 30 | 31 | Get gb (package manager): 32 | 33 | ``` 34 | $ go get github.com/constabulary/gb/... 35 | ``` 36 | 37 | Get dependencies: 38 | 39 | ``` 40 | $ make deps 41 | ``` 42 | 43 | and lastly, build: 44 | 45 | ``` 46 | $ make build 47 | ``` 48 | 49 | The binary will be in `bin/$GOOS/$GOARCH` directory. 50 | 51 | Docker 52 | ------ 53 | 54 | The docker image can be pulled from quay.io. 
55 | 56 | ``` 57 | $ docker pull quay.io/acaleph/kube-alerts:$tag 58 | ``` 59 | 60 | Usage 61 | ----- 62 | 63 | ``` 64 | $ kube-alerts [options] 65 | ``` 66 | 67 | or using docker: 68 | 69 | ``` 70 | $ docker run quay.io/acaleph/kube-alerts:$tag [options] 71 | ``` 72 | 73 | Configuration 74 | ------------- 75 | 76 | ### Connection 77 | 78 | kube-alerts requires to connect to Kubernetes, Heapster, and ETCD. Here are the flags to configure the connections: 79 | 80 | #### Kubernetes flags 81 | 82 | | flag | description | example | 83 | |----------------------------|-------------------------------------------------|-----------------------------------| 84 | | -k8s-api | the base url for the Kubernetes API | https://localhost/api/v1 | 85 | | -k8s-certificate-authority | the certificate authority of the Kubernetes API | /etc/kubernetes/ssl/ca.pem | 86 | | -k8s-client-certificate | the client certificate for authentication | /etc/kubernetes/ssl/admin.pem | 87 | | -k8s-client-key | the client key for authentication | /etc/kubernetes/ssl/admin-key.pem | 88 | | -k8s-token | the token for authentication | F0XBLTDaL3xDlBsq5YKAFIH7yzZNBhs6 | 89 | 90 | #### Heapster flags 91 | 92 | | flag | description | example | 93 | |---------------------------------|-------------------------------------------------|-----------------------------------| 94 | | -heapster-api | the base url for the Heapster API | https://localhost/api/v1 | 95 | | -heapster-certificate-authority | the certificate authority of the Heapster API | /etc/kubernetes/ssl/ca.pem | 96 | | -heapster-client-certificate | the client certificate for authentication | /etc/kubernetes/ssl/admin.pem | 97 | | -heapster-client-key | the client key for authentication | /etc/kubernetes/ssl/admin-key.pem | 98 | | -heapster-token | the token for authentication | F0XBLTDaL3xDlBsq5YKAFIH7yzZNBhs6 | 99 | 100 | Note: Heapster can be accessed via Kubernetes. The heapster flag may change (not yet used). 
101 | 102 | #### KV store flags 103 | 104 | | flag | description | example | 105 | |---------------------------|----------------------------------------------------|------------------------------| 106 | | -kv-addresses | comma separated addresses for the KV store | https://localhost:2379 | 107 | | -kv-backend | the KV store backend (only etcd supported for now) | etcd | 108 | | -kv-certificate-authority | the certificate authority of the KV store | /etc/etcd/ssl/ca.pem | 109 | | -kv-client-certificate | the client certificate for authentication | /etc/etcd/ssl/client.pem | 110 | | -kv-client-key | the client key for authentication | /etc/etcd/ssl/client-key.pem | 111 | 112 | 113 | ### Monitoring 114 | 115 | There are three major kinds of checks that are monitored by kube-alerts: node checks, cluster checks, and resource checks (pods). At the moment, only node checks are available. Here are the options: 116 | 117 | #### Node check flags 118 | 119 | | flag | description | example | 120 | |-----------------------|------------------------------------------------------------------------------|---------| 121 | | -node-check-interval | interval when running the node checks (seconds) | 10 | 122 | | -node-check-threshold | amount of time (seconds) a change of state needed to qualify as state change | 60 | 123 | 124 | 125 | ### Notification 126 | 127 | Different notifiers can be configured. At the moment, only Slack and Email are supported. 
128 | 129 | #### General notification flags 130 | 131 | | flag | description | example | 132 | |------------------------|-----------------------------------------------------------------------|---------| 133 | | -notification-interval | amount of time (seconds) to wait before sending pending notifications | 60 | 134 | | -enable-email | enable email notifier | true | 135 | | -enable-slack | enable slack notifier | true | 136 | 137 | #### Email notifier flags 138 | 139 | | flag | description | example | 140 | |------------------------|---------------------------------------------------------|---------------| 141 | | -email-cluster-name | the cluster name to appear on the default email message | acaleph | 142 | | -email-url | the SMTP server URL | localhost | 143 | | -email-port | the SMTP server port | 25 | 144 | | -email-username | the SMTP username | user | 145 | | -email-password | the SMTP password | password | 146 | | -email-receivers | comma-separated list of email addresses to receive notifications | dood@acale.ph | 147 | | -email-sender-email | the email of the sender | food@acale.ph | 148 | | -email-sender-alias | alias of the sender | kube-alerts | 149 | | -email-template | custom email template | | 150 | 151 | Note: The custom email template is optional; a default template will be used if this is not provided. (TODO: document custom template) 152 | 153 | #### Slack notifier flags 154 | 155 | | flag | description | example | 156 | |---------------------|---------------------------------------------------------|--------------------------------------| 157 | | -slack-cluster-name | the cluster name to appear on the default slack message | acaleph | 158 | | -slack-url | the slack webhook URL | https://hooks.slack.com/services/... | 159 | | -slack-username | the username to appear on the slack message | kube-alerts | 160 | 161 | ### Logging 162 | 163 | Log level can be set to limit the verbosity of the log. 
164 | 165 | | flag | description | example | 166 | |------------|---------------------------------------------------------------|---------| 167 | | -log-level | log level, valid values are [debug, info, warn, error, panic] | debug | 168 | 169 | TODO 170 | ---- 171 | 172 | This is an initial release, a few more things need to be done: 173 | 174 | - [ ] implement cluster level checks 175 | - [ ] implement pod/resource level checks 176 | - [ ] document email template 177 | - [ ] add more notifiers 178 | - [ ] simpler configuration (via YAML?) 179 | - [ ] Real tests 180 | 181 | Contribution 182 | ------------ 183 | 184 | PRs are more than welcome. Just fork, create a feature branch, and open a PR. :) 185 | -------------------------------------------------------------------------------- /kube-alerts-rc.yml.sample: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ReplicationController 3 | apiVersion: v1 4 | metadata: 5 | name: kube-alerts 6 | spec: 7 | replicas: 1 8 | selector: 9 | app: kube-alerts 10 | type: monitoring 11 | template: 12 | metadata: 13 | name: kube-alerts 14 | labels: 15 | app: kube-alerts 16 | type: monitoring 17 | spec: 18 | containers: 19 | - name: kube-alerts 20 | image: quay.io/acaleph/kube-alerts:latest 21 | args: 22 | - -k8s-api=https://kubernetes.default/api/v1 23 | - -k8s-certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt 24 | - -k8s-token-file=/var/run/secrets/kubernetes.io/serviceaccount/token 25 | - -kv-addresses=http://{{etcd-ip}}:{{etcd-port}}/v2 26 | - -kv-backend=etcd 27 | - -node-check-threshold=15 28 | - -enable-slack=true 29 | - -slack-cluster-name={{cluster-name}} 30 | - -slack-url={{slack-url}} 31 | - -slack-username=kube-alerts 32 | 33 | -------------------------------------------------------------------------------- /src/kubernetes-alerts/api-client.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | 
import ( 4 | "errors" 5 | "io" 6 | "time" 7 | 8 | "crypto/tls" 9 | "crypto/x509" 10 | "encoding/json" 11 | "io/ioutil" 12 | "net/http" 13 | 14 | "github.com/Sirupsen/logrus" 15 | ) 16 | 17 | type ApiClient struct { 18 | *http.Client 19 | apiBaseUrl string 20 | certificateAuthority string 21 | clientCertificate string 22 | clientKey string 23 | token string 24 | tokenFile string 25 | } 26 | 27 | func (a *ApiClient) prepareClient() error { 28 | var cacert *x509.CertPool 29 | if a.certificateAuthority != "" { 30 | capem, err := ioutil.ReadFile(a.certificateAuthority) 31 | if err != nil { 32 | return err 33 | } 34 | cacert = x509.NewCertPool() 35 | if !cacert.AppendCertsFromPEM(capem) { 36 | return errors.New("unable to load certificate authority") 37 | } 38 | } 39 | 40 | var cert tls.Certificate 41 | if a.clientCertificate != "" && a.clientKey != "" { 42 | c := a.clientCertificate 43 | k := a.clientKey 44 | var err error 45 | cert, err = tls.LoadX509KeyPair(c, k) 46 | if err != nil { 47 | return err 48 | } 49 | } 50 | 51 | if cacert != nil || &cert != nil { 52 | config := &tls.Config{ 53 | RootCAs: cacert, 54 | Certificates: []tls.Certificate{cert}, 55 | } 56 | transport := &http.Transport{ 57 | TLSClientConfig: config, 58 | TLSHandshakeTimeout: 5 * time.Second, 59 | } 60 | client := &http.Client{Transport: transport} 61 | a.Client = client 62 | } else { 63 | a.Client = &http.Client{} 64 | } 65 | 66 | if a.token == "" && a.tokenFile != "" { 67 | token, err := ioutil.ReadFile(a.tokenFile) 68 | if err != nil { 69 | return err 70 | } 71 | a.token = string(token) 72 | } 73 | 74 | return nil 75 | } 76 | 77 | func (a *ApiClient) GetRequest(path string, resData interface{}) error { 78 | endpoint := a.apiBaseUrl + path 79 | logrus.Debugf("GET request to: %s", endpoint) 80 | req, err := http.NewRequest("GET", endpoint, nil) 81 | if err != nil { 82 | return err 83 | } 84 | if a.token != "" { 85 | req.Header.Add("Authorization", "Bearer "+a.token) 86 | } 87 | res, err := 
a.Do(req) 88 | if err != nil { 89 | return err 90 | } 91 | defer res.Body.Close() 92 | body, err := ioutil.ReadAll(res.Body) 93 | if err != nil { 94 | return err 95 | } 96 | err = json.Unmarshal(body, resData) 97 | if err != nil { 98 | return err 99 | } 100 | logrus.Debug("Get request successful") 101 | return nil 102 | } 103 | 104 | func (a *ApiClient) PostRequest(path string, data io.Reader) error { 105 | endpoint := a.apiBaseUrl + path 106 | logrus.Debugf("POST request to: %s", endpoint) 107 | req, err := http.NewRequest("POST", endpoint, data) 108 | if err != nil { 109 | return err 110 | } 111 | if a.token != "" { 112 | req.Header.Add("Authorization", "Bearer "+a.token) 113 | } 114 | res, err := a.Do(req) 115 | if err != nil { 116 | return err 117 | } 118 | defer res.Body.Close() 119 | if res.StatusCode == http.StatusCreated || res.StatusCode == http.StatusOK { 120 | logrus.Debug("POST request successful.") 121 | return nil 122 | } 123 | return errors.New(res.Status) 124 | } 125 | 126 | func (a *ApiClient) PutRequest(path, data string) error { 127 | endpoint := a.apiBaseUrl + path 128 | logrus.Debugf("PUT request to: %s", endpoint) 129 | req, err := http.NewRequest("PUT", endpoint, nil) 130 | req.PostForm["value"] = []string{data} 131 | 132 | if err != nil { 133 | return err 134 | } 135 | if a.token != "" { 136 | req.Header.Add("Authorization", "Bearer "+a.token) 137 | } 138 | res, err := a.Do(req) 139 | if err != nil { 140 | return err 141 | } 142 | defer res.Body.Close() 143 | if res.StatusCode == http.StatusCreated || res.StatusCode == http.StatusOK { 144 | logrus.Debug("PUT request successful") 145 | return nil 146 | } 147 | return errors.New(res.Status) 148 | } 149 | -------------------------------------------------------------------------------- /src/kubernetes-alerts/email-notifier.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | 7 | "html/template" 8 | 
"net/smtp" 9 | 10 | "github.com/Sirupsen/logrus" 11 | ) 12 | 

// EmailNotifier sends check summaries as an HTML email through an SMTP server.
type EmailNotifier struct {
	Enabled     bool     // value reported by NotifEnabled
	ClusterName string   // used in the subject and passed to the template
	Template    string   // path to a custom template file; empty selects defaultTemplate
	Url         string   // SMTP server host
	Port        int      // SMTP server port
	Username    string   // SMTP username; empty disables authentication
	Password    string   // SMTP password
	SenderAlias string   // display name used in the From header
	SenderEmail string   // address used in the From header and as envelope sender
	Receivers   []string // recipient addresses
}

// EmailData is the value the email template is executed with.
type EmailData struct {
	ClusterName  string
	SystemStatus string
	FailCount    int
	WarnCount    int
	PassCount    int
	Nodes        map[string][]KubeCheck // checks grouped by KubeCheck.Node
}

// Notify renders the given checks with the configured (or default) template
// and mails the result to every receiver. It returns true when the message
// was handed to the SMTP server and false on any failure, which is logged.
func (email *EmailNotifier) Notify(checks []KubeCheck) bool {
	logrus.Infof("Sending %d notification email", len(checks))

	if len(email.Receivers) == 0 {
		logrus.Error("Unable to send notification: no email receivers configured")
		return false
	}

	overall, pass, warn, fail := NotifSummary(checks)

	e := EmailData{
		ClusterName:  email.ClusterName,
		SystemStatus: string(overall),
		FailCount:    fail,
		WarnCount:    warn,
		PassCount:    pass,
		Nodes:        mapByNodes(checks),
	}

	var tmpl *template.Template
	var err error
	if email.Template == "" {
		tmpl, err = template.New("base").Parse(defaultTemplate)
	} else {
		tmpl, err = template.ParseFiles(email.Template)
	}
	if err != nil {
		logrus.WithError(err).Error("Invalid Template")
		return false
	}

	var body bytes.Buffer
	if err := tmpl.Execute(&body, e); err != nil {
		logrus.WithError(err).Error("Unable to execute template")
		return false
	}

	var msg bytes.Buffer
	fmt.Fprintf(&msg, "From: \"%s\" <%s>\n", email.SenderAlias, email.SenderEmail)
	// Without a To header many clients show the mail as having undisclosed
	// recipients and some filters reject it.
	msg.WriteString("To: ")
	for i, receiver := range email.Receivers {
		if i > 0 {
			msg.WriteString(", ")
		}
		msg.WriteString(receiver)
	}
	msg.WriteString("\n")
	fmt.Fprintf(&msg, "Subject: %s is %s\n", email.ClusterName, overall)
	msg.WriteString("MIME-Version: 1.0\nContent-Type: text/html; charset=\"UTF-8\"\n\n")
	msg.Write(body.Bytes())

	addr := fmt.Sprintf("%s:%d", email.Url, email.Port)
	// Only authenticate when credentials were supplied: SendMail fails with a
	// non-nil Auth against a server that does not advertise AUTH.
	var auth smtp.Auth
	if email.Username != "" {
		auth = smtp.PlainAuth("", email.Username, email.Password, email.Url)
	}
	if err := smtp.SendMail(addr, auth, email.SenderEmail, email.Receivers, msg.Bytes()); err != nil {
		logrus.WithError(err).Error("Unable to send notification.")
		return false
	}
	logrus.Infof("Email notification sent.")
	return true
}

// NotifEnabled reports whether the email notifier is enabled.
func (email *EmailNotifier) NotifEnabled() bool {
	return email.Enabled
}

// mapByNodes groups checks by their Node field, preserving the input order
// within each group.
func mapByNodes(checks []KubeCheck) map[string][]KubeCheck {
	nodeMap := make(map[string][]KubeCheck)
	for _, check := range checks {
		// append on the nil slice of a missing key allocates as needed.
		nodeMap[check.Node] = append(nodeMap[check.Node], check)
	}
	return nodeMap
}

104 | var defaultTemplate string = ` 105 | 106 | 107 | 108 | {{ .ClusterName }} 109 | 110 | 111 | 112 | 113 |
114 |
115 | {{ .ClusterName }} 116 |
117 |
118 | 119 |
120 |

121 | System is {{ .SystemStatus }} 122 |
123 | The following nodes are currently experiencing issues: 124 |

125 |
126 | Failed: 127 | {{ .FailCount }} 128 |
129 |
130 | Warning: 131 | {{ .WarnCount }} 132 |
133 |
134 | Passed: 135 | {{ .PassCount }} 136 |
137 |
138 |

139 | 140 |
141 | 142 | {{ range $name, $checks := .Nodes }} 143 |
144 |
145 | Node: 146 | {{ $name }} 147 |
148 | 149 | {{ range $check := $checks }} 150 |
151 |
152 | {{ with $check.Service }} 153 | {{ $check.Service }}: 154 | {{ end }} 155 | {{ $check.Check }} 156 |
157 |
158 | Since: 159 | {{ $check.Timestamp }} 160 |
161 | {{ with $check.Notes }} 162 |
163 | Notes: 164 |
{{ $check.Notes }}
165 |
166 | {{end }} 167 |
168 | Output: 169 |
{{ $check.Output }}
170 |
171 |
172 | {{ end }} 173 | 174 |
175 | {{ end }} 176 | 177 | 178 | 179 | 180 | 181 | ` 182 | -------------------------------------------------------------------------------- /src/kubernetes-alerts/heapster.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | type HeapsterModelApi struct { 4 | *ApiClient 5 | } 6 | -------------------------------------------------------------------------------- /src/kubernetes-alerts/kubernetes-alerts.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "os" 6 | "strings" 7 | "time" 8 | 9 | "github.com/Sirupsen/logrus" 10 | "github.com/docker/libkv/store" 11 | "github.com/docker/libkv/store/etcd" 12 | ) 13 | 14 | const ( 15 | CheckGroupCluster = KubeCheckGroup("cluster") 16 | CheckGroupNode = KubeCheckGroup("node") 17 | CheckGroupPod = KubeCheckGroup("pod") 18 | 19 | CheckTypeNodeReady = KubeCheckType("node-ready") 20 | CheckTypeNodeOutOfDisk = KubeCheckType("node-out-of-disk") 21 | CheckTypeNodeCpu = KubeCheckType("node-cpu") 22 | CheckTypeNodeMem = KubeCheckType("node-mem") 23 | 24 | CheckStatusPass = CheckStatus("pass") 25 | CheckStatusWarn = CheckStatus("warn") 26 | CheckStatusFail = CheckStatus("fail") 27 | ) 28 | 29 | type KubeCheckGroup string 30 | type KubeCheckType string 31 | type CheckStatus string 32 | 33 | type KubeCheck struct { 34 | Name string `json:"name"` 35 | Node string `json:"node"` 36 | CheckGroup KubeCheckGroup `json:"checkGroup,string"` 37 | CheckType KubeCheckType `json:"checkType,string"` 38 | Status CheckStatus `json:"status"` 39 | Message string `json:"message"` 40 | Timestamp time.Time `json:"timestamp"` 41 | Labels map[string]string `json:"labels"` 42 | } 43 | 44 | func main() { 45 | 46 | kubernetes := &KubernetesApi{ApiClient: &ApiClient{}} 47 | heapster := &HeapsterModelApi{ApiClient: &ApiClient{}} 48 | kv := &KVClient{} 49 | slack := &SlackNotifier{Detailed: true} 50 | email := &EmailNotifier{} 51 
| 52 | notifManager := &NotifManager{ 53 | Notifiers: []Notifier{slack, email}, 54 | } 55 | 56 | nodeChecker := &NodeChecker{ 57 | KubernetesApi: kubernetes, 58 | HeapsterModelApi: heapster, 59 | KVClient: kv, 60 | NotifManager: notifManager, 61 | } 62 | 63 | // need better way for configuring this... 64 | parseFlags(kubernetes, heapster, kv, notifManager, nodeChecker, slack, email) 65 | initLibKV() 66 | 67 | if err := kubernetes.prepareClient(); err != nil { 68 | logrus.WithError(err).Error("unable to create kubernetes client") 69 | os.Exit(-1) 70 | } 71 | 72 | if err := heapster.prepareClient(); err != nil { 73 | logrus.WithError(err).Error("unable to create heapster client") 74 | os.Exit(-1) 75 | } 76 | 77 | if err := kv.prepareClient(); err != nil { 78 | logrus.WithError(err).Error("unable to create kv client") 79 | os.Exit(-1) 80 | } 81 | 82 | logrus.Info("Starting kube-alerts...") 83 | 84 | notifManager.Start() 85 | nodeChecker.start() 86 | 87 | nodeChecker.RunWaitGroup.Wait() 88 | 89 | // clean up aka stop all services 90 | } 91 | 92 | func parseFlags(kubernetes *KubernetesApi, heapster *HeapsterModelApi, kv *KVClient, notifManager *NotifManager, nodeChecker *NodeChecker, slack *SlackNotifier, email *EmailNotifier) { 93 | flag.StringVar(&kubernetes.apiBaseUrl, "k8s-api", "", "Kubernetes API Base URL") 94 | flag.StringVar(&kubernetes.certificateAuthority, "k8s-certificate-authority", "", "Kubernetes Certificate Authority") 95 | flag.StringVar(&kubernetes.clientCertificate, "k8s-client-certificate", "", "Kubernetes Client Certificate") 96 | flag.StringVar(&kubernetes.clientKey, "k8s-client-key", "", "Kubernetes Client Key") 97 | flag.StringVar(&kubernetes.token, "k8s-token", "", "Kubernetes Token") 98 | flag.StringVar(&kubernetes.tokenFile, "k8s-token-file", "", "Kubernetes Token File") 99 | 100 | flag.StringVar(&heapster.apiBaseUrl, "heapster-api", "", "Heapster API Base URL") 101 | flag.StringVar(&heapster.certificateAuthority, 
"heapster-certificate-authority", "", "Heapster Certificate Authority") 102 | flag.StringVar(&heapster.clientCertificate, "heapster-client-certificate", "", "Heapster Client Certificate") 103 | flag.StringVar(&heapster.clientKey, "heapster-client-key", "", "Heapster Client Key") 104 | flag.StringVar(&heapster.token, "heapster-token", "", "Heapster Token") 105 | 106 | flag.StringVar(&kv.certificateAuthority, "kv-certificate-authority", "", "KV Certificate Authority") 107 | flag.StringVar(&kv.clientCertificate, "kv-client-certificate", "", "KV Client Certificate") 108 | flag.StringVar(&kv.clientKey, "kv-client-key", "", "KV Client Key") 109 | 110 | notifIntervalSecs := flag.Int("notification-interval", 60, "the interval to wait before sending notifications (seconds)") 111 | 112 | nodeCheckIntervalSecs := flag.Int("node-check-interval", 10, "interval in seconds before running node checks") 113 | nodeCheckThresholdSecs := flag.Int("node-check-threshold", 60, "threshold before marking a node status as changed") 114 | 115 | flag.BoolVar(&slack.Enabled, "enable-slack", false, "Enable slack notifier") 116 | flag.StringVar(&slack.ClusterName, "slack-cluster-name", "", "Cluster name to display on slack notifications") 117 | flag.StringVar(&slack.Url, "slack-url", "", "The slack URL for notification") 118 | flag.StringVar(&slack.Username, "slack-username", "kube-alerts", "The slack username") 119 | 120 | flag.BoolVar(&email.Enabled, "enable-email", false, "Enable email notifier") 121 | flag.StringVar(&email.ClusterName, "email-cluster-name", "kubernetes", "The name of the kubernetes cluster") 122 | flag.StringVar(&email.Template, "email-template", "", "The email template file") 123 | flag.StringVar(&email.Url, "email-url", "", "The smtp server URL") 124 | flag.IntVar(&email.Port, "email-port", 0, "The smtp port") 125 | flag.StringVar(&email.Username, "email-username", "", "The smtp username") 126 | flag.StringVar(&email.Password, "email-password", "", "The smtp password") 127 
| flag.StringVar(&email.SenderAlias, "email-sender-alias", "kube-alerts", "The email sender alias") 128 | flag.StringVar(&email.SenderEmail, "email-sender-email", "", "The email of the sender") 129 | 130 | emailReceivers := flag.String("email-receivers", "", "Comma separated list of receiver's email") 131 | email.Receivers = strings.Split(*emailReceivers, ",") 132 | 133 | addresses := flag.String("kv-addresses", "", "addresses for the KV store") 134 | backend := flag.String("kv-backend", "", "KV Store Backend. Only etcd for now") 135 | 136 | logLevel := flag.String("log-level", "info", "set the log level, valid values are [debug, info, warn, error, fatal, panic]") 137 | flag.Parse() 138 | 139 | kv.addresses = strings.Split(*addresses, ",") 140 | switch *backend { 141 | case "etcd": 142 | kv.backend = store.ETCD 143 | case "consul": 144 | kv.backend = store.CONSUL 145 | case "zk": 146 | kv.backend = store.ZK 147 | case "boltdb": 148 | kv.backend = store.BOLTDB 149 | } 150 | 151 | notifManager.NotifInterval = time.Duration(*notifIntervalSecs) * time.Second 152 | nodeChecker.CheckInterval = time.Duration(*nodeCheckIntervalSecs) * time.Second 153 | nodeChecker.Threshold = time.Duration(*nodeCheckThresholdSecs) * time.Second 154 | 155 | logrusLevel, err := logrus.ParseLevel(*logLevel) 156 | if err != nil { 157 | logrus.SetLevel(logrus.InfoLevel) 158 | } else { 159 | logrus.SetLevel(logrusLevel) 160 | } 161 | 162 | } 163 | 164 | func initLibKV() { 165 | etcd.Register() 166 | } 167 | -------------------------------------------------------------------------------- /src/kubernetes-alerts/kubernetes.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "time" 4 | 5 | type KubernetesApi struct { 6 | *ApiClient 7 | } 8 | 9 | type NodeList struct { 10 | Items []Node `json:"items"` 11 | } 12 | 13 | type Node struct { 14 | Metadata ResourceMetadata `json:"metadata"` 15 | Status NodeStatus `json:"status"` 16 | } 17 | 
18 | type ResourceMetadata struct { 19 | Name string `json:"name"` 20 | Labels map[string]string `json:"labels"` 21 | } 22 | 23 | type NodeStatus struct { 24 | Capacity NodeCapacity `json:"capacity"` 25 | Conditions []NodeCondition `json:"conditions"` 26 | } 27 | 28 | type NodeCapacity struct { 29 | Cpu string `json:"cpu"` 30 | Memory string `json:"memory"` 31 | Pods string `json:"pods"` 32 | } 33 | 34 | type NodeCondition struct { 35 | Type string `json:"type"` 36 | Status string `json:"status"` 37 | LastHeartbeatTime time.Time `json:"lastHearbeatTime"` 38 | LastTransitionTime time.Time `json:"lastTransitionTime"` 39 | Reason string `json:"reason"` 40 | Message string `json:"message"` 41 | } 42 | 43 | func (k *KubernetesApi) Nodes() ([]Node, error) { 44 | var nodeList NodeList 45 | err := k.GetRequest("/nodes", &nodeList) 46 | if err != nil { 47 | return nil, err 48 | } 49 | return nodeList.Items, nil 50 | } 51 | -------------------------------------------------------------------------------- /src/kubernetes-alerts/kv-client.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "time" 7 | 8 | "crypto/tls" 9 | "crypto/x509" 10 | "encoding/json" 11 | "io/ioutil" 12 | 13 | "github.com/Sirupsen/logrus" 14 | "github.com/docker/libkv" 15 | "github.com/docker/libkv/store" 16 | ) 17 | 18 | type KVClient struct { 19 | backend store.Backend 20 | addresses []string 21 | certificateAuthority string 22 | clientCertificate string 23 | clientKey string 24 | store store.Store 25 | } 26 | 27 | func (kvc *KVClient) prepareClient() error { 28 | hasCA := kvc.certificateAuthority != "" 29 | hasCert := kvc.clientCertificate != "" 30 | hasKey := kvc.clientKey != "" 31 | 32 | config := &store.Config{ 33 | ConnectionTimeout: 5 * time.Second, 34 | } 35 | if hasCA || hasCert || hasKey { 36 | 37 | var cacert *x509.CertPool 38 | if kvc.certificateAuthority != "" { 39 | capem, err := 
ioutil.ReadFile(kvc.certificateAuthority) 40 | if err != nil { 41 | return err 42 | } 43 | cacert = x509.NewCertPool() 44 | if !cacert.AppendCertsFromPEM(capem) { 45 | return errors.New("unable to load certificate authority") 46 | } 47 | } 48 | 49 | var cert tls.Certificate 50 | if kvc.clientCertificate != "" && kvc.clientKey != "" { 51 | c := kvc.clientCertificate 52 | k := kvc.clientKey 53 | var err error 54 | cert, err = tls.LoadX509KeyPair(c, k) 55 | if err != nil { 56 | return err 57 | } 58 | } 59 | 60 | config.ClientTLS = &store.ClientTLSConfig{ 61 | CertFile: kvc.clientCertificate, 62 | KeyFile: kvc.clientKey, 63 | CACertFile: kvc.certificateAuthority, 64 | } 65 | config.TLS = &tls.Config{ 66 | RootCAs: cacert, 67 | Certificates: []tls.Certificate{cert}, 68 | } 69 | 70 | } 71 | store, err := libkv.NewStore(kvc.backend, kvc.addresses, config) 72 | if err != nil { 73 | fmt.Println(err) 74 | logrus.Error("unable to create kvclient. ", err) 75 | return err 76 | } 77 | kvc.store = store 78 | return nil 79 | } 80 | 81 | func (kvc *KVClient) checkExists(check KubeCheck) (bool, error) { 82 | key := fmt.Sprintf("kube-alerts/%s/%s/%s", check.CheckGroup, check.CheckType, check.Name) 83 | exists, err := kvc.store.Exists(key) 84 | if err != nil { 85 | logrus.WithError(err).Error("unable to check key existence") 86 | return false, err 87 | } 88 | return exists, nil 89 | } 90 | 91 | func (kvc *KVClient) saveCheck(check KubeCheck) error { 92 | value, err := json.Marshal(&check) 93 | if err != nil { 94 | logrus.WithError(err).Error("unable to marshall check") 95 | return err 96 | } 97 | key := fmt.Sprintf("kube-alerts/%s/%s/%s", check.CheckGroup, check.CheckType, check.Name) 98 | return kvc.store.Put(key, value, nil) 99 | } 100 | 101 | func (kvc *KVClient) getCheck(checkGroup KubeCheckGroup, checkType KubeCheckType, checkName string) (KubeCheck, error) { 102 | var check KubeCheck 103 | key := fmt.Sprintf("kube-alerts/%s/%s/%s", checkGroup, checkType, checkName) 104 | kvpair, 
err := kvc.store.Get(key) 105 | if err != nil { 106 | logrus.WithError(err).Error("unable to get kv pair") 107 | return check, err 108 | } 109 | err = json.Unmarshal(kvpair.Value, &check) 110 | if err != nil { 111 | logrus.WithError(err).Error("unable to unmarshal kv value") 112 | return check, err 113 | } 114 | return check, nil 115 | } 116 | -------------------------------------------------------------------------------- /src/kubernetes-alerts/node-checker.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | 7 | "github.com/Sirupsen/logrus" 8 | ) 9 | 10 | const ( 11 | ConditionTypeReady = "Ready" 12 | ConditionTypeOutOfDisk = "OutOfDisk" 13 | 14 | NodeCheckReady = "NodeReady" 15 | NodeCheckOutOfDisk = "NodeOutOfDisk" 16 | NodeCheckCpu = "NodeCpu" 17 | NodeCheckMem = "NodeMem" 18 | ) 19 | 20 | type NodeChecker struct { 21 | *KubernetesApi 22 | *HeapsterModelApi 23 | *KVClient 24 | *NotifManager 25 | RunWaitGroup sync.WaitGroup 26 | CheckInterval time.Duration 27 | stopChannel chan bool 28 | Threshold time.Duration 29 | } 30 | 31 | func (n *NodeChecker) start() { 32 | logrus.Info("Starting Node Checker...") 33 | n.RunWaitGroup.Add(1) 34 | n.stopChannel = make(chan bool) 35 | go n.run() 36 | } 37 | 38 | func (n *NodeChecker) stop() { 39 | close(n.stopChannel) 40 | n.RunWaitGroup.Done() 41 | } 42 | 43 | func (n *NodeChecker) run() { 44 | running := true 45 | for running { 46 | select { 47 | case <-time.After(n.CheckInterval): 48 | n.processNodeCheck() 49 | case <-n.stopChannel: 50 | running = false 51 | } 52 | time.Sleep(1 * time.Second) 53 | } 54 | } 55 | 56 | func (n *NodeChecker) processNodeCheck() { 57 | logrus.Debug("Running Node Checks...") 58 | nodes, err := n.Nodes() 59 | if err != nil { 60 | logrus.WithError(err).Error("Unable to retrieve nodes.") 61 | return 62 | } 63 | n.processNodeCheckReady(nodes) 64 | n.processNodeOutOfDisk(nodes) 65 | // process Node OOD 66 | // 
... 67 | } 68 | 69 | func (n *NodeChecker) processNodeCheckReady(nodes []Node) { 70 | logrus.Debug("Checking Node Readiness...") 71 | for _, node := range nodes { 72 | ready := false 73 | passThreshold := false 74 | for _, condition := range node.Status.Conditions { 75 | if condition.Type == ConditionTypeReady { 76 | ready = condition.Status == "True" 77 | duration := time.Since(condition.LastTransitionTime) 78 | passThreshold = duration >= n.Threshold 79 | } 80 | } 81 | 82 | // node readiness may have changed 83 | if passThreshold { 84 | 85 | var message string 86 | var status CheckStatus 87 | if ready { 88 | status = CheckStatusPass 89 | message = node.Metadata.Name + " is Ready" 90 | } else { 91 | status = CheckStatusFail 92 | message = node.Metadata.Name + " is NOT Ready" 93 | } 94 | 95 | check := KubeCheck{ 96 | Name: node.Metadata.Name, 97 | Node: node.Metadata.Name, 98 | CheckGroup: CheckGroupNode, 99 | CheckType: CheckTypeNodeReady, 100 | Status: status, 101 | Message: message, 102 | Timestamp: time.Now(), 103 | Labels: node.Metadata.Labels, 104 | } 105 | 106 | n.processCheck(check) 107 | } 108 | 109 | } 110 | } 111 | 112 | func (n *NodeChecker) processNodeOutOfDisk(nodes []Node) { 113 | logrus.Debug("Checking Node Disk Space...") 114 | for _, node := range nodes { 115 | ok := false 116 | passThreshold := false 117 | for _, condition := range node.Status.Conditions { 118 | if condition.Type == ConditionTypeOutOfDisk { 119 | ok = condition.Status == "False" 120 | duration := time.Since(condition.LastTransitionTime) 121 | passThreshold = duration >= n.Threshold 122 | } 123 | } 124 | 125 | // node ood may have changed 126 | if passThreshold { 127 | 128 | var message string 129 | var status CheckStatus 130 | if ok { 131 | status = CheckStatusPass 132 | message = node.Metadata.Name + " has sufficient disk space" 133 | } else { 134 | status = CheckStatusFail 135 | message = node.Metadata.Name + " is running out of disk space" 136 | } 137 | 138 | check := 
KubeCheck{ 139 | Name: node.Metadata.Name, 140 | Node: node.Metadata.Name, 141 | CheckGroup: CheckGroupNode, 142 | CheckType: CheckTypeNodeOutOfDisk, 143 | Status: status, 144 | Message: message, 145 | Timestamp: time.Now(), 146 | Labels: node.Metadata.Labels, 147 | } 148 | 149 | n.processCheck(check) 150 | } 151 | 152 | } 153 | } 154 | 155 | func (n *NodeChecker) processCheck(check KubeCheck) { 156 | exists, err := n.checkExists(check) 157 | if err != nil { 158 | logrus.WithError(err).Error("unable to determine if check exists or not") 159 | return 160 | } 161 | if !exists { 162 | logrus.Infof("check %s is not in the record. recoding now", check.Name) 163 | err := n.saveCheck(check) 164 | if err != nil { 165 | logrus.WithError(err).Warnf("Unable to save check") 166 | return 167 | } 168 | if check.Status == CheckStatusFail { 169 | logrus.Info("check %s is new and failing, will notify", check.Name) 170 | n.addNotification(check) 171 | } 172 | } else { 173 | oldCheck, err := n.getCheck(check.CheckGroup, check.CheckType, check.Name) 174 | if err != nil { 175 | logrus.WithError(err).Warnf("unable to get previous check, can't proceed") 176 | return 177 | } 178 | logrus.Printf("old: %s, new: %s", oldCheck.Status, check.Status) 179 | if check.Status != oldCheck.Status { 180 | logrus.Debugf("check %s status has changed, will notify", check.Name) 181 | logrus.Debugf("status for %s:%s:%s has changed.", check.CheckGroup, check.CheckType, check.Name) 182 | err := n.saveCheck(check) 183 | if err != nil { 184 | logrus.WithError(err).Warnf("Unable to save") 185 | return 186 | } 187 | logrus.Infof("check %s is failing, will notify", check.Name) 188 | n.addNotification(check) 189 | } else { 190 | logrus.Debug("nothing has changed.") 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/kubernetes-alerts/notifier.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | 
import ( 4 | "sync" 5 | "time" 6 | 7 | "github.com/Sirupsen/logrus" 8 | ) 9 | 10 | type Notifier interface { 11 | Notify(checks []KubeCheck) bool 12 | NotifEnabled() bool 13 | } 14 | 15 | type NotifManager struct { 16 | NotifInterval time.Duration 17 | Notifiers []Notifier 18 | notifChannel chan KubeCheck 19 | stopChannel chan bool 20 | checks []KubeCheck 21 | addCheckWaitGroup sync.WaitGroup 22 | sendNotifWaitGroup sync.WaitGroup 23 | } 24 | 25 | func (n *NotifManager) Start() { 26 | logrus.Info("Starting notif manager...") 27 | n.notifChannel = make(chan KubeCheck, 10) 28 | n.stopChannel = make(chan bool) 29 | n.checks = make([]KubeCheck, 0) 30 | go n.listenForNotif() 31 | } 32 | 33 | func (n *NotifManager) Stop() { 34 | n.stopChannel <- true 35 | close(n.stopChannel) 36 | close(n.notifChannel) 37 | } 38 | 39 | func (n *NotifManager) listenForNotif() { 40 | running := true 41 | for running { 42 | select { 43 | case <-n.stopChannel: 44 | running = false 45 | case <-time.After(n.NotifInterval): 46 | logrus.Debug("Trying to send notifications...") 47 | n.addCheckWaitGroup.Wait() 48 | n.sendNotifWaitGroup.Add(1) 49 | n.sendNotifications() 50 | n.sendNotifWaitGroup.Done() 51 | case check := <-n.notifChannel: 52 | logrus.Debug("Adding check for notification...") 53 | n.sendNotifWaitGroup.Wait() 54 | n.addCheckWaitGroup.Add(1) 55 | n.checks = append(n.checks, check) 56 | n.addCheckWaitGroup.Done() 57 | } 58 | } 59 | } 60 | 61 | func (n *NotifManager) addNotification(check KubeCheck) { 62 | n.notifChannel <- check 63 | } 64 | 65 | func (n *NotifManager) sendNotifications() { 66 | if len(n.checks) > 0 { 67 | for _, notifier := range n.Notifiers { 68 | if notifier.NotifEnabled() { 69 | notifier.Notify(n.checks) 70 | } 71 | } 72 | n.checks = make([]KubeCheck, 0) 73 | } 74 | } 75 | 76 | func NotifSummary(checks []KubeCheck) (overall CheckStatus, pass, warn, fail int) { 77 | overall = CheckStatusPass 78 | for _, check := range checks { 79 | switch check.Status { 80 | case 
CheckStatusPass: 81 | pass++ 82 | case CheckStatusWarn: 83 | warn++ 84 | if overall != CheckStatusFail { 85 | overall = CheckStatusWarn 86 | } 87 | case CheckStatusFail: 88 | fail++ 89 | overall = CheckStatusFail 90 | } 91 | } 92 | return 93 | } 94 | -------------------------------------------------------------------------------- /src/kubernetes-alerts/slack-notifier.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "strings" 7 | 8 | "encoding/json" 9 | "io/ioutil" 10 | "net/http" 11 | 12 | "github.com/Sirupsen/logrus" 13 | ) 14 | 15 | type SlackNotifier struct { 16 | Enabled bool `json:"-"` 17 | ClusterName string `json:"-"` 18 | Url string `json:"-"` 19 | Channel string `json:"channel"` 20 | Username string `json:"username"` 21 | IconUrl string `json:"icon_url"` 22 | IconEmoji string `json:"icon_emoji"` 23 | Text string `json:"text,omitempty"` 24 | Attachments []attachment `json:"attachments,omitempty"` 25 | Detailed bool `json:"-"` 26 | } 27 | 28 | type attachment struct { 29 | Color string `json:"color"` 30 | Title string `json:"title"` 31 | Pretext string `json:"pretext"` 32 | Text string `json:"text"` 33 | MrkdwnIn []string `json:"mrkdwn_in"` 34 | } 35 | 36 | func (slack *SlackNotifier) Notify(checks []KubeCheck) bool { 37 | logrus.Infof("Sending %d notifications to slack", len(checks)) 38 | 39 | if slack.Detailed { 40 | return slack.notifyDetailed(checks) 41 | } else { 42 | return slack.notifySimple(checks) 43 | } 44 | 45 | } 46 | 47 | func (slack *SlackNotifier) NotifEnabled() bool { 48 | return slack.Enabled 49 | } 50 | 51 | func (slack *SlackNotifier) notifySimple(checks []KubeCheck) bool { 52 | 53 | _, pass, warn, fail := NotifSummary(checks) 54 | 55 | textTemplate := `%s Notifications 56 | -------------------------------------------------------------------------------- 57 | pass: %d warn: %d fail: %d 58 | 
-------------------------------------------------------------------------------- 59 | %s 60 | -------------------------------------------------------------------------------- 61 | ` 62 | 63 | detailTemplate := ` [%s] %s: %s.\n` 64 | var details string 65 | for _, check := range checks { 66 | details += fmt.Sprintf(detailTemplate, strings.ToUpper(string(check.Status)), check.Timestamp.String(), check.Message) 67 | } 68 | 69 | text := fmt.Sprintf(textTemplate, slack.ClusterName, pass, warn, fail, details) 70 | 71 | slack.Text = text 72 | return slack.postToSlack() 73 | } 74 | 75 | func (slack *SlackNotifier) notifyDetailed(checks []KubeCheck) bool { 76 | 77 | overall, pass, warn, fail := NotifSummary(checks) 78 | 79 | var emoji, color string 80 | switch overall { 81 | case CheckStatusPass: 82 | emoji = ":white_check_mark:" 83 | color = "good" 84 | case CheckStatusWarn: 85 | emoji = ":question:" 86 | color = "warning" 87 | case CheckStatusFail: 88 | emoji = ":x:" 89 | color = "danger" 90 | default: 91 | emoji = ":question:" 92 | } 93 | 94 | title := "Kubernetes Alerts" 95 | 96 | preTextTemplate := `%s %s Notifications 97 | -------------------------------------------------------------------------------- 98 | %d :simple_smile: %d :fearful: %d :rage: 99 | -------------------------------------------------------------------------------- 100 | ` 101 | 102 | preText := fmt.Sprintf(preTextTemplate, emoji, slack.ClusterName, pass, warn, fail) 103 | 104 | detailTemplate := " %s %s: %s.\n" 105 | var details string 106 | for _, check := range checks { 107 | var statusEmoji string 108 | switch check.Status { 109 | case CheckStatusPass: 110 | statusEmoji = ":simple_smile:" 111 | case CheckStatusWarn: 112 | statusEmoji = ":fearful:" 113 | case CheckStatusFail: 114 | statusEmoji = ":rage:" 115 | } 116 | details += fmt.Sprintf(detailTemplate, statusEmoji, check.Timestamp.String(), check.Message) 117 | details += "\n" 118 | } 119 | 120 | a := attachment{ 121 | Color: color, 122 | 
Title: title, 123 | Pretext: preText, 124 | Text: details, 125 | MrkdwnIn: []string{"text", "pretext"}, 126 | } 127 | slack.Attachments = []attachment{a} 128 | 129 | return slack.postToSlack() 130 | 131 | } 132 | 133 | func (slack *SlackNotifier) postToSlack() bool { 134 | 135 | data, err := json.Marshal(slack) 136 | if err != nil { 137 | logrus.WithError(err).Error("Unable to marshal slack payload") 138 | return false 139 | } 140 | logrus.Debugf("struct = %+v, json = %s", slack, string(data)) 141 | 142 | b := bytes.NewBuffer(data) 143 | if res, err := http.Post(slack.Url, "application/json", b); err != nil { 144 | logrus.WithError(err).Error("Unable to send data to slack") 145 | return false 146 | } else { 147 | defer res.Body.Close() 148 | statusCode := res.StatusCode 149 | if statusCode != 200 { 150 | body, _ := ioutil.ReadAll(res.Body) 151 | logrus.Error("Unable to notify slack:", string(body)) 152 | return false 153 | } else { 154 | logrus.Info("Slack notification sent.") 155 | return true 156 | } 157 | } 158 | 159 | } 160 | -------------------------------------------------------------------------------- /src/kubernetes-alerts/util.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "time" 7 | 8 | "encoding/json" 9 | 10 | "github.com/Sirupsen/logrus" 11 | ) 12 | 13 | func toTime(str string) (time.Time, error) { 14 | layout := "2006-01-02T15:04:05Z" 15 | return time.Parse(layout, str) 16 | } 17 | 18 | func toReader(data interface{}) (io.Reader, error) { 19 | b, err := json.Marshal(data) 20 | if err != nil { 21 | logrus.Error("error marshalling data") 22 | return nil, err 23 | } 24 | return bytes.NewReader(b), nil 25 | } 26 | -------------------------------------------------------------------------------- /vendor/manifest: -------------------------------------------------------------------------------- 1 | { 2 | "version": 0, 3 | "dependencies": [ 4 | { 5 | "importpath": 
"github.com/Sirupsen/logrus", 6 | "repository": "https://github.com/Sirupsen/logrus", 7 | "revision": "446d1c146faa8ed3f4218f056fcd165f6bcfda81", 8 | "branch": "master" 9 | }, 10 | { 11 | "importpath": "github.com/boltdb/bolt", 12 | "repository": "https://github.com/boltdb/bolt", 13 | "revision": "34a0fa5307f7562980fb8e7ff4723f7987edf49b", 14 | "branch": "master" 15 | }, 16 | { 17 | "importpath": "github.com/coreos/etcd/Godeps/_workspace/src/github.com/ugorji/go/codec", 18 | "repository": "https://github.com/coreos/etcd", 19 | "revision": "be57b6b10e38edf2c7d44b51237ce1895369fbe0", 20 | "branch": "master", 21 | "path": "/Godeps/_workspace/src/github.com/ugorji/go/codec" 22 | }, 23 | { 24 | "importpath": "github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context", 25 | "repository": "https://github.com/coreos/etcd", 26 | "revision": "be57b6b10e38edf2c7d44b51237ce1895369fbe0", 27 | "branch": "master", 28 | "path": "/Godeps/_workspace/src/golang.org/x/net/context" 29 | }, 30 | { 31 | "importpath": "github.com/coreos/etcd/client", 32 | "repository": "https://github.com/coreos/etcd", 33 | "revision": "be57b6b10e38edf2c7d44b51237ce1895369fbe0", 34 | "branch": "master", 35 | "path": "/client" 36 | }, 37 | { 38 | "importpath": "github.com/coreos/etcd/pkg/pathutil", 39 | "repository": "https://github.com/coreos/etcd", 40 | "revision": "be57b6b10e38edf2c7d44b51237ce1895369fbe0", 41 | "branch": "master", 42 | "path": "/pkg/pathutil" 43 | }, 44 | { 45 | "importpath": "github.com/coreos/etcd/pkg/types", 46 | "repository": "https://github.com/coreos/etcd", 47 | "revision": "be57b6b10e38edf2c7d44b51237ce1895369fbe0", 48 | "branch": "master", 49 | "path": "/pkg/types" 50 | }, 51 | { 52 | "importpath": "github.com/davecgh/go-spew/spew", 53 | "repository": "https://github.com/davecgh/go-spew", 54 | "revision": "5215b55f46b2b919f50a1df0eaa5886afe4e3b3d", 55 | "branch": "master", 56 | "path": "/spew" 57 | }, 58 | { 59 | "importpath": "github.com/docker/libkv", 60 | 
"repository": "https://github.com/docker/libkv", 61 | "revision": "2f2380c8698abff4eb662f33b0e088e520ec416e", 62 | "branch": "master" 63 | }, 64 | { 65 | "importpath": "github.com/docker/libkv/store", 66 | "repository": "https://github.com/docker/libkv", 67 | "revision": "2f2380c8698abff4eb662f33b0e088e520ec416e", 68 | "branch": "master", 69 | "path": "/store" 70 | }, 71 | { 72 | "importpath": "github.com/docker/libkv/store/etcd", 73 | "repository": "https://github.com/docker/libkv", 74 | "revision": "2f2380c8698abff4eb662f33b0e088e520ec416e", 75 | "branch": "master", 76 | "path": "/store/etcd" 77 | }, 78 | { 79 | "importpath": "github.com/hashicorp/consul/api", 80 | "repository": "https://github.com/hashicorp/consul", 81 | "revision": "0d81e4bab4e0fc8fdbaabf99050468591edad128", 82 | "branch": "master", 83 | "path": "/api" 84 | }, 85 | { 86 | "importpath": "github.com/hashicorp/go-cleanhttp", 87 | "repository": "https://github.com/hashicorp/go-cleanhttp", 88 | "revision": "ce617e79981a8fff618bb643d155133a8f38db96", 89 | "branch": "master" 90 | }, 91 | { 92 | "importpath": "github.com/hashicorp/serf/coordinate", 93 | "repository": "https://github.com/hashicorp/serf", 94 | "revision": "39c7c06298b480560202bec00c2c77e974e88792", 95 | "branch": "master", 96 | "path": "/coordinate" 97 | }, 98 | { 99 | "importpath": "github.com/pmezard/go-difflib/difflib", 100 | "repository": "https://github.com/pmezard/go-difflib", 101 | "revision": "e8554b8641db39598be7f6342874b958f12ae1d4", 102 | "branch": "master", 103 | "path": "/difflib" 104 | }, 105 | { 106 | "importpath": "github.com/samuel/go-zookeeper/zk", 107 | "repository": "https://github.com/samuel/go-zookeeper", 108 | "revision": "218e9c81c0dd8b3b18172b2bbfad92cc7d6db55f", 109 | "branch": "master", 110 | "path": "/zk" 111 | }, 112 | { 113 | "importpath": "github.com/stretchr/objx", 114 | "repository": "https://github.com/stretchr/objx", 115 | "revision": "1a9d0bb9f541897e62256577b352fdbc1fb4fd94", 116 | "branch": "master" 
117 | }, 118 | { 119 | "importpath": "github.com/stretchr/testify/assert", 120 | "repository": "https://github.com/stretchr/testify", 121 | "revision": "5b9da39b66e8e994455c2525c4421c8cc00a7f93", 122 | "branch": "master", 123 | "path": "/assert" 124 | }, 125 | { 126 | "importpath": "github.com/stretchr/testify/mock", 127 | "repository": "https://github.com/stretchr/testify", 128 | "revision": "5b9da39b66e8e994455c2525c4421c8cc00a7f93", 129 | "branch": "master", 130 | "path": "/mock" 131 | }, 132 | { 133 | "importpath": "golang.org/x/net/context", 134 | "repository": "https://go.googlesource.com/net", 135 | "revision": "3b90a77d2885fb0429e8a21ab72fc73ca6f8b401", 136 | "branch": "master", 137 | "path": "/context" 138 | }, 139 | { 140 | "importpath": "gopkg.in/airbrake/gobrake.v2", 141 | "repository": "https://gopkg.in/airbrake/gobrake.v2", 142 | "revision": "c9d51adc624b5cc4c1bf8de730a09af4878ffe2d", 143 | "branch": "master" 144 | }, 145 | { 146 | "importpath": "gopkg.in/gemnasium/logrus-airbrake-hook.v2", 147 | "repository": "https://gopkg.in/gemnasium/logrus-airbrake-hook.v2", 148 | "revision": "31e6fd4bd5a98d8ee7673d24bc54ec73c31810dd", 149 | "branch": "master" 150 | } 151 | ] 152 | } --------------------------------------------------------------------------------