├── .babelrc ├── bin └── rancher-alarms.js ├── .dockerignore ├── Dockerfile ├── examples ├── docker-compose.minimal.yml ├── docker-compose.minimal-and-filter.yml ├── docker-compose.slack-full.yml ├── docker-compose.slack-and-smtp.yml └── docker-compose.smtp-full.yml ├── src ├── notifications │ ├── target.es6 │ ├── slack.es6 │ └── email.es6 ├── render-template.es6 ├── log.es6 ├── env-to-object.es6 ├── rancher.es6 ├── config.es6 ├── server.es6 └── monitor.es6 ├── .editorconfig ├── CHANGELOG.md ├── .gitignore ├── package.json ├── env.sh └── README.md /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "stage": 0, 3 | } 4 | -------------------------------------------------------------------------------- /bin/rancher-alarms.js: -------------------------------------------------------------------------------- 1 | require('babel-core/register'); 2 | require('../src/server'); 3 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .gitignore 3 | LICENSE 4 | VERSION 5 | README.md 6 | Changelog.md 7 | Makefile 8 | env 9 | *.log 10 | .idea 11 | docker-compose.yml 12 | 13 | node_modules 14 | 15 | config.json 16 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mhart/alpine-node:4 2 | 3 | RUN mkdir -p /usr/src/app 4 | WORKDIR /usr/src/app 5 | 6 | COPY package.json /usr/src/app/ 7 | RUN npm install --loglevel warn 8 | COPY . /usr/src/app 9 | 10 | CMD [ "npm", "start" ] 11 | -------------------------------------------------------------------------------- /examples/docker-compose.minimal.yml: -------------------------------------------------------------------------------- 1 | # push messages in default Slack channel with default template 2 | rancher-alarms: 3 | image: ndelitski/rancher-alarms 4 | environment: 5 | ALARM_SLACK_WEBHOOK_URL: https://hooks.slack.com/services/YOUR-WEBHOOK-ID 6 | labels: 7 | io.rancher.container.create_agent: true 8 | io.rancher.container.agent.role: environment 9 | -------------------------------------------------------------------------------- /examples/docker-compose.minimal-and-filter.yml: -------------------------------------------------------------------------------- 1 | # all services in "app" stack are monitored 2 | rancher-alarms: 3 | image: ndelitski/rancher-alarms 4 | environment: 5 | ALARM_SLACK_WEBHOOK_URL: https://hooks.slack.com/services/... 6 | ALARM_FILTER: app/* 7 | labels: 8 | io.rancher.container.create_agent: true 9 | io.rancher.container.agent.role: environment 10 | -------------------------------------------------------------------------------- /src/notifications/target.es6: -------------------------------------------------------------------------------- 1 | import {info} from '../log'; 2 | import assert from 'assert'; 3 | 4 | const VALID_TARGETS = ['email', 'slack']; 5 | 6 | export default class NotificationTarget { 7 | async notify(message) { 8 | info(message); 9 | } 10 | static init(name, options) { 11 | assert(VALID_TARGETS.indexOf(name) !== -1, `invalid notification target '${name}'`); 12 | return new (require('./' + name))(options); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig helps developers define and maintain consistent 2 | # coding styles between different editors and IDEs 3 | # editorconfig.org 4 | 5 | root = true 6 | 7 | [*] 8 | end_of_line = lf 9 | charset = utf-8 10 | trim_trailing_whitespace = true 11 | insert_final_newline = true 12 | 13 | indent_style = space 14 | indent_size = 4 15 | 16 | [*.yml] 17 | indent_style = space 18 | indent_size = 2 19 | 20 | [*.es6] 21 | indent_style = space 22 | indent_size = 2 23 | 24 | [*.md] 25 | trim_trailing_whitespace = false 26 | 27 | -------------------------------------------------------------------------------- /examples/docker-compose.slack-full.yml: -------------------------------------------------------------------------------- 1 | # Send Slack notifications with using full list of sutomization parameters 2 | rancher-alarms: 3 | image: ndelitski/rancher-alarms 4 | environment: 5 | ALARM_SLACK_WEBHOOK_URL: https://hooks.slack.com/services/sdf/sdf24 6 | ALARM_SLACK_CHANNEL: "@somebody" 7 | ALARM_SLACK_BOTNAME: "rancher-alarm" 8 | ALARM_SLACK_TEMPLATE: "Hey buddy! Your service #{serviceName} become #{healthyState}" 9 | labels: 10 | io.rancher.container.create_agent: true 11 | io.rancher.container.agent.role: environment 12 | 13 | -------------------------------------------------------------------------------- /src/render-template.es6: -------------------------------------------------------------------------------- 1 | import _ from 'lodash'; 2 | import assert from 'assert'; 3 | 4 | export default function renderTemplate(template, data) { 5 | var result = template; 6 | 7 | // interpolate string 8 | for (let [k,v] of _.pairs(data)) { 9 | result = result.replace(new RegExp(`#{${k}}`, 'g'), v) 10 | } 11 | 12 | // check if not all variables are filled 13 | let freeVariables = result.match(/#\{(\S+)\}/g); 14 | assert(!freeVariables, `template has unresolved variables:\ntemplate: ${template}\nmissing variables: ${(freeVariables || []).join(', ')}`); 15 | 16 | return result; 17 | } 18 | -------------------------------------------------------------------------------- /examples/docker-compose.slack-and-smtp.yml: -------------------------------------------------------------------------------- 1 | # 2 notifications targets here SLACK and SMTP 2 | rancher-alarms: 3 | image: ndelitski/rancher-alarms 4 | environment: 5 | ALARM_EMAIL_ADDRESSES: john@snow.com 6 | ALARM_EMAIL_USER: alarm@nightwatch.com 7 | ALARM_EMAIL_PASS: nightWatch 8 | ALARM_EMAIL_SMTP_HOST: smtp.nightwatch.com 9 | ALARM_EMAIL_FROM: 'Alarm of a Night Watch ' 10 | ALARM_SLACK_WEBHOOK_URL: https://hooks.slack.com/services/... 11 | labels: 12 | io.rancher.container.create_agent: true 13 | io.rancher.container.agent.role: environment 14 | 15 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ### v0.1.7 (January 28, 2017) 4 | - Fix system-type services caused to crush after refetching services list. Fix #23 5 | 6 | ## v0.1.6 (January 27, 2017) 7 | - Ignore system stacks. Fix #23 8 | 9 | ## v0.1.5 10 | - Fixed retrieval of project id (environment id) PR#21 11 | - Minified Docker image size 12 | 13 | ## v0.1.4 14 | - support for rancher-agent, no need to define rancher_* variables anymore 15 | - add filter gsto monitor specific range of services. use `ALARM_FILTER=regex1,regex2` 16 | - search configs files in multiple dirs - local dir, cwd, /etc/rancher-alarms 17 | - huge update of docsgs 18 | - examples folder with env configurations 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Directory for instrumented libs generated by jscoverage/JSCover 11 | lib-cov 12 | 13 | # Coverage directory used by tools like istanbul 14 | coverage 15 | 16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 17 | .grunt 18 | 19 | # node-waf configuration 20 | .lock-wscript 21 | 22 | # Compiled binary addons (http://nodejs.org/api/addons.html) 23 | build/Release 24 | 25 | # Dependency directory 26 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git 27 | node_modules 28 | 29 | 30 | .idea 31 | config.json 32 | dist 33 | docker-compose.yml 34 | -------------------------------------------------------------------------------- /examples/docker-compose.smtp-full.yml: -------------------------------------------------------------------------------- 1 | # Send SMTP notifications with using full list of sutomization parameters 2 | rancher-alarms: 3 | image: ndelitski/rancher-alarms 4 | environment: 5 | ALARM_EMAIL_ADDRESSES: john@snow.com 6 | ALARM_EMAIL_USER: alarm@nightwatch.com 7 | ALARM_EMAIL_PASS: nightWatch 8 | ALARM_EMAIL_SMTP_HOST: smtp.nightwatch.com 9 | ALARM_EMAIL_SMTP_PORT: 465 10 | ALARM_EMAIL_FROM: 'Alarm of a Night Watch ' 11 | ALARM_EMAIL_SSL: true 12 | ALARM_EMAIL_SUBJECT: '[Rancher-Alarms] problems with #{serviceName}' 13 | ALARM_EMAIL_TEMPLATE: 'service #{serviceName} become #{monitorState}' 14 | labels: 15 | io.rancher.container.create_agent: true 16 | io.rancher.container.agent.role: environment 17 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rancher-alarms", 3 | "version": "0.1.8", 4 | "description": "Will kick your ass if found unhealthy service in a Rancher environment", 5 | "main": "bin/rancher-alarms.js", 6 | "scripts": { 7 | "start": "node bin/rancher-alarms.js", 8 | "test": "echo \"Error: no test specified\" && exit 1", 9 | "build": "babel src/ -d dist" 10 | }, 11 | "repository": { 12 | "type": "git", 13 | "url": "git+https://github.com/ndelitski/rancher-alarms.git" 14 | }, 15 | "author": "Nick Delitski", 16 | "license": "MIT", 17 | "dependencies": { 18 | "axios": "^0.7.0", 19 | "babel-core": "^5.8.33", 20 | "bluebird": "^3.0.5", 21 | "deep-set": "^1.0.1", 22 | "lodash": "^3.10.1", 23 | "nodemailer": "^1.8.0", 24 | "yield": "0.0.6-8" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/log.es6: -------------------------------------------------------------------------------- 1 | import {padRight} from 'lodash'; 2 | 3 | const LOG_LEVEL = { 4 | 'error': 0, 5 | 'info': 1, 6 | 'trace': 2 7 | }; 8 | 9 | const verbosity = LOG_LEVEL[process.env.LOG_LEVEL || 'info']; 10 | 11 | 12 | export function info(...args) { 13 | log('info', ...args); 14 | } 15 | 16 | export function trace(...args) { 17 | log('trace', ...args); 18 | } 19 | 20 | export function error(msg, err) { 21 | if (arguments.length === 1) { 22 | msg = 'error'; 23 | err = arguments[0]; 24 | } 25 | 26 | log(msg, err, err.stack); 27 | } 28 | 29 | function log(severity, ...args) { 30 | let method; 31 | 32 | if (LOG_LEVEL[severity] > verbosity) { 33 | return; 34 | } 35 | 36 | if (severity == 'error') { 37 | method = 'error'; 38 | } else if (severity == 'trace') { 39 | method = 'log'; 40 | } else if (severity == 'info') { 41 | method = 'log'; 42 | } else { 43 | throw new Error(`unknown severity ${severity}`) 44 | } 45 | 46 | console[method](padRight(`[${severity.toUpperCase()}]`, 8), padRight(date(), 25), ...args); 47 | } 48 | 49 | function date() { 50 | const dt = new Date(); 51 | return [dt.getFullYear(), dt.getMonth() + 1, dt.getDate()].join('-') + ' ' + [dt.getHours(), dt.getMinutes(), dt.getSeconds(), dt.getMilliseconds()].join(':'); 52 | } 53 | -------------------------------------------------------------------------------- /src/notifications/slack.es6: -------------------------------------------------------------------------------- 1 | import NotificationTarget from './target'; 2 | import assert from 'assert'; 3 | import {info} from '../log'; 4 | import axios from 'axios'; 5 | import renderTemplate from '../render-template'; 6 | 7 | const SLACK_TEMPLATE = `service <#{serviceUrl}|#{serviceName}> in stack <#{stackUrl}|#{stackName}> became #{monitorState} (#{state})`; 8 | 9 | export default class SlackTarget extends NotificationTarget { 10 | constructor({webhookUrl, botName, channel, template = SLACK_TEMPLATE}) { 11 | super(); 12 | assert(webhookUrl, '`webhookUrl` is missing'); 13 | this._channel = channel; 14 | this._url = webhookUrl; 15 | this._botName = botName; 16 | this._messageTemplate = template; 17 | } 18 | 19 | async notify(data) { 20 | const webhookPayload = { 21 | channel: this._channel, 22 | username: this._botName, 23 | text: renderTemplate(this._messageTemplate, data) 24 | }; 25 | 26 | await axios({url: this._url, method: 'POST', data: webhookPayload}); 27 | 28 | info(`sent email notification to SLACK channel:${this._channel || 'default'}, text: ${webhookPayload.text}`) 29 | } 30 | 31 | toString() { 32 | const options = { 33 | channel: this._channel, 34 | webhookUrl: this._webhookUrl, 35 | botName: this._botName, 36 | template: this._messageTemplate 37 | }; 38 | 39 | return `(SlackTarget ${JSON.stringify(options)})` 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | repo=ndelitski 6 | 7 | image=rancher-alarms 8 | 9 | version_default=latest 10 | 11 | container_name=$image 12 | 13 | build() { 14 | local tg=${1:-$version_default} 15 | docker build --pull --rm -t ${image} ./ 16 | tag $tg 17 | } 18 | 19 | push() { 20 | local version=${1:-$version_default} 21 | tag $version 22 | docker push ${repo}/${image}:${version} 23 | } 24 | 25 | tag() { 26 | local tg=$1 27 | docker tag ${image} ${repo}/${image}:${tg} 28 | } 29 | 30 | check_state() { 31 | local container_id=$1 32 | local desired_state=$2 33 | docker inspect -f "{{.State.${desired_state}}}" ${container_id} 34 | } 35 | 36 | check_exists() { 37 | docker inspect $1 > /dev/null && echo "true" || echo "false" 38 | } 39 | 40 | run() { 41 | # if [ "$(check_exists ${container_name})" = "true" ]; then 42 | # if [ "$(check_state ${container_name} Running)" = "true" ]; then 43 | # docker kill ${container_name} 44 | # fi 45 | # docker rm -f ${container_name} 46 | # fi 47 | docker rm -f ${container_name} > /dev/null 48 | docker run \ 49 | -it --label io.rancher.container.network=true \ 50 | -e CONFIG_FILE=/etc/rancher-alarms/config.json \ 51 | -v $(pwd)/config.json:/etc/rancher-alarms/config.json \ 52 | --name ${container_name} \ 53 | ${image} 54 | } 55 | 56 | case "$1" in 57 | 'build') 58 | build ${@:2} 59 | ;; 60 | 'run') 61 | run 62 | ;; 63 | 'tag') 64 | tag ${@:2} 65 | ;; 66 | 'push') 67 | push ${@:2} 68 | ;; 69 | esac 70 | -------------------------------------------------------------------------------- /src/env-to-object.es6: -------------------------------------------------------------------------------- 1 | import _ from 'lodash'; 2 | import deepSet from 'deep-set'; 3 | 4 | /* 5 | Parse linear environment variables to nested object 6 | translate indexed variables like VAR_0=val1, VAR_1=val2 into var: [val1, val2] 7 | TARGET_0_URL= 8 | TARGET_0_TEMPLATE= 9 | ```json 10 | target: [{ 11 | url: 12 | template 13 | }] 14 | ``` 15 | */ 16 | export default function envToObject(prefix) { 17 | const prefixRe = new RegExp('^'+prefix, 'i'); 18 | const env = pickKeys(process.env, (k) => k.match(prefixRe)); 19 | 20 | return convertIndexedPropsToArrays(_.pairs(env) 21 | .reduce((obj, [key, v]) => { 22 | return deepSet(obj, deepPropertyNotation(key), v) 23 | }, {})) 24 | } 25 | 26 | function pickKeys(obj, keyPredicate) { 27 | return _.pairs(obj).reduce((filteredObj, [k,v]) => { 28 | if (keyPredicate(k)) { 29 | filteredObj[k] = v 30 | } 31 | return filteredObj 32 | }, {}) 33 | } 34 | 35 | function deepPropertyNotation(envKey) { 36 | return envKey.replace(/_/g, '.').toLowerCase() 37 | } 38 | 39 | function convertIndexedPropsToArrays(obj) { 40 | const keys = _.keys(obj); 41 | const allKeysAreNumbers = keys.every((k)=>!_.isNaN(parseInt(k))); 42 | 43 | if (allKeysAreNumbers) { 44 | return Array.apply(null, _.extend(_.clone(obj), {length: keys.length})) 45 | } 46 | 47 | return _.pairs(obj) 48 | .reduce((newObj, [k,v]) => { 49 | newObj[k] = _.isObject(v) ? convertIndexedPropsToArrays(v) : v; 50 | 51 | return newObj; 52 | }, {}); 53 | } 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /src/notifications/email.es6: -------------------------------------------------------------------------------- 1 | import NotificationTarget from './target'; 2 | import assert from 'assert'; 3 | import {isArray} from 'lodash'; 4 | import {info} from '../log'; 5 | import nodemailer from 'nodemailer'; 6 | import {all, promisifyAll} from 'bluebird'; 7 | import renderTemplate from '../render-template'; 8 | 9 | const EMAIL_TEMPLATE = `service #{serviceName} became #{monitorState} (#{state}) 10 | service: #{serviceUrl} 11 | stack: #{stackUrl} 12 | `; 13 | const DEFAULT_SUBJECT = 'Unhealth service alarm'; 14 | 15 | export default class EmailTarget extends NotificationTarget { 16 | constructor({recipients, smtp, template, textTemplate, htmlTemplate, subject = DEFAULT_SUBJECT}) { 17 | super(); 18 | assert(recipients, '`recipients` is missing'); 19 | assert(isArray(recipients), '`recipients` expected as array of email'); 20 | assert(smtp, '`smtp` is missing'); 21 | assert(smtp.host, '`smtp.host` is missing'); 22 | assert(smtp.port, '`smtp.port` is missing'); 23 | assert(smtp.from, '`smtp.from` is missing'); 24 | 25 | if (smtp.auth) { 26 | assert(smtp.auth.user, '`smtp.auth.user` is missing'); 27 | assert(smtp.auth.password, '`smtp.auth.password` is missing'); 28 | } 29 | 30 | this._recipients = recipients; 31 | this._smtpSettings = smtp; 32 | this._sender = promisifyAll(nodemailer.createTransport({ 33 | port: smtp.port, 34 | host: smtp.host, 35 | from: smtp.from, 36 | auth: smtp.auth && { 37 | user: smtp.auth.user, 38 | pass: smtp.auth.password 39 | }, 40 | secure: smtp.secureConnection 41 | })); 42 | 43 | this._textTemplate = textTemplate || EMAIL_TEMPLATE; 44 | this._htmlTemplate = template || htmlTemplate; 45 | this._subject = subject; 46 | } 47 | 48 | async notify(data) { 49 | await all(this._recipients).map((to) => { 50 | let mail = { 51 | from: this._smtpSettings.from, 52 | to, 53 | subject: renderTemplate(this._subject, data), 54 | }; 55 | 56 | if (this._htmlTemplate) { 57 | mail.html = renderTemplate(this._htmlTemplate, data) 58 | } else { 59 | mail.text = renderTemplate(this._textTemplate, data) 60 | } 61 | 62 | info(`sending email notification to ${to}`); 63 | return this._sender.sendMailAsync(mail).then((result)=> { 64 | info(`sent email notification to ${to} ${JSON.stringify(result, null, 4)}`) 65 | }); 66 | }, {concurrency: 5}); 67 | } 68 | 69 | toString() { 70 | return `email: 71 | recipients: ${this._recipients}` 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/rancher.es6: -------------------------------------------------------------------------------- 1 | import axios from 'axios'; 2 | import assert from 'assert'; 3 | import {merge, omit} from 'lodash'; 4 | import $url from 'url'; 5 | 6 | export default class RancherClient { 7 | constructor({address, version='v1', url, protocol='http', auth, projectId}) { 8 | if (auth) { 9 | assert(auth.accessKey, '`auth.accessKey` is missing'); 10 | assert(auth.secretKey, '`auth.secretKey` is missing'); 11 | this._auth = {user: auth.accessKey, password: auth.secretKey}; 12 | } 13 | 14 | if (address && !address.match(/^http/)) { 15 | address = 'http://' + address; 16 | } 17 | 18 | if (!url) { 19 | assert(address, '`url` is missing'); 20 | url = (address.match(/^http/) ? address : protocol + '://' + address); 21 | if (!url.match(/\/v\d+$/)) { 22 | url += '/' + version 23 | } 24 | } 25 | 26 | this.address = url; 27 | this.projectId = projectId; 28 | } 29 | 30 | async getCurrentProjectIdAsync() { 31 | return (await this._request({ 32 | url: `/v1/projects/` 33 | })).data[0].id; 34 | } 35 | 36 | async _request(options) { 37 | assert(options.url); 38 | try { 39 | const res = await axios(merge(options, { 40 | url: this.buildUrl(options.url), 41 | headers: this._auth ? { 42 | 'Authorization': 'Basic ' + new Buffer(this._auth.user + ':' + this._auth.password).toString('base64') 43 | } : {}, 44 | responseType: 'json' 45 | })); 46 | 47 | return res.data 48 | } 49 | catch (resp) { 50 | throw new Error('RancherClientError: non-200 code response ' + JSON.stringify(resp, null, 4)); 51 | } 52 | } 53 | 54 | async getServices() { 55 | return (await this._request({ 56 | url: `projects/${this.projectId}/services` 57 | })).data; 58 | } 59 | 60 | async getStacks() { 61 | return (await this._request({ 62 | url: `projects/${this.projectId}/environments` 63 | })).data; 64 | } 65 | 66 | async getService(serviceId) { 67 | return await this._request({ 68 | url: `projects/${this.projectId}/services/${serviceId}` 69 | }); 70 | } 71 | 72 | async getCurrentEnvironment() { 73 | return await this._request({ 74 | url: `projects/${this.projectId}` 75 | }); 76 | } 77 | 78 | async getStack(stackId) { 79 | return await this._request({ 80 | url: `projects/${this.projectId}/environments/${stackId}` 81 | }); 82 | } 83 | 84 | async getServiceContainers(serviceId) { 85 | return (await this._request({ 86 | url: `projects/${this.projectId}/services/${serviceId}/instances` 87 | })).data; 88 | } 89 | 90 | buildUrl(path) { 91 | if (path.startsWith('/')) { 92 | return $url.resolve(this.address, path); 93 | } else { 94 | return this.address + '/' + path 95 | } 96 | 97 | } 98 | 99 | } 100 | -------------------------------------------------------------------------------- /src/config.es6: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs'; 3 | import {promisify} from 'bluebird'; 4 | import {info} from './log'; 5 | import envToObject from './env-to-object'; 6 | import RancherClient from './rancher'; 7 | import _ from 'lodash'; 8 | 9 | const readFile = promisify(fs.readFile); 10 | const CONFIG_SEARCH_LOCATIONS = [ 11 | path.resolve(__dirname, '..'), 12 | process.cwd(), 13 | '/etc/rancher-alarms' 14 | ]; 15 | 16 | export default async function resolveConfig() { 17 | let configFile = process.env.ALARM_CONFIG_FILE || process.env.CONFIG_FILE; 18 | 19 | // if config is set with env variable we should ensure that file exists 20 | if (configFile && !fs.lstatSync(configFile)) { 21 | throw Error(`config file was not found: ${configFile}`) 22 | } 23 | 24 | if (!configFile) { 25 | // if no config is given – check default directories 26 | configFile = _.find(_.map(CONFIG_SEARCH_LOCATIONS, 27 | (dir) => path.join(dir, 'config.json')), fileExists 28 | ) 29 | } 30 | 31 | if (configFile) { 32 | info(`reading config from file ${configFile}`); 33 | return await fileSource(configFile); 34 | } else { 35 | info('composing config from env variables'); 36 | return await envSource(); 37 | } 38 | } 39 | 40 | function fileExists(path) { 41 | try { 42 | fs.lstatSync(path); 43 | return true; 44 | } catch (err) { 45 | return false 46 | } 47 | } 48 | 49 | async function fileSource(filePath) { 50 | const contents = await readFile(filePath, 'utf8'); 51 | const parsed = JSON.parse(contents); 52 | return parsed; 53 | } 54 | 55 | async function envSource() { 56 | const { 57 | RANCHER_ADDRESS, 58 | RANCHER_ACCESS_KEY, 59 | RANCHER_SECRET_KEY, 60 | CATTLE_URL, 61 | CATTLE_SECRET_KEY, 62 | CATTLE_ACCESS_KEY, 63 | ALARM_POLL_INTERVAL, 64 | ALARM_FILTER, 65 | ALARM_EMAIL_ADDRESSES, 66 | ALARM_EMAIL_USER, 67 | ALARM_EMAIL_PASS, 68 | ALARM_EMAIL_SSL, 69 | ALARM_EMAIL_SMTP_HOST, 70 | ALARM_EMAIL_SMTP_PORT, 71 | ALARM_EMAIL_FROM, 72 | ALARM_EMAIL_SUBJECT, 73 | ALARM_EMAIL_TEMPLATE, 74 | ALARM_EMAIL_TEMPLATE_FILE, 75 | ALARM_MONITOR_INTERVAL, 76 | ALARM_MONITOR_HEALTHY_THRESHOLD, 77 | ALARM_MONITOR_UNHEALTHY_THRESHOLD, 78 | ALARM_SLACK_WEBHOOK_URL, 79 | ALARM_SLACK_CHANNEL, 80 | ALARM_SLACK_BOTNAME, 81 | ALARM_SLACK_TEMPLATE, 82 | ALARM_SLACK_TEMPLATE_FILE, 83 | } = process.env; 84 | 85 | let { 86 | RANCHER_PROJECT_ID, 87 | } = process.env; 88 | 89 | let emailAuth; 90 | 91 | // if project_id is missing trying to figure out 92 | if (!RANCHER_PROJECT_ID) { 93 | let client = new RancherClient({ 94 | address: RANCHER_ADDRESS || CATTLE_URL, 95 | auth: { 96 | accessKey: RANCHER_ACCESS_KEY || CATTLE_ACCESS_KEY, 97 | secretKey: RANCHER_SECRET_KEY || CATTLE_SECRET_KEY 98 | } 99 | }); 100 | 101 | RANCHER_PROJECT_ID = await client.getCurrentProjectIdAsync(); 102 | } 103 | 104 | if (ALARM_EMAIL_USER || ALARM_EMAIL_PASS) { 105 | emailAuth = { 106 | user: ALARM_EMAIL_USER, 107 | password: ALARM_EMAIL_PASS 108 | } 109 | } 110 | 111 | return { 112 | rancher: { 113 | address: RANCHER_ADDRESS || CATTLE_URL, 114 | auth: { 115 | accessKey: RANCHER_ACCESS_KEY || CATTLE_ACCESS_KEY, 116 | secretKey: RANCHER_SECRET_KEY || CATTLE_SECRET_KEY 117 | }, 118 | projectId: RANCHER_PROJECT_ID 119 | }, 120 | pollServicesInterval: ALARM_POLL_INTERVAL || 60000, 121 | filter: ALARM_FILTER && ALARM_FILTER.split(','), 122 | notifications: { 123 | '*': { 124 | targets: { 125 | email: ALARM_EMAIL_FROM && { 126 | recipients: ALARM_EMAIL_ADDRESSES && ALARM_EMAIL_ADDRESSES.split(',') || [], 127 | subject: ALARM_EMAIL_SUBJECT 128 | }, 129 | slack: ALARM_SLACK_WEBHOOK_URL && { 130 | template: ALARM_SLACK_TEMPLATE, 131 | templateFile: ALARM_SLACK_TEMPLATE_FILE 132 | } 133 | }, 134 | healthcheck: { 135 | pollInterval: ALARM_MONITOR_INTERVAL || 15000, 136 | healthyThreshold: ALARM_MONITOR_HEALTHY_THRESHOLD || 3, 137 | unhealthyThreshold: ALARM_MONITOR_UNHEALTHY_THRESHOLD || 4 138 | } 139 | } 140 | }, 141 | targets: { 142 | email: ALARM_EMAIL_FROM && { 143 | smtp: { 144 | from: ALARM_EMAIL_FROM, 145 | auth: emailAuth, 146 | "host": ALARM_EMAIL_SMTP_HOST, 147 | "secureConnection": ALARM_EMAIL_SSL === "true" || ALARM_EMAIL_SSL === undefined, 148 | "port": ALARM_EMAIL_SMTP_PORT || 465 149 | }, 150 | template: ALARM_EMAIL_TEMPLATE, 151 | templateFile: ALARM_EMAIL_TEMPLATE_FILE 152 | }, 153 | slack: ALARM_SLACK_WEBHOOK_URL && { 154 | webhookUrl: ALARM_SLACK_WEBHOOK_URL, 155 | channel: ALARM_SLACK_CHANNEL, 156 | botName: ALARM_SLACK_BOTNAME || 'rancher-alarms' 157 | } 158 | } 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/server.es6: -------------------------------------------------------------------------------- 1 | import RancherClient from './rancher'; 2 | import resolveConfig from './config'; 3 | import ServiceHealthMonitor from './monitor'; 4 | import {isArray, some, keys, pluck, find, invoke, pairs, extend, merge, values} from 'lodash'; 5 | import {info, trace, error} from './log'; 6 | import Promise, {all} from 'bluebird'; 7 | import assert from 'assert'; 8 | 9 | (async () => { 10 | const config = await resolveConfig(); 11 | 12 | info(`started with config:\n${JSON.stringify(config, null, 4)}`); 13 | assert(config.pollServicesInterval, '`pollServicesInterval` is missing'); 14 | if (config.filter) { 15 | assert(isArray(config.filter), '`filters` should be of type Array'); 16 | } 17 | 18 | const rancher = new RancherClient(config.rancher); 19 | const stacks = (await rancher.getStacks()) 20 | .filter(stack => !stack.system) // ignore `system: true` stacks 21 | trace(`loaded stacks from API\n${JSON.stringify(stacks, null, 4)}`) 22 | let stacksById = (stacks).reduce((map, {name, id}) => { 23 | map[id] = name; 24 | return map; 25 | }, {}); 26 | 27 | const services = (await rancher.getServices()) 28 | .filter(globalServiceFilterPredicate) 29 | .filter(runningServicePredicate) 30 | .filter(s => keys(stacksById).indexOf(s.environmentId) !== -1); 31 | trace(`loaded services from API\n${JSON.stringify(services, null, 4)}`) 32 | let systemServicesIds = [] // cache of system services we will ignore 33 | 34 | const monitors = await all(services.map(initServiceMonitor)); 35 | info('monitors inited:'); 36 | for (let m of monitors) { 37 | info(m.toString()); 38 | } 39 | invoke(values(monitors), 'start'); 40 | 41 | while(true) { 42 | await Promise.delay(config.pollServicesInterval); 43 | await updateMonitors(); 44 | } 45 | 46 | async function initServiceMonitor(service) { 47 | const {name, environmentId} = service; 48 | const serviceFullName = stacksById[environmentId].toLowerCase() + '/' + name.toLowerCase(); 49 | 50 | const targets = extend({}, config.notifications['*'] && config.notifications['*'].targets, config.notifications[serviceFullName] && config.notifications[serviceFullName].targets); 51 | 52 | for(let [targetName, targetConfig] of pairs(targets)) { 53 | if (config.targets[targetName]) { 54 | merge(targetConfig, config.targets[targetName]); 55 | } 56 | } 57 | 58 | const healthcheck = merge({}, config.notifications['*'] && config.notifications['*'].healthcheck, config.notifications[serviceFullName] && config.notifications[serviceFullName].healthcheck); 59 | return new ServiceHealthMonitor({ 60 | stackName: stacksById[environmentId], 61 | rancherClient: rancher, 62 | service, 63 | healthcheck, 64 | targets, 65 | templates: config.templates || {} 66 | }); 67 | } 68 | 69 | async function updateMonitors() { 70 | const availableServices = (await rancher.getServices()) 71 | .filter(globalServiceFilterPredicate); 72 | const monitoredServices = pluck(monitors, 'service'); 73 | trace(`updating monitors`); 74 | 75 | //check if there are new services running 76 | for (let s of availableServices.filter(runningServicePredicate)) { 77 | if (systemServicesIds.indexOf(s.id) !== -1) { 78 | trace(`service id=${s.id} name=${s.name} is system, ignoring...`); 79 | continue 80 | } 81 | 82 | if (!find(monitoredServices, {id: s.id})) { 83 | if (!s.environmentId) { 84 | // some services doesn't have `environmentId` property. we will skip these so far (I suppose those are internal Rancher services) 85 | trace(`service id=${s.id} name=${s.name} has no environmentId property, skipping... data=${JSON.stringify(s, null, 4)}`); 86 | continue; 87 | } 88 | 89 | let stackName = stacksById[s.environmentId]; 90 | if (!stackName) { 91 | const stack = await rancher.getStack(s.environmentId) 92 | if (stack.system) { 93 | systemServicesIds.push(s.id) 94 | trace(`service id=${s.id} name=${s.name} is system, skipping... data=${JSON.stringify(s, null, 4)}`); 95 | continue; 96 | } 97 | // we found new `user` stack, add it to cache 98 | stackName = stacksById[stack.id] = stack.name 99 | } 100 | info(`discovered new running service, creating monitor for: ${stackName}/${s.name}`); 101 | const monitor = await initServiceMonitor(s); 102 | info(`new monitor up ${monitor}`); 103 | monitors.push(monitor); 104 | monitor.start(); 105 | } 106 | } 107 | 108 | //check if there are monitors polling stopped service 109 | for (let s of availableServices.filter((s) => (!runningServicePredicate(s)))) { 110 | let monitoredService, monitor; 111 | 112 | if (monitoredService = find(monitoredServices, {id: s.id})) { 113 | monitor = find(monitors, {service: monitoredService}); 114 | info(`stopping ${monitoredService.name} due to ${s.state} state`); 115 | monitors.splice(monitors.indexOf(monitor), 1); 116 | monitor.stop(); 117 | } 118 | } 119 | } 120 | 121 | /** 122 | * Should we monitor this service? 123 | * @param service 124 | */ 125 | function runningServicePredicate(service) { 126 | return ['active', 'upgraded', 'upgrading', 'updating-active'].indexOf(service.state) !== -1; 127 | } 128 | 129 | function globalServiceFilterPredicate(service) { 130 | const fullName = stacksById[service.environmentId] + '/' + service.name; 131 | 132 | if (config.filter) { 133 | const matched = some(config.filter, (f) => fullName.match(new RegExp(f))); 134 | 135 | if (matched) { 136 | return true; 137 | } else { 138 | trace(`${fullName} ignored due to global filter setup('filter' config option)`) 139 | } 140 | } else { 141 | return true; 142 | } 143 | } 144 | 145 | })(); 146 | 147 | process.on('unhandledRejection', handleError); 148 | 149 | function handleError(err) { 150 | error(err); 151 | process.exit(1); 152 | } 153 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rancher-alarms 2 | 3 | Send notifications when something goes wrong in rancher 4 | 5 | ## Features 6 | - Will kick your ass when service goes down and send message when on recover 7 | - Various notification mechanisms 8 | - email 9 | - slack 10 | - * please create an issue if you need more 11 | - Configure notification mechanisms globally or on a per service level(supported in `.json` config setup for now) 12 | - Customize your notification messages 13 | 14 | ## Quick start 15 | 16 | ### Inside Rancher environment using rancher-compose CLI 17 | ```yml 18 | rancher-alarms: 19 | image: ndelitski/rancher-alarms 20 | environment: 21 | ALARM_SLACK_WEBHOOK_URL:https://hooks.slack.com/services/:UUID 22 | labels: 23 | io.rancher.container.create_agent: true 24 | io.rancher.container.agent.role: environment 25 | ``` 26 | [How to create Slack Webhook URL](https://my.slack.com/services/new/incoming-webhook/) 27 | 28 | NOTE: Including rancher agent labels is crucial otherwise you need provide rancher credentials manually with RANCHER_* variables 29 | 30 | ### Outside Rancher environment using `docker run` 31 | ``` 32 | docker run \ 33 | -d \ 34 | -e RANCHER_ADDRESS=rancher.yourdomain.com \ 35 | -e RANCHER_ACCESS_KEY=ACCESS-KEY \ 36 | -e RANCHER_SECRET_KEY=SECRET-KEY \ 37 | -e RANCHER_PROJECT_ID=1a8 \ 38 | -e ALARM_SLACK_WEBHOOK_URL=https://hooks.slack.com/services/YOUR_SLACK_UUID \ 39 | --name rancher-alarms \ 40 | ndelitski/rancher-alarms 41 | ``` 42 | 43 | ## How it works 44 | 45 | On startup get a list of services and instantiate healthcheck monitor for each of them if service is in a running state. Removed, purged and etc services will be ignored 46 | 47 | List of healthcheck monitors is updated with a `pollServicesInterval` interval. When service is removed it will be no longer monitored. 48 | 49 | When a service transitions to a degraded state, all targets will be invoked to process notification(s). 50 | 51 | 52 | ## docker-compose configuration 53 | 54 | ### Docker compose for email notification target 55 | 56 | ```yml 57 | rancher-alarms: 58 | image: ndelitski/rancher-alarms 59 | environment: 60 | RANCHER_ADDRESS:your-rancher.com 61 | ALARM_SLACK_WEBHOOK_URL:https://hooks.slack.com/services/... 62 | ``` 63 | 64 | More docker-compose examples see in [examples](https://github.com/ndelitski/rancher-alarms/tree/master/examples) 65 | 66 | ## Configuration 67 | 68 | ### Environment variables 69 | 70 | #### Rancher settings 71 | Could be ignored if you are running inside Rancher environment (service should be started as a rancher agent though) 72 | - `RANCHER_ADDRESS` 73 | - `RANCHER_PROJECT_ID` 74 | - `RANCHER_ACCESS_KEY` 75 | - `RANCHER_SECRET_KEY` 76 | 77 | #### Polling settings 78 | - `ALARM_POLL_INTERVAL` 79 | - `ALARM_MONITOR_INTERVAL` 80 | - `ALARM_MONITOR_HEALTHY_THRESHOLD` 81 | - `ALARM_MONITOR_UNHEALTHY_THRESHOLD` 82 | - `ALARM_FILTER` 83 | 84 | #### Email target settings 85 | - `ALARM_EMAIL_ADDRESSES` 86 | - `ALARM_EMAIL_USER` 87 | - `ALARM_EMAIL_PASS` 88 | - `ALARM_EMAIL_SSL` 89 | - `ALARM_EMAIL_SMTP_HOST` 90 | - `ALARM_EMAIL_SMTP_PORT` 91 | - `ALARM_EMAIL_FROM` 92 | - `ALARM_EMAIL_SUBJECT` 93 | - `ALARM_EMAIL_TEMPLATE` 94 | - `ALARM_EMAIL_TEMPLATE_FILE` 95 | 96 | #### Slack target settings 97 | - `ALARM_SLACK_WEBHOOK_URL` 98 | - `ALARM_SLACK_CHANNEL` 99 | - `ALARM_SLACK_BOTNAME` 100 | - `ALARM_SLACK_TEMPLATE` 101 | - `ALARM_SLACK_TEMPLATE_FILE` 102 | 103 | See [examples](https://github.com/ndelitski/rancher-alarms/tree/master/examples) using environment config in docker-compose files 104 | 105 | ### Local json config 106 | 107 | ```json 108 | { 109 | "rancher": { 110 | "address": "rancher-host:port", 111 | "auth": { 112 | "accessKey": "", 113 | "secretKey": "" 114 | }, 115 | "projectId": "1a5" 116 | }, 117 | "pollServicesInterval": 10000, 118 | "filter": [ 119 | "app/*" 120 | ], 121 | "notifications": { 122 | "*": { 123 | "targets": { 124 | "email": { 125 | "recipients": [ 126 | "join@snow.com" 127 | ] 128 | } 129 | }, 130 | "healthcheck": { 131 | "pollInterval": 5000, 132 | "healthyThreshold": 2, 133 | "unhealthyThreshold": 3 134 | }, 135 | }, 136 | "frontend": { 137 | "targets": { 138 | "email": { 139 | "recipients": [ 140 | "arya@stark.com" 141 | ] 142 | } 143 | } 144 | } 145 | }, 146 | "targets": { 147 | "email": { 148 | "smtp": { 149 | "from": " alarm@domain.com", 150 | "auth": { 151 | "user": "john@doe.com", 152 | "password": "Str0ngPa$$" 153 | }, 154 | "host": "smtp.gmail.com", 155 | "secureConnection": true, 156 | "port": 465 157 | } 158 | }, 159 | "slack": { 160 | "webhookUrl": "https://hooks.slack.com/services/YOUR_SLACK_UUID", 161 | "botName": "rancher-alarm", 162 | "channel": "#devops" 163 | } 164 | } 165 | } 166 | ``` 167 | 168 | #### Config file sections 169 | - `rancher` Rancher API settings. `required` 170 | - `pollServicesInterval` interval in ms of fetching list of services. `required`. 171 | - `filter` whitelist filter for stack/services names in environment. List of string values. Every string is a RegExp expression so you can use something like this to match all stack services `frontend/*`. `optional` 172 | - `notifications` per service notification settings. Wildcard means any service `required` 173 | - `healtcheck` monitoring state options. `optional` defaults are: 174 | ```js 175 | { 176 | pollInterval: 5000, 177 | healthyThreshold: 2, 178 | unhealthyThreshold: 3 179 | } 180 | ``` 181 | - `targets` what notification targets to use. Will override base target settings in a root `targets` section. Currently each target must be an Object value. If you have nothing to override from a base settings just place `{}` as a value. `optional` 182 | - `targets` base settings for each notification target. `required` 183 | 184 | ## Templates 185 | ### List of template variables: 186 | - `healthyState` HEALTHY or UNHEALTHY 187 | - `state` service state like it named in Rancher API 188 | - `prevMonitorState` rancher-alarms previous service state name 189 | - `monitorState` rancher-alarms service state name - e.g. always degraded for unhealthy 190 | - `serviceName` Name of a service in a Rancher 191 | - `serviceUrl` Url to a running service in a Rancher UI 192 | - `stackUrl` Url to stack in a Rancher UI 193 | - `stackName` Name of a stack in a Rancher 194 | - `environmentName` Name of a environment in a Rancher 195 | - `environmentUrl` URL to environment in a rancher UI 196 | 197 | ### Using variables in template string: 198 | ``` 199 | Hey buddy! Your service #{serviceName} become #{healthyState}, direct link to the service #{serviceUrl} 200 | ``` 201 | More detailed examples your can see in the `examples` folder 202 | 203 | ## Roadmap 204 | - [] Simplify configuration. 205 | - [] More use of rancher labels and metadata. Alternate configuration through rancher labels/metadata(can be used in a conjunction with initial config). 206 | - [] Run in a rancher environment as an agent with a new label `agent: true`. No need to specify keys anymore! 207 | - [] More notifications mechanisms: AWS SNS, http, sms 208 | - [x] Support templating 209 | - [] Test coverage. Setup drone.io 210 | - [x] Notify when all services operate normal after some of them were in a degraded state 211 | - [] Refactor code 212 | - [x] Shrinking image size with alpine linux 213 | -------------------------------------------------------------------------------- /src/monitor.es6: -------------------------------------------------------------------------------- 1 | import _, {pairs, defaults, padRight, invoke} from 'lodash'; 2 | import Target from './notifications/target'; 3 | import {info, trace, error as logError} from './log'; 4 | import assert from 'assert'; 5 | import {delay} from 'bluebird'; 6 | import y from 'yield'; 7 | import fs from 'fs'; 8 | import path from 'path'; 9 | 10 | class StateRingBuffer { 11 | get length() { 12 | return this._arr.length; 13 | } 14 | constructor(length) { 15 | this._arr = Array.apply( null, { length: length } ); 16 | } 17 | push(state) { 18 | this._arr.shift(); 19 | this._arr.push(state); 20 | } 21 | validateState(state) { 22 | for(let s of this._arr) { 23 | if (typeof s === 'undefined' || s !== state) { 24 | return false; 25 | } 26 | } 27 | return true; 28 | } 29 | join(sym) { 30 | return this._arr.join(sym) 31 | } 32 | toString() { 33 | let str = '['; 34 | const delimeter = ' '; 35 | for (var i = 0, len = this._arr.length; i < len; i++) { 36 | str += (this._arr[i] || '?') + (i !== this._arr.length - 1 ? delimeter : '') 37 | } 38 | str += ']'; 39 | return str; 40 | } 41 | } 42 | 43 | export default class ServiceStateMonitor { 44 | 45 | get name() { 46 | return this.stackName.toLowerCase() + '/' + this.service.name.toLowerCase(); 47 | } 48 | 49 | constructor({targets, templates, service, stackName, rancherClient, healthcheck}) { 50 | assert(service, '`service` is missing'); 51 | assert(service.name, '`service.name` is missing'); 52 | assert(stackName, '`stackName` is missing'); 53 | assert(rancherClient, '`rancherClient` is missing'); 54 | 55 | this.healthcheck = defaults(healthcheck || {}, { 56 | pollInterval: 5000, 57 | healthyThreshold: 2, 58 | unhealthyThreshold: 3 59 | }); 60 | 61 | this.service = service; 62 | this.state = service.state; 63 | this.stackName = stackName; 64 | this._isHealthy = true; 65 | this._rancher = rancherClient; 66 | this._unhealhtyStatesBuffer = new StateRingBuffer(this.healthcheck.unhealthyThreshold); 67 | this._healhtyStatesBuffer = new StateRingBuffer(this.healthcheck.healthyThreshold); 68 | this._templates = templates; 69 | 70 | if (targets) { 71 | this.setupNotificationsTargets(targets) 72 | } 73 | } 74 | 75 | setupNotificationsTargets(targets) { 76 | this._targets = []; 77 | for (let [targetName, targetConfig] of pairs(targets)) { 78 | if (!targetConfig) { //skip undefined targets 79 | continue; 80 | } 81 | 82 | const options = _.clone(targetConfig); 83 | 84 | if (options.templateName) { 85 | assert(this._templates[options.templateName], `template ${options.templateName} is not found in "templates" section`); 86 | options.template = this._templates[options.templateName] 87 | } else if (options.templateFile) { 88 | options.template = fs.readFileSync(options.templateFile, 'utf8'); 89 | } 90 | 91 | this._targets.push(Target.init(targetName, options)); 92 | } 93 | } 94 | 95 | async notifyHealthStateChangeAsync(isHealthy, oldState, newState) { 96 | let { 97 | state, 98 | name: serviceName, 99 | accountId: envId, 100 | environmentId: stackId, 101 | id: serviceId, 102 | } = this.service; 103 | 104 | let serviceUrl = this._rancher.buildUrl(`/env/${envId}/apps/stacks/${stackId}/services/${serviceId}/containers`); 105 | let stackUrl = this._rancher.buildUrl(`/env/${envId}/apps/stacks/${stackId}`); 106 | let stack = await this._rancher.getStack(stackId); 107 | let environment = await this._rancher.getCurrentEnvironment(); 108 | let environmentUrl = this._rancher.buildUrl(`/env/${envId}`); 109 | 110 | for (let target of this._targets) { 111 | try { 112 | await target.notify({ 113 | isHealthy: isHealthy, 114 | healthyState: isHealthy ? 'HEALTHY' : 'UNHEALTHY', 115 | service: this.service, // service object with a full list of properties (see Rancher API) 116 | state, // rancher service state 117 | prevMonitorState: oldState, // rancher-alarms previous service state 118 | monitorState: newState, // rancher-alarms service state - e.g. always degraded for unhealthy 119 | serviceName, 120 | serviceUrl, // url to a running service in a rancher UI 121 | stackUrl, // url to stack in a rancher UI 122 | stack, // stack object with a full list of properties (see Rancher API) 123 | stackName: stack.name, 124 | environment, // environment object with a full list of properties (see Rancher API) 125 | environmentName: environment.name, 126 | environmentUrl, // url to environment in a rancher UI 127 | }) 128 | } catch(err) { 129 | logError(`failed to notify target ${target.toString()}`, err); 130 | } 131 | } 132 | } 133 | 134 | _pushState(state) { 135 | this.prevState = this.state; 136 | this.state = state; 137 | this._unhealhtyStatesBuffer.push(state); 138 | this._healhtyStatesBuffer.push(state); 139 | trace(`${this.name} buffers:\n \thealthy: ${this._healhtyStatesBuffer} \n\tunhealthy: ${this._unhealhtyStatesBuffer}`); 140 | } 141 | 142 | updateState(newState) { 143 | this._pushState(newState); 144 | 145 | if (this.prevState !== this.state) { 146 | info(`service ${padRight(this.name, 15)} ${this.prevState || 'unknown'} -> ${this.state}`); 147 | } 148 | 149 | if (this._isHealthy && this._unhealhtyStatesBuffer.validateState('degraded')) { 150 | this._isHealthy = false; 151 | this.notifyHealthStateChangeAsync(this._isHealthy, this.prevState, this.state); 152 | info(`service ${padRight(this.name, 15)} became UNHEALTHY with threshold ${this._unhealhtyStatesBuffer.length}`); 153 | } else if (!this._isHealthy && this._healhtyStatesBuffer.validateState('active')) { 154 | this._isHealthy = true; 155 | this.notifyHealthStateChangeAsync(this._isHealthy, this.prevState, this.state); 156 | info(`service ${padRight(this.name, 15)} became HEALTHY with threshold ${this._healhtyStatesBuffer.length}`); 157 | } 158 | } 159 | 160 | start() { 161 | this.stop(); 162 | info(`start polling ${this.name}`); 163 | this._pollCanceled = false; 164 | 165 | (async () => { 166 | while (!this._pollCanceled) { 167 | await delay(this.healthcheck.pollInterval); 168 | await this._tick(); 169 | } 170 | })(); 171 | } 172 | 173 | async _tick() { 174 | let newState; 175 | 176 | this.service = await this._rancher.getService(this.service.id); 177 | trace(`poll ${this.name}`); 178 | 179 | if (this.service.state === 'updating-active') { 180 | newState = 'degraded'; 181 | } else if (this.service.state === 'active') { 182 | if (this.service.launchConfig && this.service.launchConfig.healthCheck) { 183 | const containers = await this._rancher.getServiceContainers(this.service.id); 184 | 185 | const hasUnhealthyContainers = _(this._withoutSidekicks(containers)) 186 | .filter((c) => c.state === 'running') 187 | .some((c) => (c.healthState !== 'healthy')); 188 | 189 | newState = hasUnhealthyContainers ? 'degraded' : 'active'; 190 | } else { 191 | newState = 'active'; 192 | } 193 | } else { 194 | newState = this.service.state; 195 | } 196 | 197 | this.updateState(newState); 198 | } 199 | 200 | _withoutSidekicks(containers) { 201 | return containers.filter(({name}) => name.split('_').length <= 3 ); 202 | //const byDeployUnit = {}; 203 | //let results = []; 204 | // 205 | //for (let container of containers) { 206 | // let unit; 207 | // if (unit = container.labels['io.rancher.service.deployment.unit']) { 208 | // if (!byDeployUnit[unit]) { 209 | // byDeployUnit[unit] = [container] 210 | // } else { 211 | // byDeployUnit[unit].push(container); 212 | // } 213 | // } 214 | //} 215 | // 216 | //for (let [unitId, unitContainers] of pairs(byDeployUnit)) { 217 | // const sidekicks = _(unitContainers) 218 | // .map((c) => c.labels['io.rancher.sidekicks'] && c.labels['io.rancher.sidekicks'].split(',')) 219 | // .compact() 220 | // .flatten() 221 | // .uniq(); 222 | // 223 | // results = results.concat(unitContainers.filter(({name}) => { 224 | // const re = new RegExp(`${this.stackName}_${this.service.name}_(.*)_\\d`); 225 | // const match = name.match(re); 226 | // if (!match) { 227 | // error(`failed to extract container_name from ${name} with regex ${re}`) 228 | // } 229 | // const serviceName = match && match[1]; 230 | // info(`serviceName ${serviceName} extracted from ${name}`); 231 | // return sidekicks.indexOf(serviceName) == -1; 232 | // })); 233 | //} 234 | //return results; 235 | } 236 | 237 | stop() { 238 | if (this._pollCanceled !== undefined && !this._pollCanceled) { 239 | info(`stop polling ${this.name}`); 240 | this._pollCanceled = true; 241 | } 242 | } 243 | 244 | toString() { 245 | return `${this.name}: 246 | targets: ${stringify(invoke(this._targets, 'toString').join(''))} 247 | healthcheck: ${stringify(this.healthcheck)} 248 | ` 249 | } 250 | 251 | }; 252 | 253 | function stringify(obj) { 254 | return JSON.stringify(obj, null, 4); 255 | } 256 | --------------------------------------------------------------------------------