├── .github └── demo.png ├── .gitignore ├── Dockerfile ├── README.md ├── cmd └── daemon │ └── main.go ├── integration └── slack.go ├── template ├── error └── template.go └── types └── types.go /.github/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteArena/docker-healthcheck-watcher/09f2bc9d2c83ff05ca6c5951363979ddc6eb7c06/.github/demo.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | cmd/daemon/daemon 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.8-stretch 2 | 3 | ENV APP_HOME $GOPATH/src/github.com/bytearena/docker-healthcheck-watcher 4 | 5 | COPY . $APP_HOME 6 | 7 | WORKDIR $APP_HOME/cmd/daemon 8 | RUN go get -v ./... 9 | RUN go build 10 | 11 | CMD ["daemon"] 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # docker-healthcheck-watcher 2 | 3 | 4 | 5 | > Monitor container healthchecks 6 | 7 | ### The issue 8 | 9 | If you are using a micro-service oriented architecture and you want to make sure all your services are up and running, you need some sort of application healthchecks. 10 | 11 | Usually each service must expose an HTTP interface (something like `/health`). In a web application it's easy, you can just create a new route responding to the healthcheck but how would you do for system binaries for example? 12 | 13 | ### The solution 14 | 15 | You can specify a `HEALTHCHECK` instruction in your Dockerfile. Docker will be executing the command at a regular time (interval and timeout are configurable). 16 | Almost no monitoring/alerting tool uses that Docker feature, so we decided to build our own. 17 | 18 | The healthcheck doesn't pass the boundaries of the container, there is no security or disclosure issue. Even for non HTTP service likes batches you can provide a custom command (bash script) to check the health of your program. 19 | 20 | If Docker detects an unhealthy service, it will send a message to our tool and we can decide what to do (see integrations). 21 | 22 | ### Integrations 23 | 24 | At the moment only Slack is supported for alerting. The configuration (environment variables) is the following: 25 | 26 | ```yml 27 | # Slack endpoint with the authentication token 28 | SLACK_URL: "https://hooks.slack.com/services/[...]" 29 | 30 | # Channel, private group, or IM channel to send message to. 31 | SLACK_CHANNEL: "#ops" 32 | 33 | # Set your bot's user name. 34 | SLACK_USERNAME: "dockerwatcher" 35 | 36 | # Emoji to use as the icon for this message 37 | SLACK_ICONEMOJI: ":robot_face:" 38 | ``` 39 | 40 | ### Build and run 41 | 42 | The binary is in the `cmd/daemon` folder. 43 | 44 | Build: 45 | ```sh 46 | go build 47 | ``` 48 | 49 | And run the daemon: 50 | ```sh 51 | ./daemon 52 | ``` 53 | -------------------------------------------------------------------------------- /cmd/daemon/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "log" 6 | 7 | "github.com/docker/docker/api/types" 8 | "github.com/docker/docker/client" 9 | 10 | "github.com/bytearena/docker-healthcheck-watcher/integration" 11 | "github.com/bytearena/docker-healthcheck-watcher/template" 12 | t "github.com/bytearena/docker-healthcheck-watcher/types" 13 | ) 14 | 15 | func onContainerDieFailure(service string, exitCode string) { 16 | errorMessage := t.ErrorMessage{ 17 | Emoji: ":red_circle:", 18 | ServiceName: service, 19 | ServiceStatus: "died (exited with code " + exitCode + ")", 20 | Log: "", 21 | } 22 | 23 | message := template.MakeTemplate(errorMessage) 24 | 25 | output := slack.Publish(message) 26 | 27 | log.Println(service, "failure, sent message", output) 28 | } 29 | 30 | func onContainerHealthy(service string) { 31 | errorMessage := t.ErrorMessage{ 32 | Emoji: ":+1:", 33 | ServiceName: service, 34 | ServiceStatus: "ok", 35 | Log: "", 36 | } 37 | 38 | message := template.MakeTemplate(errorMessage) 39 | 40 | output := slack.Publish(message) 41 | 42 | log.Println(service, "sent message", output) 43 | } 44 | 45 | func onContainerHealthCheckFailure(service string) { 46 | errorMessage := t.ErrorMessage{ 47 | Emoji: "🚨", 48 | ServiceName: service, 49 | ServiceStatus: "unhealthy (running)", 50 | Log: "", 51 | } 52 | 53 | message := template.MakeTemplate(errorMessage) 54 | 55 | output := slack.Publish(message) 56 | 57 | log.Println(service, "failure, sent message", output) 58 | } 59 | 60 | func main() { 61 | cli, clientError := client.NewEnvClient() 62 | ctx := context.Background() 63 | 64 | if clientError != nil { 65 | log.Panicln(clientError) 66 | } 67 | 68 | stream, err := cli.Events(ctx, types.EventsOptions{}) 69 | 70 | for { 71 | select { 72 | case msg := <-err: 73 | log.Panicln(msg) 74 | case msg := <-stream: 75 | if msg.Action == "health_status: unhealthy" { 76 | onContainerHealthCheckFailure(msg.Actor.Attributes["image"]) 77 | } 78 | 79 | if msg.Action == "health_status: healthy" { 80 | onContainerHealthy(msg.Actor.Attributes["image"]) 81 | } 82 | 83 | if msg.Action == "die" { 84 | exitCode := msg.Actor.Attributes["exitCode"] 85 | 86 | if exitCode != "0" { 87 | onContainerDieFailure(msg.Actor.Attributes["image"], exitCode) 88 | } 89 | } 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /integration/slack.go: -------------------------------------------------------------------------------- 1 | package slack 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "io/ioutil" 7 | "net/http" 8 | "os" 9 | ) 10 | 11 | type message struct { 12 | Channel string `json:"channel"` 13 | Username string `json:"username"` 14 | Text string `json:"text"` 15 | IconEmoji string `json:"icon_emoji"` 16 | } 17 | 18 | func makeMessage(text string) message { 19 | channel := os.Getenv("SLACK_CHANNEL") 20 | username := os.Getenv("SLACK_USERNAME") 21 | iconEmoji := os.Getenv("SLACK_ICONEMOJI") 22 | 23 | return message{ 24 | Channel: channel, 25 | Username: username, 26 | Text: text, 27 | IconEmoji: iconEmoji, 28 | } 29 | } 30 | 31 | func Publish(text string) string { 32 | url := os.Getenv("SLACK_URL") 33 | 34 | if url == "" { 35 | panic("SLACK_URL needs to be specified") 36 | } 37 | 38 | data, _ := json.Marshal(makeMessage(text)) 39 | 40 | resp, err := http.Post(url, "application/json", bytes.NewBuffer(data)) 41 | 42 | if err != nil { 43 | panic(err) 44 | } 45 | 46 | body, _ := ioutil.ReadAll(resp.Body) 47 | 48 | return string(body) 49 | } 50 | -------------------------------------------------------------------------------- /template/error: -------------------------------------------------------------------------------- 1 | {{.Emoji}} {{.ServiceName}}: {{.ServiceStatus}} 2 | -------------------------------------------------------------------------------- /template/template.go: -------------------------------------------------------------------------------- 1 | package template 2 | 3 | import ( 4 | "bytes" 5 | "io/ioutil" 6 | "log" 7 | "text/template" 8 | 9 | "github.com/bytearena/docker-healthcheck-watcher/types" 10 | ) 11 | 12 | func getTemplateContent(filename string) ([]byte, error) { 13 | return ioutil.ReadFile(filename) 14 | } 15 | 16 | func MakeTemplate(s types.ErrorMessage) string { 17 | 18 | //create a new template with some name 19 | tmpl := template.New("test") 20 | content, getTemplateErr := getTemplateContent("../../template/error") 21 | 22 | if getTemplateErr != nil { 23 | log.Panicln(getTemplateErr) 24 | } 25 | 26 | //parse some content and generate a template 27 | tmpl, err := tmpl.Parse(string(content)) 28 | if err != nil { 29 | log.Panicln("Parse: ", err) 30 | } 31 | 32 | var res bytes.Buffer 33 | 34 | //merge template 'tmpl' with content of 's' 35 | err1 := tmpl.Execute(&res, s) 36 | if err1 != nil { 37 | log.Panicln("Execute: ", err1) 38 | } 39 | 40 | return res.String() 41 | } 42 | -------------------------------------------------------------------------------- /types/types.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | type ErrorMessage struct { 4 | Emoji string 5 | ServiceName string 6 | ServiceStatus string 7 | Log string 8 | } 9 | --------------------------------------------------------------------------------