├── .gitignore ├── Godeps ├── Readme └── Godeps.json ├── install.sh ├── sample.go-monitor.yml ├── init ├── main_test.go ├── README.md └── main.go /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | vendor 3 | *.iml 4 | go-monitor.yml 5 | -------------------------------------------------------------------------------- /Godeps/Readme: -------------------------------------------------------------------------------- 1 | This directory tree is generated automatically by godep. 2 | 3 | Please do not edit. 4 | 5 | See https://github.com/tools/godep for more information. 6 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | cp init /etc/init.d/go-monitor 4 | chmod +x /etc/init.d/go-monitor 5 | go build 6 | cp go-monitor /usr/local/bin/ 7 | cp go-monitor.yml /usr/local/etc/ 8 | 9 | update-rc.d go-monitor defaults 10 | -------------------------------------------------------------------------------- /sample.go-monitor.yml: -------------------------------------------------------------------------------- 1 | processes: [ "list", "of", "processes", "tcp://localhost:8000", "http://keyshift.co" ] 2 | config: 3 | messagebirdtoken: "test_TOKEN" 4 | messagebirdsender: "+sender-number" 5 | recipients: "+recipient-numbers,+one-or-many" 6 | defaultttlseconds: 5 7 | servernicename: "my-server-name" 8 | checkfrequencyseconds: 5 -------------------------------------------------------------------------------- /Godeps/Godeps.json: -------------------------------------------------------------------------------- 1 | { 2 | "ImportPath": "github.com/ksred/go-monitor", 3 | "GoVersion": "go1.8", 4 | "GodepVersion": "v79", 5 | "Deps": [ 6 | { 7 | "ImportPath": "github.com/patrickmn/go-cache", 8 | "Comment": "v2.0.0-7-gea4bd2a", 9 | "Rev": "ea4bd2a538d816ad2bd3426ced3a99f2382103ba" 10 | }, 11 | { 12 | "ImportPath": "gopkg.in/yaml.v2", 13 | "Rev": "a3f3340b5840cee44f372bddb5880fcbc419b46a" 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /init: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # go-monitor daemon 3 | # chkconfig: 345 20 80 4 | # description: go-monitor daemon 5 | # processname: go-monitor 6 | 7 | DAEMON_PATH="/usr/local/bin/" 8 | 9 | DAEMON=go-monitor 10 | 11 | NAME=go-monitor 12 | DESC="go-monitor service monitoring" 13 | PIDFILE=/var/run/$NAME.pid 14 | SCRIPTNAME=/etc/init.d/$NAME 15 | 16 | case "$1" in 17 | start) 18 | printf "%-50s" "Starting $NAME..." 19 | cd $DAEMON_PATH 20 | PID=`$DAEMON > /dev/null 2>&1 & echo $!` 21 | #echo "Saving PID" $PID " to " $PIDFILE 22 | if [ -z $PID ]; then 23 | printf "%s\n" "Fail" 24 | else 25 | echo $PID > $PIDFILE 26 | printf "%s\n" "Ok" 27 | fi 28 | ;; 29 | status) 30 | printf "%-50s" "Checking $NAME..." 31 | if [ -f $PIDFILE ]; then 32 | PID=`cat $PIDFILE` 33 | if [ -z "`ps axf | grep ${PID} | grep -v grep`" ]; then 34 | printf "%s\n" "Process dead but pidfile exists" 35 | else 36 | echo "Running" 37 | fi 38 | else 39 | printf "%s\n" "Service not running" 40 | fi 41 | ;; 42 | stop) 43 | printf "%-50s" "Stopping $NAME" 44 | PID=`cat $PIDFILE` 45 | cd $DAEMON_PATH 46 | if [ -f $PIDFILE ]; then 47 | kill -HUP $PID 48 | printf "%s\n" "Ok" 49 | rm -f $PIDFILE 50 | else 51 | printf "%s\n" "pidfile not found" 52 | fi 53 | ;; 54 | 55 | restart) 56 | $0 stop 57 | $0 start 58 | ;; 59 | 60 | *) 61 | echo "Usage: $0 {status|start|stop|restart}" 62 | exit 1 63 | esac 64 | -------------------------------------------------------------------------------- /main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | func TestValidate(t *testing.T) { 9 | monitor := Monitor{} 10 | 11 | err := monitor.validate() 12 | if err == nil { 13 | t.Errorf("Looking for %v, got %v", "We need to monitor at least one process", nil) 14 | } 15 | 16 | monitor.Processes = []string{"test"} 17 | 18 | err = monitor.validate() 19 | if err == nil { 20 | t.Errorf("Looking for %v, got %v", "Not all config variables present", nil) 21 | } 22 | 23 | monitor.Config.MessageBirdToken = "test" 24 | monitor.Config.MessageBirdSender = "test" 25 | monitor.Config.Recipients = "test" 26 | monitor.Config.DefaultTTLSeconds = 1 27 | monitor.Config.ServerNiceName = "test" 28 | 29 | err = monitor.validate() 30 | if err != nil { 31 | t.Errorf("Looking for %v, got %v", nil, err) 32 | } 33 | } 34 | 35 | func TestGetServerInfo(t *testing.T) { 36 | 37 | } 38 | 39 | func TestCheckProc(t *testing.T) { 40 | // @TODO Not sure how to test this without involving setting up a channel 41 | } 42 | 43 | func TestLineCount(t *testing.T) { 44 | line := bytes.NewBufferString("test one line\n") 45 | lines, err := lineCounter(line) 46 | if err != nil { 47 | t.Errorf("Looking for %v, got %v", nil, err) 48 | } 49 | if lines != 1 { 50 | t.Errorf("Looking for %v, got %v", 1, lines) 51 | } 52 | 53 | line = bytes.NewBufferString("test one line\ntest two lines\n") 54 | lines, err = lineCounter(line) 55 | if err != nil { 56 | t.Errorf("Looking for %v, got %v", nil, err) 57 | } 58 | if lines != 2 { 59 | t.Errorf("Looking for %v, got %v", 2, lines) 60 | } 61 | 62 | line = bytes.NewBufferString("test one line\ntest two lines\nthree\nfour\nfive\n") 63 | lines, err = lineCounter(line) 64 | if err != nil { 65 | t.Errorf("Looking for %v, got %v", nil, err) 66 | } 67 | if lines != 5 { 68 | t.Errorf("Looking for %v, got %v", 5, lines) 69 | } 70 | } 71 | 72 | func TestNotifyProcError(t *testing.T) { 73 | // @TODO 74 | } 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Go Report Card](https://goreportcard.com/badge/github.com/ksred/go-monitor)](https://goreportcard.com/report/github.com/ksred/go-monitor) 2 | 3 | # Simple server monitoring in Go 4 | 5 | `go-monitor` is simple server monitoring written in Go. I was on the lookout for a tool which allowed me 6 | to monitor a list of services and notify me through SMS or email. Everything I found seemed a bit too complex 7 | and so `go-monitor` was born. 8 | 9 | ## TL;DR 10 | `go-monitor` monitors a list of services as specified by the user and then notifies one or more users through SMS 11 | of any services that are down. 12 | 13 | We use [MessageBird](https://www.messagebird.com/) as an SMS delivery service. SMS can be exchanged for a 14 | number of other services through MessageBird. 15 | 16 | Config is read through a yaml file. 17 | Users can specify how often they are notified of a given service being down through the `defaultttl` value in the config. 18 | 19 | By default, processes are checked every 60 seconds. This can be increased or decreased depending on the importance of services. 20 | 21 | This program is intended for use on Unix systems. 22 | 23 | ## Installation 24 | 1. `go get github.com/ksred/go-monitor` 25 | 26 | 2. Install dependencies: `godep restore` (If you are unfamiliar with Godeop see [here](https://github.com/tools/godep)) 27 | 28 | 3. Update `go-monitor.yml.sample` to include the configuration options desired. 29 | 30 | 4. Rename config to `go-monitor.yml`. 31 | 32 | 5. `bash install.sh` will build the program and add it as a service. 33 | 34 | ## How it works 35 | We use a Go to push this onto the server: 36 | 37 | `ps aux | grep PROCESS_NAME` 38 | 39 | We then read the number of lines returned. Super simple. 40 | 41 | ## Additions 42 | Down the line it would be nice to have server monitoring in addition to process monitoring: 43 | 44 | - CPU 45 | - Disks (usage, read/write) 46 | - Memory 47 | 48 | This could further be extended into network monitoring. Someday. 49 | 50 | ## License 51 | MIT 52 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "flag" 7 | "fmt" 8 | "github.com/patrickmn/go-cache" 9 | "gopkg.in/yaml.v2" 10 | "io" 11 | "io/ioutil" 12 | "log" 13 | "net" 14 | "net/http" 15 | "net/url" 16 | "os" 17 | "os/exec" 18 | "strings" 19 | "sync" 20 | "time" 21 | ) 22 | 23 | // All these values should be in lowercase in the yaml file 24 | // See sample.go-monitor.yaml 25 | type Monitor struct { 26 | Processes []string 27 | Config struct { 28 | MessageBirdToken string 29 | MessageBirdSender string 30 | Recipients string 31 | DefaultTTLSeconds time.Duration 32 | ServerNiceName string 33 | CheckFrequencySeconds time.Duration 34 | } 35 | writeToConsole bool 36 | } 37 | 38 | func main() { 39 | 40 | defaultConfigfile := "/usr/local/etc/go-monitor.yml" 41 | configFile := flag.String("f", defaultConfigfile, fmt.Sprintf("config file path, default = %s", defaultConfigfile)) 42 | writeToConsole := flag.Bool("o", false, fmt.Sprintf("output, if true will write to console")) 43 | flag.Parse() 44 | 45 | monitor, err := createMonitorFromFile(*configFile) 46 | monitor.writeToConsole = *writeToConsole 47 | 48 | monitor.Println("Go Monitor running") 49 | 50 | // Default notification from config 51 | // Refresh time is 60 seconds 52 | c := cache.New(monitor.Config.DefaultTTLSeconds*time.Second, 60*time.Second) 53 | procErrChan := make(chan string, len(monitor.Processes)) 54 | 55 | server, err := monitor.getServerInfo() 56 | if err != nil { 57 | monitor.Println("Error getting server information, using NIL") 58 | server = "NIL" 59 | } 60 | 61 | // Parent waitgroup for the two go functions below 62 | var wgParent sync.WaitGroup 63 | wgParent.Add(2) 64 | 65 | // One go func for the adding of procs to error channel 66 | go func() { 67 | var wg sync.WaitGroup 68 | for { 69 | wg.Add(len(monitor.Processes)) 70 | 71 | for index, proc := range monitor.Processes { 72 | go monitor.checkProcess(proc, procErrChan, &wg) 73 | 74 | // Sleep when the loop is done 75 | // This is how often the checks for each process will run 76 | if index == len(monitor.Processes)-1 { 77 | // Check every 60 seconds 78 | time.Sleep(monitor.Config.CheckFrequencySeconds * time.Second) 79 | } 80 | } 81 | wg.Wait() 82 | } 83 | }() 84 | 85 | // Another go func for reading the results from the error chan 86 | go func() { 87 | for { 88 | procErr := <-procErrChan 89 | go monitor.notifyProcError(procErr, server, monitor.Config.Recipients, c) 90 | } 91 | }() 92 | 93 | // Never die 94 | wgParent.Wait() 95 | } 96 | 97 | func (monitor *Monitor) Println(message string) { 98 | 99 | if monitor.writeToConsole { 100 | fmt.Println(message) 101 | } 102 | } 103 | 104 | func (monitor *Monitor) Printf(message string, a ...interface{}) { 105 | 106 | if monitor.writeToConsole { 107 | fmt.Printf(message, a) 108 | } 109 | } 110 | 111 | func createMonitorFromFile(configFile string) (monitor *Monitor, error error) { 112 | 113 | data, error := ioutil.ReadFile(configFile) 114 | if error != nil { 115 | log.Fatal(error) 116 | } 117 | 118 | fmt.Println(configFile) 119 | 120 | yaml.Unmarshal(data, &monitor) 121 | 122 | error = monitor.validate() 123 | 124 | return 125 | } 126 | 127 | func (monitor *Monitor) validate() error { 128 | // Do validation checks 129 | if len(monitor.Processes) < 1 { 130 | return errors.New("Config: We need to monitor at least one process") 131 | } else { 132 | monitor.Printf("Processes %s\n", monitor.Processes) 133 | } 134 | if strings.Trim(monitor.Config.MessageBirdToken, " ") != "" { 135 | monitor.Printf("MessageBirdToken %s\n", monitor.Config.MessageBirdToken) 136 | 137 | if strings.Trim(monitor.Config.MessageBirdSender, "") == "" { 138 | return errors.New("Config: MessageBird sender not set") 139 | } else { 140 | monitor.Printf("MessageBirdSender %s\n", monitor.Config.MessageBirdSender) 141 | } 142 | 143 | if monitor.Config.Recipients == "" { 144 | return errors.New("Config: Recipients list is empty") 145 | } else { 146 | monitor.Printf("Recipients %s\n", monitor.Config.Recipients) 147 | } 148 | } 149 | 150 | if monitor.Config.DefaultTTLSeconds == 0 { 151 | monitor.Config.DefaultTTLSeconds = 30000 152 | } 153 | if monitor.Config.CheckFrequencySeconds == 0 { 154 | monitor.Config.CheckFrequencySeconds = 60 155 | } 156 | if monitor.Config.ServerNiceName == "" { 157 | return errors.New("Config: ServerNiceName empty") 158 | } 159 | 160 | monitor.Printf("DefaultTTLSeconds %d\n", monitor.Config.DefaultTTLSeconds) 161 | monitor.Printf("CheckFrequencySeconds %d\n", monitor.Config.CheckFrequencySeconds) 162 | monitor.Printf("ServerNiceName %v\n", monitor.Config.ServerNiceName) 163 | 164 | return nil 165 | } 166 | 167 | func (monitor *Monitor) getServerInfo() (server string, err error) { 168 | ifaces, err := net.Interfaces() 169 | if err != nil { 170 | return "", err 171 | } 172 | 173 | var ip net.IP 174 | for _, i := range ifaces { 175 | addrs, err := i.Addrs() 176 | if err != nil { 177 | return "", err 178 | } 179 | 180 | for _, addr := range addrs { 181 | switch v := addr.(type) { 182 | case *net.IPNet: 183 | ip = v.IP 184 | case *net.IPAddr: 185 | ip = v.IP 186 | } 187 | } 188 | } 189 | 190 | // Get hostname 191 | host, err := os.Hostname() 192 | if err != nil { 193 | return "", err 194 | } 195 | 196 | server = monitor.Config.ServerNiceName + " " + host + " with IP " + ip.String() 197 | return server, nil 198 | } 199 | 200 | func (monitor *Monitor) checkProcess(processName string, procErrChan chan string, wg *sync.WaitGroup) { 201 | 202 | if strings.HasPrefix(processName, "tcp://") { 203 | monitor.checkTcpSocket(strings.TrimPrefix(processName, "tcp://"), procErrChan, wg) 204 | } else if strings.HasPrefix(processName, "http://") || strings.HasPrefix(processName, "https://") { 205 | monitor.checkHttpEndpoint(processName, procErrChan, wg) 206 | } else { 207 | monitor.checkLocalProcess(processName, procErrChan, wg) 208 | } 209 | } 210 | 211 | func (monitor *Monitor) checkTcpSocket(tcpAddress string, procErrChan chan string, wg *sync.WaitGroup) { 212 | monitor.Printf("Checking for tcp socket %s\n", tcpAddress) 213 | 214 | conn, err := net.Dial("tcp", tcpAddress) 215 | defer func() { 216 | if conn != nil { 217 | conn.Close() 218 | } 219 | }() 220 | 221 | if err != nil { 222 | monitor.Printf("Error: unable to open socket! %s\n", tcpAddress) 223 | procErrChan <- tcpAddress 224 | } else { 225 | monitor.Printf("Successful connection to %s \n", tcpAddress) 226 | } 227 | 228 | // Doing this keeps the channel open 229 | // If this is not done, the channel closes and there is a fatal error 230 | procErrChan <- "" 231 | } 232 | 233 | func (monitor *Monitor) checkHttpEndpoint(httpEndpoint string, procErrChan chan string, wg *sync.WaitGroup) { 234 | monitor.Printf("Checking http endpoint %s\n", httpEndpoint) 235 | 236 | resp, err := http.DefaultClient.Get(httpEndpoint) 237 | 238 | if err != nil { 239 | monitor.Printf("Error: unable to connect to %s - %s\n", httpEndpoint, err.Error()) 240 | procErrChan <- httpEndpoint 241 | } else if resp.Status != "200 OK" { 242 | monitor.Printf("Error: non 200 status from %s - %s\n", httpEndpoint, resp.Status) 243 | procErrChan <- httpEndpoint 244 | } else { 245 | monitor.Printf("%s returns 200 OK\n", httpEndpoint, resp.Status) 246 | } 247 | 248 | // Doing this keeps the channel open 249 | // If this is not done, the channel closes and there is a fatal error 250 | procErrChan <- "" 251 | } 252 | 253 | func (monitor *Monitor) checkLocalProcess(processName string, procErrChan chan string, wg *sync.WaitGroup) { 254 | monitor.Printf("Checking for process %s\n", processName) 255 | 256 | c1 := exec.Command("ps", "aux") 257 | c2 := exec.Command("grep", processName) 258 | 259 | r, w := io.Pipe() 260 | c1.Stdout = w 261 | c2.Stdin = r 262 | 263 | var b2 bytes.Buffer 264 | c2.Stdout = &b2 265 | 266 | c1.Start() 267 | c2.Start() 268 | c1.Wait() 269 | w.Close() 270 | c2.Wait() 271 | 272 | //Println(&b2) 273 | lines, err := lineCounter(&b2) 274 | if err != nil { 275 | monitor.Printf("Error: %s\n", err.Error()) 276 | os.Exit(0) 277 | } 278 | 279 | // Mark this task as done 280 | // Error if done after passing data to channels 281 | wg.Done() 282 | 283 | if lines == 0 { 284 | monitor.Printf("Error: no process %s found running!\n", processName) 285 | procErrChan <- processName 286 | } 287 | 288 | // Doing this keeps the channel open 289 | // If this is not done, the channel closes and there is a fatal error 290 | procErrChan <- "" 291 | } 292 | 293 | // Linecounter counts the number of lines in a given output 294 | func lineCounter(r io.Reader) (int, error) { 295 | buf := make([]byte, 32*1024) 296 | count := 0 297 | lineSep := []byte{'\n'} 298 | 299 | for { 300 | c, err := r.Read(buf) 301 | count += bytes.Count(buf[:c], lineSep) 302 | 303 | switch { 304 | case err == io.EOF: 305 | return count, nil 306 | 307 | case err != nil: 308 | return count, err 309 | } 310 | } 311 | } 312 | 313 | // Notifyproceerror sends a notification for a given process 314 | func (monitor *Monitor) notifyProcError(proc string, server string, recipientNumber string, c *cache.Cache) { 315 | if len(proc) > 0 { 316 | monitor.Printf("### ERROR: proc %s not running!\n", proc) 317 | 318 | // Check cache for process 319 | _, found := c.Get(proc) 320 | if found { 321 | // Wait until expiry before another notification 322 | monitor.Printf("Process %s stored in cache, skipping\n", proc) 323 | return 324 | } 325 | 326 | // If proc not in cache, store in cache 327 | c.Set(proc, true, cache.DefaultExpiration) 328 | 329 | // Send text message 330 | authToken := monitor.Config.MessageBirdToken 331 | urlStr := "https://rest.messagebird.com/messages" 332 | 333 | v := url.Values{} 334 | v.Set("recipients", recipientNumber) 335 | v.Set("originator", monitor.Config.MessageBirdSender) 336 | v.Set("body", "📢 "+proc+" not running on server "+server+"!") 337 | rb := *strings.NewReader(v.Encode()) 338 | 339 | client := &http.Client{} 340 | 341 | req, _ := http.NewRequest("POST", urlStr, &rb) 342 | req.SetBasicAuth("AccessKey", authToken) 343 | req.Header.Add("Accept", "application/json") 344 | req.Header.Add("Content-Type", "application/x-www-form-urlencoded") 345 | 346 | // Make request 347 | _, err := client.Do(req) 348 | if err != nil { 349 | monitor.Printf("Error: %s\n", err.Error()) 350 | return 351 | } 352 | 353 | monitor.Println("Notification sent!") 354 | } 355 | } 356 | --------------------------------------------------------------------------------