├── .gitignore ├── README.md ├── example_falcon.go ├── example_statsd.go ├── falcon └── falcon.go ├── metric ├── defines.go ├── metric.go ├── stats_darwin.go └── stats_linux.go └── statsd └── statsd.go /.gitignore: -------------------------------------------------------------------------------- 1 | eru-metric 2 | example_statsd 3 | example_falcon 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Eru-Metric 2 | ========== 3 | 4 | [![GoDoc](https://godoc.org/github.com/projecteru/eru-metric?status.svg)](https://godoc.org/github.com/projecteru/eru-metric) 5 | 6 | A library for watching container metrics and sending them to a remote backend. 7 | 8 | This repo implements open-falcon and statsd backends; you can also write your own. 9 | 10 | How 11 | === 12 | 13 | * write a func that implements the Send method if you want to send metrics to another place. 14 | 15 | ``` 16 | func Send(data map[string]float64, endpoint, tag string, timestamp, step int64) error 17 | ``` 18 | 19 | * set metric global setting 20 | 21 | ``` 22 | SetGlobalSetting(client DockerClient, timeout, forceTimeout time.Duration, vlanPrefix, defaultVlan string) 23 | ``` 24 | 25 | * create a backend object which implements the Remote interface (Send and Close). 26 | 27 | * create a metric for each container. 28 | 29 | ``` 30 | CreateMetric(step time.Duration, client Remote, tag, endpoint string) 31 | ``` 32 | 33 | * init metric object 34 | 35 | ``` 36 | InitMetric(cid string, pid int) 37 | ``` 38 | 39 | * update, calculate, save and send 40 | 41 | ``` 42 | UpdateStats(cid string) 43 | CalcRate(info map[string]uint64, now time.Time) 44 | SaveLast(info map[string]uint64) 45 | Send(rate map[string]float64) 46 | ``` 47 | 48 | * exit metric 49 | 50 | ``` 51 | Exit() 52 | ``` 53 | 54 | Example 55 | ======= 56 | 57 | see example_falcon.go and example_statsd.go; they only work in a Linux environment. 58 | 59 | ``` 60 | eru-metric CONTAINERID CONTAINERID ... 
CONTAINERID [-DEBUG] [-d docker remote addr] [-t transfer remote addr] 61 | ``` 62 | 63 | -------------------------------------------------------------------------------- /example_falcon.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "time" 7 | 8 | "golang.org/x/net/context" 9 | 10 | log "github.com/Sirupsen/logrus" 11 | "github.com/docker/engine-api/client" 12 | "github.com/projecteru/eru-metric/falcon" 13 | "github.com/projecteru/eru-metric/metric" 14 | ) 15 | 16 | func main() { 17 | var dockerAddr string 18 | var transferAddr string 19 | var certDir string 20 | var debug bool 21 | flag.BoolVar(&debug, "DEBUG", false, "enable debug") 22 | flag.StringVar(&dockerAddr, "d", "tcp://192.168.99.100:2376", "docker daemon addr") 23 | flag.StringVar(&transferAddr, "t", "10.200.8.37:8433", "transfer addr") 24 | flag.StringVar(&certDir, "c", "/root/.docker", "cert files dir") 25 | flag.Parse() 26 | if flag.NArg() < 1 { 27 | fmt.Println("need at least one container id") 28 | return 29 | } 30 | if debug { 31 | log.SetLevel(log.DebugLevel) 32 | } 33 | 34 | cli, _ := client.NewEnvClient() 35 | 36 | metric.SetGlobalSetting(cli, 2, 3, "vnbe", "eth0") 37 | client := falcon.CreateFalconClient(transferAddr, 5*time.Millisecond) 38 | ctx := context.Background() 39 | 40 | for i := 0; i < flag.NArg(); i++ { 41 | if c, err := cli.ContainerInspect(ctx, flag.Arg(i)); err != nil { 42 | fmt.Println(flag.Arg(i), err) 43 | continue 44 | } else { 45 | go start_watcher(client, c.ID, c.State.Pid) 46 | } 47 | } 48 | for { 49 | } 50 | } 51 | 52 | func start_watcher(client metric.Remote, cid string, pid int) { 53 | serv := metric.CreateMetric(time.Duration(5)*time.Second, client, "a=b,b=c", fmt.Sprintf("test_%s", cid)) 54 | defer serv.Client.Close() 55 | if err := serv.InitMetric(cid, pid); err != nil { 56 | fmt.Println("failed", err) 57 | return 58 | } 59 | 60 | t := time.NewTicker(serv.Step) 61 | defer 
t.Stop() 62 | fmt.Println("begin watch", cid) 63 | for { 64 | select { 65 | case now := <-t.C: 66 | go func() { 67 | if info, err := serv.UpdateStats(cid); err == nil { 68 | fmt.Println(info) 69 | rate := serv.CalcRate(info, now) 70 | serv.SaveLast(info) 71 | // for safe 72 | fmt.Println(rate) 73 | go serv.Send(rate) 74 | } 75 | }() 76 | case <-serv.Stop: 77 | return 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /example_statsd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "time" 7 | 8 | "golang.org/x/net/context" 9 | 10 | log "github.com/Sirupsen/logrus" 11 | "github.com/docker/engine-api/client" 12 | "github.com/projecteru/eru-metric/metric" 13 | "github.com/projecteru/eru-metric/statsd" 14 | ) 15 | 16 | func main() { 17 | var dockerAddr string 18 | var transferAddr string 19 | var debug bool 20 | flag.BoolVar(&debug, "DEBUG", false, "enable debug") 21 | flag.StringVar(&dockerAddr, "d", "tcp://192.168.99.100:2376", "docker daemon addr") 22 | flag.StringVar(&transferAddr, "t", "10.200.8.37:8433", "transfer addr") 23 | flag.Parse() 24 | if flag.NArg() < 1 { 25 | fmt.Println("need at least one container id") 26 | return 27 | } 28 | if debug { 29 | log.SetLevel(log.DebugLevel) 30 | } 31 | 32 | cli, _ := client.NewEnvClient() 33 | 34 | metric.SetGlobalSetting(cli, 2, 3, "vnbe", "eth0") 35 | client := statsd.CreateStatsDClient(transferAddr) 36 | ctx := context.Background() 37 | 38 | for i := 0; i < flag.NArg(); i++ { 39 | if c, err := cli.ContainerInspect(ctx, flag.Arg(i)); err != nil { 40 | fmt.Println(flag.Arg(i), err) 41 | continue 42 | } else { 43 | go start_watcher(client, c.ID, c.State.Pid) 44 | } 45 | } 46 | for { 47 | } 48 | } 49 | 50 | func start_watcher(client metric.Remote, cid string, pid int) { 51 | serv := metric.CreateMetric(time.Duration(5)*time.Second, client, "a.b", fmt.Sprintf("test_%s", 
cid[:7])) 52 | if err := serv.InitMetric(cid, pid); err != nil { 53 | fmt.Println("failed", err) 54 | return 55 | } 56 | 57 | t := time.NewTicker(serv.Step) 58 | defer t.Stop() 59 | fmt.Println("begin watch", cid) 60 | for { 61 | select { 62 | case now := <-t.C: 63 | go func() { 64 | if info, err := serv.UpdateStats(cid); err == nil { 65 | fmt.Println(info) 66 | rate := serv.CalcRate(info, now) 67 | serv.SaveLast(info) 68 | // for safe 69 | fmt.Println(rate) 70 | go serv.Send(rate) 71 | } 72 | }() 73 | case <-serv.Stop: 74 | return 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /falcon/falcon.go: -------------------------------------------------------------------------------- 1 | package falcon 2 | 3 | import ( 4 | "math" 5 | "net/rpc" 6 | "sync" 7 | "time" 8 | 9 | log "github.com/Sirupsen/logrus" 10 | "github.com/open-falcon/common/model" 11 | "github.com/toolkits/net" 12 | ) 13 | 14 | func CreateFalconClient(transfer string, timeout time.Duration) *FalconClient { 15 | return &FalconClient{ 16 | RpcServer: transfer, 17 | Timeout: timeout, 18 | } 19 | } 20 | 21 | type FalconClient struct { 22 | sync.Mutex 23 | rpcClient *rpc.Client 24 | RpcServer string 25 | Timeout time.Duration 26 | } 27 | 28 | func (self *FalconClient) Close() error { 29 | if self.rpcClient != nil { 30 | self.rpcClient.Close() 31 | self.rpcClient = nil 32 | } 33 | return nil 34 | } 35 | 36 | func (self *FalconClient) insureConn() error { 37 | if self.rpcClient != nil { 38 | return nil 39 | } 40 | 41 | var err error 42 | var retry int = 1 43 | 44 | for { 45 | if self.rpcClient != nil { 46 | return nil 47 | } 48 | 49 | self.rpcClient, err = net.JsonRpcClient("tcp", self.RpcServer, self.Timeout) 50 | if err == nil { 51 | return nil 52 | } 53 | 54 | log.Errorf("Metrics rpc dial fail %s", err) 55 | if retry > 5 { 56 | return err 57 | } 58 | 59 | time.Sleep(time.Duration(math.Pow(2.0, float64(retry))) * time.Second) 60 | retry++ 61 | } 62 | 
return nil 63 | } 64 | 65 | func (self *FalconClient) call(method string, args interface{}, reply interface{}) error { 66 | self.Lock() 67 | defer self.Unlock() 68 | 69 | if err := self.insureConn(); err != nil { 70 | return err 71 | } 72 | 73 | timeout := time.Duration(50 * time.Second) 74 | done := make(chan error) 75 | 76 | go func() { 77 | err := self.rpcClient.Call(method, args, reply) 78 | done <- err 79 | }() 80 | 81 | select { 82 | case <-time.After(timeout): 83 | log.Infof("Metrics rpc call timeout %s %s", self.rpcClient, self.RpcServer) 84 | self.Close() 85 | case err := <-done: 86 | if err != nil { 87 | self.Close() 88 | return err 89 | } 90 | } 91 | return nil 92 | } 93 | 94 | func (self *FalconClient) Send(data map[string]float64, endpoint, tag string, timestamp, step int64) error { 95 | metrics := []*model.MetricValue{} 96 | var metric *model.MetricValue 97 | for k, v := range data { 98 | metric = &model.MetricValue{ 99 | Endpoint: endpoint, 100 | Metric: k, 101 | Value: v, 102 | Step: step, 103 | Type: "GAUGE", 104 | Tags: tag, 105 | Timestamp: timestamp, 106 | } 107 | metrics = append(metrics, metric) 108 | } 109 | log.Debug(metrics) 110 | var resp model.TransferResponse 111 | if err := self.call("Transfer.Update", metrics, &resp); err != nil { 112 | return err 113 | } 114 | log.Debugf("%s %s %s", endpoint, timestamp, &resp) 115 | return nil 116 | } 117 | -------------------------------------------------------------------------------- /metric/defines.go: -------------------------------------------------------------------------------- 1 | package metric 2 | 3 | import ( 4 | "io" 5 | "os" 6 | "sync" 7 | "time" 8 | 9 | "golang.org/x/net/context" 10 | ) 11 | 12 | type DockerClient interface { 13 | ContainerStats(ctx context.Context, containerID string, stream bool) (io.ReadCloser, error) 14 | } 15 | 16 | type Remote interface { 17 | Send(data map[string]float64, endpoint, tag string, timestamp, step int64) error 18 | Close() error 19 | } 20 | 21 | type 
Metric struct { 22 | sync.Mutex 23 | Step time.Duration 24 | Client Remote 25 | Tag string 26 | Endpoint string 27 | 28 | statFile *os.File 29 | Last time.Time 30 | 31 | Stop chan bool 32 | Save map[string]uint64 33 | } 34 | 35 | type Setting struct { 36 | timeout time.Duration 37 | force time.Duration 38 | vlanPrefix string 39 | defaultVlan string 40 | client DockerClient 41 | } 42 | 43 | var g Setting 44 | -------------------------------------------------------------------------------- /metric/metric.go: -------------------------------------------------------------------------------- 1 | package metric 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "strings" 9 | "time" 10 | 11 | "github.com/docker/engine-api/types" 12 | "golang.org/x/net/context" 13 | 14 | log "github.com/Sirupsen/logrus" 15 | ) 16 | 17 | func SetGlobalSetting(client DockerClient, timeout, force time.Duration, vlanPrefix, defaultVlan string) { 18 | g = Setting{timeout, force, vlanPrefix, defaultVlan, client} 19 | } 20 | 21 | func CreateMetric(step time.Duration, client Remote, tag string, endpoint string) Metric { 22 | return Metric{ 23 | Step: step, 24 | Client: client, 25 | Tag: tag, 26 | Endpoint: endpoint, 27 | Stop: make(chan bool), 28 | } 29 | } 30 | 31 | func (self *Metric) InitMetric(cid string, pid int) (err error) { 32 | if self.statFile, err = os.Open(fmt.Sprintf("/proc/%d/net/dev", pid)); err != nil { 33 | return 34 | } 35 | var info map[string]uint64 36 | if info, err = self.UpdateStats(cid); err == nil { 37 | self.Last = time.Now() 38 | self.SaveLast(info) 39 | } 40 | return 41 | } 42 | 43 | func (self *Metric) Exit() { 44 | defer self.statFile.Close() 45 | self.Stop <- true 46 | close(self.Stop) 47 | } 48 | 49 | func (self *Metric) UpdateStats(cid string) (map[string]uint64, error) { 50 | info := map[string]uint64{} 51 | ctx := context.Background() 52 | resp, err := g.client.ContainerStats(ctx, cid, false) 53 | if err != nil { 54 | log.Errorf("Get stats 
failed %s %s", cid[:12], err) 55 | return info, err 56 | } 57 | defer resp.Close() 58 | data, err := ioutil.ReadAll(resp) 59 | if err != nil { 60 | log.Errorf("Read stats failed %s %s", cid[:12], err) 61 | return info, err 62 | } 63 | var stats types.StatsJSON 64 | if err := json.Unmarshal(data, &stats); err != nil { 65 | log.Errorf("Unmarshal stats failed %s %s", cid[:12], err) 66 | return info, err 67 | } 68 | 69 | info["cpu_user"] = stats.Stats.CPUStats.CPUUsage.UsageInUsermode 70 | info["cpu_system"] = stats.Stats.CPUStats.CPUUsage.UsageInKernelmode 71 | info["cpu_usage"] = stats.Stats.CPUStats.CPUUsage.TotalUsage 72 | //FIXME in container it will get all CPUStats 73 | info["mem_usage"] = stats.Stats.MemoryStats.Usage 74 | info["mem_max_usage"] = stats.Stats.MemoryStats.MaxUsage 75 | info["mem_rss"] = stats.Stats.MemoryStats.Stats["rss"] 76 | 77 | if err := self.getNetStats(info); err != nil { 78 | return info, err 79 | } 80 | return info, nil 81 | } 82 | 83 | func (self *Metric) SaveLast(info map[string]uint64) { 84 | self.Lock() 85 | defer self.Unlock() 86 | self.Save = map[string]uint64{} 87 | for k, d := range info { 88 | self.Save[k] = d 89 | } 90 | } 91 | 92 | func (self *Metric) CalcRate(info map[string]uint64, now time.Time) (rate map[string]float64) { 93 | rate = map[string]float64{} 94 | delta := now.Sub(self.Last) 95 | nano_t := float64(delta.Nanoseconds()) 96 | second_t := delta.Seconds() 97 | for k, d := range info { 98 | switch { 99 | case strings.HasPrefix(k, "cpu_") && d >= self.Save[k]: 100 | rate[fmt.Sprintf("%s_rate", k)] = float64(d-self.Save[k]) / nano_t 101 | case (strings.HasPrefix(k, g.vlanPrefix) || strings.HasPrefix(k, g.defaultVlan)) && d >= self.Save[k]: 102 | rate[fmt.Sprintf("%s.rate", k)] = float64(d-self.Save[k]) / second_t 103 | case strings.HasPrefix(k, "mem"): 104 | rate[k] = float64(d) 105 | } 106 | } 107 | self.Last = now 108 | return 109 | } 110 | 111 | func (self *Metric) Send(rate map[string]float64) error { 112 | step := 
int64(self.Step.Seconds()) 113 | timestamp := self.Last.Unix() 114 | return self.Client.Send(rate, self.Endpoint, self.Tag, timestamp, step) 115 | } 116 | -------------------------------------------------------------------------------- /metric/stats_darwin.go: -------------------------------------------------------------------------------- 1 | package metric 2 | 3 | func (self *Metric) getNetStats(info map[string]uint64) (err error) { 4 | return 5 | } 6 | -------------------------------------------------------------------------------- /metric/stats_linux.go: -------------------------------------------------------------------------------- 1 | package metric 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "strings" 7 | 8 | log "github.com/Sirupsen/logrus" 9 | ) 10 | 11 | func (self *Metric) getNetStats(result map[string]uint64) (err error) { 12 | s := bufio.NewScanner(self.statFile) 13 | defer self.statFile.Seek(0, 0) 14 | var d uint64 15 | for s.Scan() { 16 | var name string 17 | var n [8]uint64 18 | text := s.Text() 19 | if strings.Index(text, ":") < 1 { 20 | continue 21 | } 22 | ts := strings.Split(text, ":") 23 | fmt.Sscanf(ts[0], "%s", &name) 24 | if !strings.HasPrefix(name, g.vlanPrefix) && name != g.defaultVlan { 25 | continue 26 | } 27 | fmt.Sscanf(ts[1], 28 | "%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d", 29 | &n[0], &n[1], &n[2], &n[3], &d, &d, &d, &d, 30 | &n[4], &n[5], &n[6], &n[7], &d, &d, &d, &d, 31 | ) 32 | result[name+".inbytes"] = n[0] 33 | result[name+".inpackets"] = n[1] 34 | result[name+".inerrs"] = n[2] 35 | result[name+".indrop"] = n[3] 36 | result[name+".outbytes"] = n[4] 37 | result[name+".outpackets"] = n[5] 38 | result[name+".outerrs"] = n[6] 39 | result[name+".outdrop"] = n[7] 40 | } 41 | log.Debugf("Container net status %v", result) 42 | return 43 | } 44 | -------------------------------------------------------------------------------- /statsd/statsd.go: -------------------------------------------------------------------------------- 1 | 
package statsd 2 | 3 | import ( 4 | "fmt" 5 | 6 | statsdlib "github.com/CMGS/statsd" 7 | log "github.com/Sirupsen/logrus" 8 | ) 9 | 10 | func CreateStatsDClient(addr string) *StatsDClient { 11 | return &StatsDClient{ 12 | Addr: addr, 13 | } 14 | } 15 | 16 | type StatsDClient struct { 17 | Addr string 18 | } 19 | 20 | func (self *StatsDClient) Close() error { 21 | return nil 22 | } 23 | 24 | func (self *StatsDClient) Send(data map[string]float64, endpoint, tag string, timestamp, step int64) error { 25 | remote, err := statsdlib.New(self.Addr) 26 | if err != nil { 27 | log.Errorf("Connect statsd failed", err) 28 | return err 29 | } 30 | defer remote.Close() 31 | defer remote.Flush() 32 | for k, v := range data { 33 | key := fmt.Sprintf("eru.%s.%s.%s", endpoint, tag, k) 34 | remote.Gauge(key, v) 35 | } 36 | return nil 37 | } 38 | --------------------------------------------------------------------------------