├── .gitignore ├── Dockerfile ├── History.md ├── Makefile ├── Readme.md ├── Vagrantfile ├── main.go └── pkg ├── collector └── collector.go ├── cpu └── cpu.go ├── disk └── disk.go ├── memory └── memory.go └── resource └── resource.go /.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant/ 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM google/golang-runtime -------------------------------------------------------------------------------- /History.md: -------------------------------------------------------------------------------- 1 | 2 | v0.2.0 / 2014-09-29 3 | ================== 4 | 5 | * add mounted fs discovery 6 | 7 | v0.1.0 / 2014-09-20 8 | ================== 9 | 10 | * change --cpu-interval to 5s 11 | 12 | v0.0.2 / 2014-09-10 13 | ================== 14 | 15 | * fix ticker memory leak 16 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | build: 3 | gox -os="linux darwin" -arch=amd64 4 | 5 | clean: 6 | git clean -fd 7 | 8 | .PHONY: clean -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | 2 | # System 3 | 4 | System statistics collector for statsd on linux written in Go . 
5 | 6 | ## Usage 7 | 8 | ``` 9 | 10 | Usage: 11 | system-stats 12 | [--statsd-address addr] 13 | [--memory-interval i] 14 | [--disk-interval i] 15 | [--cpu-interval i] 16 | [--extended] 17 | [--name name] 18 | system-stats -h | --help 19 | system-stats --version 20 | 21 | Options: 22 | --statsd-address addr statsd address [default: :8125] 23 | --memory-interval i memory reporting interval [default: 10s] 24 | --disk-interval i disk reporting interval [default: 30s] 25 | --cpu-interval i cpu reporting interval [default: 5s] 26 | --name name node name defaulting to hostname [default: hostname] 27 | --extended output additional extended metrics 28 | -h, --help output help information 29 | -v, --version output version 30 | 31 | ``` 32 | 33 | ## Installation 34 | 35 | Via go-get: 36 | 37 | ``` 38 | $ go get github.com/statsd/system 39 | ``` 40 | 41 | Via binaries: 42 | 43 | Coming soon! 44 | 45 | ## Metrics 46 | 47 | Metrics are prefixed with the hostname (or `--name`), and 48 | namespaced by the resource, for example: 49 | 50 | ``` 51 | api-2.cpu.blocked:7|c 52 | api-2.cpu.running:4|c 53 | api-2.cpu.interrupts:19695796035|c 54 | api-2.cpu.percent:26|g 55 | api-2.cpu.switches:25195265352|c 56 | api-2.cpu.processes:20027|c 57 | api-2.cpu.blocked:7|c 58 | api-2.cpu.running:4|c 59 | api-2.cpu.interrupts:19695796035|c 60 | api-2.cpu.percent:26|g 61 | ... 62 | ``` 63 | 64 | Depending on the statsd implementation that you use, this 65 | may result in different outputs. For example with the 66 | [armon/statsite](https://github.com/armon/statsite) implementation 67 | this would result in `gauges.api-2.memory.free`. 68 | 69 | The `--extended` flag enables extended metrics per resource, 70 | which are listed as __extended__ below. 71 | 72 | ### CPU 73 | 74 | - `cpu.percent` gauge 75 | - `cpu.switches` counter __extended__ 76 | - `cpu.interrupts` counter __extended__ 77 | - `cpu.blocked` counter __extended__ 78 | 79 | ### Memory 80 | 81 | Memory values are represented in bytes. 
82 | 83 | - `memory.percent` gauge 84 | - `memory.used` gauge __extended__ 85 | - `memory.active` gauge __extended__ 86 | - `memory.total` gauge __extended__ 87 | - `memory.free` gauge __extended__ 88 | - `memory.swap.percent` gauge 89 | - `memory.swap.total` gauge __extended__ 90 | - `memory.swap.free` gauge __extended__ 91 | 92 | ### Disk 93 | 94 | Disk values are represented in bytes. `<path>` is the 95 | path the fs is mounted on (/, /data, etc). 96 | 97 | - `disk.<path>.percent` gauge 98 | - `disk.<path>.free` gauge 99 | - `disk.<path>.used` gauge 100 | 101 | ### IO 102 | 103 | Coming soon! 104 | 105 | ## Daemonization 106 | 107 | system-stats(1) doesn't support running as a daemon natively; you'll 108 | want to use upstart or similar for this. Add the following example 109 | upstart script to /etc/init/system-stats.conf: 110 | 111 | ``` 112 | respawn 113 | 114 | start on runlevel [2345] 115 | stop on runlevel [016] 116 | 117 | exec system-stats --statsd-address 10.0.0.214:5000 118 | ``` 119 | 120 | Then run `sudo start system-stats` and you're good to go! 
121 | 122 | ## Debugging 123 | 124 | Run with `DEBUG=stats` to view the [go-debug](http://github.com/visionmedia/go-debug) output: 125 | 126 | ``` 127 | 2014-08-13 22:04:36 INFO - cpu: reporting 128 | 22:04:36.098 2s 2s statsd - vagrant-ubuntu-precise-64.cpu.switches:20384|c 129 | 22:04:36.098 4us 4us statsd - vagrant-ubuntu-precise-64.cpu.processes:0|c 130 | 22:04:36.098 3us 3us statsd - vagrant-ubuntu-precise-64.cpu.running:0|c 131 | 22:04:36.098 3us 3us statsd - vagrant-ubuntu-precise-64.cpu.interrupts:656|c 132 | 22:04:36.098 3us 3us statsd - vagrant-ubuntu-precise-64.cpu.percent:100|g 133 | 2014-08-13 22:04:38 INFO - cpu: reporting 134 | 22:04:38.098 2s 2s statsd - vagrant-ubuntu-precise-64.cpu.switches:24074|c 135 | 22:04:38.098 23us 13us statsd - vagrant-ubuntu-precise-64.cpu.processes:0|c 136 | 22:04:38.098 15us 8us statsd - vagrant-ubuntu-precise-64.cpu.running:1|c 137 | 22:04:38.098 12us 7us statsd - vagrant-ubuntu-precise-64.cpu.interrupts:638|c 138 | 22:04:38.099 11us 7us statsd - vagrant-ubuntu-precise-64.cpu.percent:100|g 139 | ``` 140 | 141 | # License 142 | 143 | MIT 144 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | Vagrant.configure("2") do |config| 2 | config.vm.box = "precise64" 3 | config.vm.box_url = "http://files.vagrantup.com/precise64.box" 4 | 5 | config.ssh.forward_agent = true 6 | config.vm.synced_folder ".", "/opt/go/src/github.com/statsd/system" 7 | 8 | config.vm.provision "shell", inline: <<-EOF 9 | set -e 10 | 11 | # System packages 12 | echo "Installing Base Packages" 13 | export DEBIAN_FRONTEND=noninteractive 14 | sudo apt-get update -qq 15 | sudo apt-get install -qqy --force-yes build-essential bzr git mercurial vim 16 | 17 | # Install Go 18 | GOVERSION="1.2" 19 | GOTARBALL="go${GOVERSION}.linux-amd64.tar.gz" 20 | export GOROOT=/usr/local/go 21 | export GOPATH=/opt/go 22 | export 
PATH=$PATH:$GOROOT/bin:$GOPATH/bin 23 | 24 | echo "Installing Go $GOVERSION" 25 | if [ ! $(which go) ]; then 26 | echo " Downloading $GOTARBALL" 27 | wget --quiet --directory-prefix=/tmp https://go.googlecode.com/files/$GOTARBALL 28 | 29 | echo " Extracting $GOTARBALL to $GOROOT" 30 | sudo tar -C /usr/local -xzf /tmp/$GOTARBALL 31 | 32 | echo " Configuring GOPATH" 33 | sudo mkdir -p $GOPATH/src $GOPATH/bin $GOPATH/pkg 34 | sudo chown -R vagrant $GOPATH 35 | 36 | echo " Configuring env vars" 37 | echo "export PATH=\$PATH:$GOROOT/bin:$GOPATH/bin" | sudo tee /etc/profile.d/golang.sh > /dev/null 38 | echo "export GOROOT=$GOROOT" | sudo tee --append /etc/profile.d/golang.sh > /dev/null 39 | echo "export GOPATH=$GOPATH" | sudo tee --append /etc/profile.d/golang.sh > /dev/null 40 | fi 41 | 42 | # Cleanup 43 | sudo apt-get autoremove 44 | 45 | echo "Provisioning complete" 46 | EOF 47 | end -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "github.com/statsd/system/pkg/collector" 4 | import "github.com/statsd/system/pkg/memory" 5 | import "github.com/statsd/client-namespace" 6 | import "github.com/statsd/system/pkg/disk" 7 | import "github.com/statsd/system/pkg/cpu" 8 | import . 
"github.com/tj/go-gracefully" 9 | import "github.com/segmentio/go-log" 10 | import "github.com/statsd/client" 11 | import "github.com/tj/docopt" 12 | import "time" 13 | import "os" 14 | 15 | const Version = "0.2.0" 16 | 17 | const Usage = ` 18 | Usage: 19 | system-stats 20 | [--statsd-address addr] 21 | [--memory-interval i] 22 | [--disk-interval i] 23 | [--cpu-interval i] 24 | [--extended] 25 | [--name name] 26 | system-stats -h | --help 27 | system-stats --version 28 | 29 | Options: 30 | --statsd-address addr statsd address [default: :8125] 31 | --memory-interval i memory reporting interval [default: 10s] 32 | --disk-interval i disk reporting interval [default: 30s] 33 | --cpu-interval i cpu reporting interval [default: 5s] 34 | --extended output additional extended metrics 35 | --name name node name defaulting to hostname [default: hostname] 36 | -h, --help output help information 37 | -v, --version output version 38 | ` 39 | 40 | func main() { 41 | args, err := docopt.Parse(Usage, nil, true, Version, false) 42 | log.Check(err) 43 | 44 | log.Info("starting system %s", Version) 45 | 46 | client, err := statsd.Dial(args["--statsd-address"].(string)) 47 | log.Check(err) 48 | 49 | extended := args["--extended"].(bool) 50 | 51 | name := args["--name"].(string) 52 | if "hostname" == name { 53 | host, err := os.Hostname() 54 | log.Check(err) 55 | name = host 56 | } 57 | 58 | c := collector.New(namespace.New(client, name)) 59 | c.Add(memory.New(interval(args, "--memory-interval"), extended)) 60 | c.Add(cpu.New(interval(args, "--cpu-interval"), extended)) 61 | c.Add(disk.New(interval(args, "--disk-interval"))) 62 | 63 | c.Start() 64 | Shutdown() 65 | c.Stop() 66 | } 67 | 68 | func interval(args map[string]interface{}, name string) time.Duration { 69 | d, err := time.ParseDuration(args[name].(string)) 70 | log.Check(err) 71 | return d 72 | } 73 | -------------------------------------------------------------------------------- /pkg/collector/collector.go: 
-------------------------------------------------------------------------------- 1 | // 2 | // The Collector handles starting and stopping of 3 | // all of the resources, and flushes stats on shutdown. 4 | // 5 | package collector 6 | 7 | import "github.com/statsd/system/pkg/resource" 8 | import "github.com/statsd/client-namespace" 9 | import "github.com/statsd/client-interface" 10 | import "github.com/segmentio/go-log" 11 | import "sync" 12 | 13 | // Collector. 14 | type Collector struct { 15 | Resources []resource.Resource 16 | client statsd.Client 17 | wg sync.WaitGroup 18 | } 19 | 20 | // New collector with the given statsd client. 21 | func New(client statsd.Client) *Collector { 22 | return &Collector{ 23 | client: client, 24 | } 25 | } 26 | 27 | // Start the collector with the resources 28 | // which have been provided. Each resource gets 29 | // its own prefixed statsd client. 30 | func (c *Collector) Start() error { 31 | log.Info("starting collector with %d resources", len(c.Resources)) 32 | 33 | for _, r := range c.Resources { 34 | log.Info("starting %s", r.Name()) 35 | c.wg.Add(1) 36 | err := r.Start(namespace.New(c.client, r.Name())) 37 | if err != nil { 38 | return err 39 | } 40 | } 41 | 42 | return nil 43 | } 44 | 45 | // Stop the resource collectors gracefully 46 | // and then flush all metrics. 47 | func (c *Collector) Stop() error { 48 | log.Info("stopping collector") 49 | 50 | for _, r := range c.Resources { 51 | go func(r resource.Resource) { 52 | log.Info("stopping %s", r.Name()) 53 | err := r.Stop() 54 | if err != nil { 55 | log.Error("failed to gracefully stop %s: %s", r.Name(), err) 56 | } 57 | c.wg.Done() 58 | }(r) 59 | } 60 | 61 | c.wg.Wait() 62 | 63 | log.Info("flushing stats") 64 | return c.client.Flush() 65 | } 66 | 67 | // Add the given resource for collection. 
68 | func (c *Collector) Add(r resource.Resource) { 69 | c.Resources = append(c.Resources, r) 70 | } 71 | -------------------------------------------------------------------------------- /pkg/cpu/cpu.go: -------------------------------------------------------------------------------- 1 | // 2 | // CPU resource. 3 | // 4 | // This collector reports on the following stat metrics: 5 | // 6 | // - "switches" (counter) 7 | // - "interrupts" (counter) 8 | // - "running" (counter) 9 | // - "blocked" (counter) 10 | // - "usage" (gauge) 11 | // 12 | package cpu 13 | 14 | import "github.com/statsd/client-interface" 15 | import "github.com/c9s/goprocinfo/linux" 16 | import "github.com/segmentio/go-log" 17 | import "time" 18 | 19 | // CPU resource. 20 | type CPU struct { 21 | Path string 22 | Interval time.Duration 23 | Extended bool 24 | client statsd.Client 25 | exit chan struct{} 26 | } 27 | 28 | // New CPU resource. 29 | func New(interval time.Duration, extended bool) *CPU { 30 | return &CPU{ 31 | Path: "/proc/stat", 32 | Extended: extended, 33 | Interval: interval, 34 | exit: make(chan struct{}), 35 | } 36 | } 37 | 38 | // Name of the resource. 39 | func (c *CPU) Name() string { 40 | return "cpu" 41 | } 42 | 43 | // Start resource collection. 44 | func (c *CPU) Start(client statsd.Client) error { 45 | c.client = client 46 | go c.Report() 47 | return nil 48 | } 49 | 50 | // Report resource collection. 
51 | func (c *CPU) Report() { 52 | var prevTotal, prevIdle uint64 53 | prev := new(linux.Stat) 54 | tick := time.Tick(c.Interval) 55 | 56 | for { 57 | select { 58 | case <-tick: 59 | stat, err := linux.ReadStat(c.Path) 60 | 61 | if err != nil { 62 | log.Error("cpu: %s", err) 63 | continue 64 | } 65 | 66 | c.client.Gauge("percent", int(percent(&prevIdle, &prevTotal, stat.CPUStatAll))) 67 | 68 | if c.Extended { 69 | c.client.IncrBy("blocked", int(stat.ProcsBlocked)) 70 | c.client.IncrBy("interrupts", int(stat.Interrupts-prev.Interrupts)) 71 | c.client.IncrBy("switches", int(stat.ContextSwitches-prev.ContextSwitches)) 72 | } 73 | 74 | prev = stat 75 | case <-c.exit: 76 | log.Info("cpu: exiting") 77 | return 78 | } 79 | } 80 | } 81 | 82 | // Stop resource collection. 83 | func (c *CPU) Stop() error { 84 | println("stopping cpu") 85 | close(c.exit) 86 | return nil 87 | } 88 | 89 | // calculate percentage from the previous read 90 | // and adjust the previous values. 91 | func percent(prevIdle, prevTotal *uint64, s linux.CPUStat) float64 { 92 | total, idle := totals(s) 93 | di := idle - *prevIdle 94 | dt := total - *prevTotal 95 | *prevIdle = idle 96 | *prevTotal = total 97 | return float64(dt-di) / float64(dt) * 100 98 | } 99 | 100 | // totals from jiffies. 101 | func totals(s linux.CPUStat) (uint64, uint64) { 102 | user := s.User - s.Guest 103 | usernice := s.Nice - s.GuestNice 104 | idle := s.Idle + s.IOWait 105 | system := s.System + s.IRQ + s.SoftIRQ 106 | virt := s.Guest + s.GuestNice 107 | total := user + usernice + system + idle + s.Steal + virt 108 | return total, idle 109 | } 110 | -------------------------------------------------------------------------------- /pkg/disk/disk.go: -------------------------------------------------------------------------------- 1 | // 2 | // Disk resource. 
3 | // 4 | // This collector reports on the following stat metrics: 5 | // 6 | // - "disk.percent" (gauge) 7 | // - "disk.free" (gauge) 8 | // - "disk.used" (gauge) 9 | // 10 | package disk 11 | 12 | import "github.com/statsd/client-interface" 13 | import "github.com/deniswernert/go-fstab" 14 | import "github.com/c9s/goprocinfo/linux" 15 | import "github.com/segmentio/go-log" 16 | import "time" 17 | 18 | // Disk resource. 19 | type Disk struct { 20 | Interval time.Duration 21 | client statsd.Client 22 | exit chan struct{} 23 | } 24 | 25 | // New disk resource. 26 | func New(interval time.Duration) *Disk { 27 | return &Disk{ 28 | Interval: interval, 29 | exit: make(chan struct{}), 30 | } 31 | } 32 | 33 | // Name of resource. 34 | func (d *Disk) Name() string { 35 | return "disk" 36 | } 37 | 38 | // Start resource collection. 39 | func (d *Disk) Start(client statsd.Client) error { 40 | d.client = client 41 | go d.Report() 42 | return nil 43 | } 44 | 45 | // paths returns the mount-point paths. 46 | func (d *Disk) paths() ([]string, error) { 47 | mounts, err := fstab.ParseSystem() 48 | if err != nil { 49 | return nil, err 50 | } 51 | 52 | paths := []string{} 53 | for _, mount := range mounts { 54 | paths = append(paths, mount.File) 55 | } 56 | 57 | return paths, nil 58 | } 59 | 60 | // Report resources. 
61 | func (d *Disk) Report() { 62 | tick := time.Tick(d.Interval) 63 | 64 | paths, err := d.paths() 65 | if err != nil { 66 | log.Error("disk: failed to read fstab: %s", err) 67 | log.Error("disk: will not report") 68 | return 69 | } 70 | 71 | log.Info("disk: discovered %v", paths) 72 | 73 | for { 74 | select { 75 | case <-tick: 76 | for _, path := range paths { 77 | stat, err := linux.ReadDisk(path) 78 | 79 | if err != nil { 80 | log.Error("disk: %s %s", path, err) 81 | continue 82 | } 83 | 84 | d.client.Gauge(path+".percent", int(percent(stat.Used, stat.All))) 85 | d.client.Gauge(path+".free", int(stat.Free)) 86 | d.client.Gauge(path+".used", int(stat.Used)) 87 | } 88 | 89 | case <-d.exit: 90 | log.Info("disk: exiting") 91 | return 92 | } 93 | } 94 | } 95 | 96 | // Stop resource collection. 97 | func (d *Disk) Stop() error { 98 | println("stopping disk") 99 | return nil 100 | } 101 | 102 | // calculate percentage. 103 | func percent(a, b uint64) uint64 { 104 | return uint64(float64(a) / float64(b) * 100) 105 | } 106 | -------------------------------------------------------------------------------- /pkg/memory/memory.go: -------------------------------------------------------------------------------- 1 | // 2 | // Memory resource. 3 | // 4 | // This collector reports on the following meminfo metrics: 5 | // 6 | // - "percent" (gauge) 7 | // - "active" (gauge) 8 | // - "total" (gauge) 9 | // - "free" (gauge) 10 | // - "swap.percent" (gauge) 11 | // - "swap.total" (gauge) 12 | // - "swap.free" (gauge) 13 | // 14 | package memory 15 | 16 | import "github.com/statsd/client-interface" 17 | import "github.com/c9s/goprocinfo/linux" 18 | import "github.com/segmentio/go-log" 19 | import "math" 20 | import "time" 21 | 22 | // Memory resource. 23 | type Memory struct { 24 | Path string 25 | Interval time.Duration 26 | Extended bool 27 | client statsd.Client 28 | exit chan struct{} 29 | } 30 | 31 | // New memory resource. 
32 | func New(interval time.Duration, extended bool) *Memory { 33 | return &Memory{ 34 | Path: "/proc/meminfo", 35 | Extended: extended, 36 | Interval: interval, 37 | exit: make(chan struct{}), 38 | } 39 | } 40 | 41 | // Name of the resource. 42 | func (m *Memory) Name() string { 43 | return "memory" 44 | } 45 | 46 | // Start resource collection. 47 | func (m *Memory) Start(client statsd.Client) error { 48 | m.client = client 49 | go m.Report() 50 | return nil 51 | } 52 | 53 | // Report resource. 54 | func (m *Memory) Report() { 55 | tick := time.Tick(m.Interval) 56 | for { 57 | select { 58 | case <-tick: 59 | stat, err := linux.ReadMemInfo(m.Path) 60 | 61 | if err != nil { 62 | log.Error("memory: %s", err) 63 | continue 64 | } 65 | 66 | m.client.Gauge("percent", percent(stat)) 67 | m.client.Gauge("swap.percent", swapPercent(stat)) 68 | 69 | if m.Extended { 70 | m.client.Gauge("total", bytes(stat["MemTotal"])) 71 | m.client.Gauge("used", bytes(used(stat))) 72 | m.client.Gauge("free", bytes(stat["MemFree"])) 73 | m.client.Gauge("active", bytes(stat["Active"])) 74 | m.client.Gauge("swap.total", bytes(stat["SwapTotal"])) 75 | m.client.Gauge("swap.free", bytes(stat["SwapFree"])) 76 | } 77 | 78 | case <-m.exit: 79 | log.Info("mem: exiting") 80 | return 81 | } 82 | } 83 | } 84 | 85 | // Stop resource collection. 86 | func (m *Memory) Stop() error { 87 | close(m.exit) 88 | return nil 89 | } 90 | 91 | // calculate swap percentage. 92 | func swapPercent(s linux.MemInfo) int { 93 | total := s["SwapTotal"] 94 | used := total - s["SwapFree"] 95 | p := float64(used) / float64(total) * 100 96 | 97 | if math.IsNaN(p) { 98 | return 0 99 | } 100 | 101 | return int(p) 102 | } 103 | 104 | // calculate percentage. 105 | func percent(s linux.MemInfo) int { 106 | total := s["MemTotal"] 107 | p := float64(used(s)) / float64(total) * 100 108 | 109 | if math.IsNaN(p) { 110 | return 0 111 | } 112 | 113 | return int(p) 114 | } 115 | 116 | // used memory. 
117 | func used(s linux.MemInfo) uint64 { 118 | return s["MemTotal"] - s["MemFree"] - s["Buffers"] - s["Cached"] 119 | } 120 | 121 | // convert to bytes. 122 | func bytes(n uint64) int { 123 | return int(n * 1000) 124 | } 125 | -------------------------------------------------------------------------------- /pkg/resource/resource.go: -------------------------------------------------------------------------------- 1 | package resource 2 | 3 | import "github.com/statsd/client-interface" 4 | 5 | type Resource interface { 6 | Name() string 7 | Start(statsd.Client) error 8 | Stop() error 9 | } 10 | --------------------------------------------------------------------------------