├── .github └── workflows │ └── go.yml ├── .gitignore ├── .goreleaser.yml ├── Dockerfile ├── LICENSE.md ├── Makefile ├── README.md ├── go.mod ├── go.sum ├── main.go ├── metrics.go ├── poller.go └── zookeeper.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | on: [push] 3 | jobs: 4 | 5 | build: 6 | name: Build 7 | runs-on: ubuntu-latest 8 | steps: 9 | 10 | - name: Set up Go 1.12 11 | uses: actions/setup-go@v1 12 | with: 13 | go-version: 1.12 14 | id: go 15 | 16 | - name: Check out code into the Go module directory 17 | uses: actions/checkout@v1 18 | 19 | - name: Get dependencies 20 | run: | 21 | go get -v -t -d ./... 22 | if [ -f Gopkg.toml ]; then 23 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 24 | dep ensure 25 | fi 26 | 27 | - name: Build 28 | run: go build -v . 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | zk-exporter 2 | dist/ 3 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | builds: 2 | - main: . 
3 | binary: zk-exporter 4 | goos: 5 | - windows 6 | - darwin 7 | - linux 8 | goarch: 9 | - amd64 10 | dockers: 11 | - image: lucianjon/prometheus-zk-exporter 12 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.5 2 | 3 | ADD zk-exporter /zk-exporter 4 | 5 | EXPOSE 9120 6 | 7 | ENTRYPOINT ["/zk-exporter"] -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2017 Lucian Jones 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export SHELL:=/usr/bin/env bash -O extglob -c 2 | export GO15VENDOREXPERIMENT:=1 3 | export OS=$(shell uname | tr '[:upper:]' '[:lower:]') 4 | 5 | build: GOOS ?= ${OS} 6 | build: GOARCH ?= amd64 7 | build: test 8 | rm -f zk-exporter 9 | GOOS=${GOOS} GOARCH=${GOARCH} go build -ldflags "-X main.buildTime=`date --iso-8601=s` -X main.buildVersion=`git rev-parse HEAD | cut -c-7`" -o zk-exporter . 10 | 11 | release-linux: 12 | GOOS=linux $(MAKE) build 13 | tar Jcf zk-exporter-`git describe --abbrev=0 --tags`-linux-amd64.txz zk-exporter 14 | 15 | release-darwin: 16 | GOOS=darwin $(MAKE) build 17 | tar Jcf zk-exporter-`git describe --abbrev=0 --tags`-darwin-amd64.txz zk-exporter 18 | 19 | release: clean release-linux release-darwin 20 | 21 | test: 22 | go test -v 23 | 24 | clean: 25 | rm -f zk-exporter 26 | rm -f zk-exporter-*.txz 27 | 28 | run: build 29 | ./zk-exporter 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Prometheus ZooKeeper Exporter [![Build Status](https://travis-ci.org/lucianjon/zk-exporter.svg?branch=master)](https://travis-ci.org/lucianjon/zk-exporter) 2 | 3 | The exporter periodically scrapes ZooKeeper metrics via four letter word commands, see https://zookeeper.apache.org/doc/trunk/zookeeperAdmin.html#The+Four+Letter+Words. Currently exposes metrics from the "mntr" and "ruok" commands (requires ZooKeeper 3.4.0 or above). 4 | 5 | These are parsed into prometheus metrics and served on an endpoint at `/metrics` 6 | 7 | Still a WIP 8 | 9 | ## Installation 10 | 11 | Currently requires go for installation. Binaries and a docker image will be coming. 
12 | 13 | To install the latest version run: 14 | 15 | ``` 16 | go get -u github.com/lucianjon/zk-exporter 17 | ``` 18 | 19 | Note: you should have $GOPATH/bin added to your $PATH 20 | 21 | ## Usage 22 | 23 | Once installed run: 24 | 25 | ``` 26 | zk-exporter -port PORT -servers HOST:PORT[,HOST2:PORT2...] -pollinterval DURATION 27 | ``` 28 | The ZooKeeper servers are a string in the `host:port,host2:port2` format. 29 | The pollinterval is a Go time.Duration value, e.g. `30s` 30 | 31 | ## Getting Started 32 | 33 | * By default metrics are exposed on `0.0.0.0:9120/metrics` 34 | 35 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/lucianjon/zk-exporter 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a // indirect 7 | github.com/golang/protobuf v0.0.0-20170920220647-130e6b02ab05 // indirect 8 | github.com/matttproud/golang_protobuf_extensions v1.0.0 // indirect 9 | github.com/prometheus/client_golang v0.8.0 10 | github.com/prometheus/client_model v0.0.0-20170216185247-6f3806018612 // indirect 11 | github.com/prometheus/common v0.0.0-20170908161822-2f17f4a9d485 // indirect 12 | github.com/prometheus/procfs v0.0.0-20170703101242-e645f4e5aaa8 // indirect 13 | golang.org/x/sync v0.0.0-20190423024810-112230192c58 // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a h1:BtpsbiV638WQZwhA98cEZw2BsbnQJrbd0BI7tsy0W1c= 2 | github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= 3 | github.com/golang/protobuf v0.0.0-20170920220647-130e6b02ab05 h1:Kesru7U6Mhpf/x7rthxAKnr586VFmoE2NdEvkOKvfjg= 4 | github.com/golang/protobuf v0.0.0-20170920220647-130e6b02ab05/go.mod 
h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 5 | github.com/matttproud/golang_protobuf_extensions v1.0.0 h1:YNOwxxSJzSUARoD9KRZLzM9Y858MNGCOACTvCW9TSAc= 6 | github.com/matttproud/golang_protobuf_extensions v1.0.0/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= 7 | github.com/prometheus/client_golang v0.8.0 h1:1921Yw9Gc3iSc4VQh3PIoOqgPCZS7G/4xQNVUp8Mda8= 8 | github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= 9 | github.com/prometheus/client_model v0.0.0-20170216185247-6f3806018612 h1:13pIdM2tpaDi4OVe24fgoIS7ZTqMt0QI+bwQsX5hq+g= 10 | github.com/prometheus/client_model v0.0.0-20170216185247-6f3806018612/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= 11 | github.com/prometheus/common v0.0.0-20170908161822-2f17f4a9d485 h1:ELypU1kBAPEzqcj8hphDyZWTJw5TIFgepXU983BEkD0= 12 | github.com/prometheus/common v0.0.0-20170908161822-2f17f4a9d485/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= 13 | github.com/prometheus/procfs v0.0.0-20170703101242-e645f4e5aaa8 h1:Kh7M6mzRpQ2de1rixoSQZr4BTINXFm8WDbeN5ttnwyE= 14 | github.com/prometheus/procfs v0.0.0-20170703101242-e645f4e5aaa8/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= 15 | golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU= 16 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 17 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "net/http" 9 | "os" 10 | "os/signal" 11 | "strings" 12 | "syscall" 13 | 14 | "time" 15 | 16 | "github.com/prometheus/client_golang/prometheus/promhttp" 17 | ) 18 | 19 | var ( 20 | port int 21 | servers string 22 | pollInterval time.Duration 23 | listenaddr string 24 | ) 25 | 26 | func init() { 27 | flag.IntVar(&port, 
"port", 9120, "The port to serve the endpoint from.") 28 | flag.StringVar(&listenaddr, "listenaddr", "", "Address to listen on") 29 | flag.StringVar(&servers, "servers", "", "Comma separated list of zk servers in the format host:port") 30 | flag.DurationVar(&pollInterval, "pollinterval", 10*time.Second, "How often to poll zookeeper for metrics.") 31 | flag.Parse() 32 | } 33 | 34 | func main() { 35 | ss := strings.Split(servers, ",") 36 | if servers == "" || len(ss) == 0 { 37 | log.Fatal("main: at least one zookeeper server is required") 38 | } 39 | 40 | metrics := initMetrics() 41 | 42 | for _, server := range ss { 43 | p := newPoller(pollInterval, metrics, newZooKeeper(server)) 44 | go p.pollForMetrics() 45 | } 46 | 47 | sigs := make(chan os.Signal, 1) 48 | signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) 49 | 50 | mux := http.NewServeMux() 51 | mux.Handle("/metrics", promhttp.Handler()) 52 | 53 | srv := &http.Server{ 54 | Addr: fmt.Sprintf("%v:%v", listenaddr, port), 55 | Handler: mux, 56 | ReadTimeout: 30 * time.Second, 57 | WriteTimeout: 30 * time.Second, 58 | } 59 | 60 | go func() { 61 | _ = <-sigs 62 | log.Println("main: received SIGINT or SIGTERM, shutting down") 63 | context, cancel := context.WithTimeout(context.Background(), time.Second) 64 | defer cancel() 65 | if err := srv.Shutdown(context); err != nil { 66 | log.Printf("main: failed to shutdown endpoint with err=%#v\n", err) 67 | } 68 | }() 69 | 70 | if err := srv.ListenAndServe(); err != http.ErrServerClosed { 71 | log.Printf("main: failure while serving endpoint, err=%#v\n", err) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /metrics.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "github.com/prometheus/client_golang/prometheus" 4 | 5 | type serverState float64 6 | 7 | const ( 8 | zkAvgLatency = "zk_avg_latency" 9 | zkMinLatency = "zk_min_latency" 10 | zkMaxLatency = 
"zk_max_latency" 11 | zkPacketsReceived = "zk_packets_received" 12 | zkPacketsSent = "zk_packets_sent" 13 | zkNumAliveConnections = "zk_num_alive_connections" 14 | zkOutstandingRequests = "zk_outstanding_requests" 15 | zkZnodeCount = "zk_znode_count" 16 | zkWatchCount = "zk_watch_count" 17 | zkEphemeralsCount = "zk_ephemerals_count" 18 | zkApproximateDataSize = "zk_approximate_data_size" 19 | zkOpenFileDescriptorCount = "zk_open_file_descriptor_count" 20 | zkMaxFileDescriptorCount = "zk_max_file_descriptor_count" 21 | zkFollowers = "zk_followers" 22 | zkSyncedFollowers = "zk_synced_followers" 23 | zkPendingSyncs = "zk_pending_syncs" 24 | zkServerState = "zk_server_state" 25 | 26 | zkOK = "zk_ok" 27 | 28 | // server states 29 | unknown serverState = -1 30 | follower serverState = 1 31 | leader serverState = 2 32 | standalone serverState = 3 33 | ) 34 | 35 | func getState(s string) serverState { 36 | switch s { 37 | case "follower": 38 | return follower 39 | case "leader": 40 | return leader 41 | case "standalone": 42 | return standalone 43 | default: 44 | return unknown 45 | } 46 | } 47 | 48 | func initMetrics() map[string]*prometheus.GaugeVec { 49 | 50 | metrics := make(map[string]*prometheus.GaugeVec) 51 | 52 | metrics[zkAvgLatency] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 53 | Name: zkAvgLatency, 54 | Help: "Average Latency for ZooKeeper network requests.", 55 | }, []string{"zk_instance"}) 56 | 57 | metrics[zkMinLatency] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 58 | Name: zkMinLatency, 59 | Help: "Minimum latency for Zookeeper network requests.", 60 | }, []string{"zk_instance"}) 61 | 62 | metrics[zkMaxLatency] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 63 | Name: zkMaxLatency, 64 | Help: "Maximum latency for ZooKeeper network requests", 65 | }, []string{"zk_instance"}) 66 | 67 | metrics[zkPacketsReceived] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 68 | Name: zkPacketsReceived, 69 | Help: "Number of network packets received by the ZooKeeper 
instance.", 70 | }, []string{"zk_instance"}) 71 | 72 | metrics[zkPacketsSent] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 73 | Name: zkPacketsSent, 74 | Help: "Number of network packets sent by the ZooKeeper instance.", 75 | }, []string{"zk_instance"}) 76 | 77 | metrics[zkNumAliveConnections] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 78 | Name: zkNumAliveConnections, 79 | Help: "Number of currently alive connections to the ZooKeeper instance.", 80 | }, []string{"zk_instance"}) 81 | 82 | metrics[zkOutstandingRequests] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 83 | Name: zkOutstandingRequests, 84 | Help: "Number of requests currently waiting in the queue.", 85 | }, []string{"zk_instance"}) 86 | 87 | metrics[zkZnodeCount] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 88 | Name: zkZnodeCount, 89 | Help: "Znode count", 90 | }, []string{"zk_instance"}) 91 | 92 | metrics[zkWatchCount] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 93 | Name: zkWatchCount, 94 | Help: "Watch count", 95 | }, []string{"zk_instance"}) 96 | 97 | metrics[zkEphemeralsCount] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 98 | Name: zkEphemeralsCount, 99 | Help: "Ephemerals Count", 100 | }, []string{"zk_instance"}) 101 | 102 | metrics[zkApproximateDataSize] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 103 | Name: zkApproximateDataSize, 104 | Help: "Approximate data size", 105 | }, []string{"zk_instance"}) 106 | 107 | metrics[zkOpenFileDescriptorCount] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 108 | Name: zkOpenFileDescriptorCount, 109 | Help: "Number of currently open file descriptors", 110 | }, []string{"zk_instance"}) 111 | 112 | metrics[zkMaxFileDescriptorCount] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 113 | Name: zkMaxFileDescriptorCount, 114 | Help: "Maximum number of open file descriptors", 115 | }, []string{"zk_instance"}) 116 | 117 | metrics[zkServerState] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 118 | Name: zkServerState, 119 | Help: "Current state of the 
zk instance: 1 = follower, 2 = leader, 3 = standalone, -1 if unknown", 120 | }, []string{"zk_instance"}) 121 | 122 | metrics[zkFollowers] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 123 | Name: zkFollowers, 124 | Help: "Leader only: number of followers.", 125 | }, []string{"zk_instance"}) 126 | 127 | metrics[zkSyncedFollowers] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 128 | Name: zkSyncedFollowers, 129 | Help: "Leader only: number of followers currenty in sync", 130 | }, []string{"zk_instance"}) 131 | 132 | metrics[zkPendingSyncs] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 133 | Name: zkPendingSyncs, 134 | Help: "Current number of pending syncs", 135 | }, []string{"zk_instance"}) 136 | 137 | metrics[zkOK] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 138 | Name: zkOK, 139 | Help: "Is ZooKeeper currently OK", 140 | }, []string{"zk_instance"}) 141 | 142 | for _, metric := range metrics { 143 | prometheus.MustRegister(metric) 144 | } 145 | 146 | return metrics 147 | } 148 | -------------------------------------------------------------------------------- /poller.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "strconv" 6 | "time" 7 | 8 | "github.com/prometheus/client_golang/prometheus" 9 | ) 10 | 11 | type zkPoller struct { 12 | interval time.Duration 13 | metrics map[string]*prometheus.GaugeVec 14 | zk *zooKeeper 15 | } 16 | 17 | func newPoller(interval time.Duration, metrics map[string]*prometheus.GaugeVec, zk *zooKeeper) *zkPoller { 18 | return &zkPoller{ 19 | interval: interval, 20 | metrics: metrics, 21 | zk: zk, 22 | } 23 | } 24 | 25 | func (p *zkPoller) pollForMetrics() { 26 | for { 27 | log.Printf("poller: polling zookeeper [%v] for metrics\n", p.zk.addr) 28 | m, err := p.zk.fetchStats() 29 | if err != nil { 30 | log.Printf("poller: failed to fetch stats, err=%v\n", err) 31 | } 32 | p.refreshMetrics(m) 33 | <-time.After(p.interval) 34 | } 35 | } 36 | 37 | func (p 
*zkPoller) refreshMetrics(updated map[string]string) { 38 | for name, value := range updated { 39 | metric, ok := p.metrics[name] 40 | 41 | if !ok { 42 | log.Printf("poller: couldn't find metric for stat=%v\n", name) 43 | continue 44 | } 45 | 46 | if name == zkOK { 47 | switch value { 48 | case "imok": 49 | metric.WithLabelValues(p.zk.addr).Set(1) 50 | default: 51 | metric.WithLabelValues(p.zk.addr).Set(0) 52 | } 53 | continue 54 | } 55 | 56 | if name == zkServerState { 57 | state := getState(value) 58 | metric.WithLabelValues(p.zk.addr).Set(float64(state)) 59 | continue 60 | } 61 | 62 | f, err := strconv.ParseFloat(value, 64) 63 | if err != nil { 64 | log.Printf("poller: failed to convert string value to float, value=%v\n", value) 65 | continue 66 | } 67 | 68 | metric.WithLabelValues(p.zk.addr).Set(f) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /zookeeper.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "io" 8 | "log" 9 | "net" 10 | "strings" 11 | ) 12 | 13 | const ( 14 | monitorCMD = "mntr" 15 | okCMD = "ruok" 16 | ) 17 | 18 | type zooKeeper struct { 19 | addr string 20 | } 21 | 22 | func newZooKeeper(addr string) *zooKeeper { 23 | return &zooKeeper{addr: addr} 24 | } 25 | 26 | func (zk *zooKeeper) fetchStats() (map[string]string, error) { 27 | stats, err := zk.fetchMntrStats() 28 | if err != nil { 29 | return stats, err 30 | } 31 | 32 | isOK, err := zk.fetchOKStat() 33 | if err != nil { 34 | return stats, err 35 | } 36 | 37 | stats[zkOK] = isOK 38 | return stats, nil 39 | } 40 | 41 | func (zk *zooKeeper) fetchMntrStats() (map[string]string, error) { 42 | stats := make(map[string]string) 43 | byts, err := zk.sendCommand(monitorCMD) 44 | if err != nil { 45 | return stats, err 46 | } 47 | scanner := bufio.NewScanner(bytes.NewReader(byts)) 48 | for scanner.Scan() { 49 | splits := 
strings.Split(scanner.Text(), "\t") 50 | if splits[0] == "zk_version" { 51 | continue 52 | } 53 | if len(splits) != 2 { 54 | log.Printf("zookeeper: expected a key value pair separated by a tab, got [%v]\n", splits) 55 | continue 56 | } 57 | stats[splits[0]] = splits[1] 58 | } 59 | return stats, nil 60 | } 61 | 62 | func (zk *zooKeeper) fetchOKStat() (string, error) { 63 | byts, err := zk.sendCommand(okCMD) 64 | return string(byts), err 65 | } 66 | 67 | func (zk *zooKeeper) sendCommand(cmd string) ([]byte, error) { 68 | conn, err := net.Dial("tcp", zk.addr) 69 | if err != nil { 70 | return nil, fmt.Errorf("zookeeper: dial failed, err=%#v", err) 71 | } 72 | 73 | defer func() { 74 | if err := conn.Close(); err != nil { 75 | fmt.Printf("Failed to close connection, err=%#v\n", err) 76 | } 77 | }() 78 | 79 | if _, err = fmt.Fprintf(conn, fmt.Sprintf("%s\n", cmd)); err != nil { 80 | return nil, fmt.Errorf("zookeeper: command send failed, err=%#v", err) 81 | } 82 | 83 | var buf bytes.Buffer 84 | if _, err = io.Copy(&buf, conn); err != nil { 85 | return nil, fmt.Errorf("zookeeper: fetch response failed, err=%#v", err) 86 | } 87 | 88 | return buf.Bytes(), nil 89 | } 90 | --------------------------------------------------------------------------------