├── .github └── workflows │ └── release.yml ├── Dockerfile ├── LICENSE ├── README.md ├── build.sh ├── conf ├── prometheus.yml ├── zoo.cfg.1 ├── zoo.cfg.2 └── zoo.cfg.3 ├── docker-compose.yml ├── go.mod ├── main.go ├── no-serving-leader.yml ├── switch-leaderserves.sh └── zoo-timeout ├── Dockerfile └── custom-entrypoint.sh /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: create release and upload binary 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v[0-9]+.[0-9]+.[0-9]+' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: install go 13 | uses: actions/setup-go@v1 14 | with: 15 | go-version: 1.13.x 16 | 17 | - name: checkout 18 | uses: actions/checkout@v1 19 | 20 | - name: get version 21 | id: v 22 | run: echo ::set-output name=tag::$( awk -F '/' '{print $NF}' <<< "${GITHUB_REF}" ) 23 | 24 | - name: build 25 | run: | 26 | for OS in linux darwin; do 27 | echo "building binary for ${OS}" 28 | GOOS=${OS} GOARCH=amd64 go build -v -o zookeeper-exporter 29 | tar -czvf zookeeper-exporter-${{ steps.v.outputs.tag }}-${OS}.tar.gz --transform "s,^,zookeeper-exporter-${{ steps.v.outputs.tag }}-${OS}/," zookeeper-exporter 30 | done 31 | ls -lh 32 | 33 | - name: create release 34 | id: create-release 35 | uses: actions/create-release@v1 36 | env: 37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 38 | with: 39 | tag_name: ${{ steps.v.outputs.tag }} 40 | release_name: ${{ steps.v.outputs.tag }} 41 | draft: false 42 | prerelease: false 43 | 44 | - name: upload linux binary 45 | uses: actions/upload-release-asset@v1.0.1 46 | env: 47 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 48 | with: 49 | upload_url: ${{ steps.create-release.outputs.upload_url }} 50 | asset_path: zookeeper-exporter-${{ steps.v.outputs.tag }}-linux.tar.gz 51 | asset_name: zookeeper-exporter-${{ steps.v.outputs.tag }}-linux.tar.gz 52 | asset_content_type: application/gzip 53 | 54 | - name: upload darwin binary 55 | uses: 
actions/upload-release-asset@v1.0.1 56 | env: 57 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 58 | with: 59 | upload_url: ${{ steps.create-release.outputs.upload_url }} 60 | asset_path: zookeeper-exporter-${{ steps.v.outputs.tag }}-darwin.tar.gz 61 | asset_name: zookeeper-exporter-${{ steps.v.outputs.tag }}-darwin.tar.gz 62 | asset_content_type: application/gzip -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.14-alpine as builder 2 | WORKDIR /usr/src/zookeeper-exporter 3 | COPY . /usr/src/zookeeper-exporter 4 | RUN go build -v 5 | 6 | FROM alpine:3.11 7 | COPY --from=builder /usr/src/zookeeper-exporter/zookeeper-exporter /usr/local/bin/zookeeper-exporter 8 | ENTRYPOINT ["/usr/local/bin/zookeeper-exporter"] 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Dmitry Andronov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Prometheus zookeeper exporter 2 | 3 | Exports `mntr` zookeeper's stats in prometheus format. 4 | `zk_followers`, `zk_synced_followers` and `zk_pending_syncs` metrics are available only on cluster leader. 5 | 6 | #### Build 7 | 8 | `./build.sh` script builds `dabealu/zookeeper-exporter:latest` docker image. 9 | To build image with different name, pass it to `build.sh` as a first arg. 10 | 11 | #### Usage 12 | 13 | **Note:** starting from zookeeper v3.4.10 it's required to have `mntr` command whitelisted (details: [4lw.commands.whitelist](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html)). 14 | 15 | **Warning:** flag to specify target zk hosts is changed since `v0.1.10`, see below 16 | 17 | ``` 18 | Usage of zookeeper-exporter: 19 | -listen string 20 | address to listen on (default "0.0.0.0:9141") 21 | -location string 22 | metrics location (default "/metrics") 23 | -timeout int 24 | timeout for connection to zk servers, in seconds (default 30) 25 | -zk-hosts string 26 | comma separated list of zk servers, e.g. 
'10.0.0.1:2181,10.0.0.2:2181,10.0.0.3:2181' 27 | -zk-tls-auth bool 28 | zk tls client authentication (default false) 29 | -zk-tls-auth-cert string 30 | tls certificate for zk tls client authentication (required if -zk-tls-auth is true) 31 | -zk-tls-auth-key string 32 | tls key for zk tls client authentication (required if -zk-tls-auth is true) 33 | ``` 34 | 35 | An example `docker-compose.yml` can be used for management of clustered zookeeper + exporters: 36 | 37 | ``` 38 | # start zk cluster and exporters 39 | docker-compose up -d 40 | 41 | # get metrics of first exporter (second and third exporters are on 9142 and 9143 ports) 42 | curl -s localhost:9141/metrics 43 | 44 | # at 9144 port there's an exporter which handles multiple zk hosts 45 | curl -s localhost:9144/metrics 46 | 47 | # shutdown containers 48 | docker-compose down -v 49 | ``` 50 | 51 | #### Dashboard 52 | 53 | Example grafana dashboard: https://grafana.com/grafana/dashboards/11442 54 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | docker build -t ${1:-'dabealu/zookeeper-exporter:latest'} . 
-------------------------------------------------------------------------------- /conf/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 5s 3 | evaluation_interval: 15s 4 | 5 | alerting: 6 | alertmanagers: 7 | - static_configs: 8 | - targets: 9 | 10 | rule_files: 11 | 12 | scrape_configs: 13 | - job_name: 'exp1' 14 | static_configs: 15 | - targets: ['exp1:9141'] 16 | - job_name: 'exp2' 17 | static_configs: 18 | - targets: ['exp2:9141'] 19 | - job_name: 'exp3' 20 | static_configs: 21 | - targets: ['exp3:9141'] 22 | - job_name: 'exp123' 23 | static_configs: 24 | - targets: ['exp123:9141'] 25 | -------------------------------------------------------------------------------- /conf/zoo.cfg.1: -------------------------------------------------------------------------------- 1 | clientPort=2181 2 | dataDir=/data 3 | dataLogDir=/datalog 4 | tickTime=2000 5 | initLimit=5 6 | syncLimit=2 7 | maxClientCnxns=60 8 | clientPort=2181 9 | server.1=0.0.0.0:2888:3888 10 | server.2=zoo2:2888:3888 11 | server.3=zoo3:2888:3888 12 | leaderServes=no 13 | 4lw.commands.whitelist=mntr,ruok 14 | -------------------------------------------------------------------------------- /conf/zoo.cfg.2: -------------------------------------------------------------------------------- 1 | clientPort=2181 2 | dataDir=/data 3 | dataLogDir=/datalog 4 | tickTime=2000 5 | initLimit=5 6 | syncLimit=2 7 | maxClientCnxns=60 8 | clientPort=2181 9 | server.1=zoo1:2888:3888 10 | server.2=0.0.0.0:2888:3888 11 | server.3=zoo3:2888:3888 12 | leaderServes=no 13 | 4lw.commands.whitelist=mntr,ruok 14 | -------------------------------------------------------------------------------- /conf/zoo.cfg.3: -------------------------------------------------------------------------------- 1 | clientPort=2181 2 | dataDir=/data 3 | dataLogDir=/datalog 4 | tickTime=2000 5 | initLimit=5 6 | syncLimit=2 7 | maxClientCnxns=60 8 | clientPort=2181 9 | 
server.1=zoo1:2888:3888 10 | server.2=zoo2:2888:3888 11 | server.3=0.0.0.0:2888:3888 12 | leaderServes=no 13 | 4lw.commands.whitelist=mntr,ruok 14 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | services: 3 | 4 | # first 5 | zoo1: 6 | image: zookeeper:3.6 7 | hostname: zoo1 8 | ports: 9 | - 2181:2181 10 | environment: 11 | ZOO_MY_ID: 1 12 | volumes: 13 | - ./conf/zoo.cfg.1:/conf/zoo.cfg 14 | exp1: 15 | image: dabealu/zookeeper-exporter 16 | ports: 17 | - 9141:9141 18 | command: --zk-hosts="zoo1:2181" 19 | 20 | # second 21 | zoo2: 22 | image: zookeeper:3.6 23 | hostname: zoo2 24 | ports: 25 | - 2182:2181 26 | environment: 27 | ZOO_MY_ID: 2 28 | volumes: 29 | - ./conf/zoo.cfg.2:/conf/zoo.cfg 30 | exp2: 31 | image: dabealu/zookeeper-exporter 32 | ports: 33 | - 9142:9141 34 | command: --zk-hosts="zoo2:2181" 35 | 36 | # third 37 | zoo3: 38 | ##### Uncomment block below and comment 'image' parameter in order to test timeout 39 | # build: 40 | # context: zoo-timeout 41 | # privileged: true # Needed for iptables drop rule to work in custom entrypoint script 42 | ##### End 43 | image: zookeeper:3.6 44 | hostname: zoo3 45 | ports: 46 | - 2183:2181 47 | environment: 48 | ZOO_MY_ID: 3 49 | volumes: 50 | - ./conf/zoo.cfg.3:/conf/zoo.cfg 51 | exp3: 52 | image: dabealu/zookeeper-exporter 53 | ports: 54 | - 9143:9141 55 | command: --zk-hosts="zoo3:2181" --timeout=5 56 | 57 | # multitarget 58 | exp123: 59 | image: dabealu/zookeeper-exporter 60 | # build: 61 | # context: . 
62 | ports: 63 | - 9144:9141 64 | command: --zk-hosts="zoo1:2181,zoo2:2181,zoo3:2181" --timeout=5 65 | 66 | # prometheus server 67 | prometheus: 68 | image: prom/prometheus:latest 69 | ports: 70 | - 9090:9090 71 | entrypoint: > 72 | prometheus 73 | --config.file='/etc/prometheus/prometheus.yml' 74 | --storage.tsdb.path='data' 75 | --web.listen-address='0.0.0.0:9090' 76 | volumes: 77 | - ./conf/prometheus.yml:/etc/prometheus/prometheus.yml 78 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/dabealu/zookeeper-exporter 2 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/tls" 5 | "flag" 6 | "fmt" 7 | "io/ioutil" 8 | "log" 9 | "net" 10 | "net/http" 11 | "regexp" 12 | "strconv" 13 | "strings" 14 | "time" 15 | ) 16 | 17 | const ( 18 | // template format: command, host_label 19 | commandNotAllowedTmpl = "warning: %q command isn't allowed at %q, see '4lw.commands.whitelist' ZK config parameter" 20 | instanceNotServingMessage = "This ZooKeeper instance is not currently serving requests" 21 | cmdNotExecutedSffx = "is not executed because it is not in the whitelist." 22 | ) 23 | 24 | var ( 25 | versionRE = regexp.MustCompile(`^([0-9]+\.[0-9]+\.[0-9]+).*$`) 26 | metricNameReplacer = strings.NewReplacer("-", "_", ".", "_", ",", "_") 27 | ) 28 | 29 | func main() { 30 | location := flag.String("location", "/metrics", "metrics location") 31 | listen := flag.String("listen", "0.0.0.0:9141", "address to listen on") 32 | timeout := flag.Int64("timeout", 30, "timeout for connection to zk servers, in seconds") 33 | zkhosts := flag.String("zk-hosts", "", "comma separated list of zk servers, e.g. 
'10.0.0.1:2181,10.0.0.2:2181,10.0.0.3:2181'") 34 | zktlsauth := flag.Bool("zk-tls-auth", false, "zk tls client authentication") 35 | zktlscert := flag.String("zk-tls-auth-cert", "", "cert for zk tls client authentication") 36 | zktlskey := flag.String("zk-tls-auth-key", "", "key for zk tls client authentication") 37 | 38 | flag.Parse() 39 | 40 | var clientCert *tls.Certificate 41 | if *zktlsauth { 42 | if *zktlscert == "" || *zktlskey == "" { 43 | log.Fatal("-zk-tls-auth-cert and -zk-tls-auth-key flags are required when -zk-tls-auth is true") 44 | } 45 | _clientCert, err := tls.LoadX509KeyPair(*zktlscert, *zktlskey) 46 | if err != nil { 47 | log.Fatalf("fatal: can't load keypair %s, %s: %v", *zktlskey, *zktlscert, err) 48 | } 49 | clientCert = &_clientCert 50 | } 51 | 52 | hosts := strings.Split(*zkhosts, ",") 53 | if len(hosts) == 0 { 54 | log.Fatal("fatal: no target zookeeper hosts specified, exiting") 55 | } 56 | 57 | log.Printf("info: zookeeper hosts: %v", hosts) 58 | log.Printf("info: serving metrics at %s%s", *listen, *location) 59 | serveMetrics(&Options{ 60 | Timeout: *timeout, 61 | Hosts: hosts, 62 | Location: *location, 63 | Listen: *listen, 64 | ClientCert: clientCert, 65 | }) 66 | } 67 | 68 | type Options struct { 69 | Timeout int64 70 | Hosts []string 71 | Location string 72 | Listen string 73 | ClientCert *tls.Certificate 74 | } 75 | 76 | func dial(host string, timeout time.Duration, clientCert *tls.Certificate) (net.Conn, error) { 77 | dialer := net.Dialer{Timeout: timeout} 78 | if clientCert == nil { 79 | return dialer.Dial("tcp", host) 80 | } else { 81 | return tls.DialWithDialer(&dialer, "tcp", host, &tls.Config{ 82 | Certificates: []tls.Certificate{*clientCert}, 83 | InsecureSkipVerify: true, 84 | }) 85 | } 86 | } 87 | 88 | // open tcp connections to zk nodes, send 'mntr' and return result as a map 89 | func getMetrics(options *Options) map[string]string { 90 | metrics := map[string]string{} 91 | timeout := time.Duration(options.Timeout) * 
time.Second 92 | 93 | for _, h := range options.Hosts { 94 | tcpaddr, err := net.ResolveTCPAddr("tcp", h) 95 | if err != nil { 96 | log.Printf("warning: cannot resolve zk hostname '%s': %s", h, err) 97 | continue 98 | } 99 | 100 | hostLabel := fmt.Sprintf("zk_host=%q", h) 101 | zkUp := fmt.Sprintf("zk_up{%s}", hostLabel) 102 | 103 | conn, err := dial(tcpaddr.String(), timeout, options.ClientCert) 104 | if err != nil { 105 | log.Printf("warning: cannot connect to %s: %v", h, err) 106 | metrics[zkUp] = "0" 107 | continue 108 | } 109 | 110 | res := sendZookeeperCmd(conn, h, "mntr") 111 | 112 | // get slice of strings from response, like 'zk_avg_latency 0' 113 | lines := strings.Split(res, "\n") 114 | 115 | // skip the instance if it is in a leader-only state and isn't serving client requests 116 | if lines[0] == instanceNotServingMessage { 117 | metrics[zkUp] = "1" 118 | metrics[fmt.Sprintf("zk_server_leader{%s}", hostLabel)] = "1" 119 | continue 120 | } 121 | 122 | // 'mntr' command isn't allowed in zk config, log as a warning 123 | if strings.Contains(lines[0], cmdNotExecutedSffx) { 124 | metrics[zkUp] = "0" 125 | log.Printf(commandNotAllowedTmpl, "mntr", hostLabel) 126 | continue 127 | } 128 | 129 | // split each line into key-value pair 130 | for _, l := range lines { 131 | kv := strings.Split(strings.Replace(l, "\t", " ", -1), " ") 132 | if len(kv) < 2 { 133 | // blank or malformed line without a value column: skip it instead of panicking on kv[1] 134 | continue 135 | } 136 | key := kv[0] 137 | value := kv[1] 138 | 139 | switch key { 140 | case "zk_server_state": 141 | zkLeader := fmt.Sprintf("zk_server_leader{%s}", hostLabel) 142 | if value == "leader" { 143 | metrics[zkLeader] = "1" 144 | } else { 145 | metrics[zkLeader] = "0" 146 | } 147 | 148 | case "zk_version": 149 | version := versionRE.ReplaceAllString(value, "$1") 150 | metrics[fmt.Sprintf("zk_version{%s,version=%q}", hostLabel, version)] = "1" 151 | 152 | case "zk_peer_state": 153 | metrics[fmt.Sprintf("zk_peer_state{%s,state=%q}", hostLabel, value)] = "1" 154 | 155 | default: 156 | var k string 157 | 
if strings.Contains(key, "}") { 158 | k = metricNameReplacer.Replace(key) 159 | k = strings.Replace(k, "}", ",", 1) 160 | k = fmt.Sprintf("%s%s}", k, hostLabel) 161 | } else { 162 | k = fmt.Sprintf("%s{%s}", metricNameReplacer.Replace(key), hostLabel) 163 | } 164 | 165 | if !isDigit(value) { 166 | log.Printf("warning: skipping metric %q which holds not-digit value: %q", key, value) 167 | continue 168 | } 169 | 170 | metrics[k] = value 171 | } 172 | } 173 | 174 | zkRuok := fmt.Sprintf("zk_ruok{%s}", hostLabel) 175 | if conn, err := dial(tcpaddr.String(), timeout, options.ClientCert); err == nil { 176 | res = sendZookeeperCmd(conn, h, "ruok") 177 | if res == "imok" { 178 | metrics[zkRuok] = "1" 179 | } else { 180 | if strings.Contains(res, cmdNotExecutedSffx) { 181 | log.Printf(commandNotAllowedTmpl, "ruok", hostLabel) 182 | } 183 | metrics[zkRuok] = "0" 184 | } 185 | } else { 186 | metrics[zkRuok] = "0" 187 | } 188 | 189 | metrics[zkUp] = "1" 190 | } 191 | 192 | return metrics 193 | } 194 | 195 | func isDigit(in string) bool { 196 | // check input is an int 197 | if _, err := strconv.Atoi(in); err != nil { 198 | // not int, try float 199 | if _, err := strconv.ParseFloat(in, 64); err != nil { 200 | return false 201 | } 202 | } 203 | return true 204 | } 205 | 206 | func sendZookeeperCmd(conn net.Conn, host, cmd string) string { 207 | defer conn.Close() 208 | 209 | if _, err := conn.Write([]byte(cmd)); err != nil { 210 | log.Printf("warning: failed to send '%s' to '%s': %s", cmd, host, err) 211 | return "" // the command was not delivered, so do not block waiting for a reply 212 | } 213 | 214 | res, err := ioutil.ReadAll(conn) 215 | if err != nil { 216 | log.Printf("warning: failed read '%s' response from '%s': %s", cmd, host, err) 217 | } 218 | 219 | return string(res) 220 | } 221 | 222 | // serve zk metrics at chosen address and url 223 | func serveMetrics(options *Options) { 224 | handler := func(w http.ResponseWriter, r *http.Request) { 225 | for k, v := range getMetrics(options) { 226 | fmt.Fprintf(w, "%s %s\n", k, v) 227 | } 228 | } 229 | 
230 | http.HandleFunc(options.Location, handler) 231 | 232 | if err := http.ListenAndServe(options.Listen, nil); err != nil { 233 | log.Fatalf("fatal: shutting down exporter: %s", err) 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /no-serving-leader.yml: -------------------------------------------------------------------------------- 1 | # docker-compose file to test ZK 2 | # with additional config parameters 3 | 4 | version: '3.1' 5 | services: 6 | 7 | zoo1: 8 | volumes: 9 | - ./conf/zoo.cfg.1:/conf/zoo.cfg 10 | 11 | zoo2: 12 | volumes: 13 | - ./conf/zoo.cfg.2:/conf/zoo.cfg 14 | 15 | zoo3: 16 | volumes: 17 | - ./conf/zoo.cfg.3:/conf/zoo.cfg 18 | -------------------------------------------------------------------------------- /switch-leaderserves.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Toggle leaderServes between 'yes' and 'no' in conf/zoo.cfg.1..3 (uses sed -i.bak so it runs with both GNU and BSD sed) 3 | VALUE=$( awk -F '=' '/leaderServes/ {print $2}' conf/zoo.cfg.1 ) 4 | 5 | if [ "$VALUE" == 'no' ]; then 6 | VALUE='yes' 7 | else 8 | VALUE='no' 9 | fi 10 | 11 | for I in {1..3}; do 12 | sed -i.bak "s/leaderServes=.*$/leaderServes=${VALUE}/" conf/zoo.cfg.${I} && rm -f conf/zoo.cfg.${I}.bak 13 | done 14 | 15 | echo "leaderServes changed to '${VALUE}'" 16 | -------------------------------------------------------------------------------- /zoo-timeout/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM zookeeper:3.5 2 | 3 | RUN apt-get update && apt-get install -y iptables 4 | 5 | # Add custom entrypoint to set iptables rules and then resume the original entrypoint script 6 | ADD custom-entrypoint.sh / 7 | RUN cat /docker-entrypoint.sh >> /custom-entrypoint.sh 8 | 9 | # Use custom entrypoint with default command taken from upstream Dockerfile 10 | ENTRYPOINT ["/custom-entrypoint.sh"] 11 | CMD ["zkServer.sh", "start-foreground"] 12 | -------------------------------------------------------------------------------- 
/zoo-timeout/custom-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # Drop traffic to zookeeper 6 | echo 'about to setup iptables to drop packets on 2181' 7 | iptables -A INPUT -p tcp --destination-port 2181 -j DROP 8 | echo 'ip tables rules setup' 9 | 10 | # Call the original entrypoint script 11 | echo 'now calling original entrypoint' 12 | echo '----' 13 | --------------------------------------------------------------------------------