├── .github ├── CODEOWNERS └── workflows │ ├── ci.yml │ └── release.yml ├── .gitignore ├── .golangci.yml ├── .goreleaser.yml ├── CHANGELOG.md ├── Dockerfile ├── Dockerfile.gorelease ├── LICENSE ├── Makefile ├── NOTICE ├── README.md ├── config ├── burrow.toml ├── default-email.tmpl ├── default-http-delete.tmpl ├── default-http-post.tmpl ├── default-slack-delete.tmpl └── default-slack-post.tmpl ├── core ├── burrow.go ├── internal │ ├── cluster │ │ ├── coordinator.go │ │ ├── coordinator_test.go │ │ ├── kafka_cluster.go │ │ └── kafka_cluster_test.go │ ├── consumer │ │ ├── coordinator.go │ │ ├── coordinator_test.go │ │ ├── kafka_client.go │ │ ├── kafka_client_test.go │ │ ├── kafka_zk_client.go │ │ └── kafka_zk_test.go │ ├── doc.go │ ├── evaluator │ │ ├── caching.go │ │ ├── caching_test.go │ │ ├── coordinator.go │ │ ├── coordinator_test.go │ │ └── fixtures.go │ ├── helpers │ │ ├── coordinators.go │ │ ├── coordinators_test.go │ │ ├── sarama.go │ │ ├── sarama_test.go │ │ ├── scram.go │ │ ├── storage.go │ │ ├── storage_test.go │ │ ├── time.go │ │ ├── time_test.go │ │ ├── validation.go │ │ ├── validation_test.go │ │ ├── zookeeper.go │ │ └── zookeeper_test.go │ ├── httpserver │ │ ├── config.go │ │ ├── config_test.go │ │ ├── coordinator.go │ │ ├── coordinator_test.go │ │ ├── kafka.go │ │ ├── kafka_test.go │ │ ├── prometheus.go │ │ ├── prometheus_test.go │ │ └── structs.go │ ├── notifier │ │ ├── coordinator.go │ │ ├── coordinator_race_test.go │ │ ├── coordinator_test.go │ │ ├── email.go │ │ ├── email_test.go │ │ ├── helpers.go │ │ ├── http.go │ │ ├── http_test.go │ │ └── null.go │ ├── storage │ │ ├── coordinator.go │ │ ├── coordinator_test.go │ │ ├── fixtures.go │ │ ├── inmemory.go │ │ └── inmemory_test.go │ └── zookeeper │ │ ├── coordinator.go │ │ └── coordinator_test.go ├── logger.go ├── open_out_log_linux_dup3.go ├── open_out_log_linux_loong64.go ├── open_out_log_unix.go ├── open_out_log_windows.go └── protocol │ ├── evaluator.go │ ├── protocol.go │ └── storage.go ├── 
docker-compose.yml ├── docker-config └── burrow.toml ├── go.mod ├── go.sum ├── main.go └── main_test.go /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @bai 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | name: Go ${{ matrix.go-version }} on Ubuntu 8 | runs-on: ubuntu-latest 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | go-version: [1.24.x] 13 | platform: [ubuntu-latest] 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Setup Go 19 | uses: actions/setup-go@v5 20 | with: 21 | go-version: ${{ matrix.go-version }} 22 | 23 | - uses: actions/cache@v4 24 | with: 25 | path: ~/go/pkg/mod 26 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 27 | restore-keys: | 28 | ${{ runner.os }}-go- 29 | 30 | - name: Install dependencies 31 | run: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.64.2 32 | 33 | - name: Run test suite 34 | run: make test 35 | 36 | - name: Run linter 37 | run: make lint 38 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | branches-ignore: 6 | - "**" 7 | tags: 8 | - "v*.*.*" 9 | 10 | jobs: 11 | test: 12 | name: Release 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Setup Go 19 | uses: actions/setup-go@v5 20 | with: 21 | go-version: 1.24.x 22 | 23 | - uses: actions/cache@v4 24 | with: 25 | path: ~/go/pkg/mod 26 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 27 | restore-keys: | 28 | ${{ runner.os }}-go- 29 | 30 | - name: Login to 
GitHub Registry 31 | run: | 32 | docker login docker.pkg.github.com -u bai -p "${GITHUB_TOKEN}" 33 | env: 34 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 35 | 36 | - name: Run GoReleaser 37 | uses: goreleaser/goreleaser-action@v6 38 | with: 39 | version: latest 40 | args: release --clean 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | burrow-src 2 | .*.swp 3 | !config 4 | dist 5 | log 6 | .idea 7 | *.cov 8 | Burrow 9 | Burrow.exe 10 | Burrow.iml 11 | tmp 12 | vendor 13 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | timeout: 5m 3 | deadline: 10m 4 | 5 | linters-settings: 6 | govet: 7 | check-shadowing: false 8 | golint: 9 | min-confidence: 0 10 | gocyclo: 11 | min-complexity: 25 12 | maligned: 13 | suggest-new: true 14 | dupl: 15 | threshold: 100 16 | goconst: 17 | min-len: 2 18 | min-occurrences: 3 19 | misspell: 20 | locale: US 21 | goimports: 22 | local-prefixes: github.com/linkedin/Burrow 23 | gocritic: 24 | enabled-tags: 25 | - diagnostic 26 | - experimental 27 | - opinionated 28 | - performance 29 | - style 30 | disabled-checks: 31 | - wrapperFunc 32 | - ifElseChain 33 | funlen: 34 | lines: 350 35 | statements: 200 36 | revive: 37 | # default rules derived from upstream revive repo 38 | # https://github.com/walles/revive/blob/f417cbd57c6d90b43bdb7f113c222e5aeef117e5/defaults.toml 39 | rules: 40 | - name: blank-imports 41 | - name: context-as-argument 42 | - name: context-keys-type 43 | - name: dot-imports 44 | - name: error-return 45 | - name: error-strings 46 | - name: error-naming 47 | - name: exported 48 | # - name: if-return 49 | - name: increment-decrement 50 | - name: var-naming 51 | - name: var-declaration 52 | - name: 
package-comments 53 | - name: range 54 | - name: receiver-naming 55 | - name: time-naming 56 | - name: unexported-return 57 | - name: indent-error-flow 58 | - name: errorf 59 | - name: empty-block 60 | - name: superfluous-else 61 | # - name: unused-parameter 62 | - name: unreachable-code 63 | - name: redefines-builtin-id 64 | 65 | linters: 66 | disable-all: true 67 | enable: 68 | - bodyclose 69 | # - deadcode 70 | # - depguard 71 | - dogsled 72 | # - dupl 73 | # - errcheck 74 | - funlen 75 | - gocritic 76 | - gocyclo 77 | # - gofmt 78 | - goimports 79 | - revive 80 | # - gosec 81 | - gosimple 82 | - govet 83 | - ineffassign 84 | - misspell 85 | - nakedret 86 | - copyloopvar 87 | - staticcheck 88 | # - structcheck 89 | - stylecheck 90 | - typecheck 91 | - unconvert 92 | - unused 93 | # - varcheck 94 | - whitespace 95 | - goconst 96 | - unused 97 | # - gochecknoinits 98 | 99 | issues: 100 | exclude: 101 | - consider giving a name to these results 102 | - include an explanation for nolint directive 103 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | project_name: Burrow 2 | version: 2 3 | 4 | builds: 5 | - main: main.go 6 | binary: burrow 7 | env: 8 | - CGO_ENABLED=0 9 | goos: 10 | - windows 11 | - darwin 12 | - linux 13 | goarch: 14 | - amd64 15 | - arm64 16 | archives: 17 | - formats: [tar.gz] 18 | format_overrides: 19 | - goos: windows 20 | formats: [zip,tar.gz] 21 | files: 22 | - LICENSE 23 | - NOTICE 24 | - README.md 25 | - CHANGELOG.md 26 | - config/burrow.toml 27 | - config/default-email.tmpl 28 | - config/default-http-delete.tmpl 29 | - config/default-http-post.tmpl 30 | - config/default-slack-delete.tmpl 31 | - config/default-slack-post.tmpl 32 | snapshot: 33 | version_template: "{{ .FullCommit }}" 34 | dockers: 35 | - goos: linux 36 | goarch: amd64 37 | dockerfile: Dockerfile.gorelease 38 | image_templates: 39 | - 
'docker.pkg.github.com/linkedin/burrow/burrow:latest' 40 | - 'docker.pkg.github.com/linkedin/burrow/burrow:latest-amd64' 41 | - 'docker.pkg.github.com/linkedin/burrow/burrow:{{ .Tag }}' 42 | - 'docker.pkg.github.com/linkedin/burrow/burrow:{{ .Tag }}-amd64' 43 | extra_files: 44 | - docker-config/burrow.toml 45 | build_flag_templates: 46 | - "--label=org.label-schema.schema-version=1.0" 47 | - "--label=org.label-schema.version={{ .Version }}" 48 | - "--label=org.label-schema.name={{ .ProjectName }}" 49 | - "--label=org.label-schema.vcs=https://github.com/linkedin/Burrow" 50 | - "--label=org.label-schema.vcs-ref={{ .FullCommit }}" 51 | 52 | - goos: linux 53 | goarch: arm64 54 | dockerfile: Dockerfile.gorelease 55 | image_templates: 56 | - 'docker.pkg.github.com/linkedin/burrow/burrow:latest-arm64' 57 | - 'docker.pkg.github.com/linkedin/burrow/burrow:{{ .Tag }}-arm64' 58 | extra_files: 59 | - docker-config/burrow.toml 60 | build_flag_templates: 61 | - "--label=org.label-schema.schema-version=1.0" 62 | - "--label=org.label-schema.version={{ .Version }}" 63 | - "--label=org.label-schema.name={{ .ProjectName }}" 64 | - "--label=org.label-schema.vcs=https://github.com/linkedin/Burrow" 65 | - "--label=org.label-schema.vcs-ref={{ .FullCommit }}" 66 | docker_manifests: 67 | - name_template: "docker.pkg.github.com/linkedin/burrow/burrow:{{ .Tag }}" 68 | image_templates: 69 | - "docker.pkg.github.com/linkedin/burrow/burrow:{{ .Tag }}" 70 | - "docker.pkg.github.com/linkedin/burrow/burrow:{{ .Tag }}-amd64" 71 | - "docker.pkg.github.com/linkedin/burrow/burrow:{{ .Tag }}-arm64" 72 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # stage 1: builder 2 | FROM golang:1.24.1-alpine as builder 3 | 4 | ENV BURROW_SRC /usr/src/Burrow/ 5 | 6 | RUN apk add --no-cache git curl 7 | COPY . 
$BURROW_SRC 8 | WORKDIR $BURROW_SRC 9 | 10 | RUN go mod tidy && go build -o /tmp/burrow . 11 | 12 | # stage 2: runner 13 | FROM alpine:3.21 14 | 15 | LABEL maintainer="LinkedIn Burrow https://github.com/linkedin/Burrow" 16 | 17 | COPY --from=builder /tmp/burrow /app/ 18 | COPY docker-config/burrow.toml /etc/burrow/ 19 | 20 | CMD ["/app/burrow", "--config-dir", "/etc/burrow"] 21 | -------------------------------------------------------------------------------- /Dockerfile.gorelease: -------------------------------------------------------------------------------- 1 | FROM alpine:3.21 2 | LABEL maintainer="LinkedIn Burrow https://github.com/linkedin/Burrow" 3 | 4 | WORKDIR /app 5 | COPY burrow /app/ 6 | 7 | CMD ["/app/burrow", "--config-dir", "/etc/burrow"] 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: get update fmt lint test 2 | 3 | GO := GO111MODULE=on GOSUMDB=off go 4 | GOBUILD := CGO_ENABLED=0 $(GO) build $(BUILD_FLAG) 5 | GOTEST := $(GO) test -gcflags='-l' -p 3 -v -race 6 | 7 | FILES := $(shell find core -name '*.go' -type f -not -name '*.pb.go' -not -name '*_generated.go' -not -name '*_test.go') 8 | TESTS := $(shell find core -name '*.go' -type f -not -name '*.pb.go' -not -name '*_generated.go' -name '*_test.go') 9 | 10 | get: 11 | $(GO) get ./... 12 | $(GO) mod verify 13 | $(GO) mod tidy 14 | 15 | update: 16 | $(GO) get -u -v all 17 | $(GO) mod verify 18 | $(GO) mod tidy 19 | 20 | fmt: 21 | gofmt -s -l -w $(FILES) $(TESTS) 22 | 23 | lint: 24 | golangci-lint run 25 | 26 | test: 27 | $(GOTEST) ./... 28 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2015 LinkedIn Corp. 
Licensed under the Apache License, Version 2 | 2.0 (the "License"); you may not use this file except in compliance with 3 | the License. You may obtain a copy of the License at 4 | http://www.apache.org/licenses/LICENSE-2.0 5 | 6 | Unless required by applicable law or agreed to in writing, software 7 | distributed under the License is distributed on an "AS IS" BASIS, 8 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | 11 | 12 | This product includes/uses Go (https://golang.org/) 13 | Copyright (C) 2012 The Go Authors 14 | License: BSD 15 | 16 | This product includes/uses gcfg (https://code.google.com/p/gcfg) 17 | Copyright (c) 2012 Péter Surányi 18 | License: BSD 19 | 20 | This product includes/uses go-uuid (https://code.google.com/p/go-uuid) 21 | Copyright (c) 2009,2014 Google Inc. 22 | License: BSD 23 | 24 | This product includes/uses snappy-go (https://code.google.com/p/snappy-go) 25 | Copyright (c) 2011 The Snappy-Go Authors 26 | License: BSD 27 | 28 | This product includes/uses cihub/seelog (https://github.com/cihub/seelog) 29 | Copyright (c) 2012, Cloud Instruments Co., Ltd. 
30 | License: BSD 31 | 32 | This product includes/uses eapache/go-resiliency (https://github.com/eapache/go-resiliency/) 33 | Copyright (c) 2014 Evan Huus 34 | License: MIT 35 | 36 | This product includes/uses eapache/queue (https://github.com/eapache/queue/) 37 | Copyright (c) 2014 Evan Huus 38 | License: MIT 39 | 40 | This product includes/uses go-zk (https://github.com/linkedin/go-zk/) 41 | Copyright (c) 2013, Samuel Stauffer 42 | License: BSD 43 | 44 | This product includes/uses sarama (https://github.com/IBM/sarama/) 45 | Copyright (c) 2013 Shopify 46 | Copyright (c) 2023 IBM Corporation 47 | License: MIT 48 | 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Release](https://img.shields.io/github/v/release/linkedin/Burrow)](https://github.com/linkedin/Burrow/releases) 2 | [![Join the chat at https://gitter.im/linkedin-Burrow/Lobby](https://badges.gitter.im/linkedin-Burrow/Lobby.svg)](https://gitter.im/linkedin-Burrow/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 3 | [![Build Status](https://github.com/linkedin/Burrow/actions/workflows/ci.yml/badge.svg)](https://github.com/linkedin/Burrow/actions/workflows/ci.yml) 4 | [![go report card](https://goreportcard.com/badge/github.com/linkedin/Burrow)](https://goreportcard.com/report/github.com/linkedin/Burrow) 5 | [![Coverage Status](https://coveralls.io/repos/github/linkedin/Burrow/badge.svg?branch=master)](https://coveralls.io/github/linkedin/Burrow?branch=master) 6 | [![GoDoc](https://godoc.org/github.com/linkedin/Burrow?status.svg)](https://godoc.org/github.com/linkedin/Burrow) 7 | 8 | # Burrow - Kafka Consumer Lag Checking 9 | 10 | 11 | Burrow is a monitoring companion for [Apache Kafka](http://kafka.apache.org) that provides consumer lag checking as a service without the need for specifying thresholds. 
It monitors committed offsets for all consumers and calculates the status of those consumers on demand. An HTTP endpoint is provided to request status on demand, as well as provide other Kafka cluster information. There are also configurable notifiers that can send status out via email or HTTP calls to another service. 12 | 13 | ## Features 14 | * NO THRESHOLDS! Groups are evaluated over a sliding window. 15 | * Multiple Kafka Cluster support 16 | * Automatically monitors all consumers using Kafka-committed offsets 17 | * Configurable support for Zookeeper-committed offsets 18 | * Configurable support for Storm-committed offsets 19 | * HTTP endpoint for consumer group status, as well as broker and consumer information 20 | * Configurable emailer for sending alerts for specific groups 21 | * Configurable HTTP client for sending alerts to another system for all groups 22 | 23 | ## Getting Started 24 | ### Prerequisites 25 | Burrow is written in Go, so before you get started, you should [install and set up Go](https://golang.org/doc/install). As the dependencies 26 | are managed using Go module, the lowest version of Go supported is 1.11, though we recommend using version 1.12 for development. 27 | 28 | ### Build and Install 29 | ``` 30 | $ Clone github.com/linkedin/Burrow to a directory outside of $GOPATH. Alternatively, you can export GO111MODULE=on to enable Go module. 31 | $ cd to the source directory. 32 | $ go mod tidy 33 | $ go install 34 | ``` 35 | 36 | ### Running Burrow 37 | ``` 38 | $ $GOPATH/bin/Burrow --config-dir /path/containing/config 39 | ``` 40 | 41 | ### Using Docker 42 | A Docker file is available which builds this project on top of an Alpine Linux image. 43 | To use it, build your docker container, mount your Burrow configuration into `/etc/burrow` and run docker. 44 | 45 | A [Docker Compose](docker-compose.yml) is also available for quick and easy development. 
46 | 47 | Install [Docker Compose](https://docs.docker.com/compose/) and then: 48 | 49 | 1. Build the docker container: 50 | ``` 51 | docker-compose build 52 | ``` 53 | 54 | 2. Run the docker compose stack which includes kafka and zookeeper: 55 | ``` 56 | docker-compose down; docker-compose up 57 | ``` 58 | 59 | 3. Some test topics have already been created by default and Burrow can be accessed on `http://localhost:8000/v3/kafka`. 60 | 61 | 62 | ### Configuration 63 | For information on how to write your configuration file, check out the [detailed wiki](https://github.com/linkedin/Burrow/wiki) 64 | 65 | ## License 66 | Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 67 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 68 | 69 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 70 | CONDITIONS OF ANY KIND, either express or implied. 
71 | -------------------------------------------------------------------------------- /config/burrow.toml: -------------------------------------------------------------------------------- 1 | [general] 2 | pidfile="burrow.pid" 3 | stdout-logfile="burrow.out" 4 | access-control-allow-origin="mysite.example.com" 5 | 6 | [logging] 7 | filename="logs/burrow.log" 8 | level="info" 9 | maxsize=100 10 | maxbackups=30 11 | maxage=10 12 | use-localtime=false 13 | use-compression=true 14 | 15 | [zookeeper] 16 | servers=[ "zkhost01.example.com:2181", "zkhost02.example.com:2181", "zkhost03.example.com:2181" ] 17 | timeout=6 18 | root-path="/burrow" 19 | 20 | [client-profile.test] 21 | client-id="burrow-test" 22 | kafka-version="0.10.0" 23 | 24 | [cluster.local] 25 | class-name="kafka" 26 | servers=[ "kafka01.example.com:10251", "kafka02.example.com:10251", "kafka03.example.com:10251" ] 27 | client-profile="test" 28 | topic-refresh=120 29 | offset-refresh=30 30 | groups-reaper-refresh=0 31 | 32 | [consumer.local] 33 | class-name="kafka" 34 | cluster="local" 35 | servers=[ "kafka01.example.com:10251", "kafka02.example.com:10251", "kafka03.example.com:10251" ] 36 | client-profile="test" 37 | group-denylist="^(console-consumer-|python-kafka-consumer-|quick-).*$" 38 | group-allowlist="" 39 | 40 | [consumer.local_zk] 41 | class-name="kafka_zk" 42 | cluster="local" 43 | servers=[ "zk01.example.com:2181", "zk02.example.com:2181", "zk03.example.com:2181" ] 44 | zookeeper-path="/kafka-cluster" 45 | zookeeper-timeout=30 46 | group-denylist="^(console-consumer-|python-kafka-consumer-|quick-).*$" 47 | group-allowlist="" 48 | 49 | [httpserver.default] 50 | address=":8000" 51 | 52 | [storage.default] 53 | class-name="inmemory" 54 | workers=20 55 | intervals=15 56 | expire-group=604800 57 | min-distance=1 58 | 59 | [notifier.default] 60 | class-name="http" 61 | url-open="http://someservice.example.com:1467/v1/event" 62 | interval=60 63 | timeout=5 64 | keepalive=30 65 | extras={ 
api_key="REDACTED", app="burrow", tier="STG", fabric="mydc" } 66 | template-open="conf/default-http-post.tmpl" 67 | template-close="conf/default-http-delete.tmpl" 68 | method-close="DELETE" 69 | send-close=true 70 | threshold=1 71 | -------------------------------------------------------------------------------- /config/default-email.tmpl: -------------------------------------------------------------------------------- 1 | Subject: [Burrow] Kafka Consumer Lag Alert 2 | 3 | The Kafka consumer groups you are monitoring are currently showing problems. The following groups are in a problem state (groups not listed are OK): 4 | 5 | Cluster: {{.Result.Cluster}} 6 | Group: {{.Result.Group}} 7 | Status: {{.Result.Status.String}} 8 | Complete: {{.Result.Complete}} 9 | Errors: {{len .Result.Partitions}} partitions have problems 10 | {{range .Result.Partitions}} {{.Status.String}} {{.Topic}}:{{.Partition}} ({{.Start.Timestamp}}, {{.Start.Offset}}, {{.Start.Lag}}) -> ({{.End.Timestamp}}, {{.End.Offset}}, {{.End.Lag}}) 11 | {{end}} 12 | -------------------------------------------------------------------------------- /config/default-http-delete.tmpl: -------------------------------------------------------------------------------- 1 | {"api_key":"{{index .Extras "api_key"}}","app":"{{index .Extras "app"}}","block":false,"ids":["{{.Id}}"]} 2 | -------------------------------------------------------------------------------- /config/default-http-post.tmpl: -------------------------------------------------------------------------------- 1 | {"api_key":"{{index .Extras "api_key"}}","app":"{{index .Extras "app"}}","block":false,"events":[{"id":"{{.ID}}","event":{"severity":"{{if eq .Result.Status 2}}WARN{{else}}ERR{{end}}","tier":"{{index .Extras "tier"}}","group":"{{.Result.Group}}","start":"{{.Start.Format "Jan 02, 2006 15:04:05 UTC"}}","complete":{{.Result.Complete}},"partitions":{{.Result.Partitions | jsonencoder}}}}]} 2 | 
-------------------------------------------------------------------------------- /config/default-slack-delete.tmpl: -------------------------------------------------------------------------------- 1 | { "attachments": [{"color": "good","title": "A kafka consumer is no longer lagging","fields": [{"title": "Group","value": "{{ .Group }}", "short": false},{"title": "Cluster","value": "{{ .Cluster }}","short": true},{"title": "Total Lag","value": "{{ .Result.TotalLag}}","short": true}, {"title": "Start","value": "{{ .Start.Format "2006-01-02T15:04:05Z07:00" }}","short": true}]}]} 2 | -------------------------------------------------------------------------------- /config/default-slack-post.tmpl: -------------------------------------------------------------------------------- 1 | { "attachments": [{"color": "danger","title": "A kafka consumer is lagging behind!","fields": [{"title": "Group","value": "{{ .Group }}", "short": false},{"title": "Cluster","value": "{{ .Cluster }}","short": true},{"title": "Total Lag","value": "{{ .Result.TotalLag}}","short": true}, {"title": "Start","value": "{{ .Start.Format "2006-01-02T15:04:05Z07:00" }}","short": true}]}]} 2 | -------------------------------------------------------------------------------- /core/burrow.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | // Package core - Core Burrow logic. 11 | // The core package is where all the internal logic for Burrow is located. 
It provides several helpers for setting up 12 | // logging and application management (such as PID files), as well as the Start method that runs Burrow itself. 13 | // 14 | // The documentation for the rest of the internals, including all the available modules, is available at 15 | // https://godoc.org/github.com/linkedin/Burrow/core/internal/?m=all. For the most part, end users of Burrow should not 16 | // need to refer to this documentation, as it is targeted at developers of Burrow modules. Details on what modules are 17 | // available and how to configure them are available at https://github.com/linkedin/Burrow/wiki 18 | package core 19 | 20 | import ( 21 | "os" 22 | 23 | "github.com/spf13/viper" 24 | "go.uber.org/zap" 25 | 26 | "github.com/linkedin/Burrow/core/internal/cluster" 27 | "github.com/linkedin/Burrow/core/internal/consumer" 28 | "github.com/linkedin/Burrow/core/internal/evaluator" 29 | "github.com/linkedin/Burrow/core/internal/helpers" 30 | "github.com/linkedin/Burrow/core/internal/httpserver" 31 | "github.com/linkedin/Burrow/core/internal/notifier" 32 | "github.com/linkedin/Burrow/core/internal/storage" 33 | "github.com/linkedin/Burrow/core/internal/zookeeper" 34 | "github.com/linkedin/Burrow/core/protocol" 35 | ) 36 | 37 | func newCoordinators(app *protocol.ApplicationContext) []protocol.Coordinator { 38 | // This order is important - it makes sure that the things taking requests start up before things sending requests 39 | var coordinators []protocol.Coordinator 40 | 41 | haveNotifiers := viper.IsSet("notifier") 42 | 43 | // Only include zookeeper if we have dependant coordinators 44 | if haveNotifiers { 45 | coordinators = append(coordinators, 46 | &zookeeper.Coordinator{ 47 | App: app, 48 | Log: app.Logger.With( 49 | zap.String("type", "coordinator"), 50 | zap.String("name", "zookeeper"), 51 | ), 52 | }, 53 | ) 54 | } 55 | 56 | coordinators = append(coordinators, 57 | &storage.Coordinator{ 58 | App: app, 59 | Log: app.Logger.With( 60 | 
zap.String("type", "coordinator"), 61 | zap.String("name", "storage"), 62 | ), 63 | }, 64 | &evaluator.Coordinator{ 65 | App: app, 66 | Log: app.Logger.With( 67 | zap.String("type", "coordinator"), 68 | zap.String("name", "evaluator"), 69 | ), 70 | }, 71 | &httpserver.Coordinator{ 72 | App: app, 73 | Log: app.Logger.With( 74 | zap.String("type", "coordinator"), 75 | zap.String("name", "httpserver"), 76 | ), 77 | }, 78 | ) 79 | 80 | if haveNotifiers { 81 | coordinators = append(coordinators, 82 | ¬ifier.Coordinator{ 83 | App: app, 84 | Log: app.Logger.With( 85 | zap.String("type", "coordinator"), 86 | zap.String("name", "notifier"), 87 | ), 88 | }, 89 | ) 90 | } 91 | 92 | coordinators = append(coordinators, 93 | &cluster.Coordinator{ 94 | App: app, 95 | Log: app.Logger.With( 96 | zap.String("type", "coordinator"), 97 | zap.String("name", "cluster"), 98 | ), 99 | }, 100 | &consumer.Coordinator{ 101 | App: app, 102 | Log: app.Logger.With( 103 | zap.String("type", "coordinator"), 104 | zap.String("name", "consumer"), 105 | ), 106 | }, 107 | ) 108 | 109 | return coordinators 110 | } 111 | 112 | func configureCoordinators(app *protocol.ApplicationContext, coordinators []protocol.Coordinator) { // nolint:gocritic 113 | // Configure methods are allowed to panic, as their errors are non-recoverable 114 | // Catch panics here and flag in the application context if we can't continue 115 | defer func() { 116 | if r := recover(); r != nil { 117 | app.Logger.Panic(r.(string)) 118 | app.ConfigurationValid = false 119 | } 120 | }() 121 | 122 | // Configure the coordinators in order 123 | for _, coordinator := range coordinators { 124 | coordinator.Configure() 125 | } 126 | app.ConfigurationValid = true 127 | } 128 | 129 | // Start is called to start the Burrow application. This is exposed so that it is possible to use Burrow as a library 130 | // from within another application. 
Prior to calling this func, the configuration must have been loaded by viper from 131 | // some underlying source (e.g. a TOML configuration file, or explicitly set in code after reading from another source). 132 | // This func will block upon being called. 133 | // 134 | // If the calling application would like to control logging, it can pass a pointer to an instantiated 135 | // protocol.ApplicationContext struct that has the Logger and LogLevel fields set. Otherwise, Start will create a 136 | // logger based on configurations in viper. 137 | // 138 | // exitChannel is a signal channel that is provided by the calling application in order to signal Burrow to shut down. 139 | // Burrow does not currently check the signal type: if any message is received on the channel, or if the channel is 140 | // closed, Burrow will exit and Start will return 0. 141 | // 142 | // Start will return a 1 on any failure, including invalid configurations or a failure to start Burrow modules. 143 | func Start(app *protocol.ApplicationContext, exitChannel chan os.Signal) int { 144 | // Validate that the ApplicationContext is complete 145 | if (app == nil) || (app.Logger == nil) || (app.LogLevel == nil) { 146 | // Didn't get a valid ApplicationContext, so we'll set up our own, with the logger 147 | app = &protocol.ApplicationContext{} 148 | app.Logger, app.LogLevel = ConfigureLogger() 149 | defer app.Logger.Sync() 150 | } 151 | app.Logger.Info("Started Burrow") 152 | 153 | // Set up a specific child logger for main 154 | log := app.Logger.With(zap.String("type", "main"), zap.String("name", "burrow")) 155 | 156 | // send sarama logs to zap 157 | helpers.InitSaramaLogging(app.Logger) 158 | 159 | // Set up an array of coordinators in the order they are to be loaded (and closed) 160 | coordinators := newCoordinators(app) 161 | 162 | // Set up two main channels to use for the evaluator and storage coordinators. 
This is how burrow communicates 163 | // internally: 164 | // * Consumers and Clusters send offsets to the storage coordinator to populate all the state information 165 | // * The Notifiers send evaluation requests to the evaluator coordinator to check group status 166 | // * The Evaluators send requests to the storage coordinator for group offset and lag information 167 | // * The HTTP server sends requests to both the evaluator and storage coordinators to fulfill API requests 168 | app.EvaluatorChannel = make(chan *protocol.EvaluatorRequest) 169 | app.StorageChannel = make(chan *protocol.StorageRequest) 170 | 171 | // Configure coordinators and exit if anything fails 172 | configureCoordinators(app, coordinators) 173 | if !app.ConfigurationValid { 174 | return 1 175 | } 176 | 177 | // Start the coordinators in order 178 | for i, coordinator := range coordinators { 179 | err := coordinator.Start() 180 | if err != nil { 181 | // Reverse our way out, stopping coordinators, then exit 182 | for j := i - 1; j >= 0; j-- { 183 | coordinators[j].Stop() 184 | } 185 | return 1 186 | } 187 | } 188 | 189 | // Wait until we're told to exit 190 | <-exitChannel 191 | log.Info("Shutdown triggered") 192 | 193 | // Stop the coordinators in the reverse order. This assures that request senders are stopped before request servers 194 | for i := len(coordinators) - 1; i >= 0; i-- { 195 | coordinators[i].Stop() 196 | } 197 | 198 | // Exit cleanly 199 | return 0 200 | } 201 | -------------------------------------------------------------------------------- /core/internal/cluster/coordinator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. 
You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | // Package cluster - Kafka cluster subsystem. 11 | // The cluster subsystem is responsible for getting topic and partition information, as well as current broker offsets, 12 | // from Kafka clusters and sending that information to the storage subsystem. It does not handle any consumer group 13 | // information. 14 | // 15 | // # Modules 16 | // 17 | // Currently, the following modules are provided: 18 | // 19 | // * kafka - Fetch topic, partition, and offset information from a Kafka cluster 20 | package cluster 21 | 22 | import ( 23 | "errors" 24 | 25 | "github.com/spf13/viper" 26 | "go.uber.org/zap" 27 | 28 | "github.com/linkedin/Burrow/core/internal/helpers" 29 | "github.com/linkedin/Burrow/core/protocol" 30 | ) 31 | 32 | // A "cluster" is a single Kafka cluster that is going to be monitored by Burrow. The cluster module is responsible for 33 | // connecting to the Kafka cluster, monitoring the topic list, and periodically fetching the broker end offset (latest 34 | // offset) for each partition. This information is sent to the storage subsystem, where it can be retrieved by the 35 | // evaluator and HTTP server. 36 | 37 | // Coordinator manages all cluster modules, making sure they are configured, started, and stopped at the appropriate 38 | // time. 39 | type Coordinator struct { 40 | // App is a pointer to the application context. This stores the channel to the storage subsystem 41 | App *protocol.ApplicationContext 42 | 43 | // Log is a logger that has been configured for this module to use. 
Normally, this means it has been set up with 44 | // fields that are appropriate to identify this coordinator 45 | Log *zap.Logger 46 | 47 | modules map[string]protocol.Module 48 | } 49 | 50 | // getModuleForClass returns the correct module based on the passed className. As part of the Configure steps, if there 51 | // is any error, it will panic with an appropriate message describing the problem. 52 | func getModuleForClass(app *protocol.ApplicationContext, moduleName, className string) protocol.Module { 53 | switch className { 54 | case "kafka": 55 | return &KafkaCluster{ 56 | App: app, 57 | Log: app.Logger.With( 58 | zap.String("type", "module"), 59 | zap.String("coordinator", "cluster"), 60 | zap.String("class", className), 61 | zap.String("name", moduleName), 62 | ), 63 | } 64 | default: 65 | panic("Unknown cluster className provided: " + className) 66 | } 67 | } 68 | 69 | // Configure is called to create each of the configured cluster modules and call their Configure funcs to validate 70 | // their individual configurations and set them up. If there are any problems, it is expected that these funcs will 71 | // panic with a descriptive error message, as configuration failures are not recoverable errors. 72 | func (bc *Coordinator) Configure() { 73 | bc.Log.Info("configuring") 74 | 75 | bc.modules = make(map[string]protocol.Module) 76 | 77 | // Create all configured cluster modules, add to list of clusters 78 | modules := viper.GetStringMap("cluster") 79 | for name := range modules { 80 | configRoot := "cluster." + name 81 | module := getModuleForClass(bc.App, name, viper.GetString(configRoot+".class-name")) 82 | module.Configure(name, configRoot) 83 | bc.modules[name] = module 84 | } 85 | } 86 | 87 | // Start calls each of the configured cluster modules' underlying Start funcs. As the coordinator itself has no ongoing 88 | // work to do, it does not start any other goroutines. 
If any module Start returns an error, this func stops immediately 89 | // and returns that error to the caller. No further modules will be loaded after that. 90 | func (bc *Coordinator) Start() error { 91 | bc.Log.Info("starting") 92 | 93 | // Start Cluster modules 94 | err := helpers.StartCoordinatorModules(bc.modules) 95 | if err != nil { 96 | return errors.New("Error starting cluster module: " + err.Error()) 97 | } 98 | return nil 99 | } 100 | 101 | // Stop calls each of the configured cluster modules' underlying Stop funcs. It is expected that the module Stop will 102 | // not return until the module has been completely stopped. While an error can be returned, this func always returns no 103 | // error, as a failure during stopping is not a critical failure 104 | func (bc *Coordinator) Stop() error { 105 | bc.Log.Info("stopping") 106 | 107 | // The individual cluster modules can choose whether or not to implement a wait in the Stop routine 108 | helpers.StopCoordinatorModules(bc.modules) 109 | return nil 110 | } 111 | -------------------------------------------------------------------------------- /core/internal/cluster/coordinator_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | 10 | package cluster 11 | 12 | import ( 13 | "testing" 14 | 15 | "github.com/linkedin/Burrow/core/internal/helpers" 16 | "github.com/linkedin/Burrow/core/protocol" 17 | 18 | "github.com/spf13/viper" 19 | "github.com/stretchr/testify/assert" 20 | "go.uber.org/zap" 21 | ) 22 | 23 | func fixtureCoordinator() *Coordinator { 24 | coordinator := Coordinator{ 25 | Log: zap.NewNop(), 26 | } 27 | coordinator.App = &protocol.ApplicationContext{ 28 | Logger: zap.NewNop(), 29 | StorageChannel: make(chan *protocol.StorageRequest), 30 | } 31 | 32 | viper.Reset() 33 | viper.Set("client-profile..client-id", "testid") 34 | viper.Set("cluster.test.class-name", "kafka") 35 | viper.Set("cluster.test.servers", []string{"broker1.example.com:1234"}) 36 | 37 | return &coordinator 38 | } 39 | 40 | func TestCoordinator_ImplementsCoordinator(t *testing.T) { 41 | assert.Implements(t, (*protocol.Coordinator)(nil), new(Coordinator)) 42 | } 43 | 44 | func TestCoordinator_Configure(t *testing.T) { 45 | coordinator := fixtureCoordinator() 46 | coordinator.Configure() 47 | 48 | assert.Lenf(t, coordinator.modules, 1, "Expected 1 module configured, not %v", len(coordinator.modules)) 49 | } 50 | 51 | func TestCoordinator_Configure_TwoModules(t *testing.T) { 52 | coordinator := fixtureCoordinator() 53 | viper.Set("cluster.anothertest.class-name", "kafka") 54 | viper.Set("cluster.anothertest.servers", []string{"broker1.example.com:1234"}) 55 | coordinator.Configure() 56 | } 57 | 58 | func TestCoordinator_StartStop(t *testing.T) { 59 | coordinator := fixtureCoordinator() 60 | coordinator.Configure() 61 | 62 | // Swap out the coordinator modules with a mock for testing 63 | mockModule := &helpers.MockModule{} 64 | mockModule.On("Start").Return(nil) 65 | mockModule.On("Stop").Return(nil) 66 | coordinator.modules["test"] = mockModule 67 | 68 | coordinator.Start() 69 | mockModule.AssertCalled(t, "Start") 70 | 71 | coordinator.Stop() 72 | mockModule.AssertCalled(t, "Stop") 73 | } 74 | 
-------------------------------------------------------------------------------- /core/internal/consumer/coordinator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | // Package consumer - Kafka consumer subsystem. 11 | // The consumer subsystem is responsible for getting consumer offset information and sending that information to the 12 | // storage subsystem. This consumer information could be stored in a variety of places, and each module supports a 13 | // different type of repository. 14 | // 15 | // # Modules 16 | // 17 | // Currently, the following modules are provided: 18 | // 19 | // * kafka - Consume a Kafka cluster's __consumer_offsets topic to get consumer information (new consumer) 20 | // 21 | // * kafka_zk - Parse the /consumers tree of a Kafka cluster's metadata to get consumer information (old consumer) 22 | package consumer 23 | 24 | import ( 25 | "errors" 26 | 27 | "github.com/spf13/viper" 28 | "go.uber.org/zap" 29 | 30 | "github.com/linkedin/Burrow/core/internal/helpers" 31 | "github.com/linkedin/Burrow/core/protocol" 32 | ) 33 | 34 | // The consumer module is responsible for fetching information about consumer group status from some external system 35 | // and forwarding it to the storage module. Each consumer module is associated with a single cluster. 36 | 37 | // Coordinator manages all consumer modules, making sure they are configured, started, and stopped at the appropriate 38 | // time. 
39 | type Coordinator struct { 40 | // App is a pointer to the application context. This stores the channel to the storage subsystem 41 | App *protocol.ApplicationContext 42 | 43 | // Log is a logger that has been configured for this module to use. Normally, this means it has been set up with 44 | // fields that are appropriate to identify this coordinator 45 | Log *zap.Logger 46 | 47 | modules map[string]protocol.Module 48 | } 49 | 50 | // getModuleForClass returns the correct module based on the passed className. As part of the Configure steps, if there 51 | // is any error, it will panic with an appropriate message describing the problem. 52 | func getModuleForClass(app *protocol.ApplicationContext, moduleName, className string) protocol.Module { 53 | logger := app.Logger.With( 54 | zap.String("type", "module"), 55 | zap.String("coordinator", "consumer"), 56 | zap.String("class", className), 57 | zap.String("name", moduleName), 58 | ) 59 | 60 | switch className { 61 | case "kafka": 62 | return &KafkaClient{ 63 | App: app, 64 | Log: logger, 65 | } 66 | case "kafka_zk": 67 | return &KafkaZkClient{ 68 | App: app, 69 | Log: logger, 70 | } 71 | default: 72 | panic("Unknown consumer className provided: " + className) 73 | } 74 | } 75 | 76 | // Configure is called to create each of the configured consumer modules and call their Configure funcs to validate 77 | // their individual configurations and set them up. If there are any problems, it is expected that these funcs will 78 | // panic with a descriptive error message, as configuration failures are not recoverable errors. 79 | func (cc *Coordinator) Configure() { 80 | cc.Log.Info("configuring") 81 | 82 | cc.modules = make(map[string]protocol.Module) 83 | 84 | // Create all configured cluster modules, add to list of clusters 85 | modules := viper.GetStringMap("consumer") 86 | for name := range modules { 87 | configRoot := "consumer." + name 88 | if !viper.IsSet("cluster." 
+ viper.GetString(configRoot+".cluster")) { 89 | panic("Consumer '" + name + "' references an unknown cluster '" + viper.GetString(configRoot+".cluster") + "'") 90 | } 91 | module := getModuleForClass(cc.App, name, viper.GetString(configRoot+".class-name")) 92 | module.Configure(name, configRoot) 93 | cc.modules[name] = module 94 | } 95 | } 96 | 97 | // Start calls each of the configured consumer modules' underlying Start funcs. As the coordinator itself has no ongoing 98 | // work to do, it does not start any other goroutines. If any module Start returns an error, this func stops immediately 99 | // and returns that error to the caller. No further modules will be loaded after that. 100 | func (cc *Coordinator) Start() error { 101 | cc.Log.Info("starting") 102 | 103 | // Start Consumer modules 104 | err := helpers.StartCoordinatorModules(cc.modules) 105 | if err != nil { 106 | return errors.New("Error starting consumer module: " + err.Error()) 107 | } 108 | // All consumers started, Burrow is ready to serve requests 109 | // set the readiness probe 110 | cc.App.AppReady = true 111 | return nil 112 | } 113 | 114 | // Stop calls each of the configured consumer modules' underlying Stop funcs. It is expected that the module Stop will 115 | // not return until the module has been completely stopped. While an error can be returned, this func always returns no 116 | // error, as a failure during stopping is not a critical failure 117 | func (cc *Coordinator) Stop() error { 118 | cc.Log.Info("stopping") 119 | 120 | // The individual consumer modules can choose whether or not to implement a wait in the Stop routine 121 | helpers.StopCoordinatorModules(cc.modules) 122 | return nil 123 | } 124 | -------------------------------------------------------------------------------- /core/internal/consumer/coordinator_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. 
Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package consumer 11 | 12 | import ( 13 | "testing" 14 | 15 | "github.com/spf13/viper" 16 | "go.uber.org/zap" 17 | 18 | "github.com/stretchr/testify/assert" 19 | 20 | "github.com/linkedin/Burrow/core/internal/helpers" 21 | "github.com/linkedin/Burrow/core/protocol" 22 | ) 23 | 24 | func fixtureCoordinator() *Coordinator { 25 | coordinator := Coordinator{ 26 | Log: zap.NewNop(), 27 | } 28 | coordinator.App = &protocol.ApplicationContext{ 29 | Logger: zap.NewNop(), 30 | StorageChannel: make(chan *protocol.StorageRequest), 31 | } 32 | 33 | viper.Reset() 34 | viper.Set("client-profile..client-id", "testid") 35 | viper.Set("cluster.test.class-name", "kafka") 36 | viper.Set("cluster.test.servers", []string{"broker1.example.com:1234"}) 37 | viper.Set("consumer.test.class-name", "kafka") 38 | viper.Set("consumer.test.servers", []string{"broker1.example.com:1234"}) 39 | viper.Set("consumer.test.cluster", "test") 40 | 41 | return &coordinator 42 | } 43 | 44 | func TestCoordinator_ImplementsCoordinator(t *testing.T) { 45 | assert.Implements(t, (*protocol.Coordinator)(nil), new(Coordinator)) 46 | } 47 | 48 | func TestCoordinator_Configure(t *testing.T) { 49 | coordinator := fixtureCoordinator() 50 | coordinator.Configure() 51 | 52 | assert.Lenf(t, coordinator.modules, 1, "Expected 1 module configured, not %v", len(coordinator.modules)) 53 | } 54 | 55 | func TestCoordinator_Configure_BadCluster(t *testing.T) { 56 | coordinator := fixtureCoordinator() 57 | viper.Set("consumer.test.cluster", "nocluster") 58 | 59 
| assert.Panics(t, coordinator.Configure, "Expected panic") 60 | } 61 | 62 | func TestCoordinator_Configure_TwoModules(t *testing.T) { 63 | coordinator := fixtureCoordinator() 64 | viper.Set("consumer.anothertest.class-name", "kafka") 65 | viper.Set("consumer.anothertest.servers", []string{"broker1.example.com:1234"}) 66 | viper.Set("consumer.anothertest.cluster", "test") 67 | coordinator.Configure() 68 | } 69 | 70 | func TestCoordinator_StartStop(t *testing.T) { 71 | coordinator := fixtureCoordinator() 72 | coordinator.Configure() 73 | 74 | // Swap out the coordinator modules with a mock for testing 75 | mockModule := &helpers.MockModule{} 76 | mockModule.On("Start").Return(nil) 77 | mockModule.On("Stop").Return(nil) 78 | coordinator.modules["test"] = mockModule 79 | 80 | coordinator.Start() 81 | mockModule.AssertCalled(t, "Start") 82 | 83 | coordinator.Stop() 84 | mockModule.AssertCalled(t, "Stop") 85 | } 86 | -------------------------------------------------------------------------------- /core/internal/doc.go: -------------------------------------------------------------------------------- 1 | // Package internal - Here be dragons. 2 | // The internal package contains the bulk of Burrow's logic, including all of the coordinators and modules that have 3 | // been defined. This documentation is targeted at developers of Burrow modules to get more information about the 4 | // internal structure and how the modules fit together. It is not designed for end users, and for the most part will not 5 | // be useful. 6 | package internal 7 | -------------------------------------------------------------------------------- /core/internal/evaluator/coordinator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. 
You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | // Package evaluator - Group evaluation subsystem. 11 | // The evaluator subsystem is responsible for fetching group information from the storage subsystem and calculating the 12 | // group's status based on that. It responds to EvaluatorRequest objects that are send via a channel, and replies with 13 | // a ConsumerGroupStatus. 14 | // 15 | // # Modules 16 | // 17 | // Currently, only one module is provided: 18 | // 19 | // * caching - Evaluate a consumer group and cache the results in memory for a short period of time 20 | package evaluator 21 | 22 | import ( 23 | "errors" 24 | 25 | "github.com/spf13/viper" 26 | "go.uber.org/zap" 27 | 28 | "github.com/linkedin/Burrow/core/internal/helpers" 29 | "github.com/linkedin/Burrow/core/protocol" 30 | ) 31 | 32 | // Module is responsible for answering requests to evaluate the status of a consumer group. It fetches offset 33 | // information from the storage subsystem and transforms that into a protocol.ConsumerGroupStatus response. It conforms 34 | // to the overall protocol.Module interface, but it adds a func to fetch the channel that the module is listening on for 35 | // requests, so that requests can be forwarded to it by the coordinator 36 | type Module interface { 37 | protocol.Module 38 | GetCommunicationChannel() chan *protocol.EvaluatorRequest 39 | } 40 | 41 | // Coordinator manages a single evaluator module (only one module is supported at this time), making sure it is 42 | // configured, started, and stopped at the appropriate time. 
It is also responsible for listening to the 43 | // EvaluatorChannel that is provided in the application context and forwarding those requests to the evaluator module. 44 | // If no evaluator module has been configured explicitly, the coordinator starts the caching module as a default. 45 | type Coordinator struct { 46 | // App is a pointer to the application context. This stores the channel to the storage subsystem 47 | App *protocol.ApplicationContext 48 | 49 | // Log is a logger that has been configured for this module to use. Normally, this means it has been set up with 50 | // fields that are appropriate to identify this coordinator 51 | Log *zap.Logger 52 | 53 | quitChannel chan struct{} 54 | modules map[string]protocol.Module 55 | } 56 | 57 | // getModuleForClass returns the correct module based on the passed className. As part of the Configure steps, if there 58 | // is any error, it will panic with an appropriate message describing the problem. 59 | func getModuleForClass(app *protocol.ApplicationContext, moduleName, className string) protocol.Module { 60 | switch className { 61 | case "caching": 62 | return &CachingEvaluator{ 63 | App: app, 64 | Log: app.Logger.With( 65 | zap.String("type", "module"), 66 | zap.String("coordinator", "evaluator"), 67 | zap.String("class", className), 68 | zap.String("name", moduleName), 69 | ), 70 | } 71 | default: 72 | panic("Unknown evaluator className provided: " + className) 73 | } 74 | } 75 | 76 | // Configure is called to create the configured evaluator module and call its Configure func to validate the 77 | // configuration and set it up. The coordinator will panic is more than one module is configured, and if no modules have 78 | // been configured, it will set up a default caching evaluator module. If there are any problems, it is expected that 79 | // this func will panic with a descriptive error message, as configuration failures are not recoverable errors. 
80 | func (ec *Coordinator) Configure() { 81 | ec.Log.Info("configuring") 82 | 83 | ec.quitChannel = make(chan struct{}) 84 | ec.modules = make(map[string]protocol.Module) 85 | 86 | modules := viper.GetStringMap("evaluator") 87 | switch len(modules) { 88 | case 0: 89 | // Create a default module 90 | viper.Set("evaluator.default.class-name", "caching") 91 | modules = viper.GetStringMap("evaluator") 92 | case 1: 93 | // Have one module. Just continue 94 | break 95 | default: 96 | panic("Only one evaluator module must be configured") 97 | } 98 | 99 | // Create all configured evaluator modules, add to list of evaluators 100 | for name := range modules { 101 | configRoot := "evaluator." + name 102 | module := getModuleForClass(ec.App, name, viper.GetString(configRoot+".class-name")) 103 | module.Configure(name, configRoot) 104 | ec.modules[name] = module 105 | } 106 | } 107 | 108 | // Start calls the evaluator module's underlying Start func. If the module Start returns an error, this func stops 109 | // immediately and returns that error to the caller. 110 | // 111 | // We also start a request forwarder goroutine. This listens to the EvaluatorChannel that is provided in the application 112 | // context that all modules receive, and forwards those requests to the evaluator modules. At the present time, the 113 | // evaluator only supports one module, so this is a simple "accept and forward". 
114 | func (ec *Coordinator) Start() error { 115 | ec.Log.Info("starting") 116 | 117 | // Start Evaluator modules 118 | err := helpers.StartCoordinatorModules(ec.modules) 119 | if err != nil { 120 | return errors.New("Error starting evaluator module: " + err.Error()) 121 | } 122 | 123 | // Start request forwarder 124 | go func() { 125 | // We only support 1 module right now, so only send to that module 126 | var channel chan *protocol.EvaluatorRequest 127 | for _, module := range ec.modules { 128 | channel = module.(Module).GetCommunicationChannel() 129 | } 130 | 131 | for { 132 | select { 133 | case request := <-ec.App.EvaluatorChannel: 134 | // Yes, this forwarder is silly. However, in the future we want to support multiple evaluator modules 135 | // concurrently. However, that will require implementing a router that properly handles requests and 136 | // makes sure that only 1 evaluator responds 137 | channel <- request 138 | case <-ec.quitChannel: 139 | return 140 | } 141 | } 142 | }() 143 | 144 | return nil 145 | } 146 | 147 | // Stop calls the configured evaluator module's underlying Stop func. It is expected that the module Stop will not 148 | // return until the module has been completely stopped. While an error can be returned, this func always returns no 149 | // error, as a failure during stopping is not a critical failure 150 | func (ec *Coordinator) Stop() error { 151 | ec.Log.Info("stopping") 152 | 153 | close(ec.quitChannel) 154 | 155 | // The individual storage modules can choose whether or not to implement a wait in the Stop routine 156 | helpers.StopCoordinatorModules(ec.modules) 157 | return nil 158 | } 159 | -------------------------------------------------------------------------------- /core/internal/evaluator/coordinator_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. 
Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package evaluator 11 | 12 | import ( 13 | "testing" 14 | 15 | "github.com/spf13/viper" 16 | "github.com/stretchr/testify/assert" 17 | "go.uber.org/zap" 18 | 19 | "github.com/linkedin/Burrow/core/protocol" 20 | ) 21 | 22 | func fixtureCoordinator() *Coordinator { 23 | coordinator := Coordinator{ 24 | Log: zap.NewNop(), 25 | } 26 | coordinator.App = &protocol.ApplicationContext{ 27 | Logger: zap.NewNop(), 28 | StorageChannel: make(chan *protocol.StorageRequest), 29 | } 30 | 31 | viper.Reset() 32 | viper.Set("evaluator.test.class-name", "caching") 33 | viper.Set("evaluator.test.expire-cache", 30) 34 | viper.Set("cluster.test.class-name", "kafka") 35 | viper.Set("cluster.test.servers", []string{"broker1.example.com:1234"}) 36 | 37 | return &coordinator 38 | } 39 | 40 | func TestCoordinator_ImplementsCoordinator(t *testing.T) { 41 | assert.Implements(t, (*protocol.Coordinator)(nil), new(Coordinator)) 42 | } 43 | 44 | func TestCoordinator_Configure(t *testing.T) { 45 | coordinator := fixtureCoordinator() 46 | coordinator.Configure() 47 | 48 | assert.Lenf(t, coordinator.modules, 1, "Expected 1 module configured, not %v", len(coordinator.modules)) 49 | } 50 | 51 | func TestCoordinator_Configure_NoModules(t *testing.T) { 52 | coordinator := fixtureCoordinator() 53 | viper.Reset() 54 | viper.Set("cluster.test.class-name", "kafka") 55 | viper.Set("cluster.test.servers", []string{"broker1.example.com:1234"}) 56 | 57 | coordinator.Configure() 58 | assert.Lenf(t, coordinator.modules, 1, "Expected 1 module 
configured, not %v", len(coordinator.modules)) 59 | } 60 | 61 | func TestCoordinator_Configure_TwoModules(t *testing.T) { 62 | coordinator := fixtureCoordinator() 63 | viper.Set("evaluator.anothertest.class-name", "caching") 64 | viper.Set("evaluator.anothertest.expire-cache", 30) 65 | 66 | assert.Panics(t, coordinator.Configure, "Expected panic") 67 | } 68 | 69 | func TestCoordinator_Start(t *testing.T) { 70 | evaluatorCoordinator, storageCoordinator := StorageAndEvaluatorCoordinatorsWithOffsets() 71 | 72 | // Best is to test a request that we know the response to 73 | request := &protocol.EvaluatorRequest{ 74 | Reply: make(chan *protocol.ConsumerGroupStatus), 75 | Cluster: "testcluster", 76 | Group: "testgroup", 77 | ShowAll: true, 78 | } 79 | evaluatorCoordinator.App.EvaluatorChannel <- request 80 | response := <-request.Reply 81 | 82 | assert.Equalf(t, protocol.StatusOK, response.Status, "Expected status to be OK, not %v", response.Status.String()) 83 | assert.Equalf(t, float32(1.0), response.Complete, "Expected complete to be 1.0, not %v", response.Complete) 84 | assert.Equalf(t, 1, response.TotalPartitions, "Expected total_partitions to be 1, not %v", response.TotalPartitions) 85 | assert.Equalf(t, uint64(2421), response.TotalLag, "Expected total_lag to be 2421, not %v", response.TotalLag) 86 | assert.Equalf(t, "testcluster", response.Cluster, "Expected cluster to be testcluster, not %v", response.Cluster) 87 | assert.Equalf(t, "testgroup", response.Group, "Expected group to be testgroup, not %v", response.Group) 88 | assert.Lenf(t, response.Partitions, 1, "Expected 1 partition status objects, not %v", len(response.Partitions)) 89 | 90 | evaluatorCoordinator.Stop() 91 | storageCoordinator.Stop() 92 | } 93 | 94 | func TestCoordinator_MultipleRequests(t *testing.T) { 95 | evaluatorCoordinator, storageCoordinator := StorageAndEvaluatorCoordinatorsWithOffsets() 96 | 97 | // This test is really just to check and make sure the evaluator can handle multiple requests 
without deadlock 98 | for i := 0; i < 10; i++ { 99 | request := &protocol.EvaluatorRequest{ 100 | Reply: make(chan *protocol.ConsumerGroupStatus), 101 | Cluster: "testcluster", 102 | Group: "testgroup", 103 | ShowAll: true, 104 | } 105 | evaluatorCoordinator.App.EvaluatorChannel <- request 106 | response := <-request.Reply 107 | 108 | assert.Equalf(t, protocol.StatusOK, response.Status, "Expected status to be OK, not %v", response.Status.String()) 109 | } 110 | // Best is to test a request that we know the response to 111 | 112 | evaluatorCoordinator.Stop() 113 | storageCoordinator.Stop() 114 | } 115 | -------------------------------------------------------------------------------- /core/internal/evaluator/fixtures.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package evaluator 11 | 12 | import ( 13 | "github.com/spf13/viper" 14 | "go.uber.org/zap" 15 | 16 | "github.com/linkedin/Burrow/core/internal/storage" 17 | "github.com/linkedin/Burrow/core/protocol" 18 | ) 19 | 20 | // StorageAndEvaluatorCoordinatorsWithOffsets sets up a Coordinator with a single caching module defined. In order to do 21 | // this, it also calls the storage subsystem fixture to get a configured storage.Coordinator with offsets for a test 22 | // cluster and group. This func should never be called in normal code. It is only provided to facilitate testing by 23 | // other subsystems. 
24 | func StorageAndEvaluatorCoordinatorsWithOffsets() (*Coordinator, *storage.Coordinator) { 25 | storageCoordinator := storage.CoordinatorWithOffsets() 26 | 27 | evaluatorCoordinator := Coordinator{ 28 | Log: zap.NewNop(), 29 | } 30 | evaluatorCoordinator.App = storageCoordinator.App 31 | evaluatorCoordinator.App.EvaluatorChannel = make(chan *protocol.EvaluatorRequest) 32 | 33 | viper.Set("evaluator.test.class-name", "caching") 34 | viper.Set("evaluator.test.expire-cache", 30) 35 | 36 | evaluatorCoordinator.Configure() 37 | evaluatorCoordinator.Start() 38 | 39 | return &evaluatorCoordinator, storageCoordinator 40 | } 41 | -------------------------------------------------------------------------------- /core/internal/helpers/coordinators.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | // Package helpers - Common utilities. 11 | // The helpers subsystem provides common utilities that can be used by all subsystems. This includes utilities for 12 | // coordinators to start and stop modules, as well as Kafka and Zookeeper client implementations. There are also a 13 | // number of mocks that are provided for testing purposes only, and should not be used in normal code. 
14 | package helpers 15 | 16 | import ( 17 | "regexp" 18 | "time" 19 | 20 | "github.com/stretchr/testify/mock" 21 | "go.uber.org/zap" 22 | 23 | "github.com/linkedin/Burrow/core/protocol" 24 | ) 25 | 26 | // StartCoordinatorModules is a helper func for coordinators to start a list of modules. Given a map of protocol.Module, 27 | // it calls the Start func on each one. If any module returns an error, it immediately stops and returns that error 28 | func StartCoordinatorModules(modules map[string]protocol.Module) error { 29 | // Start all the modules, returning an error if any fail to start 30 | for _, module := range modules { 31 | err := module.Start() 32 | if err != nil { 33 | return err 34 | } 35 | } 36 | return nil 37 | } 38 | 39 | // StopCoordinatorModules is a helper func for coordinators to stop a list of modules. Given a map of protocol.Module, 40 | // it calls the Stop func on each one. Any errors that are returned are ignored. 41 | func StopCoordinatorModules(modules map[string]protocol.Module) { 42 | // Stop all the modules passed in 43 | for _, module := range modules { 44 | module.Stop() 45 | } 46 | } 47 | 48 | // MockModule is a mock of protocol.Module that also satisfies the various subsystem Module variants, and is used in 49 | // tests. It should never be used in the normal code. 
50 | type MockModule struct { 51 | mock.Mock 52 | } 53 | 54 | // Configure mocks the protocol.Module Configure func 55 | func (m *MockModule) Configure(name, configRoot string) { 56 | m.Called(name, configRoot) 57 | } 58 | 59 | // Start mocks the protocol.Module Start func 60 | func (m *MockModule) Start() error { 61 | args := m.Called() 62 | return args.Error(0) 63 | } 64 | 65 | // Stop mocks the protocol.Module Stop func 66 | func (m *MockModule) Stop() error { 67 | args := m.Called() 68 | return args.Error(0) 69 | } 70 | 71 | // GetName mocks the notifier.Module GetName func 72 | func (m *MockModule) GetName() string { 73 | args := m.Called() 74 | return args.String(0) 75 | } 76 | 77 | // GetGroupAllowlist mocks the notifier.Module GetGroupAllowlist func 78 | func (m *MockModule) GetGroupAllowlist() *regexp.Regexp { 79 | args := m.Called() 80 | return args.Get(0).(*regexp.Regexp) 81 | } 82 | 83 | // GetGroupDenylist mocks the notifier.Module GetGroupDenylist func 84 | func (m *MockModule) GetGroupDenylist() *regexp.Regexp { 85 | args := m.Called() 86 | return args.Get(0).(*regexp.Regexp) 87 | } 88 | 89 | // GetLogger mocks the notifier.Module GetLogger func 90 | func (m *MockModule) GetLogger() *zap.Logger { 91 | args := m.Called() 92 | return args.Get(0).(*zap.Logger) 93 | } 94 | 95 | // AcceptConsumerGroup mocks the notifier.Module AcceptConsumerGroup func 96 | func (m *MockModule) AcceptConsumerGroup(status *protocol.ConsumerGroupStatus) bool { 97 | args := m.Called(status) 98 | return args.Bool(0) 99 | } 100 | 101 | // Notify mocks the notifier.Module Notify func 102 | func (m *MockModule) Notify(status *protocol.ConsumerGroupStatus, eventID string, startTime time.Time, stateGood bool) { 103 | m.Called(status, eventID, startTime, stateGood) 104 | } 105 | -------------------------------------------------------------------------------- /core/internal/helpers/coordinators_test.go: -------------------------------------------------------------------------------- 
1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package helpers 11 | 12 | import ( 13 | "errors" 14 | "testing" 15 | 16 | "github.com/stretchr/testify/assert" 17 | 18 | "github.com/linkedin/Burrow/core/protocol" 19 | ) 20 | 21 | func TestStartCoordinatorModules(t *testing.T) { 22 | mock1 := &MockModule{} 23 | mock2 := &MockModule{} 24 | modules := map[string]protocol.Module{ 25 | "mock1": mock1, 26 | "mock2": mock2, 27 | } 28 | 29 | mock1.On("Start").Return(nil) 30 | mock2.On("Start").Return(nil) 31 | err := StartCoordinatorModules(modules) 32 | 33 | assert.Nil(t, err, "Expected error to be nil") 34 | mock1.AssertExpectations(t) 35 | mock2.AssertExpectations(t) 36 | } 37 | 38 | func TestStartCoordinatorModules_Error(t *testing.T) { 39 | mock1 := &MockModule{} 40 | mock2 := &MockModule{} 41 | modules := map[string]protocol.Module{ 42 | "mock1": mock1, 43 | "mock2": mock2, 44 | } 45 | 46 | mock1.On("Start").Return(nil) 47 | mock2.On("Start").Return(errors.New("bad start")) 48 | err := StartCoordinatorModules(modules) 49 | 50 | assert.NotNil(t, err, "Expected error to be nil") 51 | // Can't assert expectations, as it's possible that mock1 won't be called due to non-deterministic ordering of range 52 | } 53 | 54 | func TestStopCoordinatorModules(t *testing.T) { 55 | mock1 := &MockModule{} 56 | mock2 := &MockModule{} 57 | modules := map[string]protocol.Module{ 58 | "mock1": mock1, 59 | "mock2": mock2, 60 | } 61 | 62 | mock1.On("Stop").Return(nil) 63 | mock2.On("Stop").Return(nil) 64 | 
StopCoordinatorModules(modules) 65 | 66 | mock1.AssertExpectations(t) 67 | mock2.AssertExpectations(t) 68 | } 69 | -------------------------------------------------------------------------------- /core/internal/helpers/sarama_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package helpers 11 | 12 | import ( 13 | "testing" 14 | 15 | "go.uber.org/zap" 16 | "go.uber.org/zap/zapcore" 17 | 18 | "github.com/IBM/sarama" 19 | "github.com/stretchr/testify/assert" 20 | ) 21 | 22 | func TestBurrowSaramaClient_ImplementsSaramaClient(t *testing.T) { 23 | assert.Implements(t, (*SaramaClient)(nil), new(BurrowSaramaClient)) 24 | } 25 | 26 | func TestMockSaramaClient_ImplementsSaramaClient(t *testing.T) { 27 | assert.Implements(t, (*SaramaClient)(nil), new(MockSaramaClient)) 28 | } 29 | 30 | func TestBurrowSaramaBroker_ImplementsSaramaBroker(t *testing.T) { 31 | assert.Implements(t, (*SaramaBroker)(nil), new(BurrowSaramaBroker)) 32 | } 33 | 34 | func TestMockSaramaBroker_ImplementsSaramaBroker(t *testing.T) { 35 | assert.Implements(t, (*SaramaBroker)(nil), new(MockSaramaBroker)) 36 | } 37 | 38 | func TestMockSaramaConsumer_ImplementsSaramaConsumer(t *testing.T) { 39 | assert.Implements(t, (*sarama.Consumer)(nil), new(MockSaramaConsumer)) 40 | } 41 | 42 | func TestMockSaramaPartitionConsumer_ImplementsSaramaPartitionConsumer(t *testing.T) { 43 | assert.Implements(t, (*sarama.PartitionConsumer)(nil), new(MockSaramaPartitionConsumer)) 44 | 
} 45 | 46 | func TestInitSaramaLogging(t *testing.T) { 47 | // given 48 | var entries = make([]zapcore.Entry, 0) 49 | d, _ := zap.NewDevelopment() 50 | logger := zap.New(zapcore.RegisterHooks(d.Core(), func(entry zapcore.Entry) error { 51 | entries = append(entries, entry) 52 | return nil 53 | })) 54 | InitSaramaLogging(logger) 55 | 56 | // when 57 | sarama.Logger.Printf("hello") 58 | 59 | // then 60 | assert.Len(t, entries, 1) 61 | assert.Equal(t, entries[0].Message, "hello") 62 | assert.Equal(t, entries[0].Level, zap.DebugLevel) 63 | } 64 | 65 | func shouldPanicForVersion(t *testing.T, v string) { 66 | defer func() { recover() }() 67 | out := parseKafkaVersion(v) 68 | t.Errorf("Kafka version %s should have panicked, but got: %s", v, out.String()) 69 | } 70 | 71 | func TestVersionMapping(t *testing.T) { 72 | assert.Equal(t, parseKafkaVersion(""), sarama.V0_10_2_0) 73 | assert.Equal(t, parseKafkaVersion("0.10"), sarama.V0_10_0_0) 74 | assert.Equal(t, parseKafkaVersion("0.10.2"), sarama.V0_10_2_0) 75 | assert.Equal(t, parseKafkaVersion("0.10.2.0"), sarama.V0_10_2_0) 76 | // some other legacy cases 77 | assert.Equal(t, parseKafkaVersion("0.8.0"), sarama.V0_8_2_0) 78 | assert.Equal(t, parseKafkaVersion("0.8.1"), sarama.V0_8_2_1) 79 | assert.Equal(t, parseKafkaVersion("0.8.2"), sarama.V0_8_2_2) 80 | // and older versions that want to use the 4-part version 81 | assert.Equal(t, parseKafkaVersion("0.8.2.2"), sarama.V0_8_2_2) 82 | assert.Equal(t, parseKafkaVersion("0.10.2.1"), sarama.V0_10_2_1) 83 | assert.Equal(t, parseKafkaVersion("0.11.0.1"), sarama.V0_11_0_1) 84 | // check some of the newer versions 85 | assert.Equal(t, parseKafkaVersion("1.0.0"), sarama.V1_0_0_0) 86 | assert.Equal(t, parseKafkaVersion("1.0.2"), sarama.V1_0_2_0) 87 | assert.Equal(t, parseKafkaVersion("2.1.0"), sarama.V2_1_0_0) 88 | assert.Equal(t, parseKafkaVersion("2.2.0"), sarama.V2_2_0_0) 89 | assert.Equal(t, parseKafkaVersion("3.0.0"), sarama.V3_0_0_0) 90 | 91 | // check that we fail a 4-part 
version for newer versions 92 | shouldPanicForVersion(t, "3.0.0.0") 93 | // or for other unknown/unsupported versions 94 | shouldPanicForVersion(t, "foo") 95 | } 96 | -------------------------------------------------------------------------------- /core/internal/helpers/scram.go: -------------------------------------------------------------------------------- 1 | package helpers 2 | 3 | import ( 4 | "crypto/sha256" 5 | "crypto/sha512" 6 | 7 | "github.com/xdg/scram" 8 | ) 9 | 10 | var SHA256 scram.HashGeneratorFcn = sha256.New 11 | var SHA512 scram.HashGeneratorFcn = sha512.New 12 | 13 | type XDGSCRAMClient struct { 14 | *scram.Client 15 | *scram.ClientConversation 16 | scram.HashGeneratorFcn 17 | } 18 | 19 | func (x *XDGSCRAMClient) Begin(userName, password, authzID string) (err error) { 20 | x.Client, err = x.HashGeneratorFcn.NewClient(userName, password, authzID) 21 | if err != nil { 22 | return err 23 | } 24 | x.ClientConversation = x.Client.NewConversation() 25 | return nil 26 | } 27 | 28 | func (x *XDGSCRAMClient) Step(challenge string) (response string, err error) { 29 | response, err = x.ClientConversation.Step(challenge) 30 | return 31 | } 32 | 33 | func (x *XDGSCRAMClient) Done() bool { 34 | return x.ClientConversation.Done() 35 | } 36 | -------------------------------------------------------------------------------- /core/internal/helpers/storage.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | 10 | package helpers 11 | 12 | import ( 13 | "time" 14 | 15 | "github.com/linkedin/Burrow/core/protocol" 16 | ) 17 | 18 | // TimeoutSendStorageRequest is a helper func for sending a protocol.StorageRequest to a channel with a timeout, 19 | // specified in seconds. If the request is sent, return true. Otherwise, if the timeout is hit, return false. 20 | func TimeoutSendStorageRequest(storageChannel chan *protocol.StorageRequest, request *protocol.StorageRequest, maxTime int) bool { 21 | timeout := time.After(time.Duration(maxTime) * time.Second) 22 | select { 23 | case storageChannel <- request: 24 | return true 25 | case <-timeout: 26 | return false 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /core/internal/helpers/storage_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package helpers 11 | 12 | import ( 13 | "testing" 14 | "time" 15 | 16 | "github.com/stretchr/testify/assert" 17 | 18 | "github.com/linkedin/Burrow/core/protocol" 19 | ) 20 | 21 | func TestTimeoutSendStorageRequest(t *testing.T) { 22 | storageChannel := make(chan *protocol.StorageRequest) 23 | storageRequest := &protocol.StorageRequest{} 24 | 25 | go TimeoutSendStorageRequest(storageChannel, storageRequest, 1) 26 | 27 | // Sleep for 0.5 seconds before reading. 
There should be a storage request waiting 28 | time.Sleep(500 * time.Millisecond) 29 | readRequest := <-storageChannel 30 | 31 | assert.Equal(t, storageRequest, readRequest, "Expected to receive the same storage request") 32 | } 33 | 34 | func TestTimeoutSendStorageRequest_Timeout(t *testing.T) { 35 | storageChannel := make(chan *protocol.StorageRequest) 36 | storageRequest := &protocol.StorageRequest{} 37 | 38 | go TimeoutSendStorageRequest(storageChannel, storageRequest, 1) 39 | 40 | // Sleep for 1.5 seconds before reading. There should be nothing waiting 41 | time.Sleep(1500 * time.Millisecond) 42 | 43 | select { 44 | case <-storageChannel: 45 | assert.Fail(t, "Expected to not receive storage request after timeout") 46 | default: 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /core/internal/helpers/time.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package helpers 11 | 12 | import ( 13 | "time" 14 | 15 | "github.com/stretchr/testify/mock" 16 | ) 17 | 18 | // Ticker is a generic interface for a channel that delivers `ticks' of a clock at intervals. 
19 | type Ticker interface { 20 | // Start sending ticks over the channel 21 | Start() 22 | 23 | // Stop sending ticks over the channel 24 | Stop() 25 | 26 | // Return the channel that ticks will be sent over 27 | GetChannel() <-chan time.Time 28 | } 29 | 30 | // PausableTicker is an implementation of Ticker which can be stopped and restarted without changing the underlying 31 | // channel. This is useful for cases where you may need to stop performing actions for a while (such as sending 32 | // notifications), but you do not want to tear down everything. 33 | type PausableTicker struct { 34 | channel chan time.Time 35 | duration time.Duration 36 | ticker *time.Ticker 37 | quitChannel chan struct{} 38 | } 39 | 40 | // NewPausableTicker returns a Ticker that has not yet been started, but the channel is ready to use. This ticker can be 41 | // started and stopped multiple times without needing to swap the ticker channel 42 | func NewPausableTicker(d time.Duration) Ticker { 43 | return &PausableTicker{ 44 | channel: make(chan time.Time), 45 | duration: d, 46 | ticker: nil, 47 | } 48 | } 49 | 50 | // Start begins sending ticks over the channel at the interval that has already been configured. If the ticker is 51 | // already sending ticks, this func has no effect. 
52 | func (ticker *PausableTicker) Start() { 53 | if ticker.ticker != nil { 54 | // Don't restart a ticker that's already running 55 | return 56 | } 57 | 58 | // Channel to be able to close the goroutine 59 | ticker.quitChannel = make(chan struct{}) 60 | 61 | // Start the ticker 62 | ticker.ticker = time.NewTicker(ticker.duration) 63 | 64 | // This goroutine will forward the ticker ticks to our exposed channel 65 | go func(tickerChan <-chan time.Time, quitChan chan struct{}) { 66 | for { 67 | select { 68 | case tick := <-tickerChan: 69 | ticker.channel <- tick 70 | case <-quitChan: 71 | return 72 | } 73 | } 74 | }(ticker.ticker.C, ticker.quitChannel) 75 | } 76 | 77 | // Stop stops ticks from being sent over the channel. If the ticker is not currently sending ticks, this func has no 78 | // effect 79 | func (ticker *PausableTicker) Stop() { 80 | if ticker.ticker == nil { 81 | // Don't stop an already stopped ticker 82 | return 83 | } 84 | 85 | // Stop the underlying ticker 86 | ticker.ticker.Stop() 87 | ticker.ticker = nil 88 | 89 | // Tell our goroutine to quit 90 | close(ticker.quitChannel) 91 | } 92 | 93 | // GetChannel returns the channel over which ticks will be sent. This channel can be used over multiple Start/Stop 94 | // cycles, and will not be closed. 95 | func (ticker *PausableTicker) GetChannel() <-chan time.Time { 96 | return ticker.channel 97 | } 98 | 99 | // MockTicker is a mock Ticker interface that can be used for testing. It should not be used in normal code. 
100 | type MockTicker struct { 101 | mock.Mock 102 | } 103 | 104 | // Start mocks Ticker.Start 105 | func (m *MockTicker) Start() { 106 | m.Called() 107 | } 108 | 109 | // Stop mocks Ticker.Stop 110 | func (m *MockTicker) Stop() { 111 | m.Called() 112 | } 113 | 114 | // GetChannel mocks Ticker.GetChannel 115 | func (m *MockTicker) GetChannel() <-chan time.Time { 116 | args := m.Called() 117 | return args.Get(0).(<-chan time.Time) 118 | } 119 | -------------------------------------------------------------------------------- /core/internal/helpers/time_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | 10 | package helpers 11 | 12 | import ( 13 | "sync" 14 | "testing" 15 | "time" 16 | 17 | "github.com/stretchr/testify/assert" 18 | ) 19 | 20 | func TestPausableTicker_ImplementsTicker(t *testing.T) { 21 | assert.Implements(t, (*Ticker)(nil), new(PausableTicker)) 22 | } 23 | 24 | func TestPausableTicker_New(t *testing.T) { 25 | ticker := NewPausableTicker(5 * time.Millisecond) 26 | assert.Implements(t, (*Ticker)(nil), ticker) 27 | 28 | // We shouldn't get any events across the channel 29 | quitChan := make(chan struct{}) 30 | channel := ticker.GetChannel() 31 | go func() { 32 | select { 33 | case <-channel: 34 | assert.Fail(t, "Expected to receive no event on ticker channel") 35 | case <-quitChan: 36 | break 37 | } 38 | }() 39 | 40 | time.Sleep(25 * time.Millisecond) 41 | close(quitChan) 42 | } 43 | 44 | func TestPausableTicker_StartStop(t *testing.T) { 45 | ticker := NewPausableTicker(20 * time.Millisecond) 46 | ticker.Start() 47 | 48 | numEvents := 0 49 | quitChan := make(chan struct{}) 50 | channel := ticker.GetChannel() 51 | wg := sync.WaitGroup{} 52 | wg.Add(1) 53 | go func() { 54 | defer wg.Done() 55 | for { 56 | select { 57 | case <-channel: 58 | numEvents++ 59 | case <-quitChan: 60 | return 61 | } 62 | } 63 | }() 64 | 65 | time.Sleep(50 * time.Millisecond) 66 | ticker.Stop() 67 | time.Sleep(50 * time.Millisecond) 68 | close(quitChan) 69 | wg.Wait() 70 | 71 | assert.Equalf(t, 2, numEvents, "Expected 2 events, not %v", numEvents) 72 | } 73 | 74 | func TestPausableTicker_Restart(t *testing.T) { 75 | ticker := NewPausableTicker(20 * time.Millisecond) 76 | ticker.Start() 77 | 78 | numEvents := 0 79 | quitChan := make(chan struct{}) 80 | channel := ticker.GetChannel() 81 | wg := sync.WaitGroup{} 82 | wg.Add(1) 83 | go func() { 84 | defer wg.Done() 85 | for { 86 | select { 87 | case <-channel: 88 | numEvents++ 89 | case <-quitChan: 90 | return 91 | } 92 | } 93 | }() 94 | 95 | time.Sleep(50 * time.Millisecond) 96 | ticker.Stop() 97 | time.Sleep(50 * 
time.Millisecond) 98 | ticker.Start() 99 | time.Sleep(50 * time.Millisecond) 100 | ticker.Stop() 101 | close(quitChan) 102 | wg.Wait() 103 | 104 | assert.Equalf(t, 4, numEvents, "Expected 4 events, not %v", numEvents) 105 | } 106 | -------------------------------------------------------------------------------- /core/internal/helpers/validation.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package helpers 11 | 12 | import ( 13 | "net" 14 | "net/url" 15 | "regexp" 16 | "strconv" 17 | "strings" 18 | ) 19 | 20 | // ValidateIP returns true if the provided string can be parsed as an IP address (either IPv4 or IPv6). 21 | func ValidateIP(ipaddr string) bool { 22 | addr := net.ParseIP(ipaddr) 23 | return addr != nil 24 | } 25 | 26 | // ValidateHostname returns true if the provided string can be parsed as a hostname. In general this means: 27 | // 28 | // * One or more segments delimited by a '.' 29 | // * Each segment can be no more than 63 characters long 30 | // * Valid characters in a segment are letters, numbers, and dashes 31 | // * Segments may not start or end with a dash 32 | // * The exception is IPv6 addresses, which are also permitted. 33 | // * An underscore is allowed to support Docker Swarm service names. 
34 | func ValidateHostname(hostname string) bool { 35 | matches, _ := regexp.MatchString(`^([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])(\.([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]{0,61}[a-zA-Z0-9]))*$`, hostname) 36 | 37 | if !matches { 38 | // Try Docker Swarm service name 39 | matchesDocker, _ := regexp.MatchString(`^([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])\_([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])$`, hostname) 40 | if !matchesDocker { 41 | // Try as an IP address 42 | return ValidateIP(hostname) 43 | } 44 | return true 45 | } 46 | 47 | return matches 48 | } 49 | 50 | // ValidateZookeeperPath returns true if the provided string can be parsed as a Zookeeper node path. This means that it 51 | // starts with a forward slash, and contains one or more segments that are separated by slashes (but does not end with 52 | // a slash). 53 | func ValidateZookeeperPath(path string) bool { 54 | parts := strings.Split(path, "/") 55 | if (len(parts) < 2) || (parts[0] != "") { 56 | return false 57 | } 58 | if (len(parts) == 2) && (parts[1] == "") { 59 | // Root node is OK 60 | return true 61 | } 62 | 63 | nodeRegexp := regexp.MustCompile(`^[a-zA-Z0-9_\-][a-zA-Z0-9_\-.]*$`) 64 | for i, node := range parts { 65 | if i == 0 { 66 | continue 67 | } 68 | if !nodeRegexp.MatchString(node) { 69 | return false 70 | } 71 | } 72 | return true 73 | } 74 | 75 | // ValidateTopic returns true if the provided string is a valid topic name, which may only contain letters, numbers, 76 | // underscores, dashes, and periods. 77 | func ValidateTopic(topic string) bool { 78 | matches, _ := regexp.MatchString(`^[a-zA-Z0-9_.-]+$`, topic) 79 | return matches 80 | } 81 | 82 | // ValidateFilename returns true if the provided string is a sane-looking filename (not just a valid filename, which 83 | // could be almost anything). Right now, this is defined to be the same thing as ValidateTopic. 
84 | func ValidateFilename(filename string) bool { 85 | return ValidateTopic(filename) 86 | } 87 | 88 | // ValidateEmail returns true if the provided string is an email address. This is a very simplistic validator - the 89 | // string must be of the form (something)@(something).(something) 90 | func ValidateEmail(email string) bool { 91 | matches, _ := regexp.MatchString(`^.+@.+\..+$`, email) 92 | return matches 93 | } 94 | 95 | // ValidateURL returns true if the provided string can be parsed as a URL. We use the net/url Parse func for this. 96 | func ValidateURL(rawURL string) bool { 97 | _, err := url.Parse(rawURL) 98 | return err == nil 99 | } 100 | 101 | // ValidateHostList returns true if the provided slice of strings can all be parsed by ValidateHostPort 102 | func ValidateHostList(hosts []string) bool { 103 | for _, host := range hosts { 104 | if !ValidateHostPort(host, false) { 105 | return false 106 | } 107 | } 108 | 109 | return true 110 | } 111 | 112 | // ValidateHostPort returns true if the provided string is of the form "hostname:port", where hostname is a valid 113 | // hostname or IP address (as parsed by ValidateIP or ValidateHostname), and port is a valid integer. 114 | func ValidateHostPort(host string, allowBlankHost bool) bool { 115 | // Must be hostname:port, ipv4:port, or [ipv6]:port. 
Optionally allow blank hostname 116 | hostname, portString, err := net.SplitHostPort(host) 117 | if err != nil { 118 | return false 119 | } 120 | 121 | // Validate the port is a numeric (yeah, strings are valid in some places, but we don't support it) 122 | _, err = strconv.Atoi(portString) 123 | if err != nil { 124 | return false 125 | } 126 | 127 | // Listeners can have blank hostnames, so we'll skip validation if that's what we're looking for 128 | if allowBlankHost && hostname == "" { 129 | return true 130 | } 131 | 132 | // Only IPv6 can contain : 133 | if strings.Contains(hostname, ":") && (!ValidateIP(hostname)) { 134 | return false 135 | } 136 | 137 | // If all the parts of the hostname are numbers, validate as IP. Otherwise, it's a hostname 138 | hostnameParts := strings.Split(hostname, ".") 139 | isIP4 := true 140 | for _, section := range hostnameParts { 141 | _, err := strconv.Atoi(section) 142 | if err != nil { 143 | isIP4 = false 144 | break 145 | } 146 | } 147 | if isIP4 { 148 | return ValidateIP(hostname) 149 | } 150 | return ValidateHostname(hostname) 151 | } 152 | -------------------------------------------------------------------------------- /core/internal/helpers/validation_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | 10 | package helpers 11 | 12 | import ( 13 | "testing" 14 | 15 | "github.com/stretchr/testify/assert" 16 | ) 17 | 18 | type TestSet struct { 19 | TestValue string 20 | Result bool 21 | } 22 | 23 | var testIP = []TestSet{ 24 | {"1.2.3.4", true}, 25 | {"127.0.0.1", true}, 26 | {"204.27.175.1", true}, 27 | {"255.255.255.255", true}, 28 | {"256.1.2.3", false}, 29 | {"1.2.3.4.5", false}, 30 | {"notanip", false}, 31 | {"2001:0db8:0a0b:12f0:0000:0000:0000:0001", true}, 32 | {"2001:db8:a0b:12f0::1", true}, 33 | {"2001:db8::1", true}, 34 | {"2001:db8::2:1", true}, 35 | {"2001:db8:0:1:1:1:1:1", true}, 36 | {"2001:0db8:0a0b:12f0:0000:0000:0000:0001:0004", false}, 37 | } 38 | 39 | func TestValidateIP(t *testing.T) { 40 | for i, testSet := range testIP { 41 | result := ValidateIP(testSet.TestValue) 42 | assert.Equalf(t, testSet.Result, result, "Test %v - Expected '%v' to return %v, not %v", i, testSet.TestValue, testSet.Result, result) 43 | } 44 | } 45 | 46 | var testHostnames = []TestSet{ 47 | {"hostname", true}, 48 | {"host0", true}, 49 | {"host.example.com", true}, 50 | {"example.com", true}, 51 | {"thissegmentiswaytoolongbecauseitshouldnotbemorethansixtythreecharacters.foo.com", false}, 52 | {"underscores_are.not.valid.com", false}, 53 | {"800.hostnames.starting.with.numbers.are.valid.because.people.suck.org", true}, 54 | {"hostnames-.may.not.end.with.a.dash.com", false}, 55 | {"no spaces.com", false}, 56 | {"docker_service.name.should.not.contain.dots", false}, 57 | {"docker-swarmservice_name-with-one-underscore-is-valid", true}, 58 | {"invalid-docker-_service-name", false}, 59 | {"invalid-docker_-service-name", false}, 60 | {"docker-service-may-not-end-with-underscore_", false}, 61 | {"_docker-service-may-not-start-with-underscore", false}, 62 | } 63 | 64 | func TestValidateHostname(t *testing.T) { 65 | for i, testSet := range testHostnames { 66 | result := ValidateHostname(testSet.TestValue) 67 | assert.Equalf(t, testSet.Result, result, "Test %v - Expected '%v' to 
return %v, not %v", i, testSet.TestValue, testSet.Result, result) 68 | } 69 | } 70 | 71 | var testZkPaths = []TestSet{ 72 | {"/", true}, 73 | {"", false}, 74 | {"/no/trailing/slash/", false}, 75 | {"/this/is/fine", true}, 76 | {"/underscores_are/ok", true}, 77 | {"/dashes-are/fine/too", true}, 78 | {"/no spaces/in/paths", false}, 79 | } 80 | 81 | func TestValidateZookeeperPath(t *testing.T) { 82 | for i, testSet := range testZkPaths { 83 | result := ValidateZookeeperPath(testSet.TestValue) 84 | assert.Equalf(t, testSet.Result, result, "Test %v - Expected '%v' to return %v, not %v", i, testSet.TestValue, testSet.Result, result) 85 | } 86 | } 87 | 88 | var testTopics = []TestSet{ 89 | {"metrics", true}, 90 | {"__consumer_offsets", true}, 91 | {"stars*arent_valid_you_monster", false}, 92 | {"dashes-are-ok", true}, 93 | {"numbers0-are_fine", true}, 94 | {"no spaces", false}, 95 | {"dots.are_ok", true}, 96 | } 97 | 98 | func TestValidateTopic(t *testing.T) { 99 | for i, testSet := range testTopics { 100 | result := ValidateTopic(testSet.TestValue) 101 | assert.Equalf(t, testSet.Result, result, "Test %v - Expected '%v' to return %v, not %v", i, testSet.TestValue, testSet.Result, result) 102 | } 103 | } 104 | 105 | var testEmails = []TestSet{ 106 | {"ok@example.com", true}, 107 | {"need@domain", false}, 108 | {"gotta.have.an.at", false}, 109 | {"nogood@", false}, 110 | {"this.is@ok.com", true}, 111 | } 112 | 113 | func TestValidateEmail(t *testing.T) { 114 | for i, testSet := range testEmails { 115 | result := ValidateEmail(testSet.TestValue) 116 | assert.Equalf(t, testSet.Result, result, "Test %v - Expected '%v' to return %v, not %v", i, testSet.TestValue, testSet.Result, result) 117 | } 118 | } 119 | 120 | var testUrls = []TestSet{ 121 | {"http://foo.com/blah_blah", true}, 122 | {"http://foo.com/blah_blah/", true}, 123 | {"http://www.example.com/wpstyle/?p=364", true}, 124 | {"https://www.example.com/foo/?bar=baz&inga=42&quux", true}, 125 | {"http://✪df.ws/123", true}, 
126 | {"http://userid:password@example.com:8080", true}, 127 | {"http://userid:password@example.com:8080/", true}, 128 | {"http://userid@example.com", true}, 129 | {"http://userid@example.com/", true}, 130 | {"http://userid@example.com:8080", true}, 131 | {"http://userid@example.com:8080/", true}, 132 | {"http://userid:password@example.com", true}, 133 | {"http://userid:password@example.com/", true}, 134 | {"http://142.42.1.1/", true}, 135 | {"http://142.42.1.1:8080/", true}, 136 | {"http://➡.ws/䨹", true}, 137 | {"http://⌘.ws", true}, 138 | {"http://⌘.ws/", true}, 139 | {"http://foo.com/blah_(wikipedia)#cite-1", true}, 140 | {"http://foo.com/blah_(wikipedia)_blah#cite-1", true}, 141 | {"http://foo.com/unicode_(✪)_in_parens", true}, 142 | {"http://foo.com/(something)?after=parens", true}, 143 | {"http://☺.damowmow.com/", true}, 144 | {"http://code.google.com/events/#&product=browser", true}, 145 | {"http://j.mp", true}, 146 | {"ftp://foo.bar/baz", true}, 147 | {"http://foo.bar/?q=Test%20URL-encoded%20stuff", true}, 148 | {"http://مثال.إختبار", true}, 149 | {"http://例子.测试", true}, 150 | {"http://उदाहरण.परीक्षा", true}, 151 | {"http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com", true}, 152 | {"http://1337.net", true}, 153 | {"http://a.b-c.de", true}, 154 | {"http://223.255.255.254", true}, 155 | {"http:// shouldfail.com", false}, 156 | {":// should fail", false}, 157 | } 158 | 159 | func TestValidateUrl(t *testing.T) { 160 | for i, testSet := range testUrls { 161 | result := ValidateURL(testSet.TestValue) 162 | assert.Equalf(t, testSet.Result, result, "Test %v - Expected '%v' to return %v, not %v", i, testSet.TestValue, testSet.Result, result) 163 | } 164 | } 165 | 166 | var testHostPorts = []TestSet{ 167 | {"1.2.3.4:3453", true}, 168 | {"127.0.0.1:2342", true}, 169 | {"204.27.175.1:4", true}, 170 | {"256.1.2.3:3743", false}, 171 | {"1.2.3.4.5:2452", false}, 172 | {"[2001:0db8:0a0b:12f0:0000:0000:0000:0001]:4356", true}, 173 | {"[2001:db8:a0b:12f0::1]:234", true}, 
174 | {"[2001:db8::1]:3453", true}, 175 | {"2001:db8:0:1:1:1:1:1:3453", false}, 176 | {"[2001:0db8:0a0b:12f0:0000:0000:0000:0001:0004]:4533", false}, 177 | {"hostname:3432", true}, 178 | {"host0:4234", true}, 179 | {"host.example.com:23", true}, 180 | {"thissegmentiswaytoolongbecauseitshouldnotbemorethansixtythreecharacters.foo.com:36334", false}, 181 | {"underscores_are.not.valid.com:3453", false}, 182 | } 183 | 184 | func TestValidateHostList(t *testing.T) { 185 | for i, testSet := range testHostPorts { 186 | result := ValidateHostList([]string{testSet.TestValue}) 187 | assert.Equalf(t, testSet.Result, result, "Test %v - Expected '%v' to return %v, not %v", i, testSet.TestValue, testSet.Result, result) 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /core/internal/helpers/zookeeper_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | 10 | package helpers 11 | 12 | import ( 13 | "testing" 14 | 15 | "github.com/stretchr/testify/assert" 16 | 17 | "github.com/linkedin/Burrow/core/protocol" 18 | ) 19 | 20 | func TestBurrowZookeeperClient_ImplementsZookeeperClient(t *testing.T) { 21 | assert.Implements(t, (*protocol.ZookeeperClient)(nil), new(BurrowZookeeperClient)) 22 | } 23 | 24 | func TestMockZookeeperClient_ImplementsZookeeperClient(t *testing.T) { 25 | assert.Implements(t, (*protocol.ZookeeperClient)(nil), new(MockZookeeperClient)) 26 | } 27 | -------------------------------------------------------------------------------- /core/internal/httpserver/coordinator_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | 10 | package httpserver 11 | 12 | import ( 13 | "encoding/json" 14 | "net/http" 15 | "net/http/httptest" 16 | "strings" 17 | "testing" 18 | "time" 19 | 20 | "github.com/spf13/viper" 21 | "github.com/stretchr/testify/assert" 22 | "go.uber.org/zap" 23 | 24 | "github.com/linkedin/Burrow/core/protocol" 25 | ) 26 | 27 | func fixtureConfiguredCoordinator() *Coordinator { 28 | logLevel := zap.NewAtomicLevelAt(zap.InfoLevel) 29 | 30 | coordinator := Coordinator{ 31 | Log: zap.NewNop(), 32 | App: &protocol.ApplicationContext{ 33 | Logger: zap.NewNop(), 34 | LogLevel: &logLevel, 35 | StorageChannel: make(chan *protocol.StorageRequest), 36 | EvaluatorChannel: make(chan *protocol.EvaluatorRequest), 37 | AppReady: false, 38 | }, 39 | } 40 | 41 | viper.Reset() 42 | coordinator.Configure() 43 | return &coordinator 44 | } 45 | 46 | func TestHttpServer_handleAdmin(t *testing.T) { 47 | coordinator := fixtureConfiguredCoordinator() 48 | 49 | // Set up a request 50 | req, err := http.NewRequest("GET", "/burrow/admin", http.NoBody) 51 | assert.NoError(t, err, "Expected request setup to return no error") 52 | 53 | // Call the handler via httprouter 54 | rr := httptest.NewRecorder() 55 | coordinator.router.ServeHTTP(rr, req) 56 | 57 | assert.Equalf(t, http.StatusOK, rr.Code, "Expected response code to be 200, not %v", rr.Code) 58 | assert.Equalf(t, "GOOD", rr.Body.String(), "Expected response body to be 'GOOD', not '%v'", rr.Body.String()) 59 | } 60 | 61 | func TestHttpServer_handleReady(t *testing.T) { 62 | coordinator := fixtureConfiguredCoordinator() 63 | 64 | // Set up a request 65 | req, err := http.NewRequest("GET", "/burrow/admin/ready", http.NoBody) 66 | assert.NoError(t, err, "Expected request setup to return no error") 67 | 68 | // Call the handler via httprouter, the app is not ready so we expect "STARTING" and HTTP 503 69 | rr := httptest.NewRecorder() 70 | coordinator.router.ServeHTTP(rr, req) 71 | assert.Equalf(t, http.StatusServiceUnavailable, rr.Code, "Expected 
response code to be 503, not %v", rr.Code) 72 | assert.Equalf(t, "STARTING", rr.Body.String(), "Expected response body to be 'STARTING', not '%v'", rr.Body.String()) 73 | 74 | // Change the AppReady, and try again 75 | coordinator.App.AppReady = true 76 | rr = httptest.NewRecorder() 77 | coordinator.router.ServeHTTP(rr, req) 78 | assert.Equalf(t, http.StatusOK, rr.Code, "Expected response code to be 200, not %v", rr.Code) 79 | assert.Equalf(t, "READY", rr.Body.String(), "Expected response body to be 'READY', not '%v'", rr.Body.String()) 80 | } 81 | 82 | func TestHttpServer_getClusterList(t *testing.T) { 83 | coordinator := fixtureConfiguredCoordinator() 84 | 85 | // Respond to the expected storage request 86 | go func() { 87 | request := <-coordinator.App.StorageChannel 88 | assert.Equalf(t, protocol.StorageFetchClusters, request.RequestType, "Expected request of type StorageFetchClusters, not %v", request.RequestType) 89 | request.Reply <- []string{"testcluster"} 90 | close(request.Reply) 91 | }() 92 | 93 | // Set up a request 94 | req, err := http.NewRequest("GET", "/v3/admin/loglevel", http.NoBody) 95 | assert.NoError(t, err, "Expected request setup to return no error") 96 | 97 | // Call the handler via httprouter 98 | rr := httptest.NewRecorder() 99 | coordinator.router.ServeHTTP(rr, req) 100 | 101 | assert.Equalf(t, http.StatusOK, rr.Code, "Expected response code to be 200, not %v", rr.Code) 102 | 103 | // Parse response body 104 | decoder := json.NewDecoder(rr.Body) 105 | var resp httpResponseLogLevel 106 | err = decoder.Decode(&resp) 107 | assert.NoError(t, err, "Expected body decode to return no error") 108 | assert.False(t, resp.Error, "Expected response Error to be false") 109 | assert.Equalf(t, "info", resp.Level, "Expected Level to be info, not %v", resp.Level) 110 | } 111 | 112 | func TestHttpServer_setLogLevel(t *testing.T) { 113 | coordinator := fixtureConfiguredCoordinator() 114 | 115 | // Set up a request 116 | req, err := http.NewRequest("POST", 
"/v3/admin/loglevel", strings.NewReader("{\"level\": \"debug\"}")) 117 | assert.NoError(t, err, "Expected request setup to return no error") 118 | 119 | // Call the handler via httprouter 120 | rr := httptest.NewRecorder() 121 | coordinator.router.ServeHTTP(rr, req) 122 | assert.Equalf(t, http.StatusOK, rr.Code, "Expected response code to be 200, not %v", rr.Code) 123 | 124 | // Parse response body 125 | decoder := json.NewDecoder(rr.Body) 126 | var resp httpResponseError 127 | err = decoder.Decode(&resp) 128 | assert.NoError(t, err, "Expected body decode to return no error") 129 | 130 | assert.False(t, resp.Error, "Expected response Error to be false") 131 | 132 | // The log level is changed async to the HTTP call, so sleep to make sure it got processed 133 | time.Sleep(100 * time.Millisecond) 134 | assert.Equalf(t, zap.DebugLevel, coordinator.App.LogLevel.Level(), "Expected log level to be set to Debug, not %v", coordinator.App.LogLevel.Level().String()) 135 | } 136 | 137 | func TestHttpServer_DefaultHandler(t *testing.T) { 138 | coordinator := fixtureConfiguredCoordinator() 139 | 140 | // Set up a request 141 | req, err := http.NewRequest("GET", "/v3/no/such/uri", http.NoBody) 142 | assert.NoError(t, err, "Expected request setup to return no error") 143 | 144 | // Call the handler via httprouter 145 | rr := httptest.NewRecorder() 146 | coordinator.router.ServeHTTP(rr, req) 147 | assert.Equalf(t, http.StatusNotFound, rr.Code, "Expected response code to be 404, not %v", rr.Code) 148 | 149 | // Parse response body 150 | decoder := json.NewDecoder(rr.Body) 151 | var resp httpResponseError 152 | err = decoder.Decode(&resp) 153 | assert.NoError(t, err, "Expected body decode to return no error") 154 | 155 | assert.True(t, resp.Error, "Expected response Error to be true") 156 | } 157 | -------------------------------------------------------------------------------- /core/internal/httpserver/prometheus.go: 
// Package-level gauges exposed on the /metrics endpoint. They are registered
// once via promauto; the Delete* helpers below drop stale series when consumers
// or topics disappear.
var (
	// Sum of current lag across all partitions of a consumer group.
	consumerTotalLagGauge = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "burrow_kafka_consumer_lag_total",
			Help: "The sum of all partition current lag values for the group",
		},
		[]string{"cluster", "consumer_group"},
	)

	// Numeric status of the whole consumer group (index into the status list).
	consumerStatusGauge = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "burrow_kafka_consumer_status",
			Help: "The status of the consumer group. It is calculated from the highest status for the individual partitions. Statuses are an index list from NOTFOUND, OK, WARN, or ERR",
		},
		[]string{"cluster", "consumer_group"},
	)

	// Numeric per-partition status for a consumer group.
	partitionStatusGauge = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "burrow_kafka_topic_partition_status",
			Help: "The status of topic partition. It is calculated from the highest status for the individual partitions. Statuses are an index list from OK, WARN, STOP, STALL, REWIND",
		},
		[]string{"cluster", "consumer_group", "topic", "partition"},
	)

	// Latest committed offset stored for a consumer partition.
	consumerPartitionCurrentOffset = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "burrow_kafka_consumer_current_offset",
			Help: "Latest offset that Burrow is storing for this partition",
		},
		[]string{"cluster", "consumer_group", "topic", "partition"},
	)

	// Current lag for a single consumer partition.
	consumerPartitionLagGauge = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "burrow_kafka_consumer_partition_lag",
			Help: "Number of messages the consumer group is behind by for a partition as reported by Burrow",
		},
		[]string{"cluster", "consumer_group", "topic", "partition"},
	)

	// Latest broker offset stored for a topic partition.
	topicPartitionOffsetGauge = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "burrow_kafka_topic_partition_offset",
			Help: "Latest offset the topic that Burrow is storing for this partition",
		},
		[]string{"cluster", "topic", "partition"},
	)
)

// DeleteConsumerMetrics deletes all metrics that are labeled with a consumer group
func DeleteConsumerMetrics(cluster, consumer string) {
	labels := map[string]string{
		"cluster":        cluster,
		"consumer_group": consumer,
	}

	// The group-level gauges carry exactly these two labels, so an exact Delete
	// matches; the partition-level gauges also carry topic/partition labels and
	// need the partial-match variant.
	consumerTotalLagGauge.Delete(labels)
	consumerStatusGauge.Delete(labels)
	consumerPartitionLagGauge.DeletePartialMatch(labels)
	consumerPartitionCurrentOffset.DeletePartialMatch(labels)
	partitionStatusGauge.DeletePartialMatch(labels)
}
delete the consumer groups, which will eventually trigger DeleteConsumerMetrics 90 | consumerPartitionLagGauge.DeletePartialMatch(labels) 91 | consumerPartitionCurrentOffset.DeletePartialMatch(labels) 92 | consumerTotalLagGauge.DeletePartialMatch(labels) 93 | consumerStatusGauge.DeletePartialMatch(labels) 94 | } 95 | 96 | // DeleteConsumerTopicMetrics deletes all metrics that are labeled with the provided consumer group AND topic 97 | func DeleteConsumerTopicMetrics(cluster, consumer, topic string) { 98 | labels := map[string]string{ 99 | "cluster": cluster, 100 | "consumer_group": consumer, 101 | "topic": topic, 102 | } 103 | 104 | partitionStatusGauge.DeletePartialMatch(labels) 105 | consumerPartitionCurrentOffset.DeletePartialMatch(labels) 106 | consumerPartitionLagGauge.DeletePartialMatch(labels) 107 | } 108 | 109 | func (hc *Coordinator) handlePrometheusMetrics() http.HandlerFunc { 110 | promHandler := promhttp.Handler() 111 | 112 | return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { 113 | for _, cluster := range listClusters(hc.App) { 114 | for _, consumer := range listConsumers(hc.App, cluster) { 115 | consumerStatus := getFullConsumerStatus(hc.App, cluster, consumer) 116 | 117 | if consumerStatus == nil || 118 | consumerStatus.Status == protocol.StatusNotFound { 119 | continue 120 | } 121 | 122 | labels := map[string]string{ 123 | "cluster": cluster, 124 | "consumer_group": consumer, 125 | } 126 | 127 | consumerTotalLagGauge.With(labels).Set(float64(consumerStatus.TotalLag)) 128 | consumerStatusGauge.With(labels).Set(float64(consumerStatus.Status)) 129 | 130 | for _, partition := range consumerStatus.Partitions { 131 | labels := map[string]string{ 132 | "cluster": cluster, 133 | "consumer_group": consumer, 134 | "topic": partition.Topic, 135 | "partition": strconv.FormatInt(int64(partition.Partition), 10), 136 | } 137 | 138 | consumerPartitionLagGauge.With(labels).Set(float64(partition.CurrentLag)) 139 | 140 | if partition.Complete 
== 1.0 { 141 | consumerPartitionCurrentOffset.With(labels).Set(float64(partition.End.Offset)) 142 | partitionStatusGauge.With(labels).Set(float64(partition.Status)) 143 | } 144 | } 145 | } 146 | 147 | // Topics 148 | for _, topic := range listTopics(hc.App, cluster) { 149 | for partitionNumber, offset := range getTopicDetail(hc.App, cluster, topic) { 150 | topicPartitionOffsetGauge.With(map[string]string{ 151 | "cluster": cluster, 152 | "topic": topic, 153 | "partition": strconv.FormatInt(int64(partitionNumber), 10), 154 | }).Set(float64(offset)) 155 | } 156 | } 157 | } 158 | 159 | promHandler.ServeHTTP(resp, req) 160 | }) 161 | } 162 | 163 | func listClusters(app *protocol.ApplicationContext) []string { 164 | request := &protocol.StorageRequest{ 165 | RequestType: protocol.StorageFetchClusters, 166 | Reply: make(chan interface{}), 167 | } 168 | app.StorageChannel <- request 169 | response := <-request.Reply 170 | if response == nil { 171 | return []string{} 172 | } 173 | 174 | return response.([]string) 175 | } 176 | 177 | func listConsumers(app *protocol.ApplicationContext, cluster string) []string { 178 | request := &protocol.StorageRequest{ 179 | RequestType: protocol.StorageFetchConsumers, 180 | Cluster: cluster, 181 | Reply: make(chan interface{}), 182 | } 183 | app.StorageChannel <- request 184 | response := <-request.Reply 185 | if response == nil { 186 | return []string{} 187 | } 188 | 189 | return response.([]string) 190 | } 191 | 192 | func getFullConsumerStatus(app *protocol.ApplicationContext, cluster, consumer string) *protocol.ConsumerGroupStatus { 193 | request := &protocol.EvaluatorRequest{ 194 | Cluster: cluster, 195 | Group: consumer, 196 | ShowAll: true, 197 | Reply: make(chan *protocol.ConsumerGroupStatus), 198 | } 199 | app.EvaluatorChannel <- request 200 | response := <-request.Reply 201 | return response 202 | } 203 | 204 | func listTopics(app *protocol.ApplicationContext, cluster string) []string { 205 | request := &protocol.StorageRequest{ 
206 | RequestType: protocol.StorageFetchTopics, 207 | Cluster: cluster, 208 | Reply: make(chan interface{}), 209 | } 210 | app.StorageChannel <- request 211 | response := <-request.Reply 212 | if response == nil { 213 | return []string{} 214 | } 215 | 216 | return response.([]string) 217 | } 218 | 219 | func getTopicDetail(app *protocol.ApplicationContext, cluster, topic string) []int64 { 220 | request := &protocol.StorageRequest{ 221 | RequestType: protocol.StorageFetchTopic, 222 | Cluster: cluster, 223 | Topic: topic, 224 | Reply: make(chan interface{}), 225 | } 226 | app.StorageChannel <- request 227 | response := <-request.Reply 228 | if response == nil { 229 | return []int64{} 230 | } 231 | 232 | return response.([]int64) 233 | } 234 | -------------------------------------------------------------------------------- /core/internal/httpserver/prometheus_test.go: -------------------------------------------------------------------------------- 1 | package httpserver 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | 10 | "github.com/linkedin/Burrow/core/protocol" 11 | ) 12 | 13 | func TestHttpServer_handlePrometheusMetrics(t *testing.T) { 14 | coordinator := fixtureConfiguredCoordinator() 15 | 16 | // Respond to the expected storage requests 17 | go func() { 18 | request := <-coordinator.App.StorageChannel 19 | assert.Equalf(t, protocol.StorageFetchClusters, request.RequestType, "Expected request of type StorageFetchClusters, not %v", request.RequestType) 20 | request.Reply <- []string{"testcluster"} 21 | close(request.Reply) 22 | 23 | // List of consumers 24 | request = <-coordinator.App.StorageChannel 25 | assert.Equalf(t, protocol.StorageFetchConsumers, request.RequestType, "Expected request of type StorageFetchConsumers, not %v", request.RequestType) 26 | assert.Equalf(t, "testcluster", request.Cluster, "Expected request Cluster to be testcluster, not %v", request.Cluster) 27 | request.Reply 
<- []string{"testgroup", "testgroup2"} 28 | close(request.Reply) 29 | 30 | // List of topics 31 | request = <-coordinator.App.StorageChannel 32 | assert.Equalf(t, protocol.StorageFetchTopics, request.RequestType, "Expected request of type StorageFetchTopics, not %v", request.RequestType) 33 | assert.Equalf(t, "testcluster", request.Cluster, "Expected request Cluster to be testcluster, not %v", request.Cluster) 34 | request.Reply <- []string{"testtopic", "testtopic1"} 35 | close(request.Reply) 36 | 37 | // Topic details 38 | request = <-coordinator.App.StorageChannel 39 | assert.Equalf(t, protocol.StorageFetchTopic, request.RequestType, "Expected request of type StorageFetchTopic, not %v", request.RequestType) 40 | assert.Equalf(t, "testcluster", request.Cluster, "Expected request Cluster to be testcluster, not %v", request.Cluster) 41 | assert.Equalf(t, "testtopic", request.Topic, "Expected request Topic to be testtopic, not %v", request.Topic) 42 | request.Reply <- []int64{6556, 5566} 43 | close(request.Reply) 44 | 45 | request = <-coordinator.App.StorageChannel 46 | assert.Equalf(t, protocol.StorageFetchTopic, request.RequestType, "Expected request of type StorageFetchTopic, not %v", request.RequestType) 47 | assert.Equalf(t, "testcluster", request.Cluster, "Expected request Cluster to be testcluster, not %v", request.Cluster) 48 | assert.Equalf(t, "testtopic1", request.Topic, "Expected request Topic to be testtopic, not %v", request.Topic) 49 | request.Reply <- []int64{54} 50 | close(request.Reply) 51 | }() 52 | 53 | // Respond to the expected evaluator requests 54 | go func() { 55 | // testgroup happy paths 56 | request := <-coordinator.App.EvaluatorChannel 57 | assert.Equalf(t, "testcluster", request.Cluster, "Expected request Cluster to be testcluster, not %v", request.Cluster) 58 | assert.Equalf(t, "testgroup", request.Group, "Expected request Group to be testgroup, not %v", request.Group) 59 | assert.True(t, request.ShowAll, "Expected request ShowAll to be 
True") 60 | response := &protocol.ConsumerGroupStatus{ 61 | Cluster: request.Cluster, 62 | Group: request.Group, 63 | Status: protocol.StatusOK, 64 | Complete: 1.0, 65 | Partitions: []*protocol.PartitionStatus{ 66 | { 67 | Topic: "testtopic", 68 | Partition: 0, 69 | Status: protocol.StatusOK, 70 | CurrentLag: 100, 71 | Complete: 1.0, 72 | End: &protocol.ConsumerOffset{ 73 | Offset: 22663, 74 | }, 75 | }, 76 | { 77 | Topic: "testtopic", 78 | Partition: 1, 79 | Status: protocol.StatusOK, 80 | CurrentLag: 10, 81 | Complete: 1.0, 82 | End: &protocol.ConsumerOffset{ 83 | Offset: 2488, 84 | }, 85 | }, 86 | { 87 | Topic: "testtopic1", 88 | Partition: 0, 89 | Status: protocol.StatusOK, 90 | CurrentLag: 50, 91 | Complete: 1.0, 92 | End: &protocol.ConsumerOffset{ 93 | Offset: 99888, 94 | }, 95 | }, 96 | { 97 | Topic: "incomplete", 98 | Partition: 0, 99 | Status: protocol.StatusOK, 100 | CurrentLag: 0, 101 | Complete: 0.2, 102 | End: &protocol.ConsumerOffset{ 103 | Offset: 5335, 104 | }, 105 | }, 106 | { 107 | Topic: "incomplete", 108 | Partition: 1, 109 | Status: protocol.StatusOK, 110 | CurrentLag: 10, 111 | Complete: 1.0, 112 | End: &protocol.ConsumerOffset{ 113 | Offset: 99888, 114 | }, 115 | }, 116 | }, 117 | TotalPartitions: 2134, 118 | Maxlag: &protocol.PartitionStatus{}, 119 | TotalLag: 2345, 120 | } 121 | request.Reply <- response 122 | close(request.Reply) 123 | 124 | // testgroup2 not found 125 | request = <-coordinator.App.EvaluatorChannel 126 | assert.Equalf(t, "testcluster", request.Cluster, "Expected request Cluster to be testcluster, not %v", request.Cluster) 127 | assert.Equalf(t, "testgroup2", request.Group, "Expected request Group to be testgroup, not %v", request.Group) 128 | assert.True(t, request.ShowAll, "Expected request ShowAll to be True") 129 | response = &protocol.ConsumerGroupStatus{ 130 | Cluster: request.Cluster, 131 | Group: request.Group, 132 | Status: protocol.StatusNotFound, 133 | } 134 | request.Reply <- response 135 | close(request.Reply) 
136 | }() 137 | 138 | // Set up a request 139 | req, err := http.NewRequest("GET", "/metrics", http.NoBody) 140 | assert.NoError(t, err, "Expected request setup to return no error") 141 | 142 | // Call the handler via httprouter 143 | rr := httptest.NewRecorder() 144 | coordinator.router.ServeHTTP(rr, req) 145 | 146 | assert.Equalf(t, http.StatusOK, rr.Code, "Expected response code to be 200, not %v", rr.Code) 147 | 148 | promExp := rr.Body.String() 149 | assert.Contains(t, promExp, `burrow_kafka_consumer_status{cluster="testcluster",consumer_group="testgroup"} 1`) 150 | assert.Contains(t, promExp, `burrow_kafka_consumer_lag_total{cluster="testcluster",consumer_group="testgroup"} 2345`) 151 | 152 | assert.Contains(t, promExp, `burrow_kafka_consumer_partition_lag{cluster="testcluster",consumer_group="testgroup",partition="0",topic="testtopic"} 100`) 153 | assert.Contains(t, promExp, `burrow_kafka_consumer_partition_lag{cluster="testcluster",consumer_group="testgroup",partition="1",topic="testtopic"} 10`) 154 | assert.Contains(t, promExp, `burrow_kafka_consumer_partition_lag{cluster="testcluster",consumer_group="testgroup",partition="0",topic="testtopic1"} 50`) 155 | assert.Contains(t, promExp, `burrow_kafka_consumer_partition_lag{cluster="testcluster",consumer_group="testgroup",partition="0",topic="incomplete"} 0`) 156 | assert.Contains(t, promExp, `burrow_kafka_consumer_partition_lag{cluster="testcluster",consumer_group="testgroup",partition="1",topic="incomplete"} 10`) 157 | 158 | assert.Contains(t, promExp, `burrow_kafka_consumer_current_offset{cluster="testcluster",consumer_group="testgroup",partition="0",topic="testtopic"} 22663`) 159 | assert.Contains(t, promExp, `burrow_kafka_consumer_current_offset{cluster="testcluster",consumer_group="testgroup",partition="1",topic="testtopic"} 2488`) 160 | assert.Contains(t, promExp, `burrow_kafka_consumer_current_offset{cluster="testcluster",consumer_group="testgroup",partition="0",topic="testtopic1"} 99888`) 161 | 
assert.NotContains(t, promExp, `burrow_kafka_consumer_current_offset{cluster="testcluster",consumer_group="testgroup",partition="0",topic="incomplete"} 5335`) 162 | assert.Contains(t, promExp, `burrow_kafka_consumer_current_offset{cluster="testcluster",consumer_group="testgroup",partition="1",topic="incomplete"} 99888`) 163 | 164 | assert.Contains(t, promExp, `burrow_kafka_topic_partition_offset{cluster="testcluster",partition="0",topic="testtopic"} 6556`) 165 | assert.Contains(t, promExp, `burrow_kafka_topic_partition_offset{cluster="testcluster",partition="1",topic="testtopic"} 5566`) 166 | assert.Contains(t, promExp, `burrow_kafka_topic_partition_offset{cluster="testcluster",partition="0",topic="testtopic1"} 54`) 167 | assert.NotContains(t, promExp, `burrow_kafka_topic_partition_offset{cluster="testcluster",consumer_group="testgroup",partition="0",topic="incomplete"} 0`) 168 | assert.NotContains(t, promExp, `burrow_kafka_topic_partition_offset{cluster="testcluster",consumer_group="testgroup",partition="1",topic="incomplete"} 99888`) 169 | 170 | assert.Contains(t, promExp, `burrow_kafka_consumer_partition_lag{cluster="testcluster",consumer_group="testgroup",partition="0",topic="incomplete"} 0`) 171 | assert.NotContains(t, promExp, "testgroup2") 172 | } 173 | -------------------------------------------------------------------------------- /core/internal/notifier/coordinator_race_test.go: -------------------------------------------------------------------------------- 1 | //go:build !race 2 | // +build !race 3 | 4 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 5 | // 2.0 (the "License"); you may not use this file except in compliance with 6 | // the License. 
You may obtain a copy of the License at 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | 13 | package notifier 14 | 15 | import ( 16 | "errors" 17 | "sync" 18 | "sync/atomic" 19 | "testing" 20 | "time" 21 | 22 | "github.com/stretchr/testify/assert" 23 | 24 | "github.com/linkedin/Burrow/core/internal/helpers" 25 | ) 26 | 27 | // This tests the full set of calls to send evaluator requests. It triggers the race detector because of setting 28 | // doEvaluations to false to end the loop. 29 | func TestCoordinator_sendEvaluatorRequests(t *testing.T) { 30 | coordinator := fixtureCoordinator() 31 | coordinator.Configure() 32 | 33 | // A test cluster and group to send requests for 34 | coordinator.clusters["testcluster"] = &clusterGroups{ 35 | Lock: &sync.RWMutex{}, 36 | Groups: make(map[string]*consumerGroup), 37 | } 38 | coordinator.clusters["testcluster"].Groups["testgroup"] = &consumerGroup{ 39 | LastNotify: make(map[string]time.Time), 40 | LastEval: time.Now().Add(-time.Duration(coordinator.minInterval) * time.Second), 41 | } 42 | coordinator.clusters["testcluster2"] = &clusterGroups{ 43 | Lock: &sync.RWMutex{}, 44 | Groups: make(map[string]*consumerGroup), 45 | } 46 | coordinator.clusters["testcluster2"].Groups["testgroup2"] = &consumerGroup{ 47 | LastNotify: make(map[string]time.Time), 48 | LastEval: time.Now().Add(-time.Duration(coordinator.minInterval) * time.Second), 49 | } 50 | 51 | coordinator.doEvaluations = true 52 | coordinator.running.Add(1) 53 | go coordinator.sendEvaluatorRequests() 54 | 55 | // We expect to get 2 requests 56 | for i := 0; i < 2; i++ { 57 | request := <-coordinator.App.EvaluatorChannel 58 | switch request.Cluster { 59 | case "testcluster": 60 | assert.Equalf(t, "testcluster", request.Cluster, 
"Expected request cluster to be testcluster, not %v", request.Cluster) 61 | assert.Equalf(t, "testgroup", request.Group, "Expected request group to be testgroup, not %v", request.Group) 62 | assert.False(t, request.ShowAll, "Expected ShowAll to be false") 63 | case "testcluster2": 64 | assert.Equalf(t, "testcluster2", request.Cluster, "Expected request cluster to be testcluster2, not %v", request.Cluster) 65 | assert.Equalf(t, "testgroup2", request.Group, "Expected request group to be testgroup2, not %v", request.Group) 66 | assert.False(t, request.ShowAll, "Expected ShowAll to be false") 67 | default: 68 | assert.Failf(t, "Received unexpected request for cluster %v, group %v", request.Cluster, request.Group) 69 | } 70 | } 71 | 72 | select { 73 | case <-coordinator.App.EvaluatorChannel: 74 | assert.Fail(t, "Received extra request on the evaluator channel") 75 | default: 76 | // All is good - we didn't expect to find another request 77 | } 78 | coordinator.doEvaluations = false 79 | } 80 | 81 | // We know this will trigger the race detector, because of the way we manipulate the ZK state 82 | func TestCoordinator_manageEvalLoop_Start(t *testing.T) { 83 | coordinator := fixtureCoordinator() 84 | coordinator.Configure() 85 | 86 | // Add mock calls for the Zookeeper client - Lock immediately returns with no error 87 | mockLock := &helpers.MockZookeeperLock{} 88 | mockLock.On("Lock").Return(nil) 89 | mockZk := coordinator.App.Zookeeper.(*helpers.MockZookeeperClient) 90 | mockZk.On("NewLock", "/burrow/notifier").Return(mockLock) 91 | 92 | go coordinator.manageEvalLoop() 93 | time.Sleep(200 * time.Millisecond) 94 | 95 | mockLock.AssertExpectations(t) 96 | mockZk.AssertExpectations(t) 97 | assert.True(t, coordinator.doEvaluations, "Expected doEvaluations to be true") 98 | } 99 | 100 | // We know this will trigger the race detector, because of the way we manipulate the ZK state 101 | func TestCoordinator_manageEvalLoop_Expiration(t *testing.T) { 102 | coordinator := 
fixtureCoordinator() 103 | coordinator.Configure() 104 | 105 | // Add mock calls for the Zookeeper client - Lock immediately returns with no error 106 | mockLock := &helpers.MockZookeeperLock{} 107 | mockLock.On("Lock").Return(nil) 108 | mockZk := coordinator.App.Zookeeper.(*helpers.MockZookeeperClient) 109 | mockZk.On("NewLock", "/burrow/notifier").Return(mockLock) 110 | 111 | go coordinator.manageEvalLoop() 112 | time.Sleep(200 * time.Millisecond) 113 | 114 | // ZK gets disconnected and expired 115 | coordinator.App.ZookeeperConnected = false 116 | coordinator.App.ZookeeperExpired.Broadcast() 117 | time.Sleep(300 * time.Millisecond) 118 | 119 | mockLock.AssertExpectations(t) 120 | mockZk.AssertExpectations(t) 121 | assert.False(t, coordinator.doEvaluations, "Expected doEvaluations to be false") 122 | } 123 | 124 | type statefulMockLock struct { 125 | *helpers.MockZookeeperLock 126 | lockHeld *int32 127 | } 128 | 129 | func (s *statefulMockLock) Lock() error { 130 | if atomic.CompareAndSwapInt32(s.lockHeld, 0, 1) { 131 | return nil 132 | } 133 | return errors.New("unable to lock twice: must unlock first") 134 | } 135 | 136 | func (s *statefulMockLock) Unlock() error { 137 | if atomic.CompareAndSwapInt32(s.lockHeld, 1, 0) { 138 | return nil 139 | } 140 | return errors.New("unable to unlock: lock not held") 141 | } 142 | 143 | // We know this will trigger the race detector, because of the way we manipulate the ZK state 144 | func TestCoordinator_manageEvalLoop_Reconnect(t *testing.T) { 145 | coordinator := fixtureCoordinator() 146 | coordinator.Configure() 147 | 148 | // Add mock calls for the Zookeeper client - Lock immediately returns with no error 149 | mockLock := &statefulMockLock{&helpers.MockZookeeperLock{}, new(int32)} 150 | mockZk := coordinator.App.Zookeeper.(*helpers.MockZookeeperClient) 151 | mockZk.On("NewLock", "/burrow/notifier").Return(mockLock) 152 | 153 | go coordinator.manageEvalLoop() 154 | time.Sleep(200 * time.Millisecond) 155 | 156 | // ZK 
// EmailNotifier is a module which can be used to send notifications of consumer group status via email messages. One
// email is sent for each consumer group that matches the allowlist/denylist and the status threshold.
type EmailNotifier struct {
	// App is a pointer to the application context. This stores the channel to the storage subsystem
	App *protocol.ApplicationContext

	// Log is a logger that has been configured for this module to use. Normally, this means it has been set up with
	// fields that are appropriate to identify this coordinator
	Log *zap.Logger

	// name is the configured module name, returned by GetName.
	name string
	// groupAllowlist / groupDenylist are the compiled group-name filters (nil when unset).
	groupAllowlist *regexp.Regexp
	groupDenylist  *regexp.Regexp
	// extras holds additional configured key/value pairs — presumably passed to
	// the message templates; usage is not visible in this chunk (TODO confirm).
	extras map[string]string
	// templateOpen is used when a problem is detected; templateClose when it resolves (see Notify).
	templateOpen  *template.Template
	templateClose *template.Template

	// to and from are the configured recipient and sender addresses (validated in Configure).
	to   string
	from string

	// smtpDialer is built in Configure from server/port/auth/TLS settings.
	smtpDialer *gomail.Dialer
	// sendMailFunc abstracts the actual send so tests can substitute a fake.
	sendMailFunc func(message *gomail.Message) error
}
extraCa := viper.GetString(configRoot + ".extra-ca") 89 | noVerify := viper.GetBool(configRoot + ".noverify") 90 | 91 | d := gomail.NewDialer(host, port, "", "") 92 | d.Auth = module.getSMTPAuth(configRoot) 93 | d.TLSConfig = buildEmailTLSConfig(extraCa, noVerify, host) 94 | 95 | module.smtpDialer = d 96 | } 97 | 98 | func buildEmailTLSConfig(extraCaFile string, noVerify bool, smtpHost string) *tls.Config { 99 | rootCAs := buildRootCAs(extraCaFile, noVerify) 100 | 101 | return &tls.Config{ 102 | InsecureSkipVerify: noVerify, 103 | ServerName: smtpHost, 104 | RootCAs: rootCAs, 105 | } 106 | } 107 | 108 | // Builds authentication profile for smtp client 109 | func (module *EmailNotifier) getSMTPAuth(configRoot string) smtp.Auth { 110 | var auth smtp.Auth 111 | // Set up SMTP authentication 112 | switch strings.ToLower(viper.GetString(configRoot + ".auth-type")) { 113 | case "plain": 114 | auth = smtp.PlainAuth("", viper.GetString(configRoot+".username"), viper.GetString(configRoot+".password"), viper.GetString(configRoot+".server")) 115 | case "crammd5": 116 | auth = smtp.CRAMMD5Auth(viper.GetString(configRoot+".username"), viper.GetString(configRoot+".password")) 117 | case "": 118 | auth = nil 119 | default: 120 | module.Log.Panic("unknown auth type") 121 | panic(errors.New("configuration error")) 122 | } 123 | 124 | return auth 125 | } 126 | 127 | // Start is a no-op for the email notifier. It always returns no error 128 | func (module *EmailNotifier) Start() error { 129 | return nil 130 | } 131 | 132 | // Stop is a no-op for the email notifier. 
It always returns no error 133 | func (module *EmailNotifier) Stop() error { 134 | return nil 135 | } 136 | 137 | // GetName returns the configured name of this module 138 | func (module *EmailNotifier) GetName() string { 139 | return module.name 140 | } 141 | 142 | // GetGroupAllowlist returns the compiled group allowlist (or nil, if there is not one) 143 | func (module *EmailNotifier) GetGroupAllowlist() *regexp.Regexp { 144 | return module.groupAllowlist 145 | } 146 | 147 | // GetGroupDenylist returns the compiled group denylist (or nil, if there is not one) 148 | func (module *EmailNotifier) GetGroupDenylist() *regexp.Regexp { 149 | return module.groupDenylist 150 | } 151 | 152 | // GetLogger returns the configured zap.Logger for this notifier 153 | func (module *EmailNotifier) GetLogger() *zap.Logger { 154 | return module.Log 155 | } 156 | 157 | // AcceptConsumerGroup has no additional function for the email notifier, and so always returns true 158 | func (module *EmailNotifier) AcceptConsumerGroup(status *protocol.ConsumerGroupStatus) bool { 159 | return true 160 | } 161 | 162 | // Notify sends a single email message, with the from and to set to the configured addresses for the notifier. The 163 | // status, eventID, and startTime are all passed to the template for compiling the message. If stateGood is true, the 164 | // "close" template is used. Otherwise, the "open" template is used. 165 | func (module *EmailNotifier) Notify(status *protocol.ConsumerGroupStatus, eventID string, startTime time.Time, stateGood bool) { 166 | logger := module.Log.With( 167 | zap.String("cluster", status.Cluster), 168 | zap.String("group", status.Group), 169 | zap.String("id", eventID), 170 | zap.String("status", status.Status.String()), 171 | ) 172 | 173 | var tmpl *template.Template 174 | if stateGood { 175 | tmpl = module.templateClose 176 | } else { 177 | tmpl = module.templateOpen 178 | } 179 | 180 | // Put the from and to lines in without the template. 
Template should set the subject line, followed by a blank line 181 | messageContent, err := executeTemplate(tmpl, module.extras, status, eventID, startTime) 182 | 183 | if err != nil { 184 | logger.Error("failed to assemble", zap.Error(err)) 185 | return 186 | } 187 | 188 | // Process template headers and send email 189 | if m, err := module.createMessage(messageContent.String()); err == nil { 190 | if err := module.sendMailFunc(m); err != nil { 191 | logger.Error("failed to send", zap.Error(err)) 192 | } 193 | } else { 194 | logger.Error("failed to send", zap.Error(err)) 195 | } 196 | } 197 | 198 | // sendEmail uses the gomail smtpDialer to send a constructed message. This function is mocked for testing purposes 199 | func (module *EmailNotifier) sendEmail(m *gomail.Message) error { 200 | if err := module.smtpDialer.DialAndSend(m); err != nil { 201 | return err 202 | } 203 | 204 | return nil 205 | } 206 | 207 | // createMessage organizes all relevant email message content into a structure for easy use 208 | func (module *EmailNotifier) createMessage(messageContent string) (*gomail.Message, error) { 209 | m := gomail.NewMessage() 210 | var subject string 211 | var mimeVersion string 212 | 213 | contentType := "text/plain" 214 | 215 | subjectDelimiter := "Subject: " 216 | contentTypeDelimiter := "Content-Type: " 217 | mimeVersionDelimiter := "MIME-version: " 218 | 219 | if !strings.HasPrefix(messageContent, subjectDelimiter) { 220 | return nil, errors.New("no subject line detected. 
// getKeywordContent returns the value portion of a template header line such
// as "Subject: my subject", i.e. everything after the delimiter prefix.
//
// Using strings.TrimPrefix (rather than splitting on the delimiter) keeps the
// full remainder intact even when the delimiter text happens to appear again
// later in the value. The previous Split-based implementation returned only
// the text between the first two occurrences, silently truncating such
// values. Callers have already verified the line starts with the delimiter
// via strings.HasPrefix.
func getKeywordContent(header, delimiter string) string {
	return strings.TrimPrefix(header, delimiter)
}
9 | 10 | package notifier 11 | 12 | import ( 13 | "text/template" 14 | "time" 15 | 16 | "testing" 17 | 18 | "github.com/stretchr/testify/assert" 19 | 20 | "github.com/spf13/viper" 21 | "go.uber.org/zap" 22 | 23 | "net" 24 | "strconv" 25 | 26 | "gopkg.in/gomail.v2" 27 | 28 | "github.com/linkedin/Burrow/core/protocol" 29 | ) 30 | 31 | func fixtureEmailNotifier() *EmailNotifier { 32 | module := EmailNotifier{ 33 | Log: zap.NewNop(), 34 | } 35 | module.App = &protocol.ApplicationContext{} 36 | 37 | viper.Reset() 38 | viper.Set("notifier.test.class-name", "email") 39 | viper.Set("notifier.test.template-open", "template_open") 40 | viper.Set("notifier.test.template-close", "template_close") 41 | viper.Set("notifier.test.send-close", false) 42 | viper.Set("notifier.test.server", "test.example.com") 43 | viper.Set("notifier.test.port", 587) 44 | viper.Set("notifier.test.from", "sender@example.com") 45 | viper.Set("notifier.test.to", "receiver@example.com") 46 | viper.Set("notifier.test.noverify", true) 47 | 48 | return &module 49 | } 50 | 51 | func TestEmailNotifier_ImplementsModule(t *testing.T) { 52 | assert.Implements(t, (*protocol.Module)(nil), new(EmailNotifier)) 53 | assert.Implements(t, (*Module)(nil), new(EmailNotifier)) 54 | } 55 | 56 | func TestEmailNotifier_Configure(t *testing.T) { 57 | module := fixtureEmailNotifier() 58 | 59 | module.Configure("test", "notifier.test") 60 | assert.NotNil(t, module.smtpDialer, "Expected smtpDialer") 61 | } 62 | 63 | func TestEmailNotifier_Configure_BasicAuth(t *testing.T) { 64 | module := fixtureEmailNotifier() 65 | viper.Set("notifier.test.auth-type", "plain") 66 | viper.Set("notifier.test.username", "user") 67 | viper.Set("notifier.test.password", "pass") 68 | 69 | module.Configure("test", "notifier.test") 70 | } 71 | 72 | func TestEmailNotifier_Configure_CramMD5(t *testing.T) { 73 | module := fixtureEmailNotifier() 74 | viper.Set("notifier.test.auth-type", "CramMD5") 75 | viper.Set("notifier.test.username", "user") 76 | 
viper.Set("notifier.test.password", "pass") 77 | 78 | module.Configure("test", "notifier.test") 79 | } 80 | 81 | func TestEmailNotifier_StartStop(t *testing.T) { 82 | module := fixtureEmailNotifier() 83 | module.Configure("test", "notifier.test") 84 | 85 | err := module.Start() 86 | assert.Nil(t, err, "Expected Start to return no error") 87 | err = module.Stop() 88 | assert.Nil(t, err, "Expected Stop to return no error") 89 | } 90 | 91 | func TestEmailNotifier_AcceptConsumerGroup(t *testing.T) { 92 | module := fixtureEmailNotifier() 93 | module.Configure("test", "notifier.test") 94 | 95 | // Should always return true 96 | assert.True(t, module.AcceptConsumerGroup(&protocol.ConsumerGroupStatus{}), "Expected any status to return True") 97 | } 98 | 99 | func TestEmailNotifier_Notify_Open(t *testing.T) { 100 | module := fixtureEmailNotifier() 101 | viper.Set("notifier.test.auth-type", "plain") 102 | viper.Set("notifier.test.username", "user") 103 | viper.Set("notifier.test.password", "pass") 104 | 105 | module.sendMailFunc = func(m *gomail.Message) error { 106 | d := module.smtpDialer 107 | serverWithPort := net.JoinHostPort(d.Host, strconv.Itoa(d.Port)) 108 | assert.Equalf(t, "test.example.com:587", serverWithPort, "Expected server to be test.example.com:587, not %v", serverWithPort) 109 | assert.NotNil(t, d.Auth, "Expected auth to not be nil") 110 | assert.Equalf(t, "sender@example.com", module.from, "Expected from to be sender@example.com, not %v", module.from) 111 | assert.Lenf(t, []string{module.to}, 1, "Expected one to address, not %v", len([]string{module.to})) 112 | assert.Equalf(t, "receiver@example.com", []string{module.to}[0], "Expected to to be receiver@example.com, not %v", []string{module.to}[0]) 113 | 114 | mimeHeader := m.GetHeader("MIME-version") 115 | subHeader := m.GetHeader("Subject") 116 | 117 | assert.Equalf(t, []string{"[Burrow] Kafka Consumer Lag Alert"}, subHeader, "Expected subject to be [Burrow] Kafka Consumer Lag Alert, not %v", subHeader) 
118 | assert.Equalf(t, []string{"1.0"}, mimeHeader, "Expected MimeVersion of 1.0", mimeHeader) 119 | assert.NotNil(t, m, "Expected auth to not be nil") 120 | assert.True(t, d.TLSConfig.InsecureSkipVerify) 121 | 122 | return nil 123 | } 124 | 125 | // Template for testing 126 | module.templateOpen, _ = template.New("test").Parse("Subject: [Burrow] Kafka Consumer Lag Alert\n\n" + 127 | "MIME-version: 1.0\n" + 128 | "The Kafka consumer groups you are monitoring are currently showing problems. The following groups are in a problem state (groups not listed are OK):\n\n" + 129 | "Cluster: {{.Result.Cluster}}\n" + 130 | "Group: {{.Result.Group}}\n" + 131 | "Status: {{.Result.Status.String}}\n" + 132 | "Complete: {{.Result.Complete}}\n" + 133 | "Errors: {{len .Result.Partitions}} partitions have problems\n" + 134 | "{{range .Result.Partitions}} {{.Status.String}} {{.Topic}}:{{.Partition}} ({{.Start.Timestamp}}, {{.Start.Offset}}, {{.Start.Lag}}) -> ({{.End.Timestamp}}, {{.End.Offset}}, {{.End.Lag}})\n" + 135 | "{{end}}") 136 | 137 | module.Configure("test", "notifier.test") 138 | 139 | status := &protocol.ConsumerGroupStatus{ 140 | Status: protocol.StatusWarning, 141 | Cluster: "testcluster", 142 | Group: "testgroup", 143 | } 144 | 145 | module.Notify(status, "testidstring", time.Now(), false) 146 | } 147 | 148 | func TestEmailNotifier_Notify_Close(t *testing.T) { 149 | module := fixtureEmailNotifier() 150 | 151 | module.sendMailFunc = func(m *gomail.Message) error { 152 | d := module.smtpDialer 153 | serverWithPort := net.JoinHostPort(d.Host, strconv.Itoa(d.Port)) 154 | 155 | assert.Equalf(t, "test.example.com:587", serverWithPort, "Expected server to be test.example.com:587, not %v", serverWithPort) 156 | assert.Nil(t, d.Auth, "Expected auth to be nil") 157 | assert.Equalf(t, "sender@example.com", module.from, "Expected from to be sender@example.com, not %v", module.from) 158 | assert.Lenf(t, []string{module.to}, 1, "Expected one to address, not %v", 
len([]string{module.to})) 159 | assert.Equalf(t, "receiver@example.com", []string{module.to}[0], "Expected to to be receiver@example.com, not %v", []string{module.to}[0]) 160 | 161 | mimeHeader := m.GetHeader("MIME-version") 162 | subHeader := m.GetHeader("Subject") 163 | 164 | assert.Equalf(t, []string{"[Burrow] Kafka Consumer Healthy"}, subHeader, "Expected subject to be [Burrow] Kafka Consumer Healthy, not %v", subHeader) 165 | assert.Equalf(t, []string(nil), mimeHeader, "Expected empty MimeVersion, not %v", mimeHeader) 166 | assert.NotNil(t, m, "Expected auth to not be nil") 167 | assert.True(t, d.TLSConfig.InsecureSkipVerify) 168 | 169 | return nil 170 | } 171 | 172 | // Template for testing 173 | module.templateClose, _ = template.New("test").Parse("Subject: [Burrow] Kafka Consumer Healthy\n\n" + 174 | "Content-Type: text/html\n" + 175 | "Consumer is now in a healthy state" + 176 | "Cluster: {{.Result.Cluster}}\n" + 177 | "Group: {{.Result.Group}}\n" + 178 | "Status: {{.Result.Status.String}}\n" + 179 | "Complete: {{.Result.Complete}}\n" + 180 | "{{range .Result.Partitions}} {{.Status.String}} {{.Topic}}:{{.Partition}} ({{.Start.Timestamp}}, {{.Start.Offset}}, {{.Start.Lag}}) -> ({{.End.Timestamp}}, {{.End.Offset}}, {{.End.Lag}})\n" + 181 | "{{end}}") 182 | 183 | module.Configure("test", "notifier.test") 184 | 185 | status := &protocol.ConsumerGroupStatus{ 186 | Status: protocol.StatusOK, 187 | Cluster: "testcluster", 188 | Group: "testgroup", 189 | } 190 | 191 | // Test appending file that doesn't exist 192 | assert.Panics(t, func() { buildRootCAs("/etc/no/file", false) }, "The code did not panic") 193 | 194 | module.Notify(status, "testidstring", time.Now(), true) 195 | } 196 | -------------------------------------------------------------------------------- /core/internal/notifier/helpers.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. 
Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package notifier 11 | 12 | import ( 13 | "crypto/x509" 14 | "encoding/json" 15 | "log" 16 | "os" 17 | "text/template" 18 | "time" 19 | 20 | "bytes" 21 | 22 | "github.com/linkedin/Burrow/core/protocol" 23 | ) 24 | 25 | // executeTemplate provides a common interface for notifier modules to call to process a text/template in the context 26 | // of a protocol.ConsumerGroupStatus and create a message to use in a notification. 27 | func executeTemplate(tmpl *template.Template, extras map[string]string, status *protocol.ConsumerGroupStatus, eventID string, startTime time.Time) (*bytes.Buffer, error) { 28 | bytesToSend := new(bytes.Buffer) 29 | err := tmpl.Execute(bytesToSend, struct { 30 | Cluster string 31 | Group string 32 | ID string 33 | Start time.Time 34 | Extras map[string]string 35 | Result protocol.ConsumerGroupStatus 36 | }{ 37 | Cluster: status.Cluster, 38 | Group: status.Group, 39 | ID: eventID, 40 | Start: startTime, 41 | Extras: extras, 42 | Result: *status, 43 | }) 44 | if err != nil { 45 | return nil, err 46 | } 47 | return bytesToSend, nil 48 | } 49 | 50 | // Helper functions for templates 51 | var helperFunctionMap = template.FuncMap{ 52 | "jsonencoder": templateJSONEncoder, 53 | "topicsbystatus": classifyTopicsByStatus, 54 | "partitioncounts": templateCountPartitions, 55 | "add": templateAdd, 56 | "minus": templateMinus, 57 | "multiply": templateMultiply, 58 | "divide": templateDivide, 59 | "maxlag": maxLagHelper, 60 | "formattimestamp": formatTimestamp, 61 | } 62 | 63 | // Helper 
// buildRootCAs returns the trusted certificate pool used for outbound TLS
// connections: the system roots (or an empty pool if those cannot be loaded),
// optionally extended with PEM certificates read from extraCaFile. The extra
// file is only consulted when certificate verification is enabled; an
// unreadable file is a fatal configuration problem and panics.
func buildRootCAs(extraCaFile string, noVerify bool) *x509.CertPool {
	pool, err := x509.SystemCertPool()
	if err != nil {
		pool = x509.NewCertPool()
		log.Println("Unable to load system certs, using empty cert pool instead")
	}

	// Skip the extra CA file entirely when verification is disabled, since
	// the certificates would never be checked anyway.
	if noVerify || extraCaFile == "" {
		return pool
	}

	pemData, readErr := os.ReadFile(extraCaFile)
	if readErr != nil {
		log.Panicf("Failed to append %q to RootCAs: %v", extraCaFile, readErr)
	}

	if !pool.AppendCertsFromPEM(pemData) {
		log.Println("No certs appended, using system certs only")
	}

	return pool
}
9 | 10 | package notifier 11 | 12 | import ( 13 | "errors" 14 | "io" 15 | "net" 16 | "net/http" 17 | "regexp" 18 | "text/template" 19 | "time" 20 | 21 | "github.com/spf13/viper" 22 | "go.uber.org/zap" 23 | 24 | "crypto/tls" 25 | 26 | "github.com/linkedin/Burrow/core/protocol" 27 | ) 28 | 29 | // HTTPNotifier is a module which can be used to send notifications of consumer group status via outbound HTTP calls to 30 | // another server. This is useful for informing another system, such as an alert system, when there is a problem. One 31 | // HTTP call is made for each consumer group that matches the allowlist/denylist and the status threshold (though 32 | // keepalive connections will be used if configured). 33 | type HTTPNotifier struct { 34 | // App is a pointer to the application context. This stores the channel to the storage subsystem 35 | App *protocol.ApplicationContext 36 | 37 | // Log is a logger that has been configured for this module to use. Normally, this means it has been set up with 38 | // fields that are appropriate to identify this coordinator 39 | Log *zap.Logger 40 | 41 | name string 42 | groupAllowlist *regexp.Regexp 43 | groupDenylist *regexp.Regexp 44 | extras map[string]string 45 | urlOpen string 46 | urlClose string 47 | methodOpen string 48 | methodClose string 49 | templateOpen *template.Template 50 | templateClose *template.Template 51 | sendClose bool 52 | 53 | httpClient *http.Client 54 | } 55 | 56 | // Configure validates the configuration of the http notifier. At minimum, there must be a url-open specified, and if 57 | // send-close is set to true there must also be a url-close. If these are missing or incorrect, this func will panic 58 | // with an explanatory message. It is also possible to configure a specific method (such as POST or DELETE) to be used 59 | // with these URLs, as well as a timeout and keepalive for the HTTP smtpClient. 
60 | func (module *HTTPNotifier) Configure(name, configRoot string) { 61 | module.name = name 62 | 63 | // Validate and set defaults for profile configs 64 | module.urlOpen = viper.GetString(configRoot + ".url-open") 65 | if module.urlOpen == "" { 66 | module.Log.Panic("no url-open specified") 67 | panic(errors.New("configuration error")) 68 | } 69 | 70 | viper.SetDefault(configRoot+".method-open", "POST") 71 | module.methodOpen = viper.GetString(configRoot + ".method-open") 72 | 73 | module.sendClose = viper.GetBool(configRoot + ".send-close") 74 | if module.sendClose { 75 | module.urlClose = viper.GetString(configRoot + ".url-close") 76 | if module.urlClose == "" { 77 | module.Log.Panic("no url-close specified") 78 | panic(errors.New("configuration error")) 79 | } 80 | viper.SetDefault(configRoot+".method-close", "POST") 81 | module.methodClose = viper.GetString(configRoot + ".method-close") 82 | } 83 | 84 | // Set defaults for module-specific configs if needed 85 | viper.SetDefault(configRoot+".timeout", 5) 86 | viper.SetDefault(configRoot+".keepalive", 300) 87 | 88 | tlsConfig := buildHTTPTLSConfig(viper.GetString(configRoot+".extra-ca"), viper.GetBool(configRoot+".noverify")) 89 | 90 | module.httpClient = &http.Client{ 91 | Timeout: viper.GetDuration(configRoot+".timeout") * time.Second, 92 | Transport: &http.Transport{ 93 | Dial: (&net.Dialer{ 94 | KeepAlive: viper.GetDuration(configRoot+".keepalive") * time.Second, 95 | }).Dial, 96 | Proxy: http.ProxyFromEnvironment, 97 | TLSClientConfig: tlsConfig, 98 | DisableKeepAlives: viper.GetBool(configRoot + ".disable-http-keepalive"), 99 | }, 100 | } 101 | } 102 | 103 | func buildHTTPTLSConfig(extraCaFile string, noVerify bool) *tls.Config { 104 | rootCAs := buildRootCAs(extraCaFile, noVerify) 105 | 106 | return &tls.Config{ 107 | InsecureSkipVerify: noVerify, 108 | RootCAs: rootCAs, 109 | } 110 | } 111 | 112 | // Start is a no-op for the http notifier. 
It always returns no error 113 | func (module *HTTPNotifier) Start() error { 114 | return nil 115 | } 116 | 117 | // Stop is a no-op for the http notifier. It always returns no error 118 | func (module *HTTPNotifier) Stop() error { 119 | return nil 120 | } 121 | 122 | // GetName returns the configured name of this module 123 | func (module *HTTPNotifier) GetName() string { 124 | return module.name 125 | } 126 | 127 | // GetGroupAllowlist returns the compiled group allowlist (or nil, if there is not one) 128 | func (module *HTTPNotifier) GetGroupAllowlist() *regexp.Regexp { 129 | return module.groupAllowlist 130 | } 131 | 132 | // GetGroupDenylist returns the compiled group denylist (or nil, if there is not one) 133 | func (module *HTTPNotifier) GetGroupDenylist() *regexp.Regexp { 134 | return module.groupDenylist 135 | } 136 | 137 | // GetLogger returns the configured zap.Logger for this notifier 138 | func (module *HTTPNotifier) GetLogger() *zap.Logger { 139 | return module.Log 140 | } 141 | 142 | // AcceptConsumerGroup has no additional function for the http notifier, and so always returns true 143 | func (module *HTTPNotifier) AcceptConsumerGroup(status *protocol.ConsumerGroupStatus) bool { 144 | return true 145 | } 146 | 147 | // Notify makes a single outbound HTTP request. The status, eventID, and startTime are all passed to the template for 148 | // compiling the request body. If stateGood is true, the "close" template and URL are used. Otherwise, the "open" 149 | // template and URL are used. 
150 | func (module *HTTPNotifier) Notify(status *protocol.ConsumerGroupStatus, eventID string, startTime time.Time, stateGood bool) { 151 | logger := module.Log.With( 152 | zap.String("cluster", status.Cluster), 153 | zap.String("group", status.Group), 154 | zap.String("id", eventID), 155 | zap.String("status", status.Status.String()), 156 | ) 157 | 158 | var tmpl *template.Template 159 | var method string 160 | var url string 161 | 162 | if stateGood { 163 | tmpl = module.templateClose 164 | method = module.methodClose 165 | url = module.urlClose 166 | } else { 167 | tmpl = module.templateOpen 168 | method = module.methodOpen 169 | url = module.urlOpen 170 | } 171 | 172 | bytesToSend, err := executeTemplate(tmpl, module.extras, status, eventID, startTime) 173 | if err != nil { 174 | logger.Error("failed to assemble message", zap.Error(err)) 175 | return 176 | } 177 | 178 | urlTmpl, err := template.New("url").Parse(url) 179 | if err != nil { 180 | logger.Error("failed to parse url", zap.Error(err)) 181 | return 182 | } 183 | 184 | urlToSend, err := executeTemplate(urlTmpl, module.extras, status, eventID, startTime) 185 | if err != nil { 186 | logger.Error("failed to assemble url", zap.Error(err)) 187 | return 188 | } 189 | 190 | // Send request to HTTP endpoint 191 | req, err := http.NewRequest(method, urlToSend.String(), bytesToSend) 192 | if err != nil { 193 | logger.Error("failed to create request", zap.Error(err)) 194 | return 195 | } 196 | username := viper.GetString("notifier." + module.name + ".username") 197 | if username != "" { 198 | // Add basic auth using the provided username and password 199 | req.SetBasicAuth(viper.GetString("notifier."+module.name+".username"), viper.GetString("notifier."+module.name+".password")) 200 | } 201 | req.Header.Set("Content-Type", "application/json") 202 | 203 | for header, value := range viper.GetStringMapString("notifier." 
+ module.name + ".headers") { 204 | req.Header.Set(header, value) 205 | } 206 | 207 | resp, err := module.httpClient.Do(req) 208 | if err != nil { 209 | logger.Error("failed to send", zap.Error(err)) 210 | return 211 | } 212 | io.Copy(io.Discard, resp.Body) 213 | resp.Body.Close() 214 | 215 | if (resp.StatusCode >= 200) && (resp.StatusCode <= 299) { 216 | logger.Debug("sent") 217 | } else { 218 | logger.Error("failed to send", zap.Int("response", resp.StatusCode)) 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /core/internal/notifier/http_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | 10 | package notifier 11 | 12 | import ( 13 | "encoding/json" 14 | "fmt" 15 | "net/http" 16 | "text/template" 17 | "time" 18 | 19 | "net/http/httptest" 20 | "testing" 21 | 22 | "github.com/stretchr/testify/assert" 23 | 24 | "github.com/spf13/viper" 25 | "go.uber.org/zap" 26 | 27 | "github.com/linkedin/Burrow/core/protocol" 28 | ) 29 | 30 | func fixtureHTTPNotifier() *HTTPNotifier { 31 | module := HTTPNotifier{ 32 | Log: zap.NewNop(), 33 | } 34 | module.App = &protocol.ApplicationContext{} 35 | 36 | viper.Reset() 37 | viper.Set("notifier.test.class-name", "http") 38 | viper.Set("notifier.test.url-open", "url_open") 39 | viper.Set("notifier.test.url-close", "url_close") 40 | viper.Set("notifier.test.template-open", "template_open") 41 | viper.Set("notifier.test.template-close", "template_close") 42 | viper.Set("notifier.test.send-close", false) 43 | viper.Set("notifier.test.headers", map[string]string{"Token": "testtoken"}) 44 | viper.Set("notifier.test.noverify", true) 45 | 46 | return &module 47 | } 48 | 49 | func TestHttpNotifier_ImplementsModule(t *testing.T) { 50 | assert.Implements(t, (*protocol.Module)(nil), new(HTTPNotifier)) 51 | assert.Implements(t, (*Module)(nil), new(HTTPNotifier)) 52 | } 53 | 54 | func TestHttpNotifier_Configure(t *testing.T) { 55 | module := fixtureHTTPNotifier() 56 | 57 | module.Configure("test", "notifier.test") 58 | assert.NotNil(t, module.httpClient, "Expected httpClient to be set with a client object") 59 | } 60 | 61 | func TestHttpNotifier_Bad_Configuration(t *testing.T) { 62 | module := fixtureHTTPNotifier() 63 | viper.Set("notifier.test.url-open", "") 64 | 65 | assert.Panics(t, func() { module.Configure("test", "notifier.test") }, "HTTP notifier needs a supplied email") 66 | } 67 | 68 | func TestHttpNotifier_StartStop(t *testing.T) { 69 | module := fixtureHTTPNotifier() 70 | module.Configure("test", "notifier.test") 71 | 72 | err := module.Start() 73 | assert.Nil(t, err, "Expected Start to return no error") 74 | err = 
module.Stop() 75 | assert.Nil(t, err, "Expected Stop to return no error") 76 | } 77 | 78 | func TestHttpNotifier_AcceptConsumerGroup(t *testing.T) { 79 | module := fixtureHTTPNotifier() 80 | module.Configure("test", "notifier.test") 81 | 82 | // Should always return true 83 | assert.True(t, module.AcceptConsumerGroup(&protocol.ConsumerGroupStatus{}), "Expected any status to return True") 84 | } 85 | 86 | // Struct that will be used for sending HTTP requests for testing 87 | type HTTPRequest struct { 88 | Template string 89 | ID string 90 | Cluster string 91 | Group string 92 | } 93 | 94 | func TestHttpNotifier_Notify_Open(t *testing.T) { 95 | // handler that validates that we get the right values 96 | requestHandler := func(w http.ResponseWriter, r *http.Request) { 97 | // Must get an appropriate Content-Type header 98 | headers, ok := r.Header["Content-Type"] 99 | assert.True(t, ok, "Expected to receive Content-Type header") 100 | assert.Len(t, headers, 1, "Expected to receive exactly one Content-Type header") 101 | assert.Equalf(t, "application/json", headers[0], "Expected Content-Type header to be 'application/json', not '%v'", headers[0]) 102 | 103 | tokenHeaders, ok := r.Header["Token"] 104 | assert.True(t, ok, "Expected to receive Token header") 105 | assert.Equalf(t, "testtoken", tokenHeaders[0], "Expected Token header to be 'testtoken', not '%v'", tokenHeaders[0]) 106 | 107 | assert.Equalf(t, "id=testidstring", r.URL.RawQuery, "Expected URL querystring to be id=testidstring, not %v", r.URL) 108 | 109 | decoder := json.NewDecoder(r.Body) 110 | var req HTTPRequest 111 | err := decoder.Decode(&req) 112 | if err != nil { 113 | assert.Failf(t, "Failed to decode message body", "Failed to decode message body: %v", err.Error()) 114 | http.Error(w, err.Error(), http.StatusBadRequest) 115 | return 116 | } 117 | 118 | assert.Equalf(t, "template_open", req.Template, "Expected Template to be template_open, not %v", req.Template) 119 | assert.Equalf(t, "testidstring", 
req.ID, "Expected ID to be testidstring, not %v", req.ID) 120 | assert.Equalf(t, "testcluster", req.Cluster, "Expected Cluster to be testcluster, not %v", req.Cluster) 121 | assert.Equalf(t, "testgroup", req.Group, "Expected Group to be testgroup, not %v", req.Group) 122 | 123 | fmt.Fprint(w, "ok") 124 | } 125 | 126 | // create test server with handler 127 | ts := httptest.NewServer(http.HandlerFunc(requestHandler)) 128 | defer ts.Close() 129 | 130 | module := fixtureHTTPNotifier() 131 | viper.Set("notifier.test.url-open", fmt.Sprintf("%s?id={{.ID}}", ts.URL)) 132 | 133 | // Template sends the ID, cluster, and group 134 | module.templateOpen, _ = template.New("test").Parse("{\"template\":\"template_open\",\"id\":\"{{.ID}}\",\"cluster\":\"{{.Cluster}}\",\"group\":\"{{.Group}}\"}") 135 | 136 | module.Configure("test", "notifier.test") 137 | 138 | status := &protocol.ConsumerGroupStatus{ 139 | Status: protocol.StatusWarning, 140 | Cluster: "testcluster", 141 | Group: "testgroup", 142 | } 143 | 144 | module.Notify(status, "testidstring", time.Now(), false) 145 | } 146 | 147 | func TestHttpNotifier_Notify_Close(t *testing.T) { 148 | // handler that validates that we get the right values 149 | requestHandler := func(w http.ResponseWriter, r *http.Request) { 150 | // Must get an appropriate Content-Type header 151 | headers, ok := r.Header["Content-Type"] 152 | assert.True(t, ok, "Expected to receive Content-Type header") 153 | assert.Len(t, headers, 1, "Expected to receive exactly one Content-Type header") 154 | assert.Equalf(t, "application/json", headers[0], "Expected Content-Type header to be 'application/json', not '%v'", headers[0]) 155 | 156 | tokenHeaders, ok := r.Header["Token"] 157 | assert.True(t, ok, "Expected to receive Token header") 158 | assert.Equalf(t, "testtoken", tokenHeaders[0], "Expected Token header to be 'testtoken', not '%v'", tokenHeaders[0]) 159 | 160 | assert.Equalf(t, "id=testidstring", r.URL.RawQuery, "Expected URL querystring to be 
id=testidstring, not %v", r.URL) 161 | 162 | decoder := json.NewDecoder(r.Body) 163 | var req HTTPRequest 164 | err := decoder.Decode(&req) 165 | if err != nil { 166 | assert.Failf(t, "Failed to decode message body", "Failed to decode message body: %v", err.Error()) 167 | http.Error(w, err.Error(), http.StatusBadRequest) 168 | return 169 | } 170 | 171 | assert.Equalf(t, "template_close", req.Template, "Expected Template to be template_close, not %v", req.Template) 172 | assert.Equalf(t, "testidstring", req.ID, "Expected ID to be testidstring, not %v", req.ID) 173 | assert.Equalf(t, "testcluster", req.Cluster, "Expected Cluster to be testcluster, not %v", req.Cluster) 174 | assert.Equalf(t, "testgroup", req.Group, "Expected Group to be testgroup, not %v", req.Group) 175 | 176 | fmt.Fprint(w, "ok") 177 | } 178 | 179 | // create test server with handler 180 | ts := httptest.NewServer(http.HandlerFunc(requestHandler)) 181 | defer ts.Close() 182 | 183 | module := fixtureHTTPNotifier() 184 | viper.Set("notifier.test.send-close", true) 185 | viper.Set("notifier.test.url-close", fmt.Sprintf("%s?id={{.ID}}", ts.URL)) 186 | 187 | // Template sends the ID, cluster, and group 188 | module.templateClose, _ = template.New("test").Parse("{\"template\":\"template_close\",\"id\":\"{{.ID}}\",\"cluster\":\"{{.Cluster}}\",\"group\":\"{{.Group}}\"}") 189 | 190 | module.Configure("test", "notifier.test") 191 | 192 | status := &protocol.ConsumerGroupStatus{ 193 | Status: protocol.StatusWarning, 194 | Cluster: "testcluster", 195 | Group: "testgroup", 196 | } 197 | 198 | module.Notify(status, "testidstring", time.Now(), true) 199 | } 200 | -------------------------------------------------------------------------------- /core/internal/notifier/null.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. 
Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package notifier 11 | 12 | import ( 13 | "regexp" 14 | "text/template" 15 | "time" 16 | 17 | "go.uber.org/zap" 18 | 19 | "github.com/linkedin/Burrow/core/protocol" 20 | ) 21 | 22 | // NullNotifier is a no-op notifier that can be used for testing purposes in place of a mock. It does not make any 23 | // external calls, and will record if specific funcs are called. 24 | type NullNotifier struct { 25 | // App is a pointer to the application context. This stores the channel to the storage subsystem 26 | App *protocol.ApplicationContext 27 | 28 | // Log is a logger that has been configured for this module to use. 
Normally, this means it has been set up with 29 | // fields that are appropriate to identify this coordinator 30 | Log *zap.Logger 31 | 32 | name string 33 | groupAllowlist *regexp.Regexp 34 | groupDenylist *regexp.Regexp 35 | extras map[string]string 36 | templateOpen *template.Template 37 | templateClose *template.Template 38 | 39 | // CalledConfigure is set to true if the Configure method is called 40 | CalledConfigure bool 41 | 42 | // CalledStart is set to true if the Start method is called 43 | CalledStart bool 44 | 45 | // CalledStop is set to true if the Stop method is called 46 | CalledStop bool 47 | 48 | // CalledNotify is set to true if the Notify method is called 49 | CalledNotify bool 50 | 51 | // CalledAcceptConsumerGroup is set to true if the AcceptConsumerGroup method is called 52 | CalledAcceptConsumerGroup bool 53 | } 54 | 55 | // Configure sets the module name, but performs no other functions for the null notifier 56 | func (module *NullNotifier) Configure(name, configRoot string) { 57 | module.name = name 58 | module.CalledConfigure = true 59 | } 60 | 61 | // Start is a no-op for the null notifier. It always returns no error 62 | func (module *NullNotifier) Start() error { 63 | module.CalledStart = true 64 | return nil 65 | } 66 | 67 | // Stop is a no-op for the null notifier. 
It always returns no error 68 | func (module *NullNotifier) Stop() error { 69 | module.CalledStop = true 70 | return nil 71 | } 72 | 73 | // GetName returns the configured name of this module 74 | func (module *NullNotifier) GetName() string { 75 | return module.name 76 | } 77 | 78 | // GetGroupAllowlist returns the compiled group allowlist (or nil, if there is not one) 79 | func (module *NullNotifier) GetGroupAllowlist() *regexp.Regexp { 80 | return module.groupAllowlist 81 | } 82 | 83 | // GetGroupDenylist returns the compiled group denylist (or nil, if there is not one) 84 | func (module *NullNotifier) GetGroupDenylist() *regexp.Regexp { 85 | return module.groupDenylist 86 | } 87 | 88 | // GetLogger returns the configured zap.Logger for this notifier 89 | func (module *NullNotifier) GetLogger() *zap.Logger { 90 | return module.Log 91 | } 92 | 93 | // AcceptConsumerGroup has no additional function for the null notifier, and so always returns true 94 | func (module *NullNotifier) AcceptConsumerGroup(status *protocol.ConsumerGroupStatus) bool { 95 | module.CalledAcceptConsumerGroup = true 96 | return true 97 | } 98 | 99 | // Notify is a no-op for the null notifier 100 | func (module *NullNotifier) Notify(status *protocol.ConsumerGroupStatus, eventID string, startTime time.Time, stateGood bool) { 101 | module.CalledNotify = true 102 | } 103 | -------------------------------------------------------------------------------- /core/internal/storage/coordinator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. 
You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | // Package storage - Data storage subsystem. 11 | // The storage subsystem receives information from the cluster and consumer subsystems and serves that information out 12 | // to other subsystems on request. 13 | // 14 | // # Modules 15 | // 16 | // Currently, only one module is provided: 17 | // 18 | // * inmemory - Store all information in a set of in-memory maps 19 | package storage 20 | 21 | import ( 22 | "errors" 23 | "sync" 24 | 25 | "github.com/spf13/viper" 26 | "go.uber.org/zap" 27 | 28 | "github.com/linkedin/Burrow/core/internal/helpers" 29 | "github.com/linkedin/Burrow/core/protocol" 30 | ) 31 | 32 | // Module (storage) is responsible for maintaining all the broker and consumer offsets for all clusters that Burrow 33 | // watches. It must accept and respond to all protocol.StorageRequest types. This interface conforms to the overall 34 | // protocol.Module interface, but it adds a func to fetch the channel that the module is listening on for requests, so 35 | // that requests can be forwarded to it by the coordinator. 36 | type Module interface { 37 | protocol.Module 38 | GetCommunicationChannel() chan *protocol.StorageRequest 39 | } 40 | 41 | // Coordinator (storage) manages a single storage module (only one module is supported at this time), making sure it 42 | // is configured, started, and stopped at the appropriate time. It is also responsible for listening to the 43 | // StorageChannel that is provided in the application context and forwarding those requests to the storage module. If 44 | // no storage module has been configured explicitly, the coordinator starts the inmemory module as a default. 
45 | type Coordinator struct { 46 | // App is a pointer to the application context. This stores the channel to the storage subsystem 47 | App *protocol.ApplicationContext 48 | 49 | // Log is a logger that has been configured for this module to use. Normally, this means it has been set up with 50 | // fields that are appropriate to identify this coordinator 51 | Log *zap.Logger 52 | 53 | quitChannel chan struct{} 54 | modules map[string]protocol.Module 55 | running sync.WaitGroup 56 | } 57 | 58 | // getModuleForClass returns the correct module based on the passed className. As part of the Configure steps, if there 59 | // is any error, it will panic with an appropriate message describing the problem. 60 | func getModuleForClass(app *protocol.ApplicationContext, moduleName, className string) Module { 61 | switch className { 62 | case "inmemory": 63 | return &InMemoryStorage{ 64 | App: app, 65 | Log: app.Logger.With( 66 | zap.String("type", "module"), 67 | zap.String("coordinator", "storage"), 68 | zap.String("class", className), 69 | zap.String("name", moduleName), 70 | ), 71 | } 72 | default: 73 | panic("Unknown storage className provided: " + className) 74 | } 75 | } 76 | 77 | // Configure is called to create the configured storage module and call its Configure func to validate the 78 | // configuration and set it up. The coordinator will panic is more than one module is configured, and if no modules have 79 | // been configured, it will set up a default inmemory storage module. If there are any problems, it is expected that 80 | // this func will panic with a descriptive error message, as configuration failures are not recoverable errors. 
81 | func (sc *Coordinator) Configure() { 82 | sc.Log.Info("configuring") 83 | sc.quitChannel = make(chan struct{}) 84 | sc.modules = make(map[string]protocol.Module) 85 | sc.running = sync.WaitGroup{} 86 | 87 | modules := viper.GetStringMap("storage") 88 | switch len(modules) { 89 | case 0: 90 | // Create a default module 91 | viper.Set("storage.default.class-name", "inmemory") 92 | modules = viper.GetStringMap("storage") 93 | case 1: 94 | // Have one module. Just continue 95 | break 96 | default: 97 | panic("Only one storage module must be configured") 98 | } 99 | 100 | // Create all configured storage modules, add to list of storage 101 | for name := range modules { 102 | configRoot := "storage." + name 103 | module := getModuleForClass(sc.App, name, viper.GetString(configRoot+".class-name")) 104 | module.Configure(name, configRoot) 105 | sc.modules[name] = module 106 | } 107 | } 108 | 109 | // Start calls the storage module's underlying Start func. If the module Start returns an error, this func stops 110 | // immediately and returns that error to the caller. 111 | // 112 | // We also start a request forwarder goroutine. This listens to the StorageChannel that is provided in the application 113 | // context that all modules receive, and forwards those requests to the storage modules. At the present time, the 114 | // storage subsystem only supports one module, so this is a simple "accept and forward". 115 | func (sc *Coordinator) Start() error { 116 | sc.Log.Info("starting") 117 | 118 | // Start Storage modules 119 | err := helpers.StartCoordinatorModules(sc.modules) 120 | if err != nil { 121 | return errors.New("Error starting storage module: " + err.Error()) 122 | } 123 | 124 | // Start request forwarder 125 | go sc.mainLoop() 126 | return nil 127 | } 128 | 129 | // Stop calls the configured storage module's underlying Stop func. It is expected that the module Stop will not return 130 | // until the module has been completely stopped. 
While an error can be returned, this func always returns no error, as 131 | // a failure during stopping is not a critical failure 132 | func (sc *Coordinator) Stop() error { 133 | sc.Log.Info("stopping") 134 | 135 | close(sc.quitChannel) 136 | sc.running.Wait() 137 | 138 | // The individual storage modules can choose whether or not to implement a wait in the Stop routine 139 | helpers.StopCoordinatorModules(sc.modules) 140 | return nil 141 | } 142 | 143 | func (sc *Coordinator) mainLoop() { 144 | sc.running.Add(1) 145 | defer sc.running.Done() 146 | 147 | // We only support 1 module right now, so only send to that module 148 | var channel chan *protocol.StorageRequest 149 | for _, module := range sc.modules { 150 | channel = module.(Module).GetCommunicationChannel() 151 | } 152 | 153 | for { 154 | select { 155 | case request := <-sc.App.StorageChannel: 156 | // Yes, this forwarder is silly. However, in the future we want to support multiple storage modules 157 | // concurrently. However, that will require implementing a router that properly handles sets and 158 | // fetches and makes sure only 1 module responds to fetches 159 | channel <- request 160 | case <-sc.quitChannel: 161 | return 162 | } 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /core/internal/storage/coordinator_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | 10 | package storage 11 | 12 | import ( 13 | "github.com/spf13/viper" 14 | "go.uber.org/zap" 15 | 16 | "testing" 17 | 18 | "github.com/stretchr/testify/assert" 19 | 20 | "time" 21 | 22 | "github.com/linkedin/Burrow/core/protocol" 23 | ) 24 | 25 | func fixtureCoordinator() *Coordinator { 26 | coordinator := Coordinator{ 27 | Log: zap.NewNop(), 28 | } 29 | coordinator.App = &protocol.ApplicationContext{ 30 | Logger: zap.NewNop(), 31 | StorageChannel: make(chan *protocol.StorageRequest), 32 | } 33 | 34 | viper.Reset() 35 | viper.Set("storage.test.class-name", "inmemory") 36 | viper.Set("cluster.testcluster.class-name", "kafka") 37 | viper.Set("cluster.testcluster.servers", []string{"broker1.example.com:1234"}) 38 | 39 | return &coordinator 40 | } 41 | 42 | func TestCoordinator_ImplementsCoordinator(t *testing.T) { 43 | assert.Implements(t, (*protocol.Coordinator)(nil), new(Coordinator)) 44 | } 45 | 46 | func TestCoordinator_Configure(t *testing.T) { 47 | coordinator := fixtureCoordinator() 48 | coordinator.Configure() 49 | 50 | assert.Lenf(t, coordinator.modules, 1, "Expected 1 module configured, not %v", len(coordinator.modules)) 51 | } 52 | 53 | func TestCoordinator_Configure_NoModules(t *testing.T) { 54 | coordinator := fixtureCoordinator() 55 | viper.Reset() 56 | 57 | coordinator.Configure() 58 | 59 | assert.Lenf(t, coordinator.modules, 1, "Expected 1 module configured, not %v", len(coordinator.modules)) 60 | } 61 | 62 | func TestCoordinator_Configure_TwoModules(t *testing.T) { 63 | coordinator := fixtureCoordinator() 64 | viper.Set("storage.anothertest.class-name", "inmemory") 65 | 66 | assert.Panics(t, coordinator.Configure, "Expected panic") 67 | } 68 | 69 | func TestCoordinator_Start(t *testing.T) { 70 | coordinator := fixtureCoordinator() 71 | coordinator.Configure() 72 | coordinator.Start() 73 | 74 | // Best is to test a request that we know the response to 75 | request := &protocol.StorageRequest{ 76 | RequestType: protocol.StorageFetchClusters, 77 | 
Reply: make(chan interface{}), 78 | } 79 | coordinator.App.StorageChannel <- request 80 | response := <-request.Reply 81 | 82 | assert.IsType(t, []string{}, response, "Expected response to be of type []string") 83 | val := response.([]string) 84 | assert.Len(t, val, 1, "One entry not returned") 85 | assert.Equalf(t, val[0], "testcluster", "Expected return value to be 'testcluster', not %v", val[0]) 86 | 87 | _, ok := <-request.Reply 88 | assert.False(t, ok, "Expected channel to be closed") 89 | 90 | time.Sleep(10 * time.Millisecond) 91 | coordinator.Stop() 92 | } 93 | 94 | func TestCoordinator_MultipleRequests(t *testing.T) { 95 | coordinator := CoordinatorWithOffsets() 96 | coordinator.Stop() 97 | } 98 | -------------------------------------------------------------------------------- /core/internal/storage/fixtures.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package storage 11 | 12 | import ( 13 | "time" 14 | 15 | "github.com/spf13/viper" 16 | "go.uber.org/zap" 17 | 18 | "github.com/linkedin/Burrow/core/protocol" 19 | ) 20 | 21 | // CoordinatorWithOffsets sets up a Coordinator with a single inmemory module defined. This module is loaded with 22 | // offsets for a test cluster and group. This func should never be called in normal code. It is only provided to 23 | // facilitate testing by other subsystems. 
24 | func CoordinatorWithOffsets() *Coordinator { 25 | coordinator := Coordinator{ 26 | Log: zap.NewNop(), 27 | } 28 | coordinator.App = &protocol.ApplicationContext{ 29 | Logger: zap.NewNop(), 30 | StorageChannel: make(chan *protocol.StorageRequest), 31 | } 32 | 33 | viper.Reset() 34 | viper.Set("storage.test.class-name", "inmemory") 35 | viper.Set("storage.test.intervals", 10) 36 | viper.Set("storage.test.min-distance", 0) 37 | viper.Set("storage.test.group-allowlist", "") 38 | viper.Set("cluster.testcluster.class-name", "kafka") 39 | 40 | coordinator.Configure() 41 | coordinator.Start() 42 | 43 | // Add a broker offset 44 | coordinator.App.StorageChannel <- &protocol.StorageRequest{ 45 | RequestType: protocol.StorageSetBrokerOffset, 46 | Cluster: "testcluster", 47 | Topic: "testtopic", 48 | Partition: 0, 49 | TopicPartitionCount: 1, 50 | Offset: 4321, 51 | Order: 9, 52 | Timestamp: 9876, 53 | } 54 | time.Sleep(100 * time.Millisecond) 55 | 56 | // Add consumer offsets for a full ring 57 | startTime := (time.Now().Unix() * 1000) - 100000 58 | for i := 0; i < 10; i++ { 59 | coordinator.App.StorageChannel <- &protocol.StorageRequest{ 60 | RequestType: protocol.StorageSetConsumerOffset, 61 | Cluster: "testcluster", 62 | Topic: "testtopic", 63 | Group: "testgroup", 64 | Partition: 0, 65 | Order: int64(i + 10), 66 | Offset: int64(1000 + (i * 100)), 67 | Timestamp: startTime + int64((i * 10000)), 68 | } 69 | 70 | // If we don't sleep while submitting these, we can end up with false test results due to race conditions 71 | time.Sleep(10 * time.Millisecond) 72 | } 73 | 74 | // Add a second group with a partial ring 75 | for i := 0; i < 5; i++ { 76 | coordinator.App.StorageChannel <- &protocol.StorageRequest{ 77 | RequestType: protocol.StorageSetConsumerOffset, 78 | Cluster: "testcluster", 79 | Topic: "testtopic", 80 | Group: "testgroup2", 81 | Partition: 0, 82 | Order: int64(10 + i), 83 | Offset: int64(1000 + (i * 100)), 84 | Timestamp: startTime + int64((i * 10000)), 85 
| } 86 | 87 | // If we don't sleep while submitting these, we can end up with false test results due to race conditions 88 | time.Sleep(10 * time.Millisecond) 89 | } 90 | 91 | // Sleep just a little more to make sure everything's processed 92 | time.Sleep(100 * time.Millisecond) 93 | return &coordinator 94 | } 95 | -------------------------------------------------------------------------------- /core/internal/zookeeper/coordinator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | // Package zookeeper - Common Zookeeper subsystem. 11 | // The zookeeper subsystem provides a Zookeeper client that is common across all of Burrow, and can be used by other 12 | // subsystems to store metadata or coordinate operations between multiple Burrow instances. It is used primarily to 13 | // assure that only one Burrow instance is sending notifications at any time. 14 | package zookeeper 15 | 16 | import ( 17 | "strings" 18 | "sync" 19 | "time" 20 | 21 | "github.com/linkedin/go-zk" 22 | "github.com/spf13/viper" 23 | "go.uber.org/zap" 24 | 25 | "github.com/linkedin/Burrow/core/internal/helpers" 26 | "github.com/linkedin/Burrow/core/protocol" 27 | ) 28 | 29 | // Coordinator (zookeeper) manages a single Zookeeper connection for other coordinators and modules to make use of in 30 | // order to store metadata for Burrow itself. This is not required to connect to the same Zookeeper ensemble as any 31 | // specific Kafka cluster. 
The ZookeeperClient is stored in the application context, as well as the root path that 32 | // any modules should create their metadata underneath. 33 | // 34 | // The coordinator monitors the connection state transitions and signals when the session is expired, and then when it 35 | // reconnects. Code that must be aware of session expirations, such as code that makes use of watches, should have a 36 | // structure as in the example. 37 | type Coordinator struct { 38 | App *protocol.ApplicationContext 39 | Log *zap.Logger 40 | 41 | servers []string 42 | connectFunc func([]string, time.Duration, *zap.Logger) (protocol.ZookeeperClient, <-chan zk.Event, error) 43 | running sync.WaitGroup 44 | } 45 | 46 | // Configure validates that the configuration has a list of servers provided for the Zookeeper ensemble, of the form 47 | // host:port. It also checks the provided root path, using a default of "/burrow" if none has been provided. 48 | func (zc *Coordinator) Configure() { 49 | zc.Log.Info("configuring") 50 | 51 | // if zookeeper.tls has been set, use the TLS connect function otherwise use default connect 52 | if zc.connectFunc == nil && viper.IsSet("zookeeper.tls") { 53 | zc.connectFunc = helpers.ZookeeperConnectTLS 54 | } else if zc.connectFunc == nil { 55 | zc.connectFunc = helpers.ZookeeperConnect 56 | } 57 | 58 | // Set and check configs 59 | viper.SetDefault("zookeeper.timeout", 6) 60 | viper.SetDefault("zookeeper.root-path", "/burrow") 61 | 62 | zc.servers = viper.GetStringSlice("zookeeper.servers") 63 | if len(zc.servers) == 0 { 64 | panic("No Zookeeper servers specified") 65 | } else if !helpers.ValidateHostList(zc.servers) { 66 | panic("Failed to validate Zookeeper servers") 67 | } 68 | 69 | zc.App.ZookeeperRoot = viper.GetString("zookeeper.root-path") 70 | if !helpers.ValidateZookeeperPath(zc.App.ZookeeperRoot) { 71 | panic("Zookeeper root path is not valid") 72 | } 73 | 74 | zc.running = sync.WaitGroup{} 75 | } 76 | 77 | // Start creates the connection to 
the Zookeeper ensemble, and assures that the root path exists. Once that is done, 78 | // it sets the ZookeeperConnected flag in the application context to true, and creates the ZookeeperExpired condition 79 | // flag. It then starts a main loop to watch for connection state changes. 80 | func (zc *Coordinator) Start() error { 81 | zc.Log.Info("starting") 82 | 83 | // This ZK client will be shared by other parts of Burrow for things like locks 84 | // NOTE - samuel/go-zookeeper does not support chroot, so we pass along the configured root path in config 85 | zkConn, connEventChan, err := zc.connectFunc(zc.servers, viper.GetDuration("zookeeper.timeout")*time.Second, zc.Log) 86 | if err != nil { 87 | zc.Log.Panic("Failure to start zookeeper", zap.String("error", err.Error())) 88 | return err 89 | } 90 | zc.App.Zookeeper = zkConn 91 | 92 | // Assure that our root path exists 93 | err = zc.createRecursive(zc.App.ZookeeperRoot) 94 | if err != nil { 95 | zc.Log.Error("cannot create root path", zap.Error(err)) 96 | return err 97 | } 98 | 99 | zc.App.ZookeeperConnected = true 100 | zc.App.ZookeeperExpired = &sync.Cond{L: &sync.Mutex{}} 101 | 102 | go zc.mainLoop(connEventChan) 103 | 104 | return nil 105 | } 106 | 107 | // Stop closes the connection to the Zookeeper ensemble and waits for the connection state monitor to exit (which it 108 | // will because the event channel will be closed). 
109 | func (zc *Coordinator) Stop() error { 110 | zc.Log.Info("stopping") 111 | 112 | // This will close the event channel, closing the mainLoop 113 | zc.App.Zookeeper.Close() 114 | zc.running.Wait() 115 | 116 | return nil 117 | } 118 | 119 | func (zc *Coordinator) createRecursive(path string) error { 120 | if path == "/" { 121 | return nil 122 | } 123 | 124 | parts := strings.Split(path, "/") 125 | for i := 2; i <= len(parts); i++ { 126 | // If the rootpath exists, skip the Create process to avoid "zk: not authenticated" error 127 | exist, _, errExists := zc.App.Zookeeper.Exists(strings.Join(parts[:i], "/")) 128 | if !exist { 129 | _, err := zc.App.Zookeeper.Create(strings.Join(parts[:i], "/"), []byte{}, 0, zk.WorldACL(zk.PermAll)) 130 | // Ignore when the node exists already 131 | if (err != nil) && (err != zk.ErrNodeExists) { 132 | return err 133 | } 134 | } else { 135 | return errExists 136 | } 137 | } 138 | return nil 139 | } 140 | 141 | func (zc *Coordinator) mainLoop(eventChan <-chan zk.Event) { 142 | zc.running.Add(1) 143 | defer zc.running.Done() 144 | 145 | for event := range eventChan { 146 | if event.Type == zk.EventSession { 147 | switch event.State { 148 | case zk.StateExpired: 149 | zc.Log.Error("session expired") 150 | zc.App.ZookeeperConnected = false 151 | zc.App.ZookeeperExpired.Broadcast() 152 | case zk.StateConnected: 153 | if !zc.App.ZookeeperConnected { 154 | zc.Log.Info("starting session") 155 | zc.App.ZookeeperConnected = true 156 | } 157 | } 158 | } 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /core/internal/zookeeper/coordinator_test.go: -------------------------------------------------------------------------------- 1 | //go:build !race 2 | // +build !race 3 | 4 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 5 | // 2.0 (the "License"); you may not use this file except in compliance with 6 | // the License. 
You may obtain a copy of the License at 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | 13 | package zookeeper 14 | 15 | import ( 16 | "sync" 17 | "testing" 18 | "time" 19 | 20 | "github.com/stretchr/testify/assert" 21 | "github.com/stretchr/testify/mock" 22 | 23 | "github.com/linkedin/go-zk" 24 | "github.com/spf13/viper" 25 | "go.uber.org/zap" 26 | 27 | "github.com/linkedin/Burrow/core/internal/helpers" 28 | "github.com/linkedin/Burrow/core/protocol" 29 | ) 30 | 31 | func fixtureCoordinator() *Coordinator { 32 | coordinator := Coordinator{ 33 | Log: zap.NewNop(), 34 | } 35 | coordinator.App = &protocol.ApplicationContext{ 36 | Logger: zap.NewNop(), 37 | } 38 | 39 | viper.Reset() 40 | viper.Set("zookeeper.root-path", "/test/path/burrow") 41 | viper.Set("zookeeper.servers", []string{"zk.example.com:2181"}) 42 | viper.Set("zookeeper.timeout", 5) 43 | 44 | return &coordinator 45 | } 46 | 47 | func TestCoordinator_ImplementsCoordinator(t *testing.T) { 48 | assert.Implements(t, (*protocol.Coordinator)(nil), new(Coordinator)) 49 | } 50 | 51 | func TestCoordinator_Configure(t *testing.T) { 52 | coordinator := fixtureCoordinator() 53 | coordinator.Configure() 54 | 55 | assert.NotNil(t, coordinator.connectFunc, "Expected connectFunc to get set") 56 | } 57 | 58 | func TestCoordinator_StartStop(t *testing.T) { 59 | coordinator := fixtureCoordinator() 60 | 61 | // mock the connectFunc to return a mock client 62 | mockClient := helpers.MockZookeeperClient{} 63 | eventChan := make(chan zk.Event) 64 | coordinator.connectFunc = func(servers []string, timeout time.Duration, logger *zap.Logger) (protocol.ZookeeperClient, <-chan zk.Event, error) { 65 | return &mockClient, eventChan, nil 66 | } 67 | 68 | mockClient.On("Create", "/test", 
[]byte{}, int32(0), zk.WorldACL(zk.PermAll)).Return("", zk.ErrNodeExists) 69 | mockClient.On("Create", "/test/path", []byte{}, int32(0), zk.WorldACL(zk.PermAll)).Return("", zk.ErrNodeExists) 70 | mockClient.On("Create", "/test/path/burrow", []byte{}, int32(0), zk.WorldACL(zk.PermAll)).Return("", nil) 71 | mockClient.On("Close").Run(func(args mock.Arguments) { close(eventChan) }).Return() 72 | 73 | coordinator.Configure() 74 | err := coordinator.Start() 75 | assert.Nil(t, err, "Expected Start to not return an error") 76 | assert.Equal(t, &mockClient, coordinator.App.Zookeeper, "Expected App.Zookeeper to be set to the mock client") 77 | assert.Equalf(t, "/test/path/burrow", coordinator.App.ZookeeperRoot, "Expected App.ZookeeperRoot to be /test/path/burrow, not %v", coordinator.App.ZookeeperRoot) 78 | assert.True(t, coordinator.App.ZookeeperConnected, "Expected App.ZookeeperConnected to be true") 79 | assert.NotNil(t, coordinator.App.ZookeeperExpired, "Expected App.ZookeeperExpired to be set") 80 | 81 | err = coordinator.Stop() 82 | assert.Nil(t, err, "Expected Stop to not return an error") 83 | } 84 | 85 | func TestCoordinator_mainLoop(t *testing.T) { 86 | coordinator := fixtureCoordinator() 87 | coordinator.running = sync.WaitGroup{} 88 | coordinator.App.ZookeeperConnected = true 89 | coordinator.App.ZookeeperExpired = &sync.Cond{L: &sync.Mutex{}} 90 | 91 | eventChan := make(chan zk.Event) 92 | go coordinator.mainLoop(eventChan) 93 | 94 | // Nothing should change 95 | eventChan <- zk.Event{ 96 | Type: zk.EventSession, 97 | State: zk.StateDisconnected, 98 | } 99 | assert.True(t, coordinator.App.ZookeeperConnected, "Expected App.ZookeeperConnected to remain true") 100 | 101 | // On Expiration, the condition should be set and connected should be false 102 | coordinator.App.ZookeeperExpired.L.Lock() 103 | eventChan <- zk.Event{ 104 | Type: zk.EventSession, 105 | State: zk.StateExpired, 106 | } 107 | coordinator.App.ZookeeperExpired.Wait() 108 | 
// Example for the Coordinator docs on how to do connection state monitoring.
func ExampleCoordinator_stateMonitoring() {
	// Ignore me - needed to make the example clean
	app := &protocol.ApplicationContext{}

	for {
		// Wait for the Zookeeper connection to be connected.
		// NOTE(review): this polls the boolean without synchronization; the
		// sleep keeps the loop from spinning while the connection comes up.
		for !app.ZookeeperConnected {
			// Sleep before looping around to prevent a tight loop
			time.Sleep(100 * time.Millisecond)
			continue
		}

		// Zookeeper is connected
		// Do all the work you need to do setting up watches, locks, etc.

		// Wait on the condition that signals that the session has expired.
		// The condition's lock must be held before calling Wait (sync.Cond contract).
		app.ZookeeperExpired.L.Lock()
		app.ZookeeperExpired.Wait()
		app.ZookeeperExpired.L.Unlock()

		// The Zookeeper session has been lost
		// Do any work that you need to in order to clean up, or stop work that was happening inside a lock

		// Loop around to wait for the Zookeeper session to be established again
	}
}
You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | package core 11 | 12 | import ( 13 | "fmt" 14 | "os" 15 | "strconv" 16 | "strings" 17 | "syscall" 18 | "time" 19 | 20 | "github.com/spf13/viper" 21 | "go.uber.org/zap" 22 | "go.uber.org/zap/zapcore" 23 | "gopkg.in/natefinch/lumberjack.v2" 24 | ) 25 | 26 | // CheckAndCreatePidFile takes a single argument, which is the path to a PID file (a file that contains a single 27 | // integer, which is the process ID of a running process). If this file exists, and if the PID is that of a running 28 | // process, return false as that indicates another copy of this process is already running. Otherwise, create the 29 | // file and write this process's PID to the file and return true. Any error doing this (such as not having permissions 30 | // to write the file) will return false. 31 | // 32 | // This func should be called when Burrow starts to prevent multiple copies from running. 33 | func CheckAndCreatePidFile(filename string) bool { 34 | // Check if the PID file exists 35 | if _, err := os.Stat(filename); !os.IsNotExist(err) { 36 | // The file exists, so read it and check if the PID specified is running 37 | pidString, err := os.ReadFile(filename) 38 | if err != nil { 39 | fmt.Printf("Cannot read PID file: %v", err) 40 | return false 41 | } 42 | pid, err := strconv.Atoi(string(pidString)) 43 | if err != nil { 44 | fmt.Printf("Cannot interpret contents of PID file: %v", err) 45 | return false 46 | } 47 | 48 | if pid == os.Getpid() { 49 | // This could happen inside a docker 50 | // container, e.g. the pid of Burrow could be 51 | // equal to 1 each time the container is 52 | // restarted. 
53 | fmt.Println("Found existing pidfile matching current pid") 54 | return true 55 | } 56 | 57 | // Try sending a signal to the process to see if it is still running 58 | process, err := os.FindProcess(pid) 59 | if err == nil { 60 | err = process.Signal(syscall.Signal(0)) 61 | if (err == nil) || (err == syscall.EPERM) { 62 | // The process exists, so we're going to assume it's an old Burrow and we shouldn't start 63 | fmt.Printf("Existing process running on PID %d. Exiting (my pid = %d)", pid, os.Getpid()) 64 | return false 65 | } 66 | } 67 | } 68 | 69 | // Create a PID file, replacing any existing one (as we already checked it) 70 | pidfile, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644) 71 | if err != nil { 72 | fmt.Printf("Cannot write PID file: %v", err) 73 | return false 74 | } 75 | fmt.Fprintf(pidfile, "%v", os.Getpid()) 76 | pidfile.Close() 77 | return true 78 | } 79 | 80 | // RemovePidFile takes a single argument, which is the path to a PID file. That file is deleted. This func should be 81 | // called when Burrow exits. 82 | func RemovePidFile(filename string) { 83 | err := os.Remove(filename) 84 | if err != nil { 85 | fmt.Printf("Failed to remove PID file: %v\n", err) 86 | } 87 | } 88 | 89 | // ConfigureLogger returns a configured zap.Logger which can be used by Burrow for all logging. It also returns a 90 | // zap.AtomicLevel, which can be used to dynamically adjust the level of the logger. The configuration for the logger 91 | // is read from viper, with the following defaults: 92 | // 93 | // logging.level = info 94 | // 95 | // If logging.filename (path to the log file) is provided, a rolling log file is set up using lumberjack. 
The 96 | // configuration for that log file is read from viper, with the following defaults: 97 | // 98 | // logging.maxsize = 100 99 | // logging.maxbackups = 10 100 | // logging.maxage = 30 101 | // logging.use-localtime = false 102 | // logging.use-compression = false 103 | func ConfigureLogger() (*zap.Logger, *zap.AtomicLevel) { 104 | var level zap.AtomicLevel 105 | var syncOutput zapcore.WriteSyncer 106 | 107 | // Set config defaults for logging 108 | viper.SetDefault("logging.level", "info") 109 | viper.SetDefault("logging.maxsize", 100) 110 | viper.SetDefault("logging.maxbackups", 10) 111 | viper.SetDefault("logging.maxage", 30) 112 | 113 | // Create an AtomicLevel that we can use elsewhere to dynamically change the logging level 114 | logLevel := viper.GetString("logging.level") 115 | switch strings.ToLower(logLevel) { 116 | case "", "info": 117 | level = zap.NewAtomicLevelAt(zap.InfoLevel) 118 | case "debug": 119 | level = zap.NewAtomicLevelAt(zap.DebugLevel) 120 | case "warn": 121 | level = zap.NewAtomicLevelAt(zap.WarnLevel) 122 | case "error": 123 | level = zap.NewAtomicLevelAt(zap.ErrorLevel) 124 | case "panic": 125 | level = zap.NewAtomicLevelAt(zap.PanicLevel) 126 | case "fatal": 127 | level = zap.NewAtomicLevelAt(zap.FatalLevel) 128 | default: 129 | fmt.Printf("Invalid log level supplied. Defaulting to info: %s", logLevel) 130 | level = zap.NewAtomicLevelAt(zap.InfoLevel) 131 | } 132 | 133 | // If a filename has been set, set up a rotating logger. 
Otherwise, use Stdout 134 | logFilename := viper.GetString("logging.filename") 135 | if logFilename != "" { 136 | syncOutput = zapcore.AddSync(&lumberjack.Logger{ 137 | Filename: logFilename, 138 | MaxSize: viper.GetInt("logging.maxsize"), 139 | MaxBackups: viper.GetInt("logging.maxbackups"), 140 | MaxAge: viper.GetInt("logging.maxage"), 141 | LocalTime: viper.GetBool("logging.use-localtime"), 142 | Compress: viper.GetBool("logging.use-compression"), 143 | }) 144 | } else { 145 | syncOutput = zapcore.Lock(os.Stdout) 146 | } 147 | 148 | core := zapcore.NewCore( 149 | zapcore.NewJSONEncoder(zap.NewProductionEncoderConfig()), 150 | syncOutput, 151 | level, 152 | ) 153 | logger := zap.New(core) 154 | zap.ReplaceGlobals(logger) 155 | return logger, &level 156 | } 157 | 158 | // OpenOutLog takes a single argument, which is the path to a log file. This process's stdout and stderr are redirected 159 | // to this log file. The os.File object is returned so that it can be managed. 160 | func OpenOutLog(filename string) *os.File { 161 | // Move existing out file to a dated file if it exists 162 | if _, err := os.Stat(filename); err == nil { 163 | if err = os.Rename(filename, filename+"."+time.Now().Format("2006-01-02_15:04:05")); err != nil { 164 | fmt.Printf("Cannot move old out file: %v", err) 165 | os.Exit(1) 166 | } 167 | } 168 | 169 | // Redirect stdout and stderr to out file 170 | logFile, _ := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_SYNC, 0o644) 171 | internalDup2(logFile.Fd(), 1) 172 | internalDup2(logFile.Fd(), 2) 173 | return logFile 174 | } 175 | -------------------------------------------------------------------------------- /core/open_out_log_linux_dup3.go: -------------------------------------------------------------------------------- 1 | //go:build linux && (arm64 || riscv64) 2 | // +build linux 3 | // +build arm64 riscv64 4 | 5 | // Copyright 2017 LinkedIn Corp. 
// internalDup2 duplicates oldfd onto newfd, used to redirect stdout/stderr to
// a log file. linux_arm64 and linux_riscv64 don't have syscall.Dup2, so use
// the nearly identical syscall.Dup3 instead (with flags=0, Dup3 behaves like
// Dup2 except when oldfd == newfd, which does not occur for this usage).
func internalDup2(oldfd uintptr, newfd uintptr) error {
	return syscall.Dup3(int(oldfd), int(newfd), 0)
}
// internalDup2 duplicates oldfd onto newfd using the classic dup2 syscall,
// available on the unix platforms selected by this file's build constraints.
// Used to redirect this process's stdout/stderr to a log file.
func internalDup2(oldfd, newfd uintptr) error {
	return syscall.Dup2(int(oldfd), int(newfd))
}
var (
	kernel32         = syscall.MustLoadDLL("kernel32.dll")
	procSetStdHandle = kernel32.MustFindProc("SetStdHandle")
)

// internalDup2 emulates POSIX dup2 on Windows by calling kernel32's
// SetStdHandle to point the standard handle newfd at oldfd. SetStdHandle
// returns zero on failure; in that case return the Win32 error if one was
// reported, falling back to EINVAL.
// NOTE(review): syscall.Syscall is deprecated in favor of Proc.Call — confirm
// before modernizing, as the error-handling shape differs slightly.
func internalDup2(oldfd uintptr, newfd uintptr) error {
	r0, _, e1 := syscall.Syscall(procSetStdHandle.Addr(), 2, oldfd, newfd, 0)
	if r0 == 0 {
		if e1 != 0 {
			return error(e1)
		}
		return syscall.EINVAL
	}
	return nil
}
// PartitionStatus represents the evaluated state of a single consumed partition.
type PartitionStatus struct {
	// The topic name for this partition
	Topic string `json:"topic"`

	// The partition ID
	Partition int32 `json:"partition"`

	// If available (for active new consumers), the consumer host that currently owns this partition
	Owner string `json:"owner"`

	// If available (for active new consumers), the client_id of the consumer that currently owns this partition
	ClientID string `json:"client_id"`

	// The status of the partition
	Status StatusConstant `json:"status"`

	// A ConsumerOffset object that describes the first (oldest) offset that Burrow is storing for this partition
	Start *ConsumerOffset `json:"start"`

	// A ConsumerOffset object that describes the last (latest) offset that Burrow is storing for this partition
	End *ConsumerOffset `json:"end"`

	// The current number of messages that the consumer is behind for this partition. This is calculated using the
	// last committed offset and the current broker end offset
	CurrentLag uint64 `json:"current_lag"`

	// A number between 0.0 and 1.0 that describes the percentage complete the offset information is for this
	// partition. For example, if Burrow has been configured to store 10 offsets, and Burrow has only stored 7
	// commits for this partition, Complete will be 0.7
	Complete float32 `json:"complete"`
}
// StatusConstant describes the state of a partition or group as a single value. The values are ordered from least
// to most severe, with zero reserved to indicate that a group is not found.
type StatusConstant int

const (
	// StatusNotFound indicates that the consumer group does not exist. It is not used for partition status.
	StatusNotFound StatusConstant = iota

	// StatusOK indicates that a partition is in a good state. For a group, it indicates that all partitions are in a
	// good state.
	StatusOK

	// StatusWarning indicates that a partition is lagging - it is making progress, but falling further behind. For a
	// group, it indicates that one or more partitions are lagging.
	StatusWarning

	// StatusError indicates that a group has one or more partitions that are in the Stop, Stall, or Rewind states. It
	// is not used for partition status.
	StatusError

	// StatusStop indicates that the consumer has not committed an offset for that partition in some time, and the lag
	// is non-zero. It is not used for group status.
	StatusStop

	// StatusStall indicates that the consumer is committing offsets for the partition, but they are not increasing and
	// the lag is non-zero. It is not used for group status.
	StatusStall

	// StatusRewind indicates that the consumer has committed an offset for the partition that is less than the
	// previous offset. It is not used for group status.
	StatusRewind
)

// statusStrings maps each StatusConstant, by numeric value, to its wire representation.
var statusStrings = [...]string{"NOTFOUND", "OK", "WARN", "ERR", "STOP", "STALL", "REWIND"}

// String returns the string representation of a StatusConstant, or "UNKNOWN"
// for values outside the defined range.
func (c StatusConstant) String() string {
	if c < 0 || int(c) >= len(statusStrings) {
		return "UNKNOWN"
	}
	return statusStrings[c]
}

// MarshalText implements the encoding.TextMarshaler interface using the string
// representation of the constant.
func (c StatusConstant) MarshalText() ([]byte, error) {
	return []byte(c.String()), nil
}

// MarshalJSON implements the json.Marshaler interface using the string
// representation of the constant.
func (c StatusConstant) MarshalJSON() ([]byte, error) {
	return json.Marshal(c.String())
}
Requires Cluster, Topic, Partition, 19 | // TopicPartitionCount, and Offset fields 20 | StorageSetBrokerOffset StorageRequestConstant = 0 21 | 22 | // StorageSetConsumerOffset is the request type to store a consumer offset. Requires Cluster, Group, Topic, 23 | // Partition, Offset, and Timestamp fields 24 | StorageSetConsumerOffset StorageRequestConstant = 1 25 | 26 | // StorageSetConsumerOwner is the request type to store a consumer owner. Requires Cluster, Group, Topic, Partition, 27 | // and Owner fields 28 | StorageSetConsumerOwner StorageRequestConstant = 2 29 | 30 | // StorageSetDeleteTopic is the request type to remove a topic from the broker and all consumers. Requires Cluster, 31 | // Group, and Topic fields 32 | StorageSetDeleteTopic StorageRequestConstant = 3 33 | 34 | // StorageSetDeleteGroup is the request type to remove a consumer group. Requires Cluster and Group fields 35 | StorageSetDeleteGroup StorageRequestConstant = 4 36 | 37 | // StorageFetchClusters is the request type to retrieve a list of clusters. Requires Reply. Returns a []string 38 | StorageFetchClusters StorageRequestConstant = 5 39 | 40 | // StorageFetchConsumers is the request type to retrieve a list of consumer groups in a cluster. Requires Reply and 41 | // Cluster fields. Returns a []string 42 | StorageFetchConsumers StorageRequestConstant = 6 43 | 44 | // StorageFetchTopics is the request type to retrieve a list of topics in a cluster. Requires Reply and Cluster 45 | // fields. Returns a []string 46 | StorageFetchTopics StorageRequestConstant = 7 47 | 48 | // StorageFetchConsumer is the request type to retrieve all stored information for a single consumer group. Requires 49 | // Reply, Cluster, and Group fields. Returns a ConsumerTopics object 50 | StorageFetchConsumer StorageRequestConstant = 8 51 | 52 | // StorageFetchTopic is the request type to retrieve the current broker offsets (one per partition) for a topic. 53 | // Requires Reply, Cluster, and Topic fields. 
54 | // Returns a []int64 55 | StorageFetchTopic StorageRequestConstant = 9 56 | 57 | // StorageClearConsumerOwners is the request type to remove all partition owner information for a single group. 58 | // Requires Cluster and Group fields 59 | StorageClearConsumerOwners StorageRequestConstant = 10 60 | 61 | // StorageFetchConsumersForTopic is the request type to obtain a list of all consumer groups consuming from a topic. 62 | // Returns a []string 63 | StorageFetchConsumersForTopic StorageRequestConstant = 11 64 | ) 65 | 66 | var storageRequestStrings = [...]string{ 67 | "StorageSetBrokerOffset", 68 | "StorageSetConsumerOffset", 69 | "StorageSetConsumerOwner", 70 | "StorageSetDeleteTopic", 71 | "StorageSetDeleteGroup", 72 | "StorageFetchClusters", 73 | "StorageFetchConsumers", 74 | "StorageFetchTopics", 75 | "StorageFetchConsumer", 76 | "StorageFetchTopic", 77 | "StorageClearConsumerOwners", 78 | "StorageFetchConsumersForTopic", 79 | } 80 | 81 | // String returns a string representation of a StorageRequestConstant for logging 82 | func (c StorageRequestConstant) String() string { 83 | if (c >= 0) && (c < StorageRequestConstant(len(storageRequestStrings))) { 84 | return storageRequestStrings[c] 85 | } 86 | return "UNKNOWN" 87 | } 88 | 89 | // MarshalText implements the encoding.TextMarshaler interface. The status is the string representation of 90 | // StorageRequestConstant 91 | func (c StorageRequestConstant) MarshalText() ([]byte, error) { 92 | return []byte(c.String()), nil 93 | } 94 | 95 | // MarshalJSON implements the json.Marshaler interface. The status is the string representation of 96 | // StorageRequestConstant 97 | func (c StorageRequestConstant) MarshalJSON() ([]byte, error) { 98 | return json.Marshal(c.String()) 99 | } 100 | 101 | // StorageRequest is sent over the StorageChannel that is stored in the application context. It is a query to either 102 | // send information to the storage subsystem, or retrieve information from it . 
// StorageRequest is sent over the StorageChannel that is stored in the application context. It is a query to either
// send information to the storage subsystem, or retrieve information from it. The RequestType indicates the
// particular type of request. "Set" and "Clear" requests do not get a response. "Fetch" requests will send a response
// over the Reply channel supplied in the request.
type StorageRequest struct {
	// The type of request that this struct encapsulates
	RequestType StorageRequestConstant

	// If the RequestType is a "Fetch" request, Reply must contain a channel to receive the response on
	Reply chan interface{}

	// The name of the cluster to which the request applies. Required for all request types except StorageFetchClusters
	Cluster string

	// The name of the consumer group to which the request applies
	Group string

	// The name of the topic to which the request applies
	Topic string

	// The ID of the partition to which the request applies
	Partition int32

	// For StorageSetBrokerOffset requests, TopicPartitionCount indicates the total number of partitions for the topic
	TopicPartitionCount int32

	// For StorageSetBrokerOffset and StorageSetConsumerOffset requests, the offset to store
	Offset int64

	// For StorageSetConsumerOffset requests, the offset of the offset commit itself (i.e. the __consumer_offsets offset)
	Order int64

	// For StorageSetConsumerOffset requests, the timestamp of the offset being stored
	Timestamp int64

	// For StorageSetConsumerOwner requests, a string describing the consumer host that owns the partition
	Owner string

	// For StorageSetConsumerOwner requests, a string containing the client_id set by the consumer
	ClientID string
}
It is used as part of the 144 | // response to a StorageFetchConsumer request 145 | type ConsumerPartition struct { 146 | // A slice containing a ConsumerOffset object for each offset Burrow has stored for this partition. This can be any 147 | // length up to the number of intervals Burrow has been configured to store, depending on how many offset commits 148 | // have been seen for this partition 149 | Offsets []*ConsumerOffset `json:"offsets"` 150 | 151 | // A slice containing the history of broker offsets stored for this partition. This is used for evaluation only, 152 | // and as such it is not provided when encoding to JSON (for HTTP responses) 153 | BrokerOffsets []int64 `json:"-"` 154 | 155 | // A string that describes the consumer host that currently owns this partition, if the information is available 156 | // (for active new consumers) 157 | Owner string `json:"owner"` 158 | 159 | // A string containing the client_id set by the consumer (for active new consumers) 160 | ClientID string `json:"client_id"` 161 | 162 | // The current number of messages that the consumer is behind for this partition. This is calculated using the 163 | // last committed offset and the current broker end offset 164 | CurrentLag uint64 `json:"current-lag"` 165 | } 166 | 167 | // Lag is just a wrapper for a uint64, but it can be `nil` 168 | type Lag struct { 169 | Value uint64 170 | } 171 | 172 | // MarshalJSON should just treat lag as a nullable number, not a nested struct 173 | func (lag Lag) MarshalJSON() ([]byte, error) { 174 | return json.Marshal(lag.Value) 175 | } 176 | 177 | // UnmarshalJSON reads lag from a JSON number 178 | func (lag *Lag) UnmarshalJSON(b []byte) error { 179 | return json.Unmarshal(b, &lag.Value) 180 | } 181 | 182 | // ConsumerOffset represents a single offset stored. 
// ConsumerOffset represents a single offset stored. It is used as part of the response to a StorageFetchConsumer
// request.
type ConsumerOffset struct {
	// The offset that is stored
	Offset int64 `json:"offset"`

	// The offset of this __consumer_offsets commit itself; used internally for ordering and not exposed in JSON
	Order int64 `json:"-"`

	// The timestamp at which the offset was committed
	Timestamp int64 `json:"timestamp"`

	// The timestamp at which the commit was seen by burrow
	ObservedTimestamp int64 `json:"observedAt"`

	// The number of messages that the consumer was behind at the time that the offset was committed. This number is
	// not updated after the offset was committed, so it does not represent the current lag of the consumer.
	Lag *Lag `json:"lag"`
}

// ConsumerTopics is the response that is sent for a StorageFetchConsumer request. It is a map of topic names to
// ConsumerPartitions objects that describe that topic.
type ConsumerTopics map[string]ConsumerPartitions

// ConsumerPartitions describes all partitions for a single topic. The index indicates the partition ID, and the value
// is a pointer to a ConsumerPartition object with the offset information for that partition.
type ConsumerPartitions []*ConsumerPartition
5 | volumes: 6 | - ${PWD}/docker-config:/etc/burrow/ 7 | - ${PWD}/tmp:/var/tmp/burrow 8 | ports: 9 | - 8000:8000 10 | depends_on: 11 | - zookeeper 12 | - kafka 13 | restart: always 14 | 15 | zookeeper: 16 | image: wurstmeister/zookeeper 17 | ports: 18 | - 2181:2181 19 | 20 | kafka: 21 | image: wurstmeister/kafka 22 | ports: 23 | - 9092:9092 24 | environment: 25 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181/local 26 | KAFKA_ADVERTISED_HOST_NAME: kafka 27 | KAFKA_ADVERTISED_PORT: 9092 28 | KAFKA_CREATE_TOPICS: "test-topic:2:1,test-topic2:1:1,test-topic3:1:1" 29 | -------------------------------------------------------------------------------- /docker-config/burrow.toml: -------------------------------------------------------------------------------- 1 | [zookeeper] 2 | servers=[ "zookeeper:2181" ] 3 | timeout=6 4 | root-path="/burrow" 5 | 6 | [client-profile.profile] 7 | kafka-version="0.11.0" 8 | client-id="docker-client" 9 | 10 | [cluster.local] 11 | client-profile="profile" 12 | class-name="kafka" 13 | servers=[ "kafka:9092" ] 14 | topic-refresh=60 15 | offset-refresh=30 16 | groups-reaper-refresh=30 17 | 18 | [consumer.local] 19 | class-name="kafka" 20 | cluster="local" 21 | servers=[ "kafka:9092" ] 22 | group-denylist="^(console-consumer-|python-kafka-consumer-).*$" 23 | group-allowlist="" 24 | 25 | [consumer.local_zk] 26 | class-name="kafka_zk" 27 | cluster="local" 28 | servers=[ "zookeeper:2181" ] 29 | zookeeper-path="/local" 30 | zookeeper-timeout=30 31 | group-denylist="^(console-consumer-|python-kafka-consumer-).*$" 32 | group-allowlist="" 33 | 34 | [httpserver.default] 35 | address=":8000" 36 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/linkedin/Burrow 2 | 3 | go 1.24 4 | 5 | require ( 6 | github.com/IBM/sarama v1.45.1 7 | github.com/OneOfOne/xxhash v1.2.8 8 | github.com/julienschmidt/httprouter v1.3.0 9 | 
github.com/karrick/goswarm v1.10.0 10 | github.com/linkedin/go-zk v0.1.4 11 | github.com/pborman/uuid v1.2.1 12 | github.com/pkg/errors v0.9.1 13 | github.com/prometheus/client_golang v1.21.1 14 | github.com/spf13/viper v1.20.1 15 | github.com/stretchr/testify v1.10.0 16 | github.com/xdg/scram v1.0.5 17 | go.uber.org/automaxprocs v1.6.0 18 | go.uber.org/zap v1.27.0 19 | gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df 20 | gopkg.in/natefinch/lumberjack.v2 v2.2.1 21 | ) 22 | 23 | require ( 24 | github.com/beorn7/perks v1.0.1 // indirect 25 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 26 | github.com/davecgh/go-spew v1.1.1 // indirect 27 | github.com/eapache/go-resiliency v1.7.0 // indirect 28 | github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3 // indirect 29 | github.com/eapache/queue v1.1.0 // indirect 30 | github.com/fsnotify/fsnotify v1.8.0 // indirect 31 | github.com/go-viper/mapstructure/v2 v2.2.1 // indirect 32 | github.com/golang/snappy v1.0.0 // indirect 33 | github.com/google/uuid v1.6.0 // indirect 34 | github.com/hashicorp/errwrap v1.1.0 // indirect 35 | github.com/hashicorp/go-multierror v1.1.1 // indirect 36 | github.com/hashicorp/go-uuid v1.0.3 // indirect 37 | github.com/jcmturner/aescts/v2 v2.0.0 // indirect 38 | github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect 39 | github.com/jcmturner/gofork v1.7.6 // indirect 40 | github.com/jcmturner/gokrb5/v8 v8.4.4 // indirect 41 | github.com/jcmturner/rpc/v2 v2.0.3 // indirect 42 | github.com/klauspost/compress v1.18.0 // indirect 43 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 44 | github.com/pelletier/go-toml/v2 v2.2.3 // indirect 45 | github.com/pierrec/lz4/v4 v4.1.22 // indirect 46 | github.com/pmezard/go-difflib v1.0.0 // indirect 47 | github.com/prometheus/client_model v0.6.1 // indirect 48 | github.com/prometheus/common v0.63.0 // indirect 49 | github.com/prometheus/procfs v0.16.0 // indirect 50 | github.com/rcrowley/go-metrics 
v0.0.0-20201227073835-cf1acfcdf475 // indirect 51 | github.com/sagikazarmark/locafero v0.9.0 // indirect 52 | github.com/sourcegraph/conc v0.3.0 // indirect 53 | github.com/spf13/afero v1.14.0 // indirect 54 | github.com/spf13/cast v1.7.1 // indirect 55 | github.com/spf13/pflag v1.0.6 // indirect 56 | github.com/stretchr/objx v0.5.2 // indirect 57 | github.com/subosito/gotenv v1.6.0 // indirect 58 | github.com/xdg/stringprep v1.0.3 // indirect 59 | go.uber.org/multierr v1.11.0 // indirect 60 | golang.org/x/crypto v0.36.0 // indirect 61 | golang.org/x/net v0.38.0 // indirect 62 | golang.org/x/sys v0.31.0 // indirect 63 | golang.org/x/text v0.23.0 // indirect 64 | google.golang.org/protobuf v1.36.6 // indirect 65 | gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect 66 | gopkg.in/yaml.v3 v3.0.1 // indirect 67 | ) 68 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version 2 | // 2.0 (the "License"); you may not use this file except in compliance with 3 | // the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // 6 | // Unless required by applicable law or agreed to in writing, software 7 | // distributed under the License is distributed on an "AS IS" BASIS, 8 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | 10 | // Burrow provides advanced Kafka Consumer Lag Checking. 11 | // It is a monitoring companion for Apache Kafka that provides consumer lag checking as a service without the need for 12 | // specifying thresholds. It monitors committed offsets for all consumers and calculates the status of those consumers 13 | // on demand. An HTTP endpoint is provided to request status on demand, as well as provide other Kafka cluster 14 | // information. 
There are also configurable notifiers that can send status out via email or HTTP calls to another 15 | // service. 16 | // 17 | // # CLI or Library 18 | // 19 | // Burrow is designed to be run as a standalone application (CLI), and this is what the main package provides. In some 20 | // situations it may be better for you to wrap Burrow with another application - for example, in environments where you 21 | // have your own application structure to provide configuration and logging. To this end, Burrow can also be used as a 22 | // library within another app. 23 | // 24 | // When embedding Burrow, please refer to https://github.com/linkedin/Burrow/blob/master/main.go for details on what 25 | // preparation should happen before starting it. This is the wrapper that provides the CLI interface. The main logic 26 | // for Burrow is in the core package, while the protocol package provides some of the common interfaces that are used. 27 | // 28 | // # Additional Documentation 29 | // 30 | // More documentation on Burrow, including configuration and HTTP requests, can be found at 31 | // https://github.com/linkedin/Burrow/wiki 32 | package main 33 | 34 | import ( 35 | "flag" 36 | "fmt" 37 | "os" 38 | "os/signal" 39 | "strings" 40 | "syscall" 41 | "time" 42 | 43 | "github.com/spf13/viper" 44 | _ "go.uber.org/automaxprocs" 45 | 46 | "github.com/linkedin/Burrow/core" 47 | ) 48 | 49 | // exitCode wraps a return value for the application 50 | type exitCode struct{ Code int } 51 | 52 | func handleExit() { 53 | if e := recover(); e != nil { 54 | if exit, ok := e.(exitCode); ok { 55 | if exit.Code != 0 { 56 | fmt.Fprintln(os.Stderr, "Burrow failed at", time.Now().Format("January 2, 2006 at 3:04pm (MST)")) 57 | } else { 58 | fmt.Fprintln(os.Stderr, "Stopped Burrow at", time.Now().Format("January 2, 2006 at 3:04pm (MST)")) 59 | } 60 | 61 | os.Exit(exit.Code) 62 | } 63 | panic(e) // not an exitCode, bubble up 64 | } 65 | } 66 | 67 | func main() { 68 | // This makes sure that we panic 
and run defers correctly 69 | defer handleExit() 70 | 71 | // The only command line arg is the config file 72 | configPath := flag.String("config-dir", ".", "Directory that contains the configuration file") 73 | flag.Parse() 74 | 75 | // Load the configuration from the file 76 | viper.SetConfigName("burrow") 77 | viper.AddConfigPath(*configPath) 78 | fmt.Fprintln(os.Stderr, "Reading configuration from", *configPath) 79 | err := viper.ReadInConfig() 80 | if err != nil { 81 | fmt.Fprintln(os.Stderr, "Failed reading configuration:", err.Error()) 82 | panic(exitCode{1}) 83 | } 84 | 85 | // setup viper to be able to read env variables with a configured prefix 86 | viper.SetDefault("general.env-var-prefix", "burrow") 87 | envPrefix := viper.GetString("general.env-var-prefix") 88 | viper.SetEnvPrefix(envPrefix) 89 | viper.SetEnvKeyReplacer(strings.NewReplacer(".", "_", "-", "_")) 90 | viper.AutomaticEnv() 91 | 92 | // Create the PID file to lock out other processes 93 | viper.SetDefault("general.pidfile", "burrow.pid") 94 | pidFile := viper.GetString("general.pidfile") 95 | if !core.CheckAndCreatePidFile(pidFile) { 96 | // Any error on checking or creating the PID file causes an immediate exit 97 | panic(exitCode{1}) 98 | } 99 | defer core.RemovePidFile(pidFile) 100 | 101 | // Set up stderr/stdout to go to a separate log file, if enabled 102 | stdoutLogfile := viper.GetString("general.stdout-logfile") 103 | if stdoutLogfile != "" { 104 | core.OpenOutLog(stdoutLogfile) 105 | } 106 | 107 | // Register signal handlers for exiting 108 | exitChannel := make(chan os.Signal, 1) 109 | signal.Notify(exitChannel, syscall.SIGINT, syscall.SIGQUIT, syscall.SIGTERM) 110 | 111 | // This triggers handleExit (after other defers), which will then call os.Exit properly 112 | panic(exitCode{core.Start(nil, exitChannel)}) 113 | } 114 | -------------------------------------------------------------------------------- /main_test.go: 
// Copyright 2017 LinkedIn Corp. Licensed under the Apache License, Version
// 2.0 (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

package main

import (
	"testing"
)

// Test_dummy is a placeholder test that gives the main package a runnable
// test suite for CI; it performs no assertions and always passes.
func Test_dummy(t *testing.T) {
	t.Log("Dummy test passed")
}