├── .chglog ├── CHANGELOG.tpl.md └── config.yml ├── .github └── workflows │ ├── main.yml │ └── release.yml ├── .gitignore ├── .goreleaser.yml ├── .vscode └── settings.json ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.assets ├── image-20200916185943754.png ├── image-20200916190045700.png ├── image-20200916190143119.png ├── image-20200917105556403.png ├── image-20200917112711674.png ├── image-20200917112924277.png ├── image-20201126025027475.png ├── image-20201126025227967.png ├── image-20201126025254839.png ├── image-20201126031339101.png ├── image-20201126031346835.png ├── image-20201126031409284.png ├── image-20201126031456582.png ├── image-20201126032854346.png ├── image-20201126032905101.png ├── image-20201126032909776.png ├── image-20201126035103180.png └── logo.png ├── README.md ├── README_CN.md ├── cmd └── kvass │ ├── coordinator.go │ ├── main.go │ └── sidecar.go ├── deploy └── demo │ ├── config.yaml │ ├── coordinator.yaml │ ├── kvass-rbac.yaml │ ├── metrics.yaml │ ├── prometheus-rep-0.yaml │ ├── thanos-query.yaml │ └── thanos-rule.yaml ├── documents └── design.md ├── go.mod ├── go.sum └── pkg ├── api ├── request.go ├── request_test.go ├── result.go ├── result_test.go ├── testing.go └── testing_test.go ├── coordinator ├── coordinator.go ├── coordinator_test.go ├── rebalance.go ├── service.go ├── service_test.go └── types.go ├── discovery ├── discovery.go ├── discovery_test.go └── translate.go ├── explore ├── explore.go └── explore_test.go ├── prom ├── client.go ├── client_test.go ├── config.go ├── config_test.go └── data.go ├── scrape ├── jobinfo.go ├── manager.go ├── manager_test.go ├── reader.go ├── reader_test.go ├── scraper.go └── scraper_test.go ├── shard ├── kubernetes │ ├── replicasmanager.go │ ├── replicasmanager_test.go │ ├── shardmanager.go │ └── shardmanager_test.go ├── shard.go ├── shard_test.go ├── static │ ├── replicasmanager.go │ ├── replicasmanager_test.go │ ├── shardmanager.go │ ├── shardmanager_test.go │ └── types.go 
└── types.go ├── sidecar ├── injector.go ├── injector_test.go ├── proxy.go ├── proxy_test.go ├── service.go ├── service_test.go ├── targets.go └── targets_test.go ├── target ├── status.go ├── status_test.go ├── target.go └── target_test.go └── utils ├── encode ├── encode.go └── encode_test.go ├── k8sutil ├── status.go └── status_test.go ├── test ├── format.go └── format_test.go ├── types ├── pointer.go ├── pointer_test.go ├── slice.go ├── slice_test.go └── strings.go └── wait ├── wait.go └── wait_test.go /.chglog/CHANGELOG.tpl.md: -------------------------------------------------------------------------------- 1 | {{ range .Versions }} 2 | ## {{ if .Tag.Previous }}[{{ .Tag.Name }}]{{ else }}{{ .Tag.Name }}{{ end }} - {{ datetime "2006-01-02" .Tag.Date }} 3 | {{ range .CommitGroups -}} 4 | ### {{ .Title }} 5 | {{ range .Commits -}} 6 | - {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }} 7 | {{ end }} 8 | {{ end -}} 9 | 10 | {{- if .RevertCommits -}} 11 | ### Reverts 12 | {{ range .RevertCommits -}} 13 | - {{ .Revert.Header }} 14 | {{ end }} 15 | {{ end -}} 16 | 17 | {{- if .MergeCommits -}} 18 | ### Pull Requests 19 | {{ range .MergeCommits -}} 20 | - {{ .Header }} 21 | {{ end }} 22 | {{ end -}} 23 | 24 | {{- if .NoteGroups -}} 25 | {{ range .NoteGroups -}} 26 | ### {{ .Title }} 27 | {{ range .Notes }} 28 | {{ .Body }} 29 | {{ end }} 30 | {{ end -}} 31 | {{ end -}} 32 | {{ end -}} 33 | -------------------------------------------------------------------------------- /.chglog/config.yml: -------------------------------------------------------------------------------- 1 | style: github 2 | template: CHANGELOG.tpl.md 3 | info: 4 | title: CHANGELOG 5 | repository_url: https://github.com/tkestack/kvass 6 | options: 7 | commits: 8 | filters: 9 | Type: 10 | - feat 11 | - fix 12 | # - perf 13 | # - refactor 14 | commit_groups: 15 | # title_maps: 16 | # feat: Features 17 | # fix: Bug Fixes 18 | # perf: Performance Improvements 19 | # refactor: Code Refactoring 20 | 
header: 21 | pattern: "^(\\w*)(?:\\(([\\w\\$\\.\\-\\*\\s]*)\\))?\\:\\s(.*)$" 22 | pattern_maps: 23 | - Type 24 | - Scope 25 | - Subject 26 | notes: 27 | keywords: 28 | - BREAKING CHANGE 29 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | 8 | jobs: 9 | 10 | lint: 11 | name: Lint 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Set up Go 15 | uses: actions/setup-go@v2 16 | with: 17 | go-version: 1.17 18 | 19 | - name: Check out code 20 | uses: actions/checkout@v2 21 | 22 | - name: Lint Go Code 23 | run: | 24 | export PATH=$PATH:$(go env GOPATH)/bin 25 | go get -u golang.org/x/lint/golint 26 | make lint 27 | 28 | test: 29 | name: Test 30 | runs-on: ubuntu-latest 31 | steps: 32 | - name: Set up Go 33 | uses: actions/setup-go@v2 34 | with: 35 | go-version: 1.17 36 | 37 | - name: Check out code 38 | uses: actions/checkout@v2 39 | 40 | build: 41 | name: Build 42 | runs-on: ubuntu-latest 43 | needs: [lint, test] 44 | steps: 45 | - name: Set up Go 46 | uses: actions/setup-go@v2 47 | with: 48 | go-version: 1.17 49 | 50 | - name: Check out code 51 | uses: actions/checkout@v2 52 | 53 | - name: Build 54 | run: make build 55 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | create: 4 | tags: 5 | - v* 6 | 7 | jobs: 8 | release: 9 | name: Release on GitHub 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Check out code 13 | uses: actions/checkout@v2 14 | - name: Set up Go 15 | uses: actions/setup-go@v2 16 | with: 17 | go-version: 1.17 18 | - name: pull image 19 | run: docker pull ubuntu:latest 20 | - name: login to docker 21 | run: echo ${{secrets.DOCKER_PASSWORD}} | docker 
login -u ${{secrets.DOCKER_USERNAME}} --password-stdin 22 | - name: Create release on GitHub 23 | uses: goreleaser/goreleaser-action@v2 24 | with: 25 | args: release 26 | env: 27 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /bin/ 2 | .idea 3 | build 4 | coverage.txt 5 | cover.out 6 | kvass 7 | .vscode 8 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | builds: 2 | - id: "build" 3 | main: ./cmd/kvass/ 4 | binary: kvass 5 | goos: 6 | - linux 7 | goarch: 8 | - amd64 9 | 10 | dockers: 11 | - image_templates: 12 | - "tkestack/kvass:latest" 13 | - "tkestack/kvass:{{ .Tag }}" 14 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "Infof", 4 | "kubernetes", 5 | "stretchr", 6 | "thanos", 7 | "tkestack", 8 | "TSDB", 9 | "vmagent", 10 | "Warnf", 11 | "Wrapf" 12 | ] 13 | } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [v0.3.2] - 2022-11-16 2 | ### Feat 3 | - support disable http keep alive during service discovery 4 | 5 | ## [v0.3.1] - 2022-07-05 6 | ### Fix 7 | - bugfix: dup transfer target ([#101](https://github.com/tkestack/kvass/issues/101)) 8 | 9 | ## [v0.3.0] - 2022-06-23 10 | ### Fix 11 | - target gc and duplicate alleviate shard ([#96](https://github.com/tkestack/kvass/issues/96)) 12 | 13 | ## [v0.2.3] - 2022-06-17 14 | ### Fix 15 | - miss 'break' in alleviate shard 16 | 17 | 18 | ## [v0.2.2] - 2022-06-17 19 | ### Fix 20 | - targets gc 21 | 22 | 23 | ## 
[v0.2.1] - 2022-02-15 24 | 25 | ## [v0.2.0] - 2022-01-03 26 | ### Feat 27 | - add metrics ([#82](https://github.com/tkestack/kvass/issues/82)) 28 | - use stream model to reduce memory usage ([#81](https://github.com/tkestack/kvass/issues/81)) 29 | - support-http-sd ([#74](https://github.com/tkestack/kvass/issues/74)) 30 | - extend /api/v1/targets detail ([#66](https://github.com/tkestack/kvass/issues/66)) 31 | - support static shards ([#64](https://github.com/tkestack/kvass/issues/64)) 32 | 33 | ### Fix 34 | - may panic when waiting for first service discovery done ([#79](https://github.com/tkestack/kvass/issues/79)) 35 | - statefulsets shards pods should be sort ([#77](https://github.com/tkestack/kvass/issues/77)) 36 | - statefulsets shards pods should be sort ([#76](https://github.com/tkestack/kvass/issues/76)) 37 | - some unit test ([#68](https://github.com/tkestack/kvass/issues/68)) 38 | - fetching head series ([#53](https://github.com/tkestack/kvass/issues/53)) 39 | 40 | 41 | ## [v0.1.4] - 2021-05-25 42 | ### Feat 43 | - coordinator skip upgrading statefulsets 44 | 45 | ### Fix 46 | - log format 47 | - some unit test 48 | - use /api/v1/status/tsdb to get head series 49 | 50 | 51 | ## [v0.1.3] - 2021-05-18 52 | ### Feat 53 | - add "statistics" result of coordinator api about /api/targets 54 | 55 | ### Fix 56 | - unexpect scaling up when some targets is too big during shard alleviation 57 | - sidecar proxy http request report "use of closed network connection" 58 | - return duplicate target with same hash 59 | - alleviateShards 60 | 61 | 62 | ## [v0.1.2] - 2021-04-23 63 | ### Fix 64 | - empty shard need ensure idle state every coordinate period 65 | - targets GC should include unhealthy target 66 | 67 | 68 | ## [v0.1.1] - 2021-04-21 69 | ### Feat 70 | - use weighted random when assign new target with max-idle-timeout=0 71 | 72 | ### Fix 73 | - scale down blocked if any idle shard exited 74 | - scale up when target is too big 75 | - concurrent map iteration and 
map write on discovery targets map 76 | - set content-type when copy data to prometheus 77 | 78 | 79 | ## [v0.1.0] - 2021-03-09 80 | ### Feat 81 | - external_labels not affects config hash now 82 | - update workflow 83 | - support min shards, change replicas management and rand assign ([#38](https://github.com/tkestack/kvass/issues/38)) 84 | 85 | ### Fix 86 | - coordinator min shard chaos with max shard 87 | - sidecar always panic at first time started 88 | - update workflow 89 | - go lint 90 | - base image 91 | 92 | 93 | ## [v0.0.6] - 2021-02-24 94 | ### Fix 95 | - sidecar panic when scrape failed ([#32](https://github.com/tkestack/kvass/issues/32)) 96 | 97 | 98 | ## [v0.0.5] - 2021-02-22 99 | ### Feat 100 | - disable scaling down by default 101 | 102 | 103 | ## [v0.0.4] - 2021-01-18 104 | ### Fix 105 | - scrape timeout message of targets list 106 | - register all SD type 107 | - register all SD type 108 | 109 | 110 | ## [v0.0.3] - 2020-12-18 111 | ### Fix 112 | - change workflow go version to 1.15 113 | - coordinator start with service discovery init 114 | - statistic samples before copy data to prometheus 115 | - remove deleted targets 116 | - remove all auth in injected config file 117 | 118 | 119 | ## [v0.0.2] - 2020-12-11 120 | ### Fix 121 | - Dockerfile and Makefile 122 | - unmarshal bear_token/password of remote write/read config ([#6](https://github.com/tkestack/kvass/issues/6)) 123 | - upgrade prometheus lib 124 | - flag descriptions of Coordinator 125 | 126 | 127 | ## v0.0.1 - 2020-11-20 128 | ### Feat 129 | - support invalid label name 130 | - support inject APIServer information for kubernetes SD 131 | - coordinator support maxShard flag 132 | 133 | ### Fix 134 | - shard client return empty RuntimeInfo if request failed 135 | 136 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | RUN apt update && apt 
install -y curl 3 | COPY kvass /kvass 4 | ENTRYPOINT ["/kvass"] -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJECT_NAME := "tkestack.io/kvass" 2 | PKG := "./" 3 | PKG_LIST := $(shell go list ${PKG}/... | grep -v /vendor/) 4 | GO_FILES := $(shell find . -name '*.go' | grep -v /vendor/ | grep -v _test.go) 5 | GOOS := "linux" 6 | GOARCH := "amd64" 7 | .PHONY: all dep lint vet test test-coverage build clean 8 | 9 | all: test-coverage lint vet build 10 | 11 | dep: ## Get the dependencies 12 | @go mod tidy 13 | @go mod download 14 | 15 | lint: ## Lint Golang files 16 | @golint -set_exit_status ${PKG_LIST} 17 | 18 | vet: ## Run go vet 19 | @go vet ${PKG_LIST} 20 | 21 | test: ## Run unittests 22 | @go test -short ${PKG_LIST} 23 | 24 | test-coverage: ## Run tests with coverage 25 | @go test -short -coverprofile cover.out -covermode=atomic ${PKG_LIST} 26 | @cat cover.out >> coverage.txt 27 | 28 | build: dep ## Build the binary file 29 | @GOOS=${GOOS} GOARCH=${GOARCH} go build -o kvass cmd/kvass/*.go 30 | 31 | clean: ## Remove previous build 32 | @rm -fr kvass 33 | @rm -fr cover.out coverage.txt 34 | clog: 35 | @git-chglog -o CHANGELOG.md 36 | -------------------------------------------------------------------------------- /README.assets/image-20200916185943754.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20200916185943754.png -------------------------------------------------------------------------------- /README.assets/image-20200916190045700.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20200916190045700.png 
-------------------------------------------------------------------------------- /README.assets/image-20200916190143119.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20200916190143119.png -------------------------------------------------------------------------------- /README.assets/image-20200917105556403.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20200917105556403.png -------------------------------------------------------------------------------- /README.assets/image-20200917112711674.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20200917112711674.png -------------------------------------------------------------------------------- /README.assets/image-20200917112924277.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20200917112924277.png -------------------------------------------------------------------------------- /README.assets/image-20201126025027475.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20201126025027475.png -------------------------------------------------------------------------------- /README.assets/image-20201126025227967.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20201126025227967.png -------------------------------------------------------------------------------- /README.assets/image-20201126025254839.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20201126025254839.png -------------------------------------------------------------------------------- /README.assets/image-20201126031339101.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20201126031339101.png -------------------------------------------------------------------------------- /README.assets/image-20201126031346835.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20201126031346835.png -------------------------------------------------------------------------------- /README.assets/image-20201126031409284.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20201126031409284.png -------------------------------------------------------------------------------- /README.assets/image-20201126031456582.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20201126031456582.png -------------------------------------------------------------------------------- /README.assets/image-20201126032854346.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20201126032854346.png -------------------------------------------------------------------------------- /README.assets/image-20201126032905101.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20201126032905101.png -------------------------------------------------------------------------------- /README.assets/image-20201126032909776.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20201126032909776.png -------------------------------------------------------------------------------- /README.assets/image-20201126035103180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/image-20201126035103180.png -------------------------------------------------------------------------------- /README.assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/README.assets/logo.png -------------------------------------------------------------------------------- /README_CN.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | [English](README.md) 4 | 5 | Kvass 是一个 [Prometheus](https://github.com/prometheus/prometheus) 横向扩缩容解决方案,他使用Sidecar动态的根据Coordinator分配下来的target列表来为每个Prometheus生成只含特定target的配置文件,从而将采集任务动态调度到各个Prometheus分片。 6 | Coordinator 用于服务发现,target调度和分片扩缩容管理. 7 | [Thanos](https://github.com/thanos-io/thanos) (或者其他TSDB) 用来将分片数据汇总成全局数据. 8 | 9 | [![Go Report Card](https://goreportcard.com/badge/github.com/tkestack/kvass)](https://goreportcard.com/report/github.com/tkestack/kvass) [![Build](https://github.com/tkestack/kvass/workflows/Build/badge.svg?branch=master)]() 10 | 11 | ------ 12 | 13 | # 目录 14 | * [概述](#概述) 15 | * [设计](#设计) 16 | * [核心架构](#核心架构) 17 | * [组件](#组件) 18 | * [Coordinator](#coordinator) 19 | * [Sidecar](#sidecar) 20 | * [Kvass + Thanos](#kvass--thanos) 21 | * [Kvass + 远程存储](#kvass--远程存储) 22 | * [多副本](#多副本) 23 | * [Target迁移原理](#target迁移原理) 24 | * [分片降压](#分片降压) 25 | * [分片缩容](#分片缩容) 26 | * [限制分片数目](#限制分片数目) 27 | * [Target调度策略](#target调度策略) 28 | * [安装Demo](#安装demo) 29 | * [最佳实践](#最佳实践) 30 | * [启动参数推荐](#启动参数推荐) 31 | * [License](#license) 32 | 33 | # 概述 34 | 35 | Kvass 是一个 [Prometheus](https://github.com/prometheus/prometheus) 横向扩缩容解决方案,他有以下特点. 36 | 37 | * 轻量,安装方便 38 | * 支持数千万series规模 (数千k8s节点) 39 | * 无需修改Prometheus配置文件,无需加入hash_mod 40 | * target动态调度 41 | * 根据target实际数据规模来进行分片负载均衡,而不是用hash_mod 42 | * 支持多副本 43 | 44 | # 设计 45 | 46 | ## 核心架构 47 | 48 | Kvass由2个组件组成:coordinator和sidecar. 
49 | 50 | image-20201126031456582 51 | 52 | ## 组件 53 | 54 | ### Coordinator 55 | 56 | Coordinator的核心作用包括服务发现,target调度,分片管理等启动参数参考 [code](https://github.com/tkestack/kvass/blob/master/cmd/kvass/coordinator.go#L61),其核心工作流程包含以下几点 57 | 58 | * Coordinator 加载配置文件并进行服务发现,获取所有target 59 | * 对于每个需要采集的target, Coordinator 为其应用配置文件中的"relabel_configs",并且探测target当前包含的series数 60 | * Coordinator 周期性分配新Target,转移Target,以及进行分片的扩缩容。 61 | 62 | image-20201126031409284 63 | 64 | ### Sidecar 65 | 66 | Sidecar负责为其边上的Prometheus生成特殊的配置文件,用以控制Prometheus的采集对象列表,启动参数参考 [code](https://github.com/tkestack/kvass/blob/master/cmd/kvass/sidecar.go#L48) 67 | 68 | * Sidecar 从Coordinator获得target及其relabel过的Labels 69 | 70 | * Sidecar只使用 "static_configs" 服务发现机制来生成一份新的配置文件,配置文件中只包含分配给他的target, 并删除所有"relabel_configs" 71 | 72 | * 所有Prometheus抓取请求会被代理到Sidecar用于target规模的跟踪 73 | 74 | 75 | 76 | image-20201126032909776 77 | 78 | ## Kvass + Thanos 79 | 80 | 由于现在Prometheus的数据分散到了各个分片, 我们需要一个方案去获得全局数据。 81 | 82 | [Thanos](https://github.com/thanos-io/thanos) 是一个不错的选择. 我们只需要将Kvass sidecar放在Thanos sidecar边上,再安装一个Kvass coordinator就可以了. 
83 | 84 | ![image-20201126035103180](./README.assets/image-20201126035103180.png) 85 | 86 | ## Kvass + 远程存储 87 | 88 | 如果你使用其他远程存储例如influxdb,只需要直接在配置文件里配置 "remote write"就可以了。 89 | 90 | ## 多副本 91 | 92 | Coordinator 使用 label 选择器来选择分片的StatefulSets, 每一个StatefulSet被认为是一个副本, 副本之间的target分配与调度是独立的。 93 | 94 | > --shard.selector=app.kubernetes.io/name=prometheus 95 | 96 | ## Target迁移原理 97 | 98 | 在某些场景下我们需要将一个已分配的Target从一个分片转移到另外一个分片(例如为分片降压)。 99 | 100 | 为了保证数据不断点,Target迁移被分为以下几个步骤。 101 | 102 | * 将原所在分片中该Target的状态标记为in_transfer,并将Target同时分配给目标分片,状态为normal。 103 | * 等待Target被2个分片同时采集至少3个周期。 104 | * 将原来分片中的Target删除。 105 | 106 | ## 分片降压 107 | 108 | 当一个Target分配给一个分片后,随着时间推移,Target产生的series有可能增加,从而导致分片的head series超过阈值,例如新加入的k8s节点,其cadvisor数据规模就有可能随着Pod被调度上来而增加。 109 | 110 | 当分片head series超过阈值一定比例后,Coordinator会尝试做分片的降压处理,即根据超过阈值的比例,将一部分Target从该分片转移到其他空闲分片中,超过阈值比例越高,被转移的Target就越多。 111 | 112 | ## 分片缩容 113 | 114 | 分片缩容只会从编号最大的分片开始。 115 | 116 | 当编号最大的分片上所有Target都可以被迁移到其他分片,就会尝试进行迁移,即清空编号最大的分片。 117 | 118 | 当分片被清空后,分片会变为闲置状态,经过一段时间后(等待分片数据被删除或者被上传至对象存储),分片被删除。 119 | 120 | 您可以通过Coordinator的以下参数来设置闲置时间,当设置为0时关闭缩容。 121 | 122 | > ``` 123 | > --shard.max-idle-time=3h 124 | > --shard.max-idle-time=0 // 默认 125 | > ``` 126 | 127 | 如果使用的是StatefulSet来管理分片,您可以添加以下参数来让Coordinator在删除分片时自动删除pvc 128 | 129 | > ``` 130 | > --shard.delete-pvc=true // 默认 131 | > ``` 132 | 133 | 134 | ## 限制分片数目 135 | 可通过设置以下参数来限制Coordinator的最大最小分片数。 136 | 值得注意的是,如果设置了最小分片数,那么只有可用分片数不低于最小分片数才会开始Coordinate。 137 | 138 | > ``` 139 | > --shard.max-shard=99999 //默认 140 | > --shard.min-shard=0 //默认 141 | > ``` 142 | 143 | ## Target调度策略 144 | 145 | 如果开启了缩容,那么无论是新的Target还是被迁移的Target,都会优先被分配给编号低的分片。 146 | 147 | 如果关闭了缩容,则会随机分配到有空间的分片上,这种方式特别适合和```--shard.min-shard```参数一起使用。 148 | 149 | # 安装Demo 150 | 151 | 我们提供了一个demo去展示Kvass的使用. 
152 | 153 | > git clone https://github.com/tkestack/kvass 154 | > 155 | > cd kvass 156 | > 157 | > kubectl create -f ./deploy/demo 158 | 159 | 我们可以看到一个叫"metrics"的Deployment, 其有 6 个Pod, 每个Pod会生成 10045 series (45 series 来自golang默认的metrics)。 160 | 161 | 我们将采集这些指标。 162 | 163 | ![image-20200916185943754](./README.assets/image-20200916185943754.png) 164 | 165 | 每个分片能采集的最大series在Coordinator的启动参数里配置。 166 | 167 | 在这个例子中我们设置为30000. 168 | 169 | > ``` 170 | > --shard.max-series=30000 171 | > ``` 172 | 173 | 现在我们有6个target,总计60000+ series,每个分片最多能采30000 series,所以我们预期需要3个分片. 174 | 175 | Coordinator自动将分片个数变成3,并将6个target分配给他们. 176 | 177 | ![image-20200916190143119](./README.assets/image-20200916190143119.png) 178 | 179 | 我们发现每个分片的head series数目确实只有2个target的量。 180 | 181 | ![image-20200917112924277](./README.assets/image-20200917112924277.png) 182 | 183 | 我们通过thanos-query查询到的,则是全部的数据。 184 | 185 | ![image-20200917112711674](./README.assets/image-20200917112711674.png) 186 | 187 | # 最佳实践 188 | 189 | ## 启动参数推荐 190 | 191 | Prometheus的内存使用量和head series有关。 192 | 193 | 在实际使用时,我们推荐每个分片的最大series设置成750000。 194 | 195 | > 设置Coordinator启动参数 196 | > 197 | > --shard.max-series=600000 198 | 199 | 每个Prometheus的内存request建议设置为2C8G. 200 | 201 | Sidecar有一个名为```store.path```参数,用于指定将正在采集的 202 | 203 | # License 204 | 205 | Apache License 2.0, see [LICENSE](./LICENSE). 206 | 207 | -------------------------------------------------------------------------------- /cmd/kvass/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. 
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package main 19 | 20 | import ( 21 | "fmt" 22 | "math/rand" 23 | "net/http" 24 | "os" 25 | "time" 26 | 27 | "github.com/gin-gonic/gin" 28 | "github.com/prometheus/client_golang/prometheus" 29 | "github.com/prometheus/client_golang/prometheus/collectors" 30 | "github.com/prometheus/client_golang/prometheus/promhttp" 31 | _ "github.com/prometheus/prometheus/discovery/install" 32 | "github.com/spf13/cobra" 33 | ) 34 | 35 | var ( 36 | promRegistry *prometheus.Registry 37 | ) 38 | 39 | func init() { 40 | promRegistry = prometheus.NewRegistry() 41 | promRegistry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) 42 | promRegistry.MustRegister(collectors.NewGoCollector()) 43 | } 44 | 45 | var rootCmd = &cobra.Command{ 46 | Use: "Kvass", 47 | Short: `Prometheus sharding`, 48 | Long: `Kvass is a Prometheus horizontal auto-scaling solution , 49 | which uses Sidecar to generate special config file only contains part of targets assigned from Coordinator for every Prometheus shard.`, 50 | } 51 | 52 | func main() { 53 | gin.SetMode(gin.ReleaseMode) 54 | null, _ := os.Open(os.DevNull) 55 | gin.DefaultWriter = null 56 | rand.Seed(time.Now().UnixNano()) 57 | http.Handle("/metrics", promhttp.HandlerFor( 58 | prometheus.DefaultGatherer, 59 | promhttp.HandlerOpts{ 60 | EnableOpenMetrics: true, 61 | }, 62 | )) 63 | 64 | if err := rootCmd.Execute(); err != nil { 65 | fmt.Println(err) 66 | os.Exit(1) 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /cmd/kvass/sidecar.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package main 19 | 20 | import ( 21 | "tkestack.io/kvass/pkg/scrape" 22 | "tkestack.io/kvass/pkg/sidecar" 23 | "tkestack.io/kvass/pkg/target" 24 | 25 | "github.com/prometheus/prometheus/config" 26 | "tkestack.io/kvass/pkg/prom" 27 | 28 | log "github.com/sirupsen/logrus" 29 | "github.com/spf13/cobra" 30 | "golang.org/x/sync/errgroup" 31 | ) 32 | 33 | var sidecarCfg = struct { 34 | configFile string 35 | configOutFile string 36 | proxyAddress string 37 | apiAddress string 38 | prometheusURL string 39 | storePath string 40 | injectProxyURL string 41 | fetchHeadSeries bool 42 | configInject configInjectOption 43 | scrapeKeepAliveDisable bool 44 | shardMonitor bool 45 | }{} 46 | 47 | func init() { 48 | sidecarCmd.Flags().StringVar(&sidecarCfg.proxyAddress, "web.proxy-addr", ":8008", 49 | "proxy listen address") 50 | sidecarCmd.Flags().StringVar(&sidecarCfg.apiAddress, "web.api-addr", ":8080", 51 | "api listen address") 52 | sidecarCmd.Flags().StringVar(&sidecarCfg.prometheusURL, "prometheus.url", "http://127.0.0.1:9090", 53 | "url of target prometheus") 54 | sidecarCmd.Flags().StringVar(&sidecarCfg.configFile, "config.file", 
"/etc/prometheus/config_out/prometheus.env.yaml", 55 | "origin config file, set this empty to enable updating config from coordinator") 56 | sidecarCmd.Flags().StringVar(&sidecarCfg.configOutFile, "config.output-file", "/etc/prometheus/config_out/prometheus_injected.yaml", 57 | "injected config file") 58 | sidecarCmd.Flags().StringVar(&sidecarCfg.storePath, "store.path", "/prometheus/", 59 | "path to save shard runtime") 60 | sidecarCmd.Flags().StringVar(&sidecarCfg.injectProxyURL, "inject.proxy", "http://127.0.0.1:8008", 61 | "proxy url to inject to all job") 62 | sidecarCmd.Flags().StringVar(&sidecarCfg.configInject.kubernetes.serviceAccountPath, "inject.kubernetes-sa-path", "", 63 | "change default service account token path") 64 | sidecarCmd.Flags().BoolVar(&sidecarCfg.fetchHeadSeries, "shard.fetch-head-series", true, 65 | "if true, prometheus head series will be used as runtimeinfo.HeadSeries."+ 66 | "otherwise, the sum of all scraping targets series will be used."+ 67 | "must set false if use vmagent (or other scraping agent) instead of prometheus.") 68 | sidecarCmd.Flags().BoolVar(&sidecarCfg.scrapeKeepAliveDisable, "scrape.disable-keep-alive", false, 69 | "disable http keep alive") 70 | sidecarCmd.Flags().BoolVar(&sidecarCfg.shardMonitor, "shard.self-monitor", false, 71 | "enable shard monitor") 72 | rootCmd.AddCommand(sidecarCmd) 73 | } 74 | 75 | var sidecarCmd = &cobra.Command{ 76 | Use: "sidecar", 77 | Short: "sidecar manager one prometheus shard", 78 | Long: `sidecar generate a new config file only use static_configs to tell prometheus what to scrape`, 79 | RunE: func(cmd *cobra.Command, args []string) error { 80 | if err := cmd.Flags().Parse(args); err != nil { 81 | return err 82 | } 83 | var ( 84 | lg = log.New() 85 | scrapeManager = scrape.New(sidecarCfg.scrapeKeepAliveDisable, log.WithField("component", "scrape manager")) 86 | configManager = prom.NewConfigManager() 87 | targetManager = sidecar.NewTargetsManager( 88 | sidecarCfg.storePath, 89 | 
promRegistry, 90 | log.WithField("component", "targets manager"), 91 | ) 92 | 93 | proxy = sidecar.NewProxy( 94 | scrapeManager.GetJob, 95 | func() map[uint64]*target.ScrapeStatus { 96 | return targetManager.TargetsInfo().Status 97 | }, 98 | configManager.ConfigInfo, 99 | promRegistry, 100 | log.WithField("component", "target manager")) 101 | 102 | injector = sidecar.NewInjector( 103 | sidecarCfg.configOutFile, 104 | sidecar.InjectConfigOptions{ 105 | ProxyURL: sidecarCfg.injectProxyURL, 106 | PrometheusURL: sidecarCfg.prometheusURL, 107 | ShardMonitorEnable: sidecarCfg.shardMonitor, 108 | }, 109 | promRegistry, 110 | lg.WithField("component", "injector"), 111 | ) 112 | promCli = prom.NewClient(sidecarCfg.prometheusURL) 113 | ) 114 | 115 | configManager.AddReloadCallbacks( 116 | func(cfg *prom.ConfigInfo) error { 117 | return configInjectSidecar(cfg.Config, &sidecarCfg.configInject) 118 | }, 119 | scrapeManager.ApplyConfig, 120 | injector.ApplyConfig, 121 | func(cfg *prom.ConfigInfo) error { 122 | return promCli.ConfigReload() 123 | }) 124 | 125 | targetManager.AddUpdateCallbacks( 126 | injector.UpdateTargets, 127 | func(map[string][]*target.Target) error { 128 | return promCli.ConfigReload() 129 | }) 130 | 131 | service := sidecar.NewService( 132 | sidecarCfg.configFile, 133 | sidecarCfg.prometheusURL, 134 | func() (i int64, e error) { 135 | if !sidecarCfg.fetchHeadSeries { 136 | return 0, nil 137 | } 138 | 139 | ts, err := promCli.TSDBInfo() 140 | if err != nil { 141 | return 0, err 142 | } 143 | 144 | return ts.HeadStats.NumSeries, nil 145 | }, 146 | configManager, 147 | targetManager, 148 | promRegistry, 149 | log.WithField("component", "web"), 150 | ) 151 | 152 | if sidecarCfg.configFile != "" { 153 | if err := configManager.ReloadFromFile(sidecarCfg.configFile); err != nil { 154 | panic(err) 155 | } 156 | lg.Infof("load config done") 157 | } 158 | 159 | if err := targetManager.Load(); err != nil { 160 | panic(err) 161 | } 162 | lg.Infof("load targets done") 
163 | 164 | g := errgroup.Group{} 165 | g.Go(func() error { 166 | lg.Infof("proxy start at %s", sidecarCfg.proxyAddress) 167 | return proxy.Run(sidecarCfg.proxyAddress) 168 | }) 169 | 170 | g.Go(func() error { 171 | lg.Infof("sidecar server start at %s", sidecarCfg.apiAddress) 172 | return service.Run(sidecarCfg.apiAddress) 173 | }) 174 | return g.Wait() 175 | }, 176 | } 177 | 178 | func configInjectSidecar(cfg *config.Config, option *configInjectOption) error { 179 | if option == nil { 180 | return nil 181 | } 182 | 183 | for _, job := range cfg.ScrapeConfigs { 184 | configInjectServiceAccount(job, option) 185 | } 186 | return nil 187 | } 188 | -------------------------------------------------------------------------------- /deploy/demo/config.yaml: -------------------------------------------------------------------------------- 1 | kind: ConfigMap 2 | apiVersion: v1 3 | metadata: 4 | name: prometheus-config 5 | data: 6 | prometheus.yml: |- 7 | global: 8 | scrape_interval: 15s 9 | evaluation_interval: 15s 10 | external_labels: 11 | cluster: custom2 12 | scrape_configs: 13 | - job_name: 'metrics-test' 14 | kubernetes_sd_configs: 15 | - role: pod 16 | tls_config: 17 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 18 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 19 | metric_relabel_configs: 20 | - source_labels: [pod] 21 | separator: ; 22 | regex: (.*) 23 | target_label: pod1 24 | replacement: $1 25 | relabel_configs: 26 | - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] 27 | regex: metrics 28 | action: keep 29 | - source_labels: [__meta_kubernetes_pod_ip] 30 | action: replace 31 | regex: (.*) 32 | replacement: ${1}:9091 33 | target_label: __address__ 34 | - source_labels: 35 | - __meta_kubernetes_pod_name 36 | target_label: pod 37 | 38 | --- 39 | apiVersion: v1 40 | kind: ConfigMap 41 | metadata: 42 | name: prometheus-rules 43 | labels: 44 | name: prometheus-rules 45 | data: 46 | rules.yaml: |- 47 | 
groups: 48 | - name: count 49 | rules: 50 | - expr: | 51 | count(a00000000000metrics0) 52 | record: metrics_count 53 | 54 | 55 | -------------------------------------------------------------------------------- /deploy/demo/coordinator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: kvass-coordinator 5 | labels: 6 | app.kubernetes.io/name: kvass-coordinator 7 | spec: 8 | ports: 9 | - name: http 10 | port: 9090 11 | targetPort: http 12 | selector: 13 | app.kubernetes.io/name: kvass-coordinator 14 | --- 15 | apiVersion: apps/v1 16 | kind: Deployment 17 | metadata: 18 | labels: 19 | app.kubernetes.io/name: kvass-coordinator 20 | name: kvass-coordinator 21 | spec: 22 | replicas: 1 23 | selector: 24 | matchLabels: 25 | app.kubernetes.io/name: kvass-coordinator 26 | strategy: 27 | rollingUpdate: 28 | maxSurge: 1 29 | maxUnavailable: 0 30 | type: RollingUpdate 31 | template: 32 | metadata: 33 | labels: 34 | app.kubernetes.io/name: kvass-coordinator 35 | spec: 36 | serviceAccountName: prometheus 37 | containers: 38 | - name: config-reload 39 | args: 40 | - --reload-url=http://localhost:9090/-/reload 41 | - --config-file=/etc/prometheus/config/prometheus.yml 42 | - --config-envsubst-file=/etc/prometheus/config_out/prometheus.env.yaml 43 | image: rancher/coreos-prometheus-config-reloader:v0.32.0 44 | imagePullPolicy: Always 45 | resources: 46 | limits: 47 | memory: 50Mi 48 | requests: 49 | memory: 10Mi 50 | volumeMounts: 51 | - mountPath: /etc/prometheus/config_out 52 | name: config-out 53 | - mountPath: /etc/prometheus/config 54 | name: config 55 | - image: tkestack/kvass:latest 56 | imagePullPolicy: Always 57 | args: 58 | - coordinator 59 | - --shard.max-series=30000 # max series per shard 60 | - --shard.selector=app.kubernetes.io/name=prometheus # selector to get shard StatefulSets 61 | - --shard.namespace=$(NAMESPACE) # namespace to select shard StatefulSets 62 | - 
--config.file=/etc/prometheus/config_out/prometheus.env.yaml 63 | - --shard.max-idle-time=0 # max time to wait before idle shard removed, 0 means shard never removed 64 | env: 65 | - name: NAMESPACE 66 | valueFrom: 67 | fieldRef: 68 | apiVersion: v1 69 | fieldPath: metadata.namespace 70 | ports: 71 | - containerPort: 9090 72 | name: http 73 | protocol: TCP 74 | volumeMounts: 75 | - mountPath: /etc/prometheus/config 76 | name: config 77 | - mountPath: /etc/prometheus/config_out 78 | name: config-out 79 | name: kvass 80 | resources: 81 | limits: 82 | cpu: 1 83 | memory: 2Gi 84 | requests: 85 | cpu: 250m 86 | memory: 20Mi 87 | volumes: 88 | - name: config 89 | configMap: 90 | name: prometheus-config 91 | defaultMode: 420 92 | - emptyDir: {} 93 | name: config-out 94 | - emptyDir: {} 95 | name: tls-assets 96 | 97 | 98 | -------------------------------------------------------------------------------- /deploy/demo/kvass-rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: prometheus 5 | rules: 6 | - apiGroups: [""] 7 | resources: 8 | - persistentvolumeclaims 9 | verbs: 10 | - delete 11 | - apiGroups: 12 | - apps 13 | resources: 14 | - statefulsets 15 | verbs: 16 | - list 17 | - get 18 | - patch 19 | - update 20 | - apiGroups: [""] 21 | resources: 22 | - nodes 23 | - nodes/proxy 24 | - services 25 | - endpoints 26 | - pods 27 | - configmaps 28 | - secrets 29 | verbs: ["get", "list", "watch"] 30 | - nonResourceURLs: ["/metrics"] 31 | verbs: ["get"] 32 | --- 33 | apiVersion: v1 34 | kind: ServiceAccount 35 | metadata: 36 | name: prometheus 37 | --- 38 | apiVersion: rbac.authorization.k8s.io/v1 39 | kind: ClusterRoleBinding 40 | metadata: 41 | name: prometheus 42 | roleRef: 43 | apiGroup: rbac.authorization.k8s.io 44 | kind: ClusterRole 45 | name: prometheus 46 | subjects: 47 | - kind: ServiceAccount 48 | name: prometheus 49 | namespace: default 50 
| --- 51 | -------------------------------------------------------------------------------- /deploy/demo/metrics.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: metrics 6 | name: metrics 7 | spec: 8 | progressDeadlineSeconds: 600 9 | replicas: 6 10 | revisionHistoryLimit: 5 11 | selector: 12 | matchLabels: 13 | app.kubernetes.io/name: metrics 14 | template: 15 | metadata: 16 | labels: 17 | app.kubernetes.io/name: metrics 18 | spec: 19 | containers: 20 | - args: 21 | - -label=4 22 | - -metrics=1 23 | - -value=10 24 | - -value-len=20 25 | - -label-len=20 26 | command: 27 | - /metrics 28 | image: ccr.ccs.tencentyun.com/ccr_dev/metrics:latest 29 | imagePullPolicy: Always 30 | ports: 31 | - containerPort: 9091 32 | name: metrics 33 | securityContext: 34 | privileged: false 35 | terminationMessagePath: /dev/termination-log 36 | terminationMessagePolicy: File 37 | dnsPolicy: ClusterFirst 38 | imagePullSecrets: 39 | - name: qcloudregistrykey 40 | restartPolicy: Always 41 | schedulerName: default-scheduler 42 | securityContext: {} 43 | -------------------------------------------------------------------------------- /deploy/demo/prometheus-rep-0.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | name: prometheus 5 | labels: 6 | app.kubernetes.io/name: prometheus 7 | spec: 8 | type: ClusterIP 9 | clusterIP: None 10 | selector: 11 | app.kubernetes.io/name: prometheus 12 | ports: 13 | - name: web 14 | protocol: TCP 15 | port: 8080 16 | targetPort: web 17 | - name: grpc 18 | port: 10901 19 | targetPort: grpc 20 | --- 21 | apiVersion: apps/v1 22 | kind: StatefulSet 23 | metadata: 24 | labels: 25 | app.kubernetes.io/name: prometheus 26 | name: prometheus-rep-0 27 | spec: 28 | # must set as Parallel 29 | podManagementPolicy: Parallel 30 | replicas: 0 31 | 
revisionHistoryLimit: 10 32 | selector: 33 | matchLabels: 34 | app.kubernetes.io/name: prometheus 35 | kvass/rep: "0" 36 | serviceName: prometheus 37 | template: 38 | metadata: 39 | labels: 40 | app.kubernetes.io/name: prometheus 41 | kvass/rep: "0" 42 | spec: 43 | containers: 44 | - name: thanos 45 | image: thanosio/thanos:v0.18.0 46 | args: 47 | - sidecar 48 | - --tsdb.path=/prometheus 49 | - --prometheus.url=http://localhost:8080 50 | - --reloader.config-file=/etc/prometheus/config/prometheus.yml 51 | - --reloader.config-envsubst-file=/etc/prometheus/config_out/prometheus.env.yaml 52 | ports: 53 | - name: http-sidecar 54 | containerPort: 10902 55 | - name: grpc 56 | containerPort: 10901 57 | livenessProbe: 58 | httpGet: 59 | port: 10902 60 | path: /-/healthy 61 | readinessProbe: 62 | httpGet: 63 | port: 10902 64 | path: /-/ready 65 | volumeMounts: 66 | - mountPath: /etc/prometheus/config_out 67 | name: config-out 68 | - mountPath: /etc/prometheus/config 69 | name: config 70 | - name: kvass 71 | args: 72 | - sidecar 73 | - --store.path=/prometheus/ # where to store kvass local data 74 | - --config.file=/etc/prometheus/config_out/prometheus.env.yaml # origin config file 75 | - --config.output-file=/etc/prometheus/config_out/prometheus_injected.yaml # injected config file. 
this is the file prometheus uses 76 | image: tkestack/kvass:latest 77 | imagePullPolicy: Always 78 | volumeMounts: 79 | - mountPath: /etc/prometheus/config_out 80 | name: config-out 81 | # sidecar needs a pvc to store the targets list, see the '--store.path' flag 82 | # sidecar will reload the targets list in its initialization phase 83 | - mountPath: /prometheus 84 | name: data 85 | ports: 86 | - containerPort: 8080 87 | name: web 88 | protocol: TCP 89 | env: 90 | - name: POD_NAME 91 | valueFrom: 92 | fieldRef: 93 | fieldPath: metadata.name 94 | - name: prometheus 95 | args: 96 | - --storage.tsdb.path=/prometheus 97 | - --storage.tsdb.retention.time=3h 98 | - --web.enable-lifecycle 99 | - --storage.tsdb.no-lockfile 100 | - --storage.tsdb.max-block-duration=2h 101 | - --storage.tsdb.min-block-duration=2h 102 | - --config.file=/etc/prometheus/config_out/prometheus_injected.yaml # use injected config file instead of origin config file 103 | - --log.level=debug 104 | image: prom/prometheus:v2.33.3 105 | ports: 106 | - containerPort: 9090 107 | name: server 108 | protocol: TCP 109 | volumeMounts: 110 | - mountPath: /etc/prometheus/config 111 | name: config 112 | - mountPath: /etc/prometheus/config_out 113 | name: config-out 114 | - mountPath: /prometheus 115 | name: data 116 | dnsPolicy: ClusterFirst 117 | restartPolicy: Always 118 | schedulerName: default-scheduler 119 | serviceAccountName: prometheus 120 | securityContext: 121 | runAsUser: 0 122 | volumes: 123 | - name: data 124 | emptyDir: {} 125 | - name: config 126 | configMap: 127 | name: prometheus-config 128 | defaultMode: 420 129 | - emptyDir: {} 130 | name: config-out 131 | - emptyDir: {} 132 | name: tls-assets 133 | # volumeClaimTemplates: 134 | # - metadata: 135 | # labels: 136 | # k8s-app: prometheus 137 | # name: data 138 | # spec: 139 | # accessModes: 140 | # - ReadWriteOnce 141 | # resources: 142 | # requests: 143 | # storage: 10Gi 144 | # storageClassName: cbs 145 | # volumeMode: Filesystem 146 | updateStrategy: 147 | 
rollingUpdate: 148 | partition: 0 149 | type: RollingUpdate 150 | -------------------------------------------------------------------------------- /deploy/demo/thanos-query.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: thanos-query 5 | labels: 6 | app.kubernetes.io/name: thanos-query 7 | spec: 8 | ports: 9 | - name: grpc 10 | port: 10901 11 | targetPort: grpc 12 | - name: http 13 | port: 9090 14 | targetPort: http 15 | selector: 16 | app.kubernetes.io/name: thanos-query 17 | --- 18 | 19 | apiVersion: apps/v1 20 | kind: Deployment 21 | metadata: 22 | name: thanos-query 23 | labels: 24 | app.kubernetes.io/name: thanos-query 25 | spec: 26 | replicas: 1 27 | selector: 28 | matchLabels: 29 | app.kubernetes.io/name: thanos-query 30 | template: 31 | metadata: 32 | labels: 33 | app.kubernetes.io/name: thanos-query 34 | spec: 35 | containers: 36 | - args: 37 | - query 38 | - --log.level=debug 39 | - --query.auto-downsampling 40 | - --grpc-address=0.0.0.0:10901 41 | - --http-address=0.0.0.0:9090 42 | - --query.partial-response 43 | - --store=dnssrv+_grpc._tcp.prometheus.$(NAMESPACE).svc.cluster.local 44 | - --store=dnssrv+_grpc._tcp.thanos-rule.$(NAMESPACE).svc.cluster.local 45 | image: thanosio/thanos:v0.18.0 46 | env: 47 | - name: NAMESPACE 48 | valueFrom: 49 | fieldRef: 50 | apiVersion: v1 51 | fieldPath: metadata.namespace 52 | livenessProbe: 53 | failureThreshold: 4 54 | httpGet: 55 | path: /-/healthy 56 | port: 9090 57 | scheme: HTTP 58 | periodSeconds: 30 59 | name: thanos-query 60 | ports: 61 | - containerPort: 10901 62 | name: grpc 63 | - containerPort: 9090 64 | name: http 65 | readinessProbe: 66 | failureThreshold: 20 67 | httpGet: 68 | path: /-/ready 69 | port: 9090 70 | scheme: HTTP 71 | periodSeconds: 5 72 | terminationMessagePolicy: FallbackToLogsOnError 73 | terminationGracePeriodSeconds: 120 74 | 
-------------------------------------------------------------------------------- /deploy/demo/thanos-rule.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: thanos-rule 5 | labels: 6 | app.kubernetes.io/name: thanos-rule 7 | spec: 8 | ports: 9 | - name: grpc 10 | port: 10901 11 | targetPort: grpc 12 | - name: http 13 | port: 10902 14 | targetPort: http 15 | selector: 16 | app.kubernetes.io/name: thanos-rule 17 | --- 18 | 19 | apiVersion: apps/v1 20 | kind: StatefulSet 21 | metadata: 22 | name: thanos-rule 23 | labels: 24 | app.kubernetes.io/name: thanos-rule 25 | spec: 26 | replicas: 1 27 | selector: 28 | matchLabels: 29 | app.kubernetes.io/name: thanos-rule 30 | serviceName: "" 31 | template: 32 | metadata: 33 | labels: 34 | app.kubernetes.io/name: thanos-rule 35 | spec: 36 | volumes: 37 | - name: rules 38 | configMap: 39 | name: prometheus-rules 40 | defaultMode: 420 41 | containers: 42 | - args: 43 | - rule 44 | - --rule-file=/etc/thanos/rules/*.yaml 45 | - --grpc-address=:10901 46 | - --http-address=:10902 47 | - --data-dir=/var/thanos/rule 48 | - --query=dnssrv+_http._tcp.thanos-query.$(NAMESPACE).svc.cluster.local 49 | - --eval-interval=10s 50 | - --tsdb.retention=3h 51 | env: 52 | - name: NAMESPACE 53 | valueFrom: 54 | fieldRef: 55 | apiVersion: v1 56 | fieldPath: metadata.namespace 57 | image: thanosio/thanos:v0.18.0 58 | volumeMounts: 59 | - mountPath: /etc/thanos/rules 60 | name: rules 61 | livenessProbe: 62 | failureThreshold: 4 63 | httpGet: 64 | path: /-/healthy 65 | port: 10902 66 | scheme: HTTP 67 | periodSeconds: 30 68 | name: thanos-query 69 | ports: 70 | - containerPort: 10901 71 | name: grpc 72 | - containerPort: 10902 73 | name: http 74 | readinessProbe: 75 | failureThreshold: 20 76 | httpGet: 77 | path: /-/ready 78 | port: 10902 79 | scheme: HTTP 80 | periodSeconds: 5 81 | terminationMessagePolicy: FallbackToLogsOnError 82 | 
terminationGracePeriodSeconds: 120 83 | -------------------------------------------------------------------------------- /documents/design.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkestack/kvass/04643b757dd67bc21b863d59df1de03bb43efea8/documents/design.md -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module tkestack.io/kvass 2 | 3 | go 1.17 4 | 5 | require ( 6 | github.com/VictoriaMetrics/VictoriaMetrics v1.71.0 7 | github.com/cssivision/reverseproxy v0.0.1 8 | github.com/gin-contrib/pprof v1.3.0 9 | github.com/gin-gonic/gin v1.6.3 10 | github.com/go-kit/kit v0.12.0 11 | github.com/go-kit/log v0.2.0 12 | github.com/gobuffalo/packr/v2 v2.2.0 13 | github.com/klauspost/compress v1.13.6 14 | github.com/mitchellh/hashstructure/v2 v2.0.1 15 | github.com/mroth/weightedrand v0.4.1 16 | github.com/pkg/errors v0.9.1 17 | github.com/prometheus/client_golang v1.12.1 18 | github.com/prometheus/common v0.32.1 19 | github.com/prometheus/prometheus v2.28.1+incompatible 20 | github.com/sirupsen/logrus v1.8.1 21 | github.com/spf13/cobra v1.1.3 22 | github.com/stretchr/testify v1.7.0 23 | go.etcd.io/etcd v0.5.0-alpha.5.0.20200910180754-dd1b699fc489 24 | golang.org/x/sync v0.0.0-20210220032951-036812b2e83c 25 | gopkg.in/yaml.v2 v2.4.0 26 | k8s.io/api v0.22.7 27 | k8s.io/apimachinery v0.22.7 28 | k8s.io/client-go v0.22.7 29 | ) 30 | 31 | replace github.com/prometheus/prometheus => github.com/prometheus/prometheus v0.0.0-20220324221659-44a5e705be50 // 2.35.0 32 | -------------------------------------------------------------------------------- /pkg/api/request.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. 
All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package api 19 | 20 | import ( 21 | "bytes" 22 | "encoding/json" 23 | "fmt" 24 | "io/ioutil" 25 | "net/http" 26 | 27 | "github.com/pkg/errors" 28 | ) 29 | 30 | // Post do http post with standard response format 31 | func Post(url string, req interface{}, ret interface{}) (err error) { 32 | reqData := make([]byte, 0) 33 | if req != nil { 34 | reqData, err = json.Marshal(req) 35 | if err != nil { 36 | return err 37 | } 38 | } 39 | 40 | resp, err := http.Post(url, "application/json", bytes.NewBuffer(reqData)) 41 | if err != nil { 42 | return err 43 | } 44 | defer func() { _ = resp.Body.Close() }() 45 | return dealResp(resp, ret) 46 | } 47 | 48 | // Get do get request to target url and save data to ret 49 | func Get(url string, ret interface{}) error { 50 | resp, err := http.Get(url) 51 | if err != nil { 52 | return errors.Wrapf(err, "http get") 53 | } 54 | defer func() { _ = resp.Body.Close() }() 55 | return dealResp(resp, ret) 56 | } 57 | 58 | func dealResp(resp *http.Response, ret interface{}) error { 59 | if resp.StatusCode != 200 { 60 | return fmt.Errorf("status code is %d", resp.StatusCode) 61 | } 62 | 63 | data, err := ioutil.ReadAll(resp.Body) 64 | if err != nil { 65 | return errors.Wrapf(err, "read data") 66 | } 67 | if ret != nil { 68 | commonResp := Data(ret) 69 | if err := json.Unmarshal(data, commonResp); err != nil { 70 | return errors.Wrapf(err, 
"Unmarshal") 71 | } 72 | 73 | if commonResp.Status != StatusSuccess { 74 | return fmt.Errorf(commonResp.Err) 75 | } 76 | } 77 | 78 | return nil 79 | } 80 | -------------------------------------------------------------------------------- /pkg/api/request_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package api 19 | 20 | import ( 21 | "io/ioutil" 22 | "net/http" 23 | "net/http/httptest" 24 | "testing" 25 | 26 | "github.com/stretchr/testify/require" 27 | "tkestack.io/kvass/pkg/utils/test" 28 | 29 | "tkestack.io/kvass/pkg/utils/types" 30 | ) 31 | 32 | type caseType struct { 33 | name string 34 | statusCode int 35 | req interface{} 36 | ret interface{} 37 | respData string 38 | wantRet interface{} 39 | wantErr bool 40 | } 41 | 42 | var cases = []caseType{ 43 | { 44 | name: "return no 200 status code", 45 | statusCode: 503, 46 | wantErr: true, 47 | ret: types.StringPtr(""), 48 | wantRet: types.StringPtr("test"), 49 | }, 50 | { 51 | name: "return error response", 52 | statusCode: 200, 53 | respData: ` 54 | { 55 | "status":"error", 56 | "error":"test" 57 | }`, 58 | wantErr: true, 59 | ret: types.StringPtr(""), 60 | wantRet: types.StringPtr("test"), 61 | }, 62 | { 63 | name: "unknown resp format ", 64 | statusCode: 200, 65 | respData: "---", 66 | ret: types.StringPtr(""), 67 | wantRet: types.StringPtr("test"), 68 | wantErr: true, 69 | }, 70 | { 71 | name: "normal response", 72 | req: 1, 73 | statusCode: 200, 74 | respData: ` 75 | { 76 | "status":"success", 77 | "data":"test" 78 | } 79 | `, 80 | ret: types.StringPtr(""), 81 | wantRet: types.StringPtr("test"), 82 | wantErr: false, 83 | }, 84 | } 85 | 86 | func testCases(t *testing.T, method string) { 87 | for _, cs := range cases { 88 | t.Run(cs.name, func(t *testing.T) { 89 | r := require.New(t) 90 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 91 | if !cs.wantErr && method == "POST" { 92 | data, err := ioutil.ReadAll(req.Body) 93 | r.NoError(err) 94 | r.Equal("1", string(data)) 95 | } 96 | 97 | if cs.statusCode == 200 { 98 | w.Write([]byte(cs.respData)) 99 | } else { 100 | w.WriteHeader(cs.statusCode) 101 | } 102 | })) 103 | defer ts.Close() 104 | 105 | var err error 106 | if method == "POST" { 107 | err = Post(ts.URL, cs.req, cs.ret) 108 | } else { 109 
| err = Get(ts.URL, cs.ret) 110 | } 111 | 112 | if cs.wantErr { 113 | r.Error(err) 114 | return 115 | } 116 | r.NoError(err) 117 | r.JSONEq(test.MustJSON(cs.wantRet), test.MustJSON(cs.ret)) 118 | }) 119 | } 120 | } 121 | 122 | func TestGet(t *testing.T) { 123 | testCases(t, "GET") 124 | } 125 | func TestPost(t *testing.T) { 126 | testCases(t, "POST") 127 | } 128 | -------------------------------------------------------------------------------- /pkg/api/result.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package api 19 | 20 | import ( 21 | "fmt" 22 | "time" 23 | 24 | "github.com/gin-gonic/gin" 25 | "github.com/pkg/errors" 26 | "github.com/prometheus/client_golang/prometheus" 27 | "github.com/prometheus/client_golang/prometheus/promhttp" 28 | "github.com/sirupsen/logrus" 29 | ) 30 | 31 | // Status indicate the result status of request, success or error 32 | type Status string 33 | 34 | // ErrorType is not empty if result status is not success 35 | type ErrorType string 36 | 37 | // Result is the common format of all response 38 | type Result struct { 39 | // ErrorType is the type of result if Status is not success 40 | ErrorType ErrorType `json:"errorType,omitempty"` 41 | // Err indicate the error detail 42 | Err string `json:"error,omitempty"` 43 | // Data is the real data of result, data may be nil even if Status is success 44 | Data interface{} `json:"data,omitempty"` 45 | // Status indicate whether the result is success 46 | Status Status `json:"status"` 47 | } 48 | 49 | // InternalErr make a result with ErrorType ErrorInternal 50 | func InternalErr(err error, format string, args ...interface{}) *Result { 51 | return &Result{ 52 | ErrorType: ErrorInternal, 53 | Status: StatusError, 54 | Err: errors.Wrapf(err, format, args...).Error(), 55 | } 56 | } 57 | 58 | // BadDataErr make a result with ErrorType ErrorBadData 59 | func BadDataErr(err error, format string, args ...interface{}) *Result { 60 | return &Result{ 61 | ErrorType: ErrorBadData, 62 | Status: StatusError, 63 | Err: errors.Wrapf(err, format, args...).Error(), 64 | } 65 | } 66 | 67 | // Data make a result with data or nil, the Status will be set to StatusSuccess 68 | func Data(data interface{}) *Result { 69 | return &Result{ 70 | Data: data, 71 | Status: StatusSuccess, 72 | } 73 | } 74 | 75 | const ( 76 | // StatusSuccess indicate result Status is success, the data of result is available 77 | StatusSuccess Status = "success" 78 | // StatusError indicate result is failed, the data may be 
empty 79 | StatusError Status = "error" 80 | // ErrorBadData indicate that result is failed because the wrong request data 81 | ErrorBadData ErrorType = "bad_data" 82 | // ErrorInternal indicate that result is failed because the request data may be right but the server is something wrong 83 | ErrorInternal ErrorType = "internal" 84 | ) 85 | 86 | // Helper provider some function to build a service 87 | type Helper struct { 88 | log logrus.FieldLogger 89 | httpDurationSeconds *prometheus.HistogramVec 90 | register *prometheus.Registry 91 | } 92 | 93 | // NewHelper create a new APIWrapper 94 | func NewHelper(lg logrus.FieldLogger, register *prometheus.Registry, metricsPrefix string) *Helper { 95 | w := &Helper{ 96 | log: lg, 97 | register: register, 98 | httpDurationSeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{ 99 | Name: fmt.Sprintf("%s_http_request_duration_seconds", metricsPrefix), 100 | Help: "http request duration seconds", 101 | Buckets: []float64{0.01, 0.1, 0.3, 0.5, 1, 3, 5, 10}, 102 | }, []string{"path", "code"}), 103 | } 104 | 105 | w.register.MustRegister(w.httpDurationSeconds) 106 | return w 107 | } 108 | 109 | // MetricsHandler process metrics request 110 | func (h *Helper) MetricsHandler(c *gin.Context) { 111 | promhttp.HandlerFor(h.register, promhttp.HandlerOpts{ 112 | ErrorLog: h.log, 113 | }).ServeHTTP(c.Writer, c.Request) 114 | } 115 | 116 | // Wrap return a gin handler function with common result processed 117 | func (h *Helper) Wrap(f func(ctx *gin.Context) *Result) func(ctx *gin.Context) { 118 | return func(ctx *gin.Context) { 119 | var ( 120 | path = ctx.Request.URL.Path 121 | code = 200 122 | ) 123 | 124 | defer func(start time.Time) { 125 | h.httpDurationSeconds.WithLabelValues(path, fmt.Sprint(code)). 
126 | Observe(float64(time.Since(start).Seconds())) 127 | }(time.Now()) 128 | 129 | r := f(ctx) 130 | if r == nil { 131 | ctx.Status(code) 132 | return 133 | } 134 | 135 | if r.ErrorType != "" { 136 | h.log.Error(r.Err) 137 | code = 503 138 | if r.ErrorType == ErrorBadData { 139 | code = 400 140 | } 141 | } 142 | 143 | ctx.JSON(code, r) 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /pkg/api/result_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package api 19 | 20 | import ( 21 | "fmt" 22 | "io/ioutil" 23 | "net/http/httptest" 24 | "testing" 25 | 26 | "tkestack.io/kvass/pkg/utils/test" 27 | 28 | "github.com/gin-gonic/gin" 29 | "github.com/prometheus/client_golang/prometheus" 30 | "github.com/sirupsen/logrus" 31 | 32 | "github.com/stretchr/testify/require" 33 | ) 34 | 35 | func TestData(t *testing.T) { 36 | s := Data(map[string]string{}) 37 | require.NotNil(t, s.Data) 38 | require.Empty(t, s.ErrorType) 39 | require.Empty(t, s.Err) 40 | require.Equal(t, StatusSuccess, s.Status) 41 | } 42 | 43 | func TestInternalErr(t *testing.T) { 44 | s := InternalErr(fmt.Errorf("1"), "test") 45 | require.Nil(t, s.Data) 46 | require.Equal(t, ErrorInternal, s.ErrorType) 47 | require.NotEmpty(t, s.Err) 48 | require.Equal(t, StatusError, s.Status) 49 | } 50 | 51 | func TestBadDataErr(t *testing.T) { 52 | s := BadDataErr(fmt.Errorf("1"), "test") 53 | require.Nil(t, s.Data) 54 | require.Equal(t, ErrorBadData, s.ErrorType) 55 | require.NotEmpty(t, s.Err) 56 | require.Equal(t, StatusError, s.Status) 57 | } 58 | 59 | func TestWrapper(t *testing.T) { 60 | var cases = []struct { 61 | name string 62 | code int 63 | result *Result 64 | }{ 65 | { 66 | name: "test that return interval error", 67 | code: 503, 68 | result: InternalErr(fmt.Errorf(""), "test"), 69 | }, 70 | { 71 | name: "test that return bad request error", 72 | code: 400, 73 | result: BadDataErr(fmt.Errorf(""), "test"), 74 | }, 75 | { 76 | name: "test that return success", 77 | code: 200, 78 | result: Data(map[string]string{}), 79 | }, 80 | { 81 | name: "test that return empty", 82 | code: 200, 83 | result: nil, 84 | }, 85 | } 86 | 87 | for _, cs := range cases { 88 | t.Run(cs.name, func(t *testing.T) { 89 | r := require.New(t) 90 | wp := NewHelper(logrus.New(), prometheus.NewRegistry(), "test") 91 | e := gin.Default() 92 | e.GET("/test", wp.Wrap(func(ctx *gin.Context) *Result { 93 | return cs.result 94 | })) 95 | e.GET("/metrics", wp.MetricsHandler) 96 | 
97 | req := httptest.NewRequest("GET", "/test", nil) 98 | w := httptest.NewRecorder() 99 | e.ServeHTTP(w, req) 100 | result := w.Result() 101 | 102 | body, err := ioutil.ReadAll(result.Body) 103 | r.NoError(err) 104 | r.Equal(cs.code, result.StatusCode) 105 | 106 | if cs.result == nil { 107 | r.Empty(body) 108 | return 109 | } 110 | r.JSONEq(test.MustJSON(cs.result), string(body)) 111 | }) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /pkg/api/testing.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package api 19 | 20 | import ( 21 | "encoding/json" 22 | "io/ioutil" 23 | "net/http" 24 | "net/http/httptest" 25 | "strings" 26 | "testing" 27 | 28 | "github.com/gin-gonic/gin" 29 | "github.com/stretchr/testify/require" 30 | ) 31 | 32 | // TestCall create a httptest server and do http request to it 33 | // the data in params will be write to server and the ret in params is deemed to the Data of common Result 34 | func TestCall(t *testing.T, serveHTTP func(w http.ResponseWriter, req *http.Request), uri, method, data string, ret interface{}) (*require.Assertions, *Result) { 35 | gin.SetMode(gin.ReleaseMode) 36 | req := httptest.NewRequest(method, uri, strings.NewReader(data)) 37 | w := httptest.NewRecorder() 38 | 39 | serveHTTP(w, req) 40 | 41 | result := w.Result() 42 | defer result.Body.Close() 43 | r := require.New(t) 44 | body, err := ioutil.ReadAll(result.Body) 45 | r.NoError(err) 46 | resObj := &Result{Data: ret} 47 | if len(body) != 0 { 48 | _ = json.Unmarshal(body, resObj) 49 | } 50 | if ret != nil { 51 | r.Equal(StatusSuccess, resObj.Status) 52 | r.Empty(resObj.Err) 53 | r.Empty(resObj.ErrorType) 54 | } 55 | return r, resObj 56 | } 57 | -------------------------------------------------------------------------------- /pkg/api/testing_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. 
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package api 19 | 20 | import ( 21 | "github.com/stretchr/testify/require" 22 | "io/ioutil" 23 | "net/http" 24 | "testing" 25 | "tkestack.io/kvass/pkg/utils/test" 26 | ) 27 | 28 | func TestTestCall(t *testing.T) { 29 | r := require.New(t) 30 | res := "" 31 | TestCall(t, func(w http.ResponseWriter, req *http.Request) { 32 | r.Equal("/api", req.URL.Path) 33 | data, err := ioutil.ReadAll(req.Body) 34 | r.NoError(err) 35 | defer req.Body.Close() 36 | r.Equal("xxx", string(data)) 37 | _, _ = w.Write([]byte(test.MustJSON(&Result{ 38 | Status: StatusSuccess, 39 | Data: "test", 40 | }))) 41 | }, "/api", http.MethodGet, "xxx", &res) 42 | r.Equal("test", res) 43 | } 44 | -------------------------------------------------------------------------------- /pkg/coordinator/service_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. 
See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package coordinator 19 | 20 | import ( 21 | "net/http" 22 | "net/url" 23 | "testing" 24 | 25 | "github.com/prometheus/client_golang/prometheus" 26 | "github.com/prometheus/common/model" 27 | "github.com/prometheus/prometheus/model/labels" 28 | "github.com/prometheus/prometheus/scrape" 29 | "github.com/sirupsen/logrus" 30 | "tkestack.io/kvass/pkg/api" 31 | "tkestack.io/kvass/pkg/discovery" 32 | "tkestack.io/kvass/pkg/prom" 33 | "tkestack.io/kvass/pkg/shard" 34 | "tkestack.io/kvass/pkg/target" 35 | "tkestack.io/kvass/pkg/utils/test" 36 | ) 37 | 38 | func TestAPI_Targets(t *testing.T) { 39 | lbs := labels.Labels{ 40 | { 41 | Name: model.AddressLabel, 42 | Value: "127.0.0.1:80", 43 | }, 44 | { 45 | Name: model.SchemeLabel, 46 | Value: "http", 47 | }, 48 | } 49 | 50 | getDrop := func() map[string][]*discovery.SDTargets { 51 | return map[string][]*discovery.SDTargets{ 52 | "job1": { 53 | { 54 | PromTarget: scrape.NewTarget(lbs, lbs, url.Values{}), 55 | }, 56 | }, 57 | } 58 | } 59 | 60 | getScrapeStatus := func() map[uint64]*target.ScrapeStatus { 61 | return map[uint64]*target.ScrapeStatus{ 62 | 1: { 63 | Health: scrape.HealthBad, 64 | LastError: "test", 65 | }, 66 | } 67 | } 68 | 69 | getActive := func() map[string][]*discovery.SDTargets { 70 | return map[string][]*discovery.SDTargets{ 71 | "job1": { 72 | { 73 | ShardTarget: &target.Target{ 74 | Hash: 1, 75 | Labels: lbs, 76 | Series: 0, 77 | }, 78 | PromTarget: scrape.NewTarget(lbs, lbs, url.Values{}), 79 | }, 80 | }, 81 | } 82 | } 83 | 84 | var cases = []struct { 85 | name string 86 | param url.Values 87 | wantActive int 88 | wantDropped int 89 | wantStatistics []TargetStatistics 90 | }{ 91 | { 92 | name: "(state=): return all targets only", 93 | wantActive: 1, 94 | wantDropped: 1, 95 | }, 96 | { 97 | name: "(state=any): return all targets", 98 | param: url.Values{ 99 | "state": []string{"any"}, 
100 | }, 101 | wantActive: 1, 102 | wantDropped: 1, 103 | }, 104 | { 105 | name: "(state=active): return active targets only", 106 | param: url.Values{ 107 | "state": []string{"active"}, 108 | }, 109 | wantActive: 1, 110 | wantDropped: 0, 111 | }, 112 | { 113 | name: "(state=dropped): return dropped targets only", 114 | param: url.Values{ 115 | "state": []string{"dropped"}, 116 | }, 117 | wantActive: 0, 118 | wantDropped: 1, 119 | }, 120 | { 121 | name: "(statistics=only): return statistics only", 122 | param: url.Values{ 123 | "statistics": []string{"only"}, 124 | }, 125 | wantActive: 0, 126 | wantDropped: 0, 127 | wantStatistics: []TargetStatistics{ 128 | { 129 | JobName: "job1", 130 | Total: 1, 131 | Health: map[scrape.TargetHealth]uint64{ 132 | scrape.HealthBad: 1, 133 | }, 134 | }, 135 | }, 136 | }, 137 | { 138 | name: "(statistics=with): return statistics and all targets", 139 | param: url.Values{ 140 | "statistics": []string{"with"}, 141 | }, 142 | wantActive: 1, 143 | wantDropped: 1, 144 | wantStatistics: []TargetStatistics{ 145 | { 146 | JobName: "job1", 147 | Total: 1, 148 | Health: map[scrape.TargetHealth]uint64{ 149 | scrape.HealthBad: 1, 150 | }, 151 | }, 152 | }, 153 | }, 154 | { 155 | name: "(statistics=with,state=active): return statistics and active targets", 156 | param: url.Values{ 157 | "statistics": []string{"with"}, 158 | "state": []string{"active"}, 159 | }, 160 | wantActive: 1, 161 | wantDropped: 0, 162 | wantStatistics: []TargetStatistics{ 163 | { 164 | JobName: "job1", 165 | Total: 1, 166 | Health: map[scrape.TargetHealth]uint64{ 167 | scrape.HealthBad: 1, 168 | }, 169 | }, 170 | }, 171 | }, 172 | { 173 | name: "(job=job.*,state=active): return targets with job_name", 174 | param: url.Values{ 175 | "job": []string{"job.*"}, 176 | "state": []string{"active"}, 177 | }, 178 | wantActive: 1, 179 | wantDropped: 0, 180 | }, 181 | { 182 | name: "(job=xx,state=active): not targets returned with wrong job_name", 183 | param: url.Values{ 184 | 
"job": []string{"xx"}, 185 | "state": []string{"active"}, 186 | }, 187 | wantActive: 0, 188 | wantDropped: 0, 189 | }, 190 | { 191 | name: "(health=down,state=active): return targets with special health", 192 | param: url.Values{ 193 | "health": []string{"down"}, 194 | "state": []string{"active"}, 195 | }, 196 | wantActive: 1, 197 | wantDropped: 0, 198 | }, 199 | { 200 | name: "(health=down,up,state=active): return targets with muti special health", 201 | param: url.Values{ 202 | "health": []string{"down", "up"}, 203 | "state": []string{"active"}, 204 | }, 205 | wantActive: 1, 206 | wantDropped: 0, 207 | }, 208 | { 209 | name: "(health=up,state=active): not targets returned with wrong health", 210 | param: url.Values{ 211 | "health": []string{"up"}, 212 | "state": []string{"active"}, 213 | }, 214 | wantActive: 0, 215 | wantDropped: 0, 216 | }, 217 | } 218 | for _, cs := range cases { 219 | t.Run(cs.name, func(t *testing.T) { 220 | a := NewService("", prom.NewConfigManager(), nil, getScrapeStatus, getActive, getDrop, 221 | prometheus.NewRegistry(), logrus.New()) 222 | uri := "/api/v1/targets" 223 | if len(cs.param) != 0 { 224 | uri += "?" 
+ cs.param.Encode() 225 | } 226 | 227 | res := &TargetDiscovery{} 228 | r, _ := api.TestCall(t, a.Engine.ServeHTTP, uri, http.MethodGet, "", res) 229 | r.Equal(cs.wantActive, len(res.ActiveTargets)) 230 | r.Equal(cs.wantDropped, len(res.DroppedTargets)) 231 | r.JSONEq(test.MustJSON(cs.wantStatistics), test.MustJSON(res.ActiveStatistics)) 232 | }) 233 | } 234 | } 235 | 236 | func TestAPI_RuntimeInfo(t *testing.T) { 237 | a := NewService("", prom.NewConfigManager(), nil, func() map[uint64]*target.ScrapeStatus { 238 | return map[uint64]*target.ScrapeStatus{ 239 | 1: { 240 | Series: 100, 241 | }, 242 | 2: { 243 | Series: 100, 244 | }, 245 | } 246 | }, nil, nil, prometheus.NewRegistry(), logrus.New()) 247 | res := &shard.RuntimeInfo{} 248 | r, _ := api.TestCall(t, a.Engine.ServeHTTP, "/api/v1/runtimeinfo", http.MethodGet, "", res) 249 | r.Equal(int64(200), res.HeadSeries) 250 | } 251 | -------------------------------------------------------------------------------- /pkg/coordinator/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
// SamplesInfo contains statistics about the scraped sample rate.
type SamplesInfo struct {
	// SamplesRate is the total sample rate in the last scrape.
	SamplesRate uint64 `json:"samplesRate"`
	// JobsSamplesRate holds the last-scrape sample rate broken down per job.
	JobsSamplesRate []*JobSamplesInfo `json:"jobsSamplesRate"`
}

// JobSamplesInfo shows the total sample rate of one job in the last scrape.
type JobSamplesInfo struct {
	// JobName is the name of this job.
	JobName string `json:"jobName"`
	// SamplesRate is the total samples rate of this job's targets
	// (serialized as "samplesRateTotal").
	SamplesRate uint64 `json:"samplesRateTotal"`
	// MetricsSamplesRate indicates the samples rate per metric.
	MetricsSamplesRate map[string]uint64 `json:"metricsSamplesRate"`
}

// space is a pair of int64 counters that are always accumulated together.
// NOTE(review): the units of headSpace/processSpace are not visible here —
// confirm against the rebalance code that uses them.
type space struct {
	headSpace    int64
	processSpace int64
}

// add accumulates both counters of src into s.
func (s *space) add(src space) {
	s.headSpace, s.processSpace = s.headSpace+src.headSpace, s.processSpace+src.processSpace
}

// isZero reports whether both counters are zero.
func (s *space) isZero() bool {
	return *s == (space{})
}
See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package discovery 19 | 20 | import ( 21 | "context" 22 | "github.com/prometheus/prometheus/config" 23 | "sync" 24 | "time" 25 | "tkestack.io/kvass/pkg/prom" 26 | "tkestack.io/kvass/pkg/target" 27 | 28 | "github.com/prometheus/prometheus/discovery/targetgroup" 29 | "github.com/prometheus/prometheus/scrape" 30 | "github.com/sirupsen/logrus" 31 | ) 32 | 33 | // SDTargets represent a target has be processed with job relabel_configs 34 | // every SDTargets contains a target use for shard sidecar to generate prometheus config 35 | // and a prometheus scrape target for api /api/v1/targets 36 | type SDTargets struct { 37 | // Job if the jobName of this target 38 | Job string 39 | // ShardTarget is the target for shard sidecar to generate prometheus config 40 | ShardTarget *target.Target 41 | // PromTarget is the target of prometheus lib 42 | PromTarget *scrape.Target 43 | } 44 | 45 | // TargetsDiscovery manager the active targets and dropped targets use SD manager from prometheus lib 46 | // it will maintains shard.Target and scrape.Target together 47 | type TargetsDiscovery struct { 48 | config map[string]*config.ScrapeConfig 49 | log logrus.FieldLogger 50 | activeTargetsChan chan map[string][]*SDTargets 51 | activeTargets map[string][]*SDTargets 52 | dropTargets map[string][]*SDTargets 53 | targetsLock sync.Mutex 54 | } 55 | 56 | // New create a new TargetsDiscovery 57 | func New(log logrus.FieldLogger) *TargetsDiscovery { 58 | return &TargetsDiscovery{ 59 | log: log, 60 | activeTargetsChan: make(chan map[string][]*SDTargets, 1000), 61 | activeTargets: map[string][]*SDTargets{}, 62 | dropTargets: map[string][]*SDTargets{}, 63 | } 64 | } 65 | 66 | // WaitInit block until all job's sd done 67 | func (m *TargetsDiscovery) WaitInit(ctx context.Context) error { 68 | t := time.NewTicker(time.Second) 69 | flag := map[string]bool{} 70 | l1: 71 | for { 72 | 
select { 73 | case <-t.C: 74 | m.targetsLock.Lock() 75 | for job := range m.config { 76 | if _, exist := m.activeTargets[job]; !exist { 77 | m.targetsLock.Unlock() 78 | continue l1 79 | } 80 | if !flag[job] { 81 | m.log.Infof("job %s first service discovery done, active(%d) ,drop(%d)", job, len(m.activeTargets[job]), len(m.dropTargets[job])) 82 | flag[job] = true 83 | } 84 | } 85 | m.log.Infof("all job first service discovery done") 86 | m.targetsLock.Unlock() 87 | return nil 88 | case <-ctx.Done(): 89 | return nil 90 | } 91 | } 92 | } 93 | 94 | // ActiveTargetsChan return an channel for notify active SDTargets updated 95 | func (m *TargetsDiscovery) ActiveTargetsChan() <-chan map[string][]*SDTargets { 96 | return m.activeTargetsChan 97 | } 98 | 99 | // ActiveTargets return a copy map of global active targets the 100 | func (m *TargetsDiscovery) ActiveTargets() map[string][]*SDTargets { 101 | m.targetsLock.Lock() 102 | defer m.targetsLock.Unlock() 103 | 104 | ret := map[string][]*SDTargets{} 105 | for k, v := range m.activeTargets { 106 | ret[k] = v 107 | } 108 | return ret 109 | } 110 | 111 | // ActiveTargetsByHash return a map that with the key of target hash 112 | func (m *TargetsDiscovery) ActiveTargetsByHash() map[uint64]*SDTargets { 113 | m.targetsLock.Lock() 114 | defer m.targetsLock.Unlock() 115 | ret := map[uint64]*SDTargets{} 116 | for _, ts := range m.activeTargets { 117 | for _, t := range ts { 118 | ret[t.ShardTarget.Hash] = t 119 | } 120 | } 121 | return ret 122 | } 123 | 124 | // DropTargets return a copy map of global dropped targets the 125 | func (m *TargetsDiscovery) DropTargets() map[string][]*SDTargets { 126 | m.targetsLock.Lock() 127 | defer m.targetsLock.Unlock() 128 | 129 | ret := map[string][]*SDTargets{} 130 | for k, v := range m.dropTargets { 131 | ret[k] = v 132 | } 133 | return ret 134 | } 135 | 136 | // ApplyConfig save new scrape config 137 | func (m *TargetsDiscovery) ApplyConfig(cfg *prom.ConfigInfo) error { 138 | 
m.targetsLock.Lock() 139 | defer m.targetsLock.Unlock() 140 | 141 | newActiveTargets := map[string][]*SDTargets{} 142 | newDropTargets := map[string][]*SDTargets{} 143 | newCfg := map[string]*config.ScrapeConfig{} 144 | for _, j := range cfg.Config.ScrapeConfigs { 145 | newCfg[j.JobName] = j 146 | if _, exist := m.activeTargets[j.JobName]; exist { 147 | newActiveTargets[j.JobName] = m.activeTargets[j.JobName] 148 | newDropTargets[j.JobName] = m.dropTargets[j.JobName] 149 | } 150 | } 151 | m.config = newCfg 152 | m.activeTargets = newActiveTargets 153 | m.dropTargets = newDropTargets 154 | return nil 155 | } 156 | 157 | // Run receive prometheus service discovery result and update global active SDTargets and dropped SDTargets 158 | // the active SDTargets of one process will be send to activeTargetsChan 159 | func (m *TargetsDiscovery) Run(ctx context.Context, sdChan <-chan map[string][]*targetgroup.Group) error { 160 | for { 161 | select { 162 | case <-ctx.Done(): 163 | return nil 164 | case ts := <-sdChan: 165 | m.activeTargetsChan <- m.translateTargets(ts) 166 | } 167 | } 168 | } 169 | 170 | func (m *TargetsDiscovery) translateTargets(targets map[string][]*targetgroup.Group) map[string][]*SDTargets { 171 | actives := map[string][]*SDTargets{} 172 | drops := map[string][]*SDTargets{} 173 | for job, tsg := range targets { 174 | allActive := make([]*SDTargets, 0) 175 | allDrop := make([]*SDTargets, 0) 176 | 177 | cfg := m.config[job] 178 | if cfg == nil { 179 | m.log.Warnf("can not found job %m", job) 180 | continue 181 | } 182 | 183 | for _, tr := range tsg { 184 | ts, err := targetsFromGroup(tr, cfg) 185 | if err != nil { 186 | m.log.Error("create target for job", cfg.JobName, err.Error()) 187 | continue 188 | } 189 | 190 | for _, tar := range ts { 191 | if tar.PromTarget.Labels().Len() > 0 { 192 | allActive = append(allActive, tar) 193 | } else if tar.PromTarget.DiscoveredLabels().Len() > 0 { 194 | allDrop = append(allDrop, tar) 195 | } 196 | } 197 | } 198 | 
actives[job] = allActive 199 | drops[job] = allDrop 200 | } 201 | 202 | m.targetsLock.Lock() 203 | defer m.targetsLock.Unlock() 204 | 205 | for job, targets := range actives { 206 | m.activeTargets[job] = targets 207 | } 208 | 209 | for job, targets := range drops { 210 | m.dropTargets[job] = targets 211 | } 212 | 213 | return actives 214 | } 215 | -------------------------------------------------------------------------------- /pkg/discovery/discovery_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package discovery 19 | 20 | import ( 21 | "context" 22 | "testing" 23 | "time" 24 | 25 | "github.com/prometheus/common/model" 26 | "github.com/prometheus/prometheus/config" 27 | "github.com/prometheus/prometheus/discovery/targetgroup" 28 | "github.com/prometheus/prometheus/model/relabel" 29 | "github.com/sirupsen/logrus" 30 | "github.com/stretchr/testify/require" 31 | "tkestack.io/kvass/pkg/prom" 32 | "tkestack.io/kvass/pkg/target" 33 | ) 34 | 35 | func TestTargetsDiscovery_WaitInit(t *testing.T) { 36 | cfg := &config.Config{ 37 | ScrapeConfigs: []*config.ScrapeConfig{ 38 | { 39 | JobName: "test", 40 | }, 41 | }, 42 | } 43 | 44 | var cases = []struct { 45 | name string 46 | targets map[string][]*SDTargets 47 | wantTimeout bool 48 | }{ 49 | { 50 | name: "success", 51 | targets: map[string][]*SDTargets{ 52 | "test": {}, 53 | }, 54 | wantTimeout: false, 55 | }, 56 | { 57 | name: "timeout", 58 | targets: map[string][]*SDTargets{}, 59 | wantTimeout: true, 60 | }, 61 | } 62 | 63 | for _, cs := range cases { 64 | t.Run(cs.name, func(t *testing.T) { 65 | r := require.New(t) 66 | d := New(logrus.New()) 67 | r.NoError(d.ApplyConfig(&prom.ConfigInfo{ 68 | Config: cfg, 69 | })) 70 | d.activeTargets = cs.targets 71 | ctx, cancel := context.WithTimeout(context.Background(), time.Second*2) 72 | defer cancel() 73 | r.NoError(d.WaitInit(ctx)) 74 | r.Equal(cs.wantTimeout, ctx.Err() != nil) 75 | }) 76 | } 77 | 78 | } 79 | func TestTargetsDiscovery_ActiveTargets(t *testing.T) { 80 | d := New(logrus.New()) 81 | d.activeTargets = map[string][]*SDTargets{ 82 | "job": { 83 | {}, 84 | }, 85 | } 86 | require.Equal(t, 1, len(d.ActiveTargets()["job"])) 87 | } 88 | 89 | func TestTargetsDiscovery_ActiveTargetsByHash(t *testing.T) { 90 | d := New(logrus.New()) 91 | d.activeTargets = map[string][]*SDTargets{ 92 | "job": { 93 | { 94 | ShardTarget: &target.Target{Hash: 1}, 95 | }, 96 | }, 97 | } 98 | require.Equal(t, 1, len(d.ActiveTargetsByHash())) 99 | } 100 | 101 | func 
TestTargetsDiscovery_DropTargets(t *testing.T) { 102 | d := New(logrus.New()) 103 | d.dropTargets = map[string][]*SDTargets{ 104 | "job": { 105 | {}, 106 | }, 107 | } 108 | require.Equal(t, 1, len(d.DropTargets()["job"])) 109 | } 110 | 111 | func TestTargetsDiscovery_Run(t *testing.T) { 112 | r := require.New(t) 113 | d := New(logrus.New()) 114 | cfg := &config.Config{ 115 | ScrapeConfigs: []*config.ScrapeConfig{ 116 | { 117 | JobName: "test", 118 | Params: map[string][]string{ 119 | "t1": {"v1"}, 120 | }, 121 | RelabelConfigs: []*relabel.Config{ 122 | { 123 | Separator: ";", 124 | Regex: relabel.MustNewRegexp("__test_" + "(.+)"), 125 | Replacement: "$1", 126 | Action: relabel.LabelMap, 127 | }, 128 | { 129 | SourceLabels: model.LabelNames{"drop"}, 130 | Regex: relabel.MustNewRegexp("true"), 131 | Action: relabel.Drop, 132 | }, 133 | }, 134 | }, 135 | }, 136 | } 137 | r.NoError(d.ApplyConfig(&prom.ConfigInfo{ 138 | Config: cfg, 139 | })) 140 | 141 | sdChan := make(chan map[string][]*targetgroup.Group, 0) 142 | ctx, cancel := context.WithCancel(context.Background()) 143 | defer cancel() 144 | go func() { 145 | r.NoError(d.Run(ctx, sdChan)) 146 | }() 147 | 148 | d.activeTargetsChan = make(chan map[string][]*SDTargets) 149 | sdChan <- map[string][]*targetgroup.Group{ 150 | cfg.ScrapeConfigs[0].JobName: { 151 | { 152 | Targets: []model.LabelSet{ 153 | map[model.LabelName]model.LabelValue{ 154 | model.MetricsPathLabel: "/metrics", 155 | model.SchemeLabel: "https", 156 | model.AddressLabel: "127.0.0.1", 157 | "__test_xx": "xxx", 158 | }, 159 | }, 160 | }, 161 | { 162 | Targets: []model.LabelSet{ 163 | map[model.LabelName]model.LabelValue{ 164 | model.AddressLabel: "127.0.0.2", 165 | "drop": "true", 166 | model.SchemeLabel: "http", 167 | }, 168 | }, 169 | }, 170 | }, 171 | } 172 | 173 | active := <-d.ActiveTargetsChan() 174 | job := cfg.ScrapeConfigs[0].JobName 175 | r.Equal(1, len(active[job])) 176 | tar := active[cfg.ScrapeConfigs[0].JobName][0] 177 | 
// addPort reports whether a default port should be appended to the address s.
// It is false when s already carries a port, and also false when s is not a
// valid address even with a port appended.
func addPort(s string) bool {
	splits := func(hostport string) bool {
		_, _, err := net.SplitHostPort(hostport)
		return err == nil
	}

	// If s already splits, a port exists and we don't have to add one.
	// Otherwise, only when appending a port makes it valid was the first
	// failure caused by the missing port rather than by a bad address.
	return !splits(s) && splits(s+":1234")
}
42 | switch scheme { 43 | case "http", "": 44 | addr = addr + ":80" 45 | case "https": 46 | addr = addr + ":443" 47 | default: 48 | return "", errors.Errorf("invalid scheme: %q", scheme) 49 | } 50 | } 51 | return addr, nil 52 | } 53 | 54 | // populateLabels builds a label set from the given label set and scrape configuration. 55 | // It returns a label set before relabeling was applied as the second return value. 56 | // Returns the original discovered label set found before relabelling was applied if the target is dropped during relabeling. 57 | func populateLabels(lset labels.Labels, cfg *config.ScrapeConfig) (res, orig labels.Labels, err error) { 58 | lb := labels.NewBuilder(lset) 59 | // Copy labels into the labelset for the target if they are not set already. 60 | scrapeLabels := []labels.Label{ 61 | {Name: model.JobLabel, Value: cfg.JobName}, 62 | {Name: model.MetricsPathLabel, Value: cfg.MetricsPath}, 63 | {Name: model.SchemeLabel, Value: cfg.Scheme}, 64 | } 65 | 66 | for _, l := range scrapeLabels { 67 | if lv := lset.Get(l.Name); lv == "" { 68 | lb.Set(l.Name, l.Value) 69 | } 70 | } 71 | // Encode scrape query parameters as labels. 72 | for k, v := range cfg.Params { 73 | if len(v) > 0 { 74 | lb.Set(model.ParamLabelPrefix+k, v[0]) 75 | } 76 | } 77 | 78 | preRelabelLabels := lb.Labels() 79 | lset = relabel.Process(preRelabelLabels, cfg.RelabelConfigs...) 80 | 81 | // Get if the target was dropped. 82 | if lset == nil { 83 | return nil, preRelabelLabels, nil 84 | } 85 | if v := lset.Get(model.AddressLabel); v == "" { 86 | return nil, nil, errors.New("no address") 87 | } 88 | 89 | lb = labels.NewBuilder(lset) 90 | addr, err := completePort(lset.Get(model.AddressLabel), lset.Get(model.SchemeLabel)) 91 | if err != nil { 92 | return nil, nil, err 93 | } 94 | lb.Set(model.AddressLabel, addr) 95 | 96 | if err := config.CheckTargetAddress(model.LabelValue(addr)); err != nil { 97 | return nil, nil, err 98 | } 99 | 100 | // Meta labels are deleted after relabelling. 
Other internal labels propagate to 101 | // the target which decides whether they will be part of their label set. 102 | for _, l := range lset { 103 | if strings.HasPrefix(l.Name, model.MetaLabelPrefix) { 104 | lb.Del(l.Name) 105 | } 106 | } 107 | 108 | // Default the instance label to the target address. 109 | if v := lset.Get(model.InstanceLabel); v == "" { 110 | lb.Set(model.InstanceLabel, addr) 111 | } 112 | 113 | res = lb.Labels() 114 | for _, l := range res { 115 | // Get label values are valid, drop the target if not. 116 | if !model.LabelValue(l.Value).IsValid() { 117 | return nil, nil, errors.Errorf("invalid label value for %q: %q", l.Name, l.Value) 118 | } 119 | } 120 | return res, preRelabelLabels, nil 121 | } 122 | 123 | // targetsFromGroup builds activeTargets based on the given TargetGroup and config. 124 | func targetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig) ([]*SDTargets, error) { 125 | targets := make([]*SDTargets, 0, len(tg.Targets)) 126 | exists := map[uint64]bool{} 127 | 128 | for i, tlset := range tg.Targets { 129 | lbls := make([]labels.Label, 0, len(tlset)+len(tg.Labels)) 130 | 131 | for ln, lv := range tlset { 132 | lbls = append(lbls, labels.Label{Name: string(ln), Value: string(lv)}) 133 | } 134 | for ln, lv := range tg.Labels { 135 | if _, ok := tlset[ln]; !ok { 136 | lbls = append(lbls, labels.Label{Name: string(ln), Value: string(lv)}) 137 | } 138 | } 139 | 140 | lset := labels.New(lbls...) 
141 | 142 | lbls, origLabels, err := populateLabels(lset, cfg) 143 | if err != nil { 144 | return nil, errors.Wrapf(err, "instance %d in group %s", i, tg) 145 | } 146 | 147 | if lbls != nil || origLabels != nil { 148 | tar := scrape.NewTarget(lbls, origLabels, cfg.Params) 149 | hash := targetHash(lbls, tar.URL().String()) 150 | if exists[hash] { 151 | continue 152 | } 153 | exists[hash] = true 154 | targets = append(targets, &SDTargets{ 155 | Job: cfg.JobName, 156 | PromTarget: tar, 157 | ShardTarget: &target.Target{ 158 | Hash: hash, 159 | Labels: supportInvalidLabelName(labelsWithoutConfigParam(lbls, cfg.Params)), 160 | }, 161 | }) 162 | } 163 | } 164 | return targets, nil 165 | } 166 | 167 | func targetHash(lbls labels.Labels, url string) uint64 { 168 | h := fnv.New64a() 169 | //nolint: errcheck 170 | sort.Sort(lbls) 171 | _, _ = h.Write([]byte(fmt.Sprintf("%016d", lbls.Hash()))) 172 | //nolint: errcheck 173 | _, _ = h.Write([]byte(url)) 174 | return h.Sum64() 175 | } 176 | 177 | // some config param is not valid as label values 178 | // but populateLabels will add all config param into labels 179 | // must delete them from label set 180 | func labelsWithoutConfigParam(lbls labels.Labels, param url.Values) labels.Labels { 181 | key := make([]string, 0, len(param)) 182 | for k := range param { 183 | key = append(key, model.ParamLabelPrefix+k) 184 | } 185 | 186 | newlbls := labels.Labels{} 187 | for _, l := range lbls { 188 | if !types.FindString(l.Name, key...) 
{ 189 | newlbls = append(newlbls, l) 190 | } 191 | } 192 | return newlbls 193 | } 194 | 195 | // some label's name is invalid but we do need it 196 | // add prefix string to valid it 197 | // sidecar should add relabel_configs to remove prefix 198 | func supportInvalidLabelName(lbls labels.Labels) labels.Labels { 199 | res := labels.Labels{} 200 | for _, l := range lbls { 201 | if !model.LabelName(l.Name).IsValid() { 202 | l.Name = target.PrefixForInvalidLabelName + l.Name 203 | } 204 | res = append(res, l) 205 | } 206 | return res 207 | } 208 | -------------------------------------------------------------------------------- /pkg/explore/explore.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package explore 19 | 20 | import ( 21 | "context" 22 | "fmt" 23 | 24 | "tkestack.io/kvass/pkg/discovery" 25 | "tkestack.io/kvass/pkg/prom" 26 | "tkestack.io/kvass/pkg/scrape" 27 | "tkestack.io/kvass/pkg/utils/types" 28 | 29 | "sync" 30 | "time" 31 | 32 | parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus" 33 | 34 | "github.com/pkg/errors" 35 | "github.com/prometheus/client_golang/prometheus" 36 | "github.com/sirupsen/logrus" 37 | "golang.org/x/sync/errgroup" 38 | "tkestack.io/kvass/pkg/target" 39 | ) 40 | 41 | var ( 42 | exploredTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 43 | Name: "kvass_explore_explored_total", 44 | }, []string{"job", "success"}) 45 | exploringTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 46 | Name: "kvass_explore_exploring_total", 47 | }, []string{"job"}) 48 | ) 49 | 50 | type exploringTarget struct { 51 | exploring bool 52 | job string 53 | target *target.Target 54 | rt *target.ScrapeStatus 55 | } 56 | 57 | // Explore will explore Target before it assigned to Shard 58 | type Explore struct { 59 | logger logrus.FieldLogger 60 | scrapeManager *scrape.Manager 61 | 62 | targets map[uint64]*exploringTarget 63 | targetsLock sync.Mutex 64 | 65 | retryInterval time.Duration 66 | needExplore chan *exploringTarget 67 | explore func(log logrus.FieldLogger, scrapeInfo *scrape.JobInfo, url string) (*scrape.StatisticsSeriesResult, error) 68 | } 69 | 70 | // New create a new Explore 71 | func New(scrapeManager *scrape.Manager, promRegistry prometheus.Registerer, log logrus.FieldLogger) *Explore { 72 | _ = promRegistry.Register(exploredTotal) 73 | _ = promRegistry.Register(exploringTotal) 74 | return &Explore{ 75 | logger: log, 76 | scrapeManager: scrapeManager, 77 | needExplore: make(chan *exploringTarget, 10000), 78 | retryInterval: time.Second * 5, 79 | targets: map[uint64]*exploringTarget{}, 80 | explore: explore, 81 | } 82 | } 83 | 84 | // Get return the target scrape status of the target 
by hash 85 | // if target is never explored, it will be send to explore 86 | func (e *Explore) Get(hash uint64) *target.ScrapeStatus { 87 | e.targetsLock.Lock() 88 | defer e.targetsLock.Unlock() 89 | 90 | r := e.targets[hash] 91 | if r == nil { 92 | return nil 93 | } 94 | 95 | if !r.exploring { 96 | r.exploring = true 97 | e.needExplore <- r 98 | } 99 | 100 | return r.rt 101 | } 102 | 103 | // ApplyConfig delete invalid job's targets according to config 104 | // the new targets will be add by UpdateTargets 105 | func (e *Explore) ApplyConfig(cfg *prom.ConfigInfo) error { 106 | jobs := make([]string, 0, len(cfg.Config.ScrapeConfigs)) 107 | for _, j := range cfg.Config.ScrapeConfigs { 108 | jobs = append(jobs, j.JobName) 109 | } 110 | 111 | e.targetsLock.Lock() 112 | defer e.targetsLock.Unlock() 113 | 114 | newTargets := map[uint64]*exploringTarget{} 115 | deletedJobs := map[string]struct{}{} 116 | for hash, v := range e.targets { 117 | if types.FindString(v.job, jobs...) { 118 | newTargets[hash] = v 119 | } else { 120 | deletedJobs[v.job] = struct{}{} 121 | } 122 | } 123 | 124 | for job := range deletedJobs { 125 | exploredTotal.DeleteLabelValues(job, "true") 126 | exploredTotal.DeleteLabelValues(job, "false") 127 | exploringTotal.DeleteLabelValues(job) 128 | } 129 | 130 | e.targets = newTargets 131 | return nil 132 | } 133 | 134 | // UpdateTargets update global target info, if target is new , it will send for exploring 135 | func (e *Explore) UpdateTargets(targets map[string][]*discovery.SDTargets) { 136 | e.targetsLock.Lock() 137 | defer e.targetsLock.Unlock() 138 | 139 | all := map[uint64]*exploringTarget{} 140 | for job, ts := range targets { 141 | for _, t := range ts { 142 | hash := t.ShardTarget.Hash 143 | if e.targets[hash] != nil { 144 | all[hash] = e.targets[hash] 145 | } else { 146 | all[hash] = &exploringTarget{ 147 | job: job, 148 | rt: target.NewScrapeStatus(0, 0), 149 | target: t.ShardTarget, 150 | } 151 | } 152 | } 153 | } 154 | e.targets = all 155 | 
} 156 | 157 | // Run start Explore exploring engine 158 | // every Target will be explore MaxExploreTime times 159 | // "con" is the max worker goroutines 160 | func (e *Explore) Run(ctx context.Context, con int) error { 161 | var g errgroup.Group 162 | for i := 0; i < con; i++ { 163 | g.Go(func() error { 164 | for { 165 | select { 166 | case <-ctx.Done(): 167 | return nil 168 | case temp := <-e.needExplore: 169 | if temp == nil { 170 | continue 171 | } 172 | tar := temp 173 | hash := tar.target.Hash 174 | err := e.exploreOnce(ctx, tar) 175 | if err != nil { 176 | go func() { 177 | time.Sleep(e.retryInterval) 178 | e.targetsLock.Lock() 179 | defer e.targetsLock.Unlock() 180 | if e.targets[hash] != nil { 181 | e.needExplore <- tar 182 | } 183 | }() 184 | } 185 | } 186 | } 187 | }) 188 | } 189 | 190 | return g.Wait() 191 | } 192 | 193 | func (e *Explore) exploreOnce(ctx context.Context, t *exploringTarget) (err error) { 194 | defer t.rt.SetScrapeErr(time.Now(), err) 195 | exploringTotal.WithLabelValues(t.job).Inc() 196 | defer func() { 197 | exploringTotal.WithLabelValues(t.job).Dec() 198 | exploredTotal.WithLabelValues(t.job, fmt.Sprint(err == nil)).Inc() 199 | }() 200 | 201 | info := e.scrapeManager.GetJob(t.job) 202 | if info == nil { 203 | return fmt.Errorf("can not found %s scrape info", t.job) 204 | } 205 | 206 | url := t.target.URL(info.Config).String() 207 | result, err := e.explore(e.logger, info, url) 208 | if err != nil { 209 | return errors.Wrapf(err, "explore failed : %s/%s", t.job, url) 210 | } 211 | 212 | t.rt.UpdateScrapeResult(result) 213 | t.target.Series = int64(result.ScrapedTotal) 214 | t.target.TotalSeries = int64(result.Total) 215 | t.rt.LastScrapeStatistics = result 216 | 217 | return nil 218 | } 219 | 220 | func explore(log logrus.FieldLogger, scrapeInfo *scrape.JobInfo, url string) (*scrape.StatisticsSeriesResult, error) { 221 | scraper := scrape.NewScraper(scrapeInfo, url, log) 222 | if err := scraper.RequestTo(); err != nil { 223 | return 
nil, errors.Wrap(err, "request to ") 224 | } 225 | 226 | r := scrape.NewStatisticsSeriesResult() 227 | return r, scraper.ParseResponse(func(rows []parser.Row) error { 228 | scrape.StatisticSeries(rows, scrapeInfo.Config.MetricRelabelConfigs, r) 229 | return nil 230 | }) 231 | } 232 | -------------------------------------------------------------------------------- /pkg/explore/explore_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */

package explore

import (
	"context"
	"net/http"
	"net/http/httptest"
	"net/url"
	"testing"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/config"
	"github.com/prometheus/prometheus/model/labels"
	scrape2 "github.com/prometheus/prometheus/scrape"
	"github.com/sirupsen/logrus"
	"github.com/stretchr/testify/require"
	"tkestack.io/kvass/pkg/discovery"
	"tkestack.io/kvass/pkg/prom"
	"tkestack.io/kvass/pkg/scrape"
	"tkestack.io/kvass/pkg/target"
)

// TestExplore_UpdateTargets checks that an unknown hash yields nil, and that
// after UpdateTargets the first Get returns a status and marks the target as
// exploring.
func TestExplore_UpdateTargets(t *testing.T) {
	e := New(scrape.New(true, logrus.New()), prometheus.NewRegistry(), logrus.New())
	require.Nil(t, e.Get(1))
	e.UpdateTargets(map[string][]*discovery.SDTargets{
		"job1": {&discovery.SDTargets{
			ShardTarget: &target.Target{
				Hash:   1,
				Series: 100,
			},
		}},
	})
	r := e.Get(1)
	require.NotNil(t, r)
	require.True(t, e.targets[1].exploring)
}

// TestExplore_Run runs one worker against a test server whose first response
// is a 502, and expects the target to be retried and reported healthy with one
// series within a second.
func TestExplore_Run(t *testing.T) {
	r := require.New(t)
	sm := scrape.New(true, logrus.New())
	r.NoError(sm.ApplyConfig(&prom.ConfigInfo{
		RawContent: nil,
		ConfigHash: "",
		Config: &config.Config{
			ScrapeConfigs: []*config.ScrapeConfig{
				{
					JobName:       "job1",
					ScrapeTimeout: model.Duration(time.Second * 3),
				},
			},
		},
	}))

	e := New(sm, prometheus.NewRegistry(), logrus.New())
	// retry quickly so the second attempt happens well inside the sleep below
	e.retryInterval = time.Millisecond * 10
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go func() {
		r.NoError(e.Run(ctx, 1))
	}()

	data := ``
	hts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
		// the first request must fail; later requests serve one sample
		if data == "" {
			data = `metrics{} 1`
			w.WriteHeader(502)
			return
		}
		_, _ = w.Write([]byte(data))
	}))
	defer hts.Close()

	targetURL, err := url.Parse(hts.URL)
	r.NoError(err)

	e.UpdateTargets(map[string][]*discovery.SDTargets{
		"job1": {&discovery.SDTargets{
			ShardTarget: &target.Target{
				Hash:   1,
				Series: 100,
				Labels: labels.Labels{
					{
						Name:  model.SchemeLabel,
						Value: targetURL.Scheme,
					},
					{
						Name:  model.AddressLabel,
						Value: targetURL.Host,
					},
				},
			},
		}},
	})

	// the first Get queues the target; its status has not been updated yet
	res := e.Get(1)
	r.Equal(scrape2.HealthUnknown, res.Health)
	// give the worker time for the failing attempt and the retry
	time.Sleep(time.Second)
	res = e.Get(1)
	r.NotNil(res)
	r.Equal(scrape2.HealthGood, res.Health)
	r.Equal(int64(1), res.Series)
	r.Equal("", res.LastError)
}

// TestExplore_ApplyConfig checks that targets of a job missing from the
// applied config are removed.
func TestExplore_ApplyConfig(t *testing.T) {
	r := require.New(t)
	e := New(scrape.New(false, logrus.New()), prometheus.NewRegistry(), logrus.New())
	e.UpdateTargets(map[string][]*discovery.SDTargets{
		"job1": {&discovery.SDTargets{
			ShardTarget: &target.Target{
				Hash:   1,
				Series: 100,
			},
		}},
	})
	r.NoError(e.ApplyConfig(&prom.ConfigInfo{Config: &config.Config{}}))
	require.Nil(t, e.Get(1))
}
-------------------------------------------------------------------------------- /pkg/prom/client.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License.
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package prom 19 | 20 | import ( 21 | "tkestack.io/kvass/pkg/api" 22 | 23 | v1 "github.com/prometheus/prometheus/web/api/v1" 24 | ) 25 | 26 | // Client is a client to do prometheus API request 27 | type Client struct { 28 | url string 29 | } 30 | 31 | // NewClient return an cli with url 32 | func NewClient(url string) *Client { 33 | return &Client{ 34 | url: url, 35 | } 36 | } 37 | 38 | // TSDBInfo return the current head status of this shard 39 | func (c *Client) TSDBInfo() (*TSDBInfo, error) { 40 | ret := &TSDBInfo{} 41 | return ret, api.Get(c.url+"/api/v1/status/tsdb", ret) 42 | } 43 | 44 | // Targets is compatible with prometheusURL /api/v1/targets 45 | // the origin prometheusURL's Config is injected, so the targets it report must be adjusted by cli sidecar 46 | func (c *Client) Targets(state string) (*v1.TargetDiscovery, error) { 47 | url := c.url + "/api/v1/targets" 48 | if state != "" { 49 | url += "?state=" + state 50 | } 51 | ret := &v1.TargetDiscovery{} 52 | return ret, api.Get(url, ret) 53 | } 54 | 55 | // ConfigReload do Config reloading 56 | func (c *Client) ConfigReload() error { 57 | url := c.url + "/-/reload" 58 | return api.Post(url, nil, nil) 59 | } 60 | -------------------------------------------------------------------------------- /pkg/prom/client_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 
5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */

package prom

import (
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/stretchr/testify/require"
)

// dataServer starts a test HTTP server that answers every request with data.
// The caller must Close the returned server.
func dataServer(data string) *httptest.Server {
	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
		w.Write([]byte(data))
	}))
}

// TestClient_TSDBInfo checks that headStats.numSeries is decoded from the
// tsdb status response.
func TestClient_TSDBInfo(t *testing.T) {
	w := dataServer(`{ "status": "success", "data": { "headStats": { "numSeries": 508 } } }`)
	defer w.Close()
	c := NewClient(w.URL)
	r, err := c.TSDBInfo()
	require.NoError(t, err)
	require.Equal(t, int64(508), r.HeadStats.NumSeries)
}

// TestClient_ConfigReload checks that a reload against a server answering 200
// with an empty body succeeds.
func TestClient_ConfigReload(t *testing.T) {
	w := dataServer(``)
	defer w.Close()
	c := NewClient(w.URL)
	err := c.ConfigReload()
	require.NoError(t, err)
}

// TestClient_Targets checks that active and dropped targets are both decoded
// from the targets response.
func TestClient_Targets(t *testing.T) {
	w := dataServer(`{ "status": "success", "data": { "activeTargets": [ { "discoveredLabels": { "__address__": "127.0.0.1:9090", "__metrics_path__": "/metrics", "__scheme__": "http", "job": "prometheus" }, "labels": { "instance": "127.0.0.1:9090", "job": "prometheus" }, "scrapePool": "prometheus", "scrapeUrl": "http://127.0.0.1:9090/metrics", "lastError": "", "lastScrape": "2017-01-17T15:07:44.723715405+01:00", "lastScrapeDuration": 0.050688943, "health": "up" } ], "droppedTargets": [ { "discoveredLabels": { "__address__": "127.0.0.1:9100", "__metrics_path__": "/metrics", "__scheme__": "http", "job": "node" } } ] } }`)
	defer w.Close()
	c := NewClient(w.URL)
	tar, err := c.Targets("all")
	require.NoError(t, err)
	require.Equal(t, 1, len(tar.ActiveTargets))
	require.Equal(t, 1, len(tar.DroppedTargets))
}
-------------------------------------------------------------------------------- /pkg/prom/config.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License.
16 | */ 17 | 18 | package prom 19 | 20 | import ( 21 | "fmt" 22 | "io/ioutil" 23 | 24 | "github.com/go-kit/log" 25 | "github.com/mitchellh/hashstructure/v2" 26 | "github.com/pkg/errors" 27 | "github.com/prometheus/prometheus/config" 28 | "github.com/prometheus/prometheus/model/labels" 29 | ) 30 | 31 | const ( 32 | defaultConfig = ` 33 | global: 34 | external_labels: 35 | status: default 36 | ` 37 | ) 38 | 39 | // ExtraConfig is config about kvass it self , not Prometheus config 40 | type ExtraConfig struct { 41 | // StopScrapeReason ,if not empty, all scrape will failed 42 | StopScrapeReason string `json:"stopScrapeReason"` 43 | } 44 | 45 | // EQ return true if all ExtraConfig fields is eq 46 | func (c *ExtraConfig) EQ(e *ExtraConfig) bool { 47 | return c.StopScrapeReason == e.StopScrapeReason 48 | } 49 | 50 | // ConfigInfo include all information of current config 51 | type ConfigInfo struct { 52 | // RawContent is the content of config file 53 | RawContent []byte 54 | // ConfigHash is a md5 of config file content 55 | ConfigHash string 56 | // Config is the marshaled prometheus config 57 | Config *config.Config 58 | // ExtraConfig contains Config not in origin Prometheus define 59 | ExtraConfig *ExtraConfig 60 | } 61 | 62 | // DefaultConfig init a ConfigInfo with default prometheus config 63 | var DefaultConfig = &ConfigInfo{ 64 | RawContent: []byte(defaultConfig), 65 | ConfigHash: "", 66 | Config: &config.DefaultConfig, 67 | ExtraConfig: &ExtraConfig{}, 68 | } 69 | 70 | // ConfigManager do config manager 71 | type ConfigManager struct { 72 | callbacks []func(cfg *ConfigInfo) error 73 | currentConfig *ConfigInfo 74 | } 75 | 76 | // NewConfigManager return an config manager 77 | func NewConfigManager() *ConfigManager { 78 | return &ConfigManager{ 79 | currentConfig: &ConfigInfo{ 80 | RawContent: []byte(defaultConfig), 81 | ConfigHash: "", 82 | Config: &config.DefaultConfig, 83 | ExtraConfig: &ExtraConfig{}, 84 | }, 85 | } 86 | } 87 | 88 | // ReloadFromFile reload 
config from file and do all callbacks 89 | func (c *ConfigManager) ReloadFromFile(file string) error { 90 | data, err := ioutil.ReadFile(file) 91 | if err != nil { 92 | return err 93 | } 94 | return c.ReloadFromRaw(data) 95 | } 96 | 97 | // ReloadFromRaw reload config from raw data 98 | func (c *ConfigManager) ReloadFromRaw(data []byte) (err error) { 99 | info := &ConfigInfo{ 100 | ExtraConfig: c.currentConfig.ExtraConfig, 101 | } 102 | info.RawContent = data 103 | if len(info.RawContent) == 0 { 104 | return errors.New("config content is empty") 105 | } 106 | 107 | info.Config, err = config.Load(string(data), true, log.NewNopLogger()) 108 | if err != nil { 109 | return errors.Wrapf(err, "marshal config") 110 | } 111 | 112 | // config hash don't include external labels 113 | eLb := info.Config.GlobalConfig.ExternalLabels 114 | info.Config.GlobalConfig.ExternalLabels = []labels.Label{} 115 | hash, err := hashstructure.Hash(info.Config, hashstructure.FormatV2, nil) 116 | if err != nil { 117 | return errors.Wrapf(err, "get config hash") 118 | } 119 | 120 | info.ConfigHash = fmt.Sprint(hash) 121 | info.Config.GlobalConfig.ExternalLabels = eLb 122 | c.currentConfig = info 123 | 124 | for _, f := range c.callbacks { 125 | if err := f(c.currentConfig); err != nil { 126 | return err 127 | } 128 | } 129 | 130 | return nil 131 | } 132 | 133 | // UpdateExtraConfig set new extra config 134 | func (c *ConfigManager) UpdateExtraConfig(cfg ExtraConfig) error { 135 | if c.currentConfig.ExtraConfig.EQ(&cfg) { 136 | return nil 137 | } 138 | 139 | c.currentConfig.ExtraConfig = &cfg 140 | for _, f := range c.callbacks { 141 | if err := f(c.currentConfig); err != nil { 142 | return err 143 | } 144 | } 145 | return nil 146 | } 147 | 148 | // ConfigInfo return current config info 149 | func (c *ConfigManager) ConfigInfo() *ConfigInfo { 150 | return c.currentConfig 151 | } 152 | 153 | // AddReloadCallbacks add callbacks of config reload event 154 | func (c *ConfigManager) 
AddReloadCallbacks(f ...func(c *ConfigInfo) error) { 155 | c.callbacks = append(c.callbacks, f...) 156 | } 157 | -------------------------------------------------------------------------------- /pkg/prom/config_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package prom 19 | 20 | import ( 21 | "io/ioutil" 22 | "path" 23 | "testing" 24 | 25 | "github.com/stretchr/testify/require" 26 | ) 27 | 28 | func TestConfigManager_ReloadFromFile(t *testing.T) { 29 | type caseInfo struct { 30 | fileExist bool 31 | content string 32 | wantErr bool 33 | wantCallBack bool 34 | } 35 | 36 | successCase := func() *caseInfo { 37 | return &caseInfo{ 38 | fileExist: true, 39 | content: ` 40 | global: 41 | evaluation_interval: 10s 42 | scrape_interval: 15s 43 | external_labels: 44 | replica: pod1 45 | scrape_configs: 46 | - job_name: "test" 47 | static_configs: 48 | - targets: 49 | - 127.0.0.1:9091 50 | `, 51 | wantErr: false, 52 | wantCallBack: true, 53 | } 54 | } 55 | 56 | var cases = []struct { 57 | desc string 58 | updateCase func(c *caseInfo) 59 | }{ 60 | { 61 | desc: "success", 62 | updateCase: func(c *caseInfo) {}, 63 | }, 64 | { 65 | desc: "file not exit, want err ", 66 | updateCase: func(c *caseInfo) { 67 | c.fileExist = false 68 | c.wantErr = true 69 | }, 70 | }, 71 | { 72 | desc: "empty content, want err", 73 | updateCase: func(c *caseInfo) { 74 | c.content = "" 75 | c.wantErr = true 76 | }, 77 | }, 78 | { 79 | desc: "wrong content format, want err", 80 | updateCase: func(c *caseInfo) { 81 | c.content = "a : a : a" 82 | c.wantErr = true 83 | }, 84 | }, 85 | } 86 | 87 | for _, cs := range cases { 88 | t.Run(cs.desc, func(t *testing.T) { 89 | r := require.New(t) 90 | c := successCase() 91 | cs.updateCase(c) 92 | file := path.Join(t.TempDir(), "a") 93 | if c.fileExist { 94 | _ = ioutil.WriteFile(file, []byte(c.content), 0777) 95 | } 96 | 97 | m := NewConfigManager() 98 | updated := false 99 | m.AddReloadCallbacks(func(cfg *ConfigInfo) error { 100 | updated = true 101 | r.Equal(string(cfg.RawContent), c.content) 102 | r.Equal("16727296455050936695", cfg.ConfigHash) 103 | r.Equal(1, len(cfg.Config.ScrapeConfigs)) 104 | return nil 105 | }) 106 | 107 | err := m.ReloadFromFile(file) 108 | if c.wantErr { 109 | 
r.Error(err) 110 | return 111 | } 112 | r.Equal(c.wantCallBack, updated) 113 | }) 114 | } 115 | } 116 | 117 | func TestConfigManager_UpdateExtraConfig(t *testing.T) { 118 | m := NewConfigManager() 119 | updated := false 120 | m.AddReloadCallbacks(func(c *ConfigInfo) error { 121 | updated = true 122 | require.Equal(t, "test", c.ExtraConfig.StopScrapeReason) 123 | return nil 124 | }) 125 | 126 | m.UpdateExtraConfig(ExtraConfig{ 127 | StopScrapeReason: "", // not change 128 | }) 129 | require.False(t, updated) 130 | 131 | m.UpdateExtraConfig(ExtraConfig{ 132 | StopScrapeReason: "test", // not change 133 | }) 134 | require.True(t, updated) 135 | } 136 | -------------------------------------------------------------------------------- /pkg/prom/data.go: -------------------------------------------------------------------------------- 1 | package prom 2 | 3 | // RuntimeInfo include some filed the prometheus API /api/v1/runtimeinfo returned 4 | type RuntimeInfo struct { 5 | // TimeSeriesCount is the series the prometheus head check handled 6 | TimeSeriesCount int64 `json:"timeSeriesCount"` 7 | } 8 | 9 | // TSDBInfo include some filed the prometheus API /api/v1/status/tsdb returned 10 | type TSDBInfo struct { 11 | // HeadStats include information of current head block 12 | HeadStats struct { 13 | // NumSeries is current series in head block (in memory) 14 | NumSeries int64 `json:"numSeries"` 15 | } `json:"headStats"` 16 | } 17 | -------------------------------------------------------------------------------- /pkg/scrape/jobinfo.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. 
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package scrape 19 | 20 | import ( 21 | "fmt" 22 | "net/http" 23 | "net/url" 24 | "os" 25 | 26 | "github.com/pkg/errors" 27 | "go.etcd.io/etcd/version" 28 | 29 | config_util "github.com/prometheus/common/config" 30 | "github.com/prometheus/prometheus/config" 31 | ) 32 | 33 | const acceptHeader = `application/openmetrics-text; version=0.0.1,text/plain;version=0.0.4;q=0.5,*/*;q=0.1` 34 | 35 | var userAgentHeader = fmt.Sprintf("prometheusURL/%s", version.Version) 36 | 37 | // JobInfo contains http client for scraping target, and the origin scrape config 38 | type JobInfo struct { 39 | // Config is the origin scrape config in config file 40 | Config *config.ScrapeConfig 41 | // Cli is the http.Cli for scraping 42 | // all scraping request will be proxy to env SCRAPE_PROXY if it is not empty 43 | Cli *http.Client 44 | // proxyURL save old proxyURL set in ScrapeConfig if env SCRAPE_PROXY is not empty 45 | // proxyURL will be saved in head "Origin-Proxy" when scrape request is send 46 | proxyURL *url.URL 47 | } 48 | 49 | func newJobInfo(cfg config.ScrapeConfig, keeAliveDisable bool) (*JobInfo, error) { 50 | proxy := os.Getenv("SCRAPE_PROXY") 51 | oldProxy := cfg.HTTPClientConfig.ProxyURL 52 | if proxy != "" { 53 | newURL, err := url.Parse(proxy) 54 | if err != nil { 55 | return nil, errors.Wrapf(err, "proxy parse failed") 56 | } 57 | cfg.HTTPClientConfig.ProxyURL.URL = newURL 58 | } 59 | 60 | option := make([]config_util.HTTPClientOption, 0) 61 | if keeAliveDisable { 62 | option = append(option, config_util.WithKeepAlivesDisabled()) 
63 | } 64 | 65 | client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, option...) 66 | if err != nil { 67 | return nil, errors.Wrap(err, "error creating HTTP Cli") 68 | } 69 | 70 | return &JobInfo{ 71 | Cli: client, 72 | Config: &cfg, 73 | proxyURL: oldProxy.URL, 74 | }, nil 75 | } 76 | -------------------------------------------------------------------------------- /pkg/scrape/manager.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | import ( 4 | "github.com/sirupsen/logrus" 5 | "tkestack.io/kvass/pkg/prom" 6 | ) 7 | 8 | // Manager includes all jobs 9 | type Manager struct { 10 | jobs map[string]*JobInfo 11 | keeAliveDisable bool 12 | lg logrus.FieldLogger 13 | } 14 | 15 | // New create a Manager with specified Cli set 16 | func New(keeAliveDisable bool, lg logrus.FieldLogger) *Manager { 17 | return &Manager{ 18 | lg: lg, 19 | keeAliveDisable: keeAliveDisable, 20 | jobs: map[string]*JobInfo{}, 21 | } 22 | } 23 | 24 | // ApplyConfig update Manager from config 25 | func (s *Manager) ApplyConfig(cfg *prom.ConfigInfo) error { 26 | ret := map[string]*JobInfo{} 27 | for _, cfg := range cfg.Config.ScrapeConfigs { 28 | info, err := newJobInfo(*cfg, s.keeAliveDisable) 29 | if err != nil { 30 | s.lg.Error(err.Error()) 31 | continue 32 | } 33 | ret[cfg.JobName] = info 34 | } 35 | s.jobs = ret 36 | return nil 37 | } 38 | 39 | // GetJob search job by job name, nil will be return if job not exist 40 | func (s *Manager) GetJob(job string) *JobInfo { 41 | return s.jobs[job] 42 | } 43 | -------------------------------------------------------------------------------- /pkg/scrape/manager_test.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | import ( 4 | "net/url" 5 | "os" 6 | "testing" 7 | "time" 8 | 9 | config_util "github.com/prometheus/common/config" 10 | "github.com/prometheus/common/model" 11 | 
"github.com/prometheus/prometheus/config" 12 | "github.com/sirupsen/logrus" 13 | "github.com/stretchr/testify/require" 14 | "tkestack.io/kvass/pkg/prom" 15 | ) 16 | 17 | func TestManager(t *testing.T) { 18 | r := require.New(t) 19 | cfg := &config.ScrapeConfig{ 20 | JobName: "test", 21 | } 22 | u, _ := url.Parse("http://127.0.0.1:8008") 23 | cfg.HTTPClientConfig.ProxyURL = config_util.URL{URL: u} 24 | cfg.ScrapeTimeout = model.Duration(time.Second) 25 | r.NoError(os.Setenv("SCRAPE_PROXY", "http://127.0.0.1:9090")) 26 | defer os.Unsetenv("SCRAPE_PROXY") 27 | 28 | ss := New(false, logrus.New()) 29 | r.NoError(ss.ApplyConfig(&prom.ConfigInfo{ 30 | Config: &config.Config{ 31 | ScrapeConfigs: []*config.ScrapeConfig{cfg}, 32 | }, 33 | })) 34 | s := ss.GetJob(cfg.JobName) 35 | r.NotNil(s) 36 | 37 | r.Equal(u.String(), s.proxyURL.String()) 38 | } 39 | -------------------------------------------------------------------------------- /pkg/scrape/reader.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | import ( 4 | "io" 5 | ) 6 | 7 | func wrapReader(reader io.ReadCloser, writer ...io.Writer) *wrappedReader { 8 | return &wrappedReader{ 9 | reader: reader, 10 | writer: writer, 11 | } 12 | } 13 | 14 | // wrappedReader copy data to writer when Read is called 15 | type wrappedReader struct { 16 | reader io.ReadCloser 17 | writer []io.Writer 18 | } 19 | 20 | // Read implement io.Reader 21 | func (w *wrappedReader) Read(p []byte) (n int, err error) { 22 | n, rerr := w.reader.Read(p) 23 | for _, w := range w.writer { 24 | wTotal := 0 25 | for wTotal < n { 26 | wn, werr := w.Write(p[wTotal:n]) 27 | if werr != nil { 28 | return n, werr 29 | } 30 | wTotal += wn 31 | } 32 | } 33 | return n, rerr 34 | } 35 | 36 | // Close implement io.Closer 37 | func (w *wrappedReader) Close() error { 38 | return w.reader.Close() 39 | } 40 | -------------------------------------------------------------------------------- 
/pkg/scrape/reader_test.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "testing" 7 | ) 8 | 9 | func TestWrapReader(t *testing.T) { 10 | data := []byte("hello") 11 | p := []byte("he") 12 | buf := bytes.NewBuffer([]byte{}) 13 | r := wrapReader(io.NopCloser(bytes.NewReader(data)), buf) 14 | for { 15 | _, err := r.Read(p) 16 | if err == io.EOF { 17 | break 18 | } 19 | } 20 | if string(data) != buf.String() { 21 | t.Fatalf("[%s]\n", buf.String()) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /pkg/scrape/scraper.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "sync" 8 | 9 | "github.com/klauspost/compress/gzip" 10 | "tkestack.io/kvass/pkg/utils/types" 11 | 12 | "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common" 13 | "github.com/pkg/errors" 14 | "github.com/prometheus/prometheus/model/labels" 15 | "github.com/prometheus/prometheus/model/relabel" 16 | "github.com/sirupsen/logrus" 17 | 18 | "net/http" 19 | "time" 20 | 21 | parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus" 22 | ) 23 | 24 | // Scraper do one scraping 25 | // must call RequestTo befor ParseResponse 26 | type Scraper struct { 27 | job *JobInfo 28 | url string 29 | writer []io.Writer 30 | log logrus.FieldLogger 31 | // HTTPResponse save the http response when RequestTo is called 32 | HTTPResponse *http.Response 33 | gZipReader *gzip.Reader 34 | reader io.ReadCloser 35 | ctxCancel func() 36 | } 37 | 38 | // NewScraper create a new Scraper 39 | func NewScraper(job *JobInfo, url string, log logrus.FieldLogger) *Scraper { 40 | return &Scraper{ 41 | job: job, 42 | url: url, 43 | log: log, 44 | } 45 | } 46 | 47 | // WithRawWriter add writers 48 | // data will be copy to writers when ParseResponse is processing 49 | 
// gziped data will be decoded before write to writer 50 | func (s *Scraper) WithRawWriter(w ...io.Writer) { 51 | s.writer = append(s.writer, w...) 52 | } 53 | 54 | // RequestTo do http request to target 55 | // response will be saved to s.HTTPResponse 56 | // ParseResponse must be called if RequestTo return nil error 57 | func (s *Scraper) RequestTo() error { 58 | req, err := http.NewRequest("GET", s.url, nil) 59 | if err != nil { 60 | return errors.Wrap(err, "new request") 61 | } 62 | req.Header.Add("Accept", acceptHeader) 63 | req.Header.Add("Accept-Encoding", "gzip") 64 | req.Header.Set("User-Agent", userAgentHeader) 65 | req.Header.Set("X-prometheusURL-Cli-Timeout-Seconds", fmt.Sprintf("%f", time.Duration(s.job.Config.ScrapeTimeout).Seconds())) 66 | if s.job.proxyURL != nil { 67 | req.Header.Set("Origin-Proxy", s.job.proxyURL.String()) 68 | } 69 | 70 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(s.job.Config.ScrapeTimeout)) 71 | s.ctxCancel = cancel 72 | 73 | s.HTTPResponse, err = s.job.Cli.Do(req.WithContext(ctx)) 74 | if err != nil { 75 | return errors.Wrap(err, "do http") 76 | } 77 | 78 | if s.HTTPResponse.StatusCode != http.StatusOK { 79 | return errors.Errorf("server returned HTTP status %s", s.HTTPResponse.Status) 80 | } 81 | 82 | s.reader = s.HTTPResponse.Body 83 | if s.HTTPResponse.Header.Get("Content-Encoding") == "gzip" { 84 | s.gZipReader, err = common.GetGzipReader(s.HTTPResponse.Body) 85 | if err != nil { 86 | return fmt.Errorf("cannot read gzipped lines with Prometheus exposition format: %w", err) 87 | } 88 | s.reader = s.gZipReader 89 | } 90 | 91 | s.reader = wrapReader(s.reader, s.writer...) 
92 | return nil 93 | } 94 | 95 | // ParseResponse parse metrics 96 | // RequestTo must be called before ParseResponse 97 | func (s *Scraper) ParseResponse(do func(rows []parser.Row) error) error { 98 | defer func() { 99 | s.ctxCancel() 100 | if s.gZipReader != nil { 101 | common.PutGzipReader(s.gZipReader) 102 | } 103 | }() 104 | 105 | return parser.ParseStream(s.reader, time.Now().UnixNano()/1e6, 106 | false, 107 | do, func(str string) { 108 | s.log.Print(str) 109 | }) 110 | } 111 | 112 | // StatisticsSeriesResult is the samples count in one scrape 113 | type StatisticsSeriesResult struct { 114 | lk sync.Mutex `json:"-"` 115 | // ScrapedTotal is samples number total after relabel 116 | ScrapedTotal float64 `json:"scrapedTotal"` 117 | // Total is total samples appeared in this scape 118 | Total float64 `json:"total"` 119 | // MetricsTotal is samples number info about all metrics 120 | MetricsTotal map[string]*MetricSamplesInfo `json:"metricsTotal"` 121 | } 122 | 123 | // NewStatisticsSeriesResult return an empty StatisticsSeriesResult 124 | func NewStatisticsSeriesResult() *StatisticsSeriesResult { 125 | return &StatisticsSeriesResult{ 126 | MetricsTotal: map[string]*MetricSamplesInfo{}, 127 | } 128 | } 129 | 130 | // MetricSamplesInfo statistics sample about one metric 131 | type MetricSamplesInfo struct { 132 | // Total is total samples appeared in this scape 133 | Total float64 `json:"total"` 134 | // Scraped is samples number after relabel 135 | Scraped float64 `json:"scraped"` 136 | } 137 | 138 | // StatisticSeries statistic load from metrics raw data 139 | func StatisticSeries(rows []parser.Row, rc []*relabel.Config, result *StatisticsSeriesResult) { 140 | result.lk.Lock() 141 | defer result.lk.Unlock() 142 | 143 | for _, row := range rows { 144 | var lset labels.Labels 145 | lset = append(lset, labels.Label{ 146 | Name: "__name__", 147 | Value: row.Metric, 148 | }) 149 | 150 | n := types.DeepCopyString(row.Metric) 151 | if result.MetricsTotal[n] == nil { 152 
| result.MetricsTotal[n] = &MetricSamplesInfo{} 153 | } 154 | 155 | result.MetricsTotal[n].Total++ 156 | for _, tag := range row.Tags { 157 | lset = append(lset, labels.Label{ 158 | Name: tag.Key, 159 | Value: tag.Value, 160 | }) 161 | } 162 | 163 | result.Total++ 164 | if newSets := relabel.Process(lset, rc...); newSets != nil { 165 | result.ScrapedTotal++ 166 | result.MetricsTotal[n].Scraped++ 167 | } 168 | } 169 | } 170 | 171 | func init() { 172 | common.StartUnmarshalWorkers() 173 | } 174 | -------------------------------------------------------------------------------- /pkg/scrape/scraper_test.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | import ( 4 | "bytes" 5 | "compress/gzip" 6 | "fmt" 7 | "net/http" 8 | "net/http/httptest" 9 | "net/url" 10 | "testing" 11 | "time" 12 | 13 | "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus" 14 | "github.com/prometheus/common/model" 15 | "github.com/prometheus/prometheus/config" 16 | "github.com/prometheus/prometheus/model/relabel" 17 | "github.com/sirupsen/logrus" 18 | "github.com/stretchr/testify/require" 19 | ) 20 | 21 | func TestStatisticSample(t *testing.T) { 22 | r := NewStatisticsSeriesResult() 23 | StatisticSeries([]prometheus.Row{ 24 | { 25 | Metric: "a", 26 | Tags: []prometheus.Tag{ 27 | { 28 | Key: "tk", 29 | Value: "tv", 30 | }, 31 | }, 32 | }, 33 | { 34 | Metric: "b", 35 | Tags: []prometheus.Tag{ 36 | { 37 | Key: "tk1", 38 | Value: "tv1", 39 | }, 40 | }, 41 | }, 42 | }, []*relabel.Config{ 43 | { 44 | SourceLabels: []model.LabelName{"tk"}, 45 | Regex: relabel.MustNewRegexp("tv"), 46 | Action: relabel.Drop, 47 | }, 48 | }, r) 49 | require.Equal(t, r, &StatisticsSeriesResult{ 50 | ScrapedTotal: 1, 51 | MetricsTotal: map[string]*MetricSamplesInfo{ 52 | "a": { 53 | Total: 1, 54 | Scraped: 0, 55 | }, 56 | "b": { 57 | Total: 1, 58 | Scraped: 1, 59 | }, 60 | }, 61 | }) 62 | } 63 | 64 | func gzippedData(raw []byte) []byte { 65 | ret := 
bytes.NewBuffer([]byte{}) 66 | w := gzip.NewWriter(ret) 67 | _, _ = w.Write(raw) 68 | w.Close() 69 | return ret.Bytes() 70 | } 71 | 72 | func TestScraper_ParseResponse(t *testing.T) { 73 | type caseInfo struct { 74 | statusCode int 75 | responseHeader http.Header 76 | responseData []byte 77 | scraperJob *JobInfo 78 | parseReponseDo func(rows []prometheus.Row) error 79 | 80 | wantRequestHeader http.Header 81 | wantRequestToErr bool 82 | wantResponseErr bool 83 | } 84 | 85 | getJob := func() *JobInfo { 86 | job, err := newJobInfo(config.ScrapeConfig{ 87 | JobName: "test", 88 | ScrapeTimeout: model.Duration(time.Second), 89 | }, false) 90 | if err != nil { 91 | require.Fail(t, err.Error()) 92 | } 93 | return job 94 | } 95 | 96 | success := func() *caseInfo { 97 | return &caseInfo{ 98 | statusCode: 200, 99 | responseHeader: http.Header{}, 100 | responseData: []byte("metrics{} 0"), 101 | scraperJob: getJob(), 102 | wantRequestHeader: http.Header{ 103 | "Accept": []string{acceptHeader}, 104 | "Accept-Encoding": []string{"gzip"}, // must support gzip data 105 | }, 106 | parseReponseDo: func(rows []prometheus.Row) error { 107 | if len(rows) != 1 { 108 | return fmt.Errorf("want len 1") 109 | } 110 | return nil 111 | }, 112 | } 113 | } 114 | 115 | var cases = []struct { 116 | name string 117 | updateCase func(c *caseInfo) 118 | }{ 119 | { 120 | name: "must success", 121 | updateCase: func(c *caseInfo) {}, 122 | }, 123 | { 124 | name: "if proxy url is set in config, set it to Origin-Proxy", 125 | updateCase: func(c *caseInfo) { 126 | url, err := url.ParseRequestURI("http://127.0.0.1") 127 | if err != nil { 128 | require.Fail(t, err.Error()) 129 | } 130 | c.scraperJob.proxyURL = url 131 | c.wantRequestHeader.Set("Origin-Proxy", url.String()) 132 | }, 133 | }, 134 | { 135 | name: "with gziped data", 136 | updateCase: func(c *caseInfo) { 137 | c.responseHeader.Set("Content-Encoding", "gzip") 138 | c.responseData = gzippedData(c.responseData) 139 | }, 140 | }, 141 | { 142 | name: 
"return status code != 200, must return err", 143 | updateCase: func(c *caseInfo) { 144 | c.statusCode = 400 145 | c.wantRequestToErr = true 146 | }, 147 | }, 148 | { 149 | name: "request timeout, must return err", 150 | updateCase: func(c *caseInfo) { 151 | c.scraperJob.Config.ScrapeTimeout = 0 152 | c.wantRequestToErr = true 153 | }, 154 | }, 155 | { 156 | name: "with worng data, parse response err", 157 | updateCase: func(c *caseInfo) { 158 | c.responseData = []byte("123x x x x") 159 | c.wantResponseErr = true 160 | }, 161 | }, 162 | { 163 | name: "do response deal failed, return err ", 164 | updateCase: func(c *caseInfo) { 165 | c.parseReponseDo = func(rows []prometheus.Row) error { 166 | return fmt.Errorf("test") 167 | } 168 | c.wantResponseErr = true 169 | }, 170 | }, 171 | } 172 | 173 | for _, c := range cases { 174 | t.Run(c.name, func(t *testing.T) { 175 | r := require.New(t) 176 | cs := success() 177 | c.updateCase(cs) 178 | 179 | targetServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 180 | for k := range cs.wantRequestHeader { 181 | if req.Header.Get(k) == "" { 182 | r.Failf("want head", k) 183 | } 184 | } 185 | 186 | for k := range cs.responseHeader { 187 | w.Header().Add(k, cs.responseHeader.Get(k)) 188 | } 189 | 190 | w.WriteHeader(cs.statusCode) 191 | _, err := w.Write(cs.responseData) 192 | r.NoError(err) 193 | })) 194 | defer targetServer.Close() 195 | 196 | s := NewScraper(cs.scraperJob, targetServer.URL, logrus.New()) 197 | err := s.RequestTo() 198 | if err != nil { 199 | t.Log(err.Error()) 200 | } 201 | r.Equal(cs.wantRequestToErr, err != nil) 202 | if cs.wantRequestToErr { 203 | return 204 | } 205 | 206 | err = s.ParseResponse(cs.parseReponseDo) 207 | if err != nil { 208 | t.Log(err.Error()) 209 | } 210 | r.Equal(cs.wantResponseErr, err != nil) 211 | }) 212 | } 213 | } 214 | 215 | func TestScraper_WithRawWriter(t *testing.T) { 216 | s := NewScraper(nil, "", nil) 217 | 
s.WithRawWriter(bytes.NewBuffer([]byte{})) 218 | require.Equal(t, 1, len(s.writer)) 219 | } 220 | -------------------------------------------------------------------------------- /pkg/shard/kubernetes/replicasmanager.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package kubernetes 19 | 20 | import ( 21 | "context" 22 | "github.com/pkg/errors" 23 | "github.com/sirupsen/logrus" 24 | v1 "k8s.io/api/apps/v1" 25 | v12 "k8s.io/apimachinery/pkg/apis/meta/v1" 26 | "k8s.io/client-go/kubernetes" 27 | "time" 28 | "tkestack.io/kvass/pkg/shard" 29 | ) 30 | 31 | // ReplicasManager select Statefusets to get shard manager 32 | type ReplicasManager struct { 33 | port int 34 | deletePVC bool 35 | stsSelector string 36 | cli kubernetes.Interface 37 | lg logrus.FieldLogger 38 | listStatefulSets func(ctx context.Context, opts v12.ListOptions) (*v1.StatefulSetList, error) 39 | stsUpdatedTime map[string]*time.Time 40 | } 41 | 42 | // NewReplicasManager create a ReplicasManager 43 | func NewReplicasManager( 44 | cli kubernetes.Interface, 45 | stsNamespace string, 46 | stsSelector string, 47 | port int, 48 | deletePVC bool, 49 | lg logrus.FieldLogger, 50 | ) *ReplicasManager { 51 | return &ReplicasManager{ 52 | cli: cli, 53 | port: port, 54 | deletePVC: deletePVC, 55 | lg: lg, 56 | stsSelector: stsSelector, 57 | listStatefulSets: cli.AppsV1().StatefulSets(stsNamespace).List, 58 | stsUpdatedTime: map[string]*time.Time{}, 59 | } 60 | } 61 | 62 | // Replicas return all shards manager 63 | func (g *ReplicasManager) Replicas() ([]shard.Manager, error) { 64 | sts, err := g.listStatefulSets(context.TODO(), v12.ListOptions{ 65 | LabelSelector: g.stsSelector, 66 | }) 67 | if err != nil { 68 | return nil, errors.Wrapf(err, "get statefulset") 69 | } 70 | 71 | ret := make([]shard.Manager, 0) 72 | for _, s := range sts.Items { 73 | if s.Status.Replicas != s.Status.UpdatedReplicas { 74 | g.lg.Warnf("Statefulset %s UpdatedReplicas != Replicas, skipped", s.Name) 75 | g.stsUpdatedTime[s.Name] = nil 76 | continue 77 | } 78 | 79 | if s.Status.ReadyReplicas != s.Status.Replicas && g.stsUpdatedTime[s.Name] == nil { 80 | t := time.Now() 81 | g.lg.Warnf("Statefulset %s is not ready, try wait 2m", s.Name) 82 | g.stsUpdatedTime[s.Name] = &t 83 | } 84 | 
85 | t := g.stsUpdatedTime[s.Name] 86 | if s.Status.ReadyReplicas != s.Status.Replicas && time.Now().Sub(*t) < time.Minute*2 { 87 | g.lg.Warnf("Statefulset %s is not ready, still waiting", s.Name) 88 | continue 89 | } 90 | 91 | tempS := s 92 | ret = append(ret, newShardManager(g.cli, &tempS, g.port, g.deletePVC, g.lg.WithField("sts", s.Name))) 93 | } 94 | 95 | return ret, nil 96 | } 97 | -------------------------------------------------------------------------------- /pkg/shard/kubernetes/replicasmanager_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
// TestReplicasManager_Replicas checks how many shard managers Replicas returns
// for different StatefulSet statuses. Every case starts from successCase and
// is then modified by its updateCase function.
func TestReplicasManager_Replicas(t *testing.T) {
	type caseInfo struct {
		stsNamespace     string
		stsSelector      string
		listStatefulSets func(ctx context.Context, opts v12.ListOptions) (*v1.StatefulSetList, error)
		sts              *v1.StatefulSet
		wantShardManager int
		wantErr          bool
	}

	// defSts returns a StatefulSet whose replicas are all updated and ready.
	defSts := func() v1.StatefulSet {
		s := &v1.StatefulSet{}
		s.Name = "s"
		s.Namespace = "test"
		s.Status.Replicas = 1
		s.Status.ReadyReplicas = 1
		s.Status.UpdatedReplicas = 1
		return *s
	}

	successCase := func() *caseInfo {
		sts := defSts()
		return &caseInfo{
			stsNamespace: "test",
			stsSelector:  "k8s-app=test",
			// c.sts points at the same variable the list function below reads,
			// so changes made through c.sts in updateCase show up in the list.
			sts: &sts,
			listStatefulSets: func(ctx context.Context, opts v12.ListOptions) (list *v1.StatefulSetList, e error) {
				if opts.LabelSelector != "k8s-app=test" {
					return nil, fmt.Errorf("wrong label selector")
				}
				return &v1.StatefulSetList{
					Items: []v1.StatefulSet{sts},
				}, nil
			},
			wantShardManager: 1,
		}
	}

	var cases = []struct {
		desc       string
		updateCase func(c *caseInfo)
	}{
		{
			desc:       "success",
			updateCase: func(c *caseInfo) {},
		},
		{
			desc: "list shard failed",
			updateCase: func(c *caseInfo) {
				c.listStatefulSets = func(ctx context.Context, opts v12.ListOptions) (list *v1.StatefulSetList, e error) {
					return nil, fmt.Errorf("test")
				}
				c.wantErr = true
			},
		},
		{
			desc: "update replicas != replicas, must skip",
			updateCase: func(c *caseInfo) {
				c.sts.Status.UpdatedReplicas = 0
				c.wantShardManager = 0
			},
		},
		{
			// Replicas is called once, right after the manager is created, so
			// the not-ready StatefulSet is still inside its waiting window.
			desc: "ready replicas != replicas",
			updateCase: func(c *caseInfo) {
				c.sts.Status.ReadyReplicas = 0
				c.wantShardManager = 0
			},
		},
	}

	for _, cs := range cases {
		t.Run(cs.desc, func(t *testing.T) {
			r := require.New(t)
			c := successCase()
			cs.updateCase(c)

			m := NewReplicasManager(fake.NewSimpleClientset(), c.stsNamespace, c.stsSelector, 0, true, logrus.New())
			// Replace the client-backed lister with the case's stub.
			m.listStatefulSets = c.listStatefulSets

			res, err := m.Replicas()
			if c.wantErr {
				r.Error(err)
				return
			}
			// NOTE(review): err is not asserted on the non-error path; only the
			// number of returned managers is checked.
			r.Equal(c.wantShardManager, len(res))
		})
	}
}
16 | */ 17 | 18 | package kubernetes 19 | 20 | import ( 21 | "context" 22 | "fmt" 23 | "github.com/pkg/errors" 24 | "github.com/sirupsen/logrus" 25 | v13 "k8s.io/api/apps/v1" 26 | v1 "k8s.io/api/core/v1" 27 | k8serr "k8s.io/apimachinery/pkg/api/errors" 28 | v12 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 | "k8s.io/apimachinery/pkg/labels" 30 | "k8s.io/client-go/kubernetes" 31 | "tkestack.io/kvass/pkg/shard" 32 | ) 33 | 34 | // shardManager manager shards use kubernetes shardManager 35 | type shardManager struct { 36 | sts *v13.StatefulSet 37 | // port is the shard client port 38 | port int 39 | deletePVC bool 40 | cli kubernetes.Interface 41 | lg logrus.FieldLogger 42 | getPods func(lb map[string]string) (*v1.PodList, error) 43 | } 44 | 45 | // newShardManager create a new StatefulSet shards manager 46 | func newShardManager(cli kubernetes.Interface, 47 | sts *v13.StatefulSet, 48 | port int, 49 | deletePVC bool, 50 | log logrus.FieldLogger) *shardManager { 51 | return &shardManager{ 52 | sts: sts, 53 | port: port, 54 | lg: log, 55 | cli: cli, 56 | deletePVC: deletePVC, 57 | getPods: func(selector map[string]string) (list *v1.PodList, e error) { 58 | return cli.CoreV1().Pods(sts.Namespace).List(context.TODO(), v12.ListOptions{ 59 | LabelSelector: labels.SelectorFromSet(selector).String(), 60 | }) 61 | }, 62 | } 63 | } 64 | 65 | // Shards return current Shards in the cluster 66 | func (s *shardManager) Shards() ([]*shard.Shard, error) { 67 | pods, err := s.getPods(s.sts.Spec.Selector.MatchLabels) 68 | if err != nil { 69 | return nil, errors.Wrap(err, "list pod") 70 | } 71 | 72 | ps := map[string]v1.Pod{} 73 | 74 | for _, p := range pods.Items { 75 | ps[p.Name] = p 76 | } 77 | 78 | ret := make([]*shard.Shard, 0) 79 | for index := range pods.Items { 80 | p := ps[fmt.Sprintf("%s-%d", s.sts.Name, index)] 81 | url := fmt.Sprintf("http://%s:%d", p.Status.PodIP, s.port) 82 | ret = append(ret, shard.NewShard(p.Name, url, p.Status.PodIP != "", s.lg.WithField("shard", p.Name))) 
83 | } 84 | 85 | return ret, nil 86 | } 87 | 88 | // ChangeScale create or delete Shards according to "expReplicate" 89 | func (s *shardManager) ChangeScale(expect int32) error { 90 | sts, err := s.cli.AppsV1().StatefulSets(s.sts.Namespace).Get(context.TODO(), s.sts.Name, v12.GetOptions{}) 91 | if err != nil { 92 | return err 93 | } 94 | 95 | if sts.Spec.Replicas == nil || *sts.Spec.Replicas == expect { 96 | return nil 97 | } 98 | 99 | old := *sts.Spec.Replicas 100 | sts.Spec.Replicas = &expect 101 | s.lg.Infof("change scale to %d", expect) 102 | _, err = s.cli.AppsV1().StatefulSets(sts.Namespace).Update(context.TODO(), sts, v12.UpdateOptions{}) 103 | if err != nil { 104 | return errors.Wrapf(err, "update statefuleset %s replicate failed", sts.Name) 105 | 106 | } 107 | 108 | if s.deletePVC { 109 | for i := old - 1; i >= expect; i-- { 110 | for _, pvc := range sts.Spec.VolumeClaimTemplates { 111 | name := fmt.Sprintf("%s-%s-%d", pvc.Name, sts.Name, i) 112 | err = s.cli.CoreV1().PersistentVolumeClaims(sts.Namespace).Delete(context.TODO(), name, v12.DeleteOptions{}) 113 | if err != nil && !k8serr.IsNotFound(err) { 114 | s.lg.Errorf("delete pvc %s failed : %s", name, err.Error()) 115 | } 116 | } 117 | } 118 | } 119 | return nil 120 | } 121 | -------------------------------------------------------------------------------- /pkg/shard/kubernetes/shardmanager_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. 
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package kubernetes 19 | 20 | import ( 21 | "context" 22 | "fmt" 23 | "testing" 24 | 25 | v1 "k8s.io/api/core/v1" 26 | 27 | "github.com/sirupsen/logrus" 28 | 29 | "k8s.io/client-go/kubernetes/fake" 30 | 31 | "github.com/stretchr/testify/require" 32 | v12 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 | 34 | appsv1 "k8s.io/api/apps/v1" 35 | "k8s.io/client-go/kubernetes" 36 | ) 37 | 38 | func createStatefulSet(t *testing.T, cli kubernetes.Interface, name string, rep int32) *appsv1.StatefulSet { 39 | r := require.New(t) 40 | sts1 := &appsv1.StatefulSet{} 41 | sts1.Name = name 42 | sts1.Namespace = "default" 43 | sts1.Spec.Replicas = &rep 44 | sts1.Labels = map[string]string{ 45 | "k8s-app": "prometheus", 46 | } 47 | sts1.Spec.Selector = &v12.LabelSelector{ 48 | MatchLabels: map[string]string{ 49 | "k8s-app": "prometheus", 50 | "rep": name, 51 | }, 52 | } 53 | sts1.Spec.VolumeClaimTemplates = []v1.PersistentVolumeClaim{ 54 | {ObjectMeta: v12.ObjectMeta{ 55 | Name: "data", 56 | }}, 57 | } 58 | 59 | _, err := cli.AppsV1().StatefulSets("default").Create(context.TODO(), sts1, v12.CreateOptions{}) 60 | for i := 0; i < int(rep); i++ { 61 | pvc := &v1.PersistentVolumeClaim{} 62 | pvc.Name = fmt.Sprintf("%s-%s-%d", "data", sts1.Name, i) 63 | _, err = cli.CoreV1().PersistentVolumeClaims("default").Create(context.TODO(), pvc, v12.CreateOptions{}) 64 | r.NoError(err) 65 | } 66 | 67 | r.NoError(err) 68 | return sts1 69 | } 70 | 71 | func TestStatefulSet_Shards(t *testing.T) { 72 | cli := fake.NewSimpleClientset() 73 | sf := createStatefulSet(t, 
cli, "rep1", 2) 74 | 75 | sts := newShardManager(cli, sf, 8080, true, logrus.New()) 76 | sts.getPods = func(lb map[string]string) (list *v1.PodList, e error) { 77 | pl := &v1.PodList{} 78 | for i := 0; i < 2; i++ { 79 | p := v1.Pod{} 80 | p.Name += fmt.Sprintf("rep1-%d", i) 81 | p.Status.Conditions = []v1.PodCondition{ 82 | { 83 | Type: v1.PodReady, 84 | Status: v1.ConditionTrue, 85 | }, 86 | } 87 | pl.Items = append(pl.Items, p) 88 | } 89 | return pl, nil 90 | } 91 | 92 | shards, err := sts.Shards() 93 | 94 | r := require.New(t) 95 | r.NoError(err) 96 | r.Equal(2, len(shards)) 97 | } 98 | 99 | func TestStatefulSet_ChangeScale(t *testing.T) { 100 | t.Run("scale up", testScaleUp) 101 | t.Run("scale down ,delete pvc", func(t *testing.T) { 102 | testScaleDown(t, true) 103 | }) 104 | t.Run("scale down ,remain pvc", func(t *testing.T) { 105 | testScaleDown(t, false) 106 | }) 107 | } 108 | 109 | func testScaleUp(t *testing.T) { 110 | r := require.New(t) 111 | cli := fake.NewSimpleClientset() 112 | sf := createStatefulSet(t, cli, "rep1", 2) 113 | sts := newShardManager(cli, sf, 8080, true, logrus.New()) 114 | r.NoError(sts.ChangeScale(10)) 115 | s, err := cli.AppsV1().StatefulSets("default").Get(context.TODO(), "rep1", v12.GetOptions{}) 116 | r.NoError(err) 117 | r.Equal(int32(10), *s.Spec.Replicas) 118 | } 119 | 120 | func testScaleDown(t *testing.T, deletePvc bool) { 121 | r := require.New(t) 122 | cli := fake.NewSimpleClientset() 123 | sf := createStatefulSet(t, cli, "rep1", 2) 124 | sts := newShardManager(cli, sf, 8080, deletePvc, logrus.New()) 125 | r.NoError(sts.ChangeScale(1)) 126 | s, err := cli.AppsV1().StatefulSets("default").Get(context.TODO(), "rep1", v12.GetOptions{}) 127 | r.NoError(err) 128 | r.Equal(int32(1), *s.Spec.Replicas) 129 | pvc, err := cli.CoreV1().PersistentVolumeClaims("default").List(context.TODO(), v12.ListOptions{}) 130 | r.NoError(err) 131 | if deletePvc { 132 | r.Equal(1, len(pvc.Items)) 133 | } else { 134 | r.Equal(2, len(pvc.Items)) 135 | 
} 136 | } 137 | -------------------------------------------------------------------------------- /pkg/shard/shard.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package shard 19 | 20 | import ( 21 | "fmt" 22 | "net/url" 23 | 24 | "github.com/pkg/errors" 25 | "github.com/sirupsen/logrus" 26 | "tkestack.io/kvass/pkg/api" 27 | "tkestack.io/kvass/pkg/prom" 28 | "tkestack.io/kvass/pkg/scrape" 29 | "tkestack.io/kvass/pkg/target" 30 | ) 31 | 32 | // Shard is a prometheus shard 33 | type Shard struct { 34 | // ID is the unique ID for differentiate different replicate of shard 35 | ID string 36 | // APIGet is a function to do stand api request to target 37 | // exposed this field for user to writ unit testing easily 38 | APIGet func(url string, ret interface{}) error 39 | // APIPost is a function to do stand api request to target 40 | // exposed this field for user to writ unit testing easily 41 | APIPost func(url string, req interface{}, ret interface{}) (err error) 42 | // scraping is the cached ScrapeStatus fetched from sidecar last time 43 | scraping map[uint64]*target.ScrapeStatus 44 | url string 45 | log logrus.FieldLogger 46 | // Ready indicate this shard is ready 47 | Ready bool 48 | 
} 49 | 50 | // NewShard create a Shard with empty scraping cache 51 | func NewShard(id string, url string, ready bool, log logrus.FieldLogger) *Shard { 52 | return &Shard{ 53 | ID: id, 54 | APIGet: api.Get, 55 | APIPost: api.Post, 56 | url: url, 57 | log: log, 58 | Ready: ready, 59 | } 60 | } 61 | 62 | //Samples return the sample statistics of last scrape 63 | func (r *Shard) Samples(jobName string, withMetricsDetail bool) (map[string]*scrape.StatisticsSeriesResult, error) { 64 | ret := map[string]*scrape.StatisticsSeriesResult{} 65 | param := url.Values{} 66 | if jobName != "" { 67 | param["job"] = []string{jobName} 68 | } 69 | if withMetricsDetail { 70 | param["with_metrics_detail"] = []string{"true"} 71 | } 72 | 73 | u := r.url + "/api/v1/shard/samples/" 74 | if len(param) != 0 { 75 | u += "?" + param.Encode() 76 | } 77 | 78 | err := r.APIGet(u, &ret) 79 | if err != nil { 80 | return nil, fmt.Errorf("get samples info from %s failed : %s", r.ID, err.Error()) 81 | } 82 | 83 | return ret, nil 84 | } 85 | 86 | // RuntimeInfo return the runtime status of this shard 87 | func (r *Shard) RuntimeInfo() (*RuntimeInfo, error) { 88 | res := &RuntimeInfo{} 89 | err := r.APIGet(r.url+"/api/v1/shard/runtimeinfo/", &res) 90 | if err != nil { 91 | return res, fmt.Errorf("get runtime info from %s failed : %s", r.ID, err.Error()) 92 | } 93 | 94 | return res, nil 95 | } 96 | 97 | // TargetStatus return the target runtime status that Group scraping 98 | // cached result will be send if something wrong 99 | func (r *Shard) TargetStatus() (map[uint64]*target.ScrapeStatus, error) { 100 | res := map[uint64]*target.ScrapeStatus{} 101 | 102 | err := r.APIGet(r.url+"/api/v1/shard/targets/status/", &res) 103 | if err != nil { 104 | return res, errors.Wrapf(err, "get targets status info from %s failed, url = %s", r.ID, r.url) 105 | } 106 | 107 | //must copy 108 | m := map[uint64]*target.ScrapeStatus{} 109 | for k, v := range res { 110 | newV := *v 111 | m[k] = &newV 112 | } 113 | 114 | 
r.scraping = m 115 | return res, nil 116 | } 117 | 118 | // UpdateConfig try update shard config by API 119 | func (r *Shard) UpdateConfig(req *UpdateConfigRequest) error { 120 | return r.APIPost(r.url+"/api/v1/status/config", req, nil) 121 | } 122 | 123 | // UpdateExtraConfig try update shard extra config by API 124 | func (r *Shard) UpdateExtraConfig(req *prom.ExtraConfig) error { 125 | return r.APIPost(r.url+"/api/v1/status/extra_config", req, nil) 126 | } 127 | 128 | // UpdateTarget try apply targets to sidecar 129 | // request will be skipped if nothing changed according to r.scraping 130 | func (r *Shard) UpdateTarget(request *UpdateTargetsRequest) error { 131 | newTargets := map[uint64]*target.Target{} 132 | for _, ts := range request.Targets { 133 | for _, t := range ts { 134 | newTargets[t.Hash] = t 135 | } 136 | } 137 | 138 | if r.needUpdate(newTargets) { 139 | if len(newTargets) != 0 || len(r.scraping) != 0 { 140 | r.log.Infof("%s need update targets", r.ID) 141 | } 142 | if err := r.APIPost(r.url+"/api/v1/shard/targets/", &request, nil); err != nil { 143 | return err 144 | } 145 | } 146 | 147 | return nil 148 | } 149 | 150 | func (r *Shard) needUpdate(targets map[uint64]*target.Target) bool { 151 | if len(targets) != len(r.scraping) || len(targets) == 0 { 152 | return true 153 | } 154 | 155 | for k, v := range targets { 156 | if r.scraping[k] == nil || r.scraping[k].TargetState != v.TargetState { 157 | return true 158 | } 159 | } 160 | return false 161 | } 162 | -------------------------------------------------------------------------------- /pkg/shard/shard_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. 
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package shard 19 | 20 | import ( 21 | "net/url" 22 | "strings" 23 | "testing" 24 | "time" 25 | 26 | kscrape "tkestack.io/kvass/pkg/scrape" 27 | 28 | "github.com/prometheus/prometheus/scrape" 29 | "github.com/sirupsen/logrus" 30 | "github.com/stretchr/testify/require" 31 | "tkestack.io/kvass/pkg/target" 32 | "tkestack.io/kvass/pkg/utils/test" 33 | ) 34 | 35 | func newTestingShard(t *testing.T) (*Shard, *require.Assertions) { 36 | lg := logrus.New() 37 | s := NewShard("0", "", true, lg) 38 | return s, require.New(t) 39 | } 40 | 41 | func TestShard_RuntimeInfo(t *testing.T) { 42 | s, r := newTestingShard(t) 43 | s.APIGet = func(url string, ret interface{}) error { 44 | return test.CopyJSON(ret, &RuntimeInfo{ 45 | HeadSeries: 10, 46 | }) 47 | } 48 | 49 | res, err := s.RuntimeInfo() 50 | r.NoError(err) 51 | r.Equal(int64(10), res.HeadSeries) 52 | } 53 | 54 | func TestShard_TargetStatus(t *testing.T) { 55 | s, r := newTestingShard(t) 56 | st := &target.ScrapeStatus{ 57 | LastError: "test", 58 | LastScrape: time.Time{}, 59 | LastScrapeDuration: 10, 60 | Health: scrape.HealthBad, 61 | Series: 100, 62 | } 63 | s.APIGet = func(url string, ret interface{}) error { 64 | return test.CopyJSON(ret, map[uint64]*target.ScrapeStatus{ 65 | 1: st, 66 | }) 67 | } 68 | 69 | ret, err := s.TargetStatus() 70 | r.NoError(err) 71 | r.JSONEq(test.MustJSON(st), test.MustJSON(ret[1])) 72 | } 73 | 74 | func TestShard_UpdateTarget(t *testing.T) { 75 | var cases = []struct { 76 | name string 77 | curScraping map[uint64]*target.ScrapeStatus 78 | wantTargets 
*UpdateTargetsRequest 79 | wantUpdate bool 80 | }{ 81 | { 82 | name: "need update, targets not exist", 83 | curScraping: map[uint64]*target.ScrapeStatus{}, 84 | wantTargets: &UpdateTargetsRequest{ 85 | Targets: map[string][]*target.Target{ 86 | "job1": { 87 | { 88 | Hash: 1, 89 | }, 90 | }, 91 | }, 92 | }, 93 | wantUpdate: true, 94 | }, 95 | { 96 | name: "need update, target state change", 97 | curScraping: map[uint64]*target.ScrapeStatus{ 98 | 1: {}, 99 | }, 100 | wantTargets: &UpdateTargetsRequest{ 101 | Targets: map[string][]*target.Target{ 102 | "job1": { 103 | { 104 | Hash: 1, 105 | TargetState: target.StateInTransfer, 106 | }, 107 | }, 108 | }, 109 | }, 110 | wantUpdate: true, 111 | }, 112 | { 113 | name: "not need update", 114 | curScraping: map[uint64]*target.ScrapeStatus{ 115 | 1: {}, 116 | }, 117 | wantTargets: &UpdateTargetsRequest{ 118 | Targets: map[string][]*target.Target{ 119 | "job1": { 120 | { 121 | Hash: 1, 122 | }, 123 | }, 124 | }, 125 | }, 126 | wantUpdate: false, 127 | }, 128 | } 129 | 130 | for _, cs := range cases { 131 | t.Run(cs.name, func(t *testing.T) { 132 | s, r := newTestingShard(t) 133 | s.scraping = cs.curScraping 134 | s.APIPost = func(url string, req interface{}, ret interface{}) (err error) { 135 | r.True(cs.wantUpdate) 136 | return nil 137 | } 138 | r.NoError(s.UpdateTarget(cs.wantTargets)) 139 | }) 140 | } 141 | } 142 | 143 | func TestShard_Samples(t *testing.T) { 144 | fakeGet := func(u string, ret interface{}) error { 145 | ul, err := url.Parse(u) 146 | if err != nil { 147 | return err 148 | } 149 | 150 | job := ul.Query().Get("job") 151 | detail := ul.Query().Get("with_metrics_detail") 152 | data := map[string]*kscrape.StatisticsSeriesResult{} 153 | res := kscrape.NewStatisticsSeriesResult() 154 | res.ScrapedTotal = 1 155 | if detail == "true" { 156 | res.MetricsTotal = map[string]*kscrape.MetricSamplesInfo{ 157 | "test": { 158 | Total: 2, 159 | Scraped: 1, 160 | }, 161 | } 162 | } 163 | if job == "" || strings.Contains(job, 
"job1") { 164 | data["job1"] = res 165 | } 166 | 167 | test.CopyJSON(ret, data) 168 | return nil 169 | } 170 | 171 | var cases = []struct { 172 | desc string 173 | jobName string 174 | withDetail bool 175 | wantResult map[string]*kscrape.StatisticsSeriesResult 176 | }{ 177 | { 178 | desc: "without job name, without metrics detail", 179 | jobName: "", 180 | withDetail: false, 181 | wantResult: map[string]*kscrape.StatisticsSeriesResult{ 182 | "job1": { 183 | ScrapedTotal: 1, 184 | MetricsTotal: map[string]*kscrape.MetricSamplesInfo{}, 185 | }, 186 | }, 187 | }, 188 | { 189 | desc: "with wrong job name filter", 190 | jobName: "xx", 191 | withDetail: false, 192 | wantResult: map[string]*kscrape.StatisticsSeriesResult{}, 193 | }, { 194 | desc: "with right job name filter", 195 | jobName: "job1", 196 | withDetail: false, 197 | wantResult: map[string]*kscrape.StatisticsSeriesResult{ 198 | "job1": { 199 | ScrapedTotal: 1, 200 | MetricsTotal: map[string]*kscrape.MetricSamplesInfo{}, 201 | }, 202 | }, 203 | }, 204 | { 205 | desc: "without job name, with metrics detail", 206 | withDetail: true, 207 | wantResult: map[string]*kscrape.StatisticsSeriesResult{ 208 | "job1": { 209 | ScrapedTotal: 1, 210 | MetricsTotal: map[string]*kscrape.MetricSamplesInfo{ 211 | "test": { 212 | Total: 2, 213 | Scraped: 1, 214 | }, 215 | }, 216 | }, 217 | }, 218 | }, 219 | } 220 | 221 | for _, cs := range cases { 222 | t.Run(cs.desc, func(t *testing.T) { 223 | s, r := newTestingShard(t) 224 | s.APIGet = fakeGet 225 | res, err := s.Samples(cs.jobName, cs.withDetail) 226 | r.NoError(err) 227 | r.JSONEq(test.MustJSON(cs.wantResult), test.MustJSON(res)) 228 | }) 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /pkg/shard/static/replicasmanager.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 
3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package static 19 | 20 | import ( 21 | "github.com/pkg/errors" 22 | "github.com/sirupsen/logrus" 23 | "gopkg.in/yaml.v2" 24 | "io/ioutil" 25 | "tkestack.io/kvass/pkg/shard" 26 | ) 27 | 28 | // ReplicasManager create replicas from static 29 | type ReplicasManager struct { 30 | file string 31 | log logrus.FieldLogger 32 | } 33 | 34 | // NewReplicasManager return an new static shard replicas manager 35 | func NewReplicasManager(file string, log logrus.FieldLogger) *ReplicasManager { 36 | return &ReplicasManager{ 37 | file: file, 38 | log: log, 39 | } 40 | } 41 | 42 | // Replicas return all shards manager 43 | func (g *ReplicasManager) Replicas() ([]shard.Manager, error) { 44 | content, err := ioutil.ReadFile(g.file) 45 | if err != nil { 46 | return nil, errors.Wrapf(err, "read config file") 47 | } 48 | config := &staticConfig{} 49 | if err := yaml.Unmarshal(content, config); err != nil { 50 | return nil, errors.Wrapf(err, "wrong format of shard config") 51 | } 52 | 53 | ret := make([]shard.Manager, 0) 54 | for _, r := range config.Replicas { 55 | ret = append(ret, newShardManager(r.Shards, g.log)) 56 | } 57 | 58 | return ret, nil 59 | } 60 | -------------------------------------------------------------------------------- /pkg/shard/static/replicasmanager_test.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package static 19 | 20 | import ( 21 | "github.com/sirupsen/logrus" 22 | "github.com/stretchr/testify/require" 23 | "io/ioutil" 24 | "testing" 25 | ) 26 | 27 | func TestReplicasManager_Replicas(t *testing.T) { 28 | type caseInfo struct { 29 | fileContent string 30 | wantReplicas int 31 | wantErr bool 32 | } 33 | 34 | successCase := func() *caseInfo { 35 | return &caseInfo{ 36 | fileContent: ` 37 | replicas: 38 | - shards: 39 | - id: shard-0 40 | url: http://1.1.1.1 41 | - shards: 42 | - id: shard-1 43 | url: http://2.2.2.2 44 | `, 45 | wantErr: false, 46 | wantReplicas: 2, 47 | } 48 | } 49 | 50 | var cases = []struct { 51 | desc string 52 | updateCase func(c *caseInfo) 53 | }{ 54 | { 55 | desc: "success", 56 | updateCase: func(c *caseInfo) {}, 57 | }, 58 | { 59 | desc: "wrong config format", 60 | updateCase: func(c *caseInfo) { 61 | c.fileContent = `replicas : a` 62 | c.wantErr = true 63 | }, 64 | }, 65 | { 66 | desc: "read file err", 67 | updateCase: func(c *caseInfo) { 68 | c.fileContent = "" 69 | c.wantErr = true 70 | }, 71 | }, 72 | } 73 | 74 | for _, cs := range cases { 75 | t.Run(cs.desc, func(t *testing.T) { 76 | r := require.New(t) 
77 | c := successCase() 78 | cs.updateCase(c) 79 | file := t.TempDir() + "shards.yaml" 80 | if c.fileContent != "" { 81 | r.NoError(ioutil.WriteFile(file, []byte(c.fileContent), 0777)) 82 | } 83 | 84 | m := NewReplicasManager(file, logrus.New()) 85 | res, err := m.Replicas() 86 | if c.wantErr { 87 | r.Error(err) 88 | return 89 | } 90 | r.Equal(c.wantReplicas, len(res)) 91 | }) 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /pkg/shard/static/shardmanager.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package static 19 | 20 | import ( 21 | "github.com/sirupsen/logrus" 22 | "tkestack.io/kvass/pkg/shard" 23 | ) 24 | 25 | type shardManager struct { 26 | shards []shardConfig 27 | log logrus.FieldLogger 28 | } 29 | 30 | func newShardManager(shards []shardConfig, log logrus.FieldLogger) *shardManager { 31 | return &shardManager{ 32 | shards: shards, 33 | log: log, 34 | } 35 | } 36 | 37 | // Shards return current Shards in the cluster 38 | func (s *shardManager) Shards() ([]*shard.Shard, error) { 39 | ret := make([]*shard.Shard, 0) 40 | for _, sd := range s.shards { 41 | ret = append(ret, shard.NewShard(sd.ID, sd.URL, true, s.log.WithField("shard", sd.ID))) 42 | } 43 | return ret, nil 44 | } 45 | 46 | // ChangeScale create or delete Shards according to "expReplicate" 47 | // static shard can not change scale 48 | func (s *shardManager) ChangeScale(expReplicate int32) error { 49 | return nil 50 | } 51 | -------------------------------------------------------------------------------- /pkg/shard/static/shardmanager_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package static 19 | 20 | import ( 21 | "github.com/sirupsen/logrus" 22 | "github.com/stretchr/testify/require" 23 | "testing" 24 | ) 25 | 26 | func TestShardManager_Shards(t *testing.T) { 27 | shards := []shardConfig{ 28 | { 29 | ID: "0", 30 | URL: "http://1.1.1.1", 31 | }, 32 | } 33 | m := newShardManager(shards, logrus.New()) 34 | sd, err := m.Shards() 35 | require.NoError(t, err) 36 | require.Equal(t, 1, len(sd)) 37 | require.Equal(t, shards[0].ID, sd[0].ID) 38 | } 39 | 40 | func TestShardManager_ChangeScale(t *testing.T) { 41 | m := newShardManager(nil, logrus.New()) 42 | require.NoError(t, m.ChangeScale(0)) 43 | } 44 | -------------------------------------------------------------------------------- /pkg/shard/static/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
type (
	// shardConfig describes a single statically configured shard.
	shardConfig struct {
		// ID is the unique id of this shard.
		ID string `yaml:"id"`
		// URL is the address the coordinator uses to communicate with the shard.
		URL string `yaml:"url"`
	}

	// staticConfig is the top-level layout of the static shard config file.
	staticConfig struct {
		// Replicas lists every replica; each replica is a group of shards.
		Replicas []struct {
			// Shards lists all shard members of one replica.
			Shards []shardConfig `yaml:"shards"`
		} `yaml:"replicas"`
	}
)
16 | */ 17 | 18 | package shard 19 | 20 | import ( 21 | "time" 22 | 23 | "tkestack.io/kvass/pkg/target" 24 | ) 25 | 26 | // ReplicasManager known all shard managers 27 | type ReplicasManager interface { 28 | // Replicas return all replicas 29 | Replicas() ([]Manager, error) 30 | } 31 | 32 | // Manager known how to create or delete Shards 33 | type Manager interface { 34 | // Shards return current Shards in the cluster 35 | Shards() ([]*Shard, error) 36 | // ChangeScale create or delete Shards according to "expReplicate" 37 | ChangeScale(expReplicate int32) error 38 | } 39 | 40 | // RuntimeInfo contains all running status of this shard 41 | type RuntimeInfo struct { 42 | // HeadSeries return current head_series of prometheus 43 | HeadSeries int64 `json:"headSeries"` 44 | // ProcessSeries is the all process series of shard 45 | ProcessSeries int64 `json:"processSeries"` 46 | // ConfigHash is the md5 of current config file 47 | ConfigHash string `json:"ConfigHash"` 48 | // IdleStartAt is the time that shard begin idle 49 | IdleStartAt *time.Time `json:"IdleStartAt,omitempty"` 50 | } 51 | 52 | // UpdateTargetsRequest contains all information about the targets updating request 53 | type UpdateTargetsRequest struct { 54 | // targets contains all targets this shard should scrape 55 | Targets map[string][]*target.Target 56 | } 57 | 58 | // UpdateConfigRequest is request struct for POST / 59 | type UpdateConfigRequest struct { 60 | RawContent string `json:"rawContent"` 61 | } 62 | -------------------------------------------------------------------------------- /pkg/sidecar/injector_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 
5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | package sidecar 18 | 19 | import ( 20 | "os" 21 | "path" 22 | 23 | "github.com/go-kit/log" 24 | "github.com/prometheus/client_golang/prometheus" 25 | "github.com/prometheus/common/model" 26 | "tkestack.io/kvass/pkg/prom" 27 | 28 | "testing" 29 | 30 | "github.com/prometheus/prometheus/config" 31 | "github.com/prometheus/prometheus/discovery" 32 | "github.com/prometheus/prometheus/model/labels" 33 | "github.com/sirupsen/logrus" 34 | "github.com/stretchr/testify/require" 35 | "tkestack.io/kvass/pkg/target" 36 | ) 37 | 38 | func TestInjector_UpdateConfig(t *testing.T) { 39 | cfg := `global: {} 40 | scrape_configs: 41 | - job_name: job 42 | honor_timestamps: false 43 | bearer_token: job 44 | remote_write: 45 | - url: http://127.0.0.1 46 | bearer_token: write 47 | remote_read: 48 | - url: http://127.0.0.1 49 | bearer_token: read 50 | ` 51 | tar := &target.Target{ 52 | Hash: 1, 53 | Labels: labels.Labels{ 54 | { 55 | Name: model.AddressLabel, 56 | Value: "127.0.0.1:80", 57 | }, 58 | { 59 | Name: model.SchemeLabel, 60 | Value: "https", 61 | }, 62 | { 63 | Name: model.MetricsPathLabel, 64 | Value: "/metrics", 65 | }, 66 | }, 67 | } 68 | 69 | r := require.New(t) 70 | outFile := path.Join(t.TempDir(), "out") 71 | _ = os.Setenv("POD_NAME", "rep-0") 72 | 73 | in := NewInjector(outFile, 74 | InjectConfigOptions{ 75 | ProxyURL: "http://127.0.0.1:8008", 76 | PrometheusURL: "http://127.0.0.1:9090", 77 | 
}, prometheus.NewRegistry(), 78 | logrus.New()) 79 | 80 | r.NoError(in.ApplyConfig(&prom.ConfigInfo{ 81 | RawContent: []byte(cfg), 82 | })) 83 | r.NoError(in.UpdateTargets(map[string][]*target.Target{ 84 | "job": {tar}, 85 | })) 86 | out, err := config.LoadFile(outFile, true, false, log.NewNopLogger()) 87 | r.NoError(err) 88 | 89 | outJob := out.ScrapeConfigs[0] 90 | outSD := outJob.ServiceDiscoveryConfigs[0].(discovery.StaticConfig)[0] 91 | r.Equal("http://127.0.0.1:8008", outJob.HTTPClientConfig.ProxyURL.String()) 92 | r.Equal(model.LabelValue("127.0.0.1:80"), outSD.Targets[0][model.AddressLabel]) 93 | r.Equal(model.LabelValue("http"), outSD.Labels[model.SchemeLabel]) 94 | r.Equal(model.LabelValue("/metrics"), outSD.Labels[model.MetricsPathLabel]) 95 | r.Equal(model.LabelValue("https"), outSD.Labels[model.ParamLabelPrefix+paramScheme]) 96 | r.Equal(model.LabelValue("job"), outSD.Labels[model.ParamLabelPrefix+paramJobName]) 97 | r.Equal(model.LabelValue("1"), outSD.Labels[model.ParamLabelPrefix+paramHash]) 98 | 99 | outSelf := out.ScrapeConfigs[1] 100 | outSelfSD := outSelf.ServiceDiscoveryConfigs[0].(discovery.StaticConfig)[0] 101 | r.Nil(outSelf.HTTPClientConfig.ProxyURL.URL) 102 | r.Equal(model.LabelValue("127.0.0.1:9090"), outSelfSD.Targets[0][model.AddressLabel]) 103 | r.Equal(model.LabelValue("0"), outSelfSD.Labels["shard"]) 104 | r.Equal(model.LabelValue("rep-0"), outSelfSD.Labels["replicate"]) 105 | } 106 | -------------------------------------------------------------------------------- /pkg/sidecar/proxy.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. 
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package sidecar 19 | 20 | import ( 21 | "fmt" 22 | "net/http" 23 | "net/url" 24 | "strconv" 25 | "time" 26 | 27 | parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus" 28 | "github.com/prometheus/client_golang/prometheus" 29 | "github.com/sirupsen/logrus" 30 | "tkestack.io/kvass/pkg/prom" 31 | "tkestack.io/kvass/pkg/scrape" 32 | "tkestack.io/kvass/pkg/target" 33 | ) 34 | 35 | var ( 36 | proxyTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 37 | Name: "kvass_sidecar_proxy_total", 38 | }, []string{}) 39 | proxySeries = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 40 | Name: "kvass_sidecar_proxy_target_series", 41 | }, []string{"target_job", "url"}) 42 | proxyScrapeDurtion = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 43 | Name: "kvass_sidecar_proxy_target_scrape_durtion", 44 | }, []string{"target_job", "url"}) 45 | ) 46 | 47 | // Proxy is a Proxy server for prometheus tManager 48 | // Proxy will return an empty metrics if this target if not allowed to scrape for this prometheus client 49 | // otherwise, Proxy do real tManager, statistic metrics samples and return metrics to prometheus 50 | type Proxy struct { 51 | getJob func(jobName string) *scrape.JobInfo 52 | getStatus func() map[uint64]*target.ScrapeStatus 53 | getCurCfg func() *prom.ConfigInfo 54 | log logrus.FieldLogger 55 | } 56 | 57 | // NewProxy create a new proxy server 58 | func NewProxy( 59 | getJob func(jobName string) *scrape.JobInfo, 60 | getStatus func() map[uint64]*target.ScrapeStatus, 61 | getCurCfg func() *prom.ConfigInfo, 62 
| promRegistry prometheus.Registerer, 63 | log logrus.FieldLogger) *Proxy { 64 | _ = promRegistry.Register(proxyTotal) 65 | _ = promRegistry.Register(proxySeries) 66 | _ = promRegistry.Register(proxyScrapeDurtion) 67 | return &Proxy{ 68 | getJob: getJob, 69 | getStatus: getStatus, 70 | getCurCfg: getCurCfg, 71 | log: log, 72 | } 73 | } 74 | 75 | // Run start Proxy server and block 76 | func (p *Proxy) Run(address string) error { 77 | return http.ListenAndServe(address, p) 78 | } 79 | 80 | // ServeHTTP handle one Proxy request 81 | func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) { 82 | proxyTotal.WithLabelValues().Inc() 83 | stopReason := p.getCurCfg().ExtraConfig.StopScrapeReason 84 | 85 | job, hashStr, realURL := translateURL(*r.URL) 86 | jobInfo := p.getJob(job) 87 | if jobInfo == nil { 88 | p.log.Errorf("can not found job client of %s", job) 89 | w.WriteHeader(http.StatusBadRequest) 90 | return 91 | } 92 | 93 | hash, err := strconv.ParseUint(hashStr, 10, 64) 94 | if err != nil { 95 | p.log.Errorf("unexpected hash string %s", hashStr) 96 | w.WriteHeader(http.StatusBadRequest) 97 | return 98 | } 99 | 100 | tar := p.getStatus()[hash] 101 | 102 | start := time.Now() 103 | var scrapErr error 104 | defer func() { 105 | if scrapErr != nil { 106 | p.log.Errorf(scrapErr.Error()) 107 | w.WriteHeader(http.StatusBadRequest) 108 | if tar != nil { 109 | tar.LastScrapeStatistics = scrape.NewStatisticsSeriesResult() 110 | } 111 | } else if stopReason != "" { 112 | p.log.Warnf(stopReason) 113 | w.WriteHeader(http.StatusBadRequest) 114 | scrapErr = fmt.Errorf(stopReason) 115 | } 116 | 117 | if tar != nil { 118 | tar.ScrapeTimes++ 119 | tar.SetScrapeErr(start, scrapErr) 120 | } 121 | }() 122 | 123 | scraper := scrape.NewScraper(jobInfo, realURL.String(), p.log) 124 | if stopReason == "" { 125 | scraper.WithRawWriter(w) 126 | } 127 | 128 | if err := scraper.RequestTo(); err != nil { 129 | scrapErr = fmt.Errorf("RequestTo %s %s %v", job, realURL.String(), err) 130 
| return 131 | } 132 | w.Header().Set("Content-Type", scraper.HTTPResponse.Header.Get("Content-Type")) 133 | 134 | rs := scrape.NewStatisticsSeriesResult() 135 | if err := scraper.ParseResponse(func(rows []parser.Row) error { 136 | scrape.StatisticSeries(rows, jobInfo.Config.MetricRelabelConfigs, rs) 137 | return nil 138 | }); err != nil { 139 | scrapErr = fmt.Errorf("copy data to prometheus failed %v", err) 140 | if time.Since(start) > time.Duration(jobInfo.Config.ScrapeTimeout) { 141 | scrapErr = fmt.Errorf("scrape timeout") 142 | } 143 | return 144 | } 145 | 146 | proxySeries.WithLabelValues(jobInfo.Config.JobName, realURL.String()).Set(float64(rs.ScrapedTotal)) 147 | proxyScrapeDurtion.WithLabelValues(jobInfo.Config.JobName, realURL.String()).Set(float64(time.Now().Sub(start))) 148 | if tar != nil { 149 | tar.UpdateScrapeResult(rs) 150 | } 151 | } 152 | 153 | func translateURL(u url.URL) (job string, hash string, realURL url.URL) { 154 | vs := u.Query() 155 | job = vs.Get(paramJobName) 156 | hash = vs.Get(paramHash) 157 | scheme := vs.Get(paramScheme) 158 | 159 | vs.Del(paramHash) 160 | vs.Del(paramJobName) 161 | vs.Del(paramScheme) 162 | 163 | u.Scheme = scheme 164 | u.RawQuery = vs.Encode() 165 | return job, hash, u 166 | } 167 | -------------------------------------------------------------------------------- /pkg/sidecar/proxy_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. 
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | package sidecar 18 | 19 | import ( 20 | "io/ioutil" 21 | "net/http" 22 | "net/http/httptest" 23 | "strings" 24 | "testing" 25 | "time" 26 | 27 | "github.com/prometheus/client_golang/prometheus" 28 | "github.com/prometheus/common/model" 29 | "github.com/prometheus/prometheus/config" 30 | scrape2 "github.com/prometheus/prometheus/scrape" 31 | "github.com/sirupsen/logrus" 32 | "github.com/stretchr/testify/require" 33 | "tkestack.io/kvass/pkg/prom" 34 | "tkestack.io/kvass/pkg/scrape" 35 | "tkestack.io/kvass/pkg/target" 36 | ) 37 | 38 | func TestProxy_ServeHTTP(t *testing.T) { 39 | var cases = []struct { 40 | name string 41 | job *config.ScrapeConfig 42 | status map[uint64]*target.ScrapeStatus 43 | uri string 44 | data string 45 | wantStatusCode int 46 | wantTargetStatus map[uint64]*target.ScrapeStatus 47 | }{ 48 | { 49 | name: "job not found", 50 | job: nil, 51 | status: map[uint64]*target.ScrapeStatus{}, 52 | uri: "/metrics?_jobName=job1&_scheme=http&_hash=1", 53 | wantStatusCode: http.StatusBadRequest, 54 | wantTargetStatus: map[uint64]*target.ScrapeStatus{}, 55 | }, 56 | { 57 | name: "invalid hash", 58 | job: &config.ScrapeConfig{ 59 | JobName: "job1", 60 | ScrapeTimeout: model.Duration(time.Second * 3), 61 | }, 62 | status: map[uint64]*target.ScrapeStatus{}, 63 | uri: "/metrics?_jobName=job1&_scheme=http&_hash=xxxx", 64 | wantStatusCode: http.StatusBadRequest, 65 | wantTargetStatus: map[uint64]*target.ScrapeStatus{}, 66 | }, 67 | { 68 | name: "scrape failed", 69 | job: &config.ScrapeConfig{ 70 | JobName: "job1", 71 | ScrapeTimeout: 
model.Duration(time.Second * 3), 72 | }, 73 | status: map[uint64]*target.ScrapeStatus{ 74 | 1: {}, 75 | }, 76 | uri: "/metrics?_jobName=job1&_scheme=http&_hash=1", 77 | data: ``, 78 | wantStatusCode: http.StatusBadRequest, 79 | wantTargetStatus: map[uint64]*target.ScrapeStatus{ 80 | 1: { 81 | Health: scrape2.HealthBad, 82 | Series: 0, 83 | TargetState: target.StateInTransfer, 84 | }, 85 | }, 86 | }, 87 | { 88 | name: "scrape success", 89 | job: &config.ScrapeConfig{ 90 | JobName: "job1", 91 | ScrapeTimeout: model.Duration(time.Second * 3), 92 | }, 93 | status: map[uint64]*target.ScrapeStatus{ 94 | 1: {}, 95 | }, 96 | uri: "/metrics?_jobName=job1&_scheme=http&_hash=1", 97 | data: `metrics0{} 1`, 98 | wantStatusCode: http.StatusOK, 99 | wantTargetStatus: map[uint64]*target.ScrapeStatus{ 100 | 1: { 101 | Health: scrape2.HealthGood, 102 | Series: 1, 103 | }, 104 | }, 105 | }, 106 | } 107 | 108 | for _, cs := range cases { 109 | t.Run(cs.name, func(t *testing.T) { 110 | r := require.New(t) 111 | targetServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 112 | if cs.data == "" { 113 | w.WriteHeader(http.StatusBadRequest) 114 | } else { 115 | _, _ = w.Write([]byte(cs.data)) 116 | } 117 | })) 118 | defer targetServer.Close() 119 | 120 | p := NewProxy( 121 | func(jobName string) *scrape.JobInfo { 122 | if cs.job == nil { 123 | return nil 124 | } 125 | return &scrape.JobInfo{ 126 | Config: cs.job, 127 | Cli: http.DefaultClient, 128 | } 129 | }, 130 | func() map[uint64]*target.ScrapeStatus { 131 | return cs.status 132 | }, 133 | func() *prom.ConfigInfo { 134 | return prom.DefaultConfig 135 | }, 136 | prometheus.NewRegistry(), 137 | logrus.New()) 138 | 139 | req := httptest.NewRequest(http.MethodGet, targetServer.URL+cs.uri, strings.NewReader("")) 140 | w := httptest.NewRecorder() 141 | p.ServeHTTP(w, req) 142 | 143 | result := w.Result() 144 | r.Equal(cs.wantStatusCode, result.StatusCode) 145 | if cs.data != `` { 146 | d, err := 
ioutil.ReadAll(result.Body) 147 | r.NoError(err) 148 | r.Equal(string(d), cs.data) 149 | } 150 | 151 | if len(cs.wantTargetStatus) != 0 { 152 | r.Equal(cs.wantTargetStatus[1].Series, cs.status[1].Series) 153 | r.Equal(cs.wantTargetStatus[1].Health, cs.status[1].Health) 154 | } 155 | }) 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /pkg/sidecar/targets.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package sidecar 19 | 20 | import ( 21 | "encoding/json" 22 | "fmt" 23 | "io/ioutil" 24 | "os" 25 | "path" 26 | "time" 27 | 28 | "github.com/pkg/errors" 29 | "github.com/prometheus/client_golang/prometheus" 30 | "github.com/sirupsen/logrus" 31 | "tkestack.io/kvass/pkg/shard" 32 | "tkestack.io/kvass/pkg/target" 33 | "tkestack.io/kvass/pkg/utils/types" 34 | ) 35 | 36 | var ( 37 | storeFileName = "kvass-shard.json" 38 | oldVersionStoreFileName = "targets.json" 39 | timeNow = time.Now 40 | 41 | targetsUpdatedTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 42 | Name: "kvass_sidecar_targets_updated_total", 43 | }, []string{"success"}) 44 | 45 | targetsTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 46 | Name: "kvass_sidecar_targets_total", 47 | }, []string{}) 48 | ) 49 | 50 | // TargetsInfo contains all current targets 51 | type TargetsInfo struct { 52 | // Targets is all targets this shard scraping 53 | Targets map[string][]*target.Target 54 | // IdleAt is the time this shard has no scraping targets 55 | // IdleAt is nil if at lease one target is scraping 56 | IdleAt *time.Time 57 | // Status is the runtime status of all targets 58 | Status map[uint64]*target.ScrapeStatus `json:"-"` 59 | } 60 | 61 | func newTargetsInfo() TargetsInfo { 62 | return TargetsInfo{ 63 | Targets: map[string][]*target.Target{}, 64 | Status: map[uint64]*target.ScrapeStatus{}, 65 | } 66 | } 67 | 68 | // TargetsManager manager local targets of this shard 69 | type TargetsManager struct { 70 | targets TargetsInfo 71 | updateCallbacks []func(targets map[string][]*target.Target) error 72 | storeDir string 73 | log logrus.FieldLogger 74 | } 75 | 76 | // NewTargetsManager return a new target manager 77 | func NewTargetsManager(storeDir string, promRegistry prometheus.Registerer, log logrus.FieldLogger) *TargetsManager { 78 | _ = promRegistry.Register(targetsTotal) 79 | _ = promRegistry.Register(targetsUpdatedTotal) 80 | return &TargetsManager{ 81 | storeDir: storeDir, 82 
| log: log, 83 | targets: newTargetsInfo(), 84 | } 85 | } 86 | 87 | // Load load local targets information from storeDir 88 | func (t *TargetsManager) Load() error { 89 | _ = os.MkdirAll(t.storeDir, 0755) 90 | defer func() { 91 | _ = t.UpdateTargets(&shard.UpdateTargetsRequest{Targets: t.targets.Targets}) 92 | }() 93 | 94 | data, err := ioutil.ReadFile(t.storePath()) 95 | if err == nil { 96 | if err := json.Unmarshal(data, &t.targets); err != nil { 97 | return errors.Wrapf(err, "marshal %s", storeFileName) 98 | } 99 | } else { 100 | if !os.IsNotExist(err) { 101 | return errors.Wrapf(err, "load %s failed", storeFileName) 102 | } 103 | // compatible old version 104 | data, err := ioutil.ReadFile(path.Join(t.storeDir, oldVersionStoreFileName)) 105 | if err != nil { 106 | if os.IsNotExist(err) { 107 | return nil 108 | } 109 | return errors.Wrapf(err, "load %s failed", oldVersionStoreFileName) 110 | } 111 | 112 | if err := json.Unmarshal(data, &t.targets.Targets); err != nil { 113 | return errors.Wrapf(err, "marshal targets.json") 114 | } 115 | } 116 | 117 | return nil 118 | } 119 | 120 | // AddUpdateCallbacks add a call back for targets updating event 121 | func (t *TargetsManager) AddUpdateCallbacks(f ...func(targets map[string][]*target.Target) error) { 122 | t.updateCallbacks = append(t.updateCallbacks, f...) 
123 | } 124 | 125 | // UpdateTargets update local targets 126 | func (t *TargetsManager) UpdateTargets(req *shard.UpdateTargetsRequest) (err error) { 127 | defer func() { 128 | targetsUpdatedTotal.WithLabelValues(fmt.Sprint(err == nil)).Inc() 129 | targetsTotal.WithLabelValues().Set(float64(len(t.targets.Status))) 130 | }() 131 | 132 | t.targets.Targets = req.Targets 133 | t.updateStatus() 134 | t.updateIdleState() 135 | 136 | if err := t.doCallbacks(); err != nil { 137 | return errors.Wrapf(err, "do callbacks") 138 | } 139 | 140 | return errors.Wrapf(t.saveTargets(), "save targets to file") 141 | } 142 | 143 | func (t *TargetsManager) updateIdleState() { 144 | if len(t.targets.Status) == 0 && t.targets.IdleAt == nil { 145 | t.targets.IdleAt = types.TimePtr(timeNow()) 146 | } 147 | 148 | if len(t.targets.Status) != 0 { 149 | t.targets.IdleAt = nil 150 | } 151 | } 152 | 153 | func (t *TargetsManager) updateStatus() { 154 | status := map[uint64]*target.ScrapeStatus{} 155 | for job, ts := range t.targets.Targets { 156 | for _, tar := range ts { 157 | if t.targets.Status[tar.Hash] == nil { 158 | status[tar.Hash] = target.NewScrapeStatus(tar.Series, tar.TotalSeries) 159 | } else { 160 | status[tar.Hash] = t.targets.Status[tar.Hash] 161 | } 162 | if status[tar.Hash].TargetState == target.StateNormal && tar.TargetState == target.StateInTransfer { 163 | t.log.Infof("%s/%s begin transfer", job, tar.NoParamURL()) 164 | status[tar.Hash].ScrapeTimes = 0 165 | } 166 | 167 | status[tar.Hash].TargetState = tar.TargetState 168 | } 169 | } 170 | t.targets.Status = status 171 | } 172 | 173 | func (t *TargetsManager) doCallbacks() error { 174 | for _, call := range t.updateCallbacks { 175 | if err := call(t.targets.Targets); err != nil { 176 | return err 177 | } 178 | } 179 | return nil 180 | } 181 | 182 | func (t *TargetsManager) saveTargets() error { 183 | data, _ := json.Marshal(&t.targets) 184 | if err := ioutil.WriteFile(t.storePath(), data, 0755); err != nil { 185 | return err 
186 | } 187 | return nil 188 | } 189 | 190 | func (t *TargetsManager) storePath() string { 191 | return path.Join(t.storeDir, storeFileName) 192 | } 193 | 194 | // TargetsInfo return current targets of this shard 195 | func (t *TargetsManager) TargetsInfo() TargetsInfo { 196 | return t.targets 197 | } 198 | -------------------------------------------------------------------------------- /pkg/target/status.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package target 19 | 20 | import ( 21 | "time" 22 | 23 | kscrape "tkestack.io/kvass/pkg/scrape" 24 | 25 | "github.com/prometheus/prometheus/scrape" 26 | ) 27 | 28 | // ScrapeStatus contains last scraping status of the target 29 | type ScrapeStatus struct { 30 | // LastError save the error string if last scraping is error 31 | LastError string `json:"lastError"` 32 | // LastScrape save the time of last scraping 33 | LastScrape time.Time `json:"lastScrape"` 34 | // LastScrapeDuration save the seconds duration last scraping spend 35 | LastScrapeDuration float64 `json:"lastScrapeDuration"` 36 | // Health it the status of last scraping 37 | Health scrape.TargetHealth `json:"health"` 38 | // Series is the avg load of last 3 times scraping, metrics_relabel_configs will be process 39 | Series int64 `json:"series"` 40 | // TotalSeries is the total series in last scraping, without metrics_relabel_configs 41 | TotalSeries int64 `json:"totalSeries"` 42 | // TargetState indicate current state of this target 43 | TargetState string `json:"TargetState"` 44 | // ScrapeTimes is the times target scraped by this shard 45 | ScrapeTimes uint64 `json:"ScrapeTimes"` 46 | // Shards contains ID of shards that is scraping this target 47 | Shards []string `json:"shards"` 48 | // LastScrapeStatistics is samples statistics of last scrape 49 | LastScrapeStatistics *kscrape.StatisticsSeriesResult `json:"-"` 50 | lastSeries []int64 51 | } 52 | 53 | // SetScrapeErr mark the result of this scraping 54 | // health will be down if err is not nil 55 | // health will be up if err is nil 56 | func (t *ScrapeStatus) SetScrapeErr(start time.Time, err error) { 57 | t.LastScrape = start 58 | t.LastScrapeDuration = time.Since(start).Seconds() 59 | if err == nil { 60 | t.LastError = "" 61 | t.Health = scrape.HealthGood 62 | } else { 63 | t.LastError = err.Error() 64 | t.Health = scrape.HealthBad 65 | } 66 | } 67 | 68 | // NewScrapeStatus create a new ScrapeStatus with referential series 69 | 
func NewScrapeStatus(series, total int64) *ScrapeStatus { 70 | return &ScrapeStatus{ 71 | Series: series, 72 | TotalSeries: total, 73 | Health: scrape.HealthUnknown, 74 | LastScrapeStatistics: kscrape.NewStatisticsSeriesResult(), 75 | } 76 | } 77 | 78 | // UpdateScrapeResult statistic target samples info 79 | func (t *ScrapeStatus) UpdateScrapeResult(r *kscrape.StatisticsSeriesResult) { 80 | if len(t.lastSeries) < 3 { 81 | t.lastSeries = append(t.lastSeries, int64(r.ScrapedTotal)) 82 | } else { 83 | newSeries := make([]int64, 0) 84 | newSeries = append(newSeries, t.lastSeries[1:]...) 85 | newSeries = append(newSeries, int64(r.ScrapedTotal)) 86 | t.lastSeries = newSeries 87 | } 88 | 89 | total := int64(0) 90 | for _, i := range t.lastSeries { 91 | total += i 92 | } 93 | 94 | t.Series = int64(float64(total) / float64(len(t.lastSeries))) 95 | t.TotalSeries = int64(r.Total) 96 | t.LastScrapeStatistics = r 97 | } 98 | -------------------------------------------------------------------------------- /pkg/target/status_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package target 19 | 20 | import ( 21 | "fmt" 22 | "testing" 23 | "time" 24 | 25 | "github.com/prometheus/prometheus/scrape" 26 | "github.com/stretchr/testify/require" 27 | kscrape "tkestack.io/kvass/pkg/scrape" 28 | ) 29 | 30 | func TestScrapeStatus_SetScrapeErr(t *testing.T) { 31 | r := require.New(t) 32 | st := NewScrapeStatus(10, 10) 33 | r.Equal(int64(10), st.Series) 34 | st.SetScrapeErr(time.Now(), nil) 35 | r.Equal(scrape.HealthGood, st.Health) 36 | r.Equal("", st.LastError) 37 | 38 | st = NewScrapeStatus(10, 10) 39 | st.SetScrapeErr(time.Now(), fmt.Errorf("test")) 40 | r.Equal(scrape.HealthBad, st.Health) 41 | r.Equal("test", st.LastError) 42 | } 43 | 44 | func TestScrapeStatus_UpdateSamples(t *testing.T) { 45 | r := require.New(t) 46 | st := NewScrapeStatus(1, 10) 47 | rs := kscrape.NewStatisticsSeriesResult() 48 | rs.ScrapedTotal = 2 49 | st.UpdateScrapeResult(rs) 50 | st.UpdateScrapeResult(rs) 51 | st.UpdateScrapeResult(rs) 52 | st.UpdateScrapeResult(rs) 53 | r.Equal(int64(2), st.Series) 54 | } 55 | -------------------------------------------------------------------------------- /pkg/target/target.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package target 19 | 20 | import ( 21 | "net/url" 22 | "strings" 23 | 24 | "github.com/prometheus/common/model" 25 | "github.com/prometheus/prometheus/config" 26 | "github.com/prometheus/prometheus/model/labels" 27 | ) 28 | 29 | const ( 30 | // PrefixForInvalidLabelName is a prefix string for mark invalid label name become valid 31 | PrefixForInvalidLabelName = model.ReservedLabelPrefix + "invalid_label_" 32 | 33 | // StateNormal indicates this target is scraping normally 34 | StateNormal = "" 35 | // StateInTransfer indicate this target is in transfer process 36 | StateInTransfer = "in_transfer" 37 | ) 38 | 39 | // Target is a target generate prometheus config 40 | type Target struct { 41 | // Hash is calculated from origin labels before relabel_configs process and the URL of this target 42 | // see prometheus scrape.Target.hash 43 | Hash uint64 `json:"hash"` 44 | // Labels is result of relabel_configs process 45 | Labels labels.Labels `json:"labels"` 46 | // Series is reference series of this target, may from target explorer 47 | Series int64 `json:"series"` 48 | // TotalSeries is the total series in last scraping, without metrics_relabel_configs 49 | TotalSeries int64 `json:"totalSeries"` 50 | // TargetState indicate current state of this target 51 | TargetState string `json:"TargetState"` 52 | } 53 | 54 | // Address return the address from labels 55 | func (t *Target) Address() string { 56 | for _, v := range t.Labels { 57 | if v.Name == model.AddressLabel { 58 | return v.Value 59 | } 60 | } 61 | return "" 62 | } 63 | 64 | // NoReservedLabel return the labels without reserved prefix "__" 65 | func (t *Target) NoReservedLabel() labels.Labels { 66 | lset := make(labels.Labels, 0, len(t.Labels)) 67 | for _, l := range t.Labels { 68 | if !strings.HasPrefix(l.Name, model.ReservedLabelPrefix) { 69 | lset = append(lset, l) 70 | } 71 | } 72 | return lset 73 | } 74 | 75 | // NoParamURL return a url without params 76 | func (t *Target) NoParamURL() 
*url.URL { 77 | return &url.URL{ 78 | Scheme: t.Labels.Get(model.SchemeLabel), 79 | Host: t.Labels.Get(model.AddressLabel), 80 | Path: t.Labels.Get(model.MetricsPathLabel), 81 | } 82 | } 83 | 84 | // URL return the full url of this target, the params of cfg will be add to url 85 | func (t *Target) URL(cfg *config.ScrapeConfig) *url.URL { 86 | params := url.Values{} 87 | 88 | for k, v := range cfg.Params { 89 | params[k] = make([]string, len(v)) 90 | copy(params[k], v) 91 | } 92 | for _, l := range t.Labels { 93 | if !strings.HasPrefix(l.Name, model.ParamLabelPrefix) { 94 | continue 95 | } 96 | ks := l.Name[len(model.ParamLabelPrefix):] 97 | 98 | if len(params[ks]) > 0 { 99 | params[ks][0] = l.Value 100 | } else { 101 | params[ks] = []string{l.Value} 102 | } 103 | } 104 | 105 | return &url.URL{ 106 | Scheme: t.Labels.Get(model.SchemeLabel), 107 | Host: t.Labels.Get(model.AddressLabel), 108 | Path: t.Labels.Get(model.MetricsPathLabel), 109 | RawQuery: params.Encode(), 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /pkg/target/target_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package target 19 | 20 | import ( 21 | "github.com/prometheus/common/model" 22 | "github.com/prometheus/prometheus/config" 23 | "github.com/prometheus/prometheus/model/labels" 24 | "github.com/stretchr/testify/require" 25 | "testing" 26 | ) 27 | 28 | func TestTarget_Address(t *testing.T) { 29 | tar := &Target{ 30 | Labels: labels.Labels{ 31 | { 32 | Name: model.AddressLabel, 33 | Value: "123", 34 | }, 35 | }, 36 | } 37 | require.Equal(t, "123", tar.Address()) 38 | } 39 | 40 | func TestTarget_NoParamURL(t *testing.T) { 41 | tar := &Target{ 42 | Hash: 0, 43 | Labels: labels.Labels{ 44 | { 45 | Name: model.AddressLabel, 46 | Value: "127.0.0.1:80", 47 | }, 48 | { 49 | Name: model.SchemeLabel, 50 | Value: "http", 51 | }, 52 | { 53 | Name: model.MetricsPathLabel, 54 | Value: "/metrics", 55 | }, 56 | { 57 | Name: model.ParamLabelPrefix + "test", 58 | Value: "test", 59 | }, 60 | }, 61 | } 62 | require.Equal(t, "http://127.0.0.1:80/metrics", tar.NoParamURL().String()) 63 | } 64 | 65 | func TestTarget_URL(t *testing.T) { 66 | cfg := &config.ScrapeConfig{ 67 | Params: map[string][]string{ 68 | "t1": {"v1"}, 69 | }, 70 | } 71 | 72 | tar := &Target{ 73 | Hash: 0, 74 | Labels: labels.Labels{ 75 | { 76 | Name: model.AddressLabel, 77 | Value: "127.0.0.1:80", 78 | }, 79 | { 80 | Name: model.SchemeLabel, 81 | Value: "http", 82 | }, 83 | { 84 | Name: model.MetricsPathLabel, 85 | Value: "/metrics", 86 | }, 87 | { 88 | Name: model.ParamLabelPrefix + "t2", 89 | Value: "v2", 90 | }, 91 | }, 92 | } 93 | require.Equal(t, "http://127.0.0.1:80/metrics?t1=v1&t2=v2", tar.URL(cfg).String()) 94 | } 95 | 96 | func TestTarget_NoReservedLabel(t *testing.T) { 97 | tar := &Target{ 98 | Hash: 0, 99 | Labels: labels.Labels{ 100 | { 101 | Name: model.AddressLabel, 102 | Value: "127.0.0.1:80", 103 | }, 104 | { 105 | Name: "instance", 106 | Value: "a", 107 | }, 108 | }, 109 | } 110 | lb := tar.NoReservedLabel() 111 | require.Equal(t, 1, len(lb)) 112 | } 113 | 
// Md5 return the md5 digest of b as a lower case hex string
//
// NOTE(review): this file imports hex from
// github.com/gobuffalo/packr/v2/file/resolver/encoding/hex, presumably it was meant to be
// the standard encoding/hex - confirm and switch the import
func Md5(b []byte) string {
	digest := md5.Sum(b)
	return hex.EncodeToString(digest[:])
}
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package encode 19 | 20 | import ( 21 | "github.com/stretchr/testify/require" 22 | "testing" 23 | ) 24 | 25 | func TestMd5(t *testing.T) { 26 | data := `test` 27 | md5 := Md5([]byte(data)) 28 | require.Equal(t, "098f6bcd4621d373cade4e832627b4f6", md5) 29 | } 30 | -------------------------------------------------------------------------------- /pkg/utils/k8sutil/status.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package k8sutil 19 | 20 | import v1 "k8s.io/api/core/v1" 21 | 22 | // IsPodReady return true if Pod conditions is ready 23 | func IsPodReady(p *v1.Pod) bool { 24 | for _, c := range p.Status.Conditions { 25 | if c.Type == v1.PodReady && c.Status == v1.ConditionTrue { 26 | return true 27 | } 28 | } 29 | return false 30 | } 31 | -------------------------------------------------------------------------------- /pkg/utils/k8sutil/status_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package k8sutil 19 | 20 | import ( 21 | "github.com/stretchr/testify/require" 22 | v1 "k8s.io/api/core/v1" 23 | "testing" 24 | ) 25 | 26 | func TestIsPodReady(t *testing.T) { 27 | r := require.New(t) 28 | p := &v1.Pod{} 29 | r.False(IsPodReady(p)) 30 | p.Status.Conditions = []v1.PodCondition{ 31 | { 32 | Type: v1.PodReady, 33 | Status: v1.ConditionTrue, 34 | }, 35 | } 36 | r.True(IsPodReady(p)) 37 | } 38 | -------------------------------------------------------------------------------- /pkg/utils/test/format.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package test 19 | 20 | import ( 21 | "encoding/json" 22 | "gopkg.in/yaml.v2" 23 | ) 24 | 25 | // MustJSON marshal obj to json string, a panic will be thrown if marshal failed 26 | func MustJSON(obj interface{}) string { 27 | if obj == nil { 28 | return "" 29 | } 30 | data, err := json.Marshal(obj) 31 | if err != nil { 32 | panic(err) 33 | } 34 | return string(data) 35 | } 36 | 37 | // MustYAMLV2 marshal obj to yaml string, a panic will be thrown if marshal failed 38 | func MustYAMLV2(obj interface{}) string { 39 | data, err := yaml.Marshal(obj) 40 | if err != nil { 41 | panic(err) 42 | } 43 | return string(data) 44 | } 45 | 46 | // CopyJSON copy object via json marshal 47 | func CopyJSON(dst, from interface{}) error { 48 | data, err := json.Marshal(from) 49 | if err != nil { 50 | return err 51 | } 52 | return json.Unmarshal(data, dst) 53 | } 54 | -------------------------------------------------------------------------------- /pkg/utils/test/format_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package test 19 | 20 | import ( 21 | "testing" 22 | 23 | "github.com/stretchr/testify/require" 24 | ) 25 | 26 | func TestMustJSON(t *testing.T) { 27 | var a = struct { 28 | A string 29 | }{ 30 | A: "test", 31 | } 32 | 33 | require.Equal(t, `{"A":"test"}`, MustJSON(&a)) 34 | require.Equal(t, "", MustJSON(nil)) 35 | } 36 | 37 | func TestMustYAMLV2(t *testing.T) { 38 | var a = struct { 39 | A string 40 | }{ 41 | A: "test", 42 | } 43 | 44 | require.Equal(t, `a: test 45 | `, MustYAMLV2(&a)) 46 | } 47 | 48 | func TestCopyJSON(t *testing.T) { 49 | from := "test" 50 | dst := "" 51 | require.NoError(t, CopyJSON(&dst, &from)) 52 | require.Equal(t, "test", dst) 53 | require.Error(t, CopyJSON(dst, from)) 54 | require.Error(t, CopyJSON(dst, nil)) 55 | } 56 | -------------------------------------------------------------------------------- /pkg/utils/types/pointer.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
// Int32Ptr returns a pointer to a copy of the passed int32.
func Int32Ptr(i int32) *int32 {
	return &i
}

// Int64Ptr returns a pointer to a copy of the passed int64.
func Int64Ptr(i int64) *int64 {
	return &i
}

// BoolPtr returns a pointer to a copy of the passed bool.
func BoolPtr(b bool) *bool {
	return &b
}

// StringPtr returns a pointer to the passed string.
func StringPtr(s string) *string {
	return &s
}

// TimePtr returns a pointer to a copy of the passed time.
// The parameter is not named "time" so the time package is not shadowed inside the function.
func TimePtr(t time.Time) *time.Time {
	return &t
}
16 | */ 17 | 18 | package types 19 | 20 | import ( 21 | "testing" 22 | 23 | "github.com/stretchr/testify/require" 24 | ) 25 | 26 | func TestPtr(t *testing.T) { 27 | i32 := Int32Ptr(1) 28 | require.Equal(t, int32(1), *i32) 29 | 30 | i64 := Int64Ptr(1) 31 | require.Equal(t, int64(1), *i64) 32 | 33 | b := BoolPtr(true) 34 | require.Equal(t, true, *b) 35 | 36 | s := StringPtr("123") 37 | require.Equal(t, "123", *s) 38 | } 39 | -------------------------------------------------------------------------------- /pkg/utils/types/slice.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
// FindString return true if any string of slice is equal to target, return false if not
func FindString(target string, slice ...string) bool {
	return anyString(slice, func(candidate string) bool {
		return candidate == target
	})
}

// FindStringVague return true if target is a sub string of any string of slice, return false if not
func FindStringVague(target string, slice ...string) bool {
	return anyString(slice, func(candidate string) bool {
		return strings.Contains(candidate, target)
	})
}

// anyString return true as soon as match accept one of candidates
func anyString(candidates []string, match func(string) bool) bool {
	for i := range candidates {
		if match(candidates[i]) {
			return true
		}
	}
	return false
}
16 | */ 17 | 18 | package types 19 | 20 | import ( 21 | "testing" 22 | 23 | "github.com/stretchr/testify/require" 24 | ) 25 | 26 | func TestFindString(t *testing.T) { 27 | require.True(t, FindString("1", "1", "2")) 28 | require.False(t, FindString("3", "1", "2")) 29 | } 30 | 31 | func TestFindStringVague(t *testing.T) { 32 | require.True(t, FindStringVague("1", "1", "2")) 33 | require.True(t, FindStringVague("1", "11", "22")) 34 | require.True(t, FindStringVague("/api/v1/shard/runtimeinfo", "/api/v1/shard/runtimeinfo/", "22")) 35 | require.False(t, FindStringVague("3", "1", "2")) 36 | } 37 | -------------------------------------------------------------------------------- /pkg/utils/types/strings.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | import "strings" 4 | 5 | // DeepCopyString deep copy a string with new memory space 6 | func DeepCopyString(s string) string { 7 | var sb strings.Builder 8 | sb.WriteString(s) 9 | return sb.String() 10 | } 11 | -------------------------------------------------------------------------------- /pkg/utils/wait/wait.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package wait 19 | 20 | import ( 21 | "context" 22 | "github.com/sirupsen/logrus" 23 | "time" 24 | ) 25 | 26 | // RunUntil run fc period until ctx is done 27 | func RunUntil(ctx context.Context, log logrus.FieldLogger, interval time.Duration, fc func() error) error { 28 | for { 29 | select { 30 | case <-ctx.Done(): 31 | return nil 32 | default: 33 | } 34 | 35 | if err := fc(); err != nil { 36 | log.Errorf(err.Error()) 37 | } 38 | time.Sleep(interval) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /pkg/utils/wait/wait_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package wait 19 | 20 | import ( 21 | "context" 22 | "fmt" 23 | "testing" 24 | "time" 25 | 26 | "github.com/sirupsen/logrus" 27 | "github.com/stretchr/testify/require" 28 | ) 29 | 30 | func TestRunUntil(t *testing.T) { 31 | ctx, cancel := context.WithTimeout(context.Background(), time.Second) 32 | err := RunUntil(ctx, logrus.New(), time.Second, func() error { 33 | cancel() 34 | return fmt.Errorf("xx") 35 | }) 36 | require.NoError(t, err) 37 | } 38 | --------------------------------------------------------------------------------