├── .gitignore ├── .goreleaser.yml ├── .travis.yml ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── cmd └── discovery │ ├── discovery.toml │ └── main.go ├── codecov.sh ├── conf └── conf.go ├── coverage.txt ├── discovery ├── discovery.go ├── register.go ├── register_test.go └── syncup.go ├── doc ├── api.md ├── arch.md ├── discovery_arch.png ├── discovery_pod_quit.png ├── discovery_pod_start.png ├── discovery_sdk.png ├── discovery_sdk_self.png ├── discovery_wechat.png ├── discovery_zone_arch.png ├── felixhao_wechat.png ├── intro.md ├── practice.md ├── scheduler.md └── sdk.md ├── go.mod ├── go.sum ├── http ├── discovery.go └── http.go ├── install.sh ├── model ├── instance.go ├── node.go └── param.go ├── naming ├── client.go ├── client_test.go ├── example_test.go ├── grpc │ ├── resolver.go │ └── resolver_test.go └── naming.go └── registry ├── guard.go ├── guard_test.go ├── node.go ├── node_test.go ├── nodes.go ├── registry.go ├── registry_test.go └── scheduler.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | 16 | cmd/discovery/discovery 17 | dist/ 18 | configs/ 19 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | # This is an example goreleaser.yaml file with some sane defaults. 2 | # Make sure to check the documentation at http://goreleaser.com 3 | before: 4 | hooks: 5 | # you may remove this if you don't use vgo 6 | - go mod download 7 | # you may remove this if you don't need go generate 8 | - go generate ./... 9 | - cp cmd/discovery/discovery-example.toml configs/discovery-example.toml 10 | builds: 11 | - 12 | env: 13 | - CGO_ENABLED=0 14 | main: ./cmd/discovery/main.go 15 | binary: discovery 16 | archive: 17 | replacements: 18 | darwin: Darwin 19 | linux: Linux 20 | windows: Windows 21 | 386: i386 22 | amd64: x86_64 23 | files: 24 | - LICENSE 25 | - README.md 26 | - CHANGELOG.md 27 | - configs/* 28 | checksum: 29 | name_template: 'checksums.txt' 30 | snapshot: 31 | name_template: "{{ .Tag }}-next" 32 | changelog: 33 | sort: asc 34 | filters: 35 | exclude: 36 | - '^docs:' 37 | - '^test:' 38 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This is a weird way of telling Travis to use the fast container-based test 2 | # runner instead of the slow VM-based runner. 3 | sudo: false 4 | 5 | language: go 6 | 7 | # Force-enable Go modules. This will be unnecessary when Go 1.12 lands. 8 | env: 9 | - GO111MODULE=on 10 | 11 | # You don't need to test on very old version of the Go compiler. It's the user's 12 | # responsibility to keep their compilers up to date. 13 | go: 14 | - 1.12 15 | 16 | # Only clone the most recent commit. 17 | git: 18 | depth: 1 19 | 20 | # Skip the install step. Don't `go get` dependencies. Only build with the code 21 | # in vendor/ 22 | install: true 23 | 24 | # Don't email me the results of the test runs. 
25 | notifications: 26 |   email: false 27 | 28 | # Anything in before_script that returns a nonzero exit code will flunk the 29 | # build and immediately stop. It's sorta like having set -e enabled in bash. 30 | # Make sure golangci-lint is vendored. 31 | before_script: 32 | - curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $GOPATH/bin 33 | 34 | # script always runs to completion (set +e). If we have linter issues AND a 35 | # failing test, we want to see both. Configure golangci-lint with a 36 | # .golangci.yml file at the top level of your repo. 37 | script: 38 | - sh codecov.sh # Run all the tests with the race detector enabled 39 | 40 | after_success: 41 | - golangci-lint run # run a bunch of code checkers/linters in parallel 42 | - bash <(curl -s https://codecov.io/bash) 43 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | #### Discovery 2 | ##### Version 1.2.1 3 | 1. fix replication set 4 | 2. fix config conflict 5 | 6 | ##### Version 1.2.0 7 | 1. multi zone scheduler 8 | 2. optimize lock scope 9 | 3. return appids which do not exist when polling 10 | 11 | ##### Version 1.1.2 12 | 1. fix missing LatestTimestamp when registering 13 | 2. fix param split bug 14 | 3. update go mod 15 | 16 | ##### Version 1.1.1 17 | 1. fix initproject abort 18 | 19 | ##### Version 1.1.0 20 | 1. use kratos pkg 21 | 2. replace gin with kratos/bm 22 | 3. fix poll returning nil when canceled. 23 | 4. add init protect mode 24 | 5. support set. 25 | 26 | ##### Version 1.0.2 27 | 1. fix nodesproc: get all zone nodes. 28 | 29 | ##### Version 1.0.1 30 | 1. rename import path 31 | 32 | ##### Version 1.0.0 33 | 1. discovery register&polls&replica 34 | 2. self-discovering nodes 35 | 3. metadata updates 36 | 4. naming client & grpc resolver 37 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.12.16 as build 2 | RUN mkdir -p /app/building 3 | WORKDIR /app/building 4 | ADD . /app/building 5 | RUN make build 6 | 7 | FROM alpine:3.9.5 8 | # Copy from docker build 9 | COPY --from=build /app/building/dist/bin/discovery /app/bin/ 10 | COPY --from=build /app/building/dist/conf/discovery.toml /app/conf/ 11 | # Copy from local build 12 | #ADD dist/ /app/ 13 | ENV LOG_DIR /app/log 14 | EXPOSE 7171 15 | WORKDIR /app/ 16 | CMD /app/bin/discovery -conf /app/conf/ -confkey discovery.toml 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 bilibili 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Go parameters 2 | GOCMD=GO111MODULE=on CGO_ENABLED=0 go 3 | GOBUILD=$(GOCMD) build 4 | GOTEST=$(GOCMD) test 5 | 6 | all: test build 7 | build: 8 | 	rm -rf dist/ 9 | 	mkdir -p dist/conf 10 | 	cp cmd/discovery/discovery-example.toml dist/conf/discovery.toml 11 | 	$(GOBUILD) -o dist/bin/discovery cmd/discovery/main.go 12 | 13 | test: 14 | 	$(GOTEST) -v ./... 15 | 16 | clean: 17 | 	rm -rf dist/ 18 | 19 | run: 20 | 	nohup dist/bin/discovery -conf dist/conf -confkey discovery.toml -log.dir dist/log > dist/nohup.out 2>&1 & 21 | 22 | stop: 23 | 	pkill -f dist/bin/discovery 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Discovery 2 | [![Build Status](https://travis-ci.org/bilibili/discovery.svg?branch=master)](https://travis-ci.org/bilibili/discovery) 3 | [![Go Report Card](https://goreportcard.com/badge/github.com/bilibili/discovery)](https://goreportcard.com/report/github.com/bilibili/discovery) 4 | [![codecov](https://codecov.io/gh/Bilibili/discovery/branch/master/graph/badge.svg)](https://codecov.io/gh/Bilibili/discovery) 5 | 6 | Discovery is a production-ready, HTTP-based service registry, primarily used at [Bilibili](https://www.bilibili.com/) for locating services for the purpose of load balancing and failover of middle-tier servers. 7 | 8 | ## Quick Start 9 | 10 | ### env 11 | 12 | `go1.12.x` (and later) 13 | 14 | ### build 15 | ```shell 16 | cd $GOPATH/src 17 | git clone https://github.com/bilibili/discovery.git 18 | cd discovery/cmd/discovery 19 | go build 20 | ``` 21 | 22 | ### run 23 | ```shell 24 | ./discovery -conf discovery.toml -alsologtostderr 25 | ``` 26 | 27 | `-alsologtostderr` is a `glog` flag that also prints logs to stderr. If you prefer writing logs to files, use `-log.dir="/tmp"` instead. See the [glog docs](https://godoc.org/github.com/golang/glog). 28 | 29 | ### Configuration 30 | 31 | The comments in `cmd/discovery/discovery.toml` explain what each configuration option means. 32 | 33 | ### Client 34 | 35 | * [API Doc](doc/api.md) 36 | * [Go SDK](naming/client.go) | [Example](naming/example_test.go) 37 | * [Java SDK](https://github.com/flygit/discoveryJavaSDK) 38 | * [CPP SDK](https://github.com/brpc/brpc/blob/master/src/brpc/policy/discovery_naming_service.cpp) 39 | * [Python SDK](https://github.com/tomwei7/discovery-client) 40 | * [Other languages](doc/sdk.md) 41 | 42 | ## Intro/Arch/Practice 43 | 44 | * [Introduction](doc/intro.md) 45 | * [Architecture](doc/arch.md) 46 | * [Practice in Bilibili](doc/practice.md) 47 | 48 | ## Feedback 49 | 50 | Please report bugs, concerns, and suggestions via issues, or join QQ group 716486124 to discuss the source code. 
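To try a running node end to end without any SDK, the plain HTTP API described in [doc/api.md](doc/api.md) is enough. Below is a minimal Go sketch (not part of the shipped SDK) that registers a provider and then renews it every 30s; the discovery address, appid, hostname and instance address are placeholder values — substitute your own.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
	"time"
)

// NOTE: discoveryHost and the form values in main are placeholders for this
// example; point them at your own discovery node and service.
const discoveryHost = "http://127.0.0.1:7171"

// post sends a form-encoded request to a discovery endpoint and checks the
// {"code":...,"message":...} envelope documented in doc/api.md.
func post(path string, form url.Values) error {
	resp, err := http.PostForm(discoveryHost+path, form)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	var body struct {
		Code    int    `json:"code"`
		Message string `json:"message"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
		return err
	}
	if body.Code != 0 {
		return fmt.Errorf("%s failed: code=%d message=%q", path, body.Code, body.Message)
	}
	return nil
}

func main() {
	reg := url.Values{}
	reg.Set("zone", "sh001")
	reg.Set("env", "dev")
	reg.Set("appid", "example.provider")
	reg.Set("hostname", "host-1")
	reg.Add("addrs", "http://172.1.1.1:8000")
	reg.Set("status", "1")
	reg.Set("metadata", `{"weight":"10"}`)
	// Register once on startup.
	if err := post("/discovery/register", reg); err != nil {
		panic(err)
	}
	// Heartbeat every 30s so the instance is not evicted.
	renew := url.Values{}
	for _, k := range []string{"zone", "env", "appid", "hostname"} {
		renew.Set(k, reg.Get(k))
	}
	for range time.Tick(30 * time.Second) {
		if err := post("/discovery/renew", renew); err != nil {
			fmt.Println("renew error:", err)
		}
	}
}
```

For real services, prefer the Go SDK in `naming/client.go` (see `naming/example_test.go`), which also handles discovery-node self-discovery, long polling, and cancel on shutdown.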
51 | -------------------------------------------------------------------------------- /cmd/discovery/discovery.toml: -------------------------------------------------------------------------------- 1 | 2 | # 同一discovery集群的所有node节点地址,包含本node 3 | nodes = ["127.0.0.1:7171"] 4 | enableprotect=false 5 | 6 | # 本可用区zone(一般指机房)标识 7 | [env] 8 | region = "sh" 9 | zone = "sh001" 10 | host = "test1" 11 | DeployEnv = "dev" 12 | 13 | 14 | # 其他可用区zone访问host和其标识 15 | # [zones] 16 | # "sh002" = ["10.2.0.10:7171", "10.2.0.11:7171", "10.2.0.12:7171"] 17 | # "sh003" = ["10.3.0.10:7171", "10.3.0.11:7171", "10.3.0.12:7171"] 18 | 19 | # 本节点监听端口 20 | # 注意:ip别配置为0.0.0.0或者127.0.0.1 21 | [httpServer] 22 | addr = "127.0.0.1:7171" 23 | timeout="40s" 24 | 25 | # 当前节点同步其他节点使用的http client 26 | # dial 连接建立超时时间 27 | # keepAlive 连接复用保持时间 28 | [httpClient] 29 | dial = "1s" 30 | keepAlive = "120s" 31 | timeout="40s" 32 | 33 | [log] 34 | stdout = true 35 | -------------------------------------------------------------------------------- /cmd/discovery/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "os" 6 | "os/signal" 7 | "syscall" 8 | "time" 9 | 10 | "github.com/bilibili/discovery/conf" 11 | "github.com/bilibili/discovery/discovery" 12 | "github.com/bilibili/discovery/http" 13 | log "github.com/go-kratos/kratos/pkg/log" 14 | ) 15 | 16 | func main() { 17 | flag.Parse() 18 | if err := conf.Init(); err != nil { 19 | log.Error("conf.Init() error(%v)", err) 20 | panic(err) 21 | } 22 | log.Init(conf.Conf.Log) 23 | dis, cancel := discovery.New(conf.Conf) 24 | http.Init(conf.Conf, dis) 25 | // init signal 26 | c := make(chan os.Signal, 1) 27 | signal.Notify(c, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT) 28 | for { 29 | s := <-c 30 | log.Info("discovery get a signal %s", s.String()) 31 | switch s { 32 | case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT: 33 | cancel() 34 | time.Sleep(time.Second) 35 | log.Info("discovery quit !!!") 36 | return 37 | case syscall.SIGHUP: 38 | default: 39 | return 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /codecov.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | echo "" > coverage.txt 5 | 6 | for d in $(go list ./... | grep -v vendor); do 7 | go test -race -coverprofile=profile.out -covermode=atomic $d 8 | if [ -f profile.out ]; then 9 | cat profile.out >> coverage.txt 10 | rm profile.out 11 | fi 12 | done -------------------------------------------------------------------------------- /conf/conf.go: -------------------------------------------------------------------------------- 1 | package conf 2 | 3 | import ( 4 | "github.com/BurntSushi/toml" 5 | "github.com/go-kratos/kratos/pkg/conf/env" 6 | "github.com/go-kratos/kratos/pkg/conf/paladin" 7 | log "github.com/go-kratos/kratos/pkg/log" 8 | http "github.com/go-kratos/kratos/pkg/net/http/blademaster" 9 | ) 10 | 11 | var ( 12 | configKey = "discovery.toml" 13 | // Conf conf 14 | Conf = &Config{} 15 | ) 16 | 17 | // Env is discovery env. 18 | type Env struct { 19 | Region string 20 | Zone string 21 | Host string 22 | DeployEnv string 23 | } 24 | 25 | // Config config. 
26 | type Config struct { 27 | Nodes []string 28 | Zones map[string][]string 29 | HTTPServer *http.ServerConfig 30 | HTTPClient *http.ClientConfig 31 | Env *Env 32 | Log *log.Config 33 | Scheduler []byte 34 | EnableProtect bool 35 | } 36 | 37 | func (c *Config) fix() (err error) { 38 | if c.Env == nil { 39 | c.Env = new(Env) 40 | } 41 | if c.Env.Region == "" { 42 | c.Env.Region = env.Region 43 | } 44 | if c.Env.Zone == "" { 45 | c.Env.Zone = env.Zone 46 | } 47 | if c.Env.Host == "" { 48 | c.Env.Host = env.Hostname 49 | } 50 | if c.Env.DeployEnv == "" { 51 | c.Env.DeployEnv = env.DeployEnv 52 | } 53 | return 54 | } 55 | 56 | // Init init conf 57 | func Init() (err error) { 58 | if err = paladin.Init(); err != nil { 59 | return 60 | } 61 | return paladin.Watch(configKey, Conf) 62 | } 63 | 64 | // Set config setter. 65 | func (c *Config) Set(content string) (err error) { 66 | var tmpConf *Config 67 | if _, err = toml.Decode(content, &tmpConf); err != nil { 68 | log.Error("decode config fail %v", err) 69 | return 70 | } 71 | if err = tmpConf.fix(); err != nil { 72 | return 73 | } 74 | *Conf = *tmpConf 75 | return nil 76 | } 77 | -------------------------------------------------------------------------------- /coverage.txt: -------------------------------------------------------------------------------- 1 | 2 | mode: atomic 3 | mode: atomic 4 | mode: atomic 5 | discovery/discovery/register.go:14.111,16.17 2 22 6 | discovery/discovery/register.go:16.17,18.3 1 0 7 | discovery/discovery/register.go:22.98,24.9 2 4 8 | discovery/discovery/register.go:29.2,29.22 1 3 9 | discovery/discovery/register.go:33.2,33.43 1 2 10 | discovery/discovery/register.go:38.2,38.8 1 2 11 | discovery/discovery/register.go:24.9,28.3 3 1 12 | discovery/discovery/register.go:29.22,32.3 2 1 13 | discovery/discovery/register.go:33.43,35.3 1 1 14 | discovery/discovery/register.go:35.8,35.50 1 1 15 | discovery/discovery/register.go:35.50,37.3 1 1 16 | discovery/discovery/register.go:42.81,44.9 2 12 17 | discovery/discovery/register.go:49.2,49.22 1 11 18 | discovery/discovery/register.go:52.2,52.8 1 11 19 | discovery/discovery/register.go:44.9,48.3 3 1 20 | discovery/discovery/register.go:49.22,51.3 1 11 21 | discovery/discovery/register.go:56.83,58.2 1 1 22 | discovery/discovery/register.go:61.105,63.2 1 4 23 | discovery/discovery/register.go:66.116,68.34 2 1 24 | discovery/discovery/register.go:76.2,76.8 1 1 25 | discovery/discovery/register.go:68.34,70.17 2 2 26 | discovery/discovery/register.go:74.3,74.16 1 2 27 | discovery/discovery/register.go:70.17,72.12 2 0 28 | discovery/discovery/register.go:80.129,82.2 1 6 29 | discovery/discovery/register.go:85.51,87.2 1 0 30 | discovery/discovery/register.go:90.66,92.2 1 1 31 | discovery/discovery/register.go:95.75,96.26 1 1 32 | discovery/discovery/register.go:99.2,99.8 1 1 33 | discovery/discovery/register.go:96.26,98.3 1 0 34 | discovery/discovery/syncup.go:22.30,24.40 2 11 35 | discovery/discovery/syncup.go:48.2,48.12 1 11 36 | discovery/discovery/syncup.go:24.40,25.30 1 22 37 | discovery/discovery/syncup.go:28.3,33.74 3 11 38 | discovery/discovery/syncup.go:37.3,37.20 1 1 39 | discovery/discovery/syncup.go:41.3,41.31 1 1 40 | discovery/discovery/syncup.go:25.30,26.12 1 11 41 | discovery/discovery/syncup.go:33.74,35.12 2 10 42 | discovery/discovery/syncup.go:37.20,39.12 2 0 43 | discovery/discovery/syncup.go:41.31,42.25 1 0 44 | discovery/discovery/syncup.go:42.25,44.5 1 0 45 | discovery/discovery/syncup.go:51.50,71.12 5 9 46 | discovery/discovery/syncup.go:100.2,100.15 1 9 47 
| discovery/discovery/syncup.go:71.12,74.7 3 9 48 | discovery/discovery/syncup.go:74.7,75.11 1 9 49 | discovery/discovery/syncup.go:76.20,83.78 2 0 50 | discovery/discovery/syncup.go:86.22,93.63 2 9 51 | discovery/discovery/syncup.go:96.5,96.11 1 9 52 | discovery/discovery/syncup.go:83.78,85.6 1 0 53 | discovery/discovery/syncup.go:93.63,95.6 1 0 54 | discovery/discovery/syncup.go:103.33,107.6 2 9 55 | discovery/discovery/syncup.go:107.6,116.46 3 19 56 | discovery/discovery/syncup.go:121.3,123.24 3 19 57 | discovery/discovery/syncup.go:126.3,130.37 2 10 58 | discovery/discovery/syncup.go:144.3,152.65 9 10 59 | discovery/discovery/syncup.go:116.46,119.12 3 0 60 | discovery/discovery/syncup.go:123.24,125.4 1 8 61 | discovery/discovery/syncup.go:130.37,131.27 1 10 62 | discovery/discovery/syncup.go:131.27,132.35 1 11 63 | discovery/discovery/syncup.go:132.35,134.42 2 11 64 | discovery/discovery/syncup.go:134.42,135.30 1 11 65 | discovery/discovery/syncup.go:135.30,137.8 1 11 66 | discovery/discovery/syncup.go:137.13,139.8 1 0 67 | discovery/discovery/discovery.go:21.68,32.2 6 9 68 | mode: atomic 69 | mode: atomic 70 | mode: atomic 71 | mode: atomic 72 | mode: atomic 73 | -------------------------------------------------------------------------------- /discovery/discovery.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "context" 5 | "sync/atomic" 6 | "time" 7 | 8 | "github.com/bilibili/discovery/conf" 9 | "github.com/bilibili/discovery/registry" 10 | http "github.com/go-kratos/kratos/pkg/net/http/blademaster" 11 | ) 12 | 13 | // Discovery discovery. 14 | type Discovery struct { 15 | c *conf.Config 16 | protected bool 17 | client *http.Client 18 | registry *registry.Registry 19 | nodes atomic.Value 20 | } 21 | 22 | // New get a discovery. 23 | func New(c *conf.Config) (d *Discovery, cancel context.CancelFunc) { 24 | d = &Discovery{ 25 | protected: c.EnableProtect, 26 | c: c, 27 | client: http.NewClient(c.HTTPClient), 28 | registry: registry.NewRegistry(c), 29 | } 30 | d.nodes.Store(registry.NewNodes(c)) 31 | d.syncUp() 32 | cancel = d.regSelf() 33 | go d.nodesproc() 34 | go d.exitProtect() 35 | return 36 | } 37 | 38 | func (d *Discovery) exitProtect() { 39 | // exist protect mode after two renew cycle 40 | time.Sleep(time.Second * 60) 41 | d.protected = false 42 | } 43 | -------------------------------------------------------------------------------- /discovery/register.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/bilibili/discovery/model" 7 | "github.com/bilibili/discovery/registry" 8 | "github.com/go-kratos/kratos/pkg/ecode" 9 | log "github.com/go-kratos/kratos/pkg/log" 10 | ) 11 | 12 | // Register a new instance. 13 | func (d *Discovery) Register(c context.Context, ins *model.Instance, latestTimestamp int64, replication bool, fromzone bool) { 14 | _ = d.registry.Register(ins, latestTimestamp) 15 | if !replication { 16 | _ = d.nodes.Load().(*registry.Nodes).Replicate(c, model.Register, ins, fromzone) 17 | } 18 | } 19 | 20 | // Renew marks the given instance of the given app name as renewed, and also marks whether it originated from replication. 
21 | func (d *Discovery) Renew(c context.Context, arg *model.ArgRenew) (i *model.Instance, err error) { 22 | i, ok := d.registry.Renew(arg) 23 | if !ok { 24 | err = ecode.NothingFound 25 | log.Error("renew appid(%s) hostname(%s) zone(%s) env(%s) error", arg.AppID, arg.Hostname, arg.Zone, arg.Env) 26 | return 27 | } 28 | if !arg.Replication { 29 | _ = d.nodes.Load().(*registry.Nodes).Replicate(c, model.Renew, i, arg.Zone != d.c.Env.Zone) 30 | return 31 | } 32 | if arg.DirtyTimestamp > i.DirtyTimestamp { 33 | err = ecode.NothingFound 34 | } else if arg.DirtyTimestamp < i.DirtyTimestamp { 35 | err = ecode.Conflict 36 | } 37 | return 38 | } 39 | 40 | // Cancel cancels the registration of an instance. 41 | func (d *Discovery) Cancel(c context.Context, arg *model.ArgCancel) (err error) { 42 | i, ok := d.registry.Cancel(arg) 43 | if !ok { 44 | err = ecode.NothingFound 45 | log.Error("cancel appid(%s) hostname(%s) error", arg.AppID, arg.Hostname) 46 | return 47 | } 48 | if !arg.Replication { 49 | _ = d.nodes.Load().(*registry.Nodes).Replicate(c, model.Cancel, i, arg.Zone != d.c.Env.Zone) 50 | } 51 | return 52 | } 53 | 54 | // FetchAll fetch all instances of all the department. 55 | func (d *Discovery) FetchAll(c context.Context) (im map[string][]*model.Instance) { 56 | return d.registry.FetchAll() 57 | } 58 | 59 | // Fetch fetch all instances by appid. 60 | func (d *Discovery) Fetch(c context.Context, arg *model.ArgFetch) (info *model.InstanceInfo, err error) { 61 | return d.registry.Fetch(arg.Zone, arg.Env, arg.AppID, 0, arg.Status) 62 | } 63 | 64 | // Fetchs fetch multi app by appids. 65 | func (d *Discovery) Fetchs(c context.Context, arg *model.ArgFetchs) (is map[string]*model.InstanceInfo, err error) { 66 | is = make(map[string]*model.InstanceInfo, len(arg.AppID)) 67 | for _, appid := range arg.AppID { 68 | i, err := d.registry.Fetch(arg.Zone, arg.Env, appid, 0, arg.Status) 69 | if err != nil { 70 | log.Error("Fetchs fetch appid(%v) err", err) 71 | continue 72 | } 73 | is[appid] = i 74 | } 75 | return 76 | } 77 | 78 | // Polls hangs request and then write instances when that has changes, or return NotModified. 79 | func (d *Discovery) Polls(c context.Context, arg *model.ArgPolls) (ch chan map[string]*model.InstanceInfo, new bool, miss []string, err error) { 80 | return d.registry.Polls(arg) 81 | } 82 | 83 | // DelConns delete conn of host in appid 84 | func (d *Discovery) DelConns(arg *model.ArgPolls) { 85 | d.registry.DelConns(arg) 86 | } 87 | 88 | // Nodes get all nodes of discovery. 89 | func (d *Discovery) Nodes(c context.Context) (nsi []*model.Node) { 90 | return d.nodes.Load().(*registry.Nodes).Nodes() 91 | } 92 | 93 | // Set set metadata,color,status of instance. 
94 | func (d *Discovery) Set(c context.Context, arg *model.ArgSet) (err error) { 95 | if !d.registry.Set(arg) { 96 | err = ecode.RequestErr 97 | } 98 | if !arg.Replication { 99 | d.nodes.Load().(*registry.Nodes).ReplicateSet(c, arg, arg.FromZone) 100 | } 101 | return 102 | } 103 | -------------------------------------------------------------------------------- /discovery/register_test.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "os" 7 | "strings" 8 | "testing" 9 | "time" 10 | 11 | dc "github.com/bilibili/discovery/conf" 12 | "github.com/bilibili/discovery/model" 13 | 14 | "github.com/go-kratos/kratos/pkg/conf/paladin" 15 | "github.com/go-kratos/kratos/pkg/ecode" 16 | http "github.com/go-kratos/kratos/pkg/net/http/blademaster" 17 | xtime "github.com/go-kratos/kratos/pkg/time" 18 | . "github.com/smartystreets/goconvey/convey" 19 | gock "gopkg.in/h2non/gock.v1" 20 | ) 21 | 22 | var ( 23 | ctx = context.TODO() 24 | reg = defRegisArg() 25 | rew = &model.ArgRenew{AppID: "main.arch.test", Hostname: "test1", Zone: "sh001", Env: "pre"} 26 | rew2 = &model.ArgRenew{AppID: "main.arch.test", Hostname: "test1", Zone: "sh001", Env: "pre"} 27 | cancel = &model.ArgCancel{AppID: "main.arch.test", Hostname: "test1", Zone: "sh001", Env: "pre"} 28 | fet = &model.ArgFetch{AppID: "main.arch.test", Zone: "sh001", Env: "pre", Status: 1} 29 | set = &model.ArgSet{AppID: "main.arch.test", 30 | Zone: "sh001", Env: "pre", 31 | Hostname: []string{"test1"}, 32 | Status: []int64{1}, 33 | } 34 | pollArg = newPoll() 35 | ) 36 | 37 | func TestMain(m *testing.M) { 38 | flag.Set("conf", "./") 39 | flag.Parse() 40 | paladin.Init() 41 | m.Run() 42 | os.Exit(0) 43 | } 44 | func newFetchArg() *model.ArgFetchs { 45 | return &model.ArgFetchs{AppID: []string{"main.arch.test"}, Zone: "sh001", Env: "pre", Status: 1} 46 | } 47 | func newPoll() *model.ArgPolls { 48 | return &model.ArgPolls{ 49 | Env: "pre", 50 | AppID: []string{"main.arch.test"}, 51 | LatestTimestamp: []int64{0}, 52 | } 53 | } 54 | func defRegisArg() *model.ArgRegister { 55 | return &model.ArgRegister{ 56 | AppID: "main.arch.test", 57 | Hostname: "test1", 58 | Zone: "sh001", 59 | Env: "pre", 60 | Status: 1, 61 | Metadata: `{"test":"test","weight":"10"}`, 62 | LatestTimestamp: time.Now().UnixNano(), 63 | } 64 | } 65 | func defRegDiscovery() *model.Instance { 66 | return &model.Instance{ 67 | AppID: "infra.discovery", 68 | Hostname: "test2", 69 | Zone: "sh001", 70 | Env: "pre", 71 | Status: 1, 72 | Addrs: []string{"http://127.0.0.1:7172"}, 73 | LatestTimestamp: time.Now().UnixNano(), 74 | } 75 | } 76 | 77 | var config = newConfig() 78 | 79 | func newConfig() *dc.Config { 80 | c := &dc.Config{ 81 | HTTPClient: &http.ClientConfig{ 82 | Timeout: xtime.Duration(time.Second * 30), 83 | Dial: xtime.Duration(time.Second), 84 | KeepAlive: xtime.Duration(time.Second * 30), 85 | }, 86 | HTTPServer: &http.ServerConfig{Addr: "127.0.0.1:7171"}, 87 | Nodes: []string{"127.0.0.1:7171", "127.0.0.1:7172"}, 88 | Env: &dc.Env{ 89 | Zone: "sh001", 90 | DeployEnv: "pre", 91 | Host: "test_server", 92 | }, 93 | } 94 | return c 95 | } 96 | func init() { 97 | httpMock("GET", "http://127.0.0.1:7172/discovery/fetch/all").Reply(200).JSON(`{"code":0}`) 98 | httpMock("POST", "http://127.0.0.1:7172/discovery/register").Reply(200).JSON(`{"code":0}`) 99 | httpMock("POST", "http://127.0.0.1:7172/discovery/cancel").Reply(200).JSON(`{"code":0}`) 100 | 101 | os.Setenv("ZONE", "sh001") 102 | 
os.Setenv("DEPLOY_ENV", "pre") 103 | } 104 | 105 | func httpMock(method, url string) *gock.Request { 106 | r := gock.New(url) 107 | r.Method = strings.ToUpper(method) 108 | return r 109 | } 110 | 111 | func TestRegister(t *testing.T) { 112 | Convey("test Register", t, func() { 113 | svr, cancel := New(config) 114 | defer cancel() 115 | svr.client.SetTransport(gock.DefaultTransport) 116 | svr.syncUp() 117 | i := model.NewInstance(reg) 118 | svr.Register(context.TODO(), i, reg.LatestTimestamp, reg.Replication, true) 119 | ins, err := svr.Fetch(context.TODO(), fet) 120 | So(err, ShouldBeNil) 121 | So(len(ins.Instances), ShouldResemble, 1) 122 | Convey("test metadta", func() { 123 | for _, is := range ins.Instances { 124 | So(err, ShouldBeNil) 125 | for _, i := range is { 126 | So(i.Metadata["weight"], ShouldEqual, "10") 127 | So(i.Metadata["test"], ShouldEqual, "test") 128 | } 129 | } 130 | }) 131 | Convey("test set", func() { 132 | err = svr.Set(context.TODO(), set) 133 | So(err, ShouldBeNil) 134 | ins, err = svr.Fetch(context.TODO(), fet) 135 | So(err, ShouldBeNil) 136 | So(len(ins.Instances), ShouldResemble, 1) 137 | for _, is := range ins.Instances { 138 | for _, i := range is { 139 | So(i.Status, ShouldEqual, 1) 140 | } 141 | } 142 | }) 143 | }) 144 | } 145 | func TestDiscovery(t *testing.T) { 146 | Convey("test cancel polls", t, func() { 147 | svr, disCancel := New(config) 148 | defer disCancel() 149 | svr.client.SetTransport(gock.DefaultTransport) 150 | reg2 := defRegisArg() 151 | reg2.Hostname = "test2" 152 | i1 := model.NewInstance(reg) 153 | i2 := model.NewInstance(reg2) 154 | svr.Register(context.TODO(), i1, reg.LatestTimestamp, reg.Replication, reg.FromZone) 155 | svr.Register(context.TODO(), i2, reg2.LatestTimestamp, reg.Replication, reg.FromZone) 156 | ch, new, _, err := svr.Polls(context.TODO(), pollArg) 157 | So(err, ShouldBeNil) 158 | So(new, ShouldBeTrue) 159 | ins := <-ch 160 | So(len(ins["main.arch.test"].Instances["sh001"]), ShouldEqual, 2) 161 | pollArg.LatestTimestamp[0] = ins["main.arch.test"].LatestTimestamp 162 | time.Sleep(time.Second) 163 | err = svr.Cancel(context.TODO(), cancel) 164 | So(err, ShouldBeNil) 165 | ch, new, _, err = svr.Polls(context.TODO(), pollArg) 166 | So(err, ShouldBeNil) 167 | So(new, ShouldBeTrue) 168 | ins = <-ch 169 | So(len(ins["main.arch.test"].Instances), ShouldEqual, 1) 170 | }) 171 | } 172 | func TestFetchs(t *testing.T) { 173 | Convey("test fetch multi appid", t, func() { 174 | svr, cancel := New(config) 175 | defer cancel() 176 | svr.client.SetTransport(gock.DefaultTransport) 177 | reg2 := defRegisArg() 178 | reg2.AppID = "appid2" 179 | i1 := model.NewInstance(reg) 180 | i2 := model.NewInstance(reg2) 181 | svr.Register(context.TODO(), i1, reg.LatestTimestamp, reg.Replication, reg.FromZone) 182 | svr.Register(context.TODO(), i2, reg2.LatestTimestamp, reg.Replication, reg.FromZone) 183 | fetchs := newFetchArg() 184 | fetchs.AppID = append(fetchs.AppID, "appid2") 185 | is, err := svr.Fetchs(ctx, fetchs) 186 | So(err, ShouldBeNil) 187 | So(len(is), ShouldResemble, 2) 188 | }) 189 | } 190 | func TestZones(t *testing.T) { 191 | Convey("test multi zone discovery", t, func() { 192 | svr, cancel := New(config) 193 | defer cancel() 194 | svr.client.SetTransport(gock.DefaultTransport) 195 | reg2 := defRegisArg() 196 | reg2.Zone = "sh002" 197 | i1 := model.NewInstance(reg) 198 | i2 := model.NewInstance(reg2) 199 | svr.Register(context.TODO(), i1, reg.LatestTimestamp, reg.Replication, reg.FromZone) 200 | svr.Register(context.TODO(), i2, 
reg2.LatestTimestamp, reg2.Replication, reg2.FromZone) 201 | ch, new, _, err := svr.Polls(context.TODO(), newPoll()) 202 | So(err, ShouldBeNil) 203 | So(new, ShouldBeTrue) 204 | ins := <-ch 205 | So(len(ins["main.arch.test"].Instances), ShouldEqual, 2) 206 | pollArg.Zone = "sh002" 207 | ch, new, _, err = svr.Polls(context.TODO(), newPoll()) 208 | So(err, ShouldBeNil) 209 | So(new, ShouldBeTrue) 210 | ins = <-ch 211 | So(len(ins["main.arch.test"].Instances["sh002"]), ShouldEqual, 1) 212 | Convey("test zone update", func() { 213 | pollArg.LatestTimestamp = []int64{ins["main.arch.test"].LatestTimestamp} 214 | pollArg.Zone = "" 215 | reg3 := defRegisArg() 216 | reg3.Zone = "sh002" 217 | reg3.Hostname = "test03" 218 | i3 := model.NewInstance(reg3) 219 | svr.Register(context.TODO(), i3, reg3.LatestTimestamp, reg3.Replication, reg3.FromZone) 220 | ch, _, _, err = svr.Polls(context.TODO(), pollArg) 221 | So(err, ShouldBeNil) 222 | ins = <-ch 223 | So(len(ins["main.arch.test"].Instances), ShouldResemble, 2) 224 | So(len(ins["main.arch.test"].Instances["sh002"]), ShouldResemble, 2) 225 | So(len(ins["main.arch.test"].Instances["sh001"]), ShouldResemble, 1) 226 | pollArg.LatestTimestamp = []int64{ins["main.arch.test"].LatestTimestamp} 227 | _, _, _, err = svr.Polls(context.TODO(), pollArg) 228 | So(err, ShouldResemble, ecode.NotModified) 229 | }) 230 | }) 231 | } 232 | func TestRenew(t *testing.T) { 233 | Convey("test Renew", t, func() { 234 | svr, cancel := New(config) 235 | defer cancel() 236 | svr.client.SetTransport(gock.DefaultTransport) 237 | i := model.NewInstance(reg) 238 | svr.Register(context.TODO(), i, reg.LatestTimestamp, reg.Replication, reg.FromZone) 239 | _, err := svr.Renew(context.TODO(), rew) 240 | So(err, ShouldBeNil) 241 | rew2.AppID = "main.arch.noexist" 242 | _, err = svr.Renew(context.TODO(), rew2) 243 | So(err, ShouldResemble, ecode.NothingFound) 244 | rew2.AppID = "main.arch.test" 245 | rew2.DirtyTimestamp = 1 246 | rew2.Replication = true 247 | _, err = svr.Renew(context.TODO(), rew2) 248 | So(err, ShouldResemble, ecode.Conflict) 249 | rew2.DirtyTimestamp = time.Now().UnixNano() 250 | _, err = svr.Renew(context.TODO(), rew2) 251 | So(err, ShouldResemble, ecode.NothingFound) 252 | }) 253 | } 254 | 255 | func TestCancel(t *testing.T) { 256 | Convey("test cancel", t, func() { 257 | svr, disCancel := New(config) 258 | defer disCancel() 259 | svr.client.SetTransport(gock.DefaultTransport) 260 | i := model.NewInstance(reg) 261 | svr.Register(context.TODO(), i, reg.LatestTimestamp, reg.Replication, reg.FromZone) 262 | err := svr.Cancel(context.TODO(), cancel) 263 | So(err, ShouldBeNil) 264 | err = svr.Cancel(context.TODO(), cancel) 265 | So(err, ShouldResemble, ecode.NothingFound) 266 | _, err = svr.Fetch(context.TODO(), fet) 267 | So(err, ShouldResemble, ecode.NothingFound) 268 | }) 269 | } 270 | 271 | func TestFetchAll(t *testing.T) { 272 | Convey("test fetch all", t, func() { 273 | svr, cancel := New(config) 274 | defer cancel() 275 | svr.client.SetTransport(gock.DefaultTransport) 276 | i := model.NewInstance(reg) 277 | svr.Register(context.TODO(), i, reg.LatestTimestamp, reg.Replication, reg.FromZone) 278 | fs := svr.FetchAll(context.TODO())[i.AppID] 279 | So(len(fs), ShouldResemble, 1) 280 | }) 281 | } 282 | 283 | func TestNodes(t *testing.T) { 284 | Convey("test nodes", t, func() { 285 | svr, cancel := New(config) 286 | defer cancel() 287 | svr.client.SetTransport(gock.DefaultTransport) 288 | svr.Register(context.Background(), defRegDiscovery(), time.Now().UnixNano(), false, 
true) 289 | time.Sleep(time.Second) 290 | ns := svr.Nodes(context.TODO()) 291 | So(len(ns), ShouldResemble, 2) 292 | }) 293 | } 294 | -------------------------------------------------------------------------------- /discovery/syncup.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "net/url" 7 | "time" 8 | 9 | "github.com/bilibili/discovery/conf" 10 | "github.com/bilibili/discovery/model" 11 | "github.com/bilibili/discovery/registry" 12 | "github.com/go-kratos/kratos/pkg/ecode" 13 | log "github.com/go-kratos/kratos/pkg/log" 14 | ) 15 | 16 | var ( 17 | _fetchAllURL = "http://%s/discovery/fetch/all" 18 | ) 19 | 20 | // Protected return if service in init protect mode. 21 | // if service in init protect mode,only support write, 22 | // read operator isn't supported. 23 | func (d *Discovery) Protected() bool { 24 | return d.protected 25 | } 26 | 27 | // syncUp populates the registry information from a peer eureka node. 28 | func (d *Discovery) syncUp() { 29 | nodes := d.nodes.Load().(*registry.Nodes) 30 | for _, node := range nodes.AllNodes() { 31 | if nodes.Myself(node.Addr) { 32 | continue 33 | } 34 | uri := fmt.Sprintf(_fetchAllURL, node.Addr) 35 | var res struct { 36 | Code int `json:"code"` 37 | Data map[string][]*model.Instance `json:"data"` 38 | } 39 | if err := d.client.Get(context.TODO(), uri, "", nil, &res); err != nil { 40 | log.Error("d.client.Get(%v) error(%v)", uri, err) 41 | continue 42 | } 43 | if res.Code != 0 { 44 | log.Error("service syncup from(%s) failed ", uri) 45 | continue 46 | } 47 | // sync success from other node,exit protected mode 48 | d.protected = false 49 | for _, is := range res.Data { 50 | for _, i := range is { 51 | _ = d.registry.Register(i, i.LatestTimestamp) 52 | } 53 | } 54 | // NOTE: no return, make sure that all instances from other nodes register into self. 
55 | } 56 | nodes.UP() 57 | } 58 | 59 | func (d *Discovery) regSelf() context.CancelFunc { 60 | ctx, cancel := context.WithCancel(context.Background()) 61 | now := time.Now().UnixNano() 62 | ins := &model.Instance{ 63 | Region: d.c.Env.Region, 64 | Zone: d.c.Env.Zone, 65 | Env: d.c.Env.DeployEnv, 66 | Hostname: d.c.Env.Host, 67 | AppID: model.AppID, 68 | Addrs: []string{ 69 | "http://" + d.c.HTTPServer.Addr, 70 | }, 71 | Status: model.InstanceStatusUP, 72 | RegTimestamp: now, 73 | UpTimestamp: now, 74 | LatestTimestamp: now, 75 | RenewTimestamp: now, 76 | DirtyTimestamp: now, 77 | } 78 | d.Register(ctx, ins, now, false, false) 79 | go func() { 80 | ticker := time.NewTicker(30 * time.Second) 81 | defer ticker.Stop() 82 | for { 83 | select { 84 | case <-ticker.C: 85 | arg := &model.ArgRenew{ 86 | AppID: ins.AppID, 87 | Zone: d.c.Env.Zone, 88 | Env: d.c.Env.DeployEnv, 89 | Hostname: d.c.Env.Host, 90 | } 91 | if _, err := d.Renew(ctx, arg); err != nil && err == ecode.NothingFound { 92 | d.Register(ctx, ins, now, false, false) 93 | } 94 | case <-ctx.Done(): 95 | arg := &model.ArgCancel{ 96 | AppID: model.AppID, 97 | Zone: d.c.Env.Zone, 98 | Env: d.c.Env.DeployEnv, 99 | Hostname: d.c.Env.Host, 100 | } 101 | if err := d.Cancel(context.Background(), arg); err != nil { 102 | log.Error("d.Cancel(%+v) error(%v)", arg, err) 103 | } 104 | return 105 | } 106 | } 107 | }() 108 | return cancel 109 | } 110 | 111 | func (d *Discovery) nodesproc() { 112 | var ( 113 | lastTs int64 114 | ) 115 | for { 116 | arg := &model.ArgPolls{ 117 | AppID: []string{model.AppID}, 118 | Env: d.c.Env.DeployEnv, 119 | Hostname: d.c.Env.Host, 120 | LatestTimestamp: []int64{lastTs}, 121 | } 122 | ch, _, _, err := d.registry.Polls(arg) 123 | if err != nil && err != ecode.NotModified { 124 | log.Error("d.registry(%v) error(%v)", arg, err) 125 | time.Sleep(time.Second) 126 | continue 127 | } 128 | apps := <-ch 129 | ins, ok := apps[model.AppID] 130 | if !ok || ins == nil { 131 | return 132 | } 133 | var ( 134 | nodes []string 135 | zones = make(map[string][]string) 136 | ) 137 | for _, ins := range ins.Instances { 138 | for _, in := range ins { 139 | for _, addr := range in.Addrs { 140 | u, err := url.Parse(addr) 141 | if err == nil && u.Scheme == "http" { 142 | if in.Zone == d.c.Env.Zone { 143 | nodes = append(nodes, u.Host) 144 | } else { 145 | zones[in.Zone] = append(zones[in.Zone], u.Host) 146 | } 147 | } 148 | } 149 | } 150 | } 151 | lastTs = ins.LatestTimestamp 152 | c := new(conf.Config) 153 | *c = *d.c 154 | c.Nodes = nodes 155 | c.Zones = zones 156 | ns := registry.NewNodes(c) 157 | ns.UP() 158 | d.nodes.Store(ns) 159 | log.Info("discovery changed nodes:%v zones:%v", nodes, zones) 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /doc/api.md: -------------------------------------------------------------------------------- 1 | - [字段定义](#字段定义) 2 | - [错误码定义](#错误码定义) 3 | - [注册register](#注册register) 4 | - [心跳renew](#心跳renew) 5 | - [下线cancel](#下线cancel) 6 | - [获取实例fetch](#获取实例fetch) 7 | - [批量获取实例fetchs](#批量获取实例fetchs) 8 | - [长轮询获取实例poll](#长轮询获取实例poll) 9 | - [长轮询批量获取实例polls](#长轮询批量获取实例polls) 10 | - [获取node节点](#获取node节点) 11 | - [修改实例信息set](#修改实例信息set) 12 | 13 | 14 | ### 字段定义 15 | 16 | | 字段 | 说明 | 17 | | -------- | -------------------------------------------------------------------------------------------------------------------------------- | 18 | | zone | 机房服务地区标识,用于多机房部署区分数据中心 | 19 | | env | 环境信息,(例如:fat1,uat ,pre ,prod)分别对应fat环境 集成环境,预发布和线上 | 20 | | appid | 
服务唯一标识。【业务标识.服务标识[.子服务标识]】 全局唯一,禁止修改 | 21 | | hostname | instance主机标识 | 22 | | addrs | 服务地址 格式为 scheme://ip:port,支持多个协议地址。如 grpc://127.0.0.1:8888, http://127.0.0.1:8887 | 23 | | color | 服务标记,可用于集群区分,业务灰度流量选择集群 | 24 | | version | 服务版本号信息 | 25 | | metadata | 服务自定义扩展元数据,格式为{"key1":"value1"},可以用于传递权重,负载等信息 使用json格式传递。 { “weight":"10","key2":"value2"} | 26 | 27 | ### 错误码定义ecode 28 | 29 | | 错误码 | 说明 | 30 | | ------ | -------------- | 31 | | 0 | 成功 | 32 | | -304 | 实例信息无变化 | 33 | | -400 | 请求参数错误 | 34 | | -404 | 实例不存在 | 35 | | -409 | 实例信息不一致 | 36 | | -500 | 未知错误 | 37 | 38 | ### 注册register 39 | 40 | *HTTP* 41 | 42 | POST http://HOST/discovery/register 43 | 44 | *请求参数* 45 | 46 | | 参数名 | 必选 | 类型 | 说明 | 47 | | -------- | ----- | ----------------- | -------------------------------- | 48 | | zone | true | string | 可用区 | 49 | | env | true | string | 环境 | 50 | | appid | true | string | 服务名标识 | 51 | | hostname | true | string | 主机名 | 52 | | addrs | true | []string | 服务地址列表 | 53 | | status | true | int | 状态,1表示接收流量,2表示不接收 | 54 | | color | false | string | 灰度或集群标识 | 55 | | metadata | false | json string | 业务自定义信息 必须为map[string]string 的json格式 | 56 | 57 | *返回结果* 58 | 59 | ```json 60 | *****成功***** 61 | { 62 | "code":0, 63 | "message":"" 64 | } 65 | ****失败**** 66 | { 67 | "code":-400, 68 | "message":"-400" 69 | } 70 | ``` 71 | 72 | *CURL* 73 | ```shell 74 | curl 'http://127.0.0.1:7171/discovery/register' -d "zone=sh1&env=test&appid=provider&hostname=myhostname&status=1&addrs=http%3A%2F%2F172.1.1.1%3A8000&addrs=grpc%3A%2F%2F172.1.1.1%3A9999&version=111&metadata=%7B%22weight%22%3A10%7D" 75 | ``` 76 | 77 | ### 心跳renew 78 | 79 | *HTTP* 80 | 81 | POST http://HOST/discovery/renew 82 | 83 | *请求参数* 84 | 85 | | 参数名 | 必选 | 类型 | 说明 | 86 | | -------- | ----- | ----------------- | -------------------------------- | 87 | | zone | true | string | 可用区 | 88 | | env | true | string | 环境 | 89 | | appid | true | string | 服务名标识 | 90 | | hostname | true | string | 主机名 | 91 | 92 | *返回结果* 93 | 94 | ```json 95 | *****成功***** 96 | { 97 | "code":0, 98 | "message":"" 99 | } 100 | ****失败**** 101 | { 102 | "code":-400, 103 | "message":"-400" 104 | } 105 | ``` 106 | 107 | *CURL* 108 | ```shell 109 | curl 'http://127.0.0.1:7171/discovery/renew' -d "zone=sh1&env=test&appid=provider&hostname=myhostname" 110 | ``` 111 | 112 | ### 下线cancel 113 | 114 | *HTTP* 115 | 116 | POST http://HOST/discovery/cancel 117 | 118 | *请求参数* 119 | 120 | | 参数名 | 必选 | 类型 | 说明 | 121 | | -------- | ----- | ----------------- | -------------------------------- | 122 | | zone | true | string | 可用区 | 123 | | env | true | string | 环境 | 124 | | appid | true | string | 服务名标识 | 125 | | hostname | true | string | 主机名 | 126 | 127 | *返回结果* 128 | 129 | ```json 130 | *****成功***** 131 | { 132 | "code":0, 133 | "message":"" 134 | } 135 | ****失败**** 136 | { 137 | "code":-400, 138 | "message":"-400" 139 | } 140 | ``` 141 | 142 | *CURL* 143 | ```shell 144 | curl 'http://127.0.0.1:7171/discovery/cancel' -d "zone=sh1&env=test&appid=provider&hostname=myhostname" 145 | ``` 146 | 147 | ### 获取实例fetch 148 | 149 | *HTTP* 150 | 151 | GET http://HOST/discovery/fetch 152 | 153 | *请求参数* 154 | 155 | | 参数名 | 必选 | 类型 | 说明 | 156 | | -------- | ----- | ----------------- | -------------------------------- | 157 | | appid | true | string | 服务名标识 | 158 | | env | true | string | 环境 | 159 | | zone | false | string | 可用区,不传返回所有zone的 | 160 | | status | true | int | 拉取某状态服务1.接收流量 2.不接收 3.所有状态 | 161 | 162 | *返回结果* 163 | 164 | ```json 165 | { 166 | "code": 0, 167 | "data": { 168 | "instances": { 169 | "zone001": [ 
170 | { 171 | "zone": "zone001", 172 | "env": "uat", 173 | "appid": "app_id_0", 174 | "hostname": "hostname000000", 175 | "color": "", 176 | "version": "111", 177 | "metadata": { 178 | "provider": "", 179 | "weight": "10" 180 | }, 181 | "addrs": [ 182 | "http://172.1.1.1:8080", 183 | "gorpc://172.1.1.1:8089" 184 | ], 185 | "status": 1, 186 | "reg_timestamp": 1525948301833084700, 187 | "up_timestamp": 1525948301833084700, 188 | "renew_timestamp": 1525949202959821300, 189 | "dirty_timestamp": 1525948301848680000, 190 | "latest_timestamp": 1525948301833084700 191 | } 192 | ] 193 | }, 194 | "latest_timestamp": 1525948301833084700 195 | } 196 | } 197 | ``` 198 | 199 | *CURL* 200 | ```shell 201 | curl 'http://127.0.0.1:7171/discovery/fetch?zone=sh1&env=test&appid=provider&status=1' 202 | ``` 203 | 204 | ### 批量获取实例fetchs 205 | 206 | *HTTP* 207 | 208 | GET http://HOST/discovery/fetchs 209 | 210 | *请求参数* 211 | 212 | | 参数名 | 必选 | 类型 | 说明 | 213 | | -------- | ----- | ----------------- | -------------------------------- | 214 | | appid | true | []string | 服务名标识 | 215 | | env | true | string | 环境 | 216 | | zone | false | string | 可用区,不传返回所有zone的 | 217 | | status | true | int | 拉取某状态服务1.接收流量 2.不接收 3.所有状态 | 218 | 219 | *返回结果* 220 | 221 | ```json 222 | { 223 | "code": 0, 224 | "data": { 225 | "app_id_0": { 226 | "instances": { 227 | "zone001": [ 228 | { 229 | "zone": "zone001", 230 | "env": "uat", 231 | "appid": "app_id_0", 232 | "hostname": "hostname000000", 233 | "color": "", 234 | "version": "111", 235 | "metadata": { 236 | "provider": "", 237 | "weight": "10" 238 | }, 239 | "addrs": [ 240 | "http://172.1.1.1:8080", 241 | "gorpc://172.1.1.1:8089" 242 | ], 243 | "status": 1, 244 | "reg_timestamp": 1525948301833084700, 245 | "up_timestamp": 1525948301833084700, 246 | "renew_timestamp": 1525949202959821300, 247 | "dirty_timestamp": 1525948301848680000, 248 | "latest_timestamp": 1525948301833084700 249 | } 250 | ] 251 | }, 252 | "latest_timestamp": 1525948301833084700 253 | }, 254 | "app_id_1": { 255 | "instances": { 256 | "zone001": [ 257 | { 258 | "zone": "zone001", 259 | "env": "uat", 260 | "appid": "app_id_1", 261 | "hostname": "hostname111111", 262 | "color": "", 263 | "version": "222", 264 | "metadata": { 265 | "provider": "", 266 | "weight": "10" 267 | }, 268 | "addrs": [ 269 | "http://172.1.1.1:7070", 270 | "gorpc://172.1.1.1:7079" 271 | ], 272 | "status": 1, 273 | "reg_timestamp": 1525948301833084700, 274 | "up_timestamp": 1525948301833084700, 275 | "renew_timestamp": 1525949202959821300, 276 | "dirty_timestamp": 1525948301848680000, 277 | "latest_timestamp": 1525948301833084700 278 | } 279 | ] 280 | }, 281 | "latest_timestamp": 1525948297987066600 282 | } 283 | } 284 | } 285 | ``` 286 | 287 | *CURL* 288 | ```shell 289 | curl 'http://127.0.0.1:7171/discovery/fetchs?zone=sh1&env=test&appid=provider&appid=provider2&status=1' 290 | ``` 291 | 292 | ### 长轮询获取实例poll 293 | 294 | *HTTP* 295 | 296 | GET http://HOST/discovery/poll 297 | 298 | *请求参数* 299 | 300 | | 参数名 | 必选 | 类型 | 说明 | 301 | | -------- | ----- | ----------------- | -------------------------------- | 302 | | appid | true | string | 服务名标识 | 303 | | env | true | string | 环境 | 304 | | zone | false | string | 可用区,不传返回所有zone的 | 305 | | latest_timestamp | false | int | 服务最新更新时间 | 306 | 307 | *返回结果* 308 | 309 | ```json 310 | { 311 | "code": 0, 312 | "data": { 313 | "instances": { 314 | "zone001": [ 315 | { 316 | "zone": "zone001", 317 | "env": "uat", 318 | "appid": "app_id_0", 319 | "hostname": "hostname000000", 320 | "color": "", 321 | "version": 
"111", 322 | "metadata": { 323 | "provider": "", 324 | "weight": "10" 325 | }, 326 | "addrs": [ 327 | "http://172.1.1.1:8080", 328 | "gorpc://172.1.1.1:8089" 329 | ], 330 | "status": 1, 331 | "reg_timestamp": 1525948301833084700, 332 | "up_timestamp": 1525948301833084700, 333 | "renew_timestamp": 1525949202959821300, 334 | "dirty_timestamp": 1525948301848680000, 335 | "latest_timestamp": 1525948301833084700 336 | } 337 | ] 338 | }, 339 | "latest_timestamp": 1525948301833084700 340 | } 341 | } 342 | ``` 343 | 344 | *CURL* 345 | ```shell 346 | curl 'http://127.0.0.1:7171/discovery/poll?zone=sh1&env=test&appid=provider&latest_timestamp=0' 347 | ``` 348 | 349 | ### 长轮询批量获取实例polls 350 | 351 | *HTTP* 352 | 353 | GET http://HOST/discovery/polls 354 | 355 | *请求参数* 356 | 357 | | 参数名 | 必选 | 类型 | 说明 | 358 | | -------- | ----- | ----------------- | -------------------------------- | 359 | | appid | true | []string | 服务名标识 | 360 | | env | true | string | 环境 | 361 | | zone | false | string | 可用区,不传返回所有zone的 | 362 | | latest_timestamp | false | []int | 服务最新更新时间,要与appid一一对应 | 363 | 364 | *返回结果* 365 | 366 | ```json 367 | { 368 | "code": 0, 369 | "data": { 370 | "app_id_0": { 371 | "instances": { 372 | "zone001": [ 373 | { 374 | "zone": "zone001", 375 | "env": "uat", 376 | "appid": "app_id_0", 377 | "hostname": "hostname000000", 378 | "color": "", 379 | "version": "111", 380 | "metadata": { 381 | "provider": "", 382 | "weight": "10" 383 | }, 384 | "addrs": [ 385 | "http://172.1.1.1:8080", 386 | "gorpc://172.1.1.1:8089" 387 | ], 388 | "status": 1, 389 | "reg_timestamp": 1525948301833084700, 390 | "up_timestamp": 1525948301833084700, 391 | "renew_timestamp": 1525949202959821300, 392 | "dirty_timestamp": 1525948301848680000, 393 | "latest_timestamp": 1525948301833084700 394 | } 395 | ] 396 | }, 397 | "latest_timestamp": 1525948301833084700 398 | }, 399 | "app_id_1": { 400 | "instances": { 401 | "zone001": [ 402 | { 403 | "zone": "zone001", 404 | "env": "uat", 405 | "appid": "app_id_1", 406 | "hostname": "hostname111111", 407 | "color": "", 408 | "version": "222", 409 | "metadata": { 410 | "provider": "", 411 | "weight": "10" 412 | }, 413 | "addrs": [ 414 | "http://172.1.1.1:7070", 415 | "gorpc://172.1.1.1:7079" 416 | ], 417 | "status": 1, 418 | "reg_timestamp": 1525948301833084700, 419 | "up_timestamp": 1525948301833084700, 420 | "renew_timestamp": 1525949202959821300, 421 | "dirty_timestamp": 1525948301848680000, 422 | "latest_timestamp": 1525948301833084700 423 | } 424 | ] 425 | }, 426 | "latest_timestamp": 1525948297987066600 427 | } 428 | } 429 | } 430 | ``` 431 | 432 | *CURL* 433 | ```shell 434 | curl 'http://127.0.0.1:7171/discovery/polls?zone=sh1&env=test&appid=provider1&appid=provider2&latest_timestamp=01&latest_timestamp=02' 435 | ``` 436 | 437 | ### 获取node节点 438 | 439 | *HTTP* 440 | 441 | GET http://HOST/discovery/nodes 442 | 443 | *请求参数* 444 | 445 | 无 446 | 447 | *返回结果* 448 | 449 | ```json 450 | { 451 | "code": 0, 452 | "data": [ 453 | { 454 | "addr": "172.1.1.1:7171", 455 | "status": 0, 456 | "zone": "zone001" 457 | }, 458 | { 459 | "addr": "172.1.1.2:7171", 460 | "status": 0, 461 | "zone": "zone001" 462 | }, 463 | { 464 | "addr": "172.1.1.3:7171", 465 | "status": 0, 466 | "zone": "zone001" 467 | } 468 | ] 469 | } 470 | ``` 471 | 472 | *CURL* 473 | ```shell 474 | curl 'http://127.0.0.1:7171/discovery/nodes' 475 | ``` 476 | 477 | ### 修改实例信息set 478 | 479 | *HTTP* 480 | 481 | POST http://HOST/discovery/set 482 | 483 | *请求参数* 484 | 485 | | 参数名 | 必选 | 类型 | 说明 | 486 | | -------- | ----- | 
----------------- | -------------------------------- | 487 | | zone | true | string | 可用区 | 488 | | env | true | string | 环境 | 489 | | appid | true | string | 服务名标识 | 490 | | hostname | true | []string | 主机名 | 491 | | status | false | []int | 状态,1表示接收流量,2表示不接收 | 492 | | color | false | []string | 灰度或集群标识 | 493 | | metadata | false | []string | 业务自定义信息 string 必须为map[strinng]string 的json格式 | 494 | 495 | *返回结果* 496 | 497 | ```json 498 | *****成功***** 499 | { 500 | "code":0, 501 | "message":"" 502 | } 503 | ****失败**** 504 | { 505 | "code":-400, 506 | "message":"-400" 507 | } 508 | ``` 509 | 510 | *CURL* 511 | ```shell 512 | curl 'http://127.0.0.1:7171/discovery/set' -d "zone=sh1&env=test&appid=provider&hostname=myhostname&status=1&color=red&hostname=myhostname2&status=1&color=red" 513 | ``` 514 | -------------------------------------------------------------------------------- /doc/arch.md: -------------------------------------------------------------------------------- 1 | ## 设计概览 2 | 3 | ### 基本概念 4 | 5 | 0. 通过AppID(服务名)和hostname定位实例 6 | 1. Node: discovery server节点 7 | 2. Provider: 服务提供者,目前托管给k8s平台,容器启动后发起register请求给Discover server,后定期(30s)心跳一次 8 | 3. Consumer: 启动时拉取node节点信息,后随机选择一个node发起long polling(30s一次)拉取服务instances列表 9 | 4. Instance: 保存在node内存中的AppID对应的容器节点信息,包含hostname/ip/service等 10 | 11 | ### 架构图 12 | 13 | ![discovery arch](discovery_arch.png) 14 | 15 | ### 重要步骤 16 | 17 | 1. 心跳复制(Peer to Peer),数据一致性的保障: 18 | * AppID注册时根据当前时间生成dirtyTimestamp,nodeA向nodeB同步(register)时,nodeB可能有以下两种情况: 19 | * 返回-404 则nodeA携带dirtyTimestamp向nodeB发起注册请求,把最新信息同步: 20 | 1. nodeB中不存在实例 21 | 2. nodeB中dirtyTimestamp较小 22 | * 返回-409 nodeB不同意采纳nodeA信息,且返回自身信息,nodeA使用该信息更新自身 23 | * AppID注册成功后,Provider每(30s)发起一次heartbeat请求,处理流程如上 24 | 2. Instance管理 25 | * 正常检测模式,随机分批踢掉无心跳Instance节点,尽量避免单应用节点被一次全踢 26 | * 网络闪断和分区时自我保护模式 27 | * 60s内丢失大量(小于Instance总数*2*0.85)心跳数,“好”“坏”Instance信息都保留 28 | * 所有node都会持续提供服务,单个node的注册和发现功能不受影响 29 | * 最大保护时间,防止分区恢复后大量原先Instance真的已经不存在时,一直处于保护模式 30 | 3. 
Consumer客户端 31 | * 长轮询+node推送,服务发现准实时 32 | * 订阅式,只需要关注想要关注的AppID的Instance列表变化 33 | * 缓存实例Instance列表信息,保证与node网络不通等无法访问到node情况时原先的Instance可用 34 | 35 | ### 多注册中心 36 | 37 | ![discovery zone arch](discovery_zone_arch.png) 38 | 39 | * 机房定义为zone,表示“可用区”(可能两个相邻机房通过牛逼专线当一个机房用呢~~所以没用IDC~~) 40 | * zoneA使用zoneB的其中一个node地址(只是不需要同步信息) 41 | * 如zoneA内有node1,node2,node3,zoneB内有nodeI,nodeII,nodeIII 42 | * zoneA将zoneB的nodeI配入本身配置文件内,当做一个特殊node,同时zoneB将zoneA的node1配入本身当做特殊node(请参考配置文件内zones) 43 | * 跨zone同步数据时,单向同步,zoneB的nodeI收到信息后,nodeI在nodeII,nodeIII之间只做内部广播,不会再次向zoneA的node广播 44 | * 如果有条件可以使用SLB,请参考[B站最佳实践](practice.md) 45 | 46 | ### 自发现 47 | * Discovery节点上下线自发现,节点之间可以感知到状态 48 | * 客户端SDK可感知节点状态,进行动态添加、移除节点 49 | 50 | ### 多机房调度 51 | * 支持通过通过下发scheduler信息,进行多机房间的[流量调度](scheduler.md) 52 | * 机房故障或负载过高的情况下,允许通过scheduler将部分流量按比例切换到其他机房。 -------------------------------------------------------------------------------- /doc/discovery_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/discovery/1e12d5c0080ecd7ce97ab78076ef36dda8d56a1a/doc/discovery_arch.png -------------------------------------------------------------------------------- /doc/discovery_pod_quit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/discovery/1e12d5c0080ecd7ce97ab78076ef36dda8d56a1a/doc/discovery_pod_quit.png -------------------------------------------------------------------------------- /doc/discovery_pod_start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/discovery/1e12d5c0080ecd7ce97ab78076ef36dda8d56a1a/doc/discovery_pod_start.png -------------------------------------------------------------------------------- /doc/discovery_sdk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/discovery/1e12d5c0080ecd7ce97ab78076ef36dda8d56a1a/doc/discovery_sdk.png -------------------------------------------------------------------------------- /doc/discovery_sdk_self.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/discovery/1e12d5c0080ecd7ce97ab78076ef36dda8d56a1a/doc/discovery_sdk_self.png -------------------------------------------------------------------------------- /doc/discovery_wechat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/discovery/1e12d5c0080ecd7ce97ab78076ef36dda8d56a1a/doc/discovery_wechat.png -------------------------------------------------------------------------------- /doc/discovery_zone_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/discovery/1e12d5c0080ecd7ce97ab78076ef36dda8d56a1a/doc/discovery_zone_arch.png -------------------------------------------------------------------------------- /doc/felixhao_wechat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/discovery/1e12d5c0080ecd7ce97ab78076ef36dda8d56a1a/doc/felixhao_wechat.png -------------------------------------------------------------------------------- /doc/intro.md: -------------------------------------------------------------------------------- 1 | ## 背景 2 | 3 | * 团队是B站主站技术,位于上海的Gopher 4 | * 
之前一直使用zookeeper作为服务注册发现中间件,但由于zookeeper: 5 | * 用好难,维护难,二次开发难 6 | * 服务规模太大(3W+节点),一套zk集群很难满足大量的连接和写请求,但如果拆分服务类型维护多套集群又破坏了连通性 7 | * CP系统,对于微服务的服务注册发现,其实不如一套AP系统更可用 8 | * 运维小锅锅们大力推进业务向k8s的迁移,zk难以满足迭代需求 9 | * 也考虑过直接使用现有的如consul、etcd,但我们更想要一套纯纯的AP系统 10 | * 补充资料:[Why You Shouldn’t Use ZooKeeper for Service Discovery](https://medium.com/knerd/eureka-why-you-shouldnt-use-zookeeper-for-service-discovery-4932c5c7e764) 11 | 12 | ## 设计目标 13 | 14 | Netflix Eureka可以说是服务注册发现领域AP系统的标杆,所以参考Eureka之后,我们设定以下目标: 15 | 16 | 1. 实现AP类型服务注册发现系统,在可用性极极极极强的情况下,努力保证数据最终一致性 17 | 2. 与公司k8s平台深度结合,注册打通、发布平滑、naming service等等 18 | 3. 网络闪断等异常情况,可自我保护,保证每个节点可用 19 | 4. 基于HTTP协议实现接口,简单易用,维护各流行语言SDK 20 | 21 | ## 相对Netflix Eureka的改进 22 | 23 | * 长轮询监听应用变更(Eureka定期30s拉取一次) 24 | * 只拉取感兴趣的AppID实例(Eureka一拉就是全部,无法区分) 25 | * 合并node之间的同步请求/(ㄒoㄒ)/~~其实还没实现,是个TODO 26 | * Dashboard骚操作~ 27 | * 多注册中心信息同步支持 28 | * 更完善的日志记录 29 | 30 | PS:[Eureka2.0](https://github.com/Netflix/eureka/wiki/Eureka-2.0-Motivations)以上功能基本也要实现,但难产很久了/(ㄒoㄒ)/~~ 31 | -------------------------------------------------------------------------------- /doc/practice.md: -------------------------------------------------------------------------------- 1 | # 环境及配置说明 2 | 3 | *注:以下名称IP等都是示例* 4 | 5 | ## 环境介绍 6 | 7 | 可用区zone: sh1, sh2 8 | sh1物理机三台:172.1.1.1, 172.1.1.2, 172.1.1.3 9 | sh2物理机三台:172.2.2.1, 172.2.2.2, 172.2.2.3 10 | 11 | ## Supervisor 12 | 13 | Discovery节点通过supervisor管理配置如下: 14 | ```shell 15 | [program:discovery] 16 | name = discovery 17 | command= /app/discovery -conf /conf/discovery.toml 18 | autostart = true 19 | autorestart = true 20 | user = nobody 21 | stdout_logfile = /log/discovery/stdout.log 22 | stderr_logfile = /log/discovery/stderr.log 23 | ``` 24 | 25 | ## SLB 26 | 27 | sh1内SLB`sh1.discovery.bilibili.com`配置如下,sh2的`sh2.discovery.bilibili.com`类似: 28 | ```nginx 29 | upstream discovery_upstream { 30 | server 172.1.1.1:7171; 31 | server 172.1.1.2:7171; 32 | server 172.1.1.3:7171; 33 | } 34 | server{ 35 | listen 80; 36 | server_name sh1.discovery.bilibili.com; 37 | location /discovery { 38 | // 此处省略其余配置... 39 | proxy_pass http://discovery_upstream; 40 | } 41 | } 42 | ``` 43 | 44 | ## DiscoveryNode配置 45 | 46 | sh1的172.1.1.1节点`discovery.toml`配置如下,其余节点类似: 47 | ```toml 48 | zone = "sh1" 49 | nodes = ["172.1.1.1:7171","172.1.1.2:7171","172.1.1.3:7171"] 50 | 51 | [zones] 52 | "sh2.discovery.bilibili.com" = "sh2" 53 | # 注意,这里对应sh2的节点配置是 `"sh1.discovery.bilibili.com" = "sh1"` 54 | 55 | [httpServer] 56 | addr = "172.1.1.1:7171" # 注意IP配置为本机真实IP 57 | 58 | [httpClient] 59 | dial = "1s" 60 | keepAlive = "120s" 61 | ``` 62 | 63 | # 使用说明 64 | 65 | ## 服务提供者 66 | 67 | B站内部有k8s平台,平台名字叫caster。 68 | 69 | 服务提供者启动过程中流程如下: 70 | 71 | ![](discovery_pod_start.png) 72 | 73 | 服务提供者停止过程中流程如下: 74 | 75 | ![](discovery_pod_quit.png) 76 | 77 | ### 没上caster平台的服务 78 | 79 | 服务基于`naming/client.go`中SDK实现进程启动和退出时的register/renew/cancel,详细请看`naming/example_test.go`内示例代码 80 | 81 | ## 服务消费者 82 | 83 | 服务消费者通过使用`naming/client.go`中SDK实现依赖appID的fetchs/polls,详细请看`naming/example_test.go`内示例代码 84 | -------------------------------------------------------------------------------- /doc/scheduler.md: -------------------------------------------------------------------------------- 1 | ## 简介 2 | 多机房部署时,需要对各个机房的流量进行控制调度,多机房流量调度主要解决一下几个问题 3 | 4 | 1. 根据监控获取的机房负载指标,动态下发机房负载调度信息,是个机房流量尽量负载均衡 5 | 2. 机房故障时,通过下发机房调度信息,动态调度流量到可用机房,保证流量的平稳切换 6 | 7 | ## 实现 8 | 多机房流量调度的实现包括 9 | 10 | 1. discovery下发负载调度信息 11 | 2. 
sdk根据负载信息进行流量调度 12 | 13 | ## discovery 实现 14 | 15 | * 通过poll / fetch 接口动态返回多机房的流量调度信息 16 | 17 | 调度信息格式如下(scheduler 部分) 18 | ``` 19 | { 20 | "code":0, 21 | "message":"0", 22 | "ttl":1, 23 | "data":{ 24 | "main.account.account-service":{ 25 | "instances":[....], 26 | "scheduler":[{"src":"sh001","dst":{"sh001":3,"sh002":1}},{"src":"bj002","dst":{"bj001":1,"bj002":3}}], 27 | "latest_timestamp":1545795463166919001, 28 | "latest_timestamp_str":"1545795463" 29 | }, 30 | "latest_timestamp":1525948297987066600 31 | } 32 | } 33 | ``` 34 | * scheduler 返回信息说明 35 | 36 | |参数 |含义| 37 | |--|--| 38 | |scheduler.src |源机房信息| 39 | |scheduler.dst| 服务提供方所在机房信息| 40 | |dst.key |服务提供方所在目标机房| 41 | |dst.value| 服务提供方所在机房的流量权重| 42 | 上面示例的返回信息表示: 43 | * sh001 的服务调度3/4的流量到sh001 ,调度1/4的流量到sh002机房 44 | * bj002 的服务调度 1/4 的流量到bj001 , 调度3/4的流量到bj002 机房 45 | 46 | ## sdk实现 47 | * sdk使用二级调度进行流量的负载调度 48 | 49 | 1. 根据返回的scheduler判断自身所处的机房匹配src获取机房调度信息 50 | 51 | 如:部署在sh001的服务匹配到到 52 | ``` 53 | { 54 | "src":"sh001", 55 | "dst":{ 56 | "sh001":3, 57 | "sh002":1 58 | } 59 | }, 60 | ``` 61 | 则需要调度3/4的流量到sh001机房,1/4的流量到sh002 机房 62 | 63 | 2. 根据机房获取机房应用实例,并由实例的weight进行节点负载的二次调度 64 | 参考 [sdk 实现](https://github.com/bilibili/discovery/blob/master/naming/naming.go#L76) 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /doc/sdk.md: -------------------------------------------------------------------------------- 1 | ### sdk实现逻辑 2 | #### discovery 服务节点自发现sdk逻辑 3 | 4 | ![discovery sdk self ](discovery_sdk_self.png) 5 | 6 | 1. 调用 http://discovery.bilibili.co/discovery/polls 并指定appid参数为infra.discovery 接口获取discovery服务中心节点信息,获取到discovery的ip:port 节点列表。discovery 服务更新的时候会返回最新的discovery节点信息,sdk需要更新discovery节点信息 7 | 2. 将获得的nodes列表的顺序随机打乱后,获取一个随机的nodes节点列表选择第一个节点使用 http://ip:port /discovery/polls 拉取服务节点。poll(polls) 接口为长轮训接口。如果server节点实例没有变更,则接口会阻塞直到30s返回-304 。如果server节点发生变更,则接口立即返回并带上所有instances信息。如果调用失败或服务端返回非-304的code码,则选择列表中后一个节点并进行重试直到成功并记录下当前使用的节点index。(注意: polls接口需要使用①获取到的ip地址进行直连,因为poll为长轮训接口,如果通过域名访问会slb超时) 8 | 3. 通过nodes[idx] 发起polls discovery的请求,实时监听discovery nodes节点变更。如果收到poll接口变更推送则进行④,否则进行⑤ 9 | 4. 收到节点变更推送,对比收到的节点列表与本地旧的列表是否一致 ,如果一致则回到③,否则回到②重新用新的列表打散获取新的nodes 10 | 5. 如果polls接口返回err 不为nil则转到⑥,否则如果code=-304转到⑦ 11 | 6. 收到err不为nil,说明当前节点可能故障,将idx+1 并且last_timestamp设置为0进行节点切换重新回到③发起polls 12 | 7. 收到code=-304 说明服务节点无变更,则回到③重新继续polls 13 | 14 | 15 | #### 应用发现实现逻辑 16 | 17 | ![discovery arch](discovery_arch.png) 18 | 19 | 1. 选择可用的节点,将应用appid加入poll的appid列表 20 | 2. 如果polls请求返回err,则切换node节点,切换逻辑与自发现错误时切换逻辑一致 21 | 3. 如果polls返回-304 ,说明appid无变更,重新发起poll监听变更 22 | 4. 
polls接口返回appid的instances列表,完成服务发现,根据需要选择不同的负载均衡算法进行节点的调度 -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/bilibili/discovery 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/BurntSushi/toml v0.3.1 7 | github.com/go-kratos/kratos v0.6.0 8 | github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c // indirect 9 | github.com/jtolds/gls v4.20.0+incompatible // indirect 10 | github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32 // indirect 11 | github.com/smartystreets/assertions v0.0.0-20190401211740-f487f9de1cd3 // indirect 12 | github.com/smartystreets/goconvey v0.0.0-20180222194500-ef6db91d284a 13 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e 14 | google.golang.org/grpc v1.29.1 15 | gopkg.in/h2non/gock.v1 v1.0.8 16 | ) 17 | -------------------------------------------------------------------------------- /http/discovery.go: -------------------------------------------------------------------------------- 1 | package http 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/bilibili/discovery/model" 9 | 10 | "github.com/go-kratos/kratos/pkg/ecode" 11 | log "github.com/go-kratos/kratos/pkg/log" 12 | bm "github.com/go-kratos/kratos/pkg/net/http/blademaster" 13 | ) 14 | 15 | const ( 16 | _pollWaitSecond = 30 * time.Second 17 | ) 18 | 19 | func register(c *bm.Context) { 20 | arg := new(model.ArgRegister) 21 | if err := c.Bind(arg); err != nil { 22 | return 23 | } 24 | i := model.NewInstance(arg) 25 | if i.Status == 0 || i.Status > 2 { 26 | log.Error("register params status invalid") 27 | return 28 | } 29 | if arg.Metadata != "" { 30 | // check the metadata type is json 31 | if !json.Valid([]byte(arg.Metadata)) { 32 | c.JSON(nil, ecode.RequestErr) 33 | log.Error("register params() metadata(%v) invalid json", arg.Metadata) 34 | return 35 | } 36 | } 37 | // register replication 38 | if arg.DirtyTimestamp > 0 { 39 | i.DirtyTimestamp = arg.DirtyTimestamp 40 | } 41 | dis.Register(c, i, arg.LatestTimestamp, arg.Replication, arg.FromZone) 42 | c.JSON(nil, nil) 43 | } 44 | 45 | func renew(c *bm.Context) { 46 | arg := new(model.ArgRenew) 47 | if err := c.Bind(arg); err != nil { 48 | return 49 | } 50 | // renew 51 | c.JSON(dis.Renew(c, arg)) 52 | } 53 | 54 | func cancel(c *bm.Context) { 55 | arg := new(model.ArgCancel) 56 | if err := c.Bind(arg); err != nil { 57 | return 58 | } 59 | // cancel 60 | c.JSON(nil, dis.Cancel(c, arg)) 61 | } 62 | 63 | func fetchAll(c *bm.Context) { 64 | c.JSON(dis.FetchAll(c), nil) 65 | } 66 | 67 | func fetch(c *bm.Context) { 68 | arg := new(model.ArgFetch) 69 | if err := c.Bind(arg); err != nil { 70 | return 71 | } 72 | c.JSON(dis.Fetch(c, arg)) 73 | } 74 | 75 | func fetchs(c *bm.Context) { 76 | arg := new(model.ArgFetchs) 77 | if err := c.Bind(arg); err != nil { 78 | return 79 | } 80 | c.JSON(dis.Fetchs(c, arg)) 81 | } 82 | 83 | func poll(c *bm.Context) { 84 | arg := new(model.ArgPolls) 85 | if err := c.Bind(arg); err != nil { 86 | return 87 | } 88 | ch, new, miss, err := dis.Polls(c, arg) 89 | if err != nil { 90 | c.JSON(nil, err) 91 | return 92 | } 93 | for _, mi := range miss { 94 | if mi == arg.AppID[0] { 95 | c.JSONMap(map[string]interface{}{ 96 | "message": fmt.Sprintf("%s not found", mi), 97 | }, ecode.NothingFound) 98 | return 99 | } 100 | } 101 | // wait for instance change 102 | select { 103 | case e := <-ch: 104 | c.JSON(e[arg.AppID[0]], nil) 105 | if !new { 106 | dis.DelConns(arg) // 
broadcast will delete all connections of appid 107 | } 108 | return 109 | case <-time.After(_pollWaitSecond): 110 | case <-c.Done(): 111 | } 112 | c.JSON(nil, ecode.NotModified) 113 | dis.DelConns(arg) 114 | } 115 | 116 | func polls(c *bm.Context) { 117 | arg := new(model.ArgPolls) 118 | if err := c.Bind(arg); err != nil { 119 | return 120 | } 121 | if len(arg.AppID) != len(arg.LatestTimestamp) { 122 | c.JSON(nil, ecode.RequestErr) 123 | return 124 | } 125 | ch, new, miss, err := dis.Polls(c, arg) 126 | if err != nil { 127 | c.JSON(nil, err) 128 | return 129 | } 130 | // wait for instance change 131 | select { 132 | case e := <-ch: 133 | c.JSONMap(map[string]interface{}{ 134 | "data": e, 135 | "error": map[ecode.Code]interface{}{ 136 | ecode.NothingFound: miss, 137 | }, 138 | }, nil) 139 | if !new { 140 | dis.DelConns(arg) // broadcast will delete all connections of appid 141 | } 142 | return 143 | case <-time.After(_pollWaitSecond): 144 | case <-c.Done(): 145 | } 146 | c.JSON(nil, ecode.NotModified) 147 | dis.DelConns(arg) 148 | } 149 | 150 | func set(c *bm.Context) { 151 | arg := new(model.ArgSet) 152 | if err := c.Bind(arg); err != nil { 153 | return 154 | } 155 | // len of color,status,metadata must equal to len of hostname or be zero 156 | if (len(arg.Hostname) != len(arg.Status) && len(arg.Status) != 0) || 157 | (len(arg.Hostname) != len(arg.Metadata) && len(arg.Metadata) != 0) { 158 | c.JSON(nil, ecode.RequestErr) 159 | return 160 | } 161 | if arg.SetTimestamp == 0 { 162 | arg.SetTimestamp = time.Now().UnixNano() 163 | } 164 | c.JSON(nil, dis.Set(c, arg)) 165 | } 166 | 167 | func nodes(c *bm.Context) { 168 | c.JSON(dis.Nodes(c), nil) 169 | } 170 | -------------------------------------------------------------------------------- /http/http.go: -------------------------------------------------------------------------------- 1 | package http 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/bilibili/discovery/conf" 7 | "github.com/bilibili/discovery/discovery" 8 | 9 | log "github.com/go-kratos/kratos/pkg/log" 10 | bm "github.com/go-kratos/kratos/pkg/net/http/blademaster" 11 | ) 12 | 13 | var ( 14 | dis *discovery.Discovery 15 | protected = true 16 | errProtected = errors.New("discovery in protect mode and only support register") 17 | ) 18 | 19 | // Init init http 20 | func Init(c *conf.Config, s *discovery.Discovery) { 21 | dis = s 22 | engineInner := bm.DefaultServer(c.HTTPServer) 23 | innerRouter(engineInner) 24 | if err := engineInner.Start(); err != nil { 25 | log.Error("bm.DefaultServer error(%v)", err) 26 | panic(err) 27 | } 28 | log.Info("[HTTP] Listening on: %s", c.HTTPServer.Addr) 29 | } 30 | 31 | // innerRouter init local router api path. 
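// The group below exposes the HTTP API documented in doc/api.md. As an
// illustration only (host, port and parameter values are assumptions, not
// taken from this file), a provider could be registered and then polled with:
//
//	curl -X POST 'http://127.0.0.1:7171/discovery/register' \
//	     -d 'zone=sh1&env=test&appid=provider&hostname=host1&addrs=http://127.0.0.1:8000&status=1'
//	curl 'http://127.0.0.1:7171/discovery/polls?env=test&appid=provider&hostname=host1&latest_timestamp=0'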
32 | func innerRouter(e *bm.Engine) { 33 | group := e.Group("/discovery") 34 | { 35 | group.POST("/register", register) 36 | group.POST("/renew", renew) 37 | group.POST("/cancel", cancel) 38 | group.GET("/fetch/all", initProtect, fetchAll) 39 | group.GET("/fetch", initProtect, fetch) 40 | group.GET("/fetchs", initProtect, fetchs) 41 | group.GET("/poll", initProtect, poll) 42 | group.GET("/polls", initProtect, polls) 43 | //manager 44 | group.POST("/set", set) 45 | group.GET("/nodes", initProtect, nodes) 46 | } 47 | } 48 | 49 | func initProtect(ctx *bm.Context) { 50 | if dis.Protected() { 51 | ctx.JSON(nil, errProtected) 52 | ctx.AbortWithStatus(503) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | # Code generated by godownloader on 2018-12-19T15:04:18Z. DO NOT EDIT. 4 | # 5 | 6 | usage() { 7 | this=$1 8 | cat </dev/null 133 | } 134 | echoerr() { 135 | echo "$@" 1>&2 136 | } 137 | log_prefix() { 138 | echo "$0" 139 | } 140 | _logp=6 141 | log_set_priority() { 142 | _logp="$1" 143 | } 144 | log_priority() { 145 | if test -z "$1"; then 146 | echo "$_logp" 147 | return 148 | fi 149 | [ "$1" -le "$_logp" ] 150 | } 151 | log_tag() { 152 | case $1 in 153 | 0) echo "emerg" ;; 154 | 1) echo "alert" ;; 155 | 2) echo "crit" ;; 156 | 3) echo "err" ;; 157 | 4) echo "warning" ;; 158 | 5) echo "notice" ;; 159 | 6) echo "info" ;; 160 | 7) echo "debug" ;; 161 | *) echo "$1" ;; 162 | esac 163 | } 164 | log_debug() { 165 | log_priority 7 || return 0 166 | echoerr "$(log_prefix)" "$(log_tag 7)" "$@" 167 | } 168 | log_info() { 169 | log_priority 6 || return 0 170 | echoerr "$(log_prefix)" "$(log_tag 6)" "$@" 171 | } 172 | log_err() { 173 | log_priority 3 || return 0 174 | echoerr "$(log_prefix)" "$(log_tag 3)" "$@" 175 | } 176 | log_crit() { 177 | log_priority 2 || return 0 178 | echoerr "$(log_prefix)" "$(log_tag 2)" "$@" 179 | } 180 | uname_os() { 181 | os=$(uname -s | tr '[:upper:]' '[:lower:]') 182 | case "$os" in 183 | msys_nt) os="windows" ;; 184 | esac 185 | echo "$os" 186 | } 187 | uname_arch() { 188 | arch=$(uname -m) 189 | case $arch in 190 | x86_64) arch="amd64" ;; 191 | x86) arch="386" ;; 192 | i686) arch="386" ;; 193 | i386) arch="386" ;; 194 | aarch64) arch="arm64" ;; 195 | armv5*) arch="armv5" ;; 196 | armv6*) arch="armv6" ;; 197 | armv7*) arch="armv7" ;; 198 | esac 199 | echo ${arch} 200 | } 201 | uname_os_check() { 202 | os=$(uname_os) 203 | case "$os" in 204 | darwin) return 0 ;; 205 | dragonfly) return 0 ;; 206 | freebsd) return 0 ;; 207 | linux) return 0 ;; 208 | android) return 0 ;; 209 | nacl) return 0 ;; 210 | netbsd) return 0 ;; 211 | openbsd) return 0 ;; 212 | plan9) return 0 ;; 213 | solaris) return 0 ;; 214 | windows) return 0 ;; 215 | esac 216 | log_crit "uname_os_check '$(uname -s)' got converted to '$os' which is not a GOOS value. 
Please file bug at https://github.com/client9/shlib" 217 | return 1 218 | } 219 | uname_arch_check() { 220 | arch=$(uname_arch) 221 | case "$arch" in 222 | 386) return 0 ;; 223 | amd64) return 0 ;; 224 | arm64) return 0 ;; 225 | armv5) return 0 ;; 226 | armv6) return 0 ;; 227 | armv7) return 0 ;; 228 | ppc64) return 0 ;; 229 | ppc64le) return 0 ;; 230 | mips) return 0 ;; 231 | mipsle) return 0 ;; 232 | mips64) return 0 ;; 233 | mips64le) return 0 ;; 234 | s390x) return 0 ;; 235 | amd64p32) return 0 ;; 236 | esac 237 | log_crit "uname_arch_check '$(uname -m)' got converted to '$arch' which is not a GOARCH value. Please file bug report at https://github.com/client9/shlib" 238 | return 1 239 | } 240 | untar() { 241 | tarball=$1 242 | case "${tarball}" in 243 | *.tar.gz | *.tgz) tar -xzf "${tarball}" ;; 244 | *.tar) tar -xf "${tarball}" ;; 245 | *.zip) unzip "${tarball}" ;; 246 | *) 247 | log_err "untar unknown archive format for ${tarball}" 248 | return 1 249 | ;; 250 | esac 251 | } 252 | mktmpdir() { 253 | test -z "$TMPDIR" && TMPDIR="$(mktemp -d)" 254 | mkdir -p "${TMPDIR}" 255 | echo "${TMPDIR}" 256 | } 257 | http_download_curl() { 258 | local_file=$1 259 | source_url=$2 260 | header=$3 261 | if [ -z "$header" ]; then 262 | code=$(curl -w '%{http_code}' -sL -o "$local_file" "$source_url") 263 | else 264 | code=$(curl -w '%{http_code}' -sL -H "$header" -o "$local_file" "$source_url") 265 | fi 266 | if [ "$code" != "200" ]; then 267 | log_debug "http_download_curl received HTTP status $code" 268 | return 1 269 | fi 270 | return 0 271 | } 272 | http_download_wget() { 273 | local_file=$1 274 | source_url=$2 275 | header=$3 276 | if [ -z "$header" ]; then 277 | wget -q -O "$local_file" "$source_url" 278 | else 279 | wget -q --header "$header" -O "$local_file" "$source_url" 280 | fi 281 | } 282 | http_download() { 283 | log_debug "http_download $2" 284 | if is_command curl; then 285 | http_download_curl "$@" 286 | return 287 | elif is_command wget; then 288 | http_download_wget "$@" 289 | return 290 | fi 291 | log_crit "http_download unable to find wget or curl" 292 | return 1 293 | } 294 | http_copy() { 295 | tmp=$(mktemp) 296 | http_download "${tmp}" "$1" "$2" || return 1 297 | body=$(cat "$tmp") 298 | rm -f "${tmp}" 299 | echo "$body" 300 | } 301 | github_release() { 302 | owner_repo=$1 303 | version=$2 304 | test -z "$version" && version="latest" 305 | giturl="https://github.com/${owner_repo}/releases/${version}" 306 | json=$(http_copy "$giturl" "Accept:application/json") 307 | test -z "$json" && return 1 308 | version=$(echo "$json" | tr -s '\n' ' ' | sed 's/.*"tag_name":"//' | sed 's/".*//') 309 | test -z "$version" && return 1 310 | echo "$version" 311 | } 312 | hash_sha256() { 313 | TARGET=${1:-/dev/stdin} 314 | if is_command gsha256sum; then 315 | hash=$(gsha256sum "$TARGET") || return 1 316 | echo "$hash" | cut -d ' ' -f 1 317 | elif is_command sha256sum; then 318 | hash=$(sha256sum "$TARGET") || return 1 319 | echo "$hash" | cut -d ' ' -f 1 320 | elif is_command shasum; then 321 | hash=$(shasum -a 256 "$TARGET" 2>/dev/null) || return 1 322 | echo "$hash" | cut -d ' ' -f 1 323 | elif is_command openssl; then 324 | hash=$(openssl -dst openssl dgst -sha256 "$TARGET") || return 1 325 | echo "$hash" | cut -d ' ' -f a 326 | else 327 | log_crit "hash_sha256 unable to find command to compute sha-256 hash" 328 | return 1 329 | fi 330 | } 331 | hash_sha256_verify() { 332 | TARGET=$1 333 | checksums=$2 334 | if [ -z "$checksums" ]; then 335 | log_err "hash_sha256_verify checksum file not 
specified in arg2" 336 | return 1 337 | fi 338 | BASENAME=${TARGET##*/} 339 | want=$(grep "${BASENAME}" "${checksums}" 2>/dev/null | tr '\t' ' ' | cut -d ' ' -f 1) 340 | if [ -z "$want" ]; then 341 | log_err "hash_sha256_verify unable to find checksum for '${TARGET}' in '${checksums}'" 342 | return 1 343 | fi 344 | got=$(hash_sha256 "$TARGET") 345 | if [ "$want" != "$got" ]; then 346 | log_err "hash_sha256_verify checksum for '$TARGET' did not verify ${want} vs $got" 347 | return 1 348 | fi 349 | } 350 | cat /dev/null < 0 24 | } 25 | 26 | // Action Replicate type of node 27 | type Action int 28 | 29 | const ( 30 | // Register Replicate the add action to all nodes 31 | Register Action = iota 32 | // Renew Replicate the heartbeat action to all nodes 33 | Renew 34 | // Cancel Replicate the cancel action to all nodes 35 | Cancel 36 | // Weight Replicate the Weight action to all nodes 37 | Weight 38 | // Delete Replicate the Delete action to all nodes 39 | Delete 40 | // Status Replicate the Status action to all nodes 41 | Status 42 | ) 43 | 44 | // Instance holds information required for registration with 45 | // and to be discovered by other components. 46 | type Instance struct { 47 | Region string `json:"region"` 48 | Zone string `json:"zone"` 49 | Env string `json:"env"` 50 | AppID string `json:"appid"` 51 | Hostname string `json:"hostname"` 52 | Addrs []string `json:"addrs"` 53 | Version string `json:"version"` 54 | Metadata map[string]string `json:"metadata"` 55 | 56 | // Status enum instance status 57 | Status uint32 `json:"status"` 58 | 59 | // timestamp 60 | RegTimestamp int64 `json:"reg_timestamp"` 61 | UpTimestamp int64 `json:"up_timestamp"` // NOTE: It is latest timestamp that status becomes UP. 62 | RenewTimestamp int64 `json:"renew_timestamp"` 63 | DirtyTimestamp int64 `json:"dirty_timestamp"` 64 | 65 | LatestTimestamp int64 `json:"latest_timestamp"` 66 | } 67 | 68 | // NewInstance new a instance. 69 | func NewInstance(arg *ArgRegister) (i *Instance) { 70 | now := time.Now().UnixNano() 71 | i = &Instance{ 72 | Region: arg.Region, 73 | Zone: arg.Zone, 74 | Env: arg.Env, 75 | AppID: arg.AppID, 76 | Hostname: arg.Hostname, 77 | Addrs: arg.Addrs, 78 | Version: arg.Version, 79 | Status: arg.Status, 80 | RegTimestamp: now, 81 | UpTimestamp: now, 82 | LatestTimestamp: now, 83 | RenewTimestamp: now, 84 | DirtyTimestamp: now, 85 | } 86 | if arg.Metadata != "" { 87 | if err := json.Unmarshal([]byte(arg.Metadata), &i.Metadata); err != nil { 88 | log.Error("json unmarshal metadata err %v", err) 89 | } 90 | } 91 | return 92 | } 93 | 94 | // deep copy a new instance from old one 95 | func copyInstance(oi *Instance) (ni *Instance) { 96 | ni = new(Instance) 97 | *ni = *oi 98 | ni.Addrs = make([]string, len(oi.Addrs)) 99 | for i, add := range oi.Addrs { 100 | ni.Addrs[i] = add 101 | } 102 | ni.Metadata = make(map[string]string) 103 | for k, v := range oi.Metadata { 104 | ni.Metadata[k] = v 105 | } 106 | return 107 | } 108 | 109 | // InstanceInfo the info get by consumer. 110 | type InstanceInfo struct { 111 | Instances map[string][]*Instance `json:"instances"` 112 | Scheduler *Scheduler `json:"scheduler,omitempty"` 113 | LatestTimestamp int64 `json:"latest_timestamp"` 114 | } 115 | 116 | // Apps app distinguished by zone 117 | type Apps struct { 118 | apps map[string]*App 119 | lock sync.RWMutex 120 | latestTimestamp int64 121 | } 122 | 123 | // NewApps return new Apps. 
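// A minimal usage sketch (the calling code is an assumption for illustration,
// not part of this file):
//
//	apps := NewApps()
//	app, isNew := apps.NewApp("sh001", "main.account.account-service", 0)
//	_ = isNew
//	all := apps.App("") // pass "" to get the apps of every zone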
124 | func NewApps() *Apps { 125 | return &Apps{ 126 | apps: make(map[string]*App), 127 | } 128 | } 129 | 130 | // NewApp news a app by appid. If ok=false, returns the app of already exist. 131 | func (p *Apps) NewApp(zone, appid string, lts int64) (a *App, new bool) { 132 | p.lock.Lock() 133 | a, ok := p.apps[zone] 134 | if !ok { 135 | a = NewApp(zone, appid) 136 | p.apps[zone] = a 137 | } 138 | if lts <= p.latestTimestamp { 139 | // insure increase 140 | lts = p.latestTimestamp + 1 141 | } 142 | p.latestTimestamp = lts 143 | p.lock.Unlock() 144 | new = !ok 145 | return 146 | } 147 | 148 | // App get app by zone. 149 | func (p *Apps) App(zone string) (as []*App) { 150 | p.lock.RLock() 151 | if zone != "" { 152 | a, ok := p.apps[zone] 153 | if !ok { 154 | p.lock.RUnlock() 155 | return 156 | } 157 | as = []*App{a} 158 | } else { 159 | for _, a := range p.apps { 160 | as = append(as, a) 161 | } 162 | } 163 | p.lock.RUnlock() 164 | return 165 | } 166 | 167 | // Del del app by zone. 168 | func (p *Apps) Del(zone string) { 169 | p.lock.Lock() 170 | delete(p.apps, zone) 171 | p.lock.Unlock() 172 | } 173 | 174 | // InstanceInfo return slice of instances.if up is true,return all status instance else return up status instance 175 | func (p *Apps) InstanceInfo(zone string, latestTime int64, status uint32) (ci *InstanceInfo, err error) { 176 | p.lock.RLock() 177 | defer p.lock.RUnlock() 178 | if latestTime >= p.latestTimestamp { 179 | err = ecode.NotModified 180 | return 181 | } 182 | ci = &InstanceInfo{ 183 | LatestTimestamp: p.latestTimestamp, 184 | Instances: make(map[string][]*Instance), 185 | } 186 | var ok bool 187 | for z, app := range p.apps { 188 | if zone == "" || z == zone { 189 | ok = true 190 | instances := make([]*Instance, 0) 191 | for _, i := range app.Instances() { 192 | // if up is false return all status instance 193 | if i.filter(status) { 194 | // if i.Status == InstanceStatusUP && i.LatestTimestamp > latestTime { // TODO(felix): increase 195 | ni := copyInstance(i) 196 | instances = append(instances, ni) 197 | } 198 | } 199 | ci.Instances[z] = instances 200 | } 201 | } 202 | if !ok { 203 | err = ecode.NothingFound 204 | } else if len(ci.Instances) == 0 { 205 | err = ecode.NotModified 206 | } 207 | return 208 | } 209 | 210 | // UpdateLatest update LatestTimestamp. 211 | func (p *Apps) UpdateLatest(latestTime int64) { 212 | p.lock.Lock() 213 | if latestTime <= p.latestTimestamp { 214 | // insure increase 215 | latestTime = p.latestTimestamp + 1 216 | } 217 | p.latestTimestamp = latestTime 218 | p.lock.Unlock() 219 | } 220 | 221 | // App Instances distinguished by hostname 222 | type App struct { 223 | AppID string 224 | Zone string 225 | instances map[string]*Instance 226 | latestTimestamp int64 227 | 228 | lock sync.RWMutex 229 | } 230 | 231 | // NewApp new App. 232 | func NewApp(zone, appid string) (a *App) { 233 | a = &App{ 234 | AppID: appid, 235 | Zone: zone, 236 | instances: make(map[string]*Instance), 237 | } 238 | return 239 | } 240 | 241 | // Instances return slice of instances. 242 | func (a *App) Instances() (is []*Instance) { 243 | a.lock.RLock() 244 | is = make([]*Instance, 0, len(a.instances)) 245 | for _, i := range a.instances { 246 | ni := new(Instance) 247 | *ni = *i 248 | is = append(is, ni) 249 | } 250 | a.lock.RUnlock() 251 | return 252 | } 253 | 254 | // NewInstance new a instance. 
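// If an instance with the same hostname already exists and its DirtyTimestamp
// is newer than the caller's, the existing record is kept (see the log.Warn
// below). A hypothetical call from a registry layer, shown only as a sketch:
//
//	i, isNew := app.NewInstance(model.NewInstance(arg), arg.LatestTimestamp)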
255 | func (a *App) NewInstance(ni *Instance, latestTime int64) (i *Instance, ok bool) { 256 | i = new(Instance) 257 | a.lock.Lock() 258 | oi, ok := a.instances[ni.Hostname] 259 | if ok { 260 | ni.UpTimestamp = oi.UpTimestamp 261 | if ni.DirtyTimestamp < oi.DirtyTimestamp { 262 | log.Warn("register exist(%v) dirty timestamp over than caller(%v)", oi, ni) 263 | ni = oi 264 | } 265 | } 266 | a.instances[ni.Hostname] = ni 267 | a.updateLatest(latestTime) 268 | *i = *ni 269 | a.lock.Unlock() 270 | ok = !ok 271 | return 272 | } 273 | 274 | // Renew new a instance. 275 | func (a *App) Renew(hostname string) (i *Instance, ok bool) { 276 | i = new(Instance) 277 | a.lock.Lock() 278 | defer a.lock.Unlock() 279 | oi, ok := a.instances[hostname] 280 | if !ok { 281 | return 282 | } 283 | oi.RenewTimestamp = time.Now().UnixNano() 284 | i = copyInstance(oi) 285 | return 286 | } 287 | 288 | func (a *App) updateLatest(latestTime int64) { 289 | if latestTime <= a.latestTimestamp { 290 | // insure increase 291 | latestTime = a.latestTimestamp + 1 292 | } 293 | a.latestTimestamp = latestTime 294 | } 295 | 296 | // Cancel cancel a instance. 297 | func (a *App) Cancel(hostname string, latestTime int64) (i *Instance, l int, ok bool) { 298 | i = new(Instance) 299 | a.lock.Lock() 300 | defer a.lock.Unlock() 301 | oi, ok := a.instances[hostname] 302 | if !ok { 303 | return 304 | } 305 | delete(a.instances, hostname) 306 | l = len(a.instances) 307 | oi.LatestTimestamp = latestTime 308 | a.updateLatest(latestTime) 309 | *i = *oi 310 | return 311 | } 312 | 313 | // Len returns the length of instances. 314 | func (a *App) Len() (l int) { 315 | a.lock.RLock() 316 | l = len(a.instances) 317 | a.lock.RUnlock() 318 | return 319 | } 320 | 321 | // Set set new status,metadata,color of instance . 
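// Hostname, Status and Metadata in ArgSet are parallel slices (their lengths
// are also validated in http/discovery.go). A sketch of a valid call, with
// assumed values only:
//
//	ok := app.Set(&ArgSet{
//		Hostname:     []string{"host1"},
//		Status:       []int64{1}, // 1 is the UP status used elsewhere in this repo (_statusUP in naming/client.go)
//		Metadata:     []string{`{"weight":"10"}`},
//		SetTimestamp: time.Now().UnixNano(),
//	})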
322 | func (a *App) Set(changes *ArgSet) (ok bool) { 323 | a.lock.Lock() 324 | defer a.lock.Unlock() 325 | var ( 326 | dst *Instance 327 | setTime = changes.SetTimestamp 328 | ) 329 | for i, hostname := range changes.Hostname { 330 | if dst, ok = a.instances[hostname]; !ok { 331 | log.Error("SetWeight hostname(%s) not found", hostname) 332 | return 333 | } 334 | if len(changes.Status) != 0 { 335 | if uint32(changes.Status[i]) != InstanceStatusUP && uint32(changes.Status[i]) != InstancestatusWating { 336 | log.Error("SetWeight change status(%d) is error", changes.Status[i]) 337 | ok = false 338 | return 339 | } 340 | dst.Status = uint32(changes.Status[i]) 341 | if dst.Status == InstanceStatusUP { 342 | dst.UpTimestamp = setTime 343 | } 344 | } 345 | if len(changes.Metadata) != 0 { 346 | if err := json.Unmarshal([]byte(changes.Metadata[i]), &dst.Metadata); err != nil { 347 | log.Error("set change metadata err %s", changes.Metadata[i]) 348 | ok = false 349 | return 350 | } 351 | } 352 | dst.LatestTimestamp = setTime 353 | dst.DirtyTimestamp = setTime 354 | } 355 | a.updateLatest(setTime) 356 | return 357 | } 358 | -------------------------------------------------------------------------------- /model/node.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "encoding/json" 5 | ) 6 | 7 | // NodeStatus Status of instance 8 | type NodeStatus int 9 | 10 | const ( 11 | // NodeStatusUP Ready to receive register 12 | NodeStatusUP NodeStatus = iota 13 | // NodeStatusLost lost with each other 14 | NodeStatusLost 15 | ) 16 | 17 | const ( 18 | // AppID is discvoery id 19 | AppID = "infra.discovery" 20 | ) 21 | 22 | // Node node 23 | type Node struct { 24 | Addr string `json:"addr"` 25 | Status NodeStatus `json:"status"` 26 | Zone string `json:"zone"` 27 | } 28 | 29 | // Scheduler info. 30 | type Scheduler struct { 31 | AppID string `json:"app_id,omitempty"` 32 | Env string `json:"env,omitempty"` 33 | Clients map[string]*ZoneStrategy `json:"clients"` // zone-ratio 34 | Remark string `json:"remark"` 35 | } 36 | 37 | // ZoneStrategy is the scheduling strategy of all zones 38 | type ZoneStrategy struct { 39 | Zones map[string]*Strategy `json:"zones"` 40 | } 41 | 42 | // Strategy is zone scheduling strategy. 43 | type Strategy struct { 44 | Weight int64 `json:"weight"` 45 | } 46 | 47 | // Zone info. 48 | type Zone struct { 49 | Src string `json:"src"` 50 | Dst map[string]int `json:"dst"` 51 | } 52 | 53 | // Set Scheduler conf settter. 54 | func (s *Scheduler) Set(content string) (err error) { 55 | if err = json.Unmarshal([]byte(content), &s); err != nil { 56 | return 57 | } 58 | return 59 | } 60 | -------------------------------------------------------------------------------- /model/param.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | // ArgRegister define register param. 
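// The fields are bound from HTTP form values (see the form tags below). An
// equivalent set of form fields, with illustrative values only and metadata
// shown unencoded for readability:
//
//	zone=sh1&env=test&appid=provider&hostname=host1&addrs=http://127.0.0.1:8000&status=1&metadata={"weight":"10"}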
4 | type ArgRegister struct { 5 | Region string `form:"region"` 6 | Zone string `form:"zone" validate:"required"` 7 | Env string `form:"env" validate:"required"` 8 | AppID string `form:"appid" validate:"required"` 9 | Hostname string `form:"hostname" validate:"required"` 10 | Status uint32 `form:"status" validate:"required"` 11 | Addrs []string `form:"addrs" validate:"gt=0"` 12 | Version string `form:"version"` 13 | Metadata string `form:"metadata"` 14 | Replication bool `form:"replication"` 15 | LatestTimestamp int64 `form:"latest_timestamp"` 16 | DirtyTimestamp int64 `form:"dirty_timestamp"` 17 | FromZone bool `form:"from_zone"` 18 | } 19 | 20 | // ArgRenew define renew params. 21 | type ArgRenew struct { 22 | Zone string `form:"zone" validate:"required"` 23 | Env string `form:"env" validate:"required"` 24 | AppID string `form:"appid" validate:"required"` 25 | Hostname string `form:"hostname" validate:"required"` 26 | Replication bool `form:"replication"` 27 | DirtyTimestamp int64 `form:"dirty_timestamp"` 28 | FromZone bool `form:"from_zone"` 29 | } 30 | 31 | // ArgCancel define cancel params. 32 | type ArgCancel struct { 33 | Zone string `form:"zone" validate:"required"` 34 | Env string `form:"env" validate:"required"` 35 | AppID string `form:"appid" validate:"required"` 36 | Hostname string `form:"hostname" validate:"required"` 37 | FromZone bool `form:"from_zone"` 38 | Replication bool `form:"replication"` 39 | LatestTimestamp int64 `form:"latest_timestamp"` 40 | } 41 | 42 | // ArgFetch define fetch param. 43 | type ArgFetch struct { 44 | Zone string `form:"zone"` 45 | Env string `form:"env" validate:"required"` 46 | AppID string `form:"appid" validate:"required"` 47 | Status uint32 `form:"status" validate:"required"` 48 | } 49 | 50 | // ArgFetchs define fetchs arg. 51 | type ArgFetchs struct { 52 | Zone string `form:"zone"` 53 | Env string `form:"env" validate:"required"` 54 | AppID []string `form:"appid" validate:"gt=0"` 55 | Status uint32 `form:"status" validate:"required"` 56 | } 57 | 58 | // ArgPoll define poll param. 59 | type ArgPoll struct { 60 | Zone string `form:"zone"` 61 | Env string `form:"env" validate:"required"` 62 | AppID string `form:"appid" validate:"required"` 63 | Hostname string `form:"hostname" validate:"required"` 64 | LatestTimestamp int64 `form:"latest_timestamp"` 65 | } 66 | 67 | // ArgPolls define poll param. 68 | type ArgPolls struct { 69 | Zone string `form:"zone"` 70 | Env string `form:"env" validate:"required"` 71 | AppID []string `form:"appid" validate:"gt=0"` 72 | Hostname string `form:"hostname" validate:"required"` 73 | LatestTimestamp []int64 `form:"latest_timestamp"` 74 | } 75 | 76 | // ArgSet define set param. 
77 | type ArgSet struct { 78 | Region string `form:"region"` 79 | Zone string `form:"zone" validate:"required"` 80 | Env string `form:"env" validate:"required"` 81 | AppID string `form:"appid" validate:"required"` 82 | Hostname []string `form:"hostname" validate:"gte=0"` 83 | Status []int64 `form:"status" validate:"gte=0"` 84 | Metadata []string `form:"metadata" validate:"gte=0"` 85 | Replication bool `form:"replication"` 86 | FromZone bool `form:"from_zone"` 87 | SetTimestamp int64 `form:"set_timestamp"` 88 | } 89 | -------------------------------------------------------------------------------- /naming/client.go: -------------------------------------------------------------------------------- 1 | package naming 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "errors" 7 | "fmt" 8 | "math/rand" 9 | "net/url" 10 | "os" 11 | "strconv" 12 | "sync" 13 | "sync/atomic" 14 | "time" 15 | 16 | ecode "github.com/go-kratos/kratos/pkg/ecode" 17 | log "github.com/go-kratos/kratos/pkg/log" 18 | http "github.com/go-kratos/kratos/pkg/net/http/blademaster" 19 | xtime "github.com/go-kratos/kratos/pkg/time" 20 | ) 21 | 22 | const ( 23 | _registerURL = "http://%s/discovery/register" 24 | _setURL = "http://%s/discovery/set" 25 | _cancelURL = "http://%s/discovery/cancel" 26 | _renewURL = "http://%s/discovery/renew" 27 | _pollURL = "http://%s/discovery/polls" 28 | 29 | _registerGap = 30 * time.Second 30 | 31 | _statusUP = "1" 32 | 33 | _appid = "infra.discovery" 34 | ) 35 | 36 | var ( 37 | _ Builder = &Discovery{} 38 | _ Registry = &Discovery{} 39 | 40 | // ErrDuplication duplication treeid. 41 | ErrDuplication = errors.New("discovery: instance duplicate registration") 42 | ) 43 | 44 | // Config discovery configures. 45 | type Config struct { 46 | Nodes []string 47 | Region string 48 | Zone string 49 | Env string 50 | Host string 51 | } 52 | 53 | type appData struct { 54 | Instances map[string][]*Instance `json:"instances"` 55 | LastTs int64 `json:"latest_timestamp"` 56 | } 57 | 58 | // Discovery is discovery client. 59 | type Discovery struct { 60 | c *Config 61 | once sync.Once 62 | ctx context.Context 63 | cancelFunc context.CancelFunc 64 | httpClient *http.Client 65 | 66 | node atomic.Value 67 | nodeIdx uint64 68 | 69 | mutex sync.RWMutex 70 | apps map[string]*appInfo 71 | registry map[string]struct{} 72 | lastHost string 73 | cancelPolls context.CancelFunc 74 | 75 | delete chan *appInfo 76 | } 77 | 78 | type appInfo struct { 79 | resolver map[*Resolve]struct{} 80 | zoneIns atomic.Value 81 | lastTs int64 // latest timestamp 82 | } 83 | 84 | func fixConfig(c *Config) { 85 | if len(c.Nodes) == 0 { 86 | panic("conf nodes can not be nil") 87 | } 88 | if c.Region == "" { 89 | c.Region = os.Getenv("REGION") 90 | } 91 | if c.Zone == "" { 92 | c.Zone = os.Getenv("ZONE") 93 | } 94 | if c.Env == "" { 95 | c.Env = os.Getenv("DEPLOY_ENV") 96 | } 97 | if c.Host == "" { 98 | c.Host, _ = os.Hostname() 99 | } 100 | } 101 | 102 | // New new a discovery client. 
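// A minimal construction sketch, mirroring naming/example_test.go (the node
// address and names are placeholders):
//
//	d := New(&Config{Nodes: []string{"127.0.0.1:7171"}, Zone: "sh1", Env: "test"})
//	defer d.Close()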
103 | func New(c *Config) (d *Discovery) { 104 | fixConfig(c) 105 | ctx, cancel := context.WithCancel(context.Background()) 106 | d = &Discovery{ 107 | c: c, 108 | ctx: ctx, 109 | cancelFunc: cancel, 110 | apps: map[string]*appInfo{}, 111 | registry: map[string]struct{}{}, 112 | delete: make(chan *appInfo, 10), 113 | } 114 | // httpClient 115 | cfg := &http.ClientConfig{ 116 | Dial: xtime.Duration(3 * time.Second), 117 | KeepAlive: xtime.Duration(40 * time.Second), 118 | Timeout: xtime.Duration(40 * time.Second), 119 | } 120 | d.httpClient = http.NewClient(cfg) 121 | // discovery self 122 | resolver := d.Build(_appid) 123 | event := resolver.Watch() 124 | _, ok := <-event 125 | if !ok { 126 | panic("discovery watch failed") 127 | } 128 | ins, ok := resolver.Fetch() 129 | if ok { 130 | d.newSelf(ins.Instances) 131 | } 132 | go d.selfproc(resolver, event) 133 | return 134 | } 135 | 136 | func (d *Discovery) selfproc(resolver Resolver, event <-chan struct{}) { 137 | for { 138 | _, ok := <-event 139 | if !ok { 140 | return 141 | } 142 | zones, ok := resolver.Fetch() 143 | if ok { 144 | d.newSelf(zones.Instances) 145 | } 146 | } 147 | } 148 | 149 | func (d *Discovery) newSelf(zones map[string][]*Instance) { 150 | d.mutex.Lock() 151 | ins, ok := zones[d.c.Zone] 152 | d.mutex.Unlock() 153 | if !ok { 154 | return 155 | } 156 | var nodes []string 157 | for _, in := range ins { 158 | for _, addr := range in.Addrs { 159 | u, err := url.Parse(addr) 160 | if err == nil && u.Scheme == "http" { 161 | nodes = append(nodes, u.Host) 162 | } 163 | } 164 | } 165 | // diff old nodes 166 | var olds int 167 | for _, n := range nodes { 168 | if node, ok := d.node.Load().([]string); ok { 169 | for _, o := range node { 170 | if o == n { 171 | olds++ 172 | break 173 | } 174 | } 175 | } 176 | } 177 | if len(nodes) == olds { 178 | return 179 | } 180 | // FIXME: we should use rand.Shuffle() in golang 1.10 181 | shuffle(len(nodes), func(i, j int) { 182 | nodes[i], nodes[j] = nodes[j], nodes[i] 183 | }) 184 | d.node.Store(nodes) 185 | } 186 | 187 | // Build discovery resovler builder. 188 | func (d *Discovery) Build(appid string) Resolver { 189 | r := &Resolve{ 190 | id: appid, 191 | d: d, 192 | event: make(chan struct{}, 1), 193 | } 194 | d.mutex.Lock() 195 | app, ok := d.apps[appid] 196 | if !ok { 197 | app = &appInfo{ 198 | resolver: make(map[*Resolve]struct{}), 199 | } 200 | d.apps[appid] = app 201 | cancel := d.cancelPolls 202 | if cancel != nil { 203 | cancel() 204 | } 205 | } 206 | app.resolver[r] = struct{}{} 207 | d.mutex.Unlock() 208 | if ok { 209 | select { 210 | case r.event <- struct{}{}: 211 | default: 212 | } 213 | } 214 | log.Info("discovery: AddWatch(%s) already watch(%v)", appid, ok) 215 | d.once.Do(func() { 216 | go d.serverproc() 217 | }) 218 | return r 219 | } 220 | 221 | // Scheme return discovery's scheme 222 | func (d *Discovery) Scheme() string { 223 | return "discovery" 224 | } 225 | 226 | // Resolve discveory resolver. 227 | type Resolve struct { 228 | id string 229 | event chan struct{} 230 | d *Discovery 231 | } 232 | 233 | // Watch watch instance. 234 | func (r *Resolve) Watch() <-chan struct{} { 235 | return r.event 236 | } 237 | 238 | // Fetch fetch resolver instance. 239 | func (r *Resolve) Fetch() (ins *InstancesInfo, ok bool) { 240 | r.d.mutex.RLock() 241 | app, ok := r.d.apps[r.id] 242 | r.d.mutex.RUnlock() 243 | if ok { 244 | ins, ok = app.zoneIns.Load().(*InstancesInfo) 245 | return 246 | } 247 | return 248 | } 249 | 250 | // Close close resolver. 
251 | func (r *Resolve) Close() error { 252 | r.d.mutex.Lock() 253 | if app, ok := r.d.apps[r.id]; ok && len(app.resolver) != 0 { 254 | delete(app.resolver, r) 255 | // TODO: delete app from builder 256 | } 257 | r.d.mutex.Unlock() 258 | return nil 259 | } 260 | 261 | // Reload reload the config 262 | func (d *Discovery) Reload(c *Config) { 263 | fixConfig(c) 264 | d.mutex.Lock() 265 | d.c = c 266 | d.mutex.Unlock() 267 | } 268 | 269 | // Close stop all running process including discovery and register 270 | func (d *Discovery) Close() error { 271 | d.cancelFunc() 272 | return nil 273 | } 274 | 275 | // Register Register an instance with discovery and renew automatically 276 | func (d *Discovery) Register(ins *Instance) (cancelFunc context.CancelFunc, err error) { 277 | d.mutex.Lock() 278 | if _, ok := d.registry[ins.AppID]; ok { 279 | err = ErrDuplication 280 | } else { 281 | d.registry[ins.AppID] = struct{}{} 282 | } 283 | d.mutex.Unlock() 284 | if err != nil { 285 | return 286 | } 287 | 288 | ctx, cancel := context.WithCancel(d.ctx) 289 | if err = d.register(ctx, ins); err != nil { 290 | d.mutex.Lock() 291 | delete(d.registry, ins.AppID) 292 | d.mutex.Unlock() 293 | cancel() 294 | return 295 | } 296 | ch := make(chan struct{}, 1) 297 | cancelFunc = context.CancelFunc(func() { 298 | cancel() 299 | <-ch 300 | }) 301 | go func() { 302 | ticker := time.NewTicker(_registerGap) 303 | defer ticker.Stop() 304 | for { 305 | select { 306 | case <-ticker.C: 307 | if err := d.renew(ctx, ins); err != nil && ecode.EqualError(ecode.NothingFound, err) { 308 | _ = d.register(ctx, ins) 309 | } 310 | case <-ctx.Done(): 311 | _ = d.cancel(ins) 312 | ch <- struct{}{} 313 | return 314 | } 315 | } 316 | }() 317 | return 318 | } 319 | 320 | // register Register an instance with discovery 321 | func (d *Discovery) register(ctx context.Context, ins *Instance) (err error) { 322 | d.mutex.RLock() 323 | c := d.c 324 | d.mutex.RUnlock() 325 | 326 | var metadata []byte 327 | if ins.Metadata != nil { 328 | if metadata, err = json.Marshal(ins.Metadata); err != nil { 329 | log.Error("discovery:register instance Marshal metadata(%v) failed!error(%v)", ins.Metadata, err) 330 | } 331 | } 332 | res := new(struct { 333 | Code int `json:"code"` 334 | Message string `json:"message"` 335 | }) 336 | uri := fmt.Sprintf(_registerURL, d.pickNode()) 337 | params := d.newParams(c) 338 | params.Set("appid", ins.AppID) 339 | for _, addr := range ins.Addrs { 340 | params.Add("addrs", addr) 341 | } 342 | params.Set("version", ins.Version) 343 | params.Set("status", _statusUP) 344 | params.Set("metadata", string(metadata)) 345 | if err = d.httpClient.Post(ctx, uri, "", params, &res); err != nil { 346 | d.switchNode() 347 | log.Error("discovery: register client.Get(%v) zone(%s) env(%s) appid(%s) addrs(%v) error(%v)", 348 | uri, c.Zone, c.Env, ins.AppID, ins.Addrs, err) 349 | return 350 | } 351 | if ec := ecode.Int(res.Code); !ecode.EqualError(ecode.OK, ec) { 352 | log.Warn("discovery: register client.Get(%v) env(%s) appid(%s) addrs(%v) code(%v)", uri, c.Env, ins.AppID, ins.Addrs, res.Code) 353 | err = ec 354 | return 355 | } 356 | log.Info("discovery: register client.Get(%v) env(%s) appid(%s) addrs(%s) success", uri, c.Env, ins.AppID, ins.Addrs) 357 | return 358 | } 359 | 360 | // renew Renew an instance with discovery 361 | func (d *Discovery) renew(ctx context.Context, ins *Instance) (err error) { 362 | d.mutex.RLock() 363 | c := d.c 364 | d.mutex.RUnlock() 365 | 366 | res := new(struct { 367 | Code int `json:"code"` 368 | Message string 
`json:"message"` 369 | }) 370 | uri := fmt.Sprintf(_renewURL, d.pickNode()) 371 | params := d.newParams(c) 372 | params.Set("appid", ins.AppID) 373 | if err = d.httpClient.Post(ctx, uri, "", params, &res); err != nil { 374 | d.switchNode() 375 | log.Error("discovery: renew client.Get(%v) env(%s) appid(%s) hostname(%s) error(%v)", 376 | uri, c.Env, ins.AppID, c.Host, err) 377 | return 378 | } 379 | if ec := ecode.Int(res.Code); !ecode.EqualError(ecode.OK, ec) { 380 | err = ec 381 | if ecode.EqualError(ecode.NothingFound, ec) { 382 | return 383 | } 384 | log.Error("discovery: renew client.Get(%v) env(%s) appid(%s) hostname(%s) code(%v)", 385 | uri, c.Env, ins.AppID, c.Host, res.Code) 386 | return 387 | } 388 | return 389 | } 390 | 391 | // cancel Remove the registered instance from discovery 392 | func (d *Discovery) cancel(ins *Instance) (err error) { 393 | d.mutex.RLock() 394 | c := d.c 395 | d.mutex.RUnlock() 396 | 397 | res := new(struct { 398 | Code int `json:"code"` 399 | Message string `json:"message"` 400 | }) 401 | uri := fmt.Sprintf(_cancelURL, d.pickNode()) 402 | params := d.newParams(c) 403 | params.Set("appid", ins.AppID) 404 | // request 405 | if err = d.httpClient.Post(context.TODO(), uri, "", params, &res); err != nil { 406 | d.switchNode() 407 | log.Error("discovery cancel client.Get(%v) env(%s) appid(%s) hostname(%s) error(%v)", 408 | uri, c.Env, ins.AppID, c.Host, err) 409 | return 410 | } 411 | if ec := ecode.Int(res.Code); !ecode.EqualError(ecode.OK, ec) { 412 | log.Warn("discovery cancel client.Get(%v) env(%s) appid(%s) hostname(%s) code(%v)", 413 | uri, c.Env, ins.AppID, c.Host, res.Code) 414 | err = ec 415 | return 416 | } 417 | log.Info("discovery cancel client.Get(%v) env(%s) appid(%s) hostname(%s) success", 418 | uri, c.Env, ins.AppID, c.Host) 419 | return 420 | } 421 | 422 | // Set set ins status and metadata. 
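// Note that set() below always reports the instance as UP (_statusUP). An
// illustrative call, with assumed values:
//
//	err := d.Set(&Instance{AppID: "provider", Metadata: map[string]string{"weight": "20"}})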
423 | func (d *Discovery) Set(ins *Instance) error { 424 | return d.set(context.Background(), ins) 425 | } 426 | 427 | // set set instance info with discovery 428 | func (d *Discovery) set(ctx context.Context, ins *Instance) (err error) { 429 | d.mutex.RLock() 430 | conf := d.c 431 | d.mutex.RUnlock() 432 | res := new(struct { 433 | Code int `json:"code"` 434 | Message string `json:"message"` 435 | }) 436 | uri := fmt.Sprintf(_setURL, d.pickNode()) 437 | params := d.newParams(conf) 438 | params.Set("appid", ins.AppID) 439 | params.Set("version", ins.Version) 440 | params.Set("status", _statusUP) 441 | if ins.Metadata != nil { 442 | var metadata []byte 443 | if metadata, err = json.Marshal(ins.Metadata); err != nil { 444 | log.Error("discovery:set instance Marshal metadata(%v) failed!error(%v)", ins.Metadata, err) 445 | return 446 | } 447 | params.Set("metadata", string(metadata)) 448 | } 449 | if err = d.httpClient.Post(ctx, uri, "", params, &res); err != nil { 450 | d.switchNode() 451 | log.Error("discovery: set client.Get(%v) zone(%s) env(%s) appid(%s) addrs(%v) error(%v)", 452 | uri, conf.Zone, conf.Env, ins.AppID, ins.Addrs, err) 453 | return 454 | } 455 | if ec := ecode.Int(res.Code); !ecode.EqualError(ecode.OK, ec) { 456 | log.Warn("discovery: set client.Get(%v) env(%s) appid(%s) addrs(%v) code(%v)", 457 | uri, conf.Env, ins.AppID, ins.Addrs, res.Code) 458 | err = ec 459 | return 460 | } 461 | log.Info("discovery: set client.Get(%v) env(%s) appid(%s) addrs(%s) success", uri+"?"+params.Encode(), conf.Env, ins.AppID, ins.Addrs) 462 | return 463 | } 464 | 465 | func (d *Discovery) serverproc() { 466 | var ( 467 | retry int 468 | ctx context.Context 469 | cancel context.CancelFunc 470 | ) 471 | ticker := time.NewTicker(time.Minute * 30) 472 | defer ticker.Stop() 473 | for { 474 | if ctx == nil { 475 | ctx, cancel = context.WithCancel(d.ctx) 476 | d.mutex.Lock() 477 | d.cancelPolls = cancel 478 | d.mutex.Unlock() 479 | } 480 | select { 481 | case <-d.ctx.Done(): 482 | return 483 | case <-ticker.C: 484 | d.switchNode() 485 | default: 486 | } 487 | apps, err := d.polls(ctx) 488 | if err != nil { 489 | d.switchNode() 490 | if ctx.Err() == context.Canceled { 491 | ctx = nil 492 | continue 493 | } 494 | nodes, _ := d.node.Load().([]string) 495 | if retry >= len(nodes) { 496 | d.tryAppendSeedNodes() 497 | retry = 0 498 | } 499 | time.Sleep(time.Second) 500 | retry++ 501 | continue 502 | } 503 | retry = 0 504 | d.broadcast(apps) 505 | } 506 | } 507 | 508 | func (d *Discovery) tryAppendSeedNodes() { 509 | nodes, ok := d.node.Load().([]string) 510 | if !ok { 511 | return 512 | } 513 | exist := make(map[string]struct{}, len(nodes)) 514 | for _, node := range nodes { 515 | exist[node] = struct{}{} 516 | } 517 | d.mutex.Lock() 518 | seedNodes := d.c.Nodes 519 | d.mutex.Unlock() 520 | var changed bool 521 | for _, node := range seedNodes { 522 | if _, ok := exist[node]; !ok { 523 | nodes = append(nodes, node) 524 | changed = true 525 | } 526 | } 527 | if changed { 528 | d.node.Store(nodes) 529 | log.Info("discovery: append seed nodes(%s)", nodes) 530 | } 531 | } 532 | 533 | func (d *Discovery) pickNode() string { 534 | nodes, ok := d.node.Load().([]string) 535 | if !ok || len(nodes) == 0 { 536 | return d.c.Nodes[rand.Intn(len(d.c.Nodes))] 537 | } 538 | return nodes[atomic.LoadUint64(&d.nodeIdx)%uint64(len(nodes))] 539 | } 540 | 541 | func (d *Discovery) switchNode() { 542 | atomic.AddUint64(&d.nodeIdx, 1) 543 | } 544 | 545 | func (d *Discovery) polls(ctx context.Context) (apps 
map[string]*InstancesInfo, err error) { 546 | var ( 547 | lastTss []int64 548 | appIDs []string 549 | host = d.pickNode() 550 | changed bool 551 | ) 552 | if host != d.lastHost { 553 | d.lastHost = host 554 | changed = true 555 | } 556 | d.mutex.RLock() 557 | c := d.c 558 | for k, v := range d.apps { 559 | if changed { 560 | v.lastTs = 0 561 | } 562 | appIDs = append(appIDs, k) 563 | lastTss = append(lastTss, v.lastTs) 564 | } 565 | d.mutex.RUnlock() 566 | if len(appIDs) == 0 { 567 | return 568 | } 569 | uri := fmt.Sprintf(_pollURL, host) 570 | res := new(struct { 571 | Code int `json:"code"` 572 | Data map[string]*InstancesInfo `json:"data"` 573 | }) 574 | params := url.Values{} 575 | params.Set("env", c.Env) 576 | params.Set("hostname", c.Host) 577 | for _, appid := range appIDs { 578 | params.Add("appid", appid) 579 | } 580 | for _, ts := range lastTss { 581 | params.Add("latest_timestamp", strconv.FormatInt(ts, 10)) 582 | } 583 | if err = d.httpClient.Get(ctx, uri, "", params, res); err != nil { 584 | if ctx.Err() != context.Canceled { 585 | log.Error("discovery: client.Get(%s) error(%+v)", uri+"?"+params.Encode(), err) 586 | } 587 | return 588 | } 589 | if ec := ecode.Int(res.Code); !ecode.EqualError(ecode.OK, ec) { 590 | if !ecode.EqualError(ecode.NotModified, ec) { 591 | log.Error("discovery: client.Get(%s) get error code(%d)", uri+"?"+params.Encode(), res.Code) 592 | err = ec 593 | } 594 | return 595 | } 596 | info, _ := json.Marshal(res.Data) 597 | for _, app := range res.Data { 598 | if app == nil || app.LastTs == 0 { 599 | err = ecode.ServerErr 600 | log.Error("discovery: client.Get(%s) latest_timestamp is 0,instances:(%s)", uri+"?"+params.Encode(), info) 601 | return 602 | } 603 | } 604 | log.Info("discovery: successfully polls(%s) instances (%s)", uri+"?"+params.Encode(), info) 605 | apps = res.Data 606 | return 607 | } 608 | 609 | func (d *Discovery) broadcast(apps map[string]*InstancesInfo) { 610 | for appID, v := range apps { 611 | var count int 612 | for zone, ins := range v.Instances { 613 | if len(ins) == 0 { 614 | delete(v.Instances, zone) 615 | } 616 | count += len(ins) 617 | } 618 | if count == 0 { 619 | continue 620 | } 621 | d.mutex.RLock() 622 | app, ok := d.apps[appID] 623 | d.mutex.RUnlock() 624 | if ok { 625 | app.lastTs = v.LastTs 626 | app.zoneIns.Store(v) 627 | d.mutex.RLock() 628 | for rs := range app.resolver { 629 | select { 630 | case rs.event <- struct{}{}: 631 | default: 632 | } 633 | } 634 | d.mutex.RUnlock() 635 | } 636 | } 637 | } 638 | 639 | func (d *Discovery) newParams(c *Config) url.Values { 640 | params := url.Values{} 641 | params.Set("region", c.Region) 642 | params.Set("zone", c.Zone) 643 | params.Set("env", c.Env) 644 | params.Set("hostname", c.Host) 645 | return params 646 | } 647 | 648 | var r = rand.New(rand.NewSource(time.Now().UnixNano())) 649 | 650 | // shuffle pseudo-randomizes the order of elements. 651 | // n is the number of elements. Shuffle panics if n < 0. 652 | // swap swaps the elements with indexes i and j. 653 | func shuffle(n int, swap func(i, j int)) { 654 | if n < 0 { 655 | panic("invalid argument to Shuffle") 656 | } 657 | 658 | // Fisher-Yates shuffle: https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle 659 | // Shuffle really ought not be called with n that doesn't fit in 32 bits. 660 | // Not only will it take a very long time, but with 2³¹! 
possible permutations, 661 | // there's no way that any PRNG can have a big enough internal state to 662 | // generate even a minuscule percentage of the possible permutations. 663 | // Nevertheless, the right API signature accepts an int n, so handle it as best we can. 664 | i := n - 1 665 | for ; i > 1<<31-1-1; i-- { 666 | j := int(r.Int63n(int64(i + 1))) 667 | swap(i, j) 668 | } 669 | for ; i > 0; i-- { 670 | j := int(r.Int31n(int32(i + 1))) 671 | swap(i, j) 672 | } 673 | } 674 | -------------------------------------------------------------------------------- /naming/client_test.go: -------------------------------------------------------------------------------- 1 | package naming 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "net/url" 7 | "os" 8 | "strconv" 9 | "strings" 10 | "testing" 11 | "time" 12 | 13 | "github.com/bilibili/discovery/conf" 14 | "github.com/bilibili/discovery/discovery" 15 | "github.com/bilibili/discovery/http" 16 | 17 | "github.com/go-kratos/kratos/pkg/conf/paladin" 18 | xhttp "github.com/go-kratos/kratos/pkg/net/http/blademaster" 19 | xtime "github.com/go-kratos/kratos/pkg/time" 20 | . "github.com/smartystreets/goconvey/convey" 21 | ) 22 | 23 | func TestMain(m *testing.M) { 24 | flag.Set("conf", "./") 25 | flag.Parse() 26 | go mockDiscoverySvr() 27 | time.Sleep(time.Second) 28 | os.Exit(m.Run()) 29 | } 30 | 31 | func mockDiscoverySvr() { 32 | c := &conf.Config{ 33 | Env: &conf.Env{ 34 | Region: "test", 35 | Zone: "test", 36 | DeployEnv: "test", 37 | Host: "test_server", 38 | }, 39 | Nodes: []string{"127.0.0.1:7171"}, 40 | HTTPServer: &xhttp.ServerConfig{ 41 | Addr: "127.0.0.1:7171", 42 | Timeout: xtime.Duration(time.Second * 1), 43 | }, 44 | HTTPClient: &xhttp.ClientConfig{ 45 | Timeout: xtime.Duration(time.Second * 1), 46 | Dial: xtime.Duration(time.Second), 47 | KeepAlive: xtime.Duration(time.Second * 1), 48 | }, 49 | } 50 | paladin.Init() 51 | dis, _ := discovery.New(c) 52 | http.Init(c, dis) 53 | } 54 | 55 | func TestDiscovery(t *testing.T) { 56 | conf := &Config{ 57 | Nodes: []string{"127.0.0.1:7171"}, 58 | Region: "test", 59 | Zone: "test", 60 | Env: "test", 61 | Host: "test-host", 62 | } 63 | dis := New(conf) 64 | appid := "test1" 65 | Convey("test discovery register", t, func() { 66 | instance := &Instance{ 67 | Region: "test", 68 | Zone: "test", 69 | Env: "test", 70 | AppID: appid, 71 | Addrs: []string{"http://127.0.0.1:8000"}, 72 | Hostname: "test-host", 73 | } 74 | _, err := dis.Register(instance) 75 | So(err, ShouldBeNil) 76 | dis.node.Store([]string{"127.0.0.1:7172"}) 77 | instance.AppID = "test2" 78 | //instance.Metadata = map[string]string{"meta": "meta"} 79 | _, err = dis.Register(instance) 80 | So(err, ShouldNotBeNil) 81 | _ = dis.renew(context.TODO(), instance) 82 | dis.node.Store([]string{"127.0.0.1:7171"}) 83 | _ = dis.renew(context.TODO(), instance) 84 | Convey("test discovery set", func() { 85 | inSet := &Instance{ 86 | Region: "test", 87 | Zone: "test", 88 | Env: "test", 89 | AppID: appid, 90 | Hostname: "test-host", 91 | Addrs: []string{ 92 | "grpc://127.0.0.1:8080", 93 | }, 94 | Metadata: map[string]string{ 95 | "test": "1", 96 | "weight": "111", 97 | "color": "blue", 98 | }, 99 | } 100 | err = dis.Set(inSet) 101 | So(err, ShouldBeNil) 102 | dis.node.Store([]string{"127.0.0.1:7172"}) 103 | rs := dis.Build(appid) 104 | ch := rs.Watch() 105 | <-ch 106 | ins, _ := rs.Fetch() 107 | So(ins.Instances["test"][0].Metadata["weight"], ShouldResemble, "111") 108 | }) 109 | }) 110 | Convey("test discovery watch", t, func() { 111 | 
dis.node.Store([]string{"127.0.0.1:7172", "127.0.0.1:7171"}) 112 | rsl := dis.Build(appid) 113 | ch := rsl.Watch() 114 | <-ch 115 | ins, ok := rsl.Fetch() 116 | So(ok, ShouldBeTrue) 117 | So(len(ins.Instances["test"]), ShouldEqual, 1) 118 | So(ins.Instances["test"][0].AppID, ShouldEqual, appid) 119 | instance2 := &Instance{ 120 | Region: "test", 121 | Zone: "test", 122 | Env: "test", 123 | AppID: appid, 124 | Addrs: []string{"http://127.0.0.1:8000"}, 125 | Hostname: "test-host2", 126 | } 127 | err := addNewInstance(instance2) 128 | So(err, ShouldBeNil) 129 | // watch for next update 130 | <-ch 131 | ins, ok = rsl.Fetch() 132 | So(ok, ShouldBeTrue) 133 | So(len(ins.Instances["test"]), ShouldEqual, 2) 134 | So(ins.Instances["test"][0].AppID, ShouldEqual, appid) 135 | rsl.Close() 136 | conf.Nodes = []string{"127.0.0.1:7172"} 137 | dis.Reload(conf) 138 | So(dis.Scheme(), ShouldEqual, "discovery") 139 | dis.Close() 140 | }) 141 | 142 | } 143 | 144 | func addNewInstance(ins *Instance) error { 145 | cli := xhttp.NewClient(&xhttp.ClientConfig{ 146 | Timeout: xtime.Duration(time.Second * 30), 147 | Dial: xtime.Duration(time.Second), 148 | KeepAlive: xtime.Duration(time.Second * 30), 149 | }) 150 | params := url.Values{} 151 | params.Set("env", ins.Env) 152 | params.Set("zone", ins.Zone) 153 | params.Set("hostname", ins.Hostname) 154 | params.Set("appid", ins.AppID) 155 | params.Set("addrs", strings.Join(ins.Addrs, ",")) 156 | params.Set("version", ins.Version) 157 | params.Set("status", "1") 158 | params.Set("latest_timestamp", strconv.FormatInt(time.Now().UnixNano(), 10)) 159 | res := new(struct { 160 | Code int `json:"code"` 161 | }) 162 | return cli.Post(context.TODO(), "http://127.0.0.1:7171/discovery/register", "", params, &res) 163 | } 164 | 165 | func TestUseScheduler(t *testing.T) { 166 | newIns := func() *InstancesInfo { 167 | insInfo := &InstancesInfo{} 168 | insInfo.Instances = make(map[string][]*Instance) 169 | insInfo.Instances["sh001"] = []*Instance{ 170 | &Instance{Zone: "sh001", Metadata: map[string]string{ 171 | "weight": "10", 172 | }}, 173 | &Instance{Zone: "sh001", Metadata: map[string]string{ 174 | "weight": "10", 175 | }}, 176 | } 177 | insInfo.Instances["sh002"] = []*Instance{ 178 | &Instance{Zone: "sh002", Metadata: map[string]string{ 179 | "weight": "5", 180 | }}, 181 | &Instance{Zone: "sh002", Metadata: map[string]string{ 182 | "weight": "2", 183 | }}, 184 | } 185 | insInfo.Instances["sh003"] = []*Instance{ 186 | &Instance{Zone: "sh003", Metadata: map[string]string{ 187 | "weight": "5", 188 | }}, 189 | &Instance{Zone: "sh003", Metadata: map[string]string{ 190 | "weight": "3", 191 | }}, 192 | } 193 | insInfo.Scheduler = []Zone{ 194 | Zone{ 195 | Src: "sh001", 196 | Dst: map[string]int64{ 197 | "sh001": 2, 198 | "sh002": 1, 199 | }, 200 | }, 201 | Zone{ 202 | Src: "sh002", 203 | Dst: map[string]int64{ 204 | "sh001": 1, 205 | "sh002": 2, 206 | }, 207 | }, 208 | } 209 | return insInfo 210 | } 211 | Convey("use scheduler for sh001", t, func() { 212 | insInfo := newIns() 213 | inss := insInfo.UseScheduler("sh001") 214 | So(len(inss), ShouldEqual, 4) 215 | logInss(t, "sh001", inss) 216 | }) 217 | Convey("use scheduler for sh002", t, func() { 218 | insInfo := newIns() 219 | inss := insInfo.UseScheduler("sh002") 220 | So(len(inss), ShouldEqual, 4) 221 | logInss(t, "sh002", inss) 222 | }) 223 | Convey("use scheduler for sh003 without scheduler", t, func() { 224 | insInfo := newIns() 225 | inss := insInfo.UseScheduler("sh003") 226 | So(len(inss), ShouldEqual, 2) 227 | logInss(t, 
"sh003", inss) 228 | }) 229 | Convey("zone not exit", t, func() { 230 | insInfo := newIns() 231 | inss := insInfo.UseScheduler("sh004") 232 | So(len(inss), ShouldEqual, 6) 233 | logInss(t, "sh004", inss) 234 | }) 235 | } 236 | 237 | func logInss(t *testing.T, msg string, inss []*Instance) { 238 | t.Log("instance of", msg) 239 | for _, in := range inss { 240 | t.Logf("%+v", in) 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /naming/example_test.go: -------------------------------------------------------------------------------- 1 | package naming_test 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/bilibili/discovery/naming" 8 | ) 9 | 10 | // This Example register a server provider into discovery. 11 | func ExampleDiscovery_Register() { 12 | conf := &naming.Config{ 13 | Nodes: []string{"127.0.0.1:7171"}, // NOTE: 配置种子节点(1个或多个),client内部可根据/discovery/nodes节点获取全部node(方便后面增减节点) 14 | Zone: "sh1", 15 | Env: "test", 16 | } 17 | dis := naming.New(conf) 18 | ins := &naming.Instance{ 19 | Zone: "sh1", 20 | Env: "test", 21 | AppID: "provider", 22 | // Hostname:"", // NOTE: hostname 不需要,会优先使用discovery new时Config配置的值,如没有则从os.Hostname方法获取!!! 23 | Addrs: []string{"http://172.0.0.1:8888", "grpc://172.0.0.1:9999"}, 24 | LastTs: time.Now().Unix(), 25 | Metadata: map[string]string{"weight": "10"}, 26 | } 27 | cancel, _ := dis.Register(ins) 28 | defer cancel() // NOTE: 注意一般在进程退出的时候执行,会调用discovery的cancel接口,使实例从discovery移除 29 | fmt.Println("register") 30 | // Unordered output4 31 | } 32 | 33 | type consumer struct { 34 | conf *naming.Config 35 | appID string 36 | dis naming.Resolver 37 | ins []*naming.Instance 38 | } 39 | 40 | // This Example show how get watch a server provider and get provider instances. 41 | func ExampleResolver_Watch() { 42 | conf := &naming.Config{ 43 | Nodes: []string{"127.0.0.1:7171"}, 44 | Zone: "sh1", 45 | Env: "test", 46 | } 47 | dis := naming.New(conf) 48 | c := &consumer{ 49 | conf: conf, 50 | appID: "provider", 51 | dis: dis.Build("provider"), 52 | } 53 | rsl := dis.Build(c.appID) 54 | ch := rsl.Watch() 55 | go c.getInstances(ch) 56 | in := c.getInstance() 57 | _ = in 58 | } 59 | 60 | func (c *consumer) getInstances(ch <-chan struct{}) { 61 | for { // NOTE: 通过watch返回的event chan => 62 | if _, ok := <-ch; !ok { 63 | return 64 | } 65 | // NOTE: <= 实时fetch最新的instance实例 66 | ins, ok := c.dis.Fetch() 67 | if !ok { 68 | continue 69 | } 70 | // get local zone instances, otherwise get all zone instances. 71 | if in, ok := ins.Instances[c.conf.Zone]; ok { 72 | c.ins = in 73 | } else { 74 | for _, in := range ins.Instances { 75 | c.ins = append(c.ins, in...) 76 | } 77 | } 78 | } 79 | } 80 | 81 | func (c *consumer) getInstance() (ins *naming.Instance) { 82 | // get instance by loadbalance 83 | // you can use any loadbalance algorithm what you want. 
84 | return 85 | } 86 | -------------------------------------------------------------------------------- /naming/grpc/resolver.go: -------------------------------------------------------------------------------- 1 | package resolver 2 | 3 | import ( 4 | "fmt" 5 | "net/url" 6 | "os" 7 | "strconv" 8 | "strings" 9 | 10 | "github.com/bilibili/discovery/naming" 11 | 12 | log "github.com/go-kratos/kratos/pkg/log" 13 | "google.golang.org/grpc/resolver" 14 | ) 15 | 16 | const ( 17 | // Scheme is the scheme of discovery address 18 | Scheme = "grpc" 19 | ) 20 | 21 | var ( 22 | _ resolver.Resolver = &Resolver{} 23 | _ resolver.Builder = &Builder{} 24 | ) 25 | 26 | // MD is context metadata for balancer and resolver. 27 | type MD struct { 28 | Weight int64 29 | Color string 30 | } 31 | 32 | // Register register resolver builder if nil. 33 | func Register(b naming.Builder) { 34 | resolver.Register(&Builder{b}) 35 | } 36 | 37 | // Builder is also a resolver builder. 38 | // It's build() function always returns itself. 39 | type Builder struct { 40 | naming.Builder 41 | } 42 | 43 | // Build returns itself for Resolver, because it's both a builder and a resolver. 44 | func (b *Builder) Build(target resolver.Target, cc resolver.ClientConn, opts resolver.BuildOptions) (resolver.Resolver, error) { 45 | // discovery://default/service.name?zone=sh001&cluster=c1&cluster=c2&cluster=c3 46 | dsn := strings.SplitN(target.Endpoint, "?", 2) 47 | if len(dsn) == 0 { 48 | return nil, fmt.Errorf("grpc resolver: parse target.Endpoint(%s) failed! the endpoint is empty", target.Endpoint) 49 | } 50 | // parse params info 51 | zone := os.Getenv("ZONE") 52 | clusters := map[string]struct{}{} 53 | if len(dsn) == 2 { 54 | if u, err := url.ParseQuery(dsn[1]); err == nil { 55 | if zones := u[naming.MetaZone]; len(zones) > 0 { 56 | zone = zones[0] 57 | } 58 | for _, c := range u[naming.MetaCluster] { 59 | clusters[c] = struct{}{} 60 | } 61 | } 62 | } 63 | r := &Resolver{ 64 | cc: cc, 65 | nr: b.Builder.Build(dsn[0]), 66 | quit: make(chan struct{}, 1), 67 | zone: zone, 68 | clusters: clusters, 69 | } 70 | go r.watcher() 71 | return r, nil 72 | } 73 | 74 | // Resolver watches for the updates on the specified target. 75 | // Updates include address updates and service config updates. 76 | type Resolver struct { 77 | nr naming.Resolver 78 | cc resolver.ClientConn 79 | quit chan struct{} 80 | 81 | zone string 82 | clusters map[string]struct{} 83 | } 84 | 85 | // Close is a noop for Resolver. 86 | func (r *Resolver) Close() { 87 | select { 88 | case r.quit <- struct{}{}: 89 | r.nr.Close() 90 | default: 91 | } 92 | } 93 | 94 | // ResolveNow is a noop for Resolver. 95 | func (r *Resolver) ResolveNow(o resolver.ResolveNowOptions) { 96 | } 97 | 98 | func (r *Resolver) watcher() { 99 | event := r.nr.Watch() 100 | for { 101 | select { 102 | case <-r.quit: 103 | return 104 | case _, ok := <-event: 105 | if !ok { 106 | return 107 | } 108 | } 109 | ins, ok := r.nr.Fetch() 110 | if ok { 111 | instances, ok := ins.Instances[r.zone] 112 | if !ok { 113 | for _, value := range ins.Instances { 114 | instances = append(instances, value...) 
115 | } 116 | } 117 | if len(instances) > 0 { 118 | r.newAddress(instances) 119 | } 120 | } 121 | } 122 | } 123 | 124 | func (r *Resolver) newAddress(instances []*naming.Instance) { 125 | var ( 126 | totalWeight int64 127 | addrs = make([]resolver.Address, 0, len(instances)) 128 | ) 129 | for n, ins := range instances { 130 | if len(r.clusters) > 0 { 131 | if _, ok := r.clusters[ins.Metadata[naming.MetaCluster]]; !ok { 132 | continue 133 | } 134 | } 135 | rpcAddr, color, weight := extractAddrs(ins) 136 | if rpcAddr == "" { 137 | log.Warn("grpc resolver: invalid rpc address(%s,%s,%v) found!", ins.AppID, ins.Hostname, ins.Addrs) 138 | continue 139 | } 140 | if weight <= 0 { 141 | if totalWeight == 0 { 142 | weight = 10 143 | } else { 144 | weight = totalWeight / int64(n) 145 | } 146 | } 147 | totalWeight += weight 148 | addr := resolver.Address{ 149 | Addr: rpcAddr, 150 | Type: resolver.Backend, 151 | ServerName: ins.AppID, 152 | Metadata: &MD{Weight: weight, Color: color}, 153 | } 154 | addrs = append(addrs, addr) 155 | } 156 | r.cc.NewAddress(addrs) 157 | } 158 | 159 | func extractAddrs(ins *naming.Instance) (addr, color string, weight int64) { 160 | color = ins.Metadata[naming.MetaColor] 161 | weight, _ = strconv.ParseInt(ins.Metadata[naming.MetaWeight], 10, 64) 162 | for _, a := range ins.Addrs { 163 | u, err := url.Parse(a) 164 | if err == nil && u.Scheme == Scheme { 165 | addr = u.Host 166 | } 167 | } 168 | return 169 | } 170 | -------------------------------------------------------------------------------- /naming/grpc/resolver_test.go: -------------------------------------------------------------------------------- 1 | package resolver 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | "time" 7 | 8 | "github.com/bilibili/discovery/naming" 9 | 10 | "google.golang.org/grpc/resolver" 11 | "google.golang.org/grpc/serviceconfig" 12 | ) 13 | 14 | type mockResolver struct { 15 | } 16 | 17 | func (m *mockResolver) Fetch() (insInfo *naming.InstancesInfo, ok bool) { 18 | ins := make(map[string][]*naming.Instance) 19 | ins["sh001"] = []*naming.Instance{ 20 | &naming.Instance{Addrs: []string{"http://127.0.0.1:8080", "grpc://127.0.0.1:9090"}, Metadata: map[string]string{naming.MetaCluster: "c1"}}, 21 | &naming.Instance{Addrs: []string{"http://127.0.0.2:8080", "grpc://127.0.0.2:9090"}, Metadata: map[string]string{naming.MetaCluster: "c1"}}, 22 | &naming.Instance{Addrs: []string{"http://127.0.0.3:8080", "grpc://127.0.0.3:9090"}, Metadata: map[string]string{naming.MetaCluster: "c1"}}, 23 | &naming.Instance{Addrs: []string{"http://127.0.0.4:8080", "grpc://127.0.0.4:9090"}, Metadata: map[string]string{naming.MetaCluster: "c2"}}, 24 | &naming.Instance{Addrs: []string{"http://127.0.0.5:8080", "grpc://127.0.0.5:9090"}, Metadata: map[string]string{naming.MetaCluster: "c3"}}, 25 | &naming.Instance{Addrs: []string{"http://127.0.0.5:8080", "grpc://127.0.0.5:9090"}, Metadata: map[string]string{naming.MetaCluster: "c4"}}, 26 | } 27 | ins["sh002"] = []*naming.Instance{ 28 | &naming.Instance{Addrs: []string{"http://127.0.0.1:8080", "grpc://127.0.0.1:9090"}}, 29 | &naming.Instance{Addrs: []string{"http://127.0.0.2:8080", "grpc://127.0.0.2:9090"}}, 30 | &naming.Instance{Addrs: []string{"http://127.0.0.3:8080", "grpc://127.0.0.3:9090"}}, 31 | } 32 | insInfo = &naming.InstancesInfo{ 33 | Instances: ins, 34 | } 35 | ok = true 36 | return 37 | } 38 | 39 | func (m *mockResolver) Watch() <-chan struct{} { 40 | event := make(chan struct{}, 10) 41 | event <- struct{}{} 42 | event <- struct{}{} 43 | event <- struct{}{} 44 | 
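	// Three events are pre-buffered so the resolver's watcher loop fires
	// immediately when the test builds the Resolver.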
return event 45 | } 46 | 47 | func (m *mockResolver) Close() (err error) { 48 | return 49 | } 50 | 51 | type mockBuilder struct { 52 | } 53 | 54 | func (m *mockBuilder) Build(id string) naming.Resolver { 55 | return &mockResolver{} 56 | } 57 | 58 | func (m *mockBuilder) Scheme() string { 59 | return "discovery" 60 | } 61 | 62 | type mockClientConn struct { 63 | mu sync.Mutex 64 | addrs []resolver.Address 65 | } 66 | 67 | func (m *mockClientConn) NewAddress(addresses []resolver.Address) { 68 | m.mu.Lock() 69 | m.addrs = addresses 70 | m.mu.Unlock() 71 | } 72 | func (m *mockClientConn) NewServiceConfig(serviceConfig string) { 73 | 74 | } 75 | func (m *mockClientConn) UpdateState(state resolver.State) { 76 | 77 | } 78 | func (m *mockClientConn) ReportError(error) { 79 | } 80 | 81 | func (m *mockClientConn) ParseServiceConfig(serviceConfigJSON string) *serviceconfig.ParseResult { 82 | return nil 83 | } 84 | 85 | func TestBuilder(t *testing.T) { 86 | target := resolver.Target{Endpoint: "discovery://default/service.name?zone=sh001&cluster=c1&cluster=c2&cluster=c3"} 87 | mb := &mockBuilder{} 88 | b := &Builder{mb} 89 | cc := &mockClientConn{} 90 | r, err := b.Build(target, cc, resolver.BuildOptions{}) 91 | if err != nil { 92 | t.Fatal(err) 93 | } 94 | res := r.(*Resolver) 95 | if res.zone != "sh001" { 96 | t.Fatalf("want sh001, but got:%s", res.zone) 97 | } 98 | if len(res.clusters) != 3 { 99 | t.Fatalf("want c1,c2,c3, but got:%v", res.clusters) 100 | } 101 | time.Sleep(time.Second) 102 | cc.mu.Lock() 103 | defer cc.mu.Unlock() 104 | if len(cc.addrs) != 5 { 105 | t.Fatalf("want 5, but got:%v", cc.addrs) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /naming/naming.go: -------------------------------------------------------------------------------- 1 | package naming 2 | 3 | import ( 4 | "context" 5 | "strconv" 6 | ) 7 | 8 | // metadata common keys 9 | const ( 10 | MetaWeight = "weight" 11 | MetaCluster = "cluster" 12 | MetaZone = "zone" 13 | MetaColor = "color" 14 | ) 15 | 16 | // Instance represents a server the client connects to. 17 | type Instance struct { 18 | // Region is region. 19 | Region string `json:"region"` 20 | // Zone is IDC. 21 | Zone string `json:"zone"` 22 | // Env is the deploy environment: prod/pre, uat/fat1. 23 | Env string `json:"env"` 24 | // AppID is the mapped servicetree appid. 25 | AppID string `json:"appid"` 26 | // Hostname is hostname from docker. 27 | Hostname string `json:"hostname"` 28 | // Addrs are the addresses of the app instance 29 | // format: scheme://host 30 | Addrs []string `json:"addrs"` 31 | // Version is publishing version. 32 | Version string `json:"version"` 33 | // LastTs is the instance's latest updated timestamp 34 | LastTs int64 `json:"latest_timestamp"` 35 | // Metadata is the information associated with Addr, which may be used 36 | // to make load balancing decisions. 37 | Metadata map[string]string `json:"metadata"` 38 | } 39 | 40 | // Resolver resolves the naming service. 41 | type Resolver interface { 42 | Fetch() (*InstancesInfo, bool) 43 | Watch() <-chan struct{} 44 | Close() error 45 | } 46 | 47 | // Registry registers an instance and renews it automatically. 48 | type Registry interface { 49 | Register(ins *Instance) (cancel context.CancelFunc, err error) 50 | Close() error 51 | } 52 | 53 | // Builder is a resolver builder. 54 | type Builder interface { 55 | Build(id string) Resolver 56 | Scheme() string 57 | } 58 | 59 | // InstancesInfo instance info. 
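// Instances are grouped by zone name; Scheduler optionally carries the
// cross-zone traffic weights consumed by UseScheduler.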
60 | type InstancesInfo struct { 61 | Instances map[string][]*Instance `json:"instances"` 62 | LastTs int64 `json:"latest_timestamp"` 63 | Scheduler []Zone `json:"scheduler"` 64 | } 65 | 66 | // Zone is zone scheduler info. 67 | type Zone struct { 68 | Src string `json:"src"` 69 | Dst map[string]int64 `json:"dst"` 70 | } 71 | 72 | // UseScheduler applies scheduler info to the instances. 73 | // If instancesInfo contains scheduler info for the zone, 74 | // the related zones' instances are returned, weighted by the scheduler. 75 | // If not, only the zone's own instances are returned. 76 | func (insInf *InstancesInfo) UseScheduler(zone string) (inss []*Instance) { 77 | var scheduler struct { 78 | zone []string 79 | weights []int64 80 | } 81 | var oriWeights []int64 82 | for _, sch := range insInf.Scheduler { 83 | if sch.Src == zone { 84 | for zone, schWeight := range sch.Dst { 85 | if zins, ok := insInf.Instances[zone]; ok { 86 | var totalWeight int64 87 | for _, ins := range zins { 88 | var weight int64 89 | if weight, _ = strconv.ParseInt(ins.Metadata[MetaWeight], 10, 64); weight <= 0 { 90 | weight = 10 91 | } 92 | totalWeight += weight 93 | } 94 | oriWeights = append(oriWeights, totalWeight) 95 | inss = append(inss, zins...) 96 | } 97 | scheduler.weights = append(scheduler.weights, schWeight) 98 | scheduler.zone = append(scheduler.zone, zone) 99 | } 100 | } 101 | } 102 | if len(inss) == 0 { 103 | var ok bool 104 | if inss, ok = insInf.Instances[zone]; ok { 105 | return 106 | } 107 | for _, v := range insInf.Instances { 108 | inss = append(inss, v...) 109 | } 110 | return 111 | } 112 | var comMulti int64 = 1 113 | for _, weight := range oriWeights { 114 | comMulti *= weight 115 | } 116 | var fixWeight = make(map[string]int64, len(scheduler.weights)) 117 | for i, zone := range scheduler.zone { 118 | fixWeight[zone] = scheduler.weights[i] * comMulti / oriWeights[i] 119 | } 120 | for _, ins := range inss { 121 | var weight int64 122 | if weight, _ = strconv.ParseInt(ins.Metadata[MetaWeight], 10, 64); weight <= 0 { 123 | weight = 10 124 | } 125 | if fix, ok := fixWeight[ins.Zone]; ok { 126 | weight = weight * fix 127 | } 128 | ins.Metadata[MetaWeight] = strconv.FormatInt(weight, 10) 129 | } 130 | return 131 | } 132 | -------------------------------------------------------------------------------- /registry/guard.go: -------------------------------------------------------------------------------- 1 | package registry 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | 7 | log "github.com/go-kratos/kratos/pkg/log" 8 | ) 9 | 10 | const ( 11 | _percentThreshold float64 = 0.85 12 | ) 13 | 14 | // Guard counts the renews of all operations for self protection 15 | type Guard struct { 16 | expPerMin int64 17 | expThreshold int64 18 | facInMin int64 19 | facLastMin int64 20 | lock sync.RWMutex 21 | } 22 | 23 | func (g *Guard) setExp(cnt int64) { 24 | g.lock.Lock() 25 | g.expPerMin = cnt * 2 26 | g.expThreshold = int64(float64(g.expPerMin) * _percentThreshold) 27 | g.lock.Unlock() 28 | } 29 | 30 | func (g *Guard) incrExp() { 31 | g.lock.Lock() 32 | g.expPerMin = g.expPerMin + 2 33 | g.expThreshold = int64(float64(g.expPerMin) * _percentThreshold) 34 | g.lock.Unlock() 35 | } 36 | 37 | func (g *Guard) updateFac() { 38 | atomic.StoreInt64(&g.facLastMin, atomic.SwapInt64(&g.facInMin, 0)) 39 | } 40 | 41 | func (g *Guard) decrExp() { 42 | g.lock.Lock() 43 | if g.expPerMin > 0 { 44 | g.expPerMin = g.expPerMin - 2 45 | g.expThreshold = int64(float64(g.expPerMin) * _percentThreshold) 46 | } 47 | g.lock.Unlock() 48 | } 49 | 50 | func (g *Guard) incrFac() 
{ 51 | atomic.AddInt64(&g.facInMin, 1) 52 | } 53 | 54 | func (g *Guard) ok() (is bool) { 55 | is = atomic.LoadInt64(&g.facLastMin) < atomic.LoadInt64(&g.expThreshold) 56 | if is { 57 | log.Warn("discovery is protected, the factual renews(%d) less than expected renews(%d)", atomic.LoadInt64(&g.facLastMin), atomic.LoadInt64(&g.expThreshold)) 58 | } 59 | return 60 | } 61 | -------------------------------------------------------------------------------- /registry/guard_test.go: -------------------------------------------------------------------------------- 1 | package registry 2 | 3 | import ( 4 | "testing" 5 | 6 | . "github.com/smartystreets/goconvey/convey" 7 | ) 8 | 9 | func TestIncrExp(t *testing.T) { 10 | Convey("test IncrExp", t, func() { 11 | re := new(Guard) 12 | re.incrExp() 13 | So(re.expPerMin, ShouldResemble, int64(2)) 14 | }) 15 | } 16 | 17 | func TestDecrExp(t *testing.T) { 18 | Convey("test DecrExp", t, func() { 19 | re := new(Guard) 20 | re.incrExp() 21 | re.decrExp() 22 | So(re.expPerMin, ShouldResemble, int64(0)) 23 | }) 24 | } 25 | 26 | func TestSetExp(t *testing.T) { 27 | Convey("test SetExp", t, func() { 28 | re := new(Guard) 29 | re.setExp(10) 30 | So(re.expPerMin, ShouldResemble, int64(20)) 31 | So(re.expThreshold, ShouldResemble, int64(17)) 32 | }) 33 | } 34 | 35 | func TestUpdateFac(t *testing.T) { 36 | Convey("test UpdateFac", t, func() { 37 | re := new(Guard) 38 | re.incrFac() 39 | re.updateFac() 40 | So(re.facLastMin, ShouldResemble, int64(1)) 41 | }) 42 | } 43 | 44 | func TestIncrFac(t *testing.T) { 45 | Convey("test IncrFac", t, func() { 46 | re := new(Guard) 47 | re.incrFac() 48 | So(re.facInMin, ShouldResemble, int64(1)) 49 | }) 50 | } 51 | 52 | func TestIsProtected(t *testing.T) { 53 | Convey("test IncrFac", t, func() { 54 | re := new(Guard) 55 | re.incrExp() 56 | re.incrExp() 57 | re.incrFac() 58 | re.updateFac() 59 | So(re.ok(), ShouldBeTrue) 60 | re = new(Guard) 61 | re.incrExp() 62 | re.incrFac() 63 | re.updateFac() 64 | So(re.ok(), ShouldBeFalse) 65 | }) 66 | } 67 | -------------------------------------------------------------------------------- /registry/node.go: -------------------------------------------------------------------------------- 1 | package registry 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "net/url" 8 | "strconv" 9 | "strings" 10 | 11 | "github.com/bilibili/discovery/conf" 12 | "github.com/bilibili/discovery/model" 13 | "github.com/go-kratos/kratos/pkg/ecode" 14 | log "github.com/go-kratos/kratos/pkg/log" 15 | http "github.com/go-kratos/kratos/pkg/net/http/blademaster" 16 | ) 17 | 18 | const ( 19 | _registerURL = "/discovery/register" 20 | _cancelURL = "/discovery/cancel" 21 | _renewURL = "/discovery/renew" 22 | _setURL = "/discovery/set" 23 | ) 24 | 25 | // Node represents a peer node to which information should be shared from this node. 26 | // 27 | // This struct handles replicating all update operations like 'Register,Renew,Cancel,Expiration and Status Changes' 28 | // to the node it represents. 29 | type Node struct { 30 | c *conf.Config 31 | 32 | // url 33 | client *http.Client 34 | pRegisterURL string 35 | registerURL string 36 | cancelURL string 37 | renewURL string 38 | setURL string 39 | 40 | addr string 41 | status model.NodeStatus 42 | zone string 43 | otherZone bool 44 | } 45 | 46 | // newNode return a node. 
47 | func newNode(c *conf.Config, addr string) (n *Node) { 48 | n = &Node{ 49 | c: c, 50 | // url 51 | client: http.NewClient(c.HTTPClient), 52 | registerURL: fmt.Sprintf("http://%s%s", addr, _registerURL), 53 | cancelURL: fmt.Sprintf("http://%s%s", addr, _cancelURL), 54 | renewURL: fmt.Sprintf("http://%s%s", addr, _renewURL), 55 | setURL: fmt.Sprintf("http://%s%s", addr, _setURL), 56 | 57 | addr: addr, 58 | status: model.NodeStatusLost, 59 | } 60 | return 61 | } 62 | 63 | // Register sends the registration information of an Instance received by this node to the peer node it represents. 64 | func (n *Node) Register(c context.Context, i *model.Instance) (err error) { 65 | err = n.call(c, model.Register, i, n.registerURL, nil) 66 | if err != nil { 67 | log.Warn("node be called(%s) register instance(%v) error(%v)", n.registerURL, i, err) 68 | } 69 | return 70 | } 71 | 72 | // Cancel sends the cancellation information of an Instance received by this node to the peer node it represents. 73 | func (n *Node) Cancel(c context.Context, i *model.Instance) (err error) { 74 | err = n.call(c, model.Cancel, i, n.cancelURL, nil) 75 | if err != nil { 76 | log.Warn("node be called(%s) instance(%v) already canceled", n.cancelURL, i) 77 | } 78 | return 79 | } 80 | 81 | // Renew sends the heartbeat information of an Instance received by this node to the peer node it represents. 82 | // If the instance does not exist on the peer node, the registration information is sent again. 83 | func (n *Node) Renew(c context.Context, i *model.Instance) (err error) { 84 | var res *model.Instance 85 | err = n.call(c, model.Renew, i, n.renewURL, &res) 86 | if err == ecode.ServerErr { 87 | log.Warn("node be called(%s) instance(%v) error(%v)", n.renewURL, i, err) 88 | n.status = model.NodeStatusLost 89 | return 90 | } 91 | n.status = model.NodeStatusUP 92 | if err == ecode.NothingFound { 93 | log.Warn("node be called(%s) instance(%v) error(%v)", n.renewURL, i, err) 94 | err = n.call(c, model.Register, i, n.registerURL, nil) 95 | return 96 | } 97 | // NOTE: on conflict, register the instance returned by the peer back to this node 98 | if err == ecode.Conflict && res != nil { 99 | err = n.call(c, model.Register, res, n.pRegisterURL, nil) 100 | } 101 | return 102 | } 103 | 104 | // Set sends the instance information set on this node to the peer node it represents. 105 | func (n *Node) Set(c context.Context, arg *model.ArgSet) (err error) { 106 | err = n.setCall(c, arg, n.setURL) 107 | return 108 | } 109 | func (n *Node) call(c context.Context, action model.Action, i *model.Instance, uri string, data interface{}) (err error) { 110 | params := url.Values{} 111 | params.Set("region", i.Region) 112 | params.Set("zone", i.Zone) 113 | params.Set("env", i.Env) 114 | params.Set("appid", i.AppID) 115 | params.Set("hostname", i.Hostname) 116 | params.Set("from_zone", "true") 117 | if n.otherZone { 118 | params.Set("replication", "false") 119 | } else { 120 | params.Set("replication", "true") 121 | } 122 | switch action { 123 | case model.Register: 124 | params.Set("addrs", strings.Join(i.Addrs, ",")) 125 | params.Set("status", strconv.FormatUint(uint64(i.Status), 10)) 126 | params.Set("version", i.Version) 127 | meta, _ := json.Marshal(i.Metadata) 128 | params.Set("metadata", string(meta)) 129 | params.Set("reg_timestamp", strconv.FormatInt(i.RegTimestamp, 10)) 130 | params.Set("dirty_timestamp", strconv.FormatInt(i.DirtyTimestamp, 10)) 131 | params.Set("latest_timestamp", strconv.FormatInt(i.LatestTimestamp, 10)) 132 | case model.Renew: 133 | 
params.Set("dirty_timestamp", strconv.FormatInt(i.DirtyTimestamp, 10)) 134 | case model.Cancel: 135 | params.Set("latest_timestamp", strconv.FormatInt(i.LatestTimestamp, 10)) 136 | } 137 | var res struct { 138 | Code int `json:"code"` 139 | Data json.RawMessage `json:"data"` 140 | } 141 | if err = n.client.Post(c, uri, "", params, &res); err != nil { 142 | log.Error("node be called(%s) instance(%v) error(%v)", uri, i, err) 143 | return 144 | } 145 | if res.Code != 0 { 146 | log.Error("node be called(%s) instance(%v) response code(%v)", uri, i, res.Code) 147 | if err = ecode.Int(res.Code); err == ecode.Conflict { 148 | _ = json.Unmarshal([]byte(res.Data), data) 149 | } 150 | } 151 | return 152 | } 153 | 154 | func (n *Node) setCall(c context.Context, arg *model.ArgSet, uri string) (err error) { 155 | params := url.Values{} 156 | params.Set("region", arg.Region) 157 | params.Set("zone", arg.Zone) 158 | params.Set("env", arg.Env) 159 | params.Set("appid", arg.AppID) 160 | params.Set("set_timestamp", strconv.FormatInt(arg.SetTimestamp, 10)) 161 | params.Set("from_zone", "true") 162 | if n.otherZone { 163 | params.Set("replication", "false") 164 | } else { 165 | params.Set("replication", "true") 166 | } 167 | if len(arg.Hostname) != 0 { 168 | for _, name := range arg.Hostname { 169 | params.Add("hostname", name) 170 | } 171 | } 172 | if len(arg.Status) != 0 { 173 | for _, status := range arg.Status { 174 | params.Add("status", strconv.FormatInt(status, 10)) 175 | } 176 | } 177 | if len(arg.Metadata) != 0 { 178 | for _, metadata := range arg.Metadata { 179 | params.Add("metadata", metadata) 180 | } 181 | } 182 | var res struct { 183 | Code int `json:"code"` 184 | } 185 | if err = n.client.Post(c, uri, "", params, &res); err != nil { 186 | log.Error("node be setCalled(%s) appid(%s) env (%s) error(%v)", uri, arg.AppID, arg.Env, err) 187 | return 188 | } 189 | if res.Code != 0 { 190 | log.Error("node be setCalled(%s) appid(%s) env (%s) responce code(%v)", uri, arg.AppID, arg.Env, res.Code) 191 | err = ecode.Int(res.Code) 192 | } 193 | return 194 | } 195 | -------------------------------------------------------------------------------- /registry/node_test.go: -------------------------------------------------------------------------------- 1 | package registry 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | "strings" 7 | "testing" 8 | "time" 9 | 10 | dc "github.com/bilibili/discovery/conf" 11 | "github.com/bilibili/discovery/model" 12 | "github.com/go-kratos/kratos/pkg/ecode" 13 | bm "github.com/go-kratos/kratos/pkg/net/http/blademaster" 14 | "github.com/go-kratos/kratos/pkg/net/http/blademaster/binding" 15 | xtime "github.com/go-kratos/kratos/pkg/time" 16 | 17 | . 
"github.com/smartystreets/goconvey/convey" 18 | gock "gopkg.in/h2non/gock.v1" 19 | ) 20 | 21 | var config = newConfig() 22 | 23 | func newConfig() *dc.Config { 24 | c := &dc.Config{ 25 | HTTPClient: &bm.ClientConfig{ 26 | Timeout: xtime.Duration(time.Second * 30), 27 | Dial: xtime.Duration(time.Second), 28 | KeepAlive: xtime.Duration(time.Second * 30), 29 | }, 30 | HTTPServer: &bm.ServerConfig{Addr: "127.0.0.1:7171"}, 31 | Nodes: []string{"127.0.0.1:7172"}, 32 | Env: &dc.Env{ 33 | Zone: "sh001", 34 | DeployEnv: "pre", 35 | Host: "test_server", 36 | }, 37 | } 38 | return c 39 | } 40 | func TestReplicate(t *testing.T) { 41 | Convey("test replicate", t, func() { 42 | i := model.NewInstance(reg) 43 | nodes := NewNodes(config) 44 | nodes.nodes[0].client.SetTransport(gock.DefaultTransport) 45 | httpMock("POST", "http://127.0.0.1:7172/discovery/register").Reply(200).JSON(`{"code":0}`) 46 | err := nodes.Replicate(context.TODO(), model.Register, i, false) 47 | So(err, ShouldBeNil) 48 | err = nodes.Replicate(context.TODO(), model.Renew, i, false) 49 | So(err, ShouldBeNil) 50 | err = nodes.Replicate(context.TODO(), model.Cancel, i, false) 51 | So(err, ShouldBeNil) 52 | }) 53 | } 54 | func match(h *http.Request, mock *gock.Request) (ok bool, err error) { 55 | ok = true 56 | err = nil 57 | var arg = new(model.ArgSet) 58 | err = binding.Form.Bind(h, arg) 59 | 60 | if h.URL.Path == "/discovery/set" { 61 | if err != nil { 62 | mock.Reply(200).JSON(`{"ts":1514341945,"code":-400}`) 63 | return 64 | } 65 | if len(arg.Hostname) != len(arg.Status) || len(arg.Hostname) != len(arg.Metadata) { 66 | mock.Reply(200).JSON(`{"ts":1514341945,"code":-400}`) 67 | return 68 | } 69 | mock.Reply(200).JSON(`{"ts":1514341945,"code":0}`) 70 | } 71 | return 72 | } 73 | 74 | func TestReplicateSet(t *testing.T) { 75 | Convey("test replicate set", t, func(c C) { 76 | nodes := NewNodes(newConfig()) 77 | nodes.nodes[0].client.SetTransport(gock.DefaultTransport) 78 | httpMock("POST", "http://127.0.0.1:7172/discovery/set").AddMatcher(match) 79 | set := &model.ArgSet{ 80 | Region: "shsb", 81 | Zone: "sh001", 82 | Env: "pre", 83 | AppID: "main.arch.account-service", 84 | Hostname: []string{"test1", "test2"}, 85 | Status: []int64{1, 1}, 86 | Metadata: []string{`{"aa":1,"bb:2"}`, `{"aa":1,"bb:3"}`}, 87 | } 88 | err := nodes.ReplicateSet(context.TODO(), set, false) 89 | c.So(err, ShouldBeNil) 90 | set = &model.ArgSet{ 91 | Region: "shsb", 92 | Zone: "sh001", 93 | Env: "pre", 94 | AppID: "main.arch.account-service", 95 | Hostname: []string{"test1", "test2"}, 96 | Status: []int64{1, 1}, 97 | Metadata: []string{`{"aa":1,"bb:2"}`}, 98 | } 99 | err = nodes.ReplicateSet(context.TODO(), set, false) 100 | c.So(err, ShouldNotBeNil) 101 | }) 102 | } 103 | 104 | func TestNodes(t *testing.T) { 105 | Convey("test nodes", t, func() { 106 | nodes := NewNodes(config) 107 | res := nodes.Nodes() 108 | So(len(res), ShouldResemble, 1) 109 | }) 110 | Convey("test all nodes", t, func() { 111 | cfg := newConfig() 112 | cfg.Zones = map[string][]string{"zone": []string{"127.0.0.1:7172"}} 113 | nodes := NewNodes(cfg) 114 | res := nodes.AllNodes() 115 | So(len(res), ShouldResemble, 2) 116 | }) 117 | } 118 | 119 | func TestUp(t *testing.T) { 120 | Convey("test up", t, func() { 121 | nodes := NewNodes(config) 122 | nodes.UP() 123 | for _, nd := range nodes.nodes { 124 | if nd.addr == "127.0.0.1:7171" { 125 | So(nd.status, ShouldResemble, model.NodeStatusUP) 126 | } 127 | } 128 | }) 129 | } 130 | 131 | func TestCall(t *testing.T) { 132 | Convey("test call", t, func() 
{ 133 | var res *model.Instance 134 | node := newNode(newConfig(), "127.0.0.1:7173") 135 | node.client.SetTransport(gock.DefaultTransport) 136 | httpMock("POST", "http://127.0.0.1:7174/discovery/register").Reply(200).JSON(`{"ts":1514341945,"code":-409,"data":{"region":"shsb","zone":"fuck","appid":"main.arch.account-service","env":"pre","hostname":"cs4sq","http":"","rpc":"0.0.0.0:18888","weight":2}}`) 137 | i := model.NewInstance(reg) 138 | err := node.call(context.TODO(), model.Register, i, "http://127.0.0.1:7174/discovery/register", &res) 139 | So(err, ShouldResemble, ecode.Conflict) 140 | So(res.AppID, ShouldResemble, "main.arch.account-service") 141 | }) 142 | } 143 | 144 | func TestNodeCancel(t *testing.T) { 145 | Convey("test node renew 409 error", t, func() { 146 | i := model.NewInstance(reg) 147 | node := newNode(config, "127.0.0.1:7172") 148 | node.pRegisterURL = "http://127.0.0.1:7171/discovery/register" 149 | node.client.SetTransport(gock.DefaultTransport) 150 | httpMock("POST", "http://127.0.0.1:7172/discovery/cancel").Reply(200).JSON(`{"code":0}`) 151 | err := node.Cancel(context.TODO(), i) 152 | So(err, ShouldBeNil) 153 | }) 154 | } 155 | 156 | func TestNodeRenew(t *testing.T) { 157 | Convey("test node renew 409 error", t, func() { 158 | i := model.NewInstance(reg) 159 | node := newNode(config, "127.0.0.1:7172") 160 | node.pRegisterURL = "http://127.0.0.1:7171/discovery/register" 161 | node.client.SetTransport(gock.DefaultTransport) 162 | httpMock("POST", "http://127.0.0.1:7172/discovery/renew").Reply(200).JSON(`{"code":-409,"data":{"region":"shsb","zone":"fuck","appid":"main.arch.account-service","env":"pre","hostname":"cs4sq","http":"","rpc":"0.0.0.0:18888","weight":2}}`) 163 | httpMock("POST", "http://127.0.0.1:7171/discovery/register").Reply(200).JSON(`{"code":0}`) 164 | err := node.Renew(context.TODO(), i) 165 | So(err, ShouldBeNil) 166 | }) 167 | } 168 | 169 | func TestNodeRenew2(t *testing.T) { 170 | Convey("test node renew 404 error", t, func() { 171 | i := model.NewInstance(reg) 172 | node := newNode(config, "127.0.0.1:7172") 173 | node.client.SetTransport(gock.DefaultTransport) 174 | httpMock("POST", "http://127.0.0.1:7172/discovery/renew").Reply(200).JSON(`{"code":-404}`) 175 | httpMock("POST", "http://127.0.0.1:7172/discovery/register").Reply(200).JSON(`{"code":0}`) 176 | httpMock("POST", "http://127.0.0.1:7171/discovery/register").Reply(200).JSON(`{"code":0}`) 177 | err := node.Renew(context.TODO(), i) 178 | So(err, ShouldBeNil) 179 | }) 180 | } 181 | 182 | func httpMock(method, url string) *gock.Request { 183 | r := gock.New(url) 184 | r.Method = strings.ToUpper(method) 185 | return r 186 | } 187 | -------------------------------------------------------------------------------- /registry/nodes.go: -------------------------------------------------------------------------------- 1 | package registry 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "math/rand" 7 | 8 | "github.com/bilibili/discovery/conf" 9 | "github.com/bilibili/discovery/model" 10 | 11 | "golang.org/x/sync/errgroup" 12 | ) 13 | 14 | // Nodes is helper to manage lifecycle of a collection of Nodes. 15 | type Nodes struct { 16 | nodes []*Node 17 | zones map[string][]*Node 18 | selfAddr string 19 | } 20 | 21 | // NewNodes new nodes and return. 
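// Peers in the local zone come from c.Nodes; peers listed in c.Zones are
// marked as other-zone replicas.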
22 | func NewNodes(c *conf.Config) *Nodes { 23 | nodes := make([]*Node, 0, len(c.Nodes)) 24 | for _, addr := range c.Nodes { 25 | n := newNode(c, addr) 26 | n.zone = c.Env.Zone 27 | n.pRegisterURL = fmt.Sprintf("http://%s%s", c.HTTPServer.Addr, _registerURL) 28 | nodes = append(nodes, n) 29 | } 30 | zones := make(map[string][]*Node) 31 | for name, addrs := range c.Zones { 32 | var znodes []*Node 33 | for _, addr := range addrs { 34 | n := newNode(c, addr) 35 | n.otherZone = true 36 | n.zone = name 37 | n.pRegisterURL = fmt.Sprintf("http://%s%s", c.HTTPServer.Addr, _registerURL) 38 | znodes = append(znodes, n) 39 | } 40 | zones[name] = znodes 41 | } 42 | return &Nodes{ 43 | nodes: nodes, 44 | zones: zones, 45 | selfAddr: c.HTTPServer.Addr, 46 | } 47 | } 48 | 49 | // Replicate replicate information to all nodes except for this node. 50 | func (ns *Nodes) Replicate(c context.Context, action model.Action, i *model.Instance, otherZone bool) (err error) { 51 | if len(ns.nodes) == 0 { 52 | return 53 | } 54 | eg, c := errgroup.WithContext(c) 55 | for _, n := range ns.nodes { 56 | if !ns.Myself(n.addr) { 57 | ns.action(c, eg, action, n, i) 58 | } 59 | } 60 | if !otherZone { 61 | for _, zns := range ns.zones { 62 | if n := len(zns); n > 0 { 63 | ns.action(c, eg, action, zns[rand.Intn(n)], i) 64 | } 65 | } 66 | } 67 | err = eg.Wait() 68 | return 69 | } 70 | 71 | // ReplicateSet replicate set information to all nodes except for this node. 72 | func (ns *Nodes) ReplicateSet(c context.Context, arg *model.ArgSet, otherZone bool) (err error) { 73 | if len(ns.nodes) == 0 { 74 | return 75 | } 76 | eg, c := errgroup.WithContext(c) 77 | for _, n := range ns.nodes { 78 | if !ns.Myself(n.addr) { 79 | node := n 80 | eg.Go(func() error { 81 | return node.Set(c, arg) 82 | }) 83 | } 84 | } 85 | if !otherZone { 86 | for _, zns := range ns.zones { 87 | if n := len(zns); n > 0 { 88 | node := zns[rand.Intn(n)] 89 | eg.Go(func() error { 90 | return node.Set(c, arg) 91 | }) 92 | } 93 | } 94 | } 95 | err = eg.Wait() 96 | return 97 | } 98 | func (ns *Nodes) action(c context.Context, eg *errgroup.Group, action model.Action, n *Node, i *model.Instance) { 99 | switch action { 100 | case model.Register: 101 | eg.Go(func() error { 102 | _ = n.Register(c, i) 103 | return nil 104 | }) 105 | case model.Renew: 106 | eg.Go(func() error { 107 | _ = n.Renew(c, i) 108 | return nil 109 | }) 110 | case model.Cancel: 111 | eg.Go(func() error { 112 | _ = n.Cancel(c, i) 113 | return nil 114 | }) 115 | } 116 | } 117 | 118 | // Nodes returns nodes of local zone. 119 | func (ns *Nodes) Nodes() (nsi []*model.Node) { 120 | nsi = make([]*model.Node, 0, len(ns.nodes)) 121 | for _, nd := range ns.nodes { 122 | if nd.otherZone { 123 | continue 124 | } 125 | node := &model.Node{ 126 | Addr: nd.addr, 127 | Status: nd.status, 128 | Zone: nd.zone, 129 | } 130 | nsi = append(nsi, node) 131 | } 132 | return 133 | } 134 | 135 | // AllNodes returns nodes contain other zone nodes. 
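// For every remote zone only one randomly chosen node is included.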
136 | func (ns *Nodes) AllNodes() (nsi []*model.Node) { 137 | nsi = make([]*model.Node, 0, len(ns.nodes)) 138 | for _, nd := range ns.nodes { 139 | node := &model.Node{ 140 | Addr: nd.addr, 141 | Status: nd.status, 142 | Zone: nd.zone, 143 | } 144 | nsi = append(nsi, node) 145 | } 146 | for _, zns := range ns.zones { 147 | if n := len(zns); n > 0 { 148 | nd := zns[rand.Intn(n)] 149 | node := &model.Node{ 150 | Addr: nd.addr, 151 | Status: nd.status, 152 | Zone: nd.zone, 153 | } 154 | nsi = append(nsi, node) 155 | } 156 | } 157 | return 158 | } 159 | 160 | // Myself returns whether or not myself. 161 | func (ns *Nodes) Myself(addr string) bool { 162 | return ns.selfAddr == addr 163 | } 164 | 165 | // UP marks status of myself node up. 166 | func (ns *Nodes) UP() { 167 | for _, nd := range ns.nodes { 168 | if ns.Myself(nd.addr) { 169 | nd.status = model.NodeStatusUP 170 | } 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /registry/registry.go: -------------------------------------------------------------------------------- 1 | package registry 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "sync" 7 | "time" 8 | 9 | "github.com/bilibili/discovery/conf" 10 | "github.com/bilibili/discovery/model" 11 | 12 | "github.com/go-kratos/kratos/pkg/ecode" 13 | log "github.com/go-kratos/kratos/pkg/log" 14 | ) 15 | 16 | const ( 17 | _evictThreshold = int64(90 * time.Second) 18 | _evictCeiling = int64(3600 * time.Second) 19 | ) 20 | 21 | // Registry handles replication of all operations to peer Discovery nodes to keep them all in sync. 22 | type Registry struct { 23 | appm map[string]*model.Apps // appid-env -> apps 24 | aLock sync.RWMutex 25 | 26 | conns map[string]*hosts // region.zone.env.appid-> host 27 | cLock sync.RWMutex 28 | scheduler *scheduler 29 | gd *Guard 30 | } 31 | 32 | type hosts struct { 33 | hclock sync.RWMutex 34 | hosts map[string]*conn // host name to conn 35 | } 36 | 37 | // conn the poll chan contains consumer. 38 | type conn struct { 39 | ch chan map[string]*model.InstanceInfo // TODO(felix): increase 40 | arg *model.ArgPolls 41 | latestTime int64 42 | count int 43 | } 44 | 45 | // newConn new consumer chan. 46 | func newConn(ch chan map[string]*model.InstanceInfo, latestTime int64, arg *model.ArgPolls) *conn { 47 | return &conn{ch: ch, latestTime: latestTime, arg: arg, count: 1} 48 | } 49 | 50 | // NewRegistry new register. 
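// It loads the zone scheduler configuration and starts the background
// goroutines that reload it and run the eviction/renew-statistics loop (proc).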
51 | func NewRegistry(conf *conf.Config) (r *Registry) { 52 | r = &Registry{ 53 | appm: make(map[string]*model.Apps), 54 | conns: make(map[string]*hosts), 55 | gd: new(Guard), 56 | } 57 | r.scheduler = newScheduler(r) 58 | r.scheduler.Load() 59 | go r.scheduler.Reload() 60 | go r.proc() 61 | return 62 | } 63 | 64 | func (r *Registry) newapps(appid, env string) (a *model.Apps, ok bool) { 65 | key := appsKey(appid, env) 66 | r.aLock.Lock() 67 | if a, ok = r.appm[key]; !ok { 68 | a = model.NewApps() 69 | r.appm[key] = a 70 | } 71 | r.aLock.Unlock() 72 | return 73 | } 74 | 75 | func (r *Registry) apps(appid, env, zone string) (as []*model.App, a *model.Apps, ok bool) { 76 | key := appsKey(appid, env) 77 | r.aLock.RLock() 78 | a, ok = r.appm[key] 79 | r.aLock.RUnlock() 80 | if ok { 81 | as = a.App(zone) 82 | } 83 | return 84 | } 85 | 86 | func appsKey(appid, env string) string { 87 | return fmt.Sprintf("%s-%s", appid, env) 88 | } 89 | 90 | func (r *Registry) newApp(ins *model.Instance) (a *model.App) { 91 | as, _ := r.newapps(ins.AppID, ins.Env) 92 | a, _ = as.NewApp(ins.Zone, ins.AppID, ins.LatestTimestamp) 93 | return 94 | } 95 | 96 | // Register a new instance. 97 | func (r *Registry) Register(ins *model.Instance, latestTime int64) (err error) { 98 | a := r.newApp(ins) 99 | i, ok := a.NewInstance(ins, latestTime) 100 | if ok { 101 | r.gd.incrExp() 102 | } 103 | // NOTE: make sure free poll before update appid latest timestamp. 104 | r.broadcast(i.Env, i.AppID) 105 | return 106 | } 107 | 108 | // Renew marks the given instance of the given app name as renewed, and also marks whether it originated from replication. 109 | func (r *Registry) Renew(arg *model.ArgRenew) (i *model.Instance, ok bool) { 110 | a, _, _ := r.apps(arg.AppID, arg.Env, arg.Zone) 111 | if len(a) == 0 { 112 | return 113 | } 114 | if i, ok = a[0].Renew(arg.Hostname); !ok { 115 | return 116 | } 117 | r.gd.incrFac() 118 | return 119 | } 120 | 121 | // Cancel cancels the registration of an instance. 122 | func (r *Registry) Cancel(arg *model.ArgCancel) (i *model.Instance, ok bool) { 123 | if i, ok = r.cancel(arg.Zone, arg.Env, arg.AppID, arg.Hostname, arg.LatestTimestamp); !ok { 124 | return 125 | } 126 | r.gd.decrExp() 127 | return 128 | } 129 | 130 | func (r *Registry) cancel(zone, env, appid, hostname string, latestTime int64) (i *model.Instance, ok bool) { 131 | var l int 132 | a, as, _ := r.apps(appid, env, zone) 133 | if len(a) == 0 { 134 | return 135 | } 136 | if i, l, ok = a[0].Cancel(hostname, latestTime); !ok { 137 | return 138 | } 139 | as.UpdateLatest(latestTime) 140 | if l == 0 { 141 | if a[0].Len() == 0 { 142 | as.Del(zone) 143 | } 144 | } 145 | if len(as.App("")) == 0 { 146 | r.aLock.Lock() 147 | delete(r.appm, appsKey(appid, env)) 148 | r.aLock.Unlock() 149 | } 150 | r.broadcast(env, appid) // NOTE: make sure free poll before update appid latest timestamp. 151 | return 152 | } 153 | 154 | // FetchAll fetch all instances of all the families. 155 | func (r *Registry) FetchAll() (im map[string][]*model.Instance) { 156 | ass := r.allapp() 157 | im = make(map[string][]*model.Instance) 158 | for _, as := range ass { 159 | for _, a := range as.App("") { 160 | im[a.AppID] = append(im[a.AppID], a.Instances()...) 161 | } 162 | } 163 | return 164 | } 165 | 166 | // Fetch fetch all instances by appid. 
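// If a scheduler exists for the appid/env, its client weights are attached
// to the returned InstanceInfo.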
167 | func (r *Registry) Fetch(zone, env, appid string, latestTime int64, status uint32) (info *model.InstanceInfo, err error) { 168 | key := appsKey(appid, env) 169 | r.aLock.RLock() 170 | a, ok := r.appm[key] 171 | r.aLock.RUnlock() 172 | if !ok { 173 | err = ecode.NothingFound 174 | return 175 | } 176 | info, err = a.InstanceInfo(zone, latestTime, status) 177 | if err != nil { 178 | return 179 | } 180 | sch := r.scheduler.Get(appid, env) 181 | if sch != nil { 182 | info.Scheduler = new(model.Scheduler) 183 | info.Scheduler.Clients = sch.Clients 184 | } 185 | return 186 | } 187 | 188 | // Polls hangs request and then write instances when that has changes, or return NotModified. 189 | func (r *Registry) Polls(arg *model.ArgPolls) (ch chan map[string]*model.InstanceInfo, new bool, miss []string, err error) { 190 | var ( 191 | ins = make(map[string]*model.InstanceInfo, len(arg.AppID)) 192 | ) 193 | if len(arg.AppID) != len(arg.LatestTimestamp) { 194 | arg.LatestTimestamp = make([]int64, len(arg.AppID)) 195 | } 196 | for i := range arg.AppID { 197 | in, err := r.Fetch(arg.Zone, arg.Env, arg.AppID[i], arg.LatestTimestamp[i], model.InstanceStatusUP) 198 | if err == ecode.NothingFound { 199 | miss = append(miss, arg.AppID[i]) 200 | log.Error("Polls zone(%s) env(%s) appid(%s) error(%v)", arg.Zone, arg.Env, arg.AppID[i], err) 201 | continue 202 | } 203 | if err == nil { 204 | ins[arg.AppID[i]] = in 205 | new = true 206 | } 207 | } 208 | if new { 209 | ch = make(chan map[string]*model.InstanceInfo, 1) 210 | ch <- ins 211 | return 212 | } 213 | for i := range arg.AppID { 214 | k := pollKey(arg.Env, arg.AppID[i]) 215 | r.cLock.Lock() 216 | if _, ok := r.conns[k]; !ok { 217 | r.conns[k] = &hosts{hosts: make(map[string]*conn, 1)} 218 | } 219 | hosts := r.conns[k] 220 | r.cLock.Unlock() 221 | 222 | hosts.hclock.Lock() 223 | connection, ok := hosts.hosts[arg.Hostname] 224 | if !ok { 225 | if ch == nil { 226 | ch = make(chan map[string]*model.InstanceInfo, 5) // NOTE: there maybe have more than one connection on the same hostname!!! 227 | } 228 | connection = newConn(ch, arg.LatestTimestamp[i], arg) 229 | log.Info("Polls from(%s) new connection(%d)", arg.Hostname, connection.count) 230 | } else { 231 | connection.count++ // NOTE: there maybe have more than one connection on the same hostname!!! 232 | if ch == nil { 233 | ch = connection.ch 234 | } 235 | log.Info("Polls from(%s) reuse connection(%d)", arg.Hostname, connection.count) 236 | } 237 | hosts.hosts[arg.Hostname] = connection 238 | hosts.hclock.Unlock() 239 | } 240 | return 241 | } 242 | 243 | // broadcast on poll by chan. 244 | // NOTE: make sure free poll before update appid latest timestamp. 245 | func (r *Registry) broadcast(env, appid string) { 246 | key := pollKey(env, appid) 247 | r.cLock.Lock() 248 | conns, ok := r.conns[key] 249 | if !ok { 250 | r.cLock.Unlock() 251 | return 252 | } 253 | delete(r.conns, key) 254 | r.cLock.Unlock() 255 | conns.hclock.RLock() 256 | for _, conn := range conns.hosts { 257 | ii, err := r.Fetch(conn.arg.Zone, env, appid, 0, model.InstanceStatusUP) // TODO(felix): latesttime!=0 increase 258 | if err != nil { 259 | // may be not found ,just continue until next poll return err. 260 | log.Error("get appid:%s env:%s zone:%s err:%v", appid, env, conn.arg.Zone, err) 261 | continue 262 | } 263 | for i := 0; i < conn.count; i++ { 264 | select { 265 | case conn.ch <- map[string]*model.InstanceInfo{appid: ii}: // NOTE: if chan is full, means no poller. 
266 | log.Info("broadcast to(%s) success(%d)", conn.arg.Hostname, i+1) 267 | case <-time.After(time.Millisecond * 500): 268 | log.Info("broadcast to(%s) failed(%d) maybe chan full", conn.arg.Hostname, i+1) 269 | } 270 | } 271 | } 272 | conns.hclock.RUnlock() 273 | } 274 | 275 | func pollKey(env, appid string) string { 276 | return fmt.Sprintf("%s.%s", env, appid) 277 | } 278 | 279 | // Set sets the metadata of instances by hostnames. 280 | func (r *Registry) Set(arg *model.ArgSet) (ok bool) { 281 | a, _, _ := r.apps(arg.AppID, arg.Env, arg.Zone) 282 | if len(a) == 0 { 283 | return 284 | } 285 | if ok = a[0].Set(arg); !ok { 286 | return 287 | } 288 | r.broadcast(arg.Env, arg.AppID) 289 | return 290 | } 291 | 292 | func (r *Registry) allapp() (ass []*model.Apps) { 293 | r.aLock.RLock() 294 | ass = make([]*model.Apps, 0, len(r.appm)) 295 | for _, as := range r.appm { 296 | ass = append(ass, as) 297 | } 298 | r.aLock.RUnlock() 299 | return 300 | } 301 | 302 | // resetExp resets the expected renews by counting the instances of all apps; each instance is expected to renew twice per minute. 303 | func (r *Registry) resetExp() { 304 | cnt := int64(0) 305 | for _, p := range r.allapp() { 306 | for _, a := range p.App("") { 307 | cnt += int64(a.Len()) 308 | } 309 | } 310 | r.gd.setExp(cnt) 311 | } 312 | 313 | func (r *Registry) proc() { 314 | tk := time.Tick(1 * time.Minute) 315 | tk2 := time.Tick(15 * time.Minute) 316 | for { 317 | select { 318 | case <-tk: 319 | r.gd.updateFac() 320 | r.evict() 321 | case <-tk2: 322 | r.resetExp() 323 | } 324 | } 325 | } 326 | 327 | func (r *Registry) evict() { 328 | protect := r.gd.ok() 329 | // We first collect all expired items so we can evict them in random order. For large eviction sets, 330 | // if we did not do that, we might wipe out whole apps before self-preservation kicks in. By randomizing it, 331 | // the impact should be evenly distributed across all applications. 332 | var eis []*model.Instance 333 | var registrySize int 334 | // all projects 335 | ass := r.allapp() 336 | for _, as := range ass { 337 | for _, a := range as.App("") { 338 | registrySize += a.Len() 339 | is := a.Instances() 340 | for _, i := range is { 341 | delta := time.Now().UnixNano() - i.RenewTimestamp 342 | if (!protect && delta > _evictThreshold) || delta > _evictCeiling { 343 | eis = append(eis, i) 344 | } 345 | } 346 | } 347 | } 348 | // To compensate for GC pauses or drifting local time, we need to use the current registry size as a base for 349 | // triggering self-preservation. Without that we would wipe out the full registry. 
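	// For example, with 100 registered instances and _percentThreshold = 0.85,
	// at most 100 - 85 = 15 instances may be evicted in a single pass.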
350 | eCnt := len(eis) 351 | registrySizeThreshold := int(float64(registrySize) * _percentThreshold) 352 | evictionLimit := registrySize - registrySizeThreshold 353 | if eCnt > evictionLimit { 354 | eCnt = evictionLimit 355 | } 356 | if eCnt == 0 { 357 | return 358 | } 359 | for i := 0; i < eCnt; i++ { 360 | // Pick a random item (Knuth shuffle algorithm) 361 | next := i + rand.Intn(len(eis)-i) 362 | eis[i], eis[next] = eis[next], eis[i] 363 | ei := eis[i] 364 | r.cancel(ei.Zone, ei.Env, ei.AppID, ei.Hostname, time.Now().UnixNano()) 365 | } 366 | } 367 | 368 | // DelConns delete conn of host in appid 369 | func (r *Registry) DelConns(arg *model.ArgPolls) { 370 | for i := range arg.AppID { 371 | r.cLock.Lock() 372 | k := pollKey(arg.Env, arg.AppID[i]) 373 | conns, ok := r.conns[k] 374 | r.cLock.Unlock() 375 | if !ok { 376 | log.Warn("DelConn key(%s) not found", k) 377 | continue 378 | } 379 | conns.hclock.Lock() 380 | if connection, ok := conns.hosts[arg.Hostname]; ok { 381 | if connection.count > 1 { 382 | log.Info("DelConns from(%s) count decr(%d)", arg.Hostname, connection.count) 383 | connection.count-- 384 | } else { 385 | log.Info("DelConns from(%s) delete(%d)", arg.Hostname, connection.count) 386 | delete(conns.hosts, arg.Hostname) 387 | } 388 | } 389 | conns.hclock.Unlock() 390 | } 391 | } 392 | -------------------------------------------------------------------------------- /registry/registry_test.go: -------------------------------------------------------------------------------- 1 | package registry 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "sync" 8 | "testing" 9 | "time" 10 | 11 | "github.com/bilibili/discovery/conf" 12 | "github.com/bilibili/discovery/model" 13 | "github.com/go-kratos/kratos/pkg/conf/paladin" 14 | "github.com/go-kratos/kratos/pkg/ecode" 15 | 16 | . 
"github.com/smartystreets/goconvey/convey" 17 | ) 18 | 19 | var reg = &model.ArgRegister{AppID: "main.arch.test", Hostname: "reg", Zone: "sh0001", Env: "pre", Status: 1} 20 | var regH1 = &model.ArgRegister{AppID: "main.arch.test", Hostname: "regH1", Zone: "sh0001", Env: "pre", Status: 1} 21 | 22 | var reg2 = &model.ArgRegister{AppID: "main.arch.test2", Hostname: "reg2", Zone: "sh0001", Env: "pre", Status: 1} 23 | 24 | var arg = &model.ArgRenew{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Hostname: "reg"} 25 | var cancel = &model.ArgCancel{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Hostname: "reg"} 26 | var cancel2 = &model.ArgCancel{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Hostname: "regH1"} 27 | 28 | func TestMain(m *testing.M) { 29 | flag.Set("conf", "./") 30 | flag.Parse() 31 | paladin.Init() 32 | m.Run() 33 | os.Exit(0) 34 | } 35 | func TestReigster(t *testing.T) { 36 | i := model.NewInstance(reg) 37 | register(t, i) 38 | } 39 | 40 | func TestDiscovery(t *testing.T) { 41 | i1 := model.NewInstance(reg) 42 | i2 := model.NewInstance(regH1) 43 | r := register(t, i1, i2) 44 | Convey("test discovery", t, func() { 45 | pollArg := &model.ArgPolls{Zone: "sh0001", Env: "pre", AppID: []string{"main.arch.test"}, Hostname: "test"} 46 | fetchArg := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 3} 47 | info, err := r.Fetch(fetchArg.Zone, fetchArg.Env, fetchArg.AppID, 0, fetchArg.Status) 48 | So(err, ShouldBeNil) 49 | So(len(info.Instances["sh0001"]), ShouldEqual, 2) 50 | ch, _, _, err := r.Polls(pollArg) 51 | So(err, ShouldBeNil) 52 | apps := <-ch 53 | So(len(apps["main.arch.test"].Instances["sh0001"]), ShouldEqual, 2) 54 | pollArg.LatestTimestamp[0] = apps["main.arch.test"].LatestTimestamp 55 | fmt.Println(apps["main.arch.test"]) 56 | r.Cancel(cancel) 57 | ch, _, _, err = r.Polls(pollArg) 58 | So(err, ShouldBeNil) 59 | apps = <-ch 60 | So(len(apps["main.arch.test"].Instances), ShouldEqual, 1) 61 | pollArg.LatestTimestamp[0] = apps["main.arch.test"].LatestTimestamp 62 | r.Cancel(cancel2) 63 | }) 64 | } 65 | 66 | func TestRenew(t *testing.T) { 67 | src := model.NewInstance(reg) 68 | r := register(t, src) 69 | Convey("test renew", t, func() { 70 | i, ok := r.Renew(arg) 71 | So(ok, ShouldBeTrue) 72 | So(i, ShouldResemble, src) 73 | }) 74 | } 75 | 76 | func BenchmarkRenew(b *testing.B) { 77 | var ( 78 | i *model.Instance 79 | ok bool 80 | ) 81 | b.RunParallel(func(pb *testing.PB) { 82 | for pb.Next() { 83 | r, src := benchRegister(b) 84 | if i, ok = r.Renew(arg); !ok { 85 | b.Errorf("Renew(%v)", src.AppID) 86 | } 87 | benchCompareInstance(b, src, i) 88 | } 89 | }) 90 | } 91 | 92 | func TestCancel(t *testing.T) { 93 | src := model.NewInstance(reg) 94 | r := register(t, src) 95 | Convey("test cancel", t, func() { 96 | i, ok := r.Cancel(cancel) 97 | So(ok, ShouldBeTrue) 98 | So(i, ShouldResemble, src) 99 | fetchArg := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 3} 100 | _, err := r.Fetch(fetchArg.Zone, fetchArg.Env, fetchArg.AppID, 0, fetchArg.Status) 101 | So(err, ShouldResemble, ecode.NothingFound) 102 | }) 103 | } 104 | 105 | func BenchmarkCancel(b *testing.B) { 106 | var ( 107 | i *model.Instance 108 | ok bool 109 | err error 110 | ) 111 | b.RunParallel(func(pb *testing.PB) { 112 | for pb.Next() { 113 | r, src := benchRegister(b) 114 | if i, ok = r.Cancel(cancel); !ok { 115 | b.Errorf("Cancel(%v) error", src.AppID) 116 | } 117 | benchCompareInstance(b, src, i) 118 | fetchArg := &model.ArgFetch{Zone: "sh0001", 
Env: "pre", AppID: "main.arch.test", Status: 3} 119 | if _, err = r.Fetch(fetchArg.Zone, fetchArg.Env, fetchArg.AppID, 0, fetchArg.Status); err != ecode.NothingFound { 120 | b.Errorf("Fetch(%v) error(%v)", src.AppID, err) 121 | } 122 | } 123 | }) 124 | } 125 | 126 | func TestFetchAll(t *testing.T) { 127 | i := model.NewInstance(reg) 128 | r := register(t, i) 129 | Convey("test fetch all", t, func() { 130 | am := r.FetchAll() 131 | So(len(am), ShouldResemble, 1) 132 | }) 133 | } 134 | 135 | func BenchmarkFetchAll(b *testing.B) { 136 | b.RunParallel(func(pb *testing.PB) { 137 | for pb.Next() { 138 | r, _ := benchRegister(b) 139 | if am := r.FetchAll(); len(am) != 1 { 140 | b.Errorf("FetchAll() error") 141 | } 142 | } 143 | }) 144 | } 145 | 146 | func TestFetch(t *testing.T) { 147 | i := model.NewInstance(reg) 148 | r := register(t, i) 149 | Convey("test fetch", t, func() { 150 | fetchArg2 := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 1} 151 | c, err := r.Fetch(fetchArg2.Zone, fetchArg2.Env, fetchArg2.AppID, 0, fetchArg2.Status) 152 | So(err, ShouldBeNil) 153 | So(len(c.Instances), ShouldResemble, 1) 154 | }) 155 | } 156 | 157 | func BenchmarkFetch(b *testing.B) { 158 | var ( 159 | err error 160 | c *model.InstanceInfo 161 | ) 162 | b.RunParallel(func(pb *testing.PB) { 163 | for pb.Next() { 164 | r, _ := benchRegister(b) 165 | fetchArg := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 1} 166 | if c, err = r.Fetch(fetchArg.Zone, fetchArg.Env, fetchArg.AppID, 0, fetchArg.Status); err != nil { 167 | b.Errorf("Fetch(%v) error(%v)", arg.AppID, err) 168 | } 169 | fetchArg2 := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 2} 170 | if c, err = r.Fetch(fetchArg2.Zone, fetchArg2.Env, fetchArg2.AppID, 0, fetchArg2.Status); err != nil { 171 | b.Errorf("Fetch(%v) error(%v)", arg.AppID, err) 172 | } 173 | _ = c 174 | } 175 | }) 176 | } 177 | 178 | func TestPoll(t *testing.T) { 179 | i := model.NewInstance(reg) 180 | r := register(t, i) 181 | Convey("test poll", t, func() { 182 | pollArg := &model.ArgPolls{Zone: "sh0001", Env: "pre", AppID: []string{"main.arch.test"}, Hostname: "csq"} 183 | ch, _, _, err := r.Polls(pollArg) 184 | So(err, ShouldBeNil) 185 | c := <-ch 186 | So(len(c[pollArg.AppID[0]].Instances), ShouldEqual, 1) 187 | }) 188 | } 189 | 190 | func TestPolls(t *testing.T) { 191 | i1 := model.NewInstance(reg) 192 | i2 := model.NewInstance(reg2) 193 | r := register(t, i1, i2) 194 | Convey("test polls", t, func() { 195 | pollArg := &model.ArgPolls{Zone: "sh0001", Env: "pre", LatestTimestamp: []int64{0, 0}, AppID: []string{"main.arch.test", "main.arch.test2"}, Hostname: "csq"} 196 | ch, new, _, err := r.Polls(pollArg) 197 | So(err, ShouldBeNil) 198 | So(new, ShouldBeTrue) 199 | c := <-ch 200 | So(len(c), ShouldResemble, 2) 201 | }) 202 | } 203 | 204 | func TestPollsChan(t *testing.T) { 205 | i1 := model.NewInstance(reg) 206 | i2 := model.NewInstance(reg2) 207 | r := register(t, i1, i2) 208 | 209 | Convey("test polls parallel", t, func(c C) { 210 | var ( 211 | wg sync.WaitGroup 212 | ch1, ch2 chan map[string]*model.InstanceInfo 213 | new bool 214 | err error 215 | ) 216 | pollArg := &model.ArgPolls{Zone: "sh0001", Env: "pre", LatestTimestamp: []int64{time.Now().UnixNano(), time.Now().UnixNano()}, AppID: []string{"main.arch.test", "main.arch.test2"}, Hostname: "csq"} 217 | ch1, new, _, err = r.Polls(pollArg) 218 | c.So(err, ShouldEqual, ecode.NotModified) 219 | c.So(new, ShouldBeFalse) 220 | c.So(ch1, 
ShouldNotBeNil) 221 | ch2, new, _, err = r.Polls(pollArg) 222 | c.So(err, ShouldEqual, ecode.NotModified) 223 | c.So(new, ShouldBeFalse) 224 | c.So(ch2, ShouldNotBeNil) 225 | // wait group 226 | wg.Add(2) 227 | go func() { 228 | res := <-ch1 229 | c.So(len(res), ShouldResemble, 1) 230 | wg.Done() 231 | }() 232 | go func() { 233 | res := <-ch2 234 | c.So(len(res), ShouldResemble, 1) 235 | wg.Done() 236 | }() 237 | // re register when 1s later, make sure latest_timestamp changed 238 | time.Sleep(time.Second) 239 | h1 := model.NewInstance(regH1) 240 | _ = r.Register(h1, 0) 241 | // wait 242 | wg.Wait() 243 | }) 244 | } 245 | 246 | func BenchmarkPoll(b *testing.B) { 247 | b.RunParallel(func(pb *testing.PB) { 248 | for pb.Next() { 249 | var ( 250 | err error 251 | ch chan map[string]*model.InstanceInfo 252 | c map[string]*model.InstanceInfo 253 | ) 254 | r, _ := benchRegister(b) 255 | pollArg := &model.ArgPolls{Zone: "sh0001", Env: "pre", AppID: []string{"main.arch.test"}, Hostname: "csq"} 256 | if ch, _, _, err = r.Polls(pollArg); err != nil { 257 | b.Errorf("Poll(%v) error(%v)", arg.AppID, err) 258 | } 259 | if c = <-ch; len(c[pollArg.AppID[0]].Instances) != 1 { 260 | b.Errorf("Poll(%v) lenth error", arg.AppID) 261 | } 262 | } 263 | }) 264 | } 265 | 266 | func TestBroadcast(t *testing.T) { 267 | i := model.NewInstance(reg) 268 | r := register(t, i) 269 | Convey("test poll push connection", t, func() { 270 | go func() { 271 | Convey("must poll ahead of time", t, func() { 272 | time.Sleep(time.Microsecond * 5) 273 | var arg2 = &model.ArgRegister{AppID: "main.arch.test", Hostname: "go", Zone: "sh0001", Env: "pre", Status: 1} 274 | m2 := model.NewInstance(arg2) 275 | err2 := r.Register(m2, 0) 276 | So(err2, ShouldBeNil) 277 | }) 278 | }() 279 | pollArg := &model.ArgPolls{Zone: "sh0001", Env: "pre", AppID: []string{"main.arch.test"}, LatestTimestamp: []int64{time.Now().UnixNano()}} 280 | ch, _, _, err := r.Polls(pollArg) 281 | So(err, ShouldResemble, ecode.NotModified) 282 | c := <-ch 283 | So(len(c[pollArg.AppID[0]].Instances["sh0001"]), ShouldResemble, 2) 284 | So(c[pollArg.AppID[0]].Instances, ShouldNotBeNil) 285 | So(len(c[pollArg.AppID[0]].Instances["sh0001"]), ShouldResemble, 2) 286 | }) 287 | } 288 | 289 | func BenchmarkBroadcast(b *testing.B) { 290 | for i := 0; i < b.N; i++ { 291 | var ( 292 | err error 293 | err2 error 294 | ch chan map[string]*model.InstanceInfo 295 | c map[string]*model.InstanceInfo 296 | ) 297 | r, _ := benchRegister(b) 298 | go func() { 299 | time.Sleep(time.Millisecond * 1) 300 | var arg2 = &model.ArgRegister{AppID: "main.arch.test", Hostname: "go", Zone: "sh0001", Env: "pre", Status: 1} 301 | m2 := model.NewInstance(arg2) 302 | if err2 = r.Register(m2, 0); err2 != nil { 303 | b.Errorf("Reigster(%v) error(%v)", m2.AppID, err2) 304 | } 305 | }() 306 | pollArg := &model.ArgPolls{Zone: "sh0001", Env: "pre", AppID: []string{"main.arch.test"}, LatestTimestamp: []int64{time.Now().UnixNano()}} 307 | if ch, _, _, err = r.Polls(pollArg); err != nil && err != ecode.NotModified { 308 | b.Errorf("Poll(%v) error(%v)", pollArg.AppID, err) 309 | } 310 | c = <-ch 311 | if len(c[pollArg.AppID[0]].Instances) != 2 { 312 | b.Errorf("Poll(%v) length error", pollArg.AppID) 313 | } 314 | if c[pollArg.AppID[0]].Instances == nil { 315 | b.Errorf("Poll(%v) zone instances nil error", pollArg.AppID) 316 | } 317 | if len(c[pollArg.AppID[0]].Instances["sh0001"]) != 2 { 318 | b.Errorf("Poll(%v) zone instances length error", pollArg.AppID) 319 | } 320 | } 321 | } 322 | 323 | func TestSet(t 
324 |     i := model.NewInstance(reg)
325 |     r := register(t, i)
326 |     changes := &model.ArgSet{Zone: "sh0001", Env: "pre", AppID: "main.arch.test"}
327 |     changes.Hostname = []string{"reg"}
328 |     changes.Status = []int64{1}
329 |     Convey("test set status to 1", t, func() {
330 |         ok := r.Set(changes)
331 |         So(ok, ShouldBeTrue)
332 |         fetchArg := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 3}
333 |         c, err := r.Fetch(fetchArg.Zone, fetchArg.Env, fetchArg.AppID, 0, fetchArg.Status)
334 |         So(err, ShouldBeNil)
335 |         So(c.Instances["sh0001"][0].Status, ShouldResemble, uint32(1))
336 |     })
337 |     changes = &model.ArgSet{Zone: "sh0001", Env: "pre", AppID: "main.arch.test"}
338 |     changes.Hostname = []string{"reg"}
339 |     changes = &model.ArgSet{Zone: "sh0001", Env: "pre", AppID: "main.arch.test"}
340 |     changes.Hostname = []string{"reg"}
341 |     changes.Metadata = []string{`{"weight":"11"}`}
342 |     Convey("test set metadata weight to 11", t, func() {
343 |         ok := r.Set(changes)
344 |         So(ok, ShouldBeTrue)
345 |         fetchArg := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 3}
346 |         c, err := r.Fetch(fetchArg.Zone, fetchArg.Env, fetchArg.AppID, 0, fetchArg.Status)
347 |         So(err, ShouldBeNil)
348 |         So(c.Instances["sh0001"][0].Metadata["weight"], ShouldResemble, "11")
349 |     })
350 |     i1 := model.NewInstance(regH1)
351 |     _ = r.Register(i1, 0)
352 |     changes = &model.ArgSet{Zone: "sh0001", Env: "pre", AppID: "main.arch.test"}
353 |     changes.Hostname = []string{"reg", "regH1"}
354 |     changes.Metadata = []string{`{"weight":"12"}`, `{"weight":"13"}`}
355 |     Convey("test set multiple instances' metadata weight to 12 and 13", t, func() {
356 |         ok := r.Set(changes)
357 |         So(ok, ShouldBeTrue)
358 |         fetchArg := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 3}
359 |         c, err := r.Fetch(fetchArg.Zone, fetchArg.Env, fetchArg.AppID, 0, fetchArg.Status)
360 |         So(err, ShouldBeNil)
361 |         for _, ins := range c.Instances["sh0001"] {
362 |             if ins.Hostname == "reg" {
363 |                 So(ins.Metadata["weight"], ShouldResemble, "12")
364 |             } else if ins.Hostname == "regH1" {
365 |                 So(ins.Metadata["weight"], ShouldResemble, "13")
366 |             }
367 |         }
368 |     })
369 | }
370 |
371 | func BenchmarkSet(b *testing.B) {
372 |     b.RunParallel(func(pb *testing.PB) {
373 |         for pb.Next() {
374 |             var (
375 |                 c *model.InstanceInfo
376 |                 err error
377 |                 ok bool
378 |             )
379 |             r, _ := benchRegister(b)
380 |             changes := &model.ArgSet{Zone: "sh0001", Env: "pre", AppID: "main.arch.test"}
381 |             changes.Hostname = []string{"reg"}
382 |             changes.Status = []int64{1}
383 |
384 |             if ok = r.Set(changes); !ok {
385 |                 b.Errorf("SetStatus(%v) error", arg.AppID)
386 |             }
387 |             fetchArg := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 3}
388 |             if c, err = r.Fetch(fetchArg.Zone, fetchArg.Env, fetchArg.AppID, 0, fetchArg.Status); err != nil {
389 |                 b.Errorf("Fetch(%v) error(%v)", fetchArg.AppID, err)
390 |                 b.FailNow()
391 |             }
392 |             if c.Instances["sh0001"][0].Status != 1 {
393 |                 b.Errorf("SetStatus(%v) change error", fetchArg.AppID)
394 |             }
395 |         }
396 |     })
397 | }
398 |
399 | func TestResetExp(t *testing.T) {
400 |     i := model.NewInstance(reg)
401 |     r := register(t, i)
402 |     Convey("test ResetExp", t, func() {
403 |         r.resetExp()
404 |         So(r.gd.expPerMin, ShouldResemble, int64(2))
405 |     })
406 | }
407 |
408 | func benchCompareInstance(b *testing.B, src *model.Instance, i *model.Instance) {
409 |     if src.AppID != i.AppID || src.Env != i.Env || src.Hostname != i.Hostname {
410 |         b.Errorf("instance compare error")
411 |     }
412 | }
413 |
414 | func register(t *testing.T, is ...*model.Instance) (r *Registry) {
415 |     Convey("test register", t, func() {
416 |         r = NewRegistry(&conf.Config{})
417 |         var num int
418 |         for _, i := range is {
419 |             err := r.Register(i, 0)
420 |             So(err, ShouldBeNil)
421 |             if i.AppID == "main.arch.test" {
422 |                 num++
423 |             }
424 |         }
425 |         fetchArg := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 3}
426 |         instancesInfo, err := r.Fetch(fetchArg.Zone, fetchArg.Env, fetchArg.AppID, 0, fetchArg.Status)
427 |         So(err, ShouldBeNil)
428 |         So(len(instancesInfo.Instances["sh0001"]), ShouldResemble, num)
429 |     })
430 |     return r
431 | }
432 |
433 | func benchRegister(b *testing.B) (r *Registry, i *model.Instance) {
434 |     r = NewRegistry(&conf.Config{})
435 |     i = model.NewInstance(reg)
436 |     if err := r.Register(i, 0); err != nil {
437 |         b.Errorf("Register(%v) error(%v)", i.AppID, err)
438 |     }
439 |     return r, i
440 | }
441 |
442 | func TestEvict(t *testing.T) {
443 |     Convey("test evict for protect", t, func() {
444 |         r := NewRegistry(&conf.Config{})
445 |         m := model.NewInstance(reg)
446 |         // make sure the instance's renew timestamp has already expired
447 |         m.RenewTimestamp -= 100
448 |         err := r.Register(m, 0)
449 |         So(err, ShouldBeNil)
450 |         // move the heartbeat statistics forward so evict can run
451 |         r.gd.facLastMin = r.gd.facInMin
452 |         r.evict()
453 |         fetchArg := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 3}
454 |         c, err := r.Fetch(fetchArg.Zone, fetchArg.Env, fetchArg.AppID, 0, fetchArg.Status)
455 |         So(err, ShouldBeNil)
456 |         // protect mode keeps the expired instance
457 |         So(len(c.Instances), ShouldResemble, 1)
458 |     })
459 | }
460 |
461 | func TestEvict2(t *testing.T) {
462 |     Convey("test evict for cancel", t, func() {
463 |         r := NewRegistry(&conf.Config{})
464 |         m := model.NewInstance(reg)
465 |         err := r.Register(m, 0)
466 |         So(err, ShouldBeNil)
467 |         _, ok := r.Renew(arg)
468 |         So(ok, ShouldBeTrue)
469 |         // make sure the instance's renew timestamp has already expired
470 |         m.RenewTimestamp -= int64(time.Second * 100)
471 |         _ = r.Register(m, 0)
472 |         // move the heartbeat statistics forward so evict can run
473 |         r.gd.facLastMin = r.gd.facInMin
474 |         r.evict()
475 |         fetchArg := &model.ArgFetch{Zone: "sh0001", Env: "pre", AppID: "main.arch.test", Status: 1}
476 |         _, err = r.Fetch(fetchArg.Zone, fetchArg.Env, fetchArg.AppID, 0, fetchArg.Status)
477 |         So(err, ShouldResemble, ecode.NothingFound)
478 |     })
479 | }
480 |
--------------------------------------------------------------------------------
/registry/scheduler.go:
--------------------------------------------------------------------------------
1 | package registry
2 |
3 | import (
4 |     "context"
5 |     "strings"
6 |     "sync"
7 |
8 |     "github.com/bilibili/discovery/model"
9 |     "github.com/go-kratos/kratos/pkg/conf/paladin"
10 | )
11 |
12 | // Scheduler info.
13 | type scheduler struct {
14 |     schedulers map[string]*model.Scheduler
15 |     mutex sync.RWMutex
16 |     r *Registry
17 | }
18 |
19 | func newScheduler(r *Registry) *scheduler {
20 |     return &scheduler{
21 |         schedulers: make(map[string]*model.Scheduler),
22 |         r: r,
23 |     }
24 | }
25 |
26 | // Load loads scheduler info.
27 | func (s *scheduler) Load() {
28 |     for _, key := range paladin.Keys() {
29 |         if !strings.HasSuffix(key, ".json") {
30 |             continue
31 |         }
32 |         v := paladin.Get(key)
33 |         content, err := v.String()
34 |         if err != nil {
35 |             return
36 |         }
37 |         sch := new(model.Scheduler)
38 |         if err := sch.Set(content); err != nil {
39 |             continue
40 |         }
41 |         s.schedulers[appsKey(sch.AppID, sch.Env)] = sch
42 |     }
43 | }
44 |
45 | // Reload reloads scheduler info when a watched config file changes.
46 | func (s *scheduler) Reload() {
47 |     event := paladin.WatchEvent(context.Background())
48 |     for {
49 |         e := <-event
50 |
51 |         if !strings.HasSuffix(e.Key, ".json") {
52 |             continue
53 |         }
54 |         sch := new(model.Scheduler)
55 |         if err := sch.Set(e.Value); err != nil {
56 |             continue
57 |         }
58 |         s.mutex.Lock()
59 |         key := appsKey(sch.AppID, sch.Env)
60 |         s.r.aLock.Lock()
61 |         if a, ok := s.r.appm[key]; ok {
62 |             a.UpdateLatest(0) // bump the app's latest timestamp so watchers treat this as an update
63 |         }
64 |         s.r.aLock.Unlock()
65 |         s.schedulers[key] = sch
66 |         s.mutex.Unlock()
67 |         s.r.broadcast(sch.Env, sch.AppID) // push the update to pending polls for this app
68 |     }
69 | }
70 |
71 | // Get gets scheduler info.
72 | func (s *scheduler) Get(appid, env string) *model.Scheduler {
73 |     s.mutex.RLock()
74 |     sch := s.schedulers[appsKey(appid, env)]
75 |     s.mutex.RUnlock()
76 |     return sch
77 | }
78 |
--------------------------------------------------------------------------------