├── .codecov.yml ├── .gitignore ├── .gosimpleignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── anzi ├── anzi_test.go ├── callback.go ├── callback_test.go ├── config.go ├── dumps │ ├── dictionary.rdb │ ├── easily_compressible_string_key.rdb │ ├── empty_database.rdb │ ├── hash_as_ziplist.rdb │ ├── integer_keys.rdb │ ├── intset_16.rdb │ ├── intset_32.rdb │ ├── intset_64.rdb │ ├── keys_with_expiry.rdb │ ├── linkedlist.rdb │ ├── multiple_databases.rdb │ ├── non_ascii_values.rdb │ ├── parser_filters.rdb │ ├── rdb_version_5_with_checksum.rdb │ ├── rdb_version_8_with_64b_length_and_scores.rdb │ ├── redis_40_with_module.rdb │ ├── redis_50_with_streams.rdb │ ├── regular_set.rdb │ ├── regular_sorted_set.rdb │ ├── sorted_set_as_ziplist.rdb │ ├── uncompressible_string_keys.rdb │ ├── ziplist_that_compresses_easily.rdb │ ├── ziplist_that_doesnt_compress.rdb │ ├── ziplist_with_integers.rdb │ ├── zipmap_that_compresses_easily.rdb │ ├── zipmap_that_doesnt_compress.rdb │ └── zipmap_with_big_values.rdb ├── laf_test.go ├── lzf.go ├── proc.go ├── proc_test.go ├── rdb.go └── rdb_test.go ├── ci ├── README.md ├── fuzz │ ├── .gitignore │ ├── mcNodeConnRetirival │ │ ├── fuzz.go │ │ ├── gen │ │ │ └── main.go │ │ └── mock.go │ ├── mcparser │ │ ├── fuzz.go │ │ └── gen │ │ │ └── main.go │ └── redisparser │ │ ├── corpus │ │ ├── 0 │ │ ├── 1 │ │ ├── 2 │ │ └── 3 │ │ ├── gen │ │ └── main.go │ │ └── redisparser.go └── tests │ ├── proxy │ ├── proxy.go │ └── proxy_test.go │ └── reload │ ├── conf.d │ ├── .gitkeep │ ├── invalid │ │ ├── duplicate_name.conf │ │ ├── duplicate_port.conf │ │ ├── empty_server.conf │ │ ├── invalid_port.conf │ │ ├── invalid_weight.conf │ │ ├── more_alisa.conf │ │ ├── no_weight.conf │ │ └── some_has_alisa.conf │ ├── mc0.toml │ ├── mc1.toml │ ├── redis2.toml │ └── redis3.toml │ ├── example │ ├── .gitignore │ └── .gitkeep │ ├── realod.go │ └── reload_test.go ├── cmd ├── anzi │ ├── README.md │ ├── anzi.toml │ └── main.go ├── apicli │ └── main.go ├── apiserver │ ├── apiserver.toml │ └── main.go ├── balancer │ └── main.go ├── enri │ └── main.go ├── executor │ └── main.go ├── proxy │ ├── main.go │ ├── proxy-cluster-example.toml │ └── proxy-example.toml └── scheduler │ ├── main.go │ └── scheduler.toml ├── doc ├── cache_apiserver.md ├── cache_platform.md ├── chunk.text ├── deploy.md ├── images │ ├── appid-list.gif │ ├── cache-platform-arch.png │ ├── cluster-detail.png │ ├── create-cluster.gif │ ├── job-list.png │ ├── overlord.jpeg │ ├── overlord.png │ └── overlord_arch.png ├── wiki-cn │ ├── .gitignore │ ├── README.md │ ├── SUMMARY.md │ ├── enri.md │ ├── package-lock.json │ ├── platform-deploy.md │ ├── platform-usage.md │ ├── platform.md │ ├── platform │ │ ├── chunk.md │ │ ├── deploy-cluster.md │ │ ├── front-end.md │ │ ├── group.md │ │ ├── high-avaliable.md │ │ ├── recovery-policy.md │ │ ├── scale.md │ │ └── why-mesos.md │ ├── proxy-usage.md │ ├── proxy.md │ ├── proxy │ │ ├── benchmark.md │ │ ├── cmd.md │ │ ├── design.md │ │ └── features.md │ └── tools.md └── wiki │ └── overview.md ├── enri ├── cluster.go ├── cluster_test.go ├── cmd.go ├── manage.go ├── node.go └── util.go ├── go.mod ├── pkg ├── bufio │ ├── buffer.go │ ├── buffer_test.go │ ├── io.go │ └── io_test.go ├── container │ └── container.go ├── conv │ ├── conv.go │ └── conv_test.go ├── dir │ └── dir.go ├── etcd │ ├── etcd.go │ └── etcd_test.go ├── hashkit │ ├── crc.go │ ├── crc_test.go │ ├── fnv.go │ ├── hash.go │ ├── hash_methods_test.go │ ├── hash_test.go │ ├── hsieh.go │ ├── ketama.go │ ├── ketama_test.go │ ├── 
methods.go │ └── murmur.go ├── log │ ├── file.go │ ├── handler.go │ ├── log.go │ ├── log_test.go │ ├── stdout.go │ └── verbose.go ├── memcache │ └── conn.go ├── mockconn │ └── conn.go ├── myredis │ ├── cmd.go │ └── myredis.go ├── net │ ├── conn.go │ └── conn_test.go ├── proc │ └── proc.go ├── prom │ └── prom.go ├── systemd │ └── systemd.go └── types │ └── types.go ├── platform ├── CHANGELOG.md ├── api │ ├── dao │ │ ├── appid.go │ │ ├── cluster.go │ │ ├── dao.go │ │ ├── errors.go │ │ ├── instance.go │ │ ├── job.go │ │ └── spec.go │ ├── model │ │ ├── config.go │ │ ├── error.go │ │ ├── model.go │ │ └── types.go │ ├── server │ │ ├── appid.go │ │ ├── approve.go │ │ ├── cluster.go │ │ ├── command.go │ │ ├── group.go │ │ ├── instance.go │ │ ├── job.go │ │ ├── server.go │ │ ├── spec.go │ │ ├── tools.go │ │ └── version.go │ └── service │ │ ├── appid.go │ │ ├── cluster.go │ │ ├── command.go │ │ ├── group.go │ │ ├── instance.go │ │ ├── job.go │ │ ├── service.go │ │ ├── spec.go │ │ └── version.go ├── chunk │ ├── chunk.go │ ├── chunk_test.go │ ├── dist.go │ ├── dist_test.go │ ├── node.go │ ├── slot.go │ └── tools.go ├── job │ ├── balance │ │ └── exec.go │ ├── create │ │ ├── exec.go │ │ ├── job_test.go │ │ ├── server_cluster.go │ │ ├── server_singleton.go │ │ ├── tools.go │ │ ├── tpl.go │ │ └── types.go │ ├── job.go │ └── state.go └── mesos │ ├── executor.go │ ├── mesos.go │ ├── mesos_test.go │ ├── scheduler.go │ └── tools.go ├── proxy ├── CHANGELOG.md ├── config.go ├── config_test.go ├── forwarder.go ├── handler.go ├── listen.go ├── proto │ ├── memcache │ │ ├── binary │ │ │ ├── node_conn.go │ │ │ ├── node_conn_test.go │ │ │ ├── pinger.go │ │ │ ├── pinger_test.go │ │ │ ├── proxy_conn.go │ │ │ ├── proxy_conn_test.go │ │ │ ├── request.go │ │ │ └── request_test.go │ │ ├── node_conn.go │ │ ├── node_conn_test.go │ │ ├── pinger.go │ │ ├── pinger_test.go │ │ ├── proxy_conn.go │ │ ├── proxy_conn_test.go │ │ ├── request.go │ │ └── request_test.go │ ├── message.go │ ├── message_test.go │ ├── pipe.go │ ├── pipe_test.go │ ├── redis │ │ ├── cluster │ │ │ ├── cluster.go │ │ │ ├── fetch.go │ │ │ ├── node_conn.go │ │ │ ├── node_conn_test.go │ │ │ ├── proxy_conn.go │ │ │ ├── proxy_conn_test.go │ │ │ ├── slot.go │ │ │ └── slot_test.go │ │ ├── node_conn.go │ │ ├── node_conn_test.go │ │ ├── pinger.go │ │ ├── pinger_test.go │ │ ├── proxy_conn.go │ │ ├── proxy_conn_test.go │ │ ├── request.go │ │ ├── request_test.go │ │ ├── resp.go │ │ └── resp_test.go │ ├── slowlog.go │ └── types.go ├── proxy.go └── slowlog │ ├── file.go │ ├── http.go │ ├── slowlog.go │ └── slowlog_test.go ├── scripts ├── codecov.sh ├── fuzz_tools.py ├── init-ci-all.sh ├── install │ ├── install-mesos.sh │ ├── install_java.sh │ ├── install_zk.sh │ ├── mesos-agent.sh │ └── setupetcd.sh ├── memcache_multi_thread.py ├── run-fuzz.sh ├── validate_keys_dist.py └── validate_redis_features.py ├── version └── version.go └── web ├── .browserslistrc ├── .editorconfig ├── .eslintrc.js ├── .gitignore ├── README.md ├── babel.config.js ├── package.json ├── postcss.config.js ├── public ├── favicon.ico └── index.html ├── src ├── App.vue ├── assets │ └── Starbounder-2.otf ├── constants │ └── CREATE_TYPES.js ├── http │ ├── api.js │ ├── config.js │ └── service.js ├── layout │ ├── Header.vue │ └── SideBar.vue ├── main.js ├── router.js ├── store │ ├── index.js │ ├── modules │ │ ├── cluster.js │ │ └── job.js │ └── mutation-types.js ├── style │ ├── element-custom.scss │ ├── element-variables.scss │ ├── mixin.scss │ └── reset.scss └── views │ ├── AddCluster.vue │ ├── AppId.vue │ ├── 
Cluster.vue │ ├── Home.vue │ └── Job.vue ├── vue.config.js └── yarn.lock /.codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: on 4 | patch: off 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | *.idea 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | *.prof 11 | 12 | # Output of the go coverage tool, specifically when used with LiteIDE 13 | *.out 14 | 15 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 16 | .glide/ 17 | 18 | cmd/proxy/proxy 19 | cmd/executor/executor 20 | cmd/apiserver/apiserver 21 | cmd/scheduler/scheduler 22 | cmd/balancer/balancer 23 | cmd/apicli/apicli 24 | cmd/anzi/anzi 25 | cmd/enri/enri 26 | coverage.txt 27 | go.sum 28 | -------------------------------------------------------------------------------- /.gosimpleignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/.gosimpleignore -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.12 5 | 6 | # Only clone the most recent commit. 7 | git: 8 | depth: 1 9 | 10 | # Force-enable Go modules. This will be unnecessary when Go 1.12 lands. 11 | env: 12 | - GO111MODULE=on 13 | 14 | # Skip the install step. Don't `go get` dependencies. Only build with the code 15 | # in vendor/ 16 | install: true 17 | 18 | services: 19 | - memcached 20 | - redis 21 | - docker 22 | 23 | # Anything in before_script that returns a nonzero exit code will flunk the 24 | # build and immediately stop. It's sorta like having set -e enabled in bash. 25 | # Make sure golangci-lint is vendored. 
26 | before_script: 27 | - curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $GOPATH/bin 28 | 29 | install: 30 | - bash -x scripts/init-ci-all.sh 31 | 32 | script: 33 | - ./scripts/codecov.sh 34 | - make build 35 | 36 | after_success: 37 | - bash <(curl -s https://codecov.io/bash) 38 | - golangci-lint run # run a bunch of code checkers/linters in parallel 39 | 40 | after_script: 41 | - docker stop redis-cluster 42 | - docker rm redis-cluster 43 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Overlord 2 | 3 | # proxy 4 | 5 | Latest Version 1.5.5 [history](proxy/CHANGELOG.md) 6 | 7 | # platform 8 | 9 | Latest Version 2.0.0 [history](platform/CHANGELOG.md) 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Felix Hao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export GO111MODULE=on 2 | 3 | test: 4 | ./scripts/codecov.sh 5 | 6 | build: 7 | cd cmd/apicli && go build && cd - 8 | cd cmd/apiserver && go build && cd - 9 | cd cmd/balancer && go build && cd - 10 | cd cmd/executor && go build && cd - 11 | cd cmd/proxy && go build && cd - 12 | cd cmd/scheduler && go build && cd - 13 | cd cmd/anzi && go build && cd - 14 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overlord 2 | [![Build Status](https://travis-ci.org/bilibili/overlord.svg?branch=master)](https://travis-ci.org/bilibili/overlord) 3 | [![GoDoc](http://godoc.org/github.com/bilibili/overlord/proxy?status.svg)](http://godoc.org/github.com/bilibili/overlord/proxy) 4 | [![codecov](https://codecov.io/gh/bilibili/overlord/branch/master/graph/badge.svg)](https://codecov.io/gh/bilibili/overlord) 5 | [![Go Report Card](https://goreportcard.com/badge/github.com/bilibili/overlord)](https://goreportcard.com/report/github.com/bilibili/overlord) 6 | 7 | Overlord is [bilibili](https://www.bilibili.com)'s memcache and redis & redis-cluster proxy and cluster-management suite written in Go, aiming to provide an automated, highly available cache service solution. It mainly consists of the following components: 8 | 9 | * [proxy](doc/wiki-cn/proxy-usage.md): a lightweight, highly available cache proxy module that proxies memcache and redis, comparable to twemproxy, except that it also supports redis-cluster and can present itself to clients as a cluster. 10 | * [platform](doc/wiki-cn/platform-deploy.md): includes the apiserver, the mesos framework & executor, the jobs that manage cluster node tasks, and more. 11 | * [GUI](doc/wiki-cn/platform-usage.md): the web management interface; the dashboard makes cluster management convenient, including creating/deleting clusters, scaling up/down, and adding/removing nodes. 12 | * [anzi](doc/wiki-cn/tools.md): a data synchronization tool for redis-cluster that can run as a service and work together with the apiserver. 13 | * [enri](doc/wiki-cn/tools.md): a redis-cluster management tool for flexibly creating clusters, migrating slots, and more. 14 | 15 | Overlord is already used in production at bilibili. 16 | 17 | ## Document 18 | [Simplified Chinese](doc/wiki-cn/SUMMARY.md) 19 | 20 | ## GUI 21 | 22 | ![GUI](doc/images/create-cluster.gif) 23 | 24 | ## Architecture 25 | 26 | ![architecture](doc/images/overlord_arch.png) 27 | 28 | ## Cache-Platform 29 | 30 | An automated cache node management platform built on mesos & etcd. 31 | 32 | ![cache-platform Architecture](doc/images/cache-platform-arch.png) 33 | 34 | ------------- 35 | 36 | *Please report bugs, concerns, suggestions by issues, or join QQ-group 716486124 to discuss problems around source code.* 37 |
-------------------------------------------------------------------------------- /anzi/anzi_test.go: -------------------------------------------------------------------------------- 1 | package anzi 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "net" 7 | "sync/atomic" 8 | "time" 9 | ) 10 | 11 | const ( 12 | stateClosed = 1 13 | stateOpening = 0 14 | ) 15 | 16 | type mockAddr string 17 | 18 | func (m mockAddr) Network() string { 19 | return "tcp" 20 | } 21 | func (m mockAddr) String() string { 22 | return string(m) 23 | } 24 | 25 | type mockConn struct { 26 | addr mockAddr 27 | rbuf *bytes.Buffer 28 | wbuf *bytes.Buffer 29 | data []byte 30 | repeat int 31 | err error 32 | closed int32 33 | } 34 | 35 | func (m *mockConn) Read(b []byte) (n int, err error) { 36 | if atomic.LoadInt32(&m.closed) == stateClosed { 37 | return 0, io.EOF 38 | } 39 | if m.err != nil { 40 | err = m.err 41 | return 42 | } 43 | if m.repeat > 0 { 44 | m.rbuf.Write(m.data) 45 | m.repeat-- 46 | } 47 | return m.rbuf.Read(b) 48 | } 49 | 50 | func (m *mockConn) Write(b []byte) (n int, err error) { 51 | if atomic.LoadInt32(&m.closed) == stateClosed { 52 | return 0, io.EOF 53 | } 54
| 55 | if m.err != nil { 56 | err = m.err 57 | return 58 | } 59 | return m.wbuf.Write(b) 60 | } 61 | 62 | // writeBuffers impl the net.buffersWriter to support writev 63 | func (m *mockConn) writeBuffers(buf *net.Buffers) (int64, error) { 64 | if m.err != nil { 65 | return 0, m.err 66 | } 67 | return buf.WriteTo(m.wbuf) 68 | } 69 | 70 | func (m *mockConn) Close() error { 71 | atomic.StoreInt32(&m.closed, stateClosed) 72 | return nil 73 | } 74 | func (m *mockConn) LocalAddr() net.Addr { return m.addr } 75 | func (m *mockConn) RemoteAddr() net.Addr { return m.addr } 76 | 77 | func (m *mockConn) SetDeadline(t time.Time) error { return nil } 78 | func (m *mockConn) SetReadDeadline(t time.Time) error { return nil } 79 | func (m *mockConn) SetWriteDeadline(t time.Time) error { return nil } 80 | 81 | // _createConn is useful tools for handler test 82 | func _createConn(data []byte) net.Conn { 83 | return _createRepeatConn(data, 1) 84 | } 85 | 86 | func _createRepeatConn(data []byte, r int) net.Conn { 87 | mconn := &mockConn{ 88 | addr: "127.0.0.1:12345", 89 | rbuf: bytes.NewBuffer(nil), 90 | wbuf: new(bytes.Buffer), 91 | data: data, 92 | repeat: r, 93 | } 94 | return mconn 95 | } 96 | 97 | func _createDownStreamConn() (net.Conn, *bytes.Buffer) { 98 | buf := new(bytes.Buffer) 99 | mconn := &mockConn{ 100 | addr: "127.0.0.1:12345", 101 | wbuf: buf, 102 | } 103 | return mconn, buf 104 | } 105 | -------------------------------------------------------------------------------- /anzi/callback_test.go: -------------------------------------------------------------------------------- 1 | package anzi 2 | 3 | import ( 4 | "bufio" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestCallback(t *testing.T) { 11 | for _, rname := range allRdbs { 12 | t.Run(rname, func(tt *testing.T) { 13 | buf, err := _loadRDB(rname + ".rdb") 14 | assert.NoError(tt, err, "should load db ok") 15 | cb := NewProtocolCallbacker("127.0.0.1:6379") 16 | rdb := NewRDB(bufio.NewReader(buf), cb) 17 | err = rdb.bgSyncProc() 18 | assert.NoError(tt, err) 19 | }) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /anzi/config.go: -------------------------------------------------------------------------------- 1 | package anzi 2 | 3 | import ( 4 | "runtime" 5 | 6 | "overlord/pkg/log" 7 | "overlord/proxy" 8 | ) 9 | 10 | // Config is the struct which used by cmd/anzi 11 | type Config struct { 12 | *log.Config 13 | Migrate *MigrateConfig `toml:"migrate"` 14 | } 15 | 16 | // MigrateConfig is the config file which nedd to read/write into target dir. 
17 | type MigrateConfig struct { 18 | From []*proxy.ClusterConfig `toml:"from"` 19 | To *proxy.ClusterConfig `toml:"to"` 20 | MaxRDBConcurrency int `toml:"max_rdb_concurrency"` 21 | } 22 | 23 | // SetDefault migrate config 24 | func (m *MigrateConfig) SetDefault() { 25 | if m.MaxRDBConcurrency == 0 { 26 | m.MaxRDBConcurrency = runtime.NumCPU() 27 | } 28 | for _, from := range m.From { 29 | from.SetDefault() 30 | } 31 | m.To.SetDefault() 32 | } 33 | -------------------------------------------------------------------------------- /anzi/dumps/dictionary.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/dictionary.rdb -------------------------------------------------------------------------------- /anzi/dumps/easily_compressible_string_key.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/easily_compressible_string_key.rdb -------------------------------------------------------------------------------- /anzi/dumps/empty_database.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/empty_database.rdb -------------------------------------------------------------------------------- /anzi/dumps/hash_as_ziplist.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/hash_as_ziplist.rdb -------------------------------------------------------------------------------- /anzi/dumps/integer_keys.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/integer_keys.rdb -------------------------------------------------------------------------------- /anzi/dumps/intset_16.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/intset_16.rdb -------------------------------------------------------------------------------- /anzi/dumps/intset_32.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/intset_32.rdb -------------------------------------------------------------------------------- /anzi/dumps/intset_64.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/intset_64.rdb -------------------------------------------------------------------------------- /anzi/dumps/keys_with_expiry.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/keys_with_expiry.rdb -------------------------------------------------------------------------------- /anzi/dumps/linkedlist.rdb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/linkedlist.rdb -------------------------------------------------------------------------------- /anzi/dumps/multiple_databases.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/multiple_databases.rdb -------------------------------------------------------------------------------- /anzi/dumps/non_ascii_values.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/non_ascii_values.rdb -------------------------------------------------------------------------------- /anzi/dumps/parser_filters.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/parser_filters.rdb -------------------------------------------------------------------------------- /anzi/dumps/rdb_version_5_with_checksum.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/rdb_version_5_with_checksum.rdb -------------------------------------------------------------------------------- /anzi/dumps/rdb_version_8_with_64b_length_and_scores.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/rdb_version_8_with_64b_length_and_scores.rdb -------------------------------------------------------------------------------- /anzi/dumps/redis_40_with_module.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/redis_40_with_module.rdb -------------------------------------------------------------------------------- /anzi/dumps/redis_50_with_streams.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/redis_50_with_streams.rdb -------------------------------------------------------------------------------- /anzi/dumps/regular_set.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/regular_set.rdb -------------------------------------------------------------------------------- /anzi/dumps/regular_sorted_set.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/regular_sorted_set.rdb -------------------------------------------------------------------------------- /anzi/dumps/sorted_set_as_ziplist.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/sorted_set_as_ziplist.rdb -------------------------------------------------------------------------------- /anzi/dumps/uncompressible_string_keys.rdb: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/uncompressible_string_keys.rdb -------------------------------------------------------------------------------- /anzi/dumps/ziplist_that_compresses_easily.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/ziplist_that_compresses_easily.rdb -------------------------------------------------------------------------------- /anzi/dumps/ziplist_that_doesnt_compress.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/ziplist_that_doesnt_compress.rdb -------------------------------------------------------------------------------- /anzi/dumps/ziplist_with_integers.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/ziplist_with_integers.rdb -------------------------------------------------------------------------------- /anzi/dumps/zipmap_that_compresses_easily.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/zipmap_that_compresses_easily.rdb -------------------------------------------------------------------------------- /anzi/dumps/zipmap_that_doesnt_compress.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/zipmap_that_doesnt_compress.rdb -------------------------------------------------------------------------------- /anzi/dumps/zipmap_with_big_values.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/anzi/dumps/zipmap_with_big_values.rdb -------------------------------------------------------------------------------- /anzi/laf_test.go: -------------------------------------------------------------------------------- 1 | package anzi 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestLzfDecompressOk(t *testing.T) { 11 | data := []byte{1, 97, 97, 224, 246, 0, 1, 97, 97} 12 | ulen := int64(259) 13 | 14 | output := lzfDecompress(data, ulen) 15 | expected := strings.Repeat("a", int(ulen)) 16 | assert.Equal(t, expected, string(output)) 17 | } 18 | 19 | func TestLzfDecompressNoData(t *testing.T) { 20 | output := lzfDecompress([]byte{}, 0) 21 | assert.Len(t, output, 0) 22 | } 23 | -------------------------------------------------------------------------------- /anzi/lzf.go: -------------------------------------------------------------------------------- 1 | package anzi 2 | 3 | // COPY from https://github.com/vrischmann/rdbtools/blob/master/lzf.go 4 | func lzfDecompress(data []byte, ulen int64) []byte { 5 | output := make([]byte, ulen) 6 | 7 | if len(data) <= 0 { 8 | return output 9 | } 10 | 11 | i := uint32(0) 12 | o := uint32(0) 13 | 14 | for i < uint32(len(data)) { 15 | var ctrl = uint32(data[i]) 16 | i++ 17 | if ctrl < 32 { 18 | copy(output[o:], 
data[i:i+ctrl+1]) 19 | i += ctrl + 1 20 | o += ctrl + 1 21 | } else { 22 | var length = uint32(ctrl >> 5) 23 | if length == 7 { 24 | length += uint32(data[i]) 25 | i++ 26 | } 27 | 28 | var ref = uint32(o) - uint32(ctrl&0x1F<<8) - uint32(data[i]) - 1 29 | 30 | i++ 31 | for j := uint32(0); j < length+2; j++ { 32 | output[o] = output[ref] 33 | ref++ 34 | o++ 35 | } 36 | } 37 | } 38 | 39 | return output 40 | } 41 |
-------------------------------------------------------------------------------- /anzi/proc_test.go: -------------------------------------------------------------------------------- 1 | package anzi 2 | 3 | import ( 4 | "io" 5 | "testing" 6 | "time" 7 | 8 | "bufio" 9 | "bytes" 10 | 11 | "overlord/pkg/mockconn" 12 | 13 | "github.com/stretchr/testify/assert" 14 | ) 15 | 16 | func TestGetStrLenOk(t *testing.T) { 17 | assert.Equal(t, 2, getStrLen(10)) 18 | assert.Equal(t, 3, getStrLen(999)) 19 | assert.Equal(t, 5, getStrLen(99999)) 20 | assert.Equal(t, 4, getStrLen(1024)) 21 | assert.Equal(t, 4, getStrLen(1000)) 22 | } 23 | 24 | func TestSyncRDBCmdOk(t *testing.T) { 25 | buf := bytes.NewBuffer(nil) 26 | 27 | inst := &Instance{ 28 | br: bufio.NewReader(buf), 29 | bw: bufio.NewWriter(buf), 30 | offset: int64(120), 31 | } 32 | inst.replAckConf() 33 | data := make([]byte, 36) 34 | size, err := io.ReadFull(buf, data) 35 | assert.Equal(t, 36, size) 36 | assert.NoError(t, err) 37 | } 38 | 39 | var longData = `"I am the Bone of my Sword 40 | Steel is my Body and Fire is my Blood. 41 | I have created over a Thousand Blades, 42 | Unknown to Death, 43 | Nor known to Life. 44 | Have withstood Pain to create many Weapons 45 | Yet those Hands will never hold Anything. 46 | So, as I Pray-- 47 | Unlimited Blade Works"` 48 | 49 | func TestWriteAllOk(t *testing.T) { 50 | buf := bytes.NewBuffer(nil) 51 | err := writeAll([]byte(longData), buf) 52 | assert.NoError(t, err) 53 | assert.Equal(t, len(longData), buf.Len()) 54 | } 55 | 56 | func TestCmdForwrad(t *testing.T) { 57 | tconn := mockconn.CreateConn([]byte(longData), 1) 58 | conn := mockconn.CreateConn([]byte(longData), 10) 59 | inst := &Instance{ 60 | tconn: tconn, 61 | 62 | conn: conn, 63 | br: bufio.NewReader(conn), 64 | bw: bufio.NewWriter(conn), 65 | } 66 | 67 | go inst.cmdForward() 68 | time.Sleep(time.Millisecond * 10) 69 | inst.Close() 70 | } 71 | 72 | func TestParsePSyncReply(t *testing.T) { 73 | data := []byte("+fullsync 0123456789012345678901234567890123456789 7788\r\n") 74 | inst := &Instance{} 75 | err := inst.parsePSyncReply(data) 76 | assert.NoError(t, err) 77 | assert.Equal(t, "0123456789012345678901234567890123456789", inst.masterID) 78 | assert.Equal(t, int64(7788), inst.offset) 79 | } 80 |
-------------------------------------------------------------------------------- /ci/README.md: -------------------------------------------------------------------------------- 1 | # overlord integration test suite 2 | 3 | ## Deterministic tests 4 | 5 | ### redis-cluster 6 | 7 | 1. Test sending commands of random length and random content 8 | 2. Test the result of sending unsupported commands 9 | 10 | ### proxy mode tests 11 | 12 | 1. Test sending single commands, multiple commands, and extra-long commands 13 | 2. Test the result of sending unsupported commands. 14 | 3. Test the behavior when the backend connection is dropped 15 | 4. Test each of the three protocols supported in request mode 16 | 17 | ## Benchmarks 18 | 19 | Benchmark the basic commands (get/set). 20 | 21 | ## Fuzz tests 22 | 23 | Run the tests with the go-fuzz library 24 |
-------------------------------------------------------------------------------- /ci/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | */corpus/* 2 | */crashers/* 3 | */suppressions/* 4 | */*.zip
-------------------------------------------------------------------------------- /ci/fuzz/mcNodeConnRetirival/fuzz.go: -------------------------------------------------------------------------------- 1 | package mcNodeConnRetrival 2 | 3 | import ( 4 | "overlord/proxy/proto" 5 | "overlord/proxy/proto/memcache" 6 | ) 7 | 8 | func Fuzz(data []byte) int { 9 | msg := proto.GetMsgs(1, 1)[0] 10 | nc := memcache.NewNodeConnWithLibConn("test-mc", "127.0.0.1", _createLibConn(data)) 11 | 12 | memcache.WithReq(msg, memcache.RequestTypeGet, []byte("1824"), []byte("\r\n")) 13 | if err := nc.Read(msg); err != nil { 14 | // assert.EqualError(t, errors.Cause(err), "read error") 15 | return -1 16 | } 17 | return 0 18 | } 19 |
-------------------------------------------------------------------------------- /ci/fuzz/mcNodeConnRetirival/gen/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/dvyukov/go-fuzz/gen" 5 | ) 6 | 7 | var zdata = []string{ 8 | "VALUE a 0 2\r\nab\r\n", 9 | } 10 | 11 | func main() { 12 | for _, data := range zdata { 13 | gen.Emit([]byte(data), nil, true) 14 | } 15 | 16 | bodySize := 1048576 17 | head := []byte("VALUE a 1 0 1048576\r\n") 18 | tail := "\r\nEND\r\n" 19 | 20 | data := []byte{} 21 | for i := 0; i < 3; i++ { 22 | data = append(data, head...) 23 | data = append(data, make([]byte, bodySize)...) 24 | data = append(data, tail...)
25 | } 26 | 27 | gen.Emit(data, nil, true) 28 | } 29 | -------------------------------------------------------------------------------- /ci/fuzz/mcNodeConnRetirival/mock.go: -------------------------------------------------------------------------------- 1 | package mcNodeConnRetrival 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "net" 7 | libnet "overlord/pkg/net" 8 | "sync/atomic" 9 | "time" 10 | ) 11 | 12 | const ( 13 | stateClosed = 1 14 | stateOpening = 0 15 | ) 16 | 17 | type mockAddr string 18 | 19 | func (m mockAddr) Network() string { 20 | return "tcp" 21 | } 22 | func (m mockAddr) String() string { 23 | return string(m) 24 | } 25 | 26 | type mockConn struct { 27 | addr mockAddr 28 | buf *bytes.Buffer 29 | err error 30 | closed int32 31 | } 32 | 33 | func (m *mockConn) Read(b []byte) (n int, err error) { 34 | if atomic.LoadInt32(&m.closed) == stateClosed { 35 | return 0, io.EOF 36 | } 37 | if m.err != nil { 38 | err = m.err 39 | return 40 | } 41 | return m.buf.Read(b) 42 | } 43 | 44 | func (m *mockConn) Write(b []byte) (n int, err error) { 45 | if atomic.LoadInt32(&m.closed) == stateClosed { 46 | return 0, io.EOF 47 | } 48 | 49 | if m.err != nil { 50 | err = m.err 51 | return 52 | } 53 | return m.buf.Write(b) 54 | } 55 | 56 | // writeBuffers impl the net.buffersWriter to support writev 57 | func (m *mockConn) writeBuffers(buf *net.Buffers) (int64, error) { 58 | if m.err != nil { 59 | return 0, m.err 60 | } 61 | return buf.WriteTo(m.buf) 62 | } 63 | 64 | func (m *mockConn) Close() error { 65 | atomic.StoreInt32(&m.closed, stateClosed) 66 | return nil 67 | } 68 | 69 | func (m *mockConn) LocalAddr() net.Addr { return m.addr } 70 | func (m *mockConn) RemoteAddr() net.Addr { return m.addr } 71 | 72 | func (m *mockConn) SetDeadline(t time.Time) error { return nil } 73 | func (m *mockConn) SetReadDeadline(t time.Time) error { return nil } 74 | func (m *mockConn) SetWriteDeadline(t time.Time) error { return nil } 75 | 76 | // _createConn is useful tools for handler test 77 | func _createConn(data []byte) net.Conn { 78 | mconn := &mockConn{ 79 | addr: "127.0.0.1:12345", 80 | buf: bytes.NewBuffer(data), 81 | } 82 | return mconn 83 | } 84 | 85 | func _createLibConn(data []byte) *libnet.Conn { 86 | mconn := _createConn(data) 87 | return libnet.NewConn(mconn, time.Second, time.Second) 88 | } 89 | -------------------------------------------------------------------------------- /ci/fuzz/mcparser/gen/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/dvyukov/go-fuzz/gen" 5 | ) 6 | 7 | var zdata = []string{ 8 | `SET A 1 1 1\r\n1\r\n`, 9 | "GET A", 10 | } 11 | 12 | func main() { 13 | for _, data := range zdata { 14 | gen.Emit([]byte(data), nil, true) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /ci/fuzz/redisparser/corpus/0: -------------------------------------------------------------------------------- 1 | *3 2 | $4 3 | MGET 4 | $4 5 | baka 6 | $4 7 | kaba 8 | *5 9 | $4 10 | MSET 11 | $1 12 | a 13 | $1 14 | b 15 | $3 16 | eee 17 | $5 18 | 12345 19 | *3 20 | $4 21 | MGET 22 | $4 23 | enen 24 | $4 25 | nime 26 | *2 27 | $3 28 | GET 29 | $5 30 | abcde 31 | *3 32 | $3 33 | DEL 34 | $1 35 | a 36 | $1 37 | b 38 | -------------------------------------------------------------------------------- /ci/fuzz/redisparser/corpus/1: -------------------------------------------------------------------------------- 1 | *3 2 | $5 3 | SETNX 4 | $1 5 | a 6 | $10 7 | abcdeabcde 8 | 
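The corpus files above and below are plain RESP frames used as go-fuzz seed inputs for the redis parser. As a rough illustration only (the SET command below is a hypothetical example, not one of the repository's seeds), the following sketch shows how the `*<argc>` / `$<len>` framing seen in these corpus entries can be produced programmatically; bytes built this way could be emitted as an extra seed with the same gen.Emit call that the gen/main.go generators under ci/fuzz use.

```go
package main

import (
	"fmt"
	"strings"
)

// respEncode renders a command as a RESP multi-bulk frame, matching the
// layout of the corpus seeds: "*<argc>" followed by "$<len>" and the bytes
// of each argument, every line terminated by \r\n.
func respEncode(args ...string) string {
	var b strings.Builder
	fmt.Fprintf(&b, "*%d\r\n", len(args))
	for _, a := range args {
		fmt.Fprintf(&b, "$%d\r\n%s\r\n", len(a), a)
	}
	return b.String()
}

func main() {
	// Hypothetical command, shown only to illustrate the framing.
	fmt.Printf("%q\n", respEncode("SET", "foo", "bar"))
}
```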
-------------------------------------------------------------------------------- /ci/fuzz/redisparser/corpus/2: -------------------------------------------------------------------------------- 1 | set a b 2 | -------------------------------------------------------------------------------- /ci/fuzz/redisparser/corpus/3: -------------------------------------------------------------------------------- 1 | get a 2 | -------------------------------------------------------------------------------- /ci/fuzz/redisparser/gen/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/dvyukov/go-fuzz/gen" 5 | ) 6 | 7 | var zdata = []string{ 8 | "*3\r\n$4\r\nMGET\r\n$4\r\nbaka\r\n$4\r\nkaba\r\n*5\r\n$4\r\nMSET\r\n$1\r\na\r\n$1\r\nb\r\n$3\r\neee\r\n$5\r\n12345\r\n*3\r\n$4\r\nMGET\r\n$4\r\nenen\r\n$4\r\nnime\r\n*2\r\n$3\r\nGET\r\n$5\r\nabcde\r\n*3\r\n$3\r\nDEL\r\n$1\r\na\r\n$1\r\nb\r\n", 9 | "*3\r\n$5\r\nSETNX\r\n$1\r\na\r\n$10\r\nabcdeabcde\r\n", 10 | "set a b\r\n", 11 | "get a\r\n", 12 | } 13 | 14 | func main() { 15 | for _, data := range zdata { 16 | gen.Emit([]byte(data), nil, true) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /ci/tests/proxy/proxy.go: -------------------------------------------------------------------------------- 1 | package proxy 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "fmt" 7 | "io" 8 | "net" 9 | "testing" 10 | "time" 11 | 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | // ================== tools functions 16 | 17 | func _createTCPProxy(t *testing.T, dist, origin int64) (cancel context.CancelFunc) { 18 | ctx := context.Background() 19 | var sub context.Context 20 | sub, cancel = context.WithCancel(ctx) 21 | listen, err := net.Listen("tcp", fmt.Sprintf("0.0.0.0:%d", origin)) 22 | if !assert.NoError(t, err) { 23 | return 24 | } 25 | conn, err := net.Dial("tcp", fmt.Sprintf("127.0.0.1:%d", dist)) 26 | if !assert.NoError(t, err) { 27 | return 28 | } 29 | // defer conn.Close() 30 | 31 | go func() { 32 | for { 33 | select { 34 | case <-sub.Done(): 35 | return 36 | default: 37 | } 38 | 39 | sock, err := listen.Accept() 40 | assert.NoError(t, err) 41 | 42 | forward := func(rd io.Reader, wr io.Writer) { 43 | for { 44 | _, err := io.Copy(wr, rd) 45 | if !assert.NoError(t, err) { 46 | return 47 | } 48 | } 49 | } 50 | 51 | go forward(sock, conn) 52 | go forward(conn, sock) 53 | } 54 | }() 55 | return 56 | } 57 | 58 | func _execute(t *testing.T) (bs []byte) { 59 | conn, err := net.DialTimeout("tcp", "127.0.0.1:21221", time.Second) 60 | if err != nil { 61 | t.Errorf("dial fail: %s", err) 62 | return 63 | } 64 | 65 | br := bufio.NewReader(conn) 66 | cmd := []byte("SET a_11 0 0 1\r\n1\r\n") 67 | conn.SetWriteDeadline(time.Now().Add(time.Second)) 68 | if _, err = conn.Write(cmd); err != nil { 69 | t.Errorf("conn write cmd:%s error:%v", cmd, err) 70 | } 71 | conn.SetReadDeadline(time.Now().Add(time.Second)) 72 | if bs, err = br.ReadBytes('\n'); err != nil { 73 | t.Errorf("conn read cmd:%s error:%s resp:xxx%sxxx", cmd, err, bs) 74 | return 75 | } 76 | 77 | return 78 | } 79 | -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/ci/tests/reload/conf.d/.gitkeep 
-------------------------------------------------------------------------------- /ci/tests/reload/conf.d/invalid/duplicate_name.conf: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "standalone1" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:8509" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 12 | write_timeout = 1000 13 | node_connections = 1 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | close_front_conn_when_conf_change = false 17 | servers = [ 18 | "127.0.0.1:8209:2 mc1", 19 | ] 20 | [[clusters]] 21 | name = "standalone1" 22 | hash_method = "fnv1a_64" 23 | hash_distribution = "ketama" 24 | hash_tag = "" 25 | cache_type = "memcache" 26 | listen_proto = "tcp" 27 | listen_addr = "0.0.0.0:8809" 28 | redis_auth = "" 29 | dial_timeout = 1000 30 | read_timeout = 1000 31 | write_timeout = 1000 32 | node_connections = 1 33 | ping_fail_limit = 3 34 | ping_auto_eject = true 35 | close_front_conn_when_conf_change = false 36 | servers = [ 37 | "127.0.0.1:8209:2 mc1", 38 | ] 39 | -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/invalid/duplicate_port.conf: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "standalone1" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:8509" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 12 | write_timeout = 1000 13 | node_connections = 1 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | close_front_conn_when_conf_change = false 17 | servers = [ 18 | "127.0.0.1:8209:2 mc1", 19 | ] 20 | [[clusters]] 21 | name = "standalone2" 22 | hash_method = "fnv1a_64" 23 | hash_distribution = "ketama" 24 | hash_tag = "" 25 | cache_type = "memcache" 26 | listen_proto = "tcp" 27 | listen_addr = "0.0.0.0:8509" 28 | redis_auth = "" 29 | dial_timeout = 1000 30 | read_timeout = 1000 31 | write_timeout = 1000 32 | node_connections = 1 33 | ping_fail_limit = 3 34 | ping_auto_eject = true 35 | close_front_conn_when_conf_change = false 36 | servers = [ 37 | "127.0.0.1:8209:2 mc1", 38 | ] 39 | -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/invalid/empty_server.conf: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "standalone1" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:8109" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 12 | write_timeout = 1000 13 | node_connections = 1 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | close_front_conn_when_conf_change = false 17 | servers = [ 18 | ] 19 | -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/invalid/invalid_port.conf: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "standalone1" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:8109" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 
12 | write_timeout = 1000 13 | node_connections = 1 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | close_front_conn_when_conf_change = false 17 | servers = [ 18 | "127.0.0.1:0:2 mc1", 19 | ] 20 | -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/invalid/invalid_weight.conf: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "standalone1" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:8109" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 12 | write_timeout = 1000 13 | node_connections = 1 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | close_front_conn_when_conf_change = false 17 | servers = [ 18 | "127.0.0.1:8209:-2 mc1", 19 | ] 20 | -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/invalid/more_alisa.conf: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "standalone1" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:8109" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 12 | write_timeout = 1000 13 | node_connections = 1 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | close_front_conn_when_conf_change = false 17 | servers = [ 18 | "127.0.0.1:8209:2 mc1 mc2", 19 | ] 20 | -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/invalid/no_weight.conf: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "standalone1" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:8109" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 12 | write_timeout = 1000 13 | node_connections = 1 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | close_front_conn_when_conf_change = false 17 | servers = [ 18 | "127.0.0.1:8209 mc1", 19 | ] 20 | -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/invalid/some_has_alisa.conf: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "standalone1" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:8109" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 12 | write_timeout = 1000 13 | node_connections = 1 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | close_front_conn_when_conf_change = false 17 | servers = [ 18 | "127.0.0.1:8209:2 mc1", 19 | "127.0.0.1:8210:2", 20 | ] 21 | -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/mc0.toml: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "reload_mc" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:20001" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 12 | write_timeout = 1000 13 | 
node_connections = 2 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | servers = [ 17 | "127.0.0.1:9101:1 mcExample", 18 | ] 19 | 20 | [[clusters]] 21 | name = "reload-redis" 22 | hash_method = "fnv1a_64" 23 | hash_distribution = "ketama" 24 | hash_tag = "" 25 | cache_type = "redis" 26 | listen_proto = "tcp" 27 | listen_addr = "0.0.0.0:26379" 28 | redis_auth = "" 29 | dial_timeout = 1000 30 | read_timeout = 1000 31 | write_timeout = 1000 32 | node_connections = 2 33 | ping_fail_limit = 3 34 | ping_auto_eject = false 35 | servers = [ 36 | "127.0.0.1:9001:1 redisExample", 37 | ] -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/mc1.toml: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "reload_mc" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:20001" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 12 | write_timeout = 1000 13 | node_connections = 2 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | servers = [ 17 | "127.0.0.1:9102:1 mcExample", 18 | ] 19 | 20 | [[clusters]] 21 | name = "reload-redis" 22 | hash_method = "fnv1a_64" 23 | hash_distribution = "ketama" 24 | hash_tag = "" 25 | cache_type = "redis" 26 | listen_proto = "tcp" 27 | listen_addr = "0.0.0.0:26379" 28 | redis_auth = "" 29 | dial_timeout = 1000 30 | read_timeout = 1000 31 | write_timeout = 1000 32 | node_connections = 2 33 | ping_fail_limit = 3 34 | ping_auto_eject = false 35 | servers = [ 36 | "127.0.0.1:9001:1 redisExample", 37 | ] 38 | -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/redis2.toml: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "reload_mc" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:20001" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 12 | write_timeout = 1000 13 | node_connections = 2 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | servers = [ 17 | "127.0.0.1:9101:1 mcExample", 18 | ] 19 | 20 | [[clusters]] 21 | name = "reload-redis" 22 | hash_method = "fnv1a_64" 23 | hash_distribution = "ketama" 24 | hash_tag = "" 25 | cache_type = "redis" 26 | listen_proto = "tcp" 27 | listen_addr = "0.0.0.0:26379" 28 | redis_auth = "" 29 | dial_timeout = 1000 30 | read_timeout = 1000 31 | write_timeout = 1000 32 | node_connections = 2 33 | ping_fail_limit = 3 34 | ping_auto_eject = false 35 | servers = [ 36 | "127.0.0.1:9001:1 redisExample", 37 | ] 38 | -------------------------------------------------------------------------------- /ci/tests/reload/conf.d/redis3.toml: -------------------------------------------------------------------------------- 1 | [[clusters]] 2 | name = "reload_mc" 3 | hash_method = "fnv1a_64" 4 | hash_distribution = "ketama" 5 | hash_tag = "" 6 | cache_type = "memcache" 7 | listen_proto = "tcp" 8 | listen_addr = "0.0.0.0:20001" 9 | redis_auth = "" 10 | dial_timeout = 1000 11 | read_timeout = 1000 12 | write_timeout = 1000 13 | node_connections = 2 14 | ping_fail_limit = 3 15 | ping_auto_eject = true 16 | servers = [ 17 | "127.0.0.1:9101:1 mcExample", 18 | ] 19 | 20 | [[clusters]] 21 | name = "reload-redis" 22 | hash_method = "fnv1a_64" 23 | hash_distribution = "ketama" 24 
| hash_tag = "" 25 | cache_type = "redis" 26 | listen_proto = "tcp" 27 | listen_addr = "0.0.0.0:26379" 28 | redis_auth = "" 29 | dial_timeout = 1000 30 | read_timeout = 1000 31 | write_timeout = 1000 32 | node_connections = 2 33 | ping_fail_limit = 3 34 | ping_auto_eject = false 35 | servers = [ 36 | "127.0.0.1:9002:1 redisExample", 37 | ] -------------------------------------------------------------------------------- /ci/tests/reload/example/.gitignore: -------------------------------------------------------------------------------- 1 | *.toml -------------------------------------------------------------------------------- /ci/tests/reload/example/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/ci/tests/reload/example/.gitkeep -------------------------------------------------------------------------------- /ci/tests/reload/realod.go: -------------------------------------------------------------------------------- 1 | // Package reload is the test suits for reload config files. 2 | package reload 3 | 4 | import ( 5 | "io" 6 | "os" 7 | ) 8 | 9 | func cp(src, dst string) error { 10 | in, err := os.Open(src) 11 | if err != nil { 12 | return err 13 | } 14 | defer in.Close() 15 | 16 | out, err := os.Create(dst) 17 | if err != nil { 18 | return err 19 | } 20 | defer out.Close() 21 | 22 | _, err = io.Copy(out, in) 23 | if err != nil { 24 | return err 25 | } 26 | return nil 27 | } 28 | -------------------------------------------------------------------------------- /cmd/anzi/README.md: -------------------------------------------------------------------------------- 1 | # Anzi 2 | 3 | see doc in [Anzi](../../doc/wiki-cn/tools.md) 4 | -------------------------------------------------------------------------------- /cmd/anzi/anzi.toml: -------------------------------------------------------------------------------- 1 | log_vl = 10 2 | log = "" 3 | debug = true 4 | stdout = true 5 | 6 | [migrate] 7 | max_rdb_concurrency = 10 8 | 9 | [[migrate.from]] 10 | cache_type = "redis_cluster" 11 | dial_timeout = 1000 12 | servers = [ 13 | "127.0.0.1:7000", 14 | "127.0.0.1:7001", 15 | ] 16 | 17 | 18 | [[migrate.from]] 19 | cache_type = "redis_cluster" 20 | dial_timeout = 1000 21 | servers = [ 22 | "127.0.0.1:7002", 23 | "127.0.0.1:7003", 24 | ] 25 | 26 | [migrate.to] 27 | name = "simple-redis" 28 | hash_method = "fnv1a_64" 29 | hash_distribution = "ketama" 30 | hash_tag = "{}" 31 | cache_type = "redis" 32 | listen_proto = "tcp" 33 | listen_addr = "0.0.0.0:27001" 34 | servers = ["127.0.0.1:6379:1 redis-1"] 35 | -------------------------------------------------------------------------------- /cmd/anzi/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | 6 | "github.com/BurntSushi/toml" 7 | 8 | "overlord/anzi" 9 | "overlord/pkg/log" 10 | "overlord/version" 11 | ) 12 | 13 | var confPath string 14 | 15 | func main() { 16 | flag.StringVar(&confPath, "conf", "anzi.toml", "anzi config file") 17 | flag.Parse() 18 | if version.ShowVersion() { 19 | return 20 | } 21 | 22 | conf := new(anzi.Config) 23 | _, err := toml.DecodeFile(confPath, &conf) 24 | if err != nil { 25 | panic(err) 26 | } 27 | conf.Migrate.SetDefault() 28 | if log.Init(conf.Config) { 29 | defer log.Close() 30 | } 31 | log.Info("start anzi redis migrate data tool") 32 | 33 | proc := anzi.NewMigrateProc(conf.Migrate) 34 | proc.Migrate() 35 | 
} 36 | -------------------------------------------------------------------------------- /cmd/apiserver/apiserver.toml: -------------------------------------------------------------------------------- 1 | listen = "0.0.0.0:8880" 2 | etcd = "http://127.0.0.1:2379" 3 | log_vl = 10 4 | log = "info" 5 | debug = true 6 | stdout = true 7 | 8 | [monitor] #overlord集成普罗米修斯与grafana的参数 9 | url = "http://127.0.0.1:1234" 10 | panel = "overlord" 11 | name_var = "cluster" 12 | org_id = 1 13 | 14 | [groups] 15 | sh001 = "上海核心" 16 | bj001 = "北京核心" 17 | 18 | [[versions]] # 版本信息 19 | cache_type = "redis" 20 | versions = ["4.0.11", "3.2.8"] 21 | image = "docker.io/library/redis" 22 | 23 | [[versions]] 24 | cache_type = "redis_cluster" 25 | versions = ["4.0.11", "3.2.8", "5.0"] 26 | image = "docker.io/library/redis" 27 | 28 | [[versions]] 29 | cache_type = "memcache" 30 | versions = ["1.5.0"] 31 | 32 | [cluster] #集群默认配置:overlord-proxy专用配置 33 | dial_timeout = 1000 34 | read_timeout = 1000 35 | write_timeout = 1000 36 | node_connections = 2 37 | ping_fail_limit = 3 38 | ping_auto_eject = true 39 | -------------------------------------------------------------------------------- /cmd/apiserver/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | 6 | "github.com/BurntSushi/toml" 7 | 8 | "overlord/pkg/log" 9 | "overlord/platform/api/model" 10 | "overlord/platform/api/server" 11 | "overlord/platform/api/service" 12 | "overlord/version" 13 | ) 14 | 15 | var ( 16 | confPath string 17 | ) 18 | 19 | func main() { 20 | flag.StringVar(&confPath, "conf", "conf.toml", "scheduler conf") 21 | flag.Parse() 22 | 23 | if version.ShowVersion() { 24 | return 25 | } 26 | 27 | conf := new(model.ServerConfig) 28 | _, err := toml.DecodeFile(confPath, &conf) 29 | if err != nil { 30 | panic(err) 31 | } 32 | if log.Init(conf.Config) { 33 | defer log.Close() 34 | } 35 | svc := service.New(conf) 36 | server.Run(conf, svc) 37 | } 38 | -------------------------------------------------------------------------------- /cmd/balancer/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "strings" 6 | 7 | "overlord/pkg/etcd" 8 | "overlord/pkg/log" 9 | "overlord/platform/job/balance" 10 | "overlord/version" 11 | ) 12 | 13 | var ( 14 | cluster string 15 | db string 16 | ) 17 | 18 | func main() { 19 | flag.StringVar(&cluster, "cluster", "", "cluster name") 20 | flag.StringVar(&db, "db", "", "etcd dsn") 21 | flag.Parse() 22 | if version.ShowVersion() { 23 | return 24 | } 25 | 26 | log.InitHandle(log.NewStdHandler()) 27 | var etcdURL string 28 | if strings.HasPrefix(db, "http://") { 29 | etcdURL = db 30 | } else { 31 | etcdURL = "http://" + db 32 | } 33 | 34 | e, err := etcd.New(etcdURL) 35 | if err != nil { 36 | log.Errorf("balance fail to connect to etcd due %v", err) 37 | return 38 | } 39 | 40 | err = balance.Balance(cluster, e) 41 | if err != nil { 42 | log.Errorf("fail to init balance %s job due %v", cluster, err) 43 | return 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /cmd/enri/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "overlord/enri" 5 | ) 6 | 7 | func main() { 8 | enri.Run() 9 | } 10 | -------------------------------------------------------------------------------- /cmd/executor/main.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | 7 | "overlord/pkg/log" 8 | "overlord/platform/mesos" 9 | "overlord/version" 10 | ) 11 | 12 | func main() { 13 | flag.Parse() 14 | if version.ShowVersion() { 15 | return 16 | } 17 | 18 | ec := mesos.New() 19 | log.InitHandle(log.NewStdHandler()) 20 | ec.Run(context.Background()) 21 | } 22 | -------------------------------------------------------------------------------- /cmd/proxy/proxy-example.toml: -------------------------------------------------------------------------------- 1 | ################################################## 2 | # # 3 | # Overlord # 4 | # a proxy based high performance # 5 | # Memcached&Redis solution # 6 | # written in Go # 7 | # # 8 | ################################################## 9 | pprof = "0.0.0.0:2110" 10 | debug = false 11 | log = "" 12 | log_lv = 0 13 | 14 | [proxy] 15 | # The read timeout value in msec that we wait for to receive a response from the client. By default, we wait indefinitely. 16 | read_timeout = 0 17 | # The write timeout value in msec that we wait for to write a response to the client. By default, we wait indefinitely. 18 | write_timeout = 0 19 | # proxy accept max connections from client. By default, we no limit. 20 | max_connections = 0 21 | # proxy support prometheus metrics. By default, we use it. 22 | use_metrics = true 23 | -------------------------------------------------------------------------------- /cmd/scheduler/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "time" 6 | 7 | "overlord/pkg/etcd" 8 | "overlord/pkg/log" 9 | "overlord/platform/mesos" 10 | "overlord/version" 11 | 12 | "github.com/BurntSushi/toml" 13 | ) 14 | 15 | var confPath string 16 | var defConf = &mesos.Config{ 17 | User: "root", 18 | Name: "test", 19 | Master: "127.0.0.1:5050", 20 | ExecutorURL: "http://127.0.0.1:8000/executor", 21 | DBEndPoint: "http://127.0.0.1:2379", 22 | Checkpoint: true, 23 | FailOver: mesos.Duration(time.Hour), 24 | } 25 | 26 | func main() { 27 | flag.StringVar(&confPath, "conf", "", "scheduler conf") 28 | flag.Parse() 29 | if version.ShowVersion() { 30 | return 31 | } 32 | 33 | conf := new(mesos.Config) 34 | if confPath != "" { 35 | _, err := toml.DecodeFile(confPath, &conf) 36 | if err != nil { 37 | panic(err) 38 | } 39 | } else { 40 | conf = defConf 41 | } 42 | if log.Init(conf.Config) { 43 | defer log.Close() 44 | } 45 | log.Infof("start framework with conf %v", conf) 46 | db, err := etcd.New(conf.DBEndPoint) 47 | if err != nil { 48 | panic(err) 49 | } 50 | log.Info("init etcd successful") 51 | sched := mesos.NewScheduler(conf, db) 52 | _ = sched.Run() 53 | } 54 | -------------------------------------------------------------------------------- /cmd/scheduler/scheduler.toml: -------------------------------------------------------------------------------- 1 | user = "root" 2 | name = "overlord" 3 | checkpoint = true 4 | master = "172.22.33.167:5050" 5 | executor_url = "http://172.22.33.198:20001/executor" 6 | db_end_point = "http://172.22.33.198:2379" 7 | fail_over = "72h" 8 | role=["sh001"] 9 | -------------------------------------------------------------------------------- /doc/deploy.md: -------------------------------------------------------------------------------- 1 | ## 部署说明 2 | 3 | ### 安装jdk 4 | mesos及zookeeper依赖java,必须使用jdk8以上 5 | [安装jdk8](../scripts/install/install_java.sh) 6 | 7 | ### 
安装mesos 8 | 建议版本 v1.7.0,建议部署三master以保证高可用 9 | [安装步骤](http://mesos.apache.org/documentation/latest/building/) 参考官方说明 10 | **mesos构建较慢,建议构建一次后打包到其他机器减少其他机器构建时间** 11 | 12 | ### 安装etcd 13 | 用于存储部署信息及集群原信息,推荐版本v3.3.10 14 | 15 | ### 安装zookeeper 16 | 可选,如果mesos-master需要保证高可用,则为必选,推荐版本v3.4.12 17 | [安装zk集群](../scripts/install/install_zk.sh) 18 | ** 需要把ip_array替换为部署的机器列表地址** 19 | 20 | ### 安装ngnix 21 | 可选,用于作为文件服务器,提供redis memcache mesos-executor的二进制下载 22 | 23 | ### 部署scheduler 24 | #### build 25 | ```shell 26 | cd $GOPATH/src/overlord/cmd/scheduler 27 | go build 28 | ``` 29 | #### run 30 | ```shell 31 | ./scheduler -conf scheduler.toml 32 | ``` 33 | ### 部署executor 34 | #### build 35 | ``` 36 | cd $GOPATH/src/overlord/cmd/executor 37 | go build 38 | ``` -------------------------------------------------------------------------------- /doc/images/appid-list.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/doc/images/appid-list.gif -------------------------------------------------------------------------------- /doc/images/cache-platform-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/doc/images/cache-platform-arch.png -------------------------------------------------------------------------------- /doc/images/cluster-detail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/doc/images/cluster-detail.png -------------------------------------------------------------------------------- /doc/images/create-cluster.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/doc/images/create-cluster.gif -------------------------------------------------------------------------------- /doc/images/job-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/doc/images/job-list.png -------------------------------------------------------------------------------- /doc/images/overlord.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/doc/images/overlord.jpeg -------------------------------------------------------------------------------- /doc/images/overlord.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/doc/images/overlord.png -------------------------------------------------------------------------------- /doc/images/overlord_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/doc/images/overlord_arch.png -------------------------------------------------------------------------------- /doc/wiki-cn/.gitignore: -------------------------------------------------------------------------------- 1 | _book/ 2 | -------------------------------------------------------------------------------- 
/doc/wiki-cn/README.md: -------------------------------------------------------------------------------- 1 | # overlord 2 | 3 | overlord 是 [bilibili](www.bilibili.com) 提出的一套完整的缓存解决方案。旨在为初创型(startup)公司提供一套完整迅速的缓存解决方案。 4 | 5 | 它由一系列的组件组成,其中包括: 6 | 7 | 1. 一个轻量、快速、多功能的缓存代理组件:overlord-proxy。 8 | 2. 一个迅速、自愈、管理,负责部署缓存的平台:overlord-platform。 9 | 3. 一个简单的的命令行工具:enri。 10 | 4. 一个快速简单的redis数据导入导出工具: anzi。 11 | 12 | ## overlord-proxy 介绍 13 | 14 | overlord-proxy 的灵感来源自著名的缓存代理组件 twemproxy 和 corvus,在一致性 hash 的基础上,overlord-prxoy 同时支持了 redis-cluster 模式。同时支持了 memcache text/memcache binary/redis cluster/ redis单节点。你可以自由的选择 memcache、redis 甚至是 redis cluster 客户端连接到对应的端口即可。 15 | 16 | 我们为所有的 overlord-proxy 用户提供了一套默认配置,你可以直接在 'cmd/proxy' 目录里找到他。这个代理组件是如此的轻量,甚至于你可以脱离整个overlord体系,单独使用proxy做自己的缓存代理。 17 | 18 | 同时,proxy 目前正在计划进行下一步计划,我们将在 overlord-proxy 1.0 的基础上加上一些更为高级的功能:暖机、多级缓存、多写、先序路由、动态重载等。我们将在保证缓存代理基本功能的情况下,尽快的支持一些高级的使用方式。 19 | 20 | ## overlord-platform 21 | 22 | 在缓存运维的工作中,我们总结了过去的运维的经验,开发出了一套部署缓存的平台。它着重解决两个核心目标: 23 | 24 | 1. 如何快速的利用所有的池化的机器资源。 25 | 2. 如何保证集群在节点故障的时候的稳定性。 26 | 27 | 为此,我们使用了 [chunk算法](https://github.com/eleme/ruskit/pull/46)/dist 两大算法作为保证 redis-cluster 故障稳定性的基础,同时配合着 mesos 强大的 scale 能力。这样一来,overlord-platform 要做的事情其实就已经极少了。 28 | 29 | 我们的 overlord-platform 平台,支持以单例模式部署 memcache/redis ,同时也支持以 redis-cluster 模式部署 redis。其中,由于单例模式本身是无状态的,所以部署起来极其的简单。 30 | 31 | 但是,现行的 redis-cluster 创建方案却有着非常显著的缺陷:慢。 32 | 33 | 通常,我们为了创建一个 redis-cluster,要在各个机器上渲染配置文件,以 cluster 模式配置启动,最后还要拿 redis-trib 来创建整个完整的集群。其中, redis-trib 将会依次的给各个节点发送 `CLUSTER MEET` 请求,并进行握手,握手成功之后通过 gossip 互相更新协议。然而节点一多,O(n^2) 的复杂度,让节点数量达到一定数量级的集群,更难吃创建出来,尤其在600个主节点情况下,最差甚至能达到30分钟以上。 34 | 35 | 为此,我们使用了一种,模拟集群恢复的方法进行创建集群。同样是创建600个主节点,使用新方法仅仅需要10s的时间,速度提升了百倍不止。 36 | 37 | ## redis cluster集群管理工具:enri 38 | 39 | enri 是我们自己开发拓展的 redis cluster 管理工具,它将在命令行为运维人员提供一种简单快速的检查和管理集群的方式。它受 [ruskit](https://github.com/eleme/ruskit) 的启发,并且加入了许多其没有的功能,同时在性能上进行了改进。可以让治理集群的工作变得简单轻松。 40 | 41 | ## 数据迁移工具: anzi 42 | 43 | 受 [redis-migrate-tool](https://github.com/vipshop/redis-migrate-tool) 的启发,我们开发了自己的迁移工具。与原工具相比,新的迁移工具采用Go语言编写,同时将支持更加服务化的场景,可以同时创建和传输多个集群的数据。同时可以集成 platform,也就意味着用户可以更加方便的将集群数据在平台之间迁移。 44 | -------------------------------------------------------------------------------- /doc/wiki-cn/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | * [Introduction](README.md) 4 | * [overlord-platform 部署指南](platform-deploy.md) 5 | * [overlord-platform 使用指南](platform-usage.md) 6 | * [overlord-proxy 使用指南](proxy-usage.md) 7 | * [overlord-proxy](proxy.md) 8 | * [功能支持](proxy/features.md) 9 | * [性能测试](proxy/benchmark.md) 10 | * [设计](proxy/design.md) 11 | * [支持&不支持的命令列表](proxy/cmd.md) 12 | * [overlord-platform](platform.md) 13 | * [为什么选择mesos](platform/why-mesos.md) 14 | * [chunk算法本质](platform/chunk.md) 15 | * [资源分组](platform/group.md) 16 | * [高可用](platform/high-avaliable.md) 17 | * [集群部署](platform/deploy-cluster.md) 18 | * [集群伸缩](platform/scale.md) 19 | * [节点恢复策略](platform/recovery-policy.md) 20 | * [前端开发指南](platform/front-end.md) 21 | * [工具支持 enri/anzi](tools.md) 22 | -------------------------------------------------------------------------------- /doc/wiki-cn/enri.md: -------------------------------------------------------------------------------- 1 | # enri 2 | 3 | ## Install 4 | ``` 5 | go get github.com/bilibili/overlord/cmd/enri 6 | ``` 7 | 8 | ## useage 9 | 10 | ### 创建集群 11 | 12 | ```shell 13 | // 创建redis cluster并指定slave数为1 14 | enri create -n 127.0.0.1:7001 -n 127.0.0.1:7002 -n 127.0.0.1:7003 -n 127.0.0.1:7004 -n 
127.0.0.1:7005 -n 127.0.0.1:7006 -s 1 15 | ``` 16 | 17 | ### 添加节点 18 | 19 | ```shell 20 | // 把 7007 节点添加进集群 7000 21 | enri add -c 127.0.0.1:7000 -n 127.0.0.1:7007 22 | 23 | // 把 7007,7008 节点添加进集群 7000,并且7007为master 7008为slave 24 | enri add -c 127.0.0.1:7000 -n 127.0.0.1:7007,127.0.0.1:7008 25 | ``` 26 | 27 | ### 删除节点 28 | 29 | ```shell 30 | // 从集群中删除7007 节点 31 | enri del -c 127.0.0.1:7000 -n 127.0.0.1:7007 32 | ``` 33 | 34 | ### 修复集群 35 | 36 | ```shell 37 | // 修复集群信息 38 | enri fix -n 127.0.0.1:7001 39 | ``` 40 | 41 | ### 重新分布slot 42 | 43 | ```shell 44 | enri reshard -n 127.0.0.1:7001 45 | ``` 46 | 47 | ### 迁移slot 48 | 49 | ```shell 50 | // 从7001 迁移10个slot到7002 51 | enri migrate -o 127.0.0.1:7001 -d 127.0.0.1:7002 -c 10 52 | // 迁移7001全部slot到7002 53 | enri migrate -o 127.0.0.1:7001 -d 127.0.0.1:7002 54 | // 把slot 10 从7001迁移到7002 55 | enri migrate -o 127.0.0.1:7001 -d 127.0.0.1:7002 -s 10 56 | // 从集群其他节点迁移10个slot到7001 57 | enri migrate -d 127.0.0.1:7001 -c 10 58 | // 从7001迁移10个slot到集群其他节点 59 | enri migrate -o 127.0.0.1:7001 -c 10 60 | ``` 61 | 62 | ### 设置replicate 63 | ```shell 64 | // 设置7006 为7007的从节点 65 | enri replicate -m 127.0.0.1:7007 -s 127.0.0.1:7006 66 | ``` 67 | 68 | ### 集群信息info 69 | ```shell 70 | enri info -c 127.0.0.1:7001 71 | ``` -------------------------------------------------------------------------------- /doc/wiki-cn/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "lockfileVersion": 1 3 | } 4 | -------------------------------------------------------------------------------- /doc/wiki-cn/platform-usage.md: -------------------------------------------------------------------------------- 1 | # overlord-platform 使用指南 2 | 3 | ## 创建集群 4 | 5 | 1. 填写名称、总容量(支持单位 G/M)、集群类型(推荐使用 Redis Cluster、版本、型号(支持定制)、分组,APPID 信息为选填内容。 6 | 2. 填写完成后点击 立即创建 按钮,将会自动跳转至集群详情页。 7 | 8 | ![create cluster](../images/create-cluster.gif) 9 | 10 | ## 查看集群详情 11 | 12 | ![cluster detail](../images/cluster-detail.png) 13 | 14 | *删除集群操作前请务必解除 Appid 关联关系* 15 | 16 | ## 查看 Appid 列表 17 | 18 | 1. 查看与 Appid 有关联的集群信息,可通过 *添加关联* 按钮为当前 Appid 继续添加集群。 19 | 2. 点击集群列表右侧到 *解除关联* 按钮,可将解除当前 Appid 和集群的关联关系。 20 | 3. 点击 Appid 列表底部的 *添加 Appid* 按钮,可创建 Appid。 21 | 22 | ![appid list](../images/appid-list.gif) 23 | 24 | ## 查看 Job 列表 25 | 26 | ![job list](../images/job-list.png) -------------------------------------------------------------------------------- /doc/wiki-cn/platform.md: -------------------------------------------------------------------------------- 1 | # overlord-platform 2 | 3 | 4 | overlord-platform 是一个依赖于 mesos 搭建的缓存节点部署平台。 5 | 6 | * [为什么选择mesos](platform/why-mesos.md) 7 | * [chunk算法本质](platform/chunk.md) 8 | * [资源分组](platform/group.md) 9 | * [高可用](platform/high-avaliable.md) 10 | * [集群部署](platform/deploy-cluster.md) 11 | * [集群伸缩](platform/scale.md) 12 | * [节点恢复策略](platform/recovery-policy.md) 13 | * [前端开发指南](platform/front-end.md) 14 | -------------------------------------------------------------------------------- /doc/wiki-cn/platform/chunk.md: -------------------------------------------------------------------------------- 1 | # chunk/dist 算法 2 | chunk算法的目的是保证redis cluster部署的高可用。chunk算法节点分布必须满足如下三个条件 3 | 1. 主从不在同一个物理节点 4 | 2. 任意一个物理节点分配的节点数少于总数的一半 5 | 3. 尽可能部署在资源最充足的物理节点 6 | 对于非cluster的缓存集群,由于没有主从的概念,因此只需简化为dist,只需满足上述的2 3 两个条件。 7 | 同时对chunk的实现有如下约定: 8 | 1. master数量必须为偶数 9 | 2. 可用机器数必须不小于三 10 | 2. 
机器节点数不能为3且master数量不能为4 11 | 12 | ## chunk 算法go实现 13 | * 把offer根据节点mem cpu的需求转换成对应的可用资源单元 14 | 15 | ``` 16 | func mapIntoHostRes(offers []ms.Offer, mem float64, cpu float64) (hosts []*hostRes) { 17 | // 具体实现chunk.go/mapIntoHostRes 18 | } 19 | 20 | type hostRes struct { 21 | name string // 机器名 22 | count int // 当前机器可分配的节点数 23 | } 24 | ``` 25 | 26 | * 按每台机器可部署的节点数进行降序排序 27 | 28 | * 填充每台机器应部署的节点数 29 | 30 | ``` 31 | func dpFillHostRes(chunks []*Chunk, disableHost map[string]struct{}, hrs []*hostRes, count int, scale int) (hosts []*hostRes) { 32 | for { 33 | // 寻找已部署该集群节点数最少的机器 34 | i := findMinHrs(hrs, hosts, disableHost, all, scale) 35 | if left == 0 { 36 | return 37 | } 38 | hosts[i].count += scale 39 | left -= scale 40 | 41 | } 42 | } 43 | ``` 44 | * 根据hostRes寻找chunk组 45 | 46 | ```chunk.go/Chunks 47 | for { 48 | // 找到剩余资源最多的机器 49 | name, count := maxHost(hrs) 50 | if count == 0 { 51 | break 52 | } 53 | m := hrmap[name] 54 | // 寻找与该机器chunk对最少的对端机器,组成一组chunk 55 | llh := findMinLink(linkTable, m) 56 | if hrs[llh].count < 2 { 57 | linkTable[llh][m]++ 58 | linkTable[m][llh]++ 59 | continue 60 | } 61 | llHost := hrs[llh] 62 | links = append(links, link{Base: name, LinkTo: llHost.name}) 63 | linkTable[llh][m]++ 64 | linkTable[m][llh]++ 65 | hrs[m].count -= 2 66 | hrs[llh].count -= 2 67 | } 68 | ``` 69 | 70 | * 把chunk组转化为最终的主从分布关系 71 | 72 | ``` 73 | func links2Chunks(links []link, portsMap map[string][]int) []*Chunk { 74 | // ... 75 | } 76 | ``` 77 | 78 | ## 动态chunk 79 | chunk算法实现了redis cluster集群创建时节点分布的规则计算。但是对于集群扩容,集群节点故障则没法没法的处理,为了解决这种问题,基于chunk算法,又实现了chunkAppend和chunkRecovery。 80 | 81 | **ChunkAppend**会在原有的chunk的基础上,在不破坏原有chunk的情况下,寻找到合适的新chunk组并加入原有的chunks。保证新加入的chunk与旧chunk同时满足chunk的三个条件。 82 | 83 | **ChunkRecovery**则会剔除故障的old host,并尝试寻找全新的host替代旧的host,如果寻找不到全新的host,则会在原先的chunk基础上,重新寻找合适的new host并将old host 的节点从新规划到合适的host上。并且依然满足chunk的约束。 84 | 85 | ## chunk 保证 86 | 根据chunk规划了redis cluster节点的分布以主从情况。但是实际redis启动的时候,可能由于种种原因发生了不希望看到的主从切换。或者其他意料不到的情况。因此为了保证最终部署情况满足chunk,还需要对部署好的集群进行chunk校验. 87 | -------------------------------------------------------------------------------- /doc/wiki-cn/platform/deploy-cluster.md: -------------------------------------------------------------------------------- 1 | # 集群部署 2 | 3 | 本小节主要介绍创建一套集群的全部流程,包括 4 | 1. 资源获取 5 | 2. 资源分布 6 | 3. 主从分布 7 | 3. 
集群一致性检验 8 | 9 | 以及[如何快速创建一套redis cluster](#一种利用nodes.conf快速创建集群的办法) 10 | ## 资源获取 11 | 当framework收到集群创建任务的时候,首先要向mesos请求恢复资源(call.Revive),确保mesos集群有足够的资源能够部署当前任务。收到revice后,mesos会向framework回复当前平台可用的资源offer,scheduler根据当前offer判断是否满足集群部署的需求。 12 | 13 | ## 资源分布 14 | 当获取到的offer有足够的资源可以部署当前创建任务时,scheduler根据算法规则[chunk/dist](chunk.md)进行集群节点分布规划。节点分布规划包括两部分 每个主机上应该部署几个节点;cluster的主从要如何分布。规划节点分布的同时,scheduler会生成具体的节点创建计划,并下发到executor创建具体的节点实例。 15 | 16 | ## 主从分布 17 | 对于redis cluster,为了保证集群的高可用,必须确保集群的主从不能同时分布在一台host上,为了解决这一问题,我们使用了[chunk算法](chunk.md)对节点进行分组。通过(A主B从)-(A从B主)这样一对互为主从的chunk组,保证了主从分布的高可用。 18 | 19 | ## 集群一致性校验 20 | redis cluster启动后,为了保证集群的真实可用,以及集群的分布的确服务预先创建的chunk规则,我们还需要对进群进行一致性校验,确保所有节点已经通过gossip达成了最终一致,并且cluster status处于ok状态。 21 | 22 | ## 一种利用nodes.conf快速创建集群的办法 23 | 传统的redis cluster创建方式是各个节点以cluster的模式启动,节点启动完毕后,通过redis-trib.rb工具进行节点间的握手。redis-trib通过向每一个节点发送CLUSTER MEET请求,来进行节点间的握手,节点握手成功后,通过Gossip协议进行集群间nodes节点信息的同步。但是,单节点数增长到一定程度的时候,通过CLUSTER MEET 进行集群握手将会有巨大的效率问题,由于O(n^2)的复杂度,当节点数量进一步增长时,握手效率也会急速下降。 24 | 25 | 为了解决这一问题,我们模拟了集群恢复的流程,通过mock nodes.conf的方式提前创建好nodes文件。通过mock nodes.conf 同样是创建600个主节点,使用新方法仅仅需要10s的时间,速度提升了百倍不止。 26 | 27 | 以下是一个mock nodes.conf 的例子 28 | 29 | ``` 30 | 0000000000000000000000000000000000225157 172.22.33.199:31000@41000 master - 0 0 0 connected 0-4096 31 | 0000000000000000000000000000000000225158 172.22.33.199:31001@41001 myself,slave 0000000000000000000000000000000000225159 0 0 0 connected 32 | 0000000000000000000000000000000000225159 172.22.33.184:31000@41000 master - 0 0 0 connected 4097-8193 33 | 0000000000000000000000000000000000225160 172.22.33.184:31001@41001 slave 0000000000000000000000000000000000225157 0 0 0 connected 34 | 0000000000000000000000000000000000225161 172.22.33.192:31000@41000 master - 0 0 0 connected 8194-12290 35 | 0000000000000000000000000000000000225162 172.22.33.192:31001@41001 slave 0000000000000000000000000000000000225163 0 0 0 connected 36 | 0000000000000000000000000000000000225163 172.22.33.187:31000@41000 master - 0 0 0 connected 12291-16383 37 | 0000000000000000000000000000000000225164 172.22.33.187:31001@41001 slave 0000000000000000000000000000000000225161 0 0 0 connected 38 | vars currentEpoch 0 lastVoteEpoch 0 39 | ``` 40 | 41 | 通过提前为每一个节点生成runnid,并渲染nodes.conf模板,节点启动时,只需要根据nodes.conf进行集群的恢复,而无需进行漫长的meet握手,大大提升了集群创建的速度。 -------------------------------------------------------------------------------- /doc/wiki-cn/platform/front-end.md: -------------------------------------------------------------------------------- 1 | # overlord 前端开发指南 2 | 3 | overlord-platform 前端项目基于 vue cli 3 搭建的单页面应用。 4 | 5 | ## 环境要求 6 | 7 | nodejs 8.x 8 | 9 | ## 安装编译 10 | 11 | ```bash 12 | cd web 13 | 14 | # 安装依赖 15 | npm install 16 | 17 | # 启动 18 | yarn run serve 19 | 20 | # Lint 文件 21 | yarn run lint 22 | ``` 23 | 24 | ## 文件结构 25 | 26 | ```bash 27 | ├── README.md 28 | ├── babel.config.js // babel 配置 29 | ├── package.json 30 | ├── postcss.config.js // postcss 配置 31 | ├── public 32 | │   ├── favicon.ico 33 | │   └── index.html 34 | ├── src 35 | │   ├── App.vue // 页面入口文件 36 | │   ├── assets // 资源文件 37 | │   │   ├── Starbounder-2.otf 38 | │   ├── constants // 常量文件 39 | │   │   └── CREATE_TYPES.js 40 | │   ├── http 41 | │   │   ├── api.js // API 方法 42 | │   │   ├── config.js // axios 基本配置 43 | │   │   └── service.js // axios 拦截器 44 | │   ├── layout // 页面布局组件 45 | │   │   ├── Header.vue 46 | │   │   └── SideBar.vue 47 | │   ├── main.js // 程序入口文件 48 | │   ├── router.js // 路由配置 49 | │   ├── store // vuex 状态管理 50 | │   │   ├── index.js 51 | │   │   ├── modules 52 | │   
│   │   ├── cluster.js 53 | │   │   │   └── job.js 54 | │   │   └── mutation-types.js 55 | │   ├── style // 全局样式 56 | │   │   ├── element-custom.scss 57 | │   │   ├── element-variables.scss 58 | │   │   ├── mixin.scss 59 | │   │   └── reset.scss 60 | │   └── views // 业务代码 61 | │   ├── AddCluster.vue // 创建集群 62 | │   ├── AppId.vue // AppId 管理 63 | │   ├── Cluster.vue // 集群详情 64 | │   ├── Home.vue // 首页(Cluster 搜索) 65 | │   └── Job.vue // Job 列表 66 | ├── vue.config.js // 全局 CLI 配置文件 67 | └── yarn.lock // 依赖 lock 文件 68 | ``` 69 | 70 | ## 文件配置 71 | 72 | 你可以在 `vue.config.js` 文件中配置 proxy,以及其他 webpack 相关的配置,详细配置文档请参考 [配置参考](https://cli.vuejs.org/zh/config/)。 73 | -------------------------------------------------------------------------------- /doc/wiki-cn/platform/group.md: -------------------------------------------------------------------------------- 1 | # 资源分组 2 | 资源分组通过mesos的[roles](http://mesos.apache.org/documentation/latest/roles/)实现,mesos-agent启动时,通过设置roles来指定资源所属分组。framework同样也是通过配置role来获取对应role提供的offers。 3 | 4 | 一般情况下,推荐所有的机器加入公用的资源池,无需对资源进行分组,可以高效利用机器资源,减少机器资源碎片以及合理部署不同类型资源提搞机器利用率。 5 | 6 | 如果必须对资源进行分组,则根据上述所述 7 | 1. 设置agent资源role 8 | 2. 设置framework role 9 | 10 | mesos会自动帮你你实现资源分组。具体逻辑参见[mesos](http://mesos.apache.org/documentation/latest/roles/),这里不再赘述。 11 | -------------------------------------------------------------------------------- /doc/wiki-cn/platform/high-avaliable.md: -------------------------------------------------------------------------------- 1 | # 高可用设计 2 | 3 | ## framework 高可用 4 | mesos framework 的高可用参考了[Designing Highly Available Mesos Frameworks](http://mesos.apache.org/documentation/latest/high-availability-framework-guide/#designing-highly-available-mesos-frameworks) 5 | 包括: 6 | 1. [mesos-master高可用](#mesos-master高可用) 7 | 2. [framework高可用](#framework高可用) 8 | 3. [mesos-agent高可用](#mesos-agent高可用) 9 | 4. [元数据存储高可用](#etcd高可用) 10 | 11 | ### mesos-master高可用 12 | mesos原生支持[高可用模式部署](http://mesos.apache.org/documentation/latest/high-availability/)通过zk进行master的选举,保证单个master挂掉的情况下请他standby master升级为leader提供服务。通过启动参数 --zk=zk://host1:port1,host2:port2,.../path 连接zk启用高可用mesos集群。 13 | 14 | ### framework高可用 15 | 在framewrok运行的过程中,可能出现服务的故障,即使没有故障,也会出现服务的升级迭代等,那么如何在framework重新启动的时候获取已经运行的任务。 16 | framework初次启动的时候,mesos master会给framework分给一个framework id。通过将这个framework id保存在etcd中,并framework故障重启的时候只需要冲etcd获取这个framework id并重新注册到mesos即可重新获取到当前framwork正在运行的task信息。需要注意的是,在线上环境中,一定要确保把 fail_over 设置的足够大,fail_over 是mesos master允许的framwork最大故障时间,如果超过这个时间framework没有恢复,那么mesos将剔除这个framework并删除所有已运行的task并回收资源。只有当framework在这个时间内恢复的时候,才会根据framework id从mesos获取运行task信息,并进行后续的task管理。线上建议把fail_over设置为一周。 17 | 18 | ### mesos-agent高可用 19 | mesos-agent 发生故障的时候,mesos默认会结束这个agent下所有运行的task,对于缓存服务来说,这明显是不可接受的。因此要如何保证mesos agent重启的时候不影响运行的缓存实例呢。答案是**设置checkpoint**。 20 | 通过在启动scheduler的时候设置checkpoint为true,当agent故障的时候,当前agent运行的task检测到checkpoint为true就不会立即自动退出。而是等待agent恢复进行重连。 21 | ### etcd高可用 22 | 使用etcd存储集群元信息,使用集群的方式部署etcd,保证存储的高可用 23 | -------------------------------------------------------------------------------- /doc/wiki-cn/platform/recovery-policy.md: -------------------------------------------------------------------------------- 1 | # 节点恢复策略 2 | framework强烈建议开启checkpoint,节点恢复策略只基于开启checkpoint的情况进行讨论。节点发生故障通常分为两种情况: 3 | 1. 服务本身实例故障 4 | 服务实例启动的后,executor会启动一个单独线程对服务进行health check,当health check连续多次出现失败的时候,executor会认为当前服务不可用,并向shceduler发送task fail消息。scheduler收到task fail消息后及进入failover流程。 5 | 2. 
服务实例所在机器故障或网络故障导致机器失联。 6 | 如果是机器故障(如机器宕机)导致机器上所有的服务节点都退出,framewrok会收到agent fail事件,进入failover流程。如果只是mesos-ageng故障,由于开启了checkpoint,只要及时回复对服务无影响 7 | 如果是网络分区导致的失联,mesos-master会在agent_ping_timeout时间后把agent以及agent所在机器上的所有task设置为lost,并进入fail_over 8 | 9 | 对于故障的恢复,采取了以下恢复策略: 10 | 1. [原地重启](#原地重启) 11 | 2. [寻找新机器恢复](#寻找新机器恢复) 12 | 3. [原有集群机器里恢复](#原有集群机器里恢复) 13 | 14 | 三种恢复策略的优先级为1>2>3,每一种恢复策略都会累计故障重试次数,当故障重试次数超过6次后,则会认为该故障无法自动failover,scheduler会停止自动重试,需要由人工介入 15 | ## 原地重启 16 | scheduler收到task失败的消息后,首先会尝试从task原先所在机器恢复task,并将task id加1,用于表示task历史失败的次数。如果从原机器恢复task成功并成功重启服务节点,则failover流程结束。 17 | ## 寻找新机器恢复 18 | 如果无法从原机器恢复节点,则scheduler会尝试寻找当前节点所在集群没有使用过的机器,并将故障节点部署在该机器上。如果从mesos获取到的offer所在机器都已经部署有该集群的节点,那么则进入下一步。 19 | ## 原有集群机器里恢复 20 | 当无法找到该集群未部署任何节点的机器的时候,scheduler会尝试从现在集群所在机器里寻找最合适的机器部署新节点,新机器的寻找必须依然遵循[chunk/dist算法](./chunk.md). 21 | 22 | -------------------------------------------------------------------------------- /doc/wiki-cn/platform/scale.md: -------------------------------------------------------------------------------- 1 | # 集群伸缩 2 | 在业务发展过程中,随着业务量的增长,现有的缓存资源可能无法满足现有的业务需求。可能出现资源不足,单节点qps过高等情况。此时就需要对现有的缓存集群进行扩容。或者因为活动的需求对集群进行了扩容,活动过后可能需要回收多余的资源,此时就需要对集群进行缩容回收。集群的伸缩主要使用以下三种方案: 3 | 1. [调整单节点容量](#调整单节点容量) 4 | 2. [修改节点数](#修改节点数) 5 | 3. [大集群换集群](#大集群换小集群) 6 | 7 | ## 调整单节点容量 8 | 调整单节点容量主要分为两部分: 9 | 1. 修改从mesos获取的资源offer 10 | 2. 修改缓存服务的最大内存限制 11 | 12 | ### 修改memcache容量 13 | 修改memcache的容量需要对memcache进行重启,并通过设置--memory-limit来指定最大使用的内存 14 | 15 | ### 修改redis容量 16 | 1. 通过修改配置文件的maxmemory并重启服务来修改最大内存限制。 17 | 2. 通过config set maxmemory $bytes 来动态修改最大内存限制。无需对服务进行重启,同时也需要修改配置文件防止重启后失效 18 | ### 修改mesos分配的offer 19 | 1. 当使用原地调整单节点容量时,需要提前判断当前机器是否有足够的容量进行原地扩容。缓存的资源是由mesos分配的,因此第一步需要先从mesos获取当前机器是否有剩余的资源。 20 | 2. 获取到offer后,需要重新accept mesos offer。防止资源泄露。 21 | 22 | ## 修改节点数 23 | 相比原地修改单节点容量,修改节点数进行伸缩是更推荐的方式。修改节点数量只需在原有的集群上进行节点的增删,而且对于单节点qps过高的情况,增加节点数可以降低单节点的负载。 24 | ### 修改memcache和redis(singleton)节点数 25 | 对于memcache和singleton的集群,通常是使用代理的模式(overlord-proxy)。对于这一类集群,修改节点数只需要新建节点,并将新节点加入代理服务的配置server list。通过一致性hash进行节点的负载均衡。**需要注意的是,如果新增的节点过多,需要注意新节点的加入速度,防止一次性加入太多新节点导致的缓存miss (proxy会自动选择合适的时机加入新节点防止大量的miss)** 26 | 27 | ### 修改redis-clusrer 节点 28 | 相比于修改redis sigleton节点数,修改cluster的节点数需要在原有的基础上把新节点通过cluster meet 加入cluster,然后通过[集群管理工具](https://github.com/bilibili/enri)进行key以及槽位的迁移 29 | 30 | ## 大集群换小集群 31 | 对于非cluster的集群,这是最推荐的一种方式。相比前二种方式通过大集群换小集群有以下优点: 32 | 1. 对比增删节点,不会出现缓存的miss回源 33 | 2. 对比第一种,无需对节点进行重启,造成节点访问的短暂失效。 34 | 3. 
不存在历史数据的增删操作,新建的集群内存碎片更小, 35 | 36 | 37 | **注意**:使用大集群换小集群的前提是需要对数据进行迁移,且集群切换的过程中可能会有少量的脏数据。对于redis 我们使用[数据迁移工具](https://github.com/bilibili/anzi)进行数据的同步迁移。很不幸,mc不支持数据的迁移,因此无法使用这种方式进行进群的伸缩。 38 | -------------------------------------------------------------------------------- /doc/wiki-cn/platform/why-mesos.md: -------------------------------------------------------------------------------- 1 | # 为什么选择 mesos 2 | 随着业务发展,以及对自动化运维的需求,我们急需一套自动化的缓存部署解决方案,通过缓存管理平台,快速实现集群的创建扩容以及管理。mesos作为一套成熟的数据中心资源调度方案成为了我们调研的首选。同时我们还对比了业界比较流行的k8s,基于两者的对比,最终我们决定选择基于mesos开发一套缓存管理平台。 3 | ## mesos vs k8s 4 | |对比项|k8s|mesos| 5 | |:---|:---|:---| 6 | |开发难度 |代码量少,基本上不可侵入式改动| 代码量中等,需要自己实现mesos的framwork、executor| 7 | | overhead |存在cfs 以及kube-proxy会有额外开销|可以直接采用 宿主机 上裸起cache的方式| 8 | |redis-cluster支持|难以支持|支持| 9 | |资源调度|简单|需要做二次调度| 10 | |故障恢复|简单,有k8s自动调度恢复|需要自己实现framework进行故障恢复| 11 | |能否脱离docker|不能|可以| 12 | 基于以上对比,综合我们对cfs及额外latency的接受度以及对redis cluster的需求,最终决定使用mesos作为数据中心资源调度系统 13 | -------------------------------------------------------------------------------- /doc/wiki-cn/proxy.md: -------------------------------------------------------------------------------- 1 | # overlord-proxy 2 | 3 | overlord-proxy 是一个轻量级的缓存代理工具。 4 | 5 | * [功能支持](proxy/features.md) 6 | * [性能测试](proxy/benchmark.md) 7 | * [设计](proxy/design.md) 8 | * [支持&不支持的命令列表](proxy/cmd.md) 9 | -------------------------------------------------------------------------------- /doc/wiki-cn/proxy/features.md: -------------------------------------------------------------------------------- 1 | # 支持的功能 2 | 3 | ## 多种协议支持 4 | 5 | 我们支持memcache的两种协议:text和binary,支持redis的两种使用模式:纯代理和cluster。 6 | 7 | 因为B站不同业务对缓存的使用场景不同,memcache和redis的使用都非常广泛,且redis两种使用模式也分别都有。 8 | 对memcache binary的支持是我们打算后期业务都使用binary协议,因为相对text协议来说,最大的优势是支持pipeline,可以节省很多消耗。 9 | 虽然redis-cluster模式已经很成熟了,但相信还是有业务场景不想要冗余一倍内存,而只是单纯将redis当做一层缓存来使用。 10 | 11 | 在proxy的配置文件中,有`cache_type`配置项,可以配置为:`memcache` | `memcache_binary` | `redis` | `redis_cluster` 12 | 当使用`redis-cluster`模式时,proxy会将自己伪装为cluster的节点,可以支持`cluster nodes`和`cluster slots`命令,方便使用SDK如jedis的客户端无缝使用overlord-proxy。 13 | 14 | ## 哈希标签 15 | 16 | 我们支持哈希标签,默认为`{}`。与redis-cluster一致,且将这个特性扩展到四种模式都支持。 17 | 18 | ## 固定连接数 19 | 20 | 我们将proxy与缓存节点之间的连接数作为配置`node_connections`,可以自定义连接数。为了充分节省和利用资源,建议将其配置为`2`。这个值是我们经过压测和线上尝试后的最佳实践。 21 | 22 | ## 代理模式下自动踢节点 23 | 24 | proxy内设计了`Pinger`接口,且支持配置项`ping_auto_eject`和`ping_fail_limit`,分别表示是否自动踢出节点和连续ping失败多少次后踢出。 25 | 缓存(不是存储,默认对一致性要求较低)是可以被降级容错的,所以我们优先支持了故障节点自动踢出,快速恢复服务优先。当然,使用方也可以配置为关闭该功能。 26 | 27 | ## TODO: 多级缓存 28 | 29 | ## TODO: 缓存多写 30 | 31 | ## TODO: 冷缓存预热 32 | 33 | ## TODO: 平滑 reload 配置 34 | -------------------------------------------------------------------------------- /doc/wiki-cn/tools.md: -------------------------------------------------------------------------------- 1 | # 工具列表 2 | 3 | ## 集群管理工具 - enri 4 | 5 | 鉴于[ruskit](https://github.com/eleme/ruskit) 已经不再维护,我们决定重写这个管理工具。并添加一些诸如监控、报告、分析等更加自动化的功能。 6 | [enri使用](enri.md) 7 | 8 | ## redis数据导入导出工具 - anzi 9 | 10 | anzi 是源自 bilibili 的轻量级 Redis 数据同步工具。在过去,我们采用 vipshop 开源的 [redis-migrate-tool](https://github.com/vipshop/redis-migrate-tool) 工具进行迁移,然而在使用的时候,我们发现了这个工具的很多不足之处。首先,这个工具不再支持 RDB 7 (redis-3.x)以上的版本,也就意味着它不能再将 redis-4.x 及以上版本的 redis 当做数据源来导入,这是我们要替换掉它的最主要原因。另外就是,原版本工具使用C编写,在维护性上稍差;原版本工具对磁盘磁盘性能高,主要是需要将RDB导入到磁盘中再读出来。 11 | 12 | anzi 采用 Go 语言编写,同时借助 [overlord/proxy](https://github.com/bilibili/overlord/blob/master/doc/wiki-cn/proxy.md) 启动一个代理将命令分发。 13 | 14 | ### 功能 15 | 16 | anzi 支持的功能要点如下: 17 | 18 | * redis 高版本支持(^redis 5.0, RDB v9) 19 | * 多数据源支持: 多数据源中的 key 覆盖规则为随机覆盖 
20 | * 多后端协议支持:目前支持后端为 `redis`(twemproxy模式)和 `redis_cluster**(redis_cluster** 模式。 21 | * hash method 支持列表: 22 | * one_at_a_time 23 | * md5 24 | * crc16 25 | * crc32 (crc32 implementation compatible with libmemcached) 26 | * crc32a (correct crc32 implementation as per the spec) 27 | * fnv1_64 28 | * fnv1a_64 29 | * fnv1_32 30 | * fnv1a_32 31 | * hsieh 32 | * murmur 33 | * hash distribution 列表:ketama 34 | * 后端多连接支持 35 | * RDB不落盘,流式解析RDB 36 | 37 | 将来可能会做的功能: 38 | 39 | * 服务化:接收请求并开始同步 40 | * 上游断线重连: 按照主从协议的要求进行断线重连 41 | 42 | ### 使用 43 | 44 | ``` 45 | cd cmd/anzi && go build && ./anzi -std 46 | ``` 47 | 48 | ### 解析流程 49 | 50 | ``` 51 | [redis server] -> psync -> full sync -> ANZI -> as redis command -> consistent hash -> [redis server] 52 | -> repl sync -> 53 | ``` 54 | -------------------------------------------------------------------------------- /doc/wiki/overview.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/doc/wiki/overview.md -------------------------------------------------------------------------------- /pkg/bufio/buffer_test.go: -------------------------------------------------------------------------------- 1 | package bufio 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestBufferGrowOk(t *testing.T) { 10 | b := Get(defaultBufferSize) 11 | b.grow() 12 | assert.Equal(t, 0, b.r) 13 | assert.Equal(t, 0, b.w) 14 | assert.Len(t, b.buf, defaultBufferSize*2) 15 | assert.Equal(t, len(b.buf), b.len()) 16 | Put(b) 17 | } 18 | 19 | func TestBuffer(t *testing.T) { 20 | b := Get(defaultBufferSize) 21 | assert.Len(t, b.buf, defaultBufferSize) 22 | assert.Len(t, b.Bytes(), 0) 23 | b.w = 1 24 | assert.Len(t, b.Bytes(), 1) 25 | b.Reset() 26 | assert.Len(t, b.Bytes(), 0) 27 | Put(b) 28 | } 29 | 30 | func TestGetOk(t *testing.T) { 31 | b := Get(defaultBufferSize) 32 | assert.Len(t, b.buf, defaultBufferSize) 33 | 34 | b = Get(maxBufferSize) 35 | assert.Len(t, b.buf, maxBufferSize) 36 | 37 | b = Get(maxBufferSize + 1) 38 | assert.Len(t, b.buf, maxBufferSize+1) 39 | Put(b) 40 | } 41 | 42 | func TestBufferAdvance(t *testing.T) { 43 | b := Get(defaultBufferSize) 44 | b.r += 100 45 | b.Advance(-10) 46 | assert.Equal(t, 90, b.r) 47 | Put(b) 48 | } 49 | 50 | func TestBufferShrink(t *testing.T) { 51 | b := Get(defaultBufferSize) 52 | copy(b.buf, []byte("abcde")) 53 | b.r += 3 54 | b.w += 5 55 | b.shrink() 56 | assert.Equal(t, []byte("de"), b.Bytes()) 57 | Put(b) 58 | } 59 | -------------------------------------------------------------------------------- /pkg/container/container.go: -------------------------------------------------------------------------------- 1 | package container 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "os" 8 | "overlord/pkg/log" 9 | "time" 10 | 11 | "github.com/docker/docker/api/types" 12 | "github.com/docker/docker/api/types/container" 13 | "github.com/docker/docker/api/types/mount" 14 | "github.com/docker/docker/client" 15 | ) 16 | 17 | // Container define container with cancel. 18 | type Container struct { 19 | ctx context.Context 20 | cancel context.CancelFunc 21 | cli client.APIClient 22 | id string 23 | image string 24 | name string 25 | } 26 | 27 | // New new and return container with cancel. 
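// It connects to the local Docker daemon from the environment, pulls the image,
// then creates a host-network container that bind-mounts workdir and uses cmd as
// the entrypoint. A minimal usage sketch (image, name and workdir are illustrative):
//
//	c, err := container.New("docker.io/library/redis", "redis-demo", "/tmp/redis-demo", []string{"redis-server"})
//	if err != nil {
//		// handle error
//	}
//	if err = c.Start(); err != nil {
//		// handle error
//	}
//	defer c.Stop()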
28 | func New(image, name, workdir string, cmd []string) (c *Container, err error) { 29 | ctx, cancel := context.WithCancel(context.Background()) 30 | c = &Container{ 31 | ctx: ctx, 32 | cancel: cancel, 33 | image: image, 34 | name: name, 35 | } 36 | c.cli, err = client.NewEnvClient() 37 | if err != nil { 38 | return 39 | } 40 | 41 | var output io.ReadCloser 42 | output, err = c.cli.ImagePull(ctx, image, types.ImagePullOptions{}) 43 | if err != nil { 44 | return 45 | } 46 | io.Copy(os.Stdout, output) 47 | 48 | resp, err := c.cli.ContainerCreate(ctx, &container.Config{ 49 | Image: image, 50 | WorkingDir: workdir, 51 | Entrypoint: cmd, 52 | }, &container.HostConfig{ 53 | NetworkMode: "host", 54 | Mounts: []mount.Mount{{ 55 | Type: mount.TypeBind, 56 | Source: workdir, 57 | Target: workdir, 58 | }}, 59 | }, nil, name) 60 | if err != nil { 61 | return 62 | } 63 | c.id = resp.ID 64 | return 65 | } 66 | 67 | // Start start container. 68 | func (c *Container) Start() (err error) { 69 | log.Infof("start service %s: %v", c.image, c.name) 70 | 71 | if c.id == "" { 72 | err = fmt.Errorf("container %s absent", c.id) 73 | return 74 | } 75 | 76 | err = c.cli.ContainerStart(c.ctx, c.id, types.ContainerStartOptions{}) 77 | return 78 | } 79 | 80 | // Stop stop container by using cancel.Stop 81 | func (c *Container) Stop() { 82 | if c.id != "" { 83 | timeout := 10 * time.Second 84 | c.cli.ContainerStop(c.ctx, c.id, &timeout) 85 | } 86 | c.cancel() 87 | } 88 | 89 | // Wait wait container to exit. 90 | func (c *Container) Wait() error { 91 | if c.id == "" { 92 | return fmt.Errorf("container %s absent", c.id) 93 | } 94 | _, err := c.cli.ContainerWait(c.ctx, c.id) 95 | return err 96 | } 97 | -------------------------------------------------------------------------------- /pkg/conv/conv.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "strconv" 5 | ) 6 | 7 | // Btoi returns the corresponding value i. 
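// It fast-paths non-empty ASCII decimals shorter than 10 bytes (with an optional
// leading '+' or '-') and falls back to strconv.ParseInt for anything else, e.g.:
//
//	n, err := Btoi([]byte("-42")) // n == -42, err == nil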
8 | func Btoi(b []byte) (int64, error) { 9 | if len(b) != 0 && len(b) < 10 { 10 | var neg, i = false, 0 11 | switch b[0] { 12 | case '-': 13 | neg = true 14 | fallthrough 15 | case '+': 16 | i++ 17 | } 18 | if len(b) != i { 19 | var n int64 20 | for ; i < len(b) && b[i] >= '0' && b[i] <= '9'; i++ { 21 | n = int64(b[i]-'0') + n*10 22 | } 23 | if len(b) == i { 24 | if neg { 25 | n = -n 26 | } 27 | return n, nil 28 | } 29 | } 30 | } 31 | n, err := strconv.ParseInt(string(b), 10, 64) 32 | if err != nil { 33 | return 0, err 34 | } 35 | return n, nil 36 | } 37 | 38 | // UpdateToLower will convert to lower case 39 | func UpdateToLower(src []byte) { 40 | const step = byte('a') - byte('A') 41 | for i := range src { 42 | if src[i] >= 'A' && src[i] <= 'Z' { 43 | src[i] += step 44 | } 45 | } 46 | } 47 | 48 | // UpdateToUpper will convert to upper case 49 | func UpdateToUpper(src []byte) { 50 | const step = byte('a') - byte('A') 51 | for i := range src { 52 | if src[i] >= 'a' && src[i] <= 'z' { 53 | src[i] -= step 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /pkg/conv/conv_test.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestBtoi(t *testing.T) { 10 | i := int64(123) 11 | is := []byte("123") 12 | ni, err := Btoi(is) 13 | assert.NoError(t, err) 14 | assert.Equal(t, i, ni) 15 | } 16 | 17 | func TestUpdateToLower(t *testing.T) { 18 | bs := []byte{'A', 'B', 'c'} 19 | UpdateToLower(bs) 20 | assert.Equal(t, []byte{'a', 'b', 'c'}, bs) 21 | } 22 | 23 | func TestUpdateToUpper(t *testing.T) { 24 | bs := []byte{'a', 'b', 'C'} 25 | UpdateToUpper(bs) 26 | assert.Equal(t, []byte{'A', 'B', 'C'}, bs) 27 | } 28 | -------------------------------------------------------------------------------- /pkg/dir/dir.go: -------------------------------------------------------------------------------- 1 | package dir 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | 7 | pkgerr "github.com/pkg/errors" 8 | ) 9 | 10 | // define errors 11 | var ( 12 | ErrNotFile = pkgerr.New("path must be a File") 13 | ErrNotDir = pkgerr.New("path must be a Dir") 14 | ) 15 | 16 | // IsExists checks if the file or dir exists 17 | func IsExists(path string) (bool, error) { 18 | if os.Getenv("RunMode") == "test" { 19 | return true, nil 20 | } 21 | _, err := os.Stat(path) 22 | if err == nil { 23 | return true, nil 24 | } 25 | if os.IsNotExist(err) { 26 | return false, nil 27 | } 28 | return true, err 29 | } 30 | 31 | // GetAbsDir will get the file's dir absolute path. 32 | func GetAbsDir(path string) (absDir string, err error) { 33 | absPath, err := filepath.Abs(path) 34 | if err != nil { 35 | err = pkgerr.Wrapf(err, "path get absolute in GetAbsDir") 36 | return 37 | } 38 | absDir = filepath.Dir(absPath) 39 | return 40 | } 41 | 42 | // MkDirAll will create dir recursively, like `mkdir -p`.
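// The path is resolved to an absolute path first and missing parents are created
// with mode 0755; an already-existing directory is not an error. For example
// (the path is illustrative):
//
//	if err := MkDirAll("/tmp/overlord/data"); err != nil {
//		// handle error
//	}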
43 | func MkDirAll(path string) (err error) { 44 | absPath, err := filepath.Abs(path) 45 | if err != nil { 46 | err = pkgerr.Wrapf(err, "path get absolute") 47 | return 48 | } 49 | 50 | stat, err := os.Stat(absPath) 51 | if err != nil { 52 | if os.IsExist(err) { 53 | if stat.IsDir() { 54 | err = nil 55 | return 56 | } 57 | err = pkgerr.Wrapf(err, "check state of path") 58 | return 59 | } 60 | err = nil 61 | } 62 | 63 | err = os.MkdirAll(absPath, 0755) 64 | if err != nil { 65 | err = pkgerr.Wrapf(err, "when mkdirall meet error") 66 | } 67 | return 68 | } 69 | -------------------------------------------------------------------------------- /pkg/etcd/etcd_test.go: -------------------------------------------------------------------------------- 1 | package etcd 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "overlord/pkg/types" 7 | "testing" 8 | 9 | "overlord/platform/job" 10 | 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | func TestEtcd(t *testing.T) { 15 | e, err := New("http://127.0.0.1:2379") 16 | ctx := context.TODO() 17 | assert.NoError(t, err) 18 | _, _ = e.GenID(ctx, "/order", "1") 19 | _, _ = e.GenID(ctx, "/order", "2") 20 | _, err = e.Get(ctx, "/order") 21 | assert.NoError(t, err) 22 | } 23 | func TestSet(t *testing.T) { 24 | e, err := New("http://127.0.0.1:2379") 25 | assert.NoError(t, err) 26 | ctx := context.TODO() 27 | assert.NoError(t, err) 28 | mcjob := job.Job{ 29 | Name: "test", 30 | OpType: job.OpCreate, 31 | CacheType: types.CacheTypeMemcache, 32 | Version: "1.5.12", 33 | Num: 6, 34 | MaxMem: 10, 35 | CPU: 0.1, 36 | } 37 | bs, err := json.Marshal(mcjob) 38 | assert.NoError(t, err) 39 | err = e.Set(ctx, "/overlord/jobs/job1", string(bs)) 40 | assert.NoError(t, err) 41 | 42 | // redisjob := &job.Job{ 43 | // Name: "test", 44 | // CacheType: types.CacheTypeRedis, 45 | // Version: "4.0.11", 46 | // Num: 6, 47 | // MaxMem: 10, 48 | // CPU: 0.1, 49 | // } 50 | // bs, err = json.Marshal(redisjob) 51 | // assert.NoError(t, err) 52 | // err = e.Set(ctx, "/overlord/jobs/job12", string(bs)) 53 | // assert.NoError(t, err) 54 | } 55 | 56 | func TestSequnenceOk(t *testing.T) { 57 | e, err := New("http://127.0.0.1:2379") 58 | assert.NoError(t, err) 59 | ctx := context.Background() 60 | _ = e.Delete(ctx, PortSequence) 61 | 62 | port, err := e.Sequence(ctx, PortSequence) 63 | assert.NoError(t, err) 64 | assert.Equal(t, int64(20000), port) 65 | 66 | port, err = e.Sequence(ctx, PortSequence) 67 | assert.NoError(t, err) 68 | assert.Equal(t, int64(20001), port) 69 | } 70 | -------------------------------------------------------------------------------- /pkg/hashkit/crc_test.go: -------------------------------------------------------------------------------- 1 | package hashkit 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestCrcCheckOk(t *testing.T) { 10 | assert.Equal(t, uint16(0x31C3), Crc16([]byte("123456789"))) 11 | assert.Equal(t, uint16(21847), Crc16([]byte{83, 153, 134, 118, 229, 214, 244, 75, 140, 37, 215, 215})) 12 | } 13 | -------------------------------------------------------------------------------- /pkg/hashkit/fnv.go: -------------------------------------------------------------------------------- 1 | package hashkit 2 | 3 | const ( 4 | prime64 = 1099511628211 5 | offset64 = 14695981039346656037 6 | 7 | prime64tw = 1099511628211 & 0x0000ffff 8 | offset64tw = 14695981039346656037 & 0xffffffff 9 | 10 | prime32 = 16777619 11 | offset32 = 2166136261 12 | ) 13 | 14 | func hashFnv1a64(key []byte) uint { 15 | hash := 
uint32(offset64tw) 16 | for _, c := range key { 17 | hash ^= uint32(c) 18 | hash *= uint32(prime64tw) 19 | } 20 | 21 | return uint(hash) 22 | } 23 | 24 | func hashFnv164(key []byte) uint { 25 | var hash uint64 = offset64 26 | for _, c := range key { 27 | hash *= prime64 28 | hash ^= uint64(c) 29 | } 30 | return uint(uint32(hash)) 31 | } 32 | 33 | func hashFnv1a32(key []byte) uint { 34 | var hash uint32 = offset32 35 | for _, c := range key { 36 | hash ^= uint32(c) 37 | hash *= prime32 38 | } 39 | return uint(hash) 40 | } 41 | 42 | func hashFnv132(key []byte) (value uint) { 43 | var hash uint32 = offset32 44 | for _, c := range key { 45 | hash *= prime32 46 | hash ^= uint32(c) 47 | } 48 | return uint(hash) 49 | } 50 | -------------------------------------------------------------------------------- /pkg/hashkit/hash.go: -------------------------------------------------------------------------------- 1 | package hashkit 2 | 3 | // constants defines 4 | const ( 5 | HashMethodFnv1a64 = "fnv1a_64" 6 | HashMethodFnv1a32 = "fnv1a_32" 7 | HashMethodFnv164 = "fnv1_64" 8 | HashMethodFnv132 = "fnv1_32" 9 | 10 | HashMethodCRC16 = "crc16" 11 | HashMethodCRC32 = "crc32" 12 | HashMethodCRC32a = "crc32a" 13 | 14 | HashMethodMD5 = "md5" 15 | HashMethodOneOnTime = "one_on_time" 16 | HashMethodHsieh = "hsieh" 17 | HashMethodMurmur = "murmur" 18 | ) 19 | 20 | // NewRing will create new and need init method. 21 | func NewRing(des, method string) *HashRing { 22 | 23 | var hash func([]byte) uint 24 | switch method { 25 | 26 | case HashMethodFnv1a64: // fnv family 27 | hash = hashFnv1a64 28 | case HashMethodFnv164: 29 | hash = hashFnv164 30 | case HashMethodFnv1a32: 31 | hash = hashFnv1a32 32 | case HashMethodFnv132: 33 | hash = hashFnv132 34 | 35 | case HashMethodCRC32a: // crc family 36 | hash = hashCrc32a 37 | case HashMethodCRC32: 38 | hash = hashCrc32 39 | case HashMethodCRC16: 40 | hash = hashCrc16 41 | 42 | case HashMethodMD5: // others 43 | hash = hashMD5 44 | case HashMethodOneOnTime: 45 | hash = hashOneOnTime 46 | case HashMethodHsieh: 47 | hash = hashHsieh 48 | case HashMethodMurmur: 49 | hash = hashMurmur 50 | default: 51 | hash = hashFnv1a64 52 | } 53 | return newRingWithHash(hash) 54 | } 55 | -------------------------------------------------------------------------------- /pkg/hashkit/hash_methods_test.go: -------------------------------------------------------------------------------- 1 | package hashkit 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestAllHashMethods(t *testing.T) { 10 | key := []byte("0123456789") 11 | assert.Equal(t, uint(3227819096), hashCrc16(key), "crc16") 12 | assert.Equal(t, uint(9860), hashCrc32(key), "crc32") 13 | assert.Equal(t, uint(2793719750), hashCrc32a(key), "crc32a") 14 | assert.Equal(t, uint(610147960), hashMD5(key), "md5") 15 | assert.Equal(t, uint(2336436402), hashFnv1a64(key), "fnv1a64") 16 | assert.Equal(t, uint(1576209164), hashFnv164(key), "fnv164") 17 | assert.Equal(t, uint(4185952242), hashFnv1a32(key), "fnv1a32") 18 | assert.Equal(t, uint(1737638188), hashFnv132(key), "fnv132") 19 | assert.Equal(t, uint(2264676836), hashHsieh(key), "hsieh") 20 | assert.Equal(t, uint(1957635836), hashMurmur(key), "murmur") 21 | assert.Equal(t, uint(2451084222), hashOneOnTime(key), "hash one on time") 22 | } 23 | -------------------------------------------------------------------------------- /pkg/hashkit/hash_test.go: -------------------------------------------------------------------------------- 1 | package hashkit 2 | 3 | 
import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestNewRingOk(t *testing.T) { 10 | ring := NewRing("redis_cluster", "crc16") 11 | assert.NotNil(t, ring) 12 | 13 | ring = NewRing("ketama", "fnv1a_64") 14 | assert.NotNil(t, ring) 15 | } 16 | -------------------------------------------------------------------------------- /pkg/hashkit/hsieh.go: -------------------------------------------------------------------------------- 1 | package hashkit 2 | 3 | func get16bits(key []byte) uint32 { 4 | var uval = uint32(key[0]) 5 | uval += uint32(key[1]) << 8 6 | return uval 7 | } 8 | 9 | func hashHsieh(key []byte) uint { 10 | if len(key) == 0 { 11 | return uint(0) 12 | } 13 | 14 | var ( 15 | hash uint32 16 | tmp uint32 17 | klen = len(key) 18 | rem = klen & 3 19 | ) 20 | 21 | klen >>= 2 22 | 23 | // main loop 24 | for ; klen > 0; klen-- { 25 | hash += get16bits(key) 26 | tmp = (get16bits(key[2:]) << 11) ^ hash 27 | hash = (hash << 16) ^ tmp 28 | key = key[4:] 29 | hash += hash >> 11 30 | } 31 | 32 | // deal with rem 33 | switch rem { 34 | case 3: 35 | hash += get16bits(key) 36 | hash ^= hash << 16 37 | hash ^= uint32(key[2]) << 18 38 | hash += hash >> 11 39 | case 2: 40 | hash += get16bits(key) 41 | hash ^= hash << 11 42 | hash += hash >> 17 43 | case 1: 44 | hash += uint32(key[0]) 45 | hash ^= hash << 10 46 | hash += hash >> 1 47 | } 48 | 49 | // Force "avalanching" of final 127 bits 50 | hash ^= hash << 3 51 | hash += hash >> 5 52 | hash ^= hash << 4 53 | hash += hash >> 17 54 | hash ^= hash << 25 55 | hash += hash >> 6 56 | 57 | return uint(hash) 58 | } 59 | -------------------------------------------------------------------------------- /pkg/hashkit/ketama_test.go: -------------------------------------------------------------------------------- 1 | package hashkit 2 | 3 | import ( 4 | "bytes" 5 | "strconv" 6 | "testing" 7 | ) 8 | 9 | var ( 10 | ring = Ketama() 11 | nodes = []string{ 12 | "test1.server.com", 13 | "test2.server.com", 14 | "test3.server.com", 15 | "test4.server.com", 16 | } 17 | sis = []int{1, 1, 2, 5} 18 | 19 | node5 = "test5.server.com" 20 | 21 | delAll bool 22 | ) 23 | 24 | func TestGetInfo(t *testing.T) { 25 | ring.Init(nodes, sis) 26 | testHash(t) 27 | t.Log("----init test ok:expect 1 1 2 5----\n") 28 | 29 | ring.AddNode(nodes[3], 1) 30 | testHash(t) 31 | t.Log("----add exist node test ok:expect 1 1 2 1----\n") 32 | 33 | ring.AddNode(node5, 5) 34 | testHash(t) 35 | t.Log("----add no exist node test ok:expect 1 1 2 1 5----\n") 36 | 37 | ring.DelNode(nodes[0]) 38 | testHash(t) 39 | t.Log("----del exist node test ok:expect 0 1 2 1 5----\n") 40 | 41 | ring.DelNode("wocao") 42 | testHash(t) 43 | t.Log("----del not exist node test ok:expect 0 1 2 1 5----\n") 44 | 45 | for _, node := range nodes { 46 | ring.DelNode(node) 47 | } 48 | ring.DelNode(node5) 49 | delAll = true 50 | testHash(t) 51 | t.Log("----del all node test ok:expect 0 0 0 0 0----\n") 52 | } 53 | 54 | func testHash(t *testing.T) { 55 | m := make(map[string]int) 56 | for i := 0; i < 1e6; i++ { 57 | s := "test value" + strconv.FormatUint(uint64(i), 10) 58 | bs := []byte(s) 59 | n, ok := ring.GetNode(bs) 60 | if !ok { 61 | if !delAll { 62 | t.Error("unexpected not ok???") 63 | } 64 | } 65 | m[n]++ 66 | if !bytes.Equal([]byte(s), bs) { 67 | t.Error("hash change the bytes") 68 | } 69 | } 70 | for _, node := range nodes { 71 | t.Log(node, m[node]) 72 | } 73 | t.Log(node5, m[node5]) 74 | } 75 | 76 | func BenchmarkHash(b *testing.B) { 77 | ring.Init(nodes, sis) 78 | for i := 0; i < b.N; i++ { 
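// hash a distinct key on every iteration so lookups are spread across the ring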
79 | s := "test value" + strconv.FormatUint(uint64(i), 10) 80 | ring.GetNode([]byte(s)) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /pkg/hashkit/methods.go: -------------------------------------------------------------------------------- 1 | package hashkit 2 | 3 | import ( 4 | "crypto/md5" 5 | ) 6 | 7 | func hashOneOnTime(key []byte) uint { 8 | var value uint32 9 | for _, c := range key { 10 | val := uint32(c) 11 | value += val 12 | value += value << 10 13 | value ^= value >> 6 14 | } 15 | 16 | value += value << 3 17 | value ^= value >> 11 18 | value += value << 15 19 | return uint(value) 20 | } 21 | 22 | func hashMD5(key []byte) uint { 23 | m := md5.New() 24 | m.Write(key) 25 | results := m.Sum(nil) 26 | return uint( 27 | (uint32(results[3]&0xFF) << 24) | 28 | (uint32(results[2]&0xFF) << 16) | 29 | (uint32(results[1]&0xFF) << 8) | 30 | (uint32(results[0]) & 0xFF)) 31 | } 32 | -------------------------------------------------------------------------------- /pkg/hashkit/murmur.go: -------------------------------------------------------------------------------- 1 | package hashkit 2 | 3 | import "github.com/aviddiviner/go-murmur" 4 | 5 | func hashMurmur(key []byte) uint { 6 | var uklen = uint32(len(key)) 7 | var seed = 0xdeadbeef * uklen 8 | return uint(murmur.MurmurHash2(key, seed)) 9 | } 10 | -------------------------------------------------------------------------------- /pkg/log/file.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 CodisLabs. All Rights Reserved. 2 | // Licensed under the MIT (MIT-LICENSE.txt) license. 3 | 4 | package log 5 | 6 | import ( 7 | "fmt" 8 | stdlog "log" 9 | "os" 10 | "path/filepath" 11 | "time" 12 | ) 13 | 14 | const ( 15 | dailyRolling = "2006-01-02" 16 | ) 17 | 18 | type fileHandler struct { 19 | l *stdlog.Logger 20 | 21 | f *os.File 22 | basePath string 23 | filePath string 24 | fileFrag string 25 | } 26 | 27 | // NewFileHandler new file handler. 28 | func NewFileHandler(basePath string) Handler { 29 | if _, file := filepath.Split(basePath); file == "" { 30 | panic("invalid base path") 31 | } 32 | l := stdlog.New(nil, "", stdlog.LstdFlags|stdlog.Lshortfile) 33 | f := &fileHandler{l: l, basePath: basePath} 34 | if err := f.roll(); err != nil { 35 | panic(err) 36 | } 37 | return f 38 | } 39 | 40 | func (r *fileHandler) Log(lv Level, msg string) { 41 | _ = r.roll() 42 | _ = r.l.Output(5, fmt.Sprintf("[%s] %s", lv, msg)) 43 | } 44 | 45 | func (r *fileHandler) Close() error { 46 | if r.f != nil { 47 | return r.f.Close() 48 | } 49 | return nil 50 | } 51 | 52 | func (r *fileHandler) roll() error { 53 | suffix := time.Now().Format(dailyRolling) 54 | if r.f != nil { 55 | if suffix == r.fileFrag { 56 | return nil 57 | } 58 | r.f.Close() 59 | r.f = nil 60 | } 61 | r.fileFrag = suffix 62 | r.filePath = fmt.Sprintf("%s.%s", r.basePath, r.fileFrag) 63 | 64 | if dir, _ := filepath.Split(r.basePath); dir != "" && dir != "." 
{ 65 | if err := os.MkdirAll(dir, 0777); err != nil { 66 | return err 67 | } 68 | } 69 | f, err := os.OpenFile(r.filePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) 70 | if err != nil { 71 | return err 72 | } 73 | r.f = f 74 | r.l.SetOutput(f) 75 | return nil 76 | } 77 | -------------------------------------------------------------------------------- /pkg/log/handler.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import ( 4 | "github.com/pkg/errors" 5 | ) 6 | 7 | // Handler is used to handle log events, outputting them to 8 | // stdio or sending them to remote services. See the "handlers" 9 | // directory for implementations. 10 | // 11 | // It is left up to Handlers to implement thread-safety. 12 | type Handler interface { 13 | Log(lv Level, msg string) 14 | Close() error 15 | } 16 | 17 | // Handlers . 18 | type Handlers []Handler 19 | 20 | // Log handlers logging. 21 | func (hs Handlers) Log(lv Level, msg string) { 22 | for _, h := range hs { 23 | h.Log(lv, msg) 24 | } 25 | } 26 | 27 | // Close close resource. 28 | func (hs Handlers) Close() (err error) { 29 | for _, h := range hs { 30 | if e := h.Close(); e != nil { 31 | err = errors.WithStack(e) 32 | } 33 | } 34 | return 35 | } 36 | -------------------------------------------------------------------------------- /pkg/log/log_test.go: -------------------------------------------------------------------------------- 1 | package log_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "overlord/pkg/log" 7 | 8 | "github.com/pkg/errors" 9 | ) 10 | 11 | func TestLog(t *testing.T) { 12 | // std := log.NewStdHandler() 13 | // f := log.NewFileHandler("/tmp/overlord.log") 14 | 15 | log.Init(&log.Config{ 16 | Stdout: true, 17 | Debug: true, 18 | Log: "/tmp/overlord.log", 19 | LogVL: 10, 20 | }) 21 | 22 | log.Info("test1") 23 | log.Info("test1", "test2") 24 | log.Info("test1", "test2", "test3") 25 | 26 | log.Warn("test1") 27 | log.Warn("test1", "test2") 28 | log.Warn("test1", "test2", "test3") 29 | 30 | log.Error("test1") 31 | log.Error("test1", "test2") 32 | log.Error("test1", "test2", "test3") 33 | 34 | log.Infof("1(%s)", "test1") 35 | log.Infof("1(%s) 2(%s)", "test1", "test2") 36 | log.Infof("1(%s) 2(%s) 3(%s)", "test1", "test2", "test3") 37 | 38 | log.Warnf("1(%s)", "test1") 39 | log.Warnf("1(%s) 2(%s)", "test1", "test2") 40 | log.Warnf("1(%s) 2(%s) 3(%s)", "test1", "test2", "test3") 41 | 42 | log.Errorf("1(%s)", "test1") 43 | log.Errorf("1(%s) 2(%s)", "test1", "test2") 44 | log.Errorf("1(%s) 2(%s) 3(%s)", "test1", "test2", "test3") 45 | 46 | log.DefaultVerboseLevel = 3 47 | if log.V(5) { 48 | log.Info("this cannot be print") 49 | log.Infof("this cannot be print:%s", "yeah") 50 | log.Warn("this cannot be print") 51 | log.Warnf("this cannot be print:%s", "yeah") 52 | log.Error("this cannot be print") 53 | log.Errorf("this cannot be print:%s", "yeah") 54 | } 55 | if log.V(2) { 56 | log.Info("this will be printing1") 57 | log.Infof("this will be printing1:%s", "yeah") 58 | log.Warn("this will be printing1") 59 | log.Warnf("this will be printing1:%s", "yeah") 60 | log.Error("this will be printing1") 61 | log.Errorf("this will be printing1:%s", "yeah") 62 | } 63 | log.V(3).Info("this will be printing2") 64 | log.V(3).Infof("this will be printing2:%s", "yeah") 65 | log.V(3).Warn("this will be printing2") 66 | log.V(3).Warnf("this will be printing2:%s", "yeah") 67 | log.V(3).Error("this will be printing2") 68 | log.V(3).Errorf("this will be printing2:%s", "yeah") 69 | 70 | log.Errorf("stack:%+v", 
errors.New("this is a error")) 71 | 72 | if err := log.Close(); err != nil { 73 | t.Error(err) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /pkg/log/stdout.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import ( 4 | "fmt" 5 | stdlog "log" 6 | "os" 7 | ) 8 | 9 | // stdoutHandler stdout log handler 10 | type stdoutHandler struct { 11 | out *stdlog.Logger 12 | } 13 | 14 | // NewStdHandler create a stdout log handler 15 | func NewStdHandler() Handler { 16 | return &stdoutHandler{out: stdlog.New(os.Stdout, "", stdlog.LstdFlags|stdlog.Lshortfile)} 17 | } 18 | 19 | // Log stdout loging 20 | func (h *stdoutHandler) Log(lv Level, msg string) { 21 | _ = h.out.Output(5, fmt.Sprintf("[%s] %s", lv, msg)) 22 | } 23 | 24 | // Close stdout loging 25 | func (h *stdoutHandler) Close() (err error) { 26 | return 27 | } 28 | -------------------------------------------------------------------------------- /pkg/log/verbose.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | // Verbose . 4 | type Verbose bool 5 | 6 | // DefaultVerboseLevel default Verbose level. 7 | var DefaultVerboseLevel = 0 8 | 9 | // V enable verbose log. 10 | // v must be more than 0. 11 | func V(v int) Verbose { 12 | return Verbose(v <= DefaultVerboseLevel) 13 | } 14 | 15 | // Infof logs a message at the info log level. 16 | func (v Verbose) Infof(format string, args ...interface{}) { 17 | if v { 18 | logf(_infoLevel, format, args...) 19 | } 20 | } 21 | 22 | // Warnf logs a message at the warning log level. 23 | func (v Verbose) Warnf(format string, args ...interface{}) { 24 | if v { 25 | logf(_warnLevel, format, args...) 26 | } 27 | } 28 | 29 | // Errorf logs a message at the error log level. 30 | func (v Verbose) Errorf(format string, args ...interface{}) { 31 | if v { 32 | logf(_errorLevel, format, args...) 33 | } 34 | } 35 | 36 | // Info logs a message at the info log level. 37 | func (v Verbose) Info(args ...interface{}) { 38 | if v { 39 | logs(_infoLevel, args...) 40 | } 41 | } 42 | 43 | // Warn logs a message at the warning log level. 44 | func (v Verbose) Warn(args ...interface{}) { 45 | if v { 46 | logs(_warnLevel, args...) 47 | } 48 | } 49 | 50 | // Error logs a message at the error log level. 51 | func (v Verbose) Error(args ...interface{}) { 52 | if v { 53 | logs(_errorLevel, args...) 54 | } 55 | } 56 | 57 | // Close close resource. 58 | func (v Verbose) Close() error { 59 | return h.Close() 60 | } 61 | -------------------------------------------------------------------------------- /pkg/memcache/conn.go: -------------------------------------------------------------------------------- 1 | package memcache 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "time" 7 | 8 | "overlord/pkg/bufio" 9 | "overlord/pkg/net" 10 | 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | // Conn mc conn 15 | type Conn struct { 16 | conn *net.Conn 17 | addr string 18 | dt time.Duration 19 | wt time.Duration 20 | rt time.Duration 21 | bw *bufio.Writer 22 | br *bufio.Reader 23 | } 24 | 25 | var ( 26 | pingBytes = []byte("set _ping 0 0 4\r\npong\r\n") 27 | pongBytes = []byte("STORED\r\n") 28 | errping = fmt.Errorf("get pong err") 29 | ) 30 | 31 | // New mc conn. 
32 | func New(addr string, dialTimeout, writeTimeout, readTimeout time.Duration) *Conn { 33 | c := &Conn{ 34 | addr: addr, 35 | dt: dialTimeout, 36 | wt: writeTimeout, 37 | rt: readTimeout, 38 | conn: net.DialWithTimeout(addr, dialTimeout, readTimeout, writeTimeout), 39 | } 40 | c.br = bufio.NewReader(c.conn, bufio.NewBuffer(1024)) 41 | c.bw = bufio.NewWriter(c.conn) 42 | return c 43 | } 44 | 45 | // Ping mc server. 46 | func (c *Conn) Ping() (err error) { 47 | defer func() { 48 | // reconnect if err happend. 49 | if err != nil { 50 | c.conn.Close() 51 | c.conn = net.DialWithTimeout(c.addr, c.dt, c.rt, c.wt) 52 | c.br = bufio.NewReader(c.conn, bufio.NewBuffer(1024)) 53 | c.bw = bufio.NewWriter(c.conn) 54 | } 55 | }() 56 | if err = c.bw.Write(pingBytes); err != nil { 57 | err = errors.WithStack(err) 58 | return 59 | } 60 | if err = c.bw.Flush(); err != nil { 61 | err = errors.WithStack(err) 62 | return 63 | } 64 | _ = c.br.Read() 65 | var b []byte 66 | if b, err = c.br.ReadLine(); err != nil { 67 | err = errors.WithStack(err) 68 | return 69 | } 70 | if !bytes.Equal(b, pongBytes) { 71 | err = errors.WithStack(errping) 72 | } 73 | return 74 | } 75 | 76 | // Close conn. 77 | func (c *Conn) Close() error { 78 | return c.conn.Close() 79 | } 80 | -------------------------------------------------------------------------------- /pkg/mockconn/conn.go: -------------------------------------------------------------------------------- 1 | package mockconn 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "net" 7 | "sync/atomic" 8 | "time" 9 | ) 10 | 11 | const ( 12 | stateClosed = 1 13 | stateOpening = 0 14 | ) 15 | 16 | type mockAddr string 17 | 18 | func (m mockAddr) Network() string { 19 | return "tcp" 20 | } 21 | func (m mockAddr) String() string { 22 | return string(m) 23 | } 24 | 25 | // MockConn mock tcp conn. 26 | type MockConn struct { 27 | addr mockAddr 28 | rbuf *bytes.Buffer 29 | Wbuf *bytes.Buffer 30 | data []byte 31 | repeat int 32 | Err error 33 | closed int32 34 | } 35 | 36 | func (m *MockConn) Read(b []byte) (n int, err error) { 37 | if atomic.LoadInt32(&m.closed) == stateClosed { 38 | return 0, io.EOF 39 | } 40 | if m.Err != nil { 41 | err = m.Err 42 | return 43 | } 44 | if m.repeat > 0 { 45 | m.rbuf.Write(m.data) 46 | m.repeat-- 47 | } 48 | return m.rbuf.Read(b) 49 | } 50 | 51 | func (m *MockConn) Write(b []byte) (n int, err error) { 52 | if atomic.LoadInt32(&m.closed) == stateClosed { 53 | return 0, io.EOF 54 | } 55 | 56 | if m.Err != nil { 57 | err = m.Err 58 | return 59 | } 60 | return m.Wbuf.Write(b) 61 | } 62 | 63 | // writeBuffers impl the net.buffersWriter to support writev 64 | func (m *MockConn) writeBuffers(buf *net.Buffers) (int64, error) { 65 | if m.Err != nil { 66 | return 0, m.Err 67 | } 68 | return buf.WriteTo(m.Wbuf) 69 | } 70 | 71 | func (m *MockConn) Close() error { 72 | atomic.StoreInt32(&m.closed, stateClosed) 73 | return nil 74 | } 75 | 76 | func (m *MockConn) LocalAddr() net.Addr { return m.addr } 77 | func (m *MockConn) RemoteAddr() net.Addr { return m.addr } 78 | 79 | func (m *MockConn) SetDeadline(t time.Time) error { return nil } 80 | func (m *MockConn) SetReadDeadline(t time.Time) error { return nil } 81 | func (m *MockConn) SetWriteDeadline(t time.Time) error { return nil } 82 | 83 | // CreateConn with mock data repeate for r times. 
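// A usage sketch: the returned conn replays data on Read up to r times, then reports EOF.
//
//	conn := CreateConn([]byte("STORED\r\n"), 2)
//	buf := make([]byte, 8)
//	n, err := conn.Read(buf) // n == 8, err == nil; once the repeats are drained, Read returns io.EOF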
84 | func CreateConn(data []byte, r int) net.Conn { 85 | mconn := &MockConn{ 86 | addr: "127.0.0.1:12345", 87 | rbuf: bytes.NewBuffer(nil), 88 | Wbuf: new(bytes.Buffer), 89 | data: data, 90 | repeat: r, 91 | } 92 | return mconn 93 | } 94 | 95 | // CreateDownStreamConn for mock conn write. 96 | func CreateDownStreamConn() (net.Conn, *bytes.Buffer) { 97 | buf := new(bytes.Buffer) 98 | mconn := &MockConn{ 99 | addr: "127.0.0.1:12345", 100 | Wbuf: buf, 101 | } 102 | return mconn, buf 103 | } 104 | -------------------------------------------------------------------------------- /pkg/net/conn.go: -------------------------------------------------------------------------------- 1 | package net 2 | 3 | import ( 4 | "errors" 5 | "net" 6 | "time" 7 | ) 8 | 9 | var ( 10 | // ErrConnClosed error connection closed. 11 | ErrConnClosed = errors.New("connection is closed") 12 | ) 13 | 14 | // Conn is a net.Conn self implement 15 | // Add auto timeout setting. 16 | type Conn struct { 17 | addr string 18 | net.Conn 19 | 20 | dialTimeout time.Duration 21 | readTimeout time.Duration 22 | writeTimeout time.Duration 23 | 24 | closed bool 25 | } 26 | 27 | // DialWithTimeout will create new auto timeout Conn 28 | func DialWithTimeout(addr string, dialTimeout, readTimeout, writeTimeout time.Duration) (c *Conn) { 29 | sock, _ := net.DialTimeout("tcp", addr, dialTimeout) 30 | c = &Conn{addr: addr, Conn: sock, dialTimeout: dialTimeout, readTimeout: readTimeout, writeTimeout: writeTimeout} 31 | return 32 | } 33 | 34 | // NewConn will create new Connection with given socket 35 | func NewConn(sock net.Conn, readTimeout, writeTimeout time.Duration) (c *Conn) { 36 | c = &Conn{Conn: sock, readTimeout: readTimeout, writeTimeout: writeTimeout} 37 | return 38 | } 39 | 40 | // Dup will re-dial to the given addr by using timeouts stored in itself. 41 | func (c *Conn) Dup() *Conn { 42 | return DialWithTimeout(c.addr, c.dialTimeout, c.readTimeout, c.writeTimeout) 43 | } 44 | 45 | func (c *Conn) Read(b []byte) (n int, err error) { 46 | if c.closed || c.Conn == nil { 47 | return 0, ErrConnClosed 48 | } 49 | if timeout := c.readTimeout; timeout != 0 { 50 | if err = c.SetReadDeadline(time.Now().Add(timeout)); err != nil { 51 | return 52 | } 53 | } 54 | n, err = c.Conn.Read(b) 55 | return 56 | } 57 | 58 | func (c *Conn) Write(b []byte) (n int, err error) { 59 | if c.closed || c.Conn == nil { 60 | return 0, ErrConnClosed 61 | } 62 | if timeout := c.writeTimeout; timeout != 0 { 63 | if err = c.SetWriteDeadline(time.Now().Add(timeout)); err != nil { 64 | return 65 | } 66 | } 67 | n, err = c.Conn.Write(b) 68 | return 69 | } 70 | 71 | // Close close conn. 72 | func (c *Conn) Close() error { 73 | if c.Conn != nil && !c.closed { 74 | c.closed = true 75 | return c.Conn.Close() 76 | } 77 | return nil 78 | } 79 | 80 | // Writev impl the net.buffersWriter to support writev 81 | func (c *Conn) Writev(buf *net.Buffers) (int64, error) { 82 | if c.closed || c.Conn == nil { 83 | return 0, ErrConnClosed 84 | } 85 | n, err := buf.WriteTo(c.Conn) 86 | return n, err 87 | } 88 | -------------------------------------------------------------------------------- /pkg/proc/proc.go: -------------------------------------------------------------------------------- 1 | package proc 2 | 3 | import ( 4 | "context" 5 | "os/exec" 6 | "overlord/pkg/log" 7 | ) 8 | 9 | // Proc define process with cancel. 10 | type Proc struct { 11 | ctx context.Context 12 | cancel context.CancelFunc 13 | cmd *exec.Cmd 14 | } 15 | 16 | // NewProc new and return proc with cancel. 
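// A usage sketch (the command and its arguments are illustrative):
//
//	p := NewProc("redis-server", "/tmp/redis.conf")
//	if err := p.Start(); err != nil {
//		// handle start failure
//	}
//	// p.Stop() cancels the context, which kills the process; p.Wait() reaps it.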
17 | func NewProc(name string, arg ...string) *Proc { 18 | ctx, cancel := context.WithCancel(context.Background()) 19 | cmd := exec.CommandContext(ctx, name, arg...) 20 | return &Proc{ 21 | ctx: ctx, 22 | cancel: cancel, 23 | cmd: cmd, 24 | } 25 | } 26 | 27 | // Start start proc. 28 | func (p *Proc) Start() (err error) { 29 | log.Infof("start service %s %v", p.cmd.Path, p.cmd.Args) 30 | return p.cmd.Start() 31 | } 32 | 33 | // Stop stop process by useing cancel.Stop 34 | func (p *Proc) Stop() { 35 | p.cancel() 36 | } 37 | 38 | // Wait wait proc to exit. 39 | func (p *Proc) Wait() error { 40 | return p.cmd.Wait() 41 | } 42 | -------------------------------------------------------------------------------- /pkg/systemd/systemd.go: -------------------------------------------------------------------------------- 1 | package systemd 2 | 3 | import ( 4 | "errors" 5 | "os/exec" 6 | ) 7 | 8 | // defind errors 9 | var ( 10 | ErrNotSupportAction = errors.New("aciton not support") 11 | ) 12 | 13 | // ActionType is the type which was used for systemd safe check 14 | type ActionType string 15 | 16 | // define actions 17 | const ( 18 | ActionStart = "start" 19 | ActionRestart = "restart" 20 | ActionStop = "stop" 21 | ActionDaemonReload = "daemon-reload" 22 | ) 23 | 24 | func checkSafe(action ActionType) bool { 25 | switch action { 26 | case ActionDaemonReload, ActionStop, ActionRestart, ActionStart: 27 | return true 28 | default: 29 | return false 30 | } 31 | } 32 | 33 | // Run limited allowed command 34 | func Run(serviceName string, action ActionType) error { 35 | if !checkSafe(action) { 36 | return ErrNotSupportAction 37 | } 38 | cmd := exec.Command("systemctl", string(action), serviceName) 39 | return cmd.Run() 40 | } 41 | 42 | // Start service 43 | func Start(serviceName string) error { 44 | return Run(serviceName, ActionStart) 45 | } 46 | 47 | // Stop service 48 | func Stop(serviceName string) error { 49 | return Run(serviceName, ActionStop) 50 | } 51 | 52 | // Restart service 53 | func Restart(serviceName string) error { 54 | return Run(serviceName, ActionRestart) 55 | } 56 | 57 | // DaemonReload systemd 58 | func DaemonReload() error { 59 | cmd := exec.Command("systemctl", string(ActionDaemonReload)) 60 | return cmd.Run() 61 | } 62 | -------------------------------------------------------------------------------- /pkg/types/types.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | import "errors" 4 | 5 | // CacheType memcache or redis 6 | type CacheType string 7 | 8 | // errors 9 | var ( 10 | ErrNoSupportCacheType = errors.New("unsupported cache type") 11 | ) 12 | 13 | // Cache type: memcache or redis. 14 | const ( 15 | CacheTypeUnknown CacheType = "unknown" 16 | CacheTypeMemcache CacheType = "memcache" 17 | CacheTypeMemcacheBinary CacheType = "memcache_binary" 18 | CacheTypeRedis CacheType = "redis" 19 | CacheTypeRedisCluster CacheType = "redis_cluster" 20 | ) 21 | -------------------------------------------------------------------------------- /platform/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Overlord-platform 2 | 3 | ## Version 2.0.0 4 | 1. add apiserver: http front end 5 | 2. add cmd/executor: mesos executor 6 | 3. add cmd/scheduler: mesos framework 7 | 4. add mesos support: mesos library 8 | 5. add job library: job is a work progress abstraction(support balance/create now) 9 | 6. add lib/myredis: myredis is a simple redis client. 10 | 7. 
add lib/etcd: etcd is the etcd helper based on etcd api/v1 11 | 8. add front-end: front-end design and completed 12 | -------------------------------------------------------------------------------- /platform/api/dao/dao.go: -------------------------------------------------------------------------------- 1 | package dao 2 | 3 | import ( 4 | "overlord/pkg/etcd" 5 | "overlord/platform/api/model" 6 | ) 7 | 8 | // New create new dao layer 9 | func New(cfg *model.ServerConfig) *Dao { 10 | e, err := etcd.New(cfg.Etcd) 11 | if err != nil { 12 | panic(err) 13 | } 14 | 15 | d := &Dao{e: e, m: cfg.Monitor, c: cfg.Cluster, vs: cfg.Versions} 16 | return d 17 | } 18 | 19 | // Dao is the dao level abstraction 20 | type Dao struct { 21 | e *etcd.Etcd 22 | m *model.MonitorConfig 23 | c *model.DefaultClusterConfig 24 | vs []*model.VersionConfig 25 | } 26 | 27 | func (d *Dao) ETCD() *etcd.Etcd { 28 | return d.e 29 | } 30 | -------------------------------------------------------------------------------- /platform/api/dao/errors.go: -------------------------------------------------------------------------------- 1 | package dao 2 | 3 | import "errors" 4 | 5 | // define errors 6 | var ( 7 | ErrMasterNumMustBeEven = errors.New("master number must be even") 8 | ErrCacheTypeNotSupport = errors.New("cache type only support memcache|redis|redis_cluster") 9 | ) 10 | -------------------------------------------------------------------------------- /platform/api/dao/instance.go: -------------------------------------------------------------------------------- 1 | package dao 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "overlord/pkg/etcd" 8 | "overlord/pkg/types" 9 | "overlord/platform/api/model" 10 | "overlord/platform/job" 11 | ) 12 | 13 | // SetInstanceWeight will change the given instance weight 14 | func (d *Dao) SetInstanceWeight(ctx context.Context, addr string, weight int) error { 15 | sub, cancel := context.WithCancel(ctx) 16 | defer cancel() 17 | return d.e.Set(sub, fmt.Sprintf("%s/%s/weight", etcd.InstanceDirPrefix, addr), fmt.Sprint(weight)) 18 | } 19 | 20 | // RestartInstance will try to save new task into job stats 21 | func (d *Dao) RestartInstance(ctx context.Context, cname, addr string) (string, error) { 22 | sub, cancel := context.WithCancel(ctx) 23 | defer cancel() 24 | cluster, err := d.GetCluster(sub, cname) 25 | if err != nil { 26 | return "", err 27 | } 28 | contains := false 29 | for _, inst := range cluster.Instances { 30 | if fmt.Sprintf("%s:%d", inst.IP, inst.Port) == addr { 31 | contains = true 32 | break 33 | } 34 | } 35 | 36 | if !contains { 37 | return "", fmt.Errorf("cluster %s doesn't contains node %s", cname, addr) 38 | } 39 | j := d.createResartInstance(cluster, addr) 40 | return d.saveJob(sub, j) 41 | } 42 | 43 | func (d *Dao) createResartInstance(c *model.Cluster, addr string) *job.Job { 44 | j := &job.Job{ 45 | Cluster: c.Name, 46 | Nodes: []string{addr}, 47 | OpType: job.OpRestart, 48 | Group: c.Group, 49 | CacheType: types.CacheType(c.CacheType), 50 | } 51 | return j 52 | } 53 | -------------------------------------------------------------------------------- /platform/api/dao/spec.go: -------------------------------------------------------------------------------- 1 | package dao 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "overlord/pkg/etcd" 8 | ) 9 | 10 | // GetAllSpecs will query and load all specs 11 | func (d *Dao) GetAllSpecs(ctx context.Context) ([]string, error) { 12 | return d.e.GetAllSpecs(ctx) 13 | } 14 | 15 | // RemoveSpec remove the given specification. 
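// For example (the spec name is illustrative), RemoveSpec(ctx, "1c2g") deletes the
// etcd key <etcd.SpecsDir>/1c2g, following the key layout used in the Delete call below.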
16 | func (d *Dao) RemoveSpec(ctx context.Context, spec string) error { 17 | return d.e.Delete(ctx, fmt.Sprintf("%s/%s", etcd.SpecsDir, spec)) 18 | } 19 | -------------------------------------------------------------------------------- /platform/api/model/config.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "fmt" 5 | "overlord/pkg/log" 6 | ) 7 | 8 | // ServerConfig is apiserver's config 9 | type ServerConfig struct { 10 | Listen string `toml:"listen"` 11 | Etcd string `toml:"etcd"` 12 | Versions []*VersionConfig `toml:"versions"` 13 | Groups map[string]string `toml:"groups"` 14 | Monitor *MonitorConfig `toml:"monitor"` 15 | Cluster *DefaultClusterConfig `toml:"cluster"` 16 | *log.Config 17 | } 18 | 19 | // DefaultClusterConfig is the config used to write into cluster 20 | type DefaultClusterConfig struct { 21 | DialTimeout int `toml:"dial_timeout"` 22 | ReadTimeout int `toml:"read_timeout"` 23 | WriteTimeout int `toml:"write_timeout"` 24 | NodeConns int `toml:"node_connections"` 25 | PingFailLimit int `toml:"ping_fail_limit"` 26 | PingAutoEject bool `toml:"ping_auto_eject"` 27 | } 28 | 29 | // MonitorConfig types 30 | type MonitorConfig struct { 31 | URL string `toml:"url"` 32 | Panel string `toml:"panel"` 33 | NameVar string `toml:"name_var"` 34 | // in url is `orgId` 35 | OrgID int `toml:"org_id"` 36 | } 37 | 38 | // Href get monitory href 39 | func (mc *MonitorConfig) Href(cname string) string { 40 | return fmt.Sprintf("%s/%s?orgId=%d&var-%s=%s", mc.URL, mc.Panel, mc.OrgID, mc.NameVar, cname) 41 | } 42 | 43 | // VersionConfig is the config for used version 44 | type VersionConfig struct { 45 | CacheType string `toml:"cache_type"` 46 | Versions []string `toml:"versions"` 47 | Image string `toml:"image"` 48 | } 49 | -------------------------------------------------------------------------------- /platform/api/model/error.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import "errors" 4 | 5 | // define global errors 6 | var ( 7 | ErrConflict = errors.New("conflict") 8 | ErrNotFound = errors.New("not found") 9 | ) 10 | -------------------------------------------------------------------------------- /platform/api/server/appid.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "net/http" 5 | 6 | "overlord/platform/api/model" 7 | 8 | "github.com/gin-gonic/gin" 9 | ) 10 | 11 | func createAppid(c *gin.Context) { 12 | p := new(model.ParamAppid) 13 | if err := c.ShouldBind(p); err != nil { 14 | c.JSON(http.StatusBadRequest, err) 15 | return 16 | } 17 | 18 | if err := p.Validate(); err != nil { 19 | eJSON(c, err) 20 | return 21 | } 22 | 23 | err := svc.CreateAppid(p.Appid) 24 | if err != nil { 25 | eJSON(c, err) 26 | return 27 | } 28 | 29 | done(c) 30 | } 31 | 32 | func getAppids(c *gin.Context) { 33 | format := c.DefaultQuery("format", "plain") 34 | if format == "tree" { 35 | appids, err := svc.GetTreeAppid() 36 | if err != nil { 37 | eJSON(c, err) 38 | return 39 | } 40 | listJSON(c, appids, len(appids)) 41 | return 42 | } else if format == "plain" { 43 | appids, err := svc.GetPlainAppid() 44 | if err != nil { 45 | eJSON(c, err) 46 | return 47 | } 48 | listJSON(c, appids, len(appids)) 49 | return 50 | } 51 | 52 | c.JSON(http.StatusBadRequest, map[string]string{"error": "output format must be one of plain|tree"}) 53 | } 54 | 55 | func getAppid(c *gin.Context) { 56 | appid := 
c.Param("appid") 57 | ga, err := svc.GetGroupedAppid(appid) 58 | if err != nil { 59 | eJSON(c, err) 60 | return 61 | } 62 | c.JSON(http.StatusOK, ga) 63 | } 64 | 65 | func removeAppid(c *gin.Context) { 66 | appid := c.Param("appid") 67 | err := svc.RemoveAppid(appid) 68 | if err != nil { 69 | eJSON(c, err) 70 | return 71 | } 72 | done(c) 73 | } 74 | -------------------------------------------------------------------------------- /platform/api/server/approve.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/gin-gonic/gin" 7 | ) 8 | 9 | func approveJob(c *gin.Context) { 10 | id := c.PostForm("job_id") 11 | if id == "" { 12 | c.JSON(http.StatusBadRequest, "job_id not exists") 13 | return 14 | } 15 | 16 | err := svc.ApproveJob(id) 17 | if err != nil { 18 | eJSON(c, err) 19 | return 20 | } 21 | 22 | c.JSON(http.StatusOK, map[string]string{"job": "approved"}) 23 | } 24 | -------------------------------------------------------------------------------- /platform/api/server/command.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "strings" 7 | 8 | "github.com/gin-gonic/gin" 9 | ) 10 | 11 | // POST /commands/:ip/:port 12 | func executeCommand(c *gin.Context) { 13 | ip := c.Param("ip") 14 | port := c.Param("port") 15 | 16 | cmd := c.PostForm("command") 17 | args := strings.Split(cmd, " ") 18 | rcmd, err := svc.Execute(fmt.Sprintf("%s:%s", ip, port), args[0], args[1:]...) 19 | if err != nil { 20 | eJSON(c, err) 21 | return 22 | } 23 | 24 | c.JSON(http.StatusOK, rcmd) 25 | } 26 | -------------------------------------------------------------------------------- /platform/api/server/group.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import "github.com/gin-gonic/gin" 4 | 5 | // e.GET("/groups", getAllGroups) 6 | func getAllGroups(c *gin.Context) { 7 | groups := svc.GetAllGroups() 8 | 9 | listJSON(c, groups, len(groups)) 10 | } 11 | -------------------------------------------------------------------------------- /platform/api/server/instance.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "net/http" 5 | "overlord/platform/api/model" 6 | "overlord/platform/job" 7 | 8 | "github.com/gin-gonic/gin" 9 | ) 10 | 11 | // GET /clusters/:cluster_id/instances 12 | func getInstances(c *gin.Context) { 13 | } 14 | 15 | func changeInstanceWeight(c *gin.Context) { 16 | p := model.ParamScaleWeight{} 17 | if err := c.ShouldBind(&p); err != nil { 18 | eJSON(c, err) 19 | return 20 | } 21 | instance := c.Param("instance_addr") 22 | 23 | err := svc.SetInstanceWeight(instance, p.Weight) 24 | if err != nil { 25 | eJSON(c, err) 26 | return 27 | } 28 | done(c) 29 | } 30 | 31 | func restartInstance(c *gin.Context) { 32 | cname := c.Param("cluster_name") 33 | addr := c.Param("instance_addr") 34 | jobid, err := svc.RestartInstance(cname, addr) 35 | if err != nil { 36 | eJSON(c, err) 37 | return 38 | } 39 | c.JSON(http.StatusOK, &model.Job{ID: jobid, State: job.StatePending}) 40 | } 41 | -------------------------------------------------------------------------------- /platform/api/server/job.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "net/http" 5 | 6 | "strings" 7 | 8 | "github.com/gin-gonic/gin" 9 | 
"go.etcd.io/etcd/client" 10 | ) 11 | 12 | // getJob get the job by given number 13 | func getJob(c *gin.Context) { 14 | jobID := c.Param("job_id") 15 | jobID = strings.Replace(jobID, ".", "/", -1) 16 | t, err := svc.GetJob(jobID) 17 | if client.IsKeyNotFound(err) { 18 | c.JSON(http.StatusNotFound, err) 19 | return 20 | } else if err != nil { 21 | eJSON(c, err) 22 | return 23 | } 24 | 25 | c.JSON(http.StatusOK, t) 26 | } 27 | 28 | func getJobs(c *gin.Context) { 29 | j, err := svc.GetJobs() 30 | if err != nil { 31 | eJSON(c, err) 32 | return 33 | } 34 | 35 | listJSON(c, j, len(j)) 36 | } 37 | -------------------------------------------------------------------------------- /platform/api/server/server.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "overlord/pkg/log" 5 | "overlord/platform/api/model" 6 | "overlord/platform/api/service" 7 | 8 | "github.com/gin-gonic/gin" 9 | ) 10 | 11 | var ( 12 | svc *service.Service 13 | ) 14 | 15 | // Run the whole overlord app 16 | func Run(cfg *model.ServerConfig, s *service.Service) { 17 | svc = s 18 | engine := gin.Default() 19 | initRouter(engine) 20 | if err := engine.Run(cfg.Listen); err != nil { 21 | log.Errorf("engine start fail due to %v", err) 22 | panic(err) 23 | } 24 | } 25 | 26 | func initRouter(ge *gin.Engine) { 27 | e := ge.Group("/api/v1") 28 | 29 | clusters := e.Group("/clusters") 30 | clusters.POST("/", createCluster) 31 | clusters.GET("/", getClusters) 32 | 33 | clusters.DELETE("/:cluster_name", removeCluster) 34 | clusters.GET("/:cluster_name", getCluster) 35 | 36 | clusters.POST("/:cluster_name/instance/:instance_addr/restart", restartInstance) 37 | 38 | clusters.PATCH("/:cluster_name/instances/:instance_addr", changeInstanceWeight) 39 | clusters.PATCH("/:cluster_name/instances", scaleCluster) 40 | // TODO: impl it 41 | clusters.GET("/:cluster_name/instances", getInstances) 42 | 43 | clusters.POST("/:cluster_name/appid", assignAppid) 44 | clusters.DELETE("/:cluster_name/appid", unassignAppid) 45 | 46 | // clusters.POST("/:cluster_name/appids", ) 47 | 48 | cmds := e.Group("/commands") 49 | cmds.POST("/:ip/:port", executeCommand) 50 | 51 | jobs := e.Group("/jobs") 52 | jobs.GET("/", getJobs) 53 | jobs.GET("/:job_id", getJob) 54 | 55 | job := e.Group("/job") 56 | job.POST("/", approveJob) 57 | 58 | specs := e.Group("/specs") 59 | specs.GET("/", getSpecs) 60 | specs.DELETE("/:spec", removeSpecs) 61 | 62 | appids := e.Group("/appids") 63 | appids.POST("/", createAppid) 64 | appids.GET("/", getAppids) 65 | appids.GET("/:appid", getAppid) 66 | appids.DELETE("/:appid", removeAppid) 67 | 68 | e.GET("/versions", getAllVersions) 69 | e.GET("/groups", getAllGroups) 70 | 71 | } 72 | -------------------------------------------------------------------------------- /platform/api/server/spec.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import "github.com/gin-gonic/gin" 4 | 5 | // GET /specs 6 | func getSpecs(c *gin.Context) { 7 | specs, err := svc.GetAllSpecs() 8 | if err != nil { 9 | eJSON(c, err) 10 | return 11 | } 12 | listJSON(c, specs, len(specs)) 13 | } 14 | 15 | // DELETE /specs/:spec 16 | func removeSpecs(c *gin.Context) { 17 | spec := c.Param("spec") 18 | err := svc.RemoveSpec(spec) 19 | if err != nil { 20 | eJSON(c, err) 21 | return 22 | } 23 | done(c) 24 | } 25 | -------------------------------------------------------------------------------- /platform/api/server/tools.go: 
-------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | 7 | "overlord/platform/api/model" 8 | 9 | "github.com/gin-gonic/gin" 10 | "go.etcd.io/etcd/client" 11 | ) 12 | 13 | // eJSON will report error json into body 14 | func eJSON(c *gin.Context, err error) { 15 | merr := map[string]interface{}{"error": fmt.Sprintf("%v", err)} 16 | 17 | if client.IsKeyNotFound(err) { 18 | c.JSON(http.StatusNotFound, err) 19 | return 20 | } 21 | 22 | if err == model.ErrNotFound { 23 | c.JSON(http.StatusNotFound, merr) 24 | return 25 | } 26 | if err == model.ErrConflict { 27 | c.JSON(http.StatusConflict, merr) 28 | return 29 | } 30 | 31 | c.JSON(http.StatusInternalServerError, merr) 32 | } 33 | 34 | type list struct { 35 | Count int `json:"count"` 36 | Items interface{} `json:"items"` 37 | } 38 | 39 | func empty() *list { 40 | return &list{ 41 | Count: 0, 42 | Items: []struct{}{}, 43 | } 44 | } 45 | 46 | func listJSON(c *gin.Context, vals interface{}, count int) { 47 | if count == 0 { 48 | c.JSON(http.StatusOK, empty()) 49 | return 50 | } 51 | 52 | c.JSON(http.StatusOK, &list{ 53 | Count: count, 54 | Items: vals, 55 | }) 56 | } 57 | 58 | func done(c *gin.Context) { 59 | c.JSON(http.StatusOK, struct { 60 | Message string `json:"message"` 61 | }{ 62 | Message: "done", 63 | }) 64 | } 65 | -------------------------------------------------------------------------------- /platform/api/server/version.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import "github.com/gin-gonic/gin" 4 | 5 | // e.GET("/versions", getAllVersion) 6 | func getAllVersions(c *gin.Context) { 7 | versions, err := svc.GetAllVersions() 8 | if err != nil { 9 | eJSON(c, err) 10 | return 11 | } 12 | listJSON(c, versions, len(versions)) 13 | } 14 | -------------------------------------------------------------------------------- /platform/api/service/appid.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | "overlord/platform/api/model" 6 | ) 7 | 8 | // CreateAppid create new appid 9 | func (s *Service) CreateAppid(appid string) error { 10 | return s.d.CreateAppid(context.Background(), appid) 11 | } 12 | 13 | // GetPlainAppid will get appid by plain format 14 | func (s *Service) GetPlainAppid() ([]string, error) { 15 | return s.d.GetPlainAppid(context.Background()) 16 | } 17 | 18 | // GetTreeAppid get the grouped all result 19 | func (s *Service) GetTreeAppid() ([]*model.TreeAppid, error) { 20 | return s.d.GetTreeAppid(context.Background()) 21 | } 22 | 23 | // GetGroupedAppid will query the grouped cluster by appid 24 | func (s *Service) GetGroupedAppid(appid string) (*model.GroupedAppid, error) { 25 | return s.d.GetGroupedAppid(context.Background(), appid) 26 | } 27 | 28 | // RemoveAppid the given appid 29 | func (s *Service) RemoveAppid(appid string) error { 30 | return s.d.RemoveAppid(context.Background(), appid) 31 | } 32 | -------------------------------------------------------------------------------- /platform/api/service/cluster.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | "overlord/platform/api/model" 6 | ) 7 | 8 | // CreateCluster will create new cluster 9 | func (s *Service) CreateCluster(p *model.ParamCluster) (string, error) { 10 | return s.d.CreateCluster(context.Background(), p) 11 | } 12 | 13 | // 
GetCluster by given cluster name 14 | func (s *Service) GetCluster(cname string) (*model.Cluster, error) { 15 | return s.d.GetCluster(context.Background(), cname) 16 | } 17 | 18 | // GetClusters will get all clusters 19 | func (s *Service) GetClusters(name string) ([]*model.Cluster, error) { 20 | return s.d.GetClusters(context.Background(), name) 21 | } 22 | 23 | // RemoveCluster will remove the cluster if the cluster is not associated with other appids 24 | func (s *Service) RemoveCluster(cname string) (string, error) { 25 | return s.d.RemoveCluster(context.Background(), cname) 26 | } 27 | 28 | // ScaleCluster will scale the given cluster with params. 29 | // for redis cluster, number means scale chunk. 30 | // for redis/memcache, number means scale numbers. 31 | func (s *Service) ScaleCluster(p *model.ParamScale) (jobID string, err error) { 32 | jobID, err = s.d.ScaleCluster(context.Background(), p) 33 | return 34 | } 35 | 36 | // AssignAppid will asign appid and cluster 37 | func (s *Service) AssignAppid(cname, appid string) error { 38 | sub, cancel := context.WithCancel(context.Background()) 39 | defer cancel() 40 | return s.d.AssignAppid(sub, cname, appid) 41 | } 42 | 43 | // UnassignAppid will unasign appid and cluster 44 | func (s *Service) UnassignAppid(cname, appid string) error { 45 | sub, cancel := context.WithCancel(context.Background()) 46 | defer cancel() 47 | return s.d.UnassignAppid(sub, cname, appid) 48 | } 49 | -------------------------------------------------------------------------------- /platform/api/service/command.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import "overlord/pkg/myredis" 4 | 5 | // Execute command into the given addr 6 | func (s *Service) Execute(addr string, cmd string, arg ...string) (*myredis.Command, error) { 7 | return s.client.Execute(addr, myredis.NewCmd(cmd).Arg(arg...)) 8 | } 9 | -------------------------------------------------------------------------------- /platform/api/service/group.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import "overlord/platform/api/model" 4 | 5 | // GetAllGroups will load groups from file path 6 | func (s *Service) GetAllGroups() []*model.Group { 7 | groups := make([]*model.Group, 0, len(s.cfg.Groups)) 8 | for slug, nameCN := range s.cfg.Groups { 9 | groups = append(groups, &model.Group{ 10 | Name: slug, 11 | NameCN: nameCN, 12 | }) 13 | } 14 | return groups 15 | } 16 | -------------------------------------------------------------------------------- /platform/api/service/instance.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | ) 6 | 7 | // SetInstanceWeight will search the given name 8 | func (s *Service) SetInstanceWeight(addr string, weight int) error { 9 | return s.d.SetInstanceWeight(context.Background(), addr, weight) 10 | } 11 | 12 | // RestartInstance will try send restart job to etcd 13 | func (s *Service) RestartInstance(cname string, addr string) (string, error) { 14 | return s.d.RestartInstance(context.Background(), cname, addr) 15 | } 16 | -------------------------------------------------------------------------------- /platform/api/service/service.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "overlord/pkg/myredis" 5 | "overlord/platform/api/dao" 6 | "overlord/platform/api/model" 7 | ) 8 | 9 | // 
New create new service of overlord 10 | func New(cfg *model.ServerConfig) *Service { 11 | s := &Service{ 12 | cfg: cfg, 13 | client: myredis.New(), 14 | d: dao.New(cfg), 15 | } 16 | go s.jobManager() 17 | return s 18 | } 19 | 20 | // Service is the struct for api server 21 | type Service struct { 22 | d *dao.Dao 23 | client *myredis.Client 24 | cfg *model.ServerConfig 25 | } 26 | -------------------------------------------------------------------------------- /platform/api/service/spec.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import "context" 4 | 5 | // GetAllSpecs will get all specifications. 6 | func (s *Service) GetAllSpecs() ([]string, error) { 7 | return s.d.GetAllSpecs(context.Background()) 8 | } 9 | 10 | // RemoveSpec will remove the given specification 11 | func (s *Service) RemoveSpec(spec string) error { 12 | return s.d.RemoveSpec(context.Background(), spec) 13 | } 14 | -------------------------------------------------------------------------------- /platform/api/service/version.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import "overlord/platform/api/model" 4 | 5 | // GetAllVersions will load version from file path 6 | func (s *Service) GetAllVersions() ([]*model.Version, error) { 7 | versions := make([]*model.Version, len(s.cfg.Versions)) 8 | for i, version := range s.cfg.Versions { 9 | versions[i] = &model.Version{CacheType: version.CacheType, Versions: version.Versions} 10 | } 11 | return versions, nil 12 | } 13 | -------------------------------------------------------------------------------- /platform/chunk/dist_test.go: -------------------------------------------------------------------------------- 1 | package chunk 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestDist(t *testing.T) { 10 | offers := _createOffers(5, 100, 20, 1000, 2000) 11 | dist, err := DistIt(10, 10, 1, offers...) 12 | assert.NoError(t, err) 13 | assert.Len(t, dist.Addrs, 10) 14 | } 15 | 16 | func TestDistAppendItNewOffers(t *testing.T) { 17 | offers := _createOffers(7, 100, 20, 1000, 2000) 18 | dist, err := DistIt(10, 10, 1, offers[:5]...) 19 | assert.NoError(t, err) 20 | assert.Len(t, dist.Addrs, 10) 21 | newDist, err := DistAppendIt(dist, 2, 2, 2, offers...) 22 | assert.NoError(t, err) 23 | assert.Len(t, newDist.Addrs, 2) 24 | t.Log(dist.Addrs) 25 | } 26 | 27 | func TestDistAppendItNotOffers(t *testing.T) { 28 | offers := _createOffers(7, 100, 20, 1000, 2000) 29 | dist, err := DistIt(10, 10, 1, offers...) 30 | assert.NoError(t, err) 31 | assert.Len(t, dist.Addrs, 10) 32 | newDist, err := DistAppendIt(dist, 2, 2, 2, offers...) 33 | assert.NoError(t, err) 34 | assert.Len(t, newDist.Addrs, 2) 35 | t.Log(dist.Addrs) 36 | } 37 | -------------------------------------------------------------------------------- /platform/chunk/node.go: -------------------------------------------------------------------------------- 1 | package chunk 2 | 3 | import ( 4 | "fmt" 5 | "overlord/pkg/log" 6 | "strings" 7 | ) 8 | 9 | // Node is the type for a cache node. 
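// A minimal construction sketch (field values are illustrative; the run ID reuses the
// one from the nodes.conf sample comment below):
//
//	n := &Node{Name: "127.0.0.1", Port: 7000, Role: "master",
//		RunID: "0c415ea9a9244803d33f8ce97cf6f5b30f862904",
//		Slots: []Slot{{Begin: 0, End: 5460}}}
//	_ = n.IntoConfLine(true) // renders one nodes.conf line flagged "myself,master"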
10 | type Node struct { 11 | Name string 12 | Port int 13 | Role string 14 | 15 | // Node Run ID 16 | RunID string 17 | SlaveOf string 18 | Slots []Slot 19 | } 20 | 21 | // 0c415ea9a9244803d33f8ce97cf6f5b30f862904 127.0.0.1:7020@17020 master - 0 1540462008000 11 connected 13654-15018 22 | const nodesConfLine = "%040s %s:%d@%d %s %s 0 0 0 connected %s\n" 23 | 24 | // IntoConfLine will generate the line for this node in nodes.conf 25 | func (n *Node) IntoConfLine(myself bool) string { 26 | flags := n.Role 27 | if myself { 28 | flags = "myself," + flags 29 | } 30 | slots := make([]string, len(n.Slots)) 31 | for i, s := range n.Slots { 32 | slots[i] = s.String() 33 | } 34 | slaveOf := n.SlaveOf 35 | if n.Role == RoleSlave { 36 | slaveOf = fmt.Sprintf("%040s", n.SlaveOf) 37 | } 38 | 39 | line := fmt.Sprintf(nodesConfLine, 40 | n.RunID, n.Name, n.Port, n.Port+10000, 41 | flags, slaveOf, strings.Join(slots, " ")) 42 | log.Infof("generated nodes.conf line as: %s", line) 43 | return line 44 | } 45 | 46 | func (n *Node) String() string { 47 | return fmt.Sprintf("Node<%s:%d %s>", n.Name, n.Port, n.Role) 48 | } 49 | 50 | // Addr gets the node addr in ip:port form 51 | func (n *Node) Addr() string { 52 | return fmt.Sprintf("%s:%d", n.Name, n.Port) 53 | } 54 | 55 | const epochSet = "vars currentEpoch 0 lastVoteEpoch 0" 56 | 57 | // GenNodesConfFile will generate the nodes.conf file content 58 | func GenNodesConfFile(name string, port int, chunks []*Chunk) string { 59 | var sb strings.Builder 60 | for _, chunk := range chunks { 61 | for _, node := range chunk.Nodes { 62 | myself := node.Name == name && node.Port == port 63 | _, _ = sb.WriteString(node.IntoConfLine(myself)) 64 | } 65 | } 66 | _, _ = sb.WriteString(epochSet) 67 | return sb.String() 68 | } 69 | -------------------------------------------------------------------------------- /platform/chunk/slot.go: -------------------------------------------------------------------------------- 1 | package chunk 2 | 3 | import "fmt" 4 | 5 | // Slot is a slot range, e.g. [9,10] means it contains slots 9 and 10. 6 | type Slot struct { 7 | Begin int 8 | End int 9 | } 10 | 11 | func (s *Slot) String() string { 12 | if s.Begin == s.End { 13 | return fmt.Sprintf("%d", s.Begin) 14 | } 15 | return fmt.Sprintf("%d-%d", s.Begin, s.End) 16 | } 17 | -------------------------------------------------------------------------------- /platform/chunk/tools.go: -------------------------------------------------------------------------------- 1 | package chunk 2 | 3 | import ( 4 | "net" 5 | 6 | "overlord/pkg/log" 7 | ) 8 | 9 | // ValidateIPAddress checks whether the given hostname is a valid IP address 10 | // and tries to resolve it if not. 
11 | // Return original host if resolving failed 12 | func ValidateIPAddress(hostname string) string { 13 | ip := net.ParseIP(hostname) 14 | if ip != nil { 15 | return ip.String() 16 | } 17 | addr, err := net.LookupIP(hostname) 18 | if err != nil { 19 | log.Warnf("error resolving hostname %s: %+v", hostname, err) 20 | return hostname 21 | } 22 | if len(addr) == 0 { 23 | log.Warnf("hostname %s could not be resolved", hostname) 24 | return hostname 25 | } 26 | return addr[0].String() 27 | } 28 | -------------------------------------------------------------------------------- /platform/job/create/job_test.go: -------------------------------------------------------------------------------- 1 | package create 2 | 3 | import ( 4 | "math/rand" 5 | "os" 6 | "overlord/platform/chunk" 7 | "overlord/pkg/etcd" 8 | "overlord/pkg/types" 9 | "testing" 10 | "time" 11 | 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func mockDist(num int) (dist *chunk.Dist) { 16 | rand.Seed(time.Now().Unix()) 17 | dist = new(chunk.Dist) 18 | for i := 0; i < num; i++ { 19 | addr := &chunk.Addr{ 20 | IP: "0.0.0.0", 21 | Port: rand.Intn(30000), 22 | } 23 | dist.Addrs = append(dist.Addrs, addr) 24 | } 25 | return 26 | } 27 | func newEtcd(conf string) (e *etcd.Etcd, err error) { 28 | return etcd.New(conf) 29 | } 30 | 31 | func TestCreateJob(t *testing.T) { 32 | os.Setenv("RunMode", "test") 33 | SetWorkDir("/tmp/data/%d") 34 | info := &CacheInfo{ 35 | JobID: "test", 36 | Name: "test", 37 | CacheType: types.CacheTypeMemcache, 38 | MaxMemory: 10, 39 | Number: 4, 40 | Thread: 1, 41 | Version: "1.5.12", 42 | } 43 | info.Dist = mockDist(info.Number) 44 | db, err := newEtcd("http://127.0.0.1:2379") 45 | assert.NoError(t, err) 46 | job := NewCacheJob(db, info) 47 | _ = job.Create() 48 | for _, inst := range info.Dist.Addrs { 49 | dpinfo, err := GenDeployInfo(db, inst.IP, inst.Port) 50 | assert.NoError(t, err) 51 | // assert.Equal(t, info.CacheType, dpinfo.CacheType, "assert cache type") 52 | assert.Equal(t, info.JobID, dpinfo.JobID, "assert job id") 53 | assert.NoError(t, err) 54 | p, err := SetupCacheService(dpinfo) 55 | assert.NoError(t, err, "setup cache service") 56 | p.Stop() 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /platform/job/create/tools.go: -------------------------------------------------------------------------------- 1 | package create 2 | 3 | import ( 4 | "context" 5 | "overlord/pkg/etcd" 6 | "fmt" 7 | ) 8 | 9 | func cleanEtcdDirtyDir(ctx context.Context, e *etcd.Etcd, instance string) error { 10 | return e.RMDir(ctx, fmt.Sprintf("%s/%s", etcd.InstanceDirPrefix, instance)) 11 | } 12 | -------------------------------------------------------------------------------- /platform/job/create/types.go: -------------------------------------------------------------------------------- 1 | package create 2 | 3 | import ( 4 | "overlord/pkg/types" 5 | "overlord/platform/chunk" 6 | ) 7 | 8 | // CacheInfo is the server side create cache info. 9 | type CacheInfo struct { 10 | JobID string 11 | 12 | Name string 13 | 14 | CacheType types.CacheType 15 | 16 | MaxMemory float64 17 | CPU float64 18 | Number int 19 | 20 | // for redis : it will be ignore becasue redis only run 1 cpu at all. 
21 | Thread int 22 | 23 | Version string 24 | Image string 25 | 26 | Dist *chunk.Dist 27 | Group string 28 | 29 | Chunks []*chunk.Chunk 30 | IDMap map[string]map[int]string 31 | } 32 | -------------------------------------------------------------------------------- /platform/job/job.go: -------------------------------------------------------------------------------- 1 | // Package job defines the job meta data 2 | package job 3 | 4 | import "overlord/pkg/types" 5 | 6 | // OpType is the operation of job name 7 | type OpType = string 8 | 9 | // define Optration types 10 | const ( 11 | // create means create empty cluster into metadata and scale nodes into given. 12 | OpCreate OpType = "create" 13 | 14 | // OpDestroy means that destroy the whole cluster. 15 | OpDestroy OpType = "destroy" 16 | 17 | // scale may be scale with given node count 18 | OpScale OpType = "scale" 19 | 20 | // OpStretch will scale the instance memory and may migrating slot. 21 | OpStretch OpType = "stretch" 22 | 23 | // OpMigrate means delete specified node and restart in new agent. 24 | OpMigrate OpType = "migrate" 25 | 26 | // OpFix will trying to run `rustkit fix` to the given cluster(redis cluster only) 27 | OpFix OpType = "fix" 28 | 29 | // Balance will balance the given cluster 30 | OpBalance OpType = "balance" 31 | 32 | // OpRestart will trying to restart the special node 33 | OpRestart OpType = "restart" 34 | ) 35 | 36 | // Job is a single POD type which represent a single job. 37 | type Job struct { 38 | // Order was generated by etcd post 39 | ID string 40 | Name string 41 | 42 | CacheType types.CacheType 43 | Version string // service version 44 | Image string // service image 45 | Num int // num of instances ,if redis-cluster,mean master number. 46 | MaxMem float64 // max memory MB use of instance. 47 | CPU float64 // cpu count for each instance. 48 | 49 | // Scheduler is the name of scheduler and path of etcd 50 | Scheduler string 51 | 52 | OpType OpType 53 | 54 | Group string 55 | 56 | Nodes []string 57 | // Users to apply that 58 | // the first is the job commiter 59 | Users []string 60 | 61 | // cluster must never be absent unless create. 62 | Cluster string 63 | 64 | // Params is the given parameters by the frontend interface. 65 | Params map[string]string 66 | 67 | // ParamsValid is the function which check need to check the jobs. 68 | ParamsValid func(*Job, map[string]string) (bool, map[string]string) `json:"-"` 69 | 70 | // Args is the auto gennerated arguments for the whole job 71 | // maybe: 72 | // role map 73 | // nodes 74 | // template data 75 | // and so on. 76 | Args map[string]interface{} 77 | 78 | // ArgsValid is the function which check need to check the jobs. 
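// A hypothetical validator sketch (the checked "nodes" key is illustrative only):
//
//	j.ArgsValid = func(j *Job, args map[string]interface{}) (bool, map[string]string) {
//		if _, ok := args["nodes"]; !ok {
//			return false, map[string]string{"nodes": "missing"}
//		}
//		return true, nil
//	}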
79 | ArgsValid func(*Job, map[string]interface{}) (bool, map[string]string) `json:"-"` 80 | } 81 | -------------------------------------------------------------------------------- /platform/job/state.go: -------------------------------------------------------------------------------- 1 | package job 2 | 3 | // StateType is the state enum for job 4 | type StateType = string 5 | 6 | // define status enum 7 | var ( 8 | StatePending StateType = "pending" 9 | StateRunning StateType = "running" 10 | StateRecover StateType = "recover" 11 | 12 | StateWaitApprove StateType = "wait_approve" 13 | StateApproved StateType = "approved" 14 | 15 | // done status 16 | StateDone StateType = "done" 17 | 18 | // fail status 19 | StateLost StateType = "lost" 20 | StateFail StateType = "fail" 21 | ) 22 | 23 | // define cluster deploying state enum 24 | var ( 25 | StateChunking StateType = "deploy_chunking" 26 | // StateNeedBalance is the state which all executer has ben succeed lunched the cache instance 27 | StateNeedBalance StateType = "deploy_needbalance" 28 | ) 29 | -------------------------------------------------------------------------------- /platform/mesos/mesos_test.go: -------------------------------------------------------------------------------- 1 | package mesos 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestMakeResource(t *testing.T) { 10 | rs := makeResources(0.1, 100, 31000) 11 | assert.Equal(t, "cpus:0.1;mem:100;ports:[31000]", rs.String()) 12 | } 13 | -------------------------------------------------------------------------------- /platform/mesos/tools.go: -------------------------------------------------------------------------------- 1 | package mesos 2 | 3 | import ( 4 | "path/filepath" 5 | "strconv" 6 | "strings" 7 | "time" 8 | 9 | ms "github.com/mesos/mesos-go/api/v1/lib" 10 | ) 11 | 12 | func splitJobID(key string) string { 13 | _, file := filepath.Split(key) 14 | return file 15 | } 16 | 17 | // Duration parse toml time duration 18 | type Duration time.Duration 19 | 20 | func (d *Duration) UnmarshalText(text []byte) error { 21 | tmp, err := time.ParseDuration(string(text)) 22 | if err == nil { 23 | *d = Duration(tmp) 24 | } 25 | return err 26 | } 27 | 28 | // taskid should be ip:port-cluster-id 29 | // if id not equal zero mean task had fail before and been recover. 30 | func parseTaskID(t ms.TaskID) (cluster, ip, port string, id int64, err error) { 31 | v := t.GetValue() 32 | tids := strings.Split(v, ",") 33 | ss := strings.Split(tids[0], "-") 34 | if len(ss) != 3 { 35 | err = errTaskID 36 | return 37 | } 38 | host := ss[0] 39 | cluster = ss[1] 40 | ids := ss[2] 41 | id, _ = strconv.ParseInt(ids, 10, 64) 42 | idx := strings.IndexByte(host, ':') 43 | ip = host[:idx] 44 | port = host[idx+1:] 45 | return 46 | } 47 | -------------------------------------------------------------------------------- /proxy/listen.go: -------------------------------------------------------------------------------- 1 | package proxy 2 | 3 | import ( 4 | "net" 5 | "os" 6 | 7 | "github.com/pkg/errors" 8 | ) 9 | 10 | // Listen listen. 
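// Supported protos are "tcp" and "unix"; anything else returns an error.
// A usage sketch (the listen address is illustrative):
//
//	l, err := Listen("tcp", "0.0.0.0:26379")
//	if err != nil {
//		// handle listen failure
//	}
//	defer l.Close()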
11 | func Listen(proto string, addr string) (net.Listener, error) { 12 | switch proto { 13 | case "tcp": 14 | return listenTCP(addr) 15 | case "unix": 16 | return listenUnix(addr) 17 | } 18 | return nil, errors.New("no support proto") 19 | } 20 | 21 | func listenTCP(addr string) (net.Listener, error) { 22 | tcpAddr, err := net.ResolveTCPAddr("tcp", addr) 23 | if err != nil { 24 | return nil, errors.Wrap(err, "Proxy Listen tcp ResolveTCPAddr") 25 | } 26 | return net.ListenTCP("tcp", tcpAddr) 27 | } 28 | 29 | func listenUnix(addr string) (net.Listener, error) { 30 | err := os.Remove(addr) 31 | if err != nil && !os.IsNotExist(err) { 32 | return nil, errors.Wrap(err, "Proxy Listen unix sock but path exist and can't remove") 33 | } 34 | unixAddr, err := net.ResolveUnixAddr("unix", addr) 35 | if err != nil { 36 | return nil, errors.Wrap(err, "Proxy Listen unix ResolveUnixAddr") 37 | } 38 | return net.ListenUnix("unix", unixAddr) 39 | } 40 | -------------------------------------------------------------------------------- /proxy/proto/memcache/binary/pinger.go: -------------------------------------------------------------------------------- 1 | package binary 2 | 3 | import ( 4 | "bytes" 5 | "sync/atomic" 6 | 7 | "overlord/pkg/bufio" 8 | libnet "overlord/pkg/net" 9 | "overlord/proxy/proto" 10 | 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | const ( 15 | pingBufferSize = 24 16 | ) 17 | 18 | var ( 19 | pingBs = []byte{ 20 | 0x80, // magic 21 | 0x0a, // cmd: noop 22 | 0x00, 0x00, // key len 23 | 0x00, // extra len 24 | 0x00, // data type 25 | 0x00, 0x00, // vbucket 26 | 0x00, 0x00, 0x00, 0x00, // body len 27 | 0x00, 0x00, 0x00, 0x00, // opaque 28 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // cas 29 | } 30 | pongBs = []byte{ 31 | 0x81, // magic 32 | 0x0a, // cmd: noop 33 | 0x00, 0x00, // key len 34 | 0x00, // extra len 35 | 0x00, // data type 36 | 0x00, 0x00, // status 37 | 0x00, 0x00, 0x00, 0x00, // body len 38 | 0x00, 0x00, 0x00, 0x00, // opaque 39 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // cas 40 | } 41 | ) 42 | 43 | type mcPinger struct { 44 | conn *libnet.Conn 45 | bw *bufio.Writer 46 | br *bufio.Reader 47 | 48 | state int32 49 | } 50 | 51 | // NewPinger new pinger. 
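// A usage sketch (the dial target is illustrative): Ping writes the 24-byte binary
// noop request and expects the matching noop response header back.
//
//	nc := libnet.DialWithTimeout("127.0.0.1:11211", time.Second, time.Second, time.Second)
//	p := NewPinger(nc)
//	err := p.Ping()
//	_ = p.Close()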
52 | func NewPinger(nc *libnet.Conn) proto.Pinger { 53 | return &mcPinger{ 54 | conn: nc, 55 | bw: bufio.NewWriter(nc), 56 | br: bufio.NewReader(nc, bufio.NewBuffer(pingBufferSize)), 57 | } 58 | } 59 | 60 | func (m *mcPinger) Ping() (err error) { 61 | if atomic.LoadInt32(&m.state) == closed { 62 | err = errors.WithStack(ErrPingerPong) 63 | return 64 | } 65 | _ = m.bw.Write(pingBs) 66 | if err = m.bw.Flush(); err != nil { 67 | err = errors.WithStack(err) 68 | return 69 | } 70 | _ = m.br.Read() 71 | defer m.br.AdvanceTo(0) 72 | head, err := m.br.ReadExact(requestHeaderLen) 73 | if err != nil { 74 | err = errors.WithStack(err) 75 | return 76 | } 77 | if !bytes.Equal(head, pongBs) { 78 | err = errors.WithStack(ErrPingerPong) 79 | } 80 | return 81 | } 82 | 83 | func (m *mcPinger) Close() error { 84 | if atomic.CompareAndSwapInt32(&m.state, opened, closed) { 85 | return m.conn.Close() 86 | } 87 | return nil 88 | } 89 | -------------------------------------------------------------------------------- /proxy/proto/memcache/binary/pinger_test.go: -------------------------------------------------------------------------------- 1 | package binary 2 | 3 | import ( 4 | "overlord/pkg/mockconn" 5 | "testing" 6 | "time" 7 | 8 | "overlord/pkg/bufio" 9 | libcon "overlord/pkg/net" 10 | 11 | "github.com/pkg/errors" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func TestPingerPingOk(t *testing.T) { 16 | conn := libcon.NewConn(mockconn.CreateConn(pongBs, 1), time.Second, time.Second) 17 | pinger := NewPinger(conn) 18 | 19 | err := pinger.Ping() 20 | assert.NoError(t, err) 21 | } 22 | 23 | func TestPingerPingEOF(t *testing.T) { 24 | conn := libcon.NewConn(mockconn.CreateConn(pongBs, 2), time.Second, time.Second) 25 | pinger := NewPinger(conn) 26 | 27 | err := pinger.Ping() 28 | assert.NoError(t, err) 29 | 30 | err = pinger.Ping() 31 | assert.NoError(t, err) 32 | } 33 | 34 | func TestPingerPing100Ok(t *testing.T) { 35 | conn := libcon.NewConn(mockconn.CreateConn(pongBs, 100), time.Second, time.Second) 36 | pinger := NewPinger(conn) 37 | 38 | for i := 0; i < 100; i++ { 39 | err := pinger.Ping() 40 | assert.NoError(t, err, "error iter: %d", i) 41 | } 42 | 43 | err := pinger.Ping() 44 | assert.EqualError(t, err, "EOF") 45 | } 46 | 47 | func TestPingerFlushErr(t *testing.T) { 48 | conn := libcon.NewConn(mockconn.CreateConn(pongBs, 100), time.Second, time.Second) 49 | c := conn.Conn.(*mockconn.MockConn) 50 | c.Err = errors.New("some error") 51 | pinger := NewPinger(conn) 52 | err := pinger.Ping() 53 | assert.EqualError(t, err, "some error") 54 | } 55 | 56 | func TestPingerClosed(t *testing.T) { 57 | conn := libcon.NewConn(mockconn.CreateConn(pongBs, 100), time.Second, time.Second) 58 | pinger := NewPinger(conn) 59 | err := pinger.Close() 60 | assert.NoError(t, err) 61 | 62 | err = pinger.Ping() 63 | assert.Error(t, err) 64 | assert.NoError(t, pinger.Close()) 65 | } 66 | 67 | func TestPingerNotReturnPong(t *testing.T) { 68 | conn := libcon.NewConn(mockconn.CreateConn([]byte("iam test bytes 24 length"), 1), time.Second, time.Second) 69 | pinger := NewPinger(conn) 70 | err := pinger.Ping() 71 | assert.Error(t, err) 72 | _causeEqual(t, ErrPingerPong, err) 73 | 74 | conn = libcon.NewConn(mockconn.CreateConn([]byte("less than 24 length"), 1), time.Second, time.Second) 75 | pinger = NewPinger(conn) 76 | err = pinger.Ping() 77 | assert.Error(t, err) 78 | _causeEqual(t, bufio.ErrBufferFull, err) 79 | } 80 | -------------------------------------------------------------------------------- 
/proxy/proto/memcache/binary/request_test.go: -------------------------------------------------------------------------------- 1 | package binary 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | var _allReqTypes = []RequestType{ 10 | RequestTypeGet, 11 | RequestTypeSet, 12 | RequestTypeAdd, 13 | RequestTypeReplace, 14 | RequestTypeDelete, 15 | RequestTypeIncr, 16 | RequestTypeDecr, 17 | RequestTypeGetQ, 18 | RequestTypeNoop, 19 | RequestTypeGetK, 20 | RequestTypeGetKQ, 21 | RequestTypeAppend, 22 | RequestTypePrepend, 23 | RequestTypeTouch, 24 | RequestTypeGat, 25 | RequestTypeUnknown, 26 | } 27 | 28 | func TestRequestTypeBytes(t *testing.T) { 29 | for _, rtype := range _allReqTypes { 30 | assert.Equal(t, []byte{byte(rtype)}, rtype.Bytes()) 31 | } 32 | assert.Equal(t, getString, RequestTypeGet.String()) 33 | assert.Equal(t, setString, RequestTypeSet.String()) 34 | assert.Equal(t, addString, RequestTypeAdd.String()) 35 | assert.Equal(t, replaceString, RequestTypeReplace.String()) 36 | assert.Equal(t, deleteString, RequestTypeDelete.String()) 37 | assert.Equal(t, incrString, RequestTypeIncr.String()) 38 | assert.Equal(t, decrString, RequestTypeDecr.String()) 39 | assert.Equal(t, getQString, RequestTypeGetQ.String()) 40 | assert.Equal(t, noopString, RequestTypeNoop.String()) 41 | assert.Equal(t, getKString, RequestTypeGetK.String()) 42 | assert.Equal(t, getKQString, RequestTypeGetKQ.String()) 43 | assert.Equal(t, appendString, RequestTypeAppend.String()) 44 | assert.Equal(t, prependString, RequestTypePrepend.String()) 45 | assert.Equal(t, touchString, RequestTypeTouch.String()) 46 | assert.Equal(t, gatString, RequestTypeGat.String()) 47 | assert.Equal(t, unknownString, RequestTypeUnknown.String()) 48 | } 49 | 50 | func TestMCRequestFuncsOk(t *testing.T) { 51 | req := newReq() 52 | req.respType = RequestTypeGet 53 | req.key = []byte("abc") 54 | req.data = []byte("\r\n") 55 | assert.Equal(t, []byte{byte(RequestTypeGet)}, req.Cmd()) 56 | assert.Equal(t, "abc", string(req.Key())) 57 | assert.Equal(t, "type:get key:abc data:\r\n", req.String()) 58 | 59 | req.Put() 60 | 61 | assert.Equal(t, RequestTypeUnknown, req.respType) 62 | assert.Len(t, req.keyLen, 2) 63 | assert.Len(t, req.extraLen, 1) 64 | assert.Len(t, req.status, 2) 65 | assert.Len(t, req.bodyLen, 4) 66 | assert.Len(t, req.opaque, 4) 67 | assert.Len(t, req.cas, 8) 68 | assert.Len(t, req.key, 0) 69 | assert.Len(t, req.data, 0) 70 | } 71 | -------------------------------------------------------------------------------- /proxy/proto/memcache/pinger.go: -------------------------------------------------------------------------------- 1 | package memcache 2 | 3 | import ( 4 | "bytes" 5 | "sync/atomic" 6 | 7 | "overlord/pkg/bufio" 8 | libnet "overlord/pkg/net" 9 | "overlord/proxy/proto" 10 | 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | const ( 15 | pingBufferSize = 128 16 | ) 17 | 18 | var ( 19 | pingBytes = []byte("set _ping 0 0 4\r\npong\r\n") 20 | pongBytes = []byte("STORED\r\n") 21 | ) 22 | 23 | type mcPinger struct { 24 | conn *libnet.Conn 25 | bw *bufio.Writer 26 | br *bufio.Reader 27 | 28 | state int32 29 | } 30 | 31 | // NewPinger new pinger. 
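// A usage sketch (the dial target is illustrative): Ping issues "set _ping 0 0 4"
// with body "pong" and expects "STORED\r\n" in response.
//
//	nc := libnet.DialWithTimeout("127.0.0.1:11211", time.Second, time.Second, time.Second)
//	p := NewPinger(nc)
//	err := p.Ping()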
32 | func NewPinger(nc *libnet.Conn) proto.Pinger { 33 | return &mcPinger{ 34 | conn: nc, 35 | br: bufio.NewReader(nc, bufio.NewBuffer(pingBufferSize)), 36 | bw: bufio.NewWriter(nc), 37 | } 38 | } 39 | 40 | func (m *mcPinger) Ping() (err error) { 41 | if atomic.LoadInt32(&m.state) == closed { 42 | err = errors.WithStack(ErrPingerPong) 43 | return 44 | } 45 | m.bw.Write(pingBytes) 46 | if err = m.bw.Flush(); err != nil { 47 | err = errors.WithStack(err) 48 | return 49 | } 50 | _ = m.br.Read() 51 | defer m.br.Buffer().Reset() 52 | var b []byte 53 | if b, err = m.br.ReadLine(); err != nil { 54 | err = errors.WithStack(err) 55 | return 56 | } 57 | if !bytes.Equal(b, pongBytes) { 58 | err = errors.WithStack(ErrPingerPong) 59 | } 60 | return 61 | } 62 | 63 | func (m *mcPinger) Close() error { 64 | if atomic.CompareAndSwapInt32(&m.state, opened, closed) { 65 | return m.conn.Close() 66 | } 67 | return nil 68 | } 69 | -------------------------------------------------------------------------------- /proxy/proto/memcache/pinger_test.go: -------------------------------------------------------------------------------- 1 | package memcache 2 | 3 | import ( 4 | "overlord/pkg/mockconn" 5 | libnet "overlord/pkg/net" 6 | "testing" 7 | "time" 8 | 9 | "github.com/pkg/errors" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestPingerPingOk(t *testing.T) { 14 | conn := libnet.NewConn(mockconn.CreateConn(pongBytes, 1), time.Second, time.Second) 15 | pinger := NewPinger(conn) 16 | 17 | err := pinger.Ping() 18 | assert.NoError(t, err) 19 | } 20 | 21 | func TestPingerPingMore(t *testing.T) { 22 | conn := libnet.NewConn(mockconn.CreateConn(pongBytes, 2), time.Second, time.Second) 23 | pinger := NewPinger(conn) 24 | 25 | err := pinger.Ping() 26 | assert.NoError(t, err) 27 | 28 | err = pinger.Ping() 29 | assert.NoError(t, err) 30 | } 31 | 32 | func TestPingerPing100Ok(t *testing.T) { 33 | conn := libnet.NewConn(mockconn.CreateConn(pongBytes, 100), time.Second, time.Second) 34 | pinger := NewPinger(conn) 35 | 36 | for i := 0; i < 100; i++ { 37 | err := pinger.Ping() 38 | assert.NoError(t, err, "error iter: %d", i) 39 | } 40 | 41 | err := pinger.Ping() 42 | assert.EqualError(t, err, "EOF") 43 | } 44 | 45 | func TestPingerErr(t *testing.T) { 46 | conn := libnet.NewConn(mockconn.CreateConn(pongBytes, 100), time.Second, time.Second) 47 | c := conn.Conn.(*mockconn.MockConn) 48 | c.Err = errors.New("some error") 49 | pinger := NewPinger(conn) 50 | err := pinger.Ping() 51 | assert.EqualError(t, err, "some error") 52 | } 53 | 54 | func TestPingerClosed(t *testing.T) { 55 | conn := libnet.NewConn(mockconn.CreateConn(pongBytes, 100), time.Second, time.Second) 56 | pinger := NewPinger(conn) 57 | err := pinger.Close() 58 | assert.NoError(t, err) 59 | 60 | err = pinger.Ping() 61 | assert.Error(t, err) 62 | assert.NoError(t, pinger.Close()) 63 | } 64 | 65 | func TestPingerNotReturnPong(t *testing.T) { 66 | conn := libnet.NewConn(mockconn.CreateConn([]byte("baka\r\n"), 100), time.Second, time.Second) 67 | pinger := NewPinger(conn) 68 | err := pinger.Ping() 69 | assert.Error(t, err) 70 | _causeEqual(t, ErrPingerPong, err) 71 | } 72 | -------------------------------------------------------------------------------- /proxy/proto/memcache/request_test.go: -------------------------------------------------------------------------------- 1 | package memcache 2 | 3 | import ( 4 | "regexp" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | var _allReqTypes = []RequestType{ 11 | RequestTypeUnknown, 12 | 
RequestTypeSet, 13 | RequestTypeAdd, 14 | RequestTypeReplace, 15 | RequestTypeAppend, 16 | RequestTypePrepend, 17 | RequestTypeCas, 18 | RequestTypeGet, 19 | RequestTypeGets, 20 | RequestTypeDelete, 21 | RequestTypeIncr, 22 | RequestTypeDecr, 23 | RequestTypeTouch, 24 | RequestTypeGat, 25 | RequestTypeGats, 26 | } 27 | 28 | func TestRequestTypeString(t *testing.T) { 29 | reg := regexp.MustCompile(`[a-z]+`) 30 | for _, rtype := range _allReqTypes { 31 | assert.True(t, reg.Match(rtype.Bytes())) 32 | assert.True(t, reg.MatchString(rtype.String())) 33 | } 34 | } 35 | 36 | func TestMCRequestFuncsOk(t *testing.T) { 37 | req := &MCRequest{ 38 | respType: RequestTypeGet, 39 | key: []byte("abc"), 40 | data: []byte("\r\n"), 41 | } 42 | assert.Equal(t, []byte("get"), req.Cmd()) 43 | assert.Equal(t, "abc", string(req.Key())) 44 | assert.Equal(t, "type:get key:abc data:\r\n", req.String()) 45 | 46 | req.Put() 47 | 48 | assert.Equal(t, RequestTypeUnknown, req.respType) 49 | assert.Equal(t, []byte{}, req.key) 50 | assert.Equal(t, []byte{}, req.data) 51 | } 52 | -------------------------------------------------------------------------------- /proxy/proto/message_test.go: -------------------------------------------------------------------------------- 1 | package proto 2 | 3 | import ( 4 | "errors" 5 | "sync" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestMessage(t *testing.T) { 13 | msgs := GetMsgs(1) 14 | assert.Len(t, msgs, 1) 15 | PutMsgs(msgs) 16 | msgs = GetMsgs(1, 1) 17 | assert.Len(t, msgs, 1) 18 | PutMsgs(msgs) 19 | 20 | msg := NewMessage() 21 | msg.Reset() 22 | msg.clear() 23 | msg.WithRequest(&mockRequest{}) 24 | msg.ResetSubs() 25 | 26 | msg.WithRequest(&mockRequest{}) 27 | req := msg.Request() 28 | assert.NotNil(t, req) 29 | reqs := msg.Requests() 30 | assert.Len(t, reqs, 2) 31 | isb := msg.IsBatch() 32 | assert.True(t, isb) 33 | msgs = msg.Batch() 34 | assert.Len(t, msgs, 2) 35 | 36 | msg.ResetSubs() 37 | req = msg.NextReq() 38 | assert.NotNil(t, req) 39 | 40 | wg := &sync.WaitGroup{} 41 | msg.WithWaitGroup(wg) 42 | msg.Add() 43 | msg.Done() 44 | 45 | msg.MarkStart() 46 | time.Sleep(time.Millisecond * 50) 47 | msg.MarkRead() 48 | time.Sleep(time.Millisecond * 50) 49 | msg.MarkWrite() 50 | time.Sleep(time.Millisecond * 50) 51 | msg.MarkEnd() 52 | ts := msg.TotalDur() 53 | assert.NotZero(t, ts) 54 | ts = msg.RemoteDur() 55 | assert.NotZero(t, ts) 56 | 57 | msg.WithError(errors.New("some error")) 58 | err := msg.Err() 59 | assert.EqualError(t, err, "some error") 60 | 61 | emsg := ErrMessage(errors.New("some error")) 62 | err = emsg.Err() 63 | assert.EqualError(t, err, "some error") 64 | } 65 | -------------------------------------------------------------------------------- /proxy/proto/pipe_test.go: -------------------------------------------------------------------------------- 1 | package proto 2 | 3 | import ( 4 | "crypto/rand" 5 | "errors" 6 | "sync" 7 | "testing" 8 | "time" 9 | 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | type mockNodeConn struct { 14 | closed bool 15 | count, num int 16 | err error 17 | } 18 | 19 | func (n *mockNodeConn) Addr() string { 20 | return "mock" 21 | } 22 | 23 | func (n *mockNodeConn) Cluster() string { 24 | return "mock" 25 | } 26 | 27 | func (n *mockNodeConn) Write(*Message) error { return nil } 28 | func (n *mockNodeConn) Read(*Message) error { 29 | if n.count == n.num { 30 | return n.err 31 | } 32 | n.count++ 33 | return nil 34 | } 35 | func (n *mockNodeConn) Flush() error { return nil } 36 | func 
(n *mockNodeConn) Close() error { 37 | n.closed = true 38 | return nil 39 | } 40 | 41 | type mockRequest struct{} 42 | 43 | func (r *mockRequest) Merge([]Request) error { 44 | return nil 45 | } 46 | 47 | func (*mockRequest) CmdString() string { return "" } 48 | func (*mockRequest) Cmd() []byte { return nil } 49 | func (*mockRequest) Key() []byte { 50 | bs := make([]byte, 8) 51 | rand.Read(bs) 52 | return bs 53 | } 54 | func (*mockRequest) Put() {} 55 | func (*mockRequest) Slowlog() *SlowlogEntry { return nil } 56 | 57 | func TestPipe(t *testing.T) { 58 | nc1 := &mockNodeConn{} 59 | ncp1 := NewNodeConnPipe(1, 32, func() NodeConn { 60 | return nc1 61 | }) 62 | nc2 := &mockNodeConn{} 63 | ncp2 := NewNodeConnPipe(2, 32, func() NodeConn { 64 | return nc2 65 | }) 66 | wg := &sync.WaitGroup{} 67 | for i := 0; i < 10; i++ { 68 | m := getMsg() 69 | m.WithRequest(&mockRequest{}) 70 | m.WithWaitGroup(wg) 71 | ncp1.Push(m) 72 | ncp2.Push(m) 73 | } 74 | wg.Wait() 75 | ncp1.Close() 76 | ncp2.Close() 77 | time.Sleep(10 * time.Millisecond) 78 | assert.True(t, nc1.closed) 79 | assert.True(t, nc2.closed) 80 | 81 | const whenErrNum = 3 82 | nc3 := &mockNodeConn{} 83 | nc3.num = whenErrNum 84 | nc3.err = errors.New("some error") 85 | ncp3 := NewNodeConnPipe(1, 32, func() NodeConn { 86 | return nc3 87 | }) 88 | wg = &sync.WaitGroup{} 89 | var msgs []*Message 90 | for i := 0; i < 10; i++ { 91 | m := getMsg() 92 | m.WithRequest(&mockRequest{}) 93 | m.WithWaitGroup(wg) 94 | ncp3.Push(m) 95 | msgs = append(msgs, m) 96 | } 97 | wg.Wait() 98 | ncp3.Close() 99 | time.Sleep(100 * time.Millisecond) 100 | for _, msg := range msgs { 101 | assert.EqualError(t, msg.Err(), "some error") 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /proxy/proto/redis/cluster/fetch.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "bytes" 5 | errs "errors" 6 | 7 | "overlord/pkg/bufio" 8 | libnet "overlord/pkg/net" 9 | "overlord/proxy/proto/redis" 10 | 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | const ( 15 | respFetch = '$' 16 | ) 17 | 18 | var ( 19 | crlfBytes = []byte("\r\n") 20 | ) 21 | 22 | // fetcher will execute `CLUSTER NODES` by the given address。 23 | type fetcher struct { 24 | conn *libnet.Conn 25 | bw *bufio.Writer 26 | br *bufio.Reader 27 | } 28 | 29 | var ( 30 | cmdClusterNodesBytes = []byte("*2\r\n$7\r\nCLUSTER\r\n$5\r\nNODES\r\n") 31 | 32 | // ErrBadReplyType error bad reply type 33 | ErrBadReplyType = errs.New("fetcher CLUSTER NODES bad reply type") 34 | ) 35 | 36 | // newFetcher will create new fetcher 37 | func newFetcher(conn *libnet.Conn) *fetcher { 38 | f := &fetcher{ 39 | conn: conn, 40 | br: bufio.NewReader(conn, bufio.Get(4096)), 41 | bw: bufio.NewWriter(conn), 42 | } 43 | return f 44 | } 45 | 46 | // Fetch new CLUSTER NODES result 47 | func (f *fetcher) fetch() (ns *nodeSlots, err error) { 48 | if err = f.bw.Write(cmdClusterNodesBytes); err != nil { 49 | err = errors.WithStack(err) 50 | return 51 | } 52 | if err = f.bw.Flush(); err != nil { 53 | err = errors.WithStack(err) 54 | return 55 | } 56 | var data []byte 57 | begin := f.br.Mark() 58 | for { 59 | err = f.br.Read() 60 | if err != nil { 61 | err = errors.WithStack(err) 62 | return 63 | } 64 | reply := &redis.RESP{} 65 | if err = reply.Decode(f.br); err == bufio.ErrBufferFull { 66 | f.br.AdvanceTo(begin) 67 | continue 68 | } else if err != nil { 69 | err = errors.WithStack(err) 70 | return 71 | } 72 | if reply.Type() != respFetch { 
73 | err = errors.WithStack(ErrBadReplyType) 74 | return 75 | } 76 | data = reply.Data() 77 | idx := bytes.Index(data, crlfBytes) 78 | data = data[idx+2:] 79 | break 80 | } 81 | return parseSlots(data) 82 | } 83 | 84 | // Close enable to close the conneciton of backend. 85 | func (f *fetcher) Close() error { 86 | return f.conn.Close() 87 | } 88 | -------------------------------------------------------------------------------- /proxy/proto/redis/cluster/proxy_conn.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "bytes" 5 | errs "errors" 6 | "overlord/pkg/conv" 7 | libnet "overlord/pkg/net" 8 | "overlord/proxy/proto" 9 | "overlord/proxy/proto/redis" 10 | 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | var ( 15 | cmdClusterBytes = []byte("7\r\nCLUSTER") 16 | cmdNodesBytes = []byte("5\r\nNODES") 17 | cmdSlotsBytes = []byte("5\r\nSLOTS") 18 | notSupportBytes = []byte("-Error: command not support\r\n") 19 | ) 20 | 21 | // errors 22 | var ( 23 | ErrInvalidArgument = errs.New("cluster command with wrong argument") 24 | ) 25 | 26 | type proxyConn struct { 27 | c *cluster 28 | pc proto.ProxyConn 29 | } 30 | 31 | // NewProxyConn creates new redis cluster Encoder and Decoder. 32 | func NewProxyConn(conn *libnet.Conn, fer proto.Forwarder) proto.ProxyConn { 33 | var c *cluster 34 | if fer != nil { 35 | c = fer.(*cluster) 36 | } 37 | r := &proxyConn{ 38 | c: c, 39 | pc: redis.NewProxyConn(conn, false), 40 | } 41 | return r 42 | } 43 | 44 | func (pc *proxyConn) Decode(msgs []*proto.Message) ([]*proto.Message, error) { 45 | return pc.pc.Decode(msgs) 46 | } 47 | 48 | func (pc *proxyConn) Encode(m *proto.Message) (err error) { 49 | if !m.IsBatch() { 50 | req := m.Request().(*redis.Request) 51 | if !req.IsSupport() && !req.IsCtl() { 52 | resp := req.RESP() 53 | arr := resp.Array() 54 | if bytes.Equal(arr[0].Data(), cmdClusterBytes) { 55 | if len(arr) == 2 { 56 | // CLUSTER COMMANDS 57 | conv.UpdateToUpper(arr[1].Data()) // NOTE: when arr[0] is CLUSTER, upper arr[1] 58 | pcc := pc.pc.(*redis.ProxyConn) 59 | if bytes.Equal(arr[1].Data(), cmdNodesBytes) { 60 | // CLUSTER NODES 61 | err = pcc.Bw().Write(pc.c.fakeNodesBytes) 62 | return 63 | } else if bytes.Equal(arr[1].Data(), cmdSlotsBytes) { 64 | // CLUSTER SLOTS 65 | err = pcc.Bw().Write(pc.c.fakeSlotsBytes) 66 | return 67 | } 68 | err = pcc.Bw().Write(notSupportBytes) 69 | return 70 | } 71 | err = errors.WithStack(ErrInvalidArgument) 72 | return 73 | } 74 | } 75 | } 76 | return pc.pc.Encode(m) 77 | } 78 | 79 | func (pc *proxyConn) Flush() (err error) { 80 | return pc.pc.Flush() 81 | } 82 | -------------------------------------------------------------------------------- /proxy/proto/redis/cluster/slot_test.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | const ( 10 | _slotDemo = "6b22f87b78cdb181f7b9b1e0298da177606394f7 172.17.0.2:7003@17003 slave 8f02f3135c65482ac00f217df0edb6b9702691f8 0 1532770704000 4 connected\n" + 11 | "dff2f7b0fbda82c72d426eeb9616d9d6455bb4ff 172.17.0.2:7004@17004 slave 828c400ea2b55c43e5af67af94bec4943b7b3d93 0 1532770704538 5 connected\n" + 12 | "b1798ba2171a4bd765846ddb5d5bdc9f3ca6fdf3 172.17.0.2:7000@17000 master - 0 1532770705458 1 connected 0-5460\n" + 13 | "db2dd7d6fbd2a03f16f6ab61d0576edc9c3b04e2 172.17.0.2:7005@17005 slave b1798ba2171a4bd765846ddb5d5bdc9f3ca6fdf3 0 1532770704437 6 connected\n" + 14 | 
"828c400ea2b55c43e5af67af94bec4943b7b3d93 172.17.0.2:7002@17002 master - 0 1532770704000 3 connected 10923-16383\n" + 15 | "8f02f3135c65482ac00f217df0edb6b9702691f8 172.17.0.2:7001@17001 myself,master - 0 1532770703000 2 connected 5461-10922\n" 16 | ) 17 | 18 | var ( 19 | _masterMap = map[string]struct{}{ 20 | "172.17.0.2:7000": struct{}{}, 21 | "172.17.0.2:7001": struct{}{}, 22 | "172.17.0.2:7002": struct{}{}, 23 | } 24 | ) 25 | 26 | func TestParseSlot(t *testing.T) { 27 | ns, err := parseSlots([]byte(_slotDemo)) 28 | assert.NoError(t, err) 29 | assert.NotNil(t, ns) 30 | 31 | masters := ns.getMasters() 32 | for _, master := range masters { 33 | _, ok := _masterMap[master] 34 | assert.True(t, ok) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /proxy/proto/redis/pinger.go: -------------------------------------------------------------------------------- 1 | package redis 2 | 3 | import ( 4 | "bytes" 5 | errs "errors" 6 | "sync/atomic" 7 | 8 | "overlord/pkg/bufio" 9 | libnet "overlord/pkg/net" 10 | "overlord/proxy/proto" 11 | 12 | "github.com/pkg/errors" 13 | ) 14 | 15 | const ( 16 | pingBufferSize = 128 17 | ) 18 | 19 | // errors 20 | var ( 21 | ErrPingClosed = errs.New("ping interface has been closed") 22 | ErrBadPong = errs.New("pong response payload is bad") 23 | ) 24 | 25 | var ( 26 | pingBytes = []byte("*1\r\n$4\r\nPING\r\n") 27 | pongBytes = []byte("+PONG\r\n") 28 | ) 29 | 30 | type pinger struct { 31 | conn *libnet.Conn 32 | 33 | br *bufio.Reader 34 | bw *bufio.Writer 35 | 36 | state int32 37 | } 38 | 39 | // NewPinger new pinger. 40 | func NewPinger(conn *libnet.Conn) proto.Pinger { 41 | return &pinger{ 42 | conn: conn, 43 | br: bufio.NewReader(conn, bufio.NewBuffer(pingBufferSize)), 44 | bw: bufio.NewWriter(conn), 45 | state: opened, 46 | } 47 | } 48 | 49 | func (p *pinger) Ping() (err error) { 50 | if atomic.LoadInt32(&p.state) == closed { 51 | err = errors.WithStack(ErrPingClosed) 52 | return 53 | } 54 | _ = p.bw.Write(pingBytes) 55 | if err = p.bw.Flush(); err != nil { 56 | err = errors.WithStack(err) 57 | return 58 | } 59 | _ = p.br.Read() 60 | defer p.br.Buffer().Reset() 61 | data, err := p.br.ReadLine() 62 | if err != nil { 63 | err = errors.WithStack(err) 64 | return 65 | } 66 | if !bytes.Equal(data, pongBytes) { 67 | err = errors.WithStack(ErrBadPong) 68 | } 69 | return 70 | } 71 | 72 | func (p *pinger) Close() error { 73 | if atomic.CompareAndSwapInt32(&p.state, opened, closed) { 74 | return p.conn.Close() 75 | } 76 | return nil 77 | } 78 | -------------------------------------------------------------------------------- /proxy/proto/redis/pinger_test.go: -------------------------------------------------------------------------------- 1 | package redis 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "overlord/pkg/mockconn" 8 | libnet "overlord/pkg/net" 9 | 10 | "github.com/pkg/errors" 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | func TestPingerPingOk(t *testing.T) { 15 | conn := libnet.NewConn(mockconn.CreateConn(pongBytes, 1), time.Second, time.Second) 16 | p := NewPinger(conn) 17 | err := p.Ping() 18 | assert.NoError(t, err) 19 | } 20 | 21 | func TestPingerClosed(t *testing.T) { 22 | conn := libnet.NewConn(mockconn.CreateConn(pongBytes, 10), time.Second, time.Second) 23 | p := NewPinger(conn) 24 | assert.NoError(t, p.Close()) 25 | err := p.Ping() 26 | assert.Equal(t, ErrPingClosed, errors.Cause(err)) 27 | assert.NoError(t, p.Close()) 28 | } 29 | 30 | func TestPingerWrongResp(t *testing.T) { 31 | conn := 
libnet.NewConn(mockconn.CreateConn([]byte("-Error: iam more than 7 bytes\r\n"), 1), time.Second, time.Second) 32 | p := NewPinger(conn) 33 | err := p.Ping() 34 | assert.Equal(t, ErrBadPong, errors.Cause(err)) 35 | conn = libnet.NewConn(mockconn.CreateConn([]byte("-Err\r\n"), 1), time.Second, time.Second) 36 | p = NewPinger(conn) 37 | err = p.Ping() 38 | assert.Equal(t, ErrBadPong, errors.Cause(err)) 39 | } 40 | 41 | func TestPingerPingErr(t *testing.T) { 42 | conn := libnet.NewConn(mockconn.CreateConn(pingBytes, 1), time.Second, time.Second) 43 | c := conn.Conn.(*mockconn.MockConn) 44 | c.Err = errors.New("some error") 45 | p := NewPinger(conn) 46 | err := p.Ping() 47 | assert.EqualError(t, err, "some error") 48 | } 49 | -------------------------------------------------------------------------------- /proxy/proto/redis/request_test.go: -------------------------------------------------------------------------------- 1 | package redis 2 | 3 | import ( 4 | "overlord/proxy/proto" 5 | "testing" 6 | "time" 7 | 8 | "overlord/pkg/bufio" 9 | "overlord/pkg/mockconn" 10 | libnet "overlord/pkg/net" 11 | 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func TestRequestNewRequest(t *testing.T) { 16 | var bs = []byte("*2\r\n$4\r\nLLEN\r\n$6\r\nmylist\r\n") 17 | // conn 18 | conn := libnet.NewConn(mockconn.CreateConn(bs, 1), time.Second, time.Second) 19 | br := bufio.NewReader(conn, bufio.Get(1024)) 20 | br.Read() 21 | req := getReq() 22 | err := req.resp.decode(br) 23 | assert.Nil(t, err) 24 | assert.Equal(t, mergeTypeNo, req.mType) 25 | assert.Equal(t, 2, req.resp.arraySize) 26 | assert.Equal(t, "LLEN", req.CmdString()) 27 | assert.Equal(t, []byte("LLEN"), req.Cmd()) 28 | assert.Equal(t, "mylist", string(req.Key())) 29 | assert.True(t, req.IsSupport()) 30 | assert.False(t, req.IsCtl()) 31 | } 32 | 33 | func TestMergeRequest(t *testing.T) { 34 | bs := []byte("*7\r\n$4\r\nMSET\r\n$2\r\nk1\r\n$2\r\nv1\r\n$2\r\nk2\r\n$2\r\nv2\r\n$2\r\nk3\r\n$2\r\nv3\r\n") 35 | conn := libnet.NewConn(mockconn.CreateConn(bs, 1), time.Second, time.Second) 36 | pc := NewProxyConn(conn, true) 37 | // message 38 | msg := proto.NewMessage() 39 | msg.WithRequest(getReq()) 40 | msg.WithRequest(getReq()) 41 | msg.WithRequest(getReq()) 42 | msg.Reset() 43 | // decode 44 | msgs, err := pc.Decode([]*proto.Message{msg}) 45 | assert.NoError(t, err) 46 | assert.Len(t, msgs, 1) 47 | // merge 48 | msg = msgs[0] 49 | reqs := msg.Requests() 50 | mainReq := reqs[0].(*Request) 51 | err = mainReq.Merge(reqs[1:]) 52 | assert.NoError(t, err) 53 | assert.Len(t, mainReq.resp.array, 7) 54 | assert.Equal(t, []byte("7"), mainReq.resp.data) 55 | assert.Equal(t, []byte("4\r\nMSET"), mainReq.resp.array[0].data) 56 | assert.Equal(t, []byte("2\r\nk1"), mainReq.resp.array[1].data) 57 | assert.Equal(t, []byte("2\r\nv1"), mainReq.resp.array[2].data) 58 | assert.Equal(t, []byte("2\r\nk2"), mainReq.resp.array[3].data) 59 | assert.Equal(t, []byte("2\r\nv2"), mainReq.resp.array[4].data) 60 | assert.Equal(t, []byte("2\r\nk3"), mainReq.resp.array[5].data) 61 | assert.Equal(t, []byte("2\r\nv3"), mainReq.resp.array[6].data) 62 | } 63 | 64 | func BenchmarkCmdTypeCheck(b *testing.B) { 65 | req := getReq() 66 | req.resp.array = append(req.resp.array, &resp{ 67 | data: []byte("3\r\nSET"), 68 | }) 69 | for i := 0; i < b.N; i++ { 70 | req.IsSupport() 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /proxy/proto/slowlog.go: -------------------------------------------------------------------------------- 1 | package 
proto 2 | 3 | import ( 4 | "time" 5 | 6 | "overlord/pkg/types" 7 | ) 8 | 9 | const copyCollapsedMaxLength = 256 - 3 10 | 11 | // SlowlogEntries is the struct with additional information of slowlog 12 | type SlowlogEntries struct { 13 | Cluster string `json:"cluster"` 14 | Entries []*SlowlogEntry `json:"entries"` 15 | } 16 | 17 | // NewSlowlogEntry builds an empty slowlog entry 18 | func NewSlowlogEntry(ctype types.CacheType) *SlowlogEntry { 19 | return &SlowlogEntry{ 20 | CacheType: ctype, 21 | Cmd: nil, 22 | StartTime: defaultTime, 23 | TotalDur: time.Duration(0), 24 | RemoteDur: time.Duration(0), 25 | Subs: nil, 26 | } 27 | } 28 | 29 | // SlowlogEntry is each slowlog item 30 | type SlowlogEntry struct { 31 | Cluster string `json:"cluster,omitempty"` 32 | CacheType types.CacheType 33 | Cmd []string 34 | 35 | StartTime time.Time 36 | TotalDur time.Duration 37 | RemoteDur time.Duration 38 | WaitWriteDur time.Duration 39 | PreEndDur time.Duration 40 | PipeDur time.Duration 41 | InputDur time.Duration 42 | Addr string 43 | Subs []*SlowlogEntry `json:"Subs,omitempty"` 44 | } 45 | 46 | // collapseSymbol is the filler string. 47 | var collapseSymbol = []byte("...") 48 | 49 | // CollapseBody copies src as-is if it is shorter than copyCollapsedMaxLength (253) bytes; 50 | // otherwise it returns a 256-byte collapsed body that keeps only the first 51 | // 256 - len("...") = 253 bytes and appends "..." 52 | func CollapseBody(src []byte) (dst []byte) { 53 | if len(src) < copyCollapsedMaxLength { 54 | dst = make([]byte, len(src)) 55 | copy(dst, src) 56 | return 57 | } 58 | 59 | dst = make([]byte, 256) 60 | copy(dst, src[:copyCollapsedMaxLength]) 61 | copy(dst[copyCollapsedMaxLength:], collapseSymbol) 62 | return 63 | } 64 | -------------------------------------------------------------------------------- /proxy/proto/types.go: -------------------------------------------------------------------------------- 1 | package proto 2 | 3 | import ( 4 | "errors" 5 | ) 6 | 7 | // defined common errors 8 | var ( 9 | ErrQuit = errors.New("close client conn") 10 | ) 11 | 12 | // Slowlogger is the type which can convert itself into a slowlog entry 13 | type Slowlogger interface { 14 | Slowlog() *SlowlogEntry 15 | } 16 | 17 | // Request request interface. 18 | type Request interface { 19 | CmdString() string 20 | Cmd() []byte 21 | Key() []byte 22 | Put() 23 | Merge([]Request) error 24 | 25 | Slowlogger 26 | } 27 | 28 | // ProxyConn decodes bytes from the client and encodes responses back to the conn. 29 | type ProxyConn interface { 30 | Decode([]*Message) ([]*Message, error) 31 | Encode(msg *Message) error 32 | Flush() error 33 | } 34 | 35 | // NodeConn writes messages to the backend cache server and reads responses. 36 | type NodeConn interface { 37 | Write(*Message) error 38 | Read(*Message) error 39 | Flush() error 40 | Close() error 41 | Addr() string 42 | Cluster() string 43 | } 44 | 45 | // Pinger pings a backend node to check liveness. 46 | type Pinger interface { 47 | Ping() error 48 | Close() error 49 | } 50 | 51 | // Forwarder is the interface for the backend to run and process messages.
52 | type Forwarder interface { 53 | Forward([]*Message) error 54 | Close() error 55 | Update(servers []string) error 56 | } 57 | -------------------------------------------------------------------------------- /proxy/slowlog/http.go: -------------------------------------------------------------------------------- 1 | package slowlog 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | 8 | "overlord/proxy/proto" 9 | ) 10 | 11 | // showlog will show slowlog to http 12 | func showlog(w http.ResponseWriter, _req *http.Request) { 13 | storeLock.RLock() 14 | var slogs = make([]*proto.SlowlogEntries, len(storeMap)) 15 | idx := 0 16 | for _, s := range storeMap { 17 | slogs[idx] = s.Reply() 18 | idx++ 19 | } 20 | storeLock.RUnlock() 21 | 22 | encoder := json.NewEncoder(w) 23 | err := encoder.Encode(slogs) 24 | if err != nil { 25 | http.Error(w, fmt.Sprintf("%s", err), http.StatusInternalServerError) 26 | } 27 | } 28 | 29 | // registerSlowlogHTTP will register slowlog by /slowlog 30 | func registerSlowlogHTTP() { 31 | http.HandleFunc("/slowlog", showlog) 32 | } 33 | -------------------------------------------------------------------------------- /proxy/slowlog/slowlog.go: -------------------------------------------------------------------------------- 1 | package slowlog 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | 7 | "overlord/pkg/log" 8 | "overlord/proxy/proto" 9 | ) 10 | 11 | const slowlogMaxCount = 1024 12 | 13 | func newStore(name string) *Store { 14 | return &Store{ 15 | name: name, 16 | cursor: 0, 17 | msgs: make([]atomic.Value, slowlogMaxCount), 18 | } 19 | } 20 | 21 | // Store is the collector of slowlog 22 | type Store struct { 23 | name string 24 | cursor uint32 25 | msgs []atomic.Value 26 | } 27 | 28 | // Record impl the Handler 29 | func (s *Store) Record(msg *proto.SlowlogEntry) { 30 | if msg == nil { 31 | return 32 | } 33 | 34 | for { 35 | if atomic.CompareAndSwapUint32(&s.cursor, s.cursor, s.cursor+1) { 36 | idx := (s.cursor - 1) % slowlogMaxCount 37 | s.msgs[idx].Store(msg) 38 | if fh != nil { 39 | fh.save(s.name, msg) 40 | } 41 | break 42 | } 43 | } 44 | } 45 | 46 | // Reply impl the Replyer 47 | func (s *Store) Reply() *proto.SlowlogEntries { 48 | entries := make([]*proto.SlowlogEntry, 0) 49 | for _, msg := range s.msgs { 50 | m := msg.Load() 51 | if m == nil { 52 | break 53 | } 54 | sentry := m.(*proto.SlowlogEntry) 55 | entries = append(entries, sentry) 56 | } 57 | 58 | ses := &proto.SlowlogEntries{ 59 | Cluster: s.name, 60 | Entries: entries, 61 | } 62 | return ses 63 | } 64 | 65 | var ( 66 | storeMap = map[string]*Store{} 67 | storeLock sync.RWMutex 68 | ) 69 | 70 | // Handler is the handler which contains the store instance with async call 71 | type Handler interface { 72 | Record(msg *proto.SlowlogEntry) 73 | Reply() *proto.SlowlogEntries 74 | } 75 | 76 | // Get create the message Handler or get the exists one 77 | func Get(name string) Handler { 78 | storeLock.RLock() 79 | if s, ok := storeMap[name]; ok { 80 | storeLock.RUnlock() 81 | return s 82 | } 83 | storeLock.RUnlock() 84 | 85 | storeLock.Lock() 86 | defer storeLock.Unlock() 87 | s := newStore(name) 88 | storeMap[name] = s 89 | return s 90 | } 91 | 92 | // Init slowlog with file and http 93 | func Init(fileName string, maxBytes int, backupCount int) error { 94 | registerSlowlogHTTP() 95 | if fileName == "" { 96 | return nil 97 | } 98 | log.Infof("setup slowlog for file [%s]", fileName) 99 | return initFileHandler(fileName, maxBytes, backupCount) 100 | } 101 | 
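The slowlog pieces above fit together like this: `slowlog.Get` lazily creates one `Store` per cluster, `Record` writes entries into its 1024-slot ring buffer (and mirrors them to the optional file handler), and `Reply` snapshots them as the same `SlowlogEntries` payload that the `/slowlog` HTTP handler encodes. Below is a minimal usage sketch, not part of the repo; the `CacheTypeRedis` constant in `overlord/pkg/types` is an assumption, everything else is taken from the files shown above.

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"

	"overlord/pkg/types"
	"overlord/proxy/proto"
	"overlord/proxy/slowlog"
)

func main() {
	// Get creates the named Store on first use and reuses it afterwards.
	h := slowlog.Get("default")

	// Build an entry the way proxy code would; CollapseBody keeps long
	// payloads bounded to 253 bytes plus "...".
	entry := proto.NewSlowlogEntry(types.CacheTypeRedis) // CacheTypeRedis is assumed here
	entry.Cmd = []string{"GET", string(proto.CollapseBody([]byte("some-very-long-key")))}
	entry.TotalDur = 25 * time.Millisecond

	// Record stores the entry in the ring buffer (and saves it to the
	// slowlog file when one was configured via Init).
	h.Record(entry)

	// Reply returns the same structure that the /slowlog endpoint serves.
	out, _ := json.Marshal(h.Reply())
	fmt.Println(string(out))
}
```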
-------------------------------------------------------------------------------- /proxy/slowlog/slowlog_test.go: -------------------------------------------------------------------------------- 1 | package slowlog 2 | 3 | import ( 4 | "fmt" 5 | "github.com/stretchr/testify/assert" 6 | "os" 7 | "sync/atomic" 8 | "testing" 9 | ) 10 | 11 | //in slowlog cursorInt32 init val is -1 12 | var cursorInt32 int32 13 | 14 | //in slowlog cursorUint32 init val is 0 15 | var cursorUint32 uint32 16 | 17 | func TestRecordWithCursorUint32(t *testing.T) { 18 | //now cursorUint32 is uint32's max val 19 | cursorUint32 = ^uint32(0) 20 | idxOk := true 21 | for i := 1; i < 10; i++ { 22 | if atomic.CompareAndSwapUint32(&cursorUint32, cursorUint32, cursorUint32+1) { 23 | idx := (cursorUint32 - 1) % slowlogMaxCount 24 | fmt.Fprintf(os.Stdout, "%d mod 1024 = %d\n", cursorUint32-1, idx) 25 | if idx < 0 { 26 | idxOk = false 27 | break 28 | } 29 | } 30 | } 31 | assert.True(t, idxOk) 32 | } 33 | 34 | func TestRecordWithCursorInt32(t *testing.T) { 35 | //now cursorInt32 is int32's max val 36 | cursorInt32 = int32(^uint32(0) >> 1) 37 | idxOk := true 38 | for i := 1; i < 10; i++ { 39 | if atomic.CompareAndSwapInt32(&cursorInt32, cursorInt32, cursorInt32+1) { 40 | idx := cursorInt32 % slowlogMaxCount 41 | if idx < 0 { 42 | fmt.Fprintf(os.Stdout, "%d mod 1024 = %d\n", cursorInt32, idx) 43 | idxOk = false 44 | break 45 | } 46 | } 47 | } 48 | assert.False(t, idxOk) 49 | } 50 | -------------------------------------------------------------------------------- /scripts/codecov.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | echo "" > coverage.txt 5 | 6 | for d in $(go list ./... | grep -v vendor | grep -v cmd|grep -v enri); do 7 | echo "testing for $d ..." 
8 | go test -gcflags="-N -l" -v -coverprofile=profile.out -covermode=atomic $d 9 | if [ -f profile.out ]; then 10 | cat profile.out >> coverage.txt 11 | rm profile.out 12 | fi 13 | done 14 | -------------------------------------------------------------------------------- /scripts/fuzz_tools.py: -------------------------------------------------------------------------------- 1 | import shlex 2 | import subprocess 3 | import sys 4 | import os 5 | import functools 6 | import time 7 | 8 | 9 | def timeit(f): 10 | @functools.wraps(f) 11 | def inner(*args, **kwargs): 12 | before = time.time() 13 | try: 14 | f(*args, **kwargs) 15 | except Exception: 16 | raise 17 | finally: 18 | after = time.time() 19 | print("elapsed %f seconds" % (after - before,)) 20 | 21 | return inner 22 | 23 | 24 | @timeit 25 | def check_gen(cwd): 26 | corpus_dir = os.path.join(cwd, "corpus") 27 | if not os.path.exists(corpus_dir): 28 | os.mkdir(corpus_dir) 29 | arr = list(os.listdir(corpus_dir)) 30 | if arr: 31 | return 32 | print("generating corpus...") 33 | 34 | gen_file = os.path.join(cwd, "gen", "main.go") 35 | if not os.path.exists(gen_file): 36 | print("skip corpus generation because %s does not exist" % (gen_file,)) 37 | return 38 | out = subprocess.check_call( 39 | shlex.split("go run ./gen/main.go -out ./corpus/"), cwd=cwd) 40 | print(out) 41 | 42 | 43 | @timeit 44 | def check_and_build_fuzz(cwd): 45 | for item in os.listdir(cwd): 46 | if item.endswith(".zip"): 47 | print("found %s, using cached build" % (item, )) 48 | print("if you have changed your code, rm %s first" % (item, )) 49 | return 50 | 51 | print("running go-fuzz-build, that may take a long time") 52 | print( 53 | "WARNING: you must set http_proxy and https_proxy on the command line to download https://golang.org/x/net?go-get=1 dependencies" 54 | ) 55 | print("if not, that may take a long time and fail in the end.") 56 | out = subprocess.check_output(shlex.split("go-fuzz-build"), cwd=cwd) 57 | print(out) 58 | 59 | 60 | def main(): 61 | cwd = sys.argv[1] 62 | check_gen(cwd) 63 | check_and_build_fuzz(cwd) 64 | 65 | sub = subprocess.Popen(shlex.split("go-fuzz"), cwd=cwd) 66 | sub.communicate() 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /scripts/init-ci-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | export HostIP="127.0.0.1" 6 | 7 | docker run -d -v /usr/share/ca-certificates/:/etc/ssl/certs -p 4001:4001 -p 2380:2380 -p 2379:2379 \ 8 | --name etcd quay.io/coreos/etcd:v2.3.8 \ 9 | -name etcd0 \ 10 | -advertise-client-urls http://${HostIP}:2379,http://${HostIP}:4001 \ 11 | -listen-client-urls http://0.0.0.0:2379,http://0.0.0.0:4001 \ 12 | -initial-advertise-peer-urls http://${HostIP}:2380 \ 13 | -listen-peer-urls http://0.0.0.0:2380 \ 14 | -initial-cluster-token etcd-cluster-1 \ 15 | -initial-cluster etcd0=http://${HostIP}:2380 \ 16 | -initial-cluster-state new 17 | 18 | docker pull grokzen/redis-cluster:4.0.9 19 | docker run -e "IP=0.0.0.0" -d -p 7000-7007:7000-7007 grokzen/redis-cluster:4.0.9 20 | docker run -e "IP=0.0.0.0" -d -p 8000-8007:8000-8007 grokzen/redis-cluster:4.0.9 21 | 22 | cd /tmp/ 23 | 24 | which memcached 25 | which redis-server 26 | 27 | redis-server --port 9001 --daemonize yes 28 | redis-server --port 9002 --daemonize yes 29 | 30 | memcached -h 31 | memcached -p 9101 -l 0.0.0.0 -d 32 | memcached -p 9102 -l 0.0.0.0 -d 33 | cd - 34 | 
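The init-ci-all.sh script above leaves an etcd container, two redis-cluster containers (ports 7000-7007 and 8000-8007), two standalone redis instances (9001, 9002) and two memcached instances (9101, 9102) running for the test suite. One quick way to confirm the standalone redis side is up is to reuse the project's own pinger; a minimal sketch, not part of the repo, where the `libnet.NewConn` signature and the port are inferred from the tests and the script in this listing:

```go
package main

import (
	"log"
	"net"
	"time"

	libnet "overlord/pkg/net"
	"overlord/proxy/proto/redis"
)

func main() {
	// 127.0.0.1:9001 is one of the redis-server instances started by init-ci-all.sh.
	raw, err := net.Dial("tcp", "127.0.0.1:9001")
	if err != nil {
		log.Fatalf("dial: %v", err)
	}
	// Wrap the raw socket the same way the pinger tests do.
	conn := libnet.NewConn(raw, time.Second, time.Second)

	p := redis.NewPinger(conn)
	defer p.Close()
	if err := p.Ping(); err != nil {
		log.Fatalf("ping: %v", err)
	}
	log.Println("got +PONG, CI redis is reachable")
}
```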
-------------------------------------------------------------------------------- /scripts/install/install-mesos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | MESOS_VERSION=1.7.0 4 | 5 | wget -c http://mirrors.tuna.tsinghua.edu.cn/apache/mesos/${MESOS_VERSION}/mesos-${MESOS_VERSION}.tar.gz -O mesos-${MESOS_VERSION}.tar.gz 6 | 7 | mkdir -p /data/ 8 | tar -zxvf mesos-${MESOS_VERSION}.tar.gz -C /data/ 9 | 10 | cd /data/mesos-${MESOS_VERSION} 11 | apt update 12 | apt-get install -y tar wget git 13 | apt-get install -y openjdk-7-jdk 14 | apt-get install -y autoconf libtool 15 | apt-get -y install build-essential python-dev python-six python-virtualenv libcurl4-nss-dev libsasl2-dev libsasl2-modules maven libapr1-dev libsvn-dev zlib1g-dev iputils-ping 16 | 17 | ./bootstrap 18 | mkdir -p build 19 | cd build 20 | ../configure 21 | make -j 40 22 | make install 23 | -------------------------------------------------------------------------------- /scripts/install/install_java.sh: -------------------------------------------------------------------------------- 1 | curl -L https://download.oracle.com/otn-pub/java/jdk/8u191-b12/2787e4a523244c269598db4e85c51e0c/jdk-8u191-linux-x64.tar.gz -o jdk-8u191-linux-x64.tar.gz 2 | tar zxf jdk-8u191-linux-x64.tar.gz 3 | mkdir -p /usr/local/jdk8/ 4 | mv jdk1.8.0_191/* /usr/local/jdk8/ 5 | update-alternatives --install "/usr/bin/java" "java" "/usr/local/jdk8/bin/java" 1500 6 | update-alternatives --install "/usr/bin/javac" "javac" "/usr/local/jdk8/bin/javac" 1500 7 | update-alternatives --install "/usr/bin/javaws" "javaws" "/usr/local/jdk8/bin/javaws" 1500 -------------------------------------------------------------------------------- /scripts/install/install_zk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ ! -n "$1" ];then 3 | echo "zookeeper myid can not be null,pass myid by arg. 
4 | eg:./install_zk.sh 2" 5 | exit 1 6 | else 7 | myid=$1 8 | fi 9 | echo $myid 10 | zk_ver=3.4.12 11 | ip_array=("host1" "host2" "host3") 12 | ip_num=${#ip_array[@]} 13 | jdk=/usr/local/jdk8 14 | zk_path=/usr/local/zookeeper-${zk_ver} 15 | work_dir=/data/server/zookeeper 16 | log_dir=/data/log/zookeeper 17 | curl -L http://apache.stu.edu.tw/zookeeper/zookeeper-${zk_ver}/zookeeper-${zk_ver}.tar.gz -o zookeeper-${zk_ver}.tar.gz 18 | tar zxf zookeeper-${zk_ver}.tar.gz 19 | mkdir -p $zk_path 20 | mkdir -p $log_dir 21 | mkdir -p $work_dir 22 | echo $myid>$work_dir/myid 23 | mv zookeeper-${zk_ver}/* $zk_path 24 | echo "tickTime=2000" > $zk_path/conf/zoo.cfg 25 | echo "initLimit=10">> $zk_path/conf/zoo.cfg 26 | echo "syncLimit=5" >>$zk_path/conf/zoo.cfg 27 | echo "dataDir=/data/server/zookeeper" >>$zk_path/conf/zoo.cfg 28 | echo "clientPort=2181" >>$zk_path/conf/zoo.cfg 29 | echo "autopurge.snapRetainCount=5" >>$zk_path/conf/zoo.cfg 30 | echo "autopurge.purgeInterval=24" >>$zk_path/conf/zoo.cfg 31 | for ((index=0;index<$ip_num;index++)) 32 | do 33 | tmp=$[$index+1] 34 | echo "server.$tmp=${ip_array[index]}:2888:3888" >>$zk_path/conf/zoo.cfg 35 | done 36 | 37 | ### start by supervisor 38 | apt-get install -y supervisor 39 | supervisor_path=/etc/supervisor/conf.d/zookeeper.conf 40 | echo "[program:zookeeper]">$supervisor_path 41 | echo "command=/usr/local/zookeeper-${zk_ver}/bin/zkServer.sh start-foreground">>$supervisor_path 42 | echo "directory=/usr/local/zookeeper-${zk_ver}">>$supervisor_path 43 | echo 'user=root 44 | autostart=true 45 | autorestart=true 46 | stopsignal=KILL 47 | startsecs=10 48 | startretries=3 49 | stdout_logfile = /data/log/zookeeper/stdout.log 50 | stdout_logfile_backups = 3 51 | stderr_logfile = /data/log/zookeeper/stderr.log 52 | stderr_logfile_backups = 3 53 | logfile_maxbytes=20MB'>>$supervisor_path 54 | echo "environment=JAVA_HOME=\"${jdk}\",JRE_HOME=\"${jdk}/jre\"">>$supervisor_path 55 | supervisorctl update -------------------------------------------------------------------------------- /scripts/install/mesos-agent.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # mesos-agent startup config script using supervisor 3 | apt-get install -y supervisor 4 | # write the mesos startup config 5 | zk_addr=127.0.0.1:2181 6 | echo -e "[program:mesos]">/etc/supervisor/conf.d/mesos-agent.conf 7 | echo -e "command=/data/mesos-1.7.0/build/bin/mesos-agent.sh --master=zk://${zk_addr}/mesos --work_dir=/data/lib/mesos --ip=\c">>/etc/supervisor/conf.d/mesos-agent.conf 8 | ifconfig |grep inet|grep -v '127.0.0.1'|awk '{print $2}' |awk -F ':' '{print $2}'>>/etc/supervisor/conf.d/mesos-agent.conf 9 | echo 'directory=/data/mesos-1.7.0/build 10 | user=root 11 | autostart=true 12 | autorestart=true 13 | stopsignal=KILL 14 | startsecs=10 15 | startretries=3 16 | stdout_logfile = /data/log/mesos/stdout.log 17 | stdout_logfile_backups = 3 18 | stderr_logfile = /data/log/mesos/stderr.log 19 | stderr_logfile_backups = 3 20 | logfile_maxbytes=20MB'>>/etc/supervisor/conf.d/mesos-agent.conf 21 | 22 | mkdir -p /data/log/mesos 23 | supervisorctl update 24 | -------------------------------------------------------------------------------- /scripts/install/setupetcd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | /data/etcd/etcdctl mkdir /overlord/clusters 4 | /data/etcd/etcdctl mkdir /overlord/instances 5 | /data/etcd/etcdctl mkdir /overlord/heartbeat 6 | /data/etcd/etcdctl mkdir /overlord/config 7 | /data/etcd/etcdctl mkdir /overlord/jobs
8 | /data/etcd/etcdctl mkdir /overlord/job_detail 9 | /data/etcd/etcdctl mkdir /overlord/framework 10 | /data/etcd/etcdctl mkdir /overlord/appids 11 | /data/etcd/etcdctl mkdir /overlord/specs 12 | /data/etcd/etcdctl set /overlord/fs "http://172.22.20.48:20080" 13 | -------------------------------------------------------------------------------- /scripts/memcache_multi_thread.py: -------------------------------------------------------------------------------- 1 | from gevent.monkey import patch_all 2 | patch_all() 3 | from gevent.pool import Pool 4 | import memcache 5 | import random 6 | 7 | def run(_x): 8 | gets = ["key_{}".format(x) for x in range(1000)] 9 | sets = [("key_{}".format(x), x) for x in range(1000)] 10 | cmds = gets + sets 11 | mc = memcache.Client(["127.0.0.1:21211"]) 12 | for x in range(1000): 13 | item = random.choice(cmds) 14 | if isinstance(item, basestring): 15 | _value = mc.get(item) 16 | else: 17 | key, val = item 18 | mc.set(key, val, noreply=False) 19 | 20 | 21 | def main(): 22 | p = Pool(20) 23 | p.map(run, range(20)) 24 | 25 | if __name__ == '__main__': 26 | main() -------------------------------------------------------------------------------- /scripts/run-fuzz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export GO111MODULE=on 4 | 5 | # generate a clean vendor dir and back up the go mod files 6 | go mod vendor 7 | mv go.mod go.mod.backup 8 | mv go.sum go.sum.backup 9 | 10 | # switch GO111MODULE to auto 11 | export GO111MODULE=auto 12 | 13 | #ls ci/fuzz/ | xargs -n1 -I"{}" cd {} && go-fuzz-build 14 | ls ci/fuzz/ | xargs -n1 -I"{}" python scripts/fuzz_tools.py ci/fuzz/{} 15 | 16 | # restore the go mod files 17 | rm -rf vendor 18 | mv go.mod.backup go.mod 19 | mv go.sum.backup go.sum 20 | 21 | export GO111MODULE=on 22 | -------------------------------------------------------------------------------- /scripts/validate_keys_dist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import redis 4 | import argparse 5 | import os 6 | 7 | from contextlib import contextmanager 8 | 9 | def gen_str(n): 10 | return ''.join(map(lambda xx: (hex(ord(xx))[2:]), os.urandom(n))) 11 | 12 | 13 | def gen_items(prefix, n): 14 | return [ 15 | "_overlord-%s-%s-%010d" % (prefix, gen_str(8), num) for num in range(n) 16 | ] 17 | 18 | 19 | def parse_ip_port(addr): 20 | asp = addr.split(":") 21 | return (asp[0], int(asp[1])) 22 | 23 | 24 | def dial(expect): 25 | ip, port = parse_ip_port(expect) 26 | rc = redis.StrictRedis(host=ip, port=port) 27 | return rc 28 | 29 | @contextmanager 30 | def del_keys(rc, keys): 31 | try: 32 | yield 33 | finally: 34 | epipe = rc.pipeline(transaction=False) 35 | for key in keys: 36 | epipe.delete(key) 37 | epipe.execute() 38 | 39 | 40 | def check_vals(expect_rc, check_rc, keys, vals): 41 | epipe = expect_rc.pipeline(transaction=False) 42 | for key, val in zip(keys, vals): 43 | epipe.set(key, val, ex=10) 44 | epipe.execute() 45 | 46 | cpipe = check_rc.pipeline(transaction=False) 47 | for key in keys: 48 | cpipe.get(key) 49 | for i,val in enumerate(cpipe.execute()): 50 | assert vals[i] == val 51 | 52 | 53 | def run_check(check, expect, n=1024): 54 | keys = gen_items("keys", n) 55 | vals = gen_items("vals", n) 56 | 57 | expect_rc = dial(expect) 58 | check_rc = dial(check) 59 | 60 | with del_keys(expect_rc, keys): 61 | check_vals(expect_rc, check_rc, keys, vals) 62 | 63 | def main(): 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument("check", help="address that needs to be
checked.") 66 | parser.add_argument( 67 | "expect", 68 | help= 69 | "expect validate address. command will be send to this address first.") 70 | parser.add_argument("-k", "--keys", type=int, default=1024, help="default 1024. It's recommands be the 10 times than the count of backends.") 71 | opt = parser.parse_args() 72 | check = opt.check 73 | expect = opt.check 74 | run_check(check, expect, n=opt.keys) 75 | 76 | 77 | if __name__ == "__main__": 78 | main() 79 | -------------------------------------------------------------------------------- /version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | // Define overlord version consts 10 | const ( 11 | OverlordMajor = 1 12 | OverlordMinor = 9 13 | OverlordPatch = 0 14 | ) 15 | 16 | var ( 17 | showVersion bool 18 | vstr string 19 | vbytes []byte 20 | ) 21 | 22 | func init() { 23 | vstr = fmt.Sprintf("%d.%d.%d", OverlordMajor, OverlordMinor, OverlordPatch) 24 | vbytes = []byte(vstr) 25 | flag.BoolVar(&showVersion, "version", false, "show version and exit.") 26 | } 27 | 28 | // ShowVersion print version if -version flag is seted and return true 29 | func ShowVersion() bool { 30 | if showVersion { 31 | fmt.Fprintln(os.Stdout, vstr) 32 | } 33 | return showVersion 34 | } 35 | 36 | // Bytes return version bytes 37 | func Bytes() []byte { 38 | return vbytes 39 | } 40 | 41 | // Str is the formatted version string 42 | func Str() string { 43 | return vstr 44 | } 45 | -------------------------------------------------------------------------------- /web/.browserslistrc: -------------------------------------------------------------------------------- 1 | > 1% 2 | last 2 versions 3 | not ie <= 8 4 | -------------------------------------------------------------------------------- /web/.editorconfig: -------------------------------------------------------------------------------- 1 | [*.{js,jsx,ts,tsx,vue}] 2 | indent_style = space 3 | indent_size = 2 4 | trim_trailing_whitespace = true 5 | insert_final_newline = true 6 | -------------------------------------------------------------------------------- /web/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | root: true, 3 | env: { 4 | node: true 5 | }, 6 | 'extends': [ 7 | 'plugin:vue/essential', 8 | '@vue/standard' 9 | ], 10 | rules: { 11 | 'no-console': process.env.NODE_ENV === 'production' ? 'error' : 'off', 12 | 'no-debugger': process.env.NODE_ENV === 'production' ? 
'error' : 'off' 13 | }, 14 | parserOptions: { 15 | parser: 'babel-eslint' 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /web/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | /dist 4 | 5 | # local env files 6 | .env.local 7 | .env.*.local 8 | 9 | # Log files 10 | npm-debug.log* 11 | yarn-debug.log* 12 | yarn-error.log* 13 | 14 | # Editor directories and files 15 | .idea 16 | .vscode 17 | *.suo 18 | *.ntvs* 19 | *.njsproj 20 | *.sln 21 | *.sw* 22 | -------------------------------------------------------------------------------- /web/README.md: -------------------------------------------------------------------------------- 1 | # web 2 | 3 | ## Project setup 4 | ``` 5 | yarn install 6 | ``` 7 | 8 | ### Compiles and hot-reloads for development 9 | ``` 10 | yarn run serve 11 | ``` 12 | 13 | ### Compiles and minifies for production 14 | ``` 15 | yarn run build 16 | ``` 17 | 18 | ### Run your tests 19 | ``` 20 | yarn run test 21 | ``` 22 | 23 | ### Lints and fixes files 24 | ``` 25 | yarn run lint 26 | ``` 27 | 28 | ### Customize configuration 29 | See [Configuration Reference](https://cli.vuejs.org/config/). 30 | -------------------------------------------------------------------------------- /web/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [ 3 | '@vue/app' 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "web", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "serve": "vue-cli-service serve", 7 | "build": "vue-cli-service build", 8 | "lint": "vue-cli-service lint" 9 | }, 10 | "dependencies": { 11 | "axios": "^0.21.1", 12 | "element-ui": "^2.4.11", 13 | "lodash": "^4.17.19", 14 | "vue": "^2.5.17", 15 | "vue-json-pretty": "^1.4.1", 16 | "vue-router": "^3.0.1", 17 | "vuex": "^3.0.1" 18 | }, 19 | "devDependencies": { 20 | "@vue/cli-plugin-babel": "^3.2.0", 21 | "@vue/cli-plugin-eslint": "^3.2.0", 22 | "@vue/cli-service": "^3.2.0", 23 | "@vue/eslint-config-standard": "^4.0.0", 24 | "babel-eslint": "^10.0.1", 25 | "babel-plugin-lodash": "^3.3.4", 26 | "eslint": "^5.8.0", 27 | "eslint-plugin-vue": "^5.0.0-0", 28 | "node-sass": "^4.13.1", 29 | "sass-loader": "^7.0.1", 30 | "vue-template-compiler": "^2.5.17" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /web/postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | autoprefixer: {} 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /web/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/web/public/favicon.ico -------------------------------------------------------------------------------- /web/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Overlord 9 | 10 | 11 | 14 |
15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /web/src/App.vue: -------------------------------------------------------------------------------- 1 | 12 | 13 | 24 | 25 | 50 | -------------------------------------------------------------------------------- /web/src/assets/Starbounder-2.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bilibili/overlord/b524449801b54115831e7f2663558824bdc15a03/web/src/assets/Starbounder-2.otf -------------------------------------------------------------------------------- /web/src/constants/CREATE_TYPES.js: -------------------------------------------------------------------------------- 1 | // 集群类型 2 | const TYPE_OPTIONS = [{ 3 | name: 'Redis Cluster', 4 | value: 'redis_cluster' 5 | }, { 6 | name: 'Redis', 7 | value: 'redis' 8 | }, { 9 | name: 'Memcache', 10 | value: 'memcache' 11 | }] 12 | 13 | // 集群型号 14 | const SPEC_OPTIONS = [{ 15 | name: '小型', 16 | value: '0.25c2g' 17 | }, { 18 | name: '中型', 19 | value: '0.5c2g' 20 | }, { 21 | name: '大型', 22 | value: '1c4g' 23 | }, { 24 | name: '定制', 25 | value: 'custom' 26 | }] 27 | 28 | export { TYPE_OPTIONS, SPEC_OPTIONS } 29 | -------------------------------------------------------------------------------- /web/src/http/api.js: -------------------------------------------------------------------------------- 1 | import http from '@/http/service' 2 | 3 | // 根据关键字搜索获取 cluster 列表 4 | const getClusterListByQueryApi = params => { 5 | return http.get(`api/v1/clusters`, { 6 | params 7 | }) 8 | } 9 | 10 | // 获取 appid 列表 11 | const getAppidsApi = params => { 12 | return http.get(`api/v1/appids`, { 13 | params 14 | }) 15 | } 16 | 17 | // 获取 job 列表 18 | const getJobsApi = params => { 19 | return http.get('api/v1/jobs') 20 | } 21 | 22 | // 获取 version 列表 23 | const getVersionsApi = params => { 24 | return http.get('api/v1/versions') 25 | } 26 | 27 | // 获取 group 列表 28 | const getGroupsApi = params => { 29 | return http.get('api/v1/groups') 30 | } 31 | 32 | // 获取 appid 详情 33 | const getAppidDetailApi = params => { 34 | return http.get(`api/v1/appids/${params}`) 35 | } 36 | 37 | // 获取 appid 详情 38 | const getClusterDetailApi = params => { 39 | return http.get(`api/v1/clusters/${params}`) 40 | } 41 | 42 | // 解除 cluster 和 appid 的绑定 43 | const removeCorrelationApi = (clusterName, params) => { 44 | return http.delete(`api/v1/clusters/${clusterName}/appid`, { 45 | data: params 46 | }) 47 | } 48 | 49 | // 删除 cluster 50 | const deleteClusterApi = (clusterName, params) => { 51 | return http.delete(`api/v1/clusters/${clusterName}`) 52 | } 53 | 54 | // 更新集群节点权重 55 | const patchInstanceWeightApi = (clusterName, addr, params) => { 56 | return http.patch(`api/v1/clusters/${clusterName}/instances/${addr}`, params) 57 | } 58 | 59 | // 创建 cluster 60 | const createClusterApi = params => { 61 | return http.post('api/v1/clusters', params) 62 | } 63 | 64 | // 添加 cluster 和 appid 关联 65 | const addCorrelationApi = (clusterName, params) => { 66 | return http.post(`api/v1/clusters/${clusterName}/appid`, params) 67 | } 68 | 69 | // 新增 appid 70 | const addAppIdApi = (params) => { 71 | return http.post('api/v1/appids', params) 72 | } 73 | 74 | // 重启节点 75 | const restartInstanceApi = (clusterName, instance) => { 76 | return http.post(`api/v1/clusters/${clusterName}/instance/${instance}/restart`) 77 | } 78 | 79 | export { 80 | getClusterListByQueryApi, 81 | getAppidsApi, 82 | getJobsApi, 83 | getVersionsApi, 84 | getGroupsApi, 85 | 
getAppidDetailApi, 86 | getClusterDetailApi, 87 | removeCorrelationApi, 88 | deleteClusterApi, 89 | patchInstanceWeightApi, 90 | createClusterApi, 91 | addCorrelationApi, 92 | addAppIdApi, 93 | restartInstanceApi 94 | } 95 | -------------------------------------------------------------------------------- /web/src/http/config.js: -------------------------------------------------------------------------------- 1 | const axiosConfig = { 2 | baseURL: '/', 3 | // 请求后的数据处理 4 | transformResponse: [function (data) { 5 | return data 6 | }], 7 | // 超时设置s 8 | timeout: 30000, 9 | responseType: 'json' 10 | } 11 | 12 | export default axiosConfig 13 | -------------------------------------------------------------------------------- /web/src/http/service.js: -------------------------------------------------------------------------------- 1 | import axios from 'axios' 2 | import config from './config' 3 | // import { Message } from 'element-ui' 4 | 5 | const service = axios.create(config) 6 | 7 | // 添加请求拦截器 8 | service.interceptors.request.use( 9 | req => { 10 | return req 11 | }, 12 | error => { 13 | return Promise.reject(error) 14 | } 15 | ) 16 | 17 | // 返回状态判断(添加响应拦截器) todo 18 | service.interceptors.response.use( 19 | res => { 20 | return res 21 | }, 22 | error => { 23 | return Promise.reject(error.response.data || { 24 | error: error.message 25 | }) 26 | } 27 | ) 28 | 29 | export default service 30 | -------------------------------------------------------------------------------- /web/src/layout/Header.vue: -------------------------------------------------------------------------------- 1 | 6 | 7 | 12 | 13 | 27 | -------------------------------------------------------------------------------- /web/src/layout/SideBar.vue: -------------------------------------------------------------------------------- 1 | 31 | 32 | 37 | 38 | 64 | -------------------------------------------------------------------------------- /web/src/main.js: -------------------------------------------------------------------------------- 1 | import Vue from 'vue' 2 | import App from './App.vue' 3 | import router from './router' 4 | import store from './store/index' 5 | import ElementUI from 'element-ui' 6 | import '@/style/element-variables.scss' 7 | 8 | Vue.config.productionTip = false 9 | 10 | Vue.use(ElementUI, { size: 'small', zIndex: 3000 }) 11 | 12 | new Vue({ 13 | router, 14 | store, 15 | render: h => h(App) 16 | }).$mount('#app') 17 | -------------------------------------------------------------------------------- /web/src/router.js: -------------------------------------------------------------------------------- 1 | import Vue from 'vue' 2 | import Router from 'vue-router' 3 | import Home from './views/Home.vue' 4 | 5 | Vue.use(Router) 6 | 7 | export default new Router({ 8 | mode: 'history', 9 | base: process.env.BASE_URL, 10 | routes: [ 11 | { 12 | path: '/', 13 | name: 'home', 14 | component: Home 15 | }, 16 | { 17 | path: '/appid', 18 | name: 'appId', 19 | // route level code-splitting 20 | // this generates a separate chunk (about.[hash].js) for this route 21 | // which is lazy-loaded when the route is visited. 22 | component: () => import(/* webpackChunkName: "about" */ './views/AppId.vue') 23 | }, 24 | { 25 | path: '/cluster/:name', 26 | name: 'cluster', 27 | // route level code-splitting 28 | // this generates a separate chunk (about.[hash].js) for this route 29 | // which is lazy-loaded when the route is visited. 
30 | component: () => import(/* webpackChunkName: "about" */ './views/Cluster.vue') 31 | }, 32 | { 33 | path: '/job', 34 | name: 'job', 35 | // route level code-splitting 36 | // this generates a separate chunk (about.[hash].js) for this route 37 | // which is lazy-loaded when the route is visited. 38 | component: () => import(/* webpackChunkName: "about" */ './views/Job.vue') 39 | }, 40 | { 41 | path: '/add', 42 | name: 'add', 43 | // route level code-splitting 44 | // this generates a separate chunk (about.[hash].js) for this route 45 | // which is lazy-loaded when the route is visited. 46 | component: () => import(/* webpackChunkName: "about" */ './views/AddCluster.vue') 47 | } 48 | ] 49 | }) 50 | -------------------------------------------------------------------------------- /web/src/store/index.js: -------------------------------------------------------------------------------- 1 | import Vue from 'vue' 2 | import Vuex from 'vuex' 3 | import jobs from './modules/job' 4 | import clusters from './modules/cluster' 5 | 6 | Vue.use(Vuex) 7 | 8 | const debug = process.env.NODE_ENV !== 'production' 9 | 10 | export default new Vuex.Store({ 11 | modules: { 12 | jobs, 13 | clusters 14 | }, 15 | strict: debug 16 | }) 17 | -------------------------------------------------------------------------------- /web/src/store/modules/cluster.js: -------------------------------------------------------------------------------- 1 | import Vue from 'vue' 2 | import * as types from '../mutation-types' 3 | import { Message } from 'element-ui' 4 | import { getClusterDetailApi, getClusterListByQueryApi } from '@/http/api' 5 | 6 | // initial state 7 | const state = { 8 | clusterDetail: {}, 9 | loading: false, 10 | clusterResult: [], 11 | queryLoading: false 12 | } 13 | 14 | // getters 15 | const getters = { 16 | } 17 | 18 | // actions 19 | const actions = { 20 | async getClusterDetail ({ commit }, { name }) { 21 | commit(types.SAVE_CLUSTER_DETAIL_LOADING, true) 22 | try { 23 | const { data } = await getClusterDetailApi(name) 24 | data.instances && data.instances.forEach(item => { 25 | Vue.set(item, 'weightInfo', { 26 | value: item.weight, 27 | type: 'view' 28 | }) 29 | }) 30 | commit(types.SAVE_CLUSTER_INFO, data) 31 | } catch ({ error }) { 32 | Message.error(`获取失败:${error}`) 33 | } 34 | commit(types.SAVE_CLUSTER_DETAIL_LOADING, false) 35 | }, 36 | updateInstance ({ commit }, { changeType, index, item, newType, weightValue }) { 37 | commit(types.UPDATE_CLUSTER_INSTANCES, { changeType, index, item, newType, weightValue }) 38 | }, 39 | async getClusterResult ({ commit }, { name }) { 40 | commit(types.SAVE_CLUSTER_DETAIL_LOADING, true) 41 | try { 42 | const { data: { items } } = await getClusterListByQueryApi({ 43 | name 44 | }) 45 | commit(types.SAVE_CLUSTER_BY_QUERY, items) 46 | } catch ({ error }) { 47 | Message.error(`获取失败:${error}`) 48 | } 49 | commit(types.SAVE_CLUSTER_DETAIL_LOADING, false) 50 | } 51 | } 52 | 53 | // mutations 54 | const mutations = { 55 | [types.SAVE_CLUSTER_INFO] (state, data) { 56 | state.clusterDetail = data 57 | }, 58 | [types.SAVE_CLUSTER_DETAIL_LOADING] (state, loading) { 59 | state.loading = loading 60 | }, 61 | [types.UPDATE_CLUSTER_INSTANCES] (state, { changeType, index, item, newType, weightValue }) { 62 | if (changeType === 'display') { 63 | state.clusterDetail.instances[index].weightInfo.type = newType 64 | } else { 65 | state.clusterDetail.instances[index].weightInfo.value = weightValue 66 | } 67 | }, 68 | [types.SAVE_CLUSTER_BY_QUERY] (state, data) { 69 | state.clusterResult = 
data 70 | }, 71 | [types.SAVE_CLUSTER_BY_QUERY_LOADING] (state, queryLoading) { 72 | state.queryLoading = queryLoading 73 | } 74 | } 75 | 76 | export default { 77 | namespaced: true, 78 | state, 79 | getters, 80 | actions, 81 | mutations 82 | } 83 | -------------------------------------------------------------------------------- /web/src/store/modules/job.js: -------------------------------------------------------------------------------- 1 | import { getJobsApi } from '@/http/api' 2 | import { Message } from 'element-ui' 3 | import * as types from '../mutation-types' 4 | 5 | // initial state 6 | const state = { 7 | all: [], 8 | loading: false 9 | } 10 | 11 | // getters 12 | const getters = { 13 | jobStateList: (state, getters) => { 14 | return state.all.map(job => job.state) 15 | .filter((item, index, arr) => arr.indexOf(item) === index) 16 | .map(stateItem => ({ 17 | text: stateItem, 18 | value: stateItem 19 | })) 20 | } 21 | } 22 | 23 | // actions 24 | const actions = { 25 | async getAllJobs ({ commit }) { 26 | commit(types.SAVE_JOB_LOADING, true) 27 | try { 28 | const { data: { items } } = await getJobsApi() 29 | commit(types.SAVE_JOB_LIST, items) 30 | } catch ({ error }) { 31 | Message.error(`获取失败:${error}`) 32 | } 33 | commit(types.SAVE_JOB_LOADING, false) 34 | } 35 | } 36 | 37 | // mutations 38 | const mutations = { 39 | [types.SAVE_JOB_LIST] (state, jobs) { 40 | state.all = jobs 41 | }, 42 | [types.SAVE_JOB_LOADING] (state, loading) { 43 | state.loading = loading 44 | } 45 | } 46 | 47 | export default { 48 | namespaced: true, 49 | state, 50 | getters, 51 | actions, 52 | mutations 53 | } 54 | -------------------------------------------------------------------------------- /web/src/store/mutation-types.js: -------------------------------------------------------------------------------- 1 | // Job 2 | export const SAVE_JOB_LIST = 'SAVE_JOB_LIST' 3 | export const SAVE_JOB_LOADING = 'SAVE_JOB_LOADING' 4 | 5 | // Cluster 6 | export const SAVE_CLUSTER_INFO = 'SAVE_CLUSTER_INFO' 7 | export const SAVE_CLUSTER_DETAIL_LOADING = 'SAVE_CLUSTER_DETAIL_LOADING' 8 | export const UPDATE_CLUSTER_INSTANCES = 'UPDATE_CLUSTER_INSTANCES' 9 | export const SAVE_CLUSTER_BY_QUERY = 'SAVE_CLUSTER_BY_QUERY' 10 | export const SAVE_CLUSTER_BY_QUERY_LOADING = 'SAVE_CLUSTER_BY_QUERY_LOADING' 11 | -------------------------------------------------------------------------------- /web/src/style/element-custom.scss: -------------------------------------------------------------------------------- 1 | // el-table 2 | .el-table .el-table__header-wrapper th { 3 | background: #F5F7FA; 4 | } 5 | 6 | // el-menu 7 | .el-menu { 8 | height: 100%; 9 | text-align: left; 10 | border: none; 11 | @include linear-gradient(to bottom, rgba(10, 32, 56, 1) 0%, rgba(56, 76, 110, 1) 16%, rgba(88, 108, 149, 1) 56%, rgba(111, 132, 181, 1) 100%); 12 | 13 | .el-menu-item { 14 | border-left: 2px; 15 | } 16 | 17 | .el-menu-item i { 18 | color: #94A0B4 !important; 19 | } 20 | 21 | .el-menu-item:focus, 22 | .el-menu-item:hover { 23 | outline: 0; 24 | background-color: #e59ca4 !important; 25 | color: #fff !important; 26 | 27 | i { 28 | color: #fff !important; 29 | } 30 | } 31 | 32 | .el-menu-item.is-active { 33 | @include linear-gradient(to left, rgba(124, 138, 178, 1) 0%, rgba(252, 125, 127, 1) 100%); 34 | font-weight: bold; 35 | color: #ffffff !important; 36 | 37 | i { 38 | font-weight: bold; 39 | color: #fff !important; 40 | } 41 | } 42 | } 43 | 44 | // el-step 45 | .el-step__title.is-success { 46 | color: #f8878c !important; 47 | } 48 | 49 | 
.el-step__head.is-success {
  color: #fa878c !important;
  border-color: #fc888b !important;
}

.table-mini-input {
  width: 80px !important;
}

.el-tree__empty-text {
  color: #909399 !important;
}
-------------------------------------------------------------------------------- /web/src/style/element-variables.scss: --------------------------------------------------------------------------------
/* Override the theme primary color variable */
$--color-primary: #3B5071;

/* Override the icon font path variable (required) */
$--font-path: '~element-ui/lib/theme-chalk/fonts';

@import "~element-ui/packages/theme-chalk/src/index";
-------------------------------------------------------------------------------- /web/src/style/mixin.scss: --------------------------------------------------------------------------------
@mixin linear-gradient($direction, $color-stops...) {
  background: nth(nth($color-stops, 1), 1);
  background: linear-gradient($direction, $color-stops);
}

@mixin slide-transition {
  .slide-fade-enter-active {
    transition: all .15s ease;
  }

  .slide-fade-leave-active {
    transition: all .15s cubic-bezier(1.0, 0.5, 0.8, 1.0);
  }

  .slide-fade-enter,
  .slide-fade-leave-to {
    transform: translateY(10px);
    opacity: 0;
  }
}

// vertical centering, horizontal centering
@mixin flex-horizon-justify-center {
  display: flex;
  align-items: center;
  justify-content: center;
}

@mixin flex-vertical-justify-center {
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-content: center;
  // justify-items: center;
  // align-content: center;
}

@mixin page-title-font {
  font-size: 18px;
  font-weight: 500;
}

@mixin box-shadow($opacity) {
  box-shadow: 0 2px 5px 0 rgba(101, 121, 162, $opacity);
}
-------------------------------------------------------------------------------- /web/src/style/reset.scss: --------------------------------------------------------------------------------
html {
  box-sizing: border-box;
  font-family: "Helvetica Neue", Helvetica, "PingFang SC", "Hiragino Sans GB", "Microsoft YaHei", "微软雅黑", Arial, sans-serif;
  overflow-y: hidden;
}

body {
  margin: 0;
  padding: 0;
  font-family: "Helvetica Neue", Helvetica, "PingFang SC", "Hiragino Sans GB", "Microsoft YaHei", "微软雅黑", Arial, sans-serif;
  font-size: 12px;
  color: #495060;
  background-color: #fff;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

*,
*:after,
*:before {
  box-sizing: inherit;
}

article, aside, blockquote, body, button, dd, details, div, dl, dt, fieldset, figcaption, figure,
footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, input, legend, li, menu, nav, ol, p,
section, td, textarea, th, ul {
  margin: 0;
  padding: 0;
}

a {
  color: #2d8cf0;
  background: 0 0;
  text-decoration: none;
  outline: 0;
  cursor: pointer;
  transition: color .2s ease;
}
-------------------------------------------------------------------------------- /web/src/views/Job.vue: --------------------------------------------------------------------------------
(Vue single-file component; its template, script and style content was not preserved in this dump)
-------------------------------------------------------------------------------- /web/vue.config.js: --------------------------------------------------------------------------------
module.exports = {
  devServer: {
    proxy: {
      '/api/v1': {
        target: 'http://172.22.33.198:8880'
      }
    }
  },
  configureWebpack: {
    performance: {
      hints: false
    }
  }
}
--------------------------------------------------------------------------------
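A minimal variation of the dev-server proxy above, shown only as a sketch: the apiserver address is read from an environment variable so the hard-coded LAN IP becomes a fallback. The variable name VUE_APP_API_TARGET is an assumption and is not used anywhere in this repository.

// vue.config.js (sketch, not the repository file) — same /api/v1 proxy rule,
// with the target taken from a hypothetical VUE_APP_API_TARGET environment variable.
module.exports = {
  devServer: {
    proxy: {
      '/api/v1': {
        // e.g. VUE_APP_API_TARGET=http://127.0.0.1:8880 npm run serve
        target: process.env.VUE_APP_API_TARGET || 'http://172.22.33.198:8880'
      }
    }
  },
  configureWebpack: {
    performance: {
      hints: false
    }
  }
}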