├── .gitignore ├── .travis.yml ├── README.md ├── aliverchecker.go ├── main.go ├── redischecker.go ├── redischecker_test.go ├── rest.go ├── servergroup.go └── servergroup_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.yml 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.3 5 | 6 | script: "go test -v" 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Notes: The project is deprecated after codis 3.0 2 | 3 | Please use new version of [codis](https://github.com/codislabs/codis) 4 | 5 | 6 | [![Build Status](https://travis-ci.org/ngaut/codis-ha.svg?branch=master)](https://travis-ci.org/ngaut/codis-ha) 7 | 8 | 9 | Usage: 10 | 11 | go get github.com/ngaut/codis-ha 12 | 13 | 14 | cd codis-ha 15 | 16 | 17 | go build 18 | 19 | 20 | codis-ha --codis-config=localhost:18087 --productName=test 21 | 22 | 23 | -------------------------------------------------------------------------------- /aliverchecker.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | type AliveChecker interface { 4 | CheckAlive() error 5 | } 6 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "github.com/juju/errors" 6 | log "github.com/ngaut/logging" 7 | "strconv" 8 | "time" 9 | ) 10 | 11 | type fnHttpCall func(objPtr interface{}, api string, method string, arg interface{}) error 12 | type aliveCheckerFactory func(addr string, defaultTimeout time.Duration) AliveChecker 13 | 14 | var ( 15 | apiServer = flag.String("codis-config", "localhost:18087", "api server address") 16 | productName = flag.String("productName", "test", "product name, can be found in codis-proxy's config") 17 | logLevel = flag.String("log-level", "info", "log level") 18 | 19 | callHttp fnHttpCall = httpCall 20 | acf aliveCheckerFactory = func(addr string, timeout time.Duration) AliveChecker { 21 | return &redisChecker{ 22 | addr: addr, 23 | defaultTimeout: timeout, 24 | } 25 | } 26 | ) 27 | 28 | func genUrl(args ...interface{}) string { 29 | url := "http://" 30 | for _, v := range args { 31 | switch v.(type) { 32 | case string: 33 | url += v.(string) 34 | case int: 35 | url += strconv.Itoa(v.(int)) 36 | default: 37 | log.Errorf("unsupported type %T", v) 38 | } 39 | } 40 | 41 | return url 42 | } 43 | 44 | func main() { 45 | flag.Parse() 46 | log.SetLevelByString(*logLevel) 47 | 48 | for { 49 | groups, err := GetServerGroups() 50 | if err != nil { 51 | log.Error(errors.ErrorStack(err)) 52 | return 53 | } 54 | 55 | CheckAliveAndPromote(groups) 56 | CheckOfflineAndPromoteSlave(groups) 57 | time.Sleep(3 * time.Second) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /redischecker.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/garyburd/redigo/redis" 5 | "time" 6 | ) 7 | 8 | var ( 9 | _ AliveChecker = &redisChecker{} 10 | ) 11 | 12 | type redisChecker struct { 13 | addr string 14 | defaultTimeout time.Duration 15 | } 16 | 17 | func (r *redisChecker) ping() error { 18 | c, err := redis.DialTimeout("tcp", r.addr, r.defaultTimeout, r.defaultTimeout, r.defaultTimeout) 19 | if err != nil { 20 | return err 21 | } 22 | 23 | defer c.Close() 24 | _, err = c.Do("ping") 25 | return err 26 | } 27 | 28 | func (r *redisChecker) CheckAlive() error { 29 | var err error 30 | for i := 0; i < 2; i++ { //try a few times 31 | err = r.ping() 32 | if err != nil { 33 | time.Sleep(3 * time.Second) 34 | continue 35 | } 36 | 37 | return nil 38 | } 39 | 40 | return err 41 | } 42 | -------------------------------------------------------------------------------- /redischecker_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/alicebob/miniredis" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestRedisChecker(t *testing.T) { 10 | r, _ := miniredis.Run() 11 | defer r.Close() 12 | addr := r.Addr() 13 | rc := &redisChecker{ 14 | addr: addr, 15 | defaultTimeout: 5 * time.Second, 16 | } 17 | 18 | err := rc.CheckAlive() 19 | if err != nil { 20 | t.Error(err) 21 | } 22 | 23 | //test bad address 24 | rc.addr = "xxx" 25 | err = rc.CheckAlive() 26 | if err == nil { 27 | t.Error("should be error") 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /rest.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "net/http" 6 | 7 | "encoding/json" 8 | "github.com/juju/errors" 9 | "io/ioutil" 10 | ) 11 | 12 | //call http url and get json, then decode to objptr 13 | func httpCall(objPtr interface{}, url string, method string, arg interface{}) error { 14 | client := &http.Client{Transport: http.DefaultTransport} 15 | rw := &bytes.Buffer{} 16 | if arg != nil { 17 | buf, err := json.Marshal(arg) 18 | if err != nil { 19 | return errors.Trace(err) 20 | } 21 | rw.Write(buf) 22 | } 23 | 24 | req, err := http.NewRequest(method, url, rw) 25 | if err != nil { 26 | return errors.Trace(err) 27 | } 28 | 29 | resp, err := client.Do(req) 30 | if err != nil { 31 | return errors.Trace(err) 32 | } 33 | defer resp.Body.Close() 34 | 35 | if resp.StatusCode/100 != 2 { 36 | msg, _ := ioutil.ReadAll(resp.Body) 37 | return errors.Errorf("error: %d, message: %s", resp.StatusCode, string(msg)) 38 | } 39 | 40 | if objPtr != nil { 41 | return json.NewDecoder(resp.Body).Decode(objPtr) 42 | } 43 | 44 | return nil 45 | } 46 | -------------------------------------------------------------------------------- /servergroup.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/juju/errors" 5 | log "github.com/ngaut/logging" 6 | "github.com/wandoulabs/codis/pkg/models" 7 | "time" 8 | ) 9 | 10 | func GetServerGroups() ([]models.ServerGroup, error) { 11 | var groups []models.ServerGroup 12 | err := callHttp(&groups, genUrl(*apiServer, "/api/server_groups"), "GET", nil) 13 | return groups, err 14 | } 15 | 16 | func PingServer(checker AliveChecker, errCtx interface{}, errCh chan<- interface{}) { 17 | err := checker.CheckAlive() 18 | log.Debugf("check %+v, result:%v, errCtx:%+v", checker, err, errCtx) 19 | if err != nil { 20 | errCh <- errCtx 21 | return 22 | } 23 | errCh <- nil 24 | } 25 | 26 | func verifyAndUpServer(checker AliveChecker, errCtx interface{}) { 27 | errCh := make(chan interface{}, 100) 28 | 29 | go PingServer(checker, errCtx, errCh) 30 | 31 | s := <-errCh 32 | 33 | if s == nil { //alive 34 | handleAddServer(errCtx.(*models.Server)) 35 | } 36 | 37 | } 38 | 39 | func getSlave(master *models.Server) (*models.Server, error) { 40 | var group models.ServerGroup 41 | err := callHttp(&group, genUrl(*apiServer, "/api/server_group/", master.GroupId), "GET", nil) 42 | if err != nil { 43 | return nil, errors.Trace(err) 44 | } 45 | 46 | for _, s := range group.Servers { 47 | if s.Type == models.SERVER_TYPE_SLAVE { 48 | return s, nil 49 | } 50 | } 51 | 52 | return nil, errors.Errorf("can not find any slave in this group: %v", group) 53 | } 54 | 55 | func handleCrashedServer(s *models.Server) error { 56 | switch s.Type { 57 | case models.SERVER_TYPE_MASTER: 58 | //get slave and do promote 59 | slave, err := getSlave(s) 60 | if err != nil { 61 | log.Warning(errors.ErrorStack(err)) 62 | return err 63 | } 64 | 65 | log.Infof("try promote %+v", slave) 66 | err = callHttp(nil, genUrl(*apiServer, "/api/server_group/", slave.GroupId, "/promote"), "POST", slave) 67 | if err != nil { 68 | log.Errorf("do promote %v failed %v", slave, errors.ErrorStack(err)) 69 | return err 70 | } 71 | case models.SERVER_TYPE_SLAVE: 72 | log.Errorf("slave is down: %+v", s) 73 | case models.SERVER_TYPE_OFFLINE: 74 | //no need to handle it 75 | default: 76 | log.Fatalf("unkonwn type %+v", s) 77 | } 78 | 79 | return nil 80 | } 81 | 82 | func handleAddServer(s *models.Server) { 83 | s.Type = models.SERVER_TYPE_SLAVE 84 | log.Infof("try reusing slave %+v", s) 85 | err := callHttp(nil, genUrl(*apiServer, "/api/server_group/", s.GroupId, "/addServer"), "PUT", s) 86 | log.Errorf("do reusing slave %v failed %v", s, errors.ErrorStack(err)) 87 | } 88 | 89 | //ping codis-server find crashed codis-server 90 | func CheckAliveAndPromote(groups []models.ServerGroup) ([]models.Server, error) { 91 | errCh := make(chan interface{}, 100) 92 | var serverCnt int 93 | for _, group := range groups { //each group 94 | for _, s := range group.Servers { //each server 95 | serverCnt++ 96 | rc := acf(s.Addr, 5*time.Second) 97 | news := s 98 | go PingServer(rc, news, errCh) 99 | } 100 | } 101 | 102 | //get result 103 | var crashedServer []models.Server 104 | for i := 0; i < serverCnt; i++ { 105 | s := <-errCh 106 | if s == nil { //alive 107 | continue 108 | } 109 | 110 | log.Warningf("server maybe crashed %+v", s) 111 | crashedServer = append(crashedServer, *s.(*models.Server)) 112 | 113 | err := handleCrashedServer(s.(*models.Server)) 114 | if err != nil { 115 | return crashedServer, err 116 | } 117 | } 118 | 119 | return crashedServer, nil 120 | } 121 | 122 | //ping codis-server find node up with type offine 123 | func CheckOfflineAndPromoteSlave(groups []models.ServerGroup) ([]models.Server, error) { 124 | for _, group := range groups { //each group 125 | for _, s := range group.Servers { //each server 126 | rc := acf(s.Addr, 5*time.Second) 127 | news := s 128 | if (s.Type == models.SERVER_TYPE_OFFLINE) { 129 | verifyAndUpServer(rc, news) 130 | } 131 | } 132 | } 133 | return nil, nil 134 | } 135 | -------------------------------------------------------------------------------- /servergroup_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | 7 | "github.com/alicebob/miniredis" 8 | "github.com/wandoulabs/codis/pkg/models" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | const GROUP_ID = 1 14 | 15 | var ( 16 | redisServer, _ = miniredis.Run() 17 | groups1 = []models.ServerGroup{ 18 | models.ServerGroup{ 19 | Servers: []*models.Server{ 20 | &models.Server{GroupId: GROUP_ID, Type: models.SERVER_TYPE_MASTER, Addr: "localhost:xxx"}, 21 | &models.Server{GroupId: GROUP_ID, Type: models.SERVER_TYPE_SLAVE, Addr: redisServer.Addr()}, 22 | &models.Server{GroupId: GROUP_ID, Type: models.SERVER_TYPE_SLAVE, Addr: "xx"}, 23 | &models.Server{GroupId: GROUP_ID, Type: models.SERVER_TYPE_OFFLINE, Addr: "xx"}, 24 | }, 25 | }, 26 | } 27 | ) 28 | 29 | func TestGetServerGroups(t *testing.T) { 30 | callHttp = func(objPtr interface{}, url string, method string, arg interface{}) error { 31 | buf, _ := json.Marshal(groups1) 32 | json.Unmarshal(buf, objPtr) 33 | return nil 34 | } 35 | 36 | servergroups, err := GetServerGroups() 37 | if err != nil { 38 | t.Error(err) 39 | } 40 | 41 | if len(servergroups) == 0 { 42 | t.Error("empty server groups") 43 | } 44 | 45 | callHttp = func(objPtr interface{}, url string, method string, arg interface{}) error { 46 | return fmt.Errorf("mock return error") 47 | } 48 | 49 | if _, err := GetServerGroups(); err == nil { 50 | t.Error("should be error") 51 | } 52 | } 53 | 54 | func TestPingServer(t *testing.T) { 55 | rc := &redisChecker{ 56 | defaultTimeout: 1 * time.Second, 57 | } 58 | 59 | errCh := make(chan interface{}) 60 | go PingServer(rc, "context", errCh) 61 | if str := <-errCh; str.(string) != "context" { 62 | t.Error("should be error") 63 | } 64 | 65 | redis, _ := miniredis.Run() 66 | defer redis.Close() 67 | rc.addr = redis.Addr() 68 | go PingServer(rc, "context", errCh) 69 | if obj := <-errCh; obj != nil { 70 | t.Error("should be error") 71 | } 72 | } 73 | 74 | func TestGetSlave(t *testing.T) { 75 | callHttp = func(objPtr interface{}, url string, method string, arg interface{}) error { 76 | buf, _ := json.Marshal(groups1[0]) 77 | json.Unmarshal(buf, objPtr) 78 | return nil 79 | } 80 | 81 | s, err := getSlave(&models.Server{}) 82 | if err != nil { 83 | t.Error(err) 84 | } 85 | 86 | if s.Type != models.SERVER_TYPE_SLAVE { 87 | t.Error("should be slave") 88 | } 89 | 90 | callHttp = func(objPtr interface{}, url string, method string, arg interface{}) error { 91 | return fmt.Errorf("mock return error") 92 | } 93 | 94 | if _, err := getSlave(&models.Server{}); err == nil { 95 | t.Error("should be error") 96 | } 97 | } 98 | 99 | func TestCheckAliveAndPromote(t *testing.T) { 100 | //test promote with slave 101 | groups := groups1 102 | callHttp = func(objPtr interface{}, url string, method string, arg interface{}) error { 103 | if url == genUrl(*apiServer, "/api/server_group/", GROUP_ID) { 104 | group := groups[0] 105 | buf, _ := json.Marshal(group) 106 | json.Unmarshal(buf, objPtr) 107 | return nil 108 | } 109 | 110 | return nil 111 | } 112 | 113 | _, err := CheckAliveAndPromote(groups) 114 | if err != nil { 115 | t.Error(err) 116 | } 117 | 118 | //test no slave 119 | groups = []models.ServerGroup{ 120 | models.ServerGroup{ 121 | Servers: []*models.Server{ 122 | &models.Server{GroupId: GROUP_ID, Type: models.SERVER_TYPE_MASTER, Addr: "dead master"}, 123 | }, 124 | }, 125 | } 126 | 127 | _, err = CheckAliveAndPromote(groups) 128 | if err == nil { 129 | t.Error("should be error") 130 | } 131 | 132 | //test have slave but promote error 133 | groups = groups1 134 | callHttp = func(objPtr interface{}, url string, method string, arg interface{}) error { 135 | if url == genUrl(*apiServer, "/api/server_group/", GROUP_ID) { 136 | fmt.Println(url) 137 | group := groups[0] 138 | buf, _ := json.Marshal(group) 139 | json.Unmarshal(buf, objPtr) 140 | return nil 141 | } 142 | 143 | return fmt.Errorf("mock error") 144 | } 145 | 146 | _, err = CheckAliveAndPromote(groups) 147 | if err == nil { 148 | t.Error("should be error") 149 | } 150 | } 151 | --------------------------------------------------------------------------------