├── .gitignore ├── LICENSE ├── README.md ├── img ├── alipay.JPG ├── appent_rpc.png ├── check.png ├── ii.png ├── kv-fram.png ├── m.png ├── qc.jpg ├── rpc_vote.png ├── rule.png ├── select.png ├── state.png └── test.png └── src ├── kvraft ├── README.md ├── client.go ├── common.go ├── config.go ├── server.go └── test_test.go ├── labrpc ├── labrpc.go └── test_test.go ├── main ├── diff.out ├── ii.go ├── mr-challenge.txt ├── mr-testout.txt ├── pg-being_ernest.txt ├── pg-dorian_gray.txt ├── pg-dracula.txt ├── pg-emma.txt ├── pg-frankenstein.txt ├── pg-great_expectations.txt ├── pg-grimm.txt ├── pg-huckleberry_finn.txt ├── pg-les_miserables.txt ├── pg-metamorphosis.txt ├── pg-moby_dick.txt ├── pg-sherlock_holmes.txt ├── pg-tale_of_two_cities.txt ├── pg-tom_sawyer.txt ├── pg-ulysses.txt ├── pg-war_and_peace.txt ├── test-ii.sh ├── test-wc.sh └── wc.go ├── mapreduce ├── README.md ├── common.go ├── common_map.go ├── common_reduce.go ├── common_rpc.go ├── master.go ├── master_rpc.go ├── master_splitmerge.go ├── readme.go ├── schedule.go ├── test_test.go └── worker.go ├── paxos └── README.md ├── raft ├── README.md ├── config.go ├── persister.go ├── raft.go ├── test_test.go └── util.go ├── shardkv ├── README.md ├── client.go ├── common.go ├── config.go ├── server.go └── test_test.go ├── shardmaster ├── client.go ├── common.go ├── config.go ├── server.go └── test_test.go └── shardmaster2 ├── client.go ├── common.go ├── config.go ├── server.go └── test_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | .DS_Store 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 chauncy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 老司机带你飞系列 2 | 3 | 1. [《老司机带你用 Go 语言实现 MapReduce 框架》](src/mapreduce) 4 | 2. 
[《老司机带你用 Go 语言实现 Raft 分布式一致性协议》](src/raft) 5 | 3. [《老司机带你用 Go 语言实现 Paxos 算法》](src/paxos) 6 | 4. [《老司机带你用 Go 语言实现分布式数据库》](src/shardkv) 7 | 8 | 9 | 10 | 希望对你有帮助,你的支持是我的动力 11 | ![](https://github.com/happyer/distributed-computing/blob/master/img/alipay.JPG) -------------------------------------------------------------------------------- /img/alipay.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/alipay.JPG -------------------------------------------------------------------------------- /img/appent_rpc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/appent_rpc.png -------------------------------------------------------------------------------- /img/check.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/check.png -------------------------------------------------------------------------------- /img/ii.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/ii.png -------------------------------------------------------------------------------- /img/kv-fram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/kv-fram.png -------------------------------------------------------------------------------- /img/m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/m.png -------------------------------------------------------------------------------- /img/qc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/qc.jpg -------------------------------------------------------------------------------- /img/rpc_vote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/rpc_vote.png -------------------------------------------------------------------------------- /img/rule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/rule.png -------------------------------------------------------------------------------- /img/select.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/select.png -------------------------------------------------------------------------------- /img/state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/state.png 
-------------------------------------------------------------------------------- /img/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/img/test.png -------------------------------------------------------------------------------- /src/kvraft/README.md: --------------------------------------------------------------------------------

# Building a Distributed Key/Value Database in Go

## Part 1

This part builds on the Raft distributed consensus protocol implemented earlier. Here we write a client and a server that store and read key/value data; the code is already commented, so the details are not repeated here.

The service exposes three operations: Get, Put, and Append.

### Get

Get looks up the value for a key; if the key does not exist, the result is the empty string "".

```go
func (kv *RaftKV) Get(args *GetArgs, reply *GetReply) {
    entry := Op{Kind: "Get", Key: args.Key, Id: args.Id, ReqId: args.ReqID}

    ok := kv.AppendEntryToLog(entry)
    if !ok {
        reply.WrongLeader = true
    } else {
        reply.WrongLeader = false
        reply.Err = OK
        kv.mu.Lock()
        reply.Value = kv.db[args.Key]
        kv.ack[args.Id] = args.ReqID
        //log.Printf("%d get:%v value:%s\n", kv.me, entry, reply.Value)
        kv.mu.Unlock()
    }
}
```

The helper `func (kv *RaftKV) AppendEntryToLog(entry Op) bool` calls Raft's `Start` method to append the operation to Raft's `log[]` and then waits for it to be applied. Routing every operation (including reads) through the log makes it easy to decide whether the request succeeded and, for Get, whether the key exists.

### Put / Append

PutAppend works the same way: the key/value operation goes into the `log[]`. For simplicity the actual data is kept in a map, which is updated in the Apply method.

```go
func (kv *RaftKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) {
    entry := Op{Kind: args.Op, Key: args.Key, Value: args.Value, Id: args.Id, ReqId: args.ReqID}
    ok := kv.AppendEntryToLog(entry)
    if !ok {
        reply.WrongLeader = true
    } else {
        reply.WrongLeader = false
        reply.Err = OK
    }
}
```

Apply updates the in-memory map once Raft has committed the operation:

```go
func (kv *RaftKV) Apply(args Op) {
    switch args.Kind {
    case "Put":
        kv.db[args.Key] = args.Value
    case "Append":
        kv.db[args.Key] += args.Value
    }
    kv.ack[args.Id] = args.ReqId
}
```

Because the server must wait for Raft to reach agreement, the Put/Get handlers receive their results through the `applyCh` channel machinery instead of blocking inside Raft, which avoids deadlock.

StartKVServer must return immediately after starting, so the actual apply logic runs in a goroutine that handles each message type differently (snapshot vs. regular command):

```go
go func() {
    for {
        msg := <-kv.applyCh
        if msg.UseSnapshot {
            var LastIncludedIndex int
            var LastIncludedTerm int

            r := bytes.NewBuffer(msg.Snapshot)
            d := gob.NewDecoder(r)

            kv.mu.Lock()
            d.Decode(&LastIncludedIndex)
            d.Decode(&LastIncludedTerm)
            kv.db = make(map[string]string)
            kv.ack = make(map[int64]int)
            d.Decode(&kv.db)
            d.Decode(&kv.ack)
            kv.mu.Unlock()
        } else {
            op := msg.Command.(Op)
            kv.mu.Lock()
            if !kv.CheckDup(op.Id, op.ReqId) {
                kv.Apply(op)
            }

            ch, ok := kv.result[msg.Index]
            if ok {
                select {
                case <-kv.result[msg.Index]:
                default:
                }
                ch <- op
            } else {
                kv.result[msg.Index] = make(chan Op, 1)
            }

            // need snapshot
            if maxraftstate != -1 && kv.rf.GetPerisistSize() > maxraftstate {
                w := new(bytes.Buffer)
                e := gob.NewEncoder(w)
                e.Encode(kv.db)
                e.Encode(kv.ack)
                data := w.Bytes()
                go kv.rf.StartSnapshot(data, msg.Index)
            }
            kv.mu.Unlock()
        }
    }
}()
```
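
### Client usage (illustrative sketch)

For illustration, here is a minimal sketch of how a caller drives the service through the `Clerk` in `client.go`. It piggybacks on the test harness in `config.go` (`make_config`, `makeClient`, `cleanup`); the test name, tag string, and keys below are made up for this example and are not part of the original lab code.

```go
package raftkv

import "testing"

// Illustrative only: a tiny end-to-end walk-through of the Clerk API on top of
// the simulated labrpc network, assumed to live next to the other kvraft tests.
func TestClerkUsageExample(t *testing.T) {
    const nservers = 3
    // reliable network, maxraftstate = -1 (snapshotting disabled)
    cfg := make_config(t, "clerk-example", nservers, false, -1)
    defer cfg.cleanup()

    // a Clerk connected to all three replicas
    ck := cfg.makeClient(cfg.All())

    ck.Put("color", "blue")      // Put overwrites the stored value
    ck.Append("color", ",green") // Append concatenates onto it
    if v := ck.Get("color"); v != "blue,green" {
        t.Fatalf("Get(color) = %q, want %q", v, "blue,green")
    }
    if v := ck.Get("missing"); v != "" { // absent keys read as ""
        t.Fatalf("Get(missing) = %q, want empty string", v)
    }

    cfg.deleteClient(ck)
}
```

Note that `Clerk.Get` and `Clerk.PutAppend` keep retrying every server until some reply comes back with `WrongLeader == false`, so each call above blocks until a Raft leader has committed the operation.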
-------------------------------------------------------------------------------- /src/kvraft/client.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "labrpc" 4 | import "crypto/rand" 5 | import ( 6 | "math/big" 7 | "sync" 8 | ) 9 | 10 | 11 | type Clerk struct { 12 | servers []*labrpc.ClientEnd 13 | // You will have to modify this struct. 14 | id int64 15 | reqid int 16 | mu sync.Mutex 17 | } 18 | 19 | func nrand() int64 { 20 | max := big.NewInt(int64(1) << 62) 21 | bigx, _ := rand.Int(rand.Reader, max) 22 | x := bigx.Int64() 23 | return x 24 | } 25 | 26 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 27 | ck := new(Clerk) 28 | ck.servers = servers 29 | // You'll have to add code here. 30 | ck.id = nrand() 31 | ck.reqid = 0 32 | return ck 33 | } 34 | 35 | // 36 | // fetch the current value for a key. 37 | // returns "" if the key does not exist. 38 | // keeps trying forever in the face of all other errors. 39 | // 40 | // you can send an RPC with code like this: 41 | // ok := ck.servers[i].Call("RaftKV.Get", &args, &reply) 42 | // 43 | // the types of args and reply (including whether they are pointers) 44 | // must match the declared types of the RPC handler function's 45 | // arguments. and reply must be passed as a pointer. 46 | // 47 | func (ck *Clerk) Get(key string) string { 48 | 49 | // You will have to modify this function. 50 | var args GetArgs 51 | args.Key = key 52 | args.Id = ck.id 53 | ck.mu.Lock() 54 | args.ReqID = ck.reqid 55 | ck.reqid++ 56 | ck.mu.Unlock() 57 | for { 58 | for _,v := range ck.servers { 59 | var reply GetReply 60 | ok := v.Call("RaftKV.Get", &args, &reply) 61 | if ok && reply.WrongLeader == false { 62 | //if reply.Err == ErrNoKey { 63 | // reply.Value = "" 64 | // } 65 | return reply.Value 66 | } 67 | } 68 | } 69 | } 70 | 71 | // 72 | // shared by Put and Append. 73 | // 74 | // you can send an RPC with code like this: 75 | // ok := ck.servers[i].Call("RaftKV.PutAppend", &args, &reply) 76 | // 77 | // the types of args and reply (including whether they are pointers) 78 | // must match the declared types of the RPC handler function's 79 | // arguments. and reply must be passed as a pointer. 80 | // 81 | func (ck *Clerk) PutAppend(key string, value string, op string) { 82 | // You will have to modify this function. 83 | var args PutAppendArgs 84 | args.Key = key 85 | args.Value = value 86 | args.Op = op 87 | args.Id = ck.id 88 | ck.mu.Lock() 89 | args.ReqID = ck.reqid 90 | ck.reqid++ 91 | ck.mu.Unlock() 92 | for { 93 | for _,v := range ck.servers { 94 | var reply PutAppendReply 95 | ok := v.Call("RaftKV.PutAppend", &args, &reply) 96 | if ok && reply.WrongLeader == false { 97 | return 98 | } 99 | } 100 | } 101 | } 102 | 103 | func (ck *Clerk) Put(key string, value string) { 104 | ck.PutAppend(key, value, "Put") 105 | } 106 | func (ck *Clerk) Append(key string, value string) { 107 | ck.PutAppend(key, value, "Append") 108 | } 109 | -------------------------------------------------------------------------------- /src/kvraft/common.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ) 7 | 8 | type Err string 9 | 10 | // Put or Append 11 | type PutAppendArgs struct { 12 | // You'll have to add definitions here. 13 | Key string 14 | Value string 15 | Op string // "Put" or "Append" 16 | // You'll have to add definitions here. 
17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | Id int64 20 | ReqID int 21 | } 22 | 23 | type PutAppendReply struct { 24 | WrongLeader bool 25 | Err Err 26 | } 27 | 28 | type GetArgs struct { 29 | Key string 30 | // You'll have to add definitions here. 31 | Id int64 32 | ReqID int 33 | } 34 | 35 | type GetReply struct { 36 | WrongLeader bool 37 | Err Err 38 | Value string 39 | } 40 | -------------------------------------------------------------------------------- /src/kvraft/config.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "labrpc" 4 | import "testing" 5 | import "os" 6 | 7 | // import "log" 8 | import crand "crypto/rand" 9 | import "math/rand" 10 | import "encoding/base64" 11 | import "sync" 12 | import "runtime" 13 | import "raft" 14 | 15 | func randstring(n int) string { 16 | b := make([]byte, 2*n) 17 | crand.Read(b) 18 | s := base64.URLEncoding.EncodeToString(b) 19 | return s[0:n] 20 | } 21 | 22 | // Randomize server handles 23 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 24 | sa := make([]*labrpc.ClientEnd, len(kvh)) 25 | copy(sa, kvh) 26 | for i := range sa { 27 | j := rand.Intn(i + 1) 28 | sa[i], sa[j] = sa[j], sa[i] 29 | } 30 | return sa 31 | } 32 | 33 | type config struct { 34 | mu sync.Mutex 35 | t *testing.T 36 | tag string 37 | net *labrpc.Network 38 | n int 39 | kvservers []*RaftKV 40 | saved []*raft.Persister 41 | endnames [][]string // names of each server's sending ClientEnds 42 | clerks map[*Clerk][]string 43 | nextClientId int 44 | maxraftstate int 45 | } 46 | 47 | func (cfg *config) cleanup() { 48 | cfg.mu.Lock() 49 | defer cfg.mu.Unlock() 50 | for i := 0; i < len(cfg.kvservers); i++ { 51 | if cfg.kvservers[i] != nil { 52 | cfg.kvservers[i].Kill() 53 | } 54 | } 55 | } 56 | 57 | // Maximum log size across all servers 58 | func (cfg *config) LogSize() int { 59 | logsize := 0 60 | for i := 0; i < cfg.n; i++ { 61 | n := cfg.saved[i].RaftStateSize() 62 | if n > logsize { 63 | logsize = n 64 | } 65 | } 66 | return logsize 67 | } 68 | 69 | // attach server i to servers listed in to 70 | // caller must hold cfg.mu 71 | func (cfg *config) connectUnlocked(i int, to []int) { 72 | // log.Printf("connect peer %d to %v\n", i, to) 73 | 74 | // outgoing socket files 75 | for j := 0; j < len(to); j++ { 76 | endname := cfg.endnames[i][to[j]] 77 | cfg.net.Enable(endname, true) 78 | } 79 | 80 | // incoming socket files 81 | for j := 0; j < len(to); j++ { 82 | endname := cfg.endnames[to[j]][i] 83 | cfg.net.Enable(endname, true) 84 | } 85 | } 86 | 87 | func (cfg *config) connect(i int, to []int) { 88 | cfg.mu.Lock() 89 | defer cfg.mu.Unlock() 90 | cfg.connectUnlocked(i, to) 91 | } 92 | 93 | // detach server i from the servers listed in from 94 | // caller must hold cfg.mu 95 | func (cfg *config) disconnectUnlocked(i int, from []int) { 96 | // log.Printf("disconnect peer %d from %v\n", i, from) 97 | 98 | // outgoing socket files 99 | for j := 0; j < len(from); j++ { 100 | if cfg.endnames[i] != nil { 101 | endname := cfg.endnames[i][from[j]] 102 | cfg.net.Enable(endname, false) 103 | } 104 | } 105 | 106 | // incoming socket files 107 | for j := 0; j < len(from); j++ { 108 | if cfg.endnames[j] != nil { 109 | endname := cfg.endnames[from[j]][i] 110 | cfg.net.Enable(endname, false) 111 | } 112 | } 113 | } 114 | 115 | func (cfg *config) disconnect(i int, from []int) { 116 | cfg.mu.Lock() 117 | defer cfg.mu.Unlock() 118 | cfg.disconnectUnlocked(i, from) 119 
| } 120 | 121 | func (cfg *config) All() []int { 122 | all := make([]int, cfg.n) 123 | for i := 0; i < cfg.n; i++ { 124 | all[i] = i 125 | } 126 | return all 127 | } 128 | 129 | func (cfg *config) ConnectAll() { 130 | cfg.mu.Lock() 131 | defer cfg.mu.Unlock() 132 | for i := 0; i < cfg.n; i++ { 133 | cfg.connectUnlocked(i, cfg.All()) 134 | } 135 | } 136 | 137 | // Sets up 2 partitions with connectivity between servers in each partition. 138 | func (cfg *config) partition(p1 []int, p2 []int) { 139 | cfg.mu.Lock() 140 | defer cfg.mu.Unlock() 141 | // log.Printf("partition servers into: %v %v\n", p1, p2) 142 | for i := 0; i < len(p1); i++ { 143 | cfg.disconnectUnlocked(p1[i], p2) 144 | cfg.connectUnlocked(p1[i], p1) 145 | } 146 | for i := 0; i < len(p2); i++ { 147 | cfg.disconnectUnlocked(p2[i], p1) 148 | cfg.connectUnlocked(p2[i], p2) 149 | } 150 | } 151 | 152 | // Create a clerk with clerk specific server names. 153 | // Give it connections to all of the servers, but for 154 | // now enable only connections to servers in to[]. 155 | func (cfg *config) makeClient(to []int) *Clerk { 156 | cfg.mu.Lock() 157 | defer cfg.mu.Unlock() 158 | 159 | // a fresh set of ClientEnds. 160 | ends := make([]*labrpc.ClientEnd, cfg.n) 161 | endnames := make([]string, cfg.n) 162 | for j := 0; j < cfg.n; j++ { 163 | endnames[j] = randstring(20) 164 | ends[j] = cfg.net.MakeEnd(endnames[j]) 165 | cfg.net.Connect(endnames[j], j) 166 | } 167 | 168 | ck := MakeClerk(random_handles(ends)) 169 | cfg.clerks[ck] = endnames 170 | cfg.nextClientId++ 171 | cfg.ConnectClientUnlocked(ck, to) 172 | return ck 173 | } 174 | 175 | func (cfg *config) deleteClient(ck *Clerk) { 176 | cfg.mu.Lock() 177 | defer cfg.mu.Unlock() 178 | 179 | v := cfg.clerks[ck] 180 | for i := 0; i < len(v); i++ { 181 | os.Remove(v[i]) 182 | } 183 | delete(cfg.clerks, ck) 184 | } 185 | 186 | // caller should hold cfg.mu 187 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 188 | // log.Printf("ConnectClient %v to %v\n", ck, to) 189 | endnames := cfg.clerks[ck] 190 | for j := 0; j < len(to); j++ { 191 | s := endnames[to[j]] 192 | cfg.net.Enable(s, true) 193 | } 194 | } 195 | 196 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 197 | cfg.mu.Lock() 198 | defer cfg.mu.Unlock() 199 | cfg.ConnectClientUnlocked(ck, to) 200 | } 201 | 202 | // caller should hold cfg.mu 203 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 204 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 205 | endnames := cfg.clerks[ck] 206 | for j := 0; j < len(from); j++ { 207 | s := endnames[from[j]] 208 | cfg.net.Enable(s, false) 209 | } 210 | } 211 | 212 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 213 | cfg.mu.Lock() 214 | defer cfg.mu.Unlock() 215 | cfg.DisconnectClientUnlocked(ck, from) 216 | } 217 | 218 | // Shutdown a server by isolating it 219 | func (cfg *config) ShutdownServer(i int) { 220 | cfg.mu.Lock() 221 | defer cfg.mu.Unlock() 222 | 223 | cfg.disconnectUnlocked(i, cfg.All()) 224 | 225 | // disable client connections to the server. 226 | // it's important to do this before creating 227 | // the new Persister in saved[i], to avoid 228 | // the possibility of the server returning a 229 | // positive reply to an Append but persisting 230 | // the result in the superseded Persister. 231 | cfg.net.DeleteServer(i) 232 | 233 | // a fresh persister, in case old instance 234 | // continues to update the Persister. 
235 | // but copy old persister's content so that we always 236 | // pass Make() the last persisted state. 237 | if cfg.saved[i] != nil { 238 | cfg.saved[i] = cfg.saved[i].Copy() 239 | } 240 | 241 | kv := cfg.kvservers[i] 242 | if kv != nil { 243 | cfg.mu.Unlock() 244 | kv.Kill() 245 | cfg.mu.Lock() 246 | cfg.kvservers[i] = nil 247 | } 248 | } 249 | 250 | // If restart servers, first call ShutdownServer 251 | func (cfg *config) StartServer(i int) { 252 | cfg.mu.Lock() 253 | 254 | // a fresh set of outgoing ClientEnd names. 255 | cfg.endnames[i] = make([]string, cfg.n) 256 | for j := 0; j < cfg.n; j++ { 257 | cfg.endnames[i][j] = randstring(20) 258 | } 259 | 260 | // a fresh set of ClientEnds. 261 | ends := make([]*labrpc.ClientEnd, cfg.n) 262 | for j := 0; j < cfg.n; j++ { 263 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 264 | cfg.net.Connect(cfg.endnames[i][j], j) 265 | } 266 | 267 | // a fresh persister, so old instance doesn't overwrite 268 | // new instance's persisted state. 269 | // give the fresh persister a copy of the old persister's 270 | // state, so that the spec is that we pass StartKVServer() 271 | // the last persisted state. 272 | if cfg.saved[i] != nil { 273 | cfg.saved[i] = cfg.saved[i].Copy() 274 | } else { 275 | cfg.saved[i] = raft.MakePersister() 276 | } 277 | cfg.mu.Unlock() 278 | 279 | cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate) 280 | 281 | kvsvc := labrpc.MakeService(cfg.kvservers[i]) 282 | rfsvc := labrpc.MakeService(cfg.kvservers[i].rf) 283 | srv := labrpc.MakeServer() 284 | srv.AddService(kvsvc) 285 | srv.AddService(rfsvc) 286 | cfg.net.AddServer(i, srv) 287 | } 288 | 289 | func (cfg *config) Leader() (bool, int) { 290 | cfg.mu.Lock() 291 | defer cfg.mu.Unlock() 292 | 293 | for i := 0; i < cfg.n; i++ { 294 | _, is_leader := cfg.kvservers[i].rf.GetState() 295 | if is_leader { 296 | return true, i 297 | } 298 | } 299 | return false, 0 300 | } 301 | 302 | // Partition servers into 2 groups and put current leader in minority 303 | func (cfg *config) make_partition() ([]int, []int) { 304 | _, l := cfg.Leader() 305 | p1 := make([]int, cfg.n/2+1) 306 | p2 := make([]int, cfg.n/2) 307 | j := 0 308 | for i := 0; i < cfg.n; i++ { 309 | if i != l { 310 | if j < len(p1) { 311 | p1[j] = i 312 | } else { 313 | p2[j-len(p1)] = i 314 | } 315 | j++ 316 | } 317 | } 318 | p2[len(p2)-1] = l 319 | return p1, p2 320 | } 321 | 322 | func make_config(t *testing.T, tag string, n int, unreliable bool, maxraftstate int) *config { 323 | runtime.GOMAXPROCS(4) 324 | cfg := &config{} 325 | cfg.t = t 326 | cfg.tag = tag 327 | cfg.net = labrpc.MakeNetwork() 328 | cfg.n = n 329 | cfg.kvservers = make([]*RaftKV, cfg.n) 330 | cfg.saved = make([]*raft.Persister, cfg.n) 331 | cfg.endnames = make([][]string, cfg.n) 332 | cfg.clerks = make(map[*Clerk][]string) 333 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 334 | cfg.maxraftstate = maxraftstate 335 | 336 | // create a full set of KV servers. 
337 | for i := 0; i < cfg.n; i++ { 338 | cfg.StartServer(i) 339 | } 340 | 341 | cfg.ConnectAll() 342 | 343 | cfg.net.Reliable(!unreliable) 344 | 345 | return cfg 346 | } 347 | -------------------------------------------------------------------------------- /src/kvraft/server.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | "encoding/gob" 5 | "labrpc" 6 | "log" 7 | "raft" 8 | "sync" 9 | "time" 10 | "bytes" 11 | ) 12 | 13 | const Debug = 0 14 | 15 | func DPrintf(format string, a ...interface{}) (n int, err error) { 16 | if Debug > 0 { 17 | log.Printf(format, a...) 18 | } 19 | return 20 | } 21 | 22 | 23 | type Op struct { 24 | // Your definitions here. 25 | // Field names must start with capital letters, 26 | // otherwise RPC will break. 27 | Kind string //"Put" or "Append" "Get" 28 | Key string 29 | Value string 30 | Id int64 31 | ReqId int 32 | } 33 | 34 | type RaftKV struct { 35 | mu sync.Mutex 36 | me int 37 | rf *raft.Raft 38 | applyCh chan raft.ApplyMsg 39 | 40 | maxraftstate int // snapshot if log grows this big 41 | 42 | // Your definitions here. 43 | db map[string]string 44 | ack map[int64]int 45 | result map[int]chan Op 46 | } 47 | 48 | 49 | func (kv *RaftKV) AppendEntryToLog(entry Op) bool { 50 | index, _, isLeader := kv.rf.Start(entry) 51 | if !isLeader { 52 | return false 53 | } 54 | 55 | kv.mu.Lock() 56 | ch,ok := kv.result[index] 57 | if !ok { 58 | ch = make(chan Op,1) 59 | kv.result[index] = ch 60 | } 61 | kv.mu.Unlock() 62 | select { 63 | case op := <-ch: 64 | return op == entry 65 | case <-time.After(1000 * time.Millisecond): 66 | //log.Printf("timeout\n") 67 | return false 68 | } 69 | } 70 | 71 | 72 | func (kv *RaftKV) CheckDup(id int64,reqid int) bool { 73 | //kv.mu.Lock() 74 | //defer kv.mu.Unlock() 75 | v,ok := kv.ack[id] 76 | if ok { 77 | return v >= reqid 78 | } 79 | return false 80 | } 81 | 82 | func (kv *RaftKV) Get(args *GetArgs, reply *GetReply) { 83 | // Your code here. 84 | entry := Op{Kind:"Get",Key:args.Key,Id:args.Id,ReqId:args.ReqID} 85 | 86 | ok := kv.AppendEntryToLog(entry) 87 | if !ok { 88 | reply.WrongLeader = true 89 | } else { 90 | reply.WrongLeader = false 91 | 92 | reply.Err = OK 93 | kv.mu.Lock() 94 | reply.Value = kv.db[args.Key] 95 | kv.ack[args.Id] = args.ReqID 96 | //log.Printf("%d get:%v value:%s\n",kv.me,entry,reply.Value) 97 | kv.mu.Unlock() 98 | } 99 | } 100 | 101 | func (kv *RaftKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 102 | // Your code here. 103 | entry := Op{Kind:args.Op,Key:args.Key,Value:args.Value,Id:args.Id,ReqId:args.ReqID} 104 | ok := kv.AppendEntryToLog(entry) 105 | if !ok { 106 | reply.WrongLeader = true 107 | } else { 108 | reply.WrongLeader = false 109 | reply.Err = OK 110 | } 111 | } 112 | 113 | func (kv *RaftKV) Apply(args Op) { 114 | switch args.Kind { 115 | case "Put": 116 | kv.db[args.Key] = args.Value 117 | case "Append": 118 | kv.db[args.Key] += args.Value 119 | } 120 | kv.ack[args.Id] = args.ReqId 121 | } 122 | 123 | // 124 | // the tester calls Kill() when a RaftKV instance won't 125 | // be needed again. you are not required to do anything 126 | // in Kill(), but it might be convenient to (for example) 127 | // turn off debug output from this instance. 128 | // 129 | func (kv *RaftKV) Kill() { 130 | kv.rf.Kill() 131 | // Your code here, if desired. 132 | } 133 | 134 | // 135 | // servers[] contains the ports of the set of 136 | // servers that will cooperate via Raft to 137 | // form the fault-tolerant key/value service. 
138 | // me is the index of the current server in servers[]. 139 | // the k/v server should store snapshots with persister.SaveSnapshot(), 140 | // and Raft should save its state (including log) with persister.SaveRaftState(). 141 | // the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, 142 | // in order to allow Raft to garbage-collect its log. if maxraftstate is -1, 143 | // you don't need to snapshot. 144 | // StartKVServer() must return quickly, so it should start goroutines 145 | // for any long-running work. 146 | // 147 | func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *RaftKV { 148 | // call gob.Register on structures you want 149 | // Go's RPC library to marshall/unmarshall. 150 | gob.Register(Op{}) 151 | 152 | kv := new(RaftKV) 153 | kv.me = me 154 | kv.maxraftstate = maxraftstate 155 | 156 | // Your initialization code here. 157 | 158 | kv.applyCh = make(chan raft.ApplyMsg) 159 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 160 | 161 | 162 | kv.db = make(map[string]string) 163 | kv.ack = make(map[int64]int) 164 | kv.result = make(map[int]chan Op) 165 | 166 | go func() { 167 | for { 168 | msg := <-kv.applyCh 169 | if msg.UseSnapshot { 170 | var LastIncludedIndex int 171 | var LastIncludedTerm int 172 | 173 | r := bytes.NewBuffer(msg.Snapshot) 174 | d := gob.NewDecoder(r) 175 | 176 | kv.mu.Lock() 177 | d.Decode(&LastIncludedIndex) 178 | d.Decode(&LastIncludedTerm) 179 | kv.db = make(map[string]string) 180 | kv.ack = make(map[int64]int) 181 | d.Decode(&kv.db) 182 | d.Decode(&kv.ack) 183 | kv.mu.Unlock() 184 | } else { 185 | op := msg.Command.(Op) 186 | kv.mu.Lock() 187 | if !kv.CheckDup(op.Id,op.ReqId) { 188 | kv.Apply(op) 189 | } 190 | 191 | ch,ok := kv.result[msg.Index] 192 | if ok { 193 | select { 194 | case <-kv.result[msg.Index]: 195 | default: 196 | } 197 | ch <- op 198 | } else { 199 | kv.result[msg.Index] = make(chan Op, 1) 200 | } 201 | 202 | //need snapshot 203 | if maxraftstate != -1 && kv.rf.GetPerisistSize() > maxraftstate { 204 | w := new(bytes.Buffer) 205 | e := gob.NewEncoder(w) 206 | e.Encode(kv.db) 207 | e.Encode(kv.ack) 208 | data := w.Bytes() 209 | go kv.rf.StartSnapshot(data,msg.Index) 210 | } 211 | kv.mu.Unlock() 212 | } 213 | } 214 | }() 215 | 216 | return kv 217 | } 218 | -------------------------------------------------------------------------------- /src/kvraft/test_test.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "testing" 4 | import "strconv" 5 | import "time" 6 | import "fmt" 7 | import "math/rand" 8 | import "log" 9 | import "strings" 10 | import "sync/atomic" 11 | 12 | // The tester generously allows solutions to complete elections in one second 13 | // (much more than the paper's range of timeouts). 
14 | const electionTimeout = 1 * time.Second 15 | 16 | func check(t *testing.T, ck *Clerk, key string, value string) { 17 | v := ck.Get(key) 18 | if v != value { 19 | t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v) 20 | } 21 | } 22 | 23 | // a client runs the function f and then signals it is done 24 | func run_client(t *testing.T, cfg *config, me int, ca chan bool, fn func(me int, ck *Clerk, t *testing.T)) { 25 | ok := false 26 | defer func() { ca <- ok }() 27 | ck := cfg.makeClient(cfg.All()) 28 | fn(me, ck, t) 29 | ok = true 30 | cfg.deleteClient(ck) 31 | } 32 | 33 | // spawn ncli clients and wait until they are all done 34 | func spawn_clients_and_wait(t *testing.T, cfg *config, ncli int, fn func(me int, ck *Clerk, t *testing.T)) { 35 | ca := make([]chan bool, ncli) 36 | for cli := 0; cli < ncli; cli++ { 37 | ca[cli] = make(chan bool) 38 | go run_client(t, cfg, cli, ca[cli], fn) 39 | } 40 | // log.Printf("spawn_clients_and_wait: waiting for clients") 41 | for cli := 0; cli < ncli; cli++ { 42 | ok := <-ca[cli] 43 | // log.Printf("spawn_clients_and_wait: client %d is done\n", cli) 44 | if ok == false { 45 | t.Fatalf("failure") 46 | } 47 | } 48 | } 49 | 50 | // predict effect of Append(k, val) if old value is prev. 51 | func NextValue(prev string, val string) string { 52 | return prev + val 53 | } 54 | 55 | // check that for a specific client all known appends are present in a value, 56 | // and in order 57 | func checkClntAppends(t *testing.T, clnt int, v string, count int) { 58 | lastoff := -1 59 | for j := 0; j < count; j++ { 60 | wanted := "x " + strconv.Itoa(clnt) + " " + strconv.Itoa(j) + " y" 61 | off := strings.Index(v, wanted) 62 | if off < 0 { 63 | t.Fatalf("%v missing element %v in Append result %v", clnt, wanted, v) 64 | } 65 | off1 := strings.LastIndex(v, wanted) 66 | if off1 != off { 67 | fmt.Printf("off1 %v off %v\n", off1, off) 68 | t.Fatalf("duplicate element %v in Append result", wanted) 69 | } 70 | if off <= lastoff { 71 | t.Fatalf("wrong order for element %v in Append result", wanted) 72 | } 73 | lastoff = off 74 | } 75 | } 76 | 77 | // check that all known appends are present in a value, 78 | // and are in order for each concurrent client. 
79 | func checkConcurrentAppends(t *testing.T, v string, counts []int) { 80 | nclients := len(counts) 81 | for i := 0; i < nclients; i++ { 82 | lastoff := -1 83 | for j := 0; j < counts[i]; j++ { 84 | wanted := "x " + strconv.Itoa(i) + " " + strconv.Itoa(j) + " y" 85 | off := strings.Index(v, wanted) 86 | if off < 0 { 87 | t.Fatalf("%v missing element %v in Append result %v", i, wanted, v) 88 | } 89 | off1 := strings.LastIndex(v, wanted) 90 | if off1 != off { 91 | t.Fatalf("duplicate element %v in Append result", wanted) 92 | } 93 | if off <= lastoff { 94 | t.Fatalf("wrong order for element %v in Append result", wanted) 95 | } 96 | lastoff = off 97 | } 98 | } 99 | } 100 | 101 | // repartition the servers periodically 102 | func partitioner(t *testing.T, cfg *config, ch chan bool, done *int32) { 103 | defer func() { ch <- true }() 104 | for atomic.LoadInt32(done) == 0 { 105 | a := make([]int, cfg.n) 106 | for i := 0; i < cfg.n; i++ { 107 | a[i] = (rand.Int() % 2) 108 | } 109 | pa := make([][]int, 2) 110 | for i := 0; i < 2; i++ { 111 | pa[i] = make([]int, 0) 112 | for j := 0; j < cfg.n; j++ { 113 | if a[j] == i { 114 | pa[i] = append(pa[i], j) 115 | } 116 | } 117 | } 118 | cfg.partition(pa[0], pa[1]) 119 | time.Sleep(electionTimeout + time.Duration(rand.Int63()%200)*time.Millisecond) 120 | } 121 | } 122 | 123 | // Basic test is as follows: one or more clients submitting Append/Get 124 | // operations to set of servers for some period of time. After the period is 125 | // over, test checks that all appended values are present and in order for a 126 | // particular key. If unreliable is set, RPCs may fail. If crash is set, the 127 | // servers crash after the period is over and restart. If partitions is set, 128 | // the test repartitions the network concurrently with the clients and servers. If 129 | // maxraftstate is a positive number, the size of the state for Raft (i.e., log 130 | // size) shouldn't exceed 2*maxraftstate. 
131 | func GenericTest(t *testing.T, tag string, nclients int, unreliable bool, crash bool, partitions bool, maxraftstate int) { 132 | const nservers = 5 133 | cfg := make_config(t, tag, nservers, unreliable, maxraftstate) 134 | defer cfg.cleanup() 135 | 136 | ck := cfg.makeClient(cfg.All()) 137 | 138 | done_partitioner := int32(0) 139 | done_clients := int32(0) 140 | ch_partitioner := make(chan bool) 141 | clnts := make([]chan int, nclients) 142 | for i := 0; i < nclients; i++ { 143 | clnts[i] = make(chan int) 144 | } 145 | for i := 0; i < 3; i++ { 146 | // log.Printf("Iteration %v\n", i) 147 | atomic.StoreInt32(&done_clients, 0) 148 | atomic.StoreInt32(&done_partitioner, 0) 149 | go spawn_clients_and_wait(t, cfg, nclients, func(cli int, myck *Clerk, t *testing.T) { 150 | j := 0 151 | defer func() { 152 | clnts[cli] <- j 153 | }() 154 | last := "" 155 | key := strconv.Itoa(cli) 156 | myck.Put(key, last) 157 | for atomic.LoadInt32(&done_clients) == 0 { 158 | if (rand.Int() % 1000) < 500 { 159 | nv := "x " + strconv.Itoa(cli) + " " + strconv.Itoa(j) + " y" 160 | // log.Printf("%d: client new append %v\n", cli, nv) 161 | myck.Append(key, nv) 162 | last = NextValue(last, nv) 163 | j++ 164 | } else { 165 | // log.Printf("%d: client new get %v\n", cli, key) 166 | v := myck.Get(key) 167 | if v != last { 168 | log.Fatalf("get wrong value, key %v, wanted:\n%v\n, got\n%v\n", key, last, v) 169 | } 170 | } 171 | } 172 | }) 173 | 174 | if partitions { 175 | // Allow the clients to perform some operations without interruption 176 | time.Sleep(1 * time.Second) 177 | go partitioner(t, cfg, ch_partitioner, &done_partitioner) 178 | } 179 | time.Sleep(5 * time.Second) 180 | 181 | atomic.StoreInt32(&done_clients, 1) // tell clients to quit 182 | atomic.StoreInt32(&done_partitioner, 1) // tell partitioner to quit 183 | 184 | if partitions { 185 | // log.Printf("wait for partitioner\n") 186 | <-ch_partitioner 187 | // reconnect network and submit a request. A client may 188 | // have submitted a request in a minority. That request 189 | // won't return until that server discovers a new term 190 | // has started. 191 | cfg.ConnectAll() 192 | // wait for a while so that we have a new term 193 | time.Sleep(electionTimeout) 194 | } 195 | 196 | if crash { 197 | // log.Printf("shutdown servers\n") 198 | for i := 0; i < nservers; i++ { 199 | cfg.ShutdownServer(i) 200 | } 201 | // Wait for a while for servers to shutdown, since 202 | // shutdown isn't a real crash and isn't instantaneous 203 | time.Sleep(electionTimeout) 204 | // log.Printf("restart servers\n") 205 | // crash and re-start all 206 | for i := 0; i < nservers; i++ { 207 | cfg.StartServer(i) 208 | } 209 | cfg.ConnectAll() 210 | } 211 | 212 | // log.Printf("wait for clients\n") 213 | for i := 0; i < nclients; i++ { 214 | // log.Printf("read from clients %d\n", i) 215 | j := <-clnts[i] 216 | if j < 10 { 217 | log.Printf("Warning: client %d managed to perform only %d put operations in 1 sec?\n", i, j) 218 | } 219 | key := strconv.Itoa(i) 220 | // log.Printf("Check %v for client %d\n", j, i) 221 | v := ck.Get(key) 222 | checkClntAppends(t, i, v, j) 223 | } 224 | 225 | if maxraftstate > 0 { 226 | // Check maximum after the servers have processed all client 227 | // requests and had time to checkpoint 228 | if cfg.LogSize() > 2*maxraftstate { 229 | t.Fatalf("logs were not trimmed (%v > 2*%v)", cfg.LogSize(), maxraftstate) 230 | } 231 | } 232 | } 233 | 234 | fmt.Printf(" ... 
Passed\n") 235 | } 236 | 237 | func TestBasic(t *testing.T) { 238 | fmt.Printf("Test: One client ...\n") 239 | GenericTest(t, "basic", 1, false, false, false, -1) 240 | } 241 | 242 | func TestConcurrent(t *testing.T) { 243 | fmt.Printf("Test: concurrent clients ...\n") 244 | GenericTest(t, "concur", 5, false, false, false, -1) 245 | } 246 | 247 | func TestUnreliable(t *testing.T) { 248 | fmt.Printf("Test: unreliable ...\n") 249 | GenericTest(t, "unreliable", 5, true, false, false, -1) 250 | } 251 | 252 | func TestUnreliableOneKey(t *testing.T) { 253 | const nservers = 3 254 | cfg := make_config(t, "onekey", nservers, true, -1) 255 | defer cfg.cleanup() 256 | 257 | ck := cfg.makeClient(cfg.All()) 258 | 259 | fmt.Printf("Test: Concurrent Append to same key, unreliable ...\n") 260 | 261 | ck.Put("k", "") 262 | 263 | const nclient = 5 264 | const upto = 10 265 | spawn_clients_and_wait(t, cfg, nclient, func(me int, myck *Clerk, t *testing.T) { 266 | n := 0 267 | for n < upto { 268 | myck.Append("k", "x "+strconv.Itoa(me)+" "+strconv.Itoa(n)+" y") 269 | n++ 270 | } 271 | }) 272 | 273 | var counts []int 274 | for i := 0; i < nclient; i++ { 275 | counts = append(counts, upto) 276 | } 277 | 278 | vx := ck.Get("k") 279 | checkConcurrentAppends(t, vx, counts) 280 | 281 | fmt.Printf(" ... Passed\n") 282 | } 283 | 284 | // Submit a request in the minority partition and check that the requests 285 | // doesn't go through until the partition heals. The leader in the original 286 | // network ends up in the minority partition. 287 | func TestOnePartition(t *testing.T) { 288 | const nservers = 5 289 | cfg := make_config(t, "partition", nservers, false, -1) 290 | defer cfg.cleanup() 291 | ck := cfg.makeClient(cfg.All()) 292 | 293 | ck.Put("1", "13") 294 | 295 | fmt.Printf("Test: Progress in majority ...\n") 296 | 297 | p1, p2 := cfg.make_partition() 298 | cfg.partition(p1, p2) 299 | 300 | ckp1 := cfg.makeClient(p1) // connect ckp1 to p1 301 | ckp2a := cfg.makeClient(p2) // connect ckp2a to p2 302 | ckp2b := cfg.makeClient(p2) // connect ckp2b to p2 303 | 304 | ckp1.Put("1", "14") 305 | check(t, ckp1, "1", "14") 306 | 307 | fmt.Printf(" ... Passed\n") 308 | 309 | done0 := make(chan bool) 310 | done1 := make(chan bool) 311 | 312 | fmt.Printf("Test: No progress in minority ...\n") 313 | go func() { 314 | ckp2a.Put("1", "15") 315 | done0 <- true 316 | }() 317 | go func() { 318 | ckp2b.Get("1") // different clerk in p2 319 | done1 <- true 320 | }() 321 | 322 | select { 323 | case <-done0: 324 | t.Fatalf("Put in minority completed") 325 | case <-done1: 326 | t.Fatalf("Get in minority completed") 327 | case <-time.After(time.Second): 328 | } 329 | 330 | check(t, ckp1, "1", "14") 331 | ckp1.Put("1", "16") 332 | check(t, ckp1, "1", "16") 333 | 334 | fmt.Printf(" ... Passed\n") 335 | 336 | fmt.Printf("Test: Completion after heal ...\n") 337 | 338 | cfg.ConnectAll() 339 | cfg.ConnectClient(ckp2a, cfg.All()) 340 | cfg.ConnectClient(ckp2b, cfg.All()) 341 | 342 | time.Sleep(electionTimeout) 343 | 344 | select { 345 | case <-done0: 346 | case <-time.After(30 * 100 * time.Millisecond): 347 | t.Fatalf("Put did not complete") 348 | } 349 | 350 | select { 351 | case <-done1: 352 | case <-time.After(30 * 100 * time.Millisecond): 353 | t.Fatalf("Get did not complete") 354 | default: 355 | } 356 | 357 | check(t, ck, "1", "15") 358 | 359 | fmt.Printf(" ... 
Passed\n") 360 | } 361 | 362 | func TestManyPartitionsOneClient(t *testing.T) { 363 | fmt.Printf("Test: many partitions ...\n") 364 | GenericTest(t, "manypartitions", 1, false, false, true, -1) 365 | } 366 | 367 | func TestManyPartitionsManyClients(t *testing.T) { 368 | fmt.Printf("Test: many partitions, many clients ...\n") 369 | GenericTest(t, "manypartitionsclnts", 5, false, false, true, -1) 370 | } 371 | 372 | func TestPersistOneClient(t *testing.T) { 373 | fmt.Printf("Test: persistence with one client ...\n") 374 | GenericTest(t, "persistone", 1, false, true, false, -1) 375 | } 376 | 377 | func TestPersistConcurrent(t *testing.T) { 378 | fmt.Printf("Test: persistence with concurrent clients ...\n") 379 | GenericTest(t, "persistconcur", 5, false, true, false, -1) 380 | } 381 | 382 | func TestPersistConcurrentUnreliable(t *testing.T) { 383 | fmt.Printf("Test: persistence with concurrent clients, unreliable ...\n") 384 | GenericTest(t, "persistconcurunreliable", 5, true, true, false, -1) 385 | } 386 | 387 | func TestPersistPartition(t *testing.T) { 388 | fmt.Printf("Test: persistence with concurrent clients and repartitioning servers...\n") 389 | GenericTest(t, "persistpart", 5, false, true, true, -1) 390 | } 391 | 392 | func TestPersistPartitionUnreliable(t *testing.T) { 393 | fmt.Printf("Test: persistence with concurrent clients and repartitioning servers, unreliable...\n") 394 | GenericTest(t, "persistpartunreliable", 5, true, true, true, -1) 395 | } 396 | 397 | // 398 | // if one server falls behind, then rejoins, does it 399 | // recover by using the InstallSnapshot RPC? 400 | // also checks that majority discards committed log entries 401 | // even if minority doesn't respond. 402 | // 403 | func TestSnapshotRPC(t *testing.T) { 404 | const nservers = 3 405 | maxraftstate := 1000 406 | cfg := make_config(t, "snapshotrpc", nservers, false, maxraftstate) 407 | defer cfg.cleanup() 408 | 409 | ck := cfg.makeClient(cfg.All()) 410 | 411 | fmt.Printf("Test: InstallSnapshot RPC ...\n") 412 | 413 | ck.Put("a", "A") 414 | check(t, ck, "a", "A") 415 | 416 | // a bunch of puts into the majority partition. 417 | cfg.partition([]int{0, 1}, []int{2}) 418 | { 419 | ck1 := cfg.makeClient([]int{0, 1}) 420 | for i := 0; i < 50; i++ { 421 | ck1.Put(strconv.Itoa(i), strconv.Itoa(i)) 422 | } 423 | time.Sleep(electionTimeout) 424 | ck1.Put("b", "B") 425 | } 426 | 427 | // check that the majority partition has thrown away 428 | // most of its log entries. 429 | if cfg.LogSize() > 2*maxraftstate { 430 | t.Fatalf("logs were not trimmed (%v > 2*%v)", cfg.LogSize(), maxraftstate) 431 | } 432 | 433 | // now make group that requires participation of 434 | // lagging server, so that it has to catch up. 435 | cfg.partition([]int{0, 2}, []int{1}) 436 | { 437 | ck1 := cfg.makeClient([]int{0, 2}) 438 | ck1.Put("c", "C") 439 | ck1.Put("d", "D") 440 | check(t, ck1, "a", "A") 441 | check(t, ck1, "b", "B") 442 | check(t, ck1, "1", "1") 443 | check(t, ck1, "49", "49") 444 | } 445 | 446 | // now everybody 447 | cfg.partition([]int{0, 1, 2}, []int{}) 448 | 449 | ck.Put("e", "E") 450 | check(t, ck, "c", "C") 451 | check(t, ck, "e", "E") 452 | check(t, ck, "1", "1") 453 | 454 | fmt.Printf(" ... 
Passed\n") 455 | } 456 | 457 | func TestSnapshotRecover(t *testing.T) { 458 | fmt.Printf("Test: persistence with one client and snapshots ...\n") 459 | GenericTest(t, "snapshot", 1, false, true, false, 1000) 460 | } 461 | 462 | func TestSnapshotRecoverManyClients(t *testing.T) { 463 | fmt.Printf("Test: persistence with several clients and snapshots ...\n") 464 | GenericTest(t, "snapshotunreliable", 20, false, true, false, 1000) 465 | } 466 | 467 | func TestSnapshotUnreliable(t *testing.T) { 468 | fmt.Printf("Test: persistence with several clients, snapshots, unreliable ...\n") 469 | GenericTest(t, "snapshotunreliable", 5, true, false, false, 1000) 470 | } 471 | 472 | func TestSnapshotUnreliableRecover(t *testing.T) { 473 | fmt.Printf("Test: persistence with several clients, failures, and snapshots, unreliable ...\n") 474 | GenericTest(t, "snapshotunreliablecrash", 5, true, true, false, 1000) 475 | } 476 | 477 | func TestSnapshotUnreliableRecoverConcurrentPartition(t *testing.T) { 478 | fmt.Printf("Test: persistence with several clients, failures, and snapshots, unreliable and partitions ...\n") 479 | GenericTest(t, "snapshotunreliableconcurpartitions", 5, true, true, true, 1000) 480 | } 481 | -------------------------------------------------------------------------------- /src/labrpc/labrpc.go: -------------------------------------------------------------------------------- 1 | package labrpc 2 | 3 | // 4 | // channel-based RPC, for 824 labs. 5 | // allows tests to disconnect RPC connections. 6 | // 7 | // we will use the original labrpc.go to test your code for grading. 8 | // so, while you can modify this code to help you debug, please 9 | // test against the original before submitting. 10 | // 11 | // adapted from Go net/rpc/server.go. 12 | // 13 | // sends gob-encoded values to ensure that RPCs 14 | // don't include references to program objects. 15 | // 16 | // net := MakeNetwork() -- holds network, clients, servers. 17 | // end := net.MakeEnd(endname) -- create a client end-point, to talk to one server. 18 | // net.AddServer(servername, server) -- adds a named server to network. 19 | // net.DeleteServer(servername) -- eliminate the named server. 20 | // net.Connect(endname, servername) -- connect a client to a server. 21 | // net.Enable(endname, enabled) -- enable/disable a client. 22 | // net.Reliable(bool) -- false means drop/delay messages 23 | // 24 | // end.Call("Raft.AppendEntries", &args, &reply) -- send an RPC, wait for reply. 25 | // the "Raft" is the name of the server struct to be called. 26 | // the "AppendEntries" is the name of the method to be called. 27 | // Call() returns true to indicate that the server executed the request 28 | // and the reply is valid. 29 | // Call() returns false if the network lost the request or reply 30 | // or the server is down. 31 | // It is OK to have multiple Call()s in progress at the same time on the 32 | // same ClientEnd. 33 | // Concurrent calls to Call() may be delivered to the server out of order, 34 | // since the network may re-order messages. 35 | // Call() is guaranteed to return (perhaps after a delay) *except* if the 36 | // handler function on the server side does not return. That is, there 37 | // is no need to implement your own timeouts around Call(). 38 | // the server RPC handler function must declare its args and reply arguments 39 | // as pointers, so that their types exactly match the types of the arguments 40 | // to Call(). 
41 | // 42 | // srv := MakeServer() 43 | // srv.AddService(svc) -- a server can have multiple services, e.g. Raft and k/v 44 | // pass srv to net.AddServer() 45 | // 46 | // svc := MakeService(receiverObject) -- obj's methods will handle RPCs 47 | // much like Go's rpcs.Register() 48 | // pass svc to srv.AddService() 49 | // 50 | 51 | import "encoding/gob" 52 | import "bytes" 53 | import "reflect" 54 | import "sync" 55 | import "log" 56 | import "strings" 57 | import "math/rand" 58 | import "time" 59 | 60 | type reqMsg struct { 61 | endname interface{} // name of sending ClientEnd 62 | svcMeth string // e.g. "Raft.AppendEntries" 63 | argsType reflect.Type 64 | args []byte 65 | replyCh chan replyMsg 66 | } 67 | 68 | type replyMsg struct { 69 | ok bool 70 | reply []byte 71 | } 72 | 73 | type ClientEnd struct { 74 | endname interface{} // this end-point's name 75 | ch chan reqMsg // copy of Network.endCh 76 | } 77 | 78 | // send an RPC, wait for the reply. 79 | // the return value indicates success; false means the 80 | // server couldn't be contacted. 81 | func (e *ClientEnd) Call(svcMeth string, args interface{}, reply interface{}) bool { 82 | req := reqMsg{} 83 | req.endname = e.endname 84 | req.svcMeth = svcMeth 85 | req.argsType = reflect.TypeOf(args) 86 | req.replyCh = make(chan replyMsg) 87 | 88 | qb := new(bytes.Buffer) 89 | qe := gob.NewEncoder(qb) 90 | qe.Encode(args) 91 | req.args = qb.Bytes() 92 | 93 | e.ch <- req 94 | 95 | rep := <-req.replyCh 96 | if rep.ok { 97 | rb := bytes.NewBuffer(rep.reply) 98 | rd := gob.NewDecoder(rb) 99 | if err := rd.Decode(reply); err != nil { 100 | log.Fatalf("ClientEnd.Call(): decode reply: %v\n", err) 101 | } 102 | return true 103 | } else { 104 | return false 105 | } 106 | } 107 | 108 | type Network struct { 109 | mu sync.Mutex 110 | reliable bool 111 | longDelays bool // pause a long time on send on disabled connection 112 | longReordering bool // sometimes delay replies a long time 113 | ends map[interface{}]*ClientEnd // ends, by name 114 | enabled map[interface{}]bool // by end name 115 | servers map[interface{}]*Server // servers, by name 116 | connections map[interface{}]interface{} // endname -> servername 117 | endCh chan reqMsg 118 | } 119 | 120 | func MakeNetwork() *Network { 121 | rn := &Network{} 122 | rn.reliable = true 123 | rn.ends = map[interface{}]*ClientEnd{} 124 | rn.enabled = map[interface{}]bool{} 125 | rn.servers = map[interface{}]*Server{} 126 | rn.connections = map[interface{}](interface{}){} 127 | rn.endCh = make(chan reqMsg) 128 | 129 | // single goroutine to handle all ClientEnd.Call()s 130 | go func() { 131 | for xreq := range rn.endCh { 132 | go rn.ProcessReq(xreq) 133 | } 134 | }() 135 | 136 | return rn 137 | } 138 | 139 | func (rn *Network) Reliable(yes bool) { 140 | rn.mu.Lock() 141 | defer rn.mu.Unlock() 142 | 143 | rn.reliable = yes 144 | } 145 | 146 | func (rn *Network) LongReordering(yes bool) { 147 | rn.mu.Lock() 148 | defer rn.mu.Unlock() 149 | 150 | rn.longReordering = yes 151 | } 152 | 153 | func (rn *Network) LongDelays(yes bool) { 154 | rn.mu.Lock() 155 | defer rn.mu.Unlock() 156 | 157 | rn.longDelays = yes 158 | } 159 | 160 | func (rn *Network) ReadEndnameInfo(endname interface{}) (enabled bool, 161 | servername interface{}, server *Server, reliable bool, longreordering bool, 162 | ) { 163 | rn.mu.Lock() 164 | defer rn.mu.Unlock() 165 | 166 | enabled = rn.enabled[endname] 167 | servername = rn.connections[endname] 168 | if servername != nil { 169 | server = rn.servers[servername] 170 | } 171 | reliable = 
rn.reliable 172 | longreordering = rn.longReordering 173 | return 174 | } 175 | 176 | func (rn *Network) IsServerDead(endname interface{}, servername interface{}, server *Server) bool { 177 | rn.mu.Lock() 178 | defer rn.mu.Unlock() 179 | 180 | if rn.enabled[endname] == false || rn.servers[servername] != server { 181 | return true 182 | } 183 | return false 184 | } 185 | 186 | func (rn *Network) ProcessReq(req reqMsg) { 187 | enabled, servername, server, reliable, longreordering := rn.ReadEndnameInfo(req.endname) 188 | 189 | if enabled && servername != nil && server != nil { 190 | if reliable == false { 191 | // short delay 192 | ms := (rand.Int() % 27) 193 | time.Sleep(time.Duration(ms) * time.Millisecond) 194 | } 195 | 196 | if reliable == false && (rand.Int()%1000) < 100 { 197 | // drop the request, return as if timeout 198 | req.replyCh <- replyMsg{false, nil} 199 | return 200 | } 201 | 202 | // execute the request (call the RPC handler). 203 | // in a separate thread so that we can periodically check 204 | // if the server has been killed and the RPC should get a 205 | // failure reply. 206 | ech := make(chan replyMsg) 207 | go func() { 208 | r := server.dispatch(req) 209 | ech <- r 210 | }() 211 | 212 | // wait for handler to return, 213 | // but stop waiting if DeleteServer() has been called, 214 | // and return an error. 215 | var reply replyMsg 216 | replyOK := false 217 | serverDead := false 218 | for replyOK == false && serverDead == false { 219 | select { 220 | case reply = <-ech: 221 | replyOK = true 222 | case <-time.After(100 * time.Millisecond): 223 | serverDead = rn.IsServerDead(req.endname, servername, server) 224 | } 225 | } 226 | 227 | // do not reply if DeleteServer() has been called, i.e. 228 | // the server has been killed. this is needed to avoid 229 | // situation in which a client gets a positive reply 230 | // to an Append, but the server persisted the update 231 | // into the old Persister. config.go is careful to call 232 | // DeleteServer() before superseding the Persister. 233 | serverDead = rn.IsServerDead(req.endname, servername, server) 234 | 235 | if replyOK == false || serverDead == true { 236 | // server was killed while we were waiting; return error. 237 | req.replyCh <- replyMsg{false, nil} 238 | } else if reliable == false && (rand.Int()%1000) < 100 { 239 | // drop the reply, return as if timeout 240 | req.replyCh <- replyMsg{false, nil} 241 | } else if longreordering == true && rand.Intn(900) < 600 { 242 | // delay the response for a while 243 | ms := 200 + rand.Intn(1+rand.Intn(2000)) 244 | time.Sleep(time.Duration(ms) * time.Millisecond) 245 | req.replyCh <- reply 246 | } else { 247 | req.replyCh <- reply 248 | } 249 | } else { 250 | // simulate no reply and eventual timeout. 251 | ms := 0 252 | if rn.longDelays { 253 | // let Raft tests check that leader doesn't send 254 | // RPCs synchronously. 255 | ms = (rand.Int() % 7000) 256 | } else { 257 | // many kv tests require the client to try each 258 | // server in fairly rapid succession. 259 | ms = (rand.Int() % 100) 260 | } 261 | time.Sleep(time.Duration(ms) * time.Millisecond) 262 | req.replyCh <- replyMsg{false, nil} 263 | } 264 | 265 | } 266 | 267 | // create a client end-point. 268 | // start the thread that listens and delivers. 
269 | func (rn *Network) MakeEnd(endname interface{}) *ClientEnd { 270 | rn.mu.Lock() 271 | defer rn.mu.Unlock() 272 | 273 | if _, ok := rn.ends[endname]; ok { 274 | log.Fatalf("MakeEnd: %v already exists\n", endname) 275 | } 276 | 277 | e := &ClientEnd{} 278 | e.endname = endname 279 | e.ch = rn.endCh 280 | rn.ends[endname] = e 281 | rn.enabled[endname] = false 282 | rn.connections[endname] = nil 283 | 284 | return e 285 | } 286 | 287 | func (rn *Network) AddServer(servername interface{}, rs *Server) { 288 | rn.mu.Lock() 289 | defer rn.mu.Unlock() 290 | 291 | rn.servers[servername] = rs 292 | } 293 | 294 | func (rn *Network) DeleteServer(servername interface{}) { 295 | rn.mu.Lock() 296 | defer rn.mu.Unlock() 297 | 298 | rn.servers[servername] = nil 299 | } 300 | 301 | // connect a ClientEnd to a server. 302 | // a ClientEnd can only be connected once in its lifetime. 303 | func (rn *Network) Connect(endname interface{}, servername interface{}) { 304 | rn.mu.Lock() 305 | defer rn.mu.Unlock() 306 | 307 | rn.connections[endname] = servername 308 | } 309 | 310 | // enable/disable a ClientEnd. 311 | func (rn *Network) Enable(endname interface{}, enabled bool) { 312 | rn.mu.Lock() 313 | defer rn.mu.Unlock() 314 | 315 | rn.enabled[endname] = enabled 316 | } 317 | 318 | // get a server's count of incoming RPCs. 319 | func (rn *Network) GetCount(servername interface{}) int { 320 | rn.mu.Lock() 321 | defer rn.mu.Unlock() 322 | 323 | svr := rn.servers[servername] 324 | return svr.GetCount() 325 | } 326 | 327 | // 328 | // a server is a collection of services, all sharing 329 | // the same rpc dispatcher. so that e.g. both a Raft 330 | // and a k/v server can listen to the same rpc endpoint. 331 | // 332 | type Server struct { 333 | mu sync.Mutex 334 | services map[string]*Service 335 | count int // incoming RPCs 336 | } 337 | 338 | func MakeServer() *Server { 339 | rs := &Server{} 340 | rs.services = map[string]*Service{} 341 | return rs 342 | } 343 | 344 | func (rs *Server) AddService(svc *Service) { 345 | rs.mu.Lock() 346 | defer rs.mu.Unlock() 347 | rs.services[svc.name] = svc 348 | } 349 | 350 | func (rs *Server) dispatch(req reqMsg) replyMsg { 351 | rs.mu.Lock() 352 | 353 | rs.count += 1 354 | 355 | // split Raft.AppendEntries into service and method 356 | dot := strings.LastIndex(req.svcMeth, ".") 357 | serviceName := req.svcMeth[:dot] 358 | methodName := req.svcMeth[dot+1:] 359 | 360 | service, ok := rs.services[serviceName] 361 | 362 | rs.mu.Unlock() 363 | 364 | if ok { 365 | return service.dispatch(methodName, req) 366 | } else { 367 | choices := []string{} 368 | for k, _ := range rs.services { 369 | choices = append(choices, k) 370 | } 371 | log.Fatalf("labrpc.Server.dispatch(): unknown service %v in %v.%v; expecting one of %v\n", 372 | serviceName, serviceName, methodName, choices) 373 | return replyMsg{false, nil} 374 | } 375 | } 376 | 377 | func (rs *Server) GetCount() int { 378 | rs.mu.Lock() 379 | defer rs.mu.Unlock() 380 | return rs.count 381 | } 382 | 383 | // an object with methods that can be called via RPC. 384 | // a single server may have more than one Service. 
385 | type Service struct { 386 | name string 387 | rcvr reflect.Value 388 | typ reflect.Type 389 | methods map[string]reflect.Method 390 | } 391 | 392 | func MakeService(rcvr interface{}) *Service { 393 | svc := &Service{} 394 | svc.typ = reflect.TypeOf(rcvr) 395 | svc.rcvr = reflect.ValueOf(rcvr) 396 | svc.name = reflect.Indirect(svc.rcvr).Type().Name() 397 | svc.methods = map[string]reflect.Method{} 398 | 399 | for m := 0; m < svc.typ.NumMethod(); m++ { 400 | method := svc.typ.Method(m) 401 | mtype := method.Type 402 | mname := method.Name 403 | 404 | //fmt.Printf("%v pp %v ni %v 1k %v 2k %v no %v\n", 405 | // mname, method.PkgPath, mtype.NumIn(), mtype.In(1).Kind(), mtype.In(2).Kind(), mtype.NumOut()) 406 | 407 | if method.PkgPath != "" || // capitalized? 408 | mtype.NumIn() != 3 || 409 | //mtype.In(1).Kind() != reflect.Ptr || 410 | mtype.In(2).Kind() != reflect.Ptr || 411 | mtype.NumOut() != 0 { 412 | // the method is not suitable for a handler 413 | //fmt.Printf("bad method: %v\n", mname) 414 | } else { 415 | // the method looks like a handler 416 | svc.methods[mname] = method 417 | } 418 | } 419 | 420 | return svc 421 | } 422 | 423 | func (svc *Service) dispatch(methname string, req reqMsg) replyMsg { 424 | if method, ok := svc.methods[methname]; ok { 425 | // prepare space into which to read the argument. 426 | // the Value's type will be a pointer to req.argsType. 427 | args := reflect.New(req.argsType) 428 | 429 | // decode the argument. 430 | ab := bytes.NewBuffer(req.args) 431 | ad := gob.NewDecoder(ab) 432 | ad.Decode(args.Interface()) 433 | 434 | // allocate space for the reply. 435 | replyType := method.Type.In(2) 436 | replyType = replyType.Elem() 437 | replyv := reflect.New(replyType) 438 | 439 | // call the method. 440 | function := method.Func 441 | function.Call([]reflect.Value{svc.rcvr, args.Elem(), replyv}) 442 | 443 | // encode the reply. 
444 | rb := new(bytes.Buffer) 445 | re := gob.NewEncoder(rb) 446 | re.EncodeValue(replyv) 447 | 448 | return replyMsg{true, rb.Bytes()} 449 | } else { 450 | choices := []string{} 451 | for k, _ := range svc.methods { 452 | choices = append(choices, k) 453 | } 454 | log.Fatalf("labrpc.Service.dispatch(): unknown method %v in %v; expecting one of %v\n", 455 | methname, req.svcMeth, choices) 456 | return replyMsg{false, nil} 457 | } 458 | } 459 | -------------------------------------------------------------------------------- /src/labrpc/test_test.go: -------------------------------------------------------------------------------- 1 | package labrpc 2 | 3 | import "testing" 4 | import "strconv" 5 | import "sync" 6 | import "runtime" 7 | import "time" 8 | import "fmt" 9 | 10 | type JunkArgs struct { 11 | X int 12 | } 13 | type JunkReply struct { 14 | X string 15 | } 16 | 17 | type JunkServer struct { 18 | mu sync.Mutex 19 | log1 []string 20 | log2 []int 21 | } 22 | 23 | func (js *JunkServer) Handler1(args string, reply *int) { 24 | js.mu.Lock() 25 | defer js.mu.Unlock() 26 | js.log1 = append(js.log1, args) 27 | *reply, _ = strconv.Atoi(args) 28 | } 29 | 30 | func (js *JunkServer) Handler2(args int, reply *string) { 31 | js.mu.Lock() 32 | defer js.mu.Unlock() 33 | js.log2 = append(js.log2, args) 34 | *reply = "handler2-" + strconv.Itoa(args) 35 | } 36 | 37 | func (js *JunkServer) Handler3(args int, reply *int) { 38 | js.mu.Lock() 39 | defer js.mu.Unlock() 40 | time.Sleep(20 * time.Second) 41 | *reply = -args 42 | } 43 | 44 | // args is a pointer 45 | func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) { 46 | reply.X = "pointer" 47 | } 48 | 49 | // args is a not pointer 50 | func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) { 51 | reply.X = "no pointer" 52 | } 53 | 54 | func TestBasic(t *testing.T) { 55 | runtime.GOMAXPROCS(4) 56 | 57 | rn := MakeNetwork() 58 | 59 | e := rn.MakeEnd("end1-99") 60 | 61 | js := &JunkServer{} 62 | svc := MakeService(js) 63 | 64 | rs := MakeServer() 65 | rs.AddService(svc) 66 | rn.AddServer("server99", rs) 67 | 68 | rn.Connect("end1-99", "server99") 69 | rn.Enable("end1-99", true) 70 | 71 | { 72 | reply := "" 73 | e.Call("JunkServer.Handler2", 111, &reply) 74 | if reply != "handler2-111" { 75 | t.Fatalf("wrong reply from Handler2") 76 | } 77 | } 78 | 79 | { 80 | reply := 0 81 | e.Call("JunkServer.Handler1", "9099", &reply) 82 | if reply != 9099 { 83 | t.Fatalf("wrong reply from Handler1") 84 | } 85 | } 86 | } 87 | 88 | func TestTypes(t *testing.T) { 89 | runtime.GOMAXPROCS(4) 90 | 91 | rn := MakeNetwork() 92 | 93 | e := rn.MakeEnd("end1-99") 94 | 95 | js := &JunkServer{} 96 | svc := MakeService(js) 97 | 98 | rs := MakeServer() 99 | rs.AddService(svc) 100 | rn.AddServer("server99", rs) 101 | 102 | rn.Connect("end1-99", "server99") 103 | rn.Enable("end1-99", true) 104 | 105 | { 106 | var args JunkArgs 107 | var reply JunkReply 108 | // args must match type (pointer or not) of handler. 109 | e.Call("JunkServer.Handler4", &args, &reply) 110 | if reply.X != "pointer" { 111 | t.Fatalf("wrong reply from Handler4") 112 | } 113 | } 114 | 115 | { 116 | var args JunkArgs 117 | var reply JunkReply 118 | // args must match type (pointer or not) of handler. 119 | e.Call("JunkServer.Handler5", args, &reply) 120 | if reply.X != "no pointer" { 121 | t.Fatalf("wrong reply from Handler5") 122 | } 123 | } 124 | } 125 | 126 | // 127 | // does net.Enable(endname, false) really disconnect a client? 
128 | // 129 | func TestDisconnect(t *testing.T) { 130 | runtime.GOMAXPROCS(4) 131 | 132 | rn := MakeNetwork() 133 | 134 | e := rn.MakeEnd("end1-99") 135 | 136 | js := &JunkServer{} 137 | svc := MakeService(js) 138 | 139 | rs := MakeServer() 140 | rs.AddService(svc) 141 | rn.AddServer("server99", rs) 142 | 143 | rn.Connect("end1-99", "server99") 144 | 145 | { 146 | reply := "" 147 | e.Call("JunkServer.Handler2", 111, &reply) 148 | if reply != "" { 149 | t.Fatalf("unexpected reply from Handler2") 150 | } 151 | } 152 | 153 | rn.Enable("end1-99", true) 154 | 155 | { 156 | reply := 0 157 | e.Call("JunkServer.Handler1", "9099", &reply) 158 | if reply != 9099 { 159 | t.Fatalf("wrong reply from Handler1") 160 | } 161 | } 162 | } 163 | 164 | // 165 | // test net.GetCount() 166 | // 167 | func TestCounts(t *testing.T) { 168 | runtime.GOMAXPROCS(4) 169 | 170 | rn := MakeNetwork() 171 | 172 | e := rn.MakeEnd("end1-99") 173 | 174 | js := &JunkServer{} 175 | svc := MakeService(js) 176 | 177 | rs := MakeServer() 178 | rs.AddService(svc) 179 | rn.AddServer(99, rs) 180 | 181 | rn.Connect("end1-99", 99) 182 | rn.Enable("end1-99", true) 183 | 184 | for i := 0; i < 17; i++ { 185 | reply := "" 186 | e.Call("JunkServer.Handler2", i, &reply) 187 | wanted := "handler2-" + strconv.Itoa(i) 188 | if reply != wanted { 189 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 190 | } 191 | } 192 | 193 | n := rn.GetCount(99) 194 | if n != 17 { 195 | t.Fatalf("wrong GetCount() %v, expected 17\n", n) 196 | } 197 | } 198 | 199 | // 200 | // test RPCs from concurrent ClientEnds 201 | // 202 | func TestConcurrentMany(t *testing.T) { 203 | runtime.GOMAXPROCS(4) 204 | 205 | rn := MakeNetwork() 206 | 207 | js := &JunkServer{} 208 | svc := MakeService(js) 209 | 210 | rs := MakeServer() 211 | rs.AddService(svc) 212 | rn.AddServer(1000, rs) 213 | 214 | ch := make(chan int) 215 | 216 | nclients := 20 217 | nrpcs := 10 218 | for ii := 0; ii < nclients; ii++ { 219 | go func(i int) { 220 | n := 0 221 | defer func() { ch <- n }() 222 | 223 | e := rn.MakeEnd(i) 224 | rn.Connect(i, 1000) 225 | rn.Enable(i, true) 226 | 227 | for j := 0; j < nrpcs; j++ { 228 | arg := i*100 + j 229 | reply := "" 230 | e.Call("JunkServer.Handler2", arg, &reply) 231 | wanted := "handler2-" + strconv.Itoa(arg) 232 | if reply != wanted { 233 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 234 | } 235 | n += 1 236 | } 237 | }(ii) 238 | } 239 | 240 | total := 0 241 | for ii := 0; ii < nclients; ii++ { 242 | x := <-ch 243 | total += x 244 | } 245 | 246 | if total != nclients*nrpcs { 247 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs) 248 | } 249 | 250 | n := rn.GetCount(1000) 251 | if n != total { 252 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 253 | } 254 | } 255 | 256 | // 257 | // test unreliable 258 | // 259 | func TestUnreliable(t *testing.T) { 260 | runtime.GOMAXPROCS(4) 261 | 262 | rn := MakeNetwork() 263 | rn.Reliable(false) 264 | 265 | js := &JunkServer{} 266 | svc := MakeService(js) 267 | 268 | rs := MakeServer() 269 | rs.AddService(svc) 270 | rn.AddServer(1000, rs) 271 | 272 | ch := make(chan int) 273 | 274 | nclients := 300 275 | for ii := 0; ii < nclients; ii++ { 276 | go func(i int) { 277 | n := 0 278 | defer func() { ch <- n }() 279 | 280 | e := rn.MakeEnd(i) 281 | rn.Connect(i, 1000) 282 | rn.Enable(i, true) 283 | 284 | arg := i * 100 285 | reply := "" 286 | ok := e.Call("JunkServer.Handler2", arg, &reply) 287 | if ok { 288 | wanted := "handler2-" + 
strconv.Itoa(arg) 289 | if reply != wanted { 290 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 291 | } 292 | n += 1 293 | } 294 | }(ii) 295 | } 296 | 297 | total := 0 298 | for ii := 0; ii < nclients; ii++ { 299 | x := <-ch 300 | total += x 301 | } 302 | 303 | if total == nclients || total == 0 { 304 | t.Fatalf("all RPCs succeeded despite unreliable") 305 | } 306 | } 307 | 308 | // 309 | // test concurrent RPCs from a single ClientEnd 310 | // 311 | func TestConcurrentOne(t *testing.T) { 312 | runtime.GOMAXPROCS(4) 313 | 314 | rn := MakeNetwork() 315 | 316 | js := &JunkServer{} 317 | svc := MakeService(js) 318 | 319 | rs := MakeServer() 320 | rs.AddService(svc) 321 | rn.AddServer(1000, rs) 322 | 323 | e := rn.MakeEnd("c") 324 | rn.Connect("c", 1000) 325 | rn.Enable("c", true) 326 | 327 | ch := make(chan int) 328 | 329 | nrpcs := 20 330 | for ii := 0; ii < nrpcs; ii++ { 331 | go func(i int) { 332 | n := 0 333 | defer func() { ch <- n }() 334 | 335 | arg := 100 + i 336 | reply := "" 337 | e.Call("JunkServer.Handler2", arg, &reply) 338 | wanted := "handler2-" + strconv.Itoa(arg) 339 | if reply != wanted { 340 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 341 | } 342 | n += 1 343 | }(ii) 344 | } 345 | 346 | total := 0 347 | for ii := 0; ii < nrpcs; ii++ { 348 | x := <-ch 349 | total += x 350 | } 351 | 352 | if total != nrpcs { 353 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs) 354 | } 355 | 356 | js.mu.Lock() 357 | defer js.mu.Unlock() 358 | if len(js.log2) != nrpcs { 359 | t.Fatalf("wrong number of RPCs delivered") 360 | } 361 | 362 | n := rn.GetCount(1000) 363 | if n != total { 364 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 365 | } 366 | } 367 | 368 | // 369 | // regression: an RPC that's delayed during Enabled=false 370 | // should not delay subsequent RPCs (e.g. after Enabled=true). 371 | // 372 | func TestRegression1(t *testing.T) { 373 | runtime.GOMAXPROCS(4) 374 | 375 | rn := MakeNetwork() 376 | 377 | js := &JunkServer{} 378 | svc := MakeService(js) 379 | 380 | rs := MakeServer() 381 | rs.AddService(svc) 382 | rn.AddServer(1000, rs) 383 | 384 | e := rn.MakeEnd("c") 385 | rn.Connect("c", 1000) 386 | 387 | // start some RPCs while the ClientEnd is disabled. 388 | // they'll be delayed. 389 | rn.Enable("c", false) 390 | ch := make(chan bool) 391 | nrpcs := 20 392 | for ii := 0; ii < nrpcs; ii++ { 393 | go func(i int) { 394 | ok := false 395 | defer func() { ch <- ok }() 396 | 397 | arg := 100 + i 398 | reply := "" 399 | // this call ought to return false. 400 | e.Call("JunkServer.Handler2", arg, &reply) 401 | ok = true 402 | }(ii) 403 | } 404 | 405 | time.Sleep(100 * time.Millisecond) 406 | 407 | // now enable the ClientEnd and check that an RPC completes quickly. 
408 | t0 := time.Now() 409 | rn.Enable("c", true) 410 | { 411 | arg := 99 412 | reply := "" 413 | e.Call("JunkServer.Handler2", arg, &reply) 414 | wanted := "handler2-" + strconv.Itoa(arg) 415 | if reply != wanted { 416 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 417 | } 418 | } 419 | dur := time.Since(t0).Seconds() 420 | 421 | if dur > 0.03 { 422 | t.Fatalf("RPC took too long (%v) after Enable", dur) 423 | } 424 | 425 | for ii := 0; ii < nrpcs; ii++ { 426 | <-ch 427 | } 428 | 429 | js.mu.Lock() 430 | defer js.mu.Unlock() 431 | if len(js.log2) != 1 { 432 | t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2)) 433 | } 434 | 435 | n := rn.GetCount(1000) 436 | if n != 1 { 437 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1) 438 | } 439 | } 440 | 441 | // 442 | // if an RPC is stuck in a server, and the server 443 | // is killed with DeleteServer(), does the RPC 444 | // get un-stuck? 445 | // 446 | func TestKilled(t *testing.T) { 447 | runtime.GOMAXPROCS(4) 448 | 449 | rn := MakeNetwork() 450 | 451 | e := rn.MakeEnd("end1-99") 452 | 453 | js := &JunkServer{} 454 | svc := MakeService(js) 455 | 456 | rs := MakeServer() 457 | rs.AddService(svc) 458 | rn.AddServer("server99", rs) 459 | 460 | rn.Connect("end1-99", "server99") 461 | rn.Enable("end1-99", true) 462 | 463 | doneCh := make(chan bool) 464 | go func() { 465 | reply := 0 466 | ok := e.Call("JunkServer.Handler3", 99, &reply) 467 | doneCh <- ok 468 | }() 469 | 470 | time.Sleep(1000 * time.Millisecond) 471 | 472 | select { 473 | case <-doneCh: 474 | t.Fatalf("Handler3 should not have returned yet") 475 | case <-time.After(100 * time.Millisecond): 476 | } 477 | 478 | rn.DeleteServer("server99") 479 | 480 | select { 481 | case x := <-doneCh: 482 | if x != false { 483 | t.Fatalf("Handler3 returned successfully despite DeleteServer()") 484 | } 485 | case <-time.After(100 * time.Millisecond): 486 | t.Fatalf("Handler3 should return after DeleteServer()") 487 | } 488 | } 489 | 490 | func TestBenchmark(t *testing.T) { 491 | runtime.GOMAXPROCS(4) 492 | 493 | rn := MakeNetwork() 494 | 495 | e := rn.MakeEnd("end1-99") 496 | 497 | js := &JunkServer{} 498 | svc := MakeService(js) 499 | 500 | rs := MakeServer() 501 | rs.AddService(svc) 502 | rn.AddServer("server99", rs) 503 | 504 | rn.Connect("end1-99", "server99") 505 | rn.Enable("end1-99", true) 506 | 507 | t0 := time.Now() 508 | n := 100000 509 | for iters := 0; iters < n; iters++ { 510 | reply := "" 511 | e.Call("JunkServer.Handler2", 111, &reply) 512 | if reply != "handler2-111" { 513 | t.Fatalf("wrong reply from Handler2") 514 | } 515 | } 516 | fmt.Printf("%v for %v\n", time.Since(t0), n) 517 | // march 2016, rtm laptop, 22 microseconds per RPC 518 | } 519 | -------------------------------------------------------------------------------- /src/main/diff.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happyer/distributed-computing/ea8e5dcfb122d5955f454350485c773b0ff51d3d/src/main/diff.out -------------------------------------------------------------------------------- /src/main/ii.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "os" 4 | import "fmt" 5 | import ( 6 | "mapreduce" 7 | "strings" 8 | "unicode" 9 | "bytes" 10 | "strconv" 11 | ) 12 | 13 | // The mapping function is called once for each piece of the input. 
14 | // In this framework, the key is the name of the file that is being processed, 15 | // and the value is the file's contents. The return value should be a slice of 16 | // key/value pairs, each represented by a mapreduce.KeyValue. 17 | func mapF(document string, value string) (res []mapreduce.KeyValue) { 18 | // TODO: you should complete this to do the inverted index challenge 19 | 20 | f := func(c rune) bool { 21 | return !unicode.IsLetter(c) && !unicode.IsNumber(c) 22 | } 23 | 24 | words := strings.FieldsFunc(value, f) 25 | 26 | for _, key := range words { 27 | res = append(res, mapreduce.KeyValue{key, document}) 28 | } 29 | return res 30 | 31 | } 32 | 33 | // The reduce function is called once for each key generated by Map, with a 34 | // list of that key's string value (merged across all inputs). The return value 35 | // should be a single output value for that key. 36 | func reduceF(key string, values []string) string { 37 | // TODO: you should complete this to do the inverted index challenge 38 | RemoveDuplicates(&values) 39 | var buffer bytes.Buffer 40 | //buffer.WriteString(key)q 41 | //buffer.WriteString(":") 42 | buffer.WriteString(strconv.Itoa(len(values))) 43 | buffer.WriteString(" ") 44 | buffer.WriteString(strings.Join(values,",")) 45 | 46 | return buffer.String() 47 | } 48 | 49 | func RemoveDuplicates(xs *[]string) { 50 | found := make(map[string]bool) 51 | j := 0 52 | for i, x := range *xs { 53 | if !found[x] { 54 | found[x] = true 55 | (*xs)[j] = (*xs)[i] 56 | j++ 57 | } 58 | } 59 | *xs = (*xs)[:j] 60 | } 61 | 62 | // Can be run in 3 ways: 63 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 64 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. xN.txt) 65 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 66 | func main() { 67 | if len(os.Args) < 4 { 68 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 69 | } else if os.Args[1] == "master" { 70 | var mr *mapreduce.Master 71 | if os.Args[2] == "sequential" { 72 | mr = mapreduce.Sequential("iiseq", os.Args[3:], 3, mapF, reduceF) 73 | } else { 74 | mr = mapreduce.Distributed("iiseq", os.Args[3:], 3, os.Args[2]) 75 | } 76 | mr.Wait() 77 | } else { 78 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/mr-challenge.txt: -------------------------------------------------------------------------------- 1 | women: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 2 | won: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 3 | wonderful: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 4 | words: 15 
pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 5 | worked: 15 pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 6 | worse: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 7 | wounded: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 8 | yes: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 9 | younger: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 10 | yours: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 11 | -------------------------------------------------------------------------------- /src/main/mr-testout.txt: -------------------------------------------------------------------------------- 1 | he: 34077 2 | was: 37044 3 | that: 37495 4 | I: 44502 5 | in: 46092 6 | a: 60558 7 | to: 74357 8 | of: 79727 9 | and: 93990 10 | the: 154024 11 | -------------------------------------------------------------------------------- /src/main/test-ii.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run ii.go master sequential pg-*.txt 3 | sort -k1,1 mrtmp.iiseq | sort -snk2,2 | grep -v '16' | tail -10 | diff - mr-challenge.txt > diff.out 4 | if [ -s diff.out ] 5 | then 6 | echo "Failed test. Output should be as in mr-challenge.txt. Your output differs as follows (from diff.out):" > /dev/stderr 7 | cat diff.out 8 | else 9 | echo "Passed test" > /dev/stderr 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /src/main/test-wc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run wc.go master sequential pg-*.txt 3 | sort -n -k2 mrtmp.wcseq | tail -10 | diff - mr-testout.txt > diff.out 4 | if [ -s diff.out ] 5 | then 6 | echo "Failed test. Output should be as in mr-testout.txt. 
Your output differs as follows (from diff.out):" > /dev/stderr 7 | cat diff.out 8 | else 9 | echo "Passed test" > /dev/stderr 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /src/main/wc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "mapreduce" 6 | "os" 7 | "unicode" 8 | "strings" 9 | "strconv" 10 | ) 11 | 12 | // The mapping function is called once for each piece of the input. 13 | // In this framework, the key is the name of the file that is being processed, 14 | // and the value is the file's contents. The return value should be a slice of 15 | // key/value pairs, each represented by a mapreduce.KeyValue. 16 | func mapF(document string, value string) (res []mapreduce.KeyValue) { 17 | // TODO: you have to write this function 18 | f := func(c rune) bool { 19 | return !unicode.IsLetter(c) && !unicode.IsNumber(c) 20 | } 21 | 22 | words := strings.FieldsFunc(value, f) 23 | 24 | for _, key := range words { 25 | res = append(res, mapreduce.KeyValue{key, "1"}) 26 | } 27 | return res 28 | 29 | } 30 | 31 | // The reduce function is called once for each key generated by Map, with a 32 | // list of that key's string value (merged across all inputs). The return value 33 | // should be a single output value for that key. 34 | func reduceF(key string, values []string) string { 35 | // TODO: you also have to write this function 36 | count := 0 37 | for _, value := range values { 38 | num, _ := strconv.ParseInt(value, 10, 64) 39 | count = count + int(num) 40 | } 41 | return strconv.Itoa(count) 42 | } 43 | 44 | // Can be run in 3 ways: 45 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 46 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. 
xN.txt) 47 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 48 | func main() { 49 | //fmt.Printf(" arg2:%s \n args3: %s \n",os.Args[2],os.Args[3:]) 50 | if len(os.Args) < 4 { 51 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 52 | } else if os.Args[1] == "master" { 53 | var mr *mapreduce.Master 54 | if os.Args[2] == "sequential" { 55 | mr = mapreduce.Sequential("wcseq", os.Args[3:], 3, mapF, reduceF) 56 | } else { 57 | mr = mapreduce.Distributed("wcseq", os.Args[3:], 3, os.Args[2]) 58 | } 59 | mr.Wait() 60 | } else { 61 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/mapreduce/README.md: -------------------------------------------------------------------------------- 1 | # 老司机带你用 Go 语言实现 MapReduce 框架 2 | 3 |   MapReduce 是 Google 提出的一个软件架构,用于大规模数据集(大于1TB)的并行运算。简而言之,就是将任务切分成很小的任务然后一个一个区的执行最后汇总,这就像小时候我们老师经常教育我们一样,大事化小,小事化了(瞬间感觉那时候老师好言简意赅啊!!!)思想就这么一个思想,那么按照这个思想在现代软件定义一切的世界里面,我们怎么运用这样的方式来解决海量数据的处理,这篇就告诉你一个这样的一个简单的实现使用 Go 语言。 4 | 5 | ## 上车 6 | 7 |   简单介绍一下几个概念: 8 | 9 |   概念“Map(映射)”和“Reduce(归纳)”,及他们的主要思想,都是从函数式编程语言借来的,还有从矢量编程语言借来的特性。当前的软件实现是指定一个 Map(映射)函数,用来把一组键值对映射成一组新的键值对,指定并发的 Reduce(归纳)函数,用来保证所有映射的键值对中的每一个共享相同的键组。 10 | 11 |   以一个例子为简单的开始: 12 | 13 |   词频的统计(WorldCount),在现实的需求的上面可能我们可能有这样的一个需求,就是计算出一篇文章里面出现每个单词的个数。具体到生活就是,就算 Top N 的结果,比如全校要开表彰大会,找出 10 个好学生这样的 Top N 这样的例子比比皆是,而 World Count 就是他的一个实现,只是最终的结果只取出排在前面的结果而已。 14 | 15 |   有了上面找出 10 个好学生的需求的时候,我们来想想怎么去实现它呢,很显然这个需求可能是校长在开会的时候提出来的,那么具体的实现就是每个年级组长是不是要把每个年级排名前 10 的学生找出来,然后年级组长的领导,将这些信息在汇总取出 前 10 的学生咯,那么具体的每个年级怎么做呢?同理,将每个班的前10名学生找出来,然后汇总到年级部门咯。 16 | 17 | ## 发车 18 | 19 |   基本概览和思路已经明白了,现在开始构建整个 MapReduce 框架了,首先我们明确一个思想就是,将任务划分成合适的大小,然后对其进行计算,然后将每一步计算的的结果,进行一个汇总合并的过程。那么这两个过程我们先分别定义为Map 和Reduce 过程。 20 | 21 |   还是以 World Count 这个为例子: 22 | 23 |   Map 的处理过程就是读取给定的文件,将文件里面的每个单词的出现频率初始化为 1。 24 | 25 |   Reduce 的处理过程就是将相同的单词,数据进行一个累加的过程。那么,我们 MapReduce 框架的目的是调用在合适的时候调用这个 Map 和 Reduce 的过程。 26 | 在 common_map.go 里面 doMap 方法就是给定文件,读取数据然后,调用 Map 这个过程,代码里面有注释,在这里进行一个简单概述一下主要有这几个步骤: 27 | 28 | 1. 读取文件; 29 | 2. 将读文件的内容,调用用户 Map 函数,生产对于的 KeyValue 值; 30 | 3. 最后按照 KeyValue 里面的 Key 进行分区,将内容写入到文件里面,以便于后面的 Reduce 过程执行; 31 | 32 | ``` go 33 | func doMap( 34 | jobName string, // // the name of the MapReduce job 35 | mapTaskNumber int, // which map task this is 36 | inFile string, 37 | nReduce int, // the number of reduce task that will be run 38 | mapF func(file string, contents string) []KeyValue, 39 | ) { 40 | 41 | //setp 1 read file 42 | contents, err := ioutil.ReadFile(inFile) 43 | if err != nil { 44 | log.Fatal("do map error for inFile ",err) 45 | } 46 | //setp 2 call user user-map method ,to get kv 47 | kvResult := mapF(inFile, string(contents)) 48 | 49 | /** 50 | * setp 3 use key of kv generator nReduce file ,partition 51 | * a. create tmpFiles 52 | * b. create encoder for tmpFile to write contents 53 | * c. 
partition by key, then write tmpFile 54 | */ 55 | 56 | var tmpFiles [] *os.File = make([] *os.File, nReduce) 57 | var encoders [] *json.Encoder = make([] *json.Encoder, nReduce) 58 | 59 | for i := 0; i < nReduce; i++ { 60 | tmpFileName := reduceName(jobName,mapTaskNumber,i) 61 | tmpFiles[i],err = os.Create(tmpFileName) 62 | if err!=nil { 63 | log.Fatal(err) 64 | } 65 | 66 | defer tmpFiles[i].Close() 67 | encoders[i] = json.NewEncoder(tmpFiles[i]) 68 | if err!=nil { 69 | log.Fatal(err) 70 | } 71 | } 72 | 73 | for _ , kv := range kvResult { 74 | hashKey := int(ihash(kv.Key)) % nReduce 75 | err := encoders[hashKey].Encode(&kv) 76 | if err!=nil { 77 | log.Fatal("do map encoders ",err) 78 | } 79 | } 80 | } 81 | ``` 82 | 83 |   doReduce 函数在 common_reduce.go 里面,主要步骤: 84 | 85 | 1. 读取 doMap 过程中产生的中间文件; 86 | 2. 按照读取相同文件中的 Key 进新按照字典顺序进行排序; 87 | 3. 遍历读取的 KeyValue,并且调用用户的 Reduce 方法,将计算的结果继续写入到文件中; 88 | 89 | ``` go 90 | func doReduce( 91 | jobName string, // the name of the whole MapReduce job 92 | reduceTaskNumber int, // which reduce task this is 93 | nMap int, // the number of map tasks that were run ("M" in the paper) 94 | reduceF func(key string, values []string) string, 95 | ) { 96 | 97 | // file.Close() 98 | 99 | //setp 1,read map generator file ,same key merge put map[string][]string 100 | 101 | kvs := make(map[string][]string) 102 | 103 | for i := 0; i < nMap; i++ { 104 | fileName := reduceName(jobName, i, reduceTaskNumber) 105 | file, err := os.Open(fileName) 106 | if err != nil { 107 | log.Fatal("doReduce1: ", err) 108 | } 109 | 110 | dec := json.NewDecoder(file) 111 | 112 | for { 113 | var kv KeyValue 114 | err = dec.Decode(&kv) 115 | if err != nil { 116 | break 117 | } 118 | 119 | _, ok := kvs[kv.Key] 120 | if !ok { 121 | kvs[kv.Key] = []string{} 122 | } 123 | kvs[kv.Key] = append(kvs[kv.Key], kv.Value) 124 | } 125 | file.Close() 126 | } 127 | 128 | var keys []string 129 | 130 | for k := range kvs { 131 | keys = append(keys, k) 132 | } 133 | 134 | //setp 2 sort by keys 135 | sort.Strings(keys) 136 | 137 | //setp 3 create result file 138 | p := mergeName(jobName, reduceTaskNumber) 139 | file, err := os.Create(p) 140 | if err != nil { 141 | log.Fatal("doReduce2: ceate ", err) 142 | } 143 | enc := json.NewEncoder(file) 144 | 145 | //setp 4 call user reduce each key of kvs 146 | for _, k := range keys { 147 | res := reduceF(k, kvs[k]) 148 | enc.Encode(KeyValue{k, res}) 149 | } 150 | 151 | file.Close() 152 | } 153 | ``` 154 | 155 |   Merge 过程 156 | 157 |   当然最后就是将每个 Reduce 产生的结果进行一个Merge 的过程,在 merge 的过程中,同样也是需要进行按照 Key 进行字典顺序排列,然后写入到最终的文件中。代码跟 reduce 还是相似的,这里就不自爱赘述了。 158 | 159 |   使用 go 的多线程来实现分布式的任务执行,这里主要是是 schedule.go 里面的 schedule 方法,主要是步骤: 160 | 161 | 1. 通过不同的阶段( Map or Reduce ),获取到需要执行多少个 map (reduce),然后调用远程的 worker.go 里面的 DoTask 方法; 162 | 2. 
等待所有的任务完成,然后才结束。这里主要使用了go 语言的一些特性,[Go RPC documentation](https://golang.org/pkg/net/rpc/) 和 [Concurrency in Go](https://golang.org/doc/effective_go.html#concurrency)。 163 | 164 | ``` go 165 | func (mr *Master) schedule(phase jobPhase) { 166 | var ntasks int 167 | var nios int // number of inputs (for reduce) or outputs (for map) 168 | switch phase { 169 | case mapPhase: 170 | ntasks = len(mr.files) 171 | nios = mr.nReduce 172 | case reducePhase: 173 | ntasks = mr.nReduce 174 | nios = len(mr.files) 175 | } 176 | 177 | fmt.Printf("Schedule: %v %v tasks (%d I/Os)\n", ntasks, phase, nios) 178 | 179 | //use go routing,worker rpc executor task, 180 | done := make(chan bool) 181 | for i := 0; i < ntasks; i++ { 182 | go func(number int) { 183 | 184 | args := DoTaskArgs{mr.jobName, mr.files[ntasks], phase, number, nios} 185 | var worker string 186 | reply := new(struct{}) 187 | ok := false 188 | for ok != true { 189 | worker = <- mr.registerChannel 190 | ok = call(worker, "Worker.DoTask", args, reply) 191 | } 192 | done <- true 193 | mr.registerChannel <- worker 194 | }(i) 195 | 196 | } 197 | 198 | //wait for all task is complate 199 | for i := 0; i< ntasks; i++ { 200 | <- done 201 | } 202 | fmt.Printf("Schedule: %v phase done\n", phase) 203 | } 204 | ``` 205 | 206 | ## 到站 207 | 208 | - 运行测试: 209 | 210 | ![运行测试](../../img/test.png) 211 | 212 | - 测试结果: 213 | 214 | ![测试结果](../../img/check.png) 215 | 216 | - 测试倒排结果: 217 | 218 | ![倒排索引结果](../../img/ii.png) 219 | -------------------------------------------------------------------------------- /src/mapreduce/common.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | ) 7 | 8 | // Debugging enabled? 9 | const debugEnabled = true 10 | 11 | // DPrintf will only print if the debugEnabled const has been set to true 12 | func debug(format string, a ...interface{}) (n int, err error) { 13 | if debugEnabled { 14 | n, err = fmt.Printf(format, a...) 15 | } 16 | return 17 | } 18 | 19 | // jobPhase indicates whether a task is scheduled as a map or reduce task. 20 | type jobPhase string 21 | 22 | const ( 23 | mapPhase jobPhase = "Map" 24 | reducePhase = "Reduce" 25 | ) 26 | 27 | // KeyValue is a type used to hold the key/value pairs passed to the map and 28 | // reduce functions. 29 | type KeyValue struct { 30 | Key string 31 | Value string 32 | } 33 | 34 | // reduceName constructs the name of the intermediate file which map task 35 | // produces for reduce task . 36 | func reduceName(jobName string, mapTask int, reduceTask int) string { 37 | return "mrtmp." + jobName + "-" + strconv.Itoa(mapTask) + "-" + strconv.Itoa(reduceTask) 38 | } 39 | 40 | // mergeName constructs the name of the output file of reduce task 41 | func mergeName(jobName string, reduceTask int) string { 42 | return "mrtmp." + jobName + "-res-" + strconv.Itoa(reduceTask) 43 | } 44 | -------------------------------------------------------------------------------- /src/mapreduce/common_map.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "hash/fnv" 5 | "io/ioutil" 6 | "log" 7 | "encoding/json" 8 | "os" 9 | ) 10 | 11 | // doMap does the job of a map worker: it reads one of the input files 12 | // (inFile), calls the user-defined map function (mapF) for that file's 13 | // contents, and partitions the output into nReduce intermediate files. 
14 | 15 | func doMap( 16 | jobName string, // the name of the MapReduce job 17 | mapTaskNumber int, // which map task this is 18 | inFile string, 19 | nReduce int, // the number of reduce task that will be run ("R" in the paper) 20 | mapF func(file string, contents string) []KeyValue, 21 | ) { 22 | // TODO: 23 | // You will need to write this function. 24 | // You can find the filename for this map task's input to reduce task number 25 | // r using reduceName(jobName, mapTaskNumber, r). The ihash function (given 26 | // below doMap) should be used to decide which file a given key belongs into. 27 | // 28 | // The intermediate output of a map task is stored in the file 29 | // system as multiple files whose name indicates which map task produced 30 | // them, as well as which reduce task they are for. Coming up with a 31 | // scheme for how to store the key/value pairs on disk can be tricky, 32 | // especially when taking into account that both keys and values could 33 | // contain newlines, quotes, and any other character you can think of. 34 | // 35 | // One format often used for serializing data to a byte stream that the 36 | // other end can correctly reconstruct is JSON. You are not required to 37 | // use JSON, but as the output of the reduce tasks *must* be JSON, 38 | // familiarizing yourself with it here may prove useful. You can write 39 | // out a data structure as a JSON string to a file using the commented 40 | // code below. The corresponding decoding functions can be found in 41 | // common_reduce.go. 42 | // 43 | // enc := json.NewEncoder(file) 44 | // for _, kv := ... { 45 | // err := enc.Encode(&kv) 46 | // 47 | // Remember to close the file after you have written all the values! 48 | 49 | 50 | //setp 1 read file 51 | contents, err := ioutil.ReadFile(inFile) 52 | if err != nil { 53 | log.Fatal("do map error for inFile ",err) 54 | } 55 | //setp 2 call user user-map method ,to get kv 56 | kvResult := mapF(inFile, string(contents)) 57 | 58 | /** 59 | * setp 3 use key of kv generator nReduce file ,partition 60 | * a. create tmpFiles 61 | * b. create encoder for tmpFile to write contents 62 | * c. 
partition by key, then write tmpFile 63 | */ 64 | 65 | var tmpFiles [] *os.File = make([] *os.File, nReduce) 66 | var encoders [] *json.Encoder = make([] *json.Encoder, nReduce) 67 | 68 | for i := 0; i < nReduce; i++ { 69 | tmpFileName := reduceName(jobName,mapTaskNumber,i) 70 | tmpFiles[i],err = os.Create(tmpFileName) 71 | if err!=nil { 72 | log.Fatal(err) 73 | } 74 | 75 | defer tmpFiles[i].Close() 76 | encoders[i] = json.NewEncoder(tmpFiles[i]) 77 | if err!=nil { 78 | log.Fatal(err) 79 | } 80 | } 81 | 82 | for _ , kv := range kvResult { 83 | hashKey := int(ihash(kv.Key)) % nReduce 84 | err := encoders[hashKey].Encode(&kv) 85 | if err!=nil { 86 | log.Fatal("do map encoders ",err) 87 | } 88 | } 89 | 90 | } 91 | 92 | func ihash(s string) uint32 { 93 | h := fnv.New32a() 94 | h.Write([]byte(s)) 95 | return h.Sum32() 96 | } 97 | -------------------------------------------------------------------------------- /src/mapreduce/common_reduce.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "os" 5 | "log" 6 | "encoding/json" 7 | "sort" 8 | ) 9 | 10 | // doReduce does the job of a reduce worker: it reads the intermediate 11 | // key/value pairs (produced by the map phase) for this task, sorts the 12 | // intermediate key/value pairs by key, calls the user-defined reduce function 13 | // (reduceF) for each key, and writes the output to disk. 14 | func doReduce( 15 | jobName string, // the name of the whole MapReduce job 16 | reduceTaskNumber int, // which reduce task this is 17 | nMap int, // the number of map tasks that were run ("M" in the paper) 18 | reduceF func(key string, values []string) string, 19 | ) { 20 | 21 | // file.Close() 22 | 23 | //setp 1,read map generator file ,same key merge put map[string][]string 24 | 25 | kvs := make(map[string][]string) 26 | 27 | for i := 0; i < nMap; i++ { 28 | fileName := reduceName(jobName, i, reduceTaskNumber) 29 | file, err := os.Open(fileName) 30 | if err != nil { 31 | log.Fatal("doReduce1: ", err) 32 | } 33 | 34 | dec := json.NewDecoder(file) 35 | 36 | for { 37 | var kv KeyValue 38 | err = dec.Decode(&kv) 39 | if err != nil { 40 | break 41 | } 42 | 43 | _, ok := kvs[kv.Key] 44 | if !ok { 45 | kvs[kv.Key] = []string{} 46 | } 47 | kvs[kv.Key] = append(kvs[kv.Key], kv.Value) 48 | } 49 | file.Close() 50 | } 51 | 52 | var keys []string 53 | 54 | for k := range kvs { 55 | keys = append(keys, k) 56 | } 57 | 58 | 59 | //setp 2 sort by keys 60 | sort.Strings(keys) 61 | 62 | //setp 3 create result file 63 | p := mergeName(jobName, reduceTaskNumber) 64 | file, err := os.Create(p) 65 | if err != nil { 66 | log.Fatal("doReduce2: ceate ", err) 67 | } 68 | enc := json.NewEncoder(file) 69 | 70 | //setp 4 call user reduce each key of kvs 71 | for _, k := range keys { 72 | res := reduceF(k, kvs[k]) 73 | enc.Encode(KeyValue{k, res}) 74 | } 75 | 76 | file.Close() 77 | } 78 | -------------------------------------------------------------------------------- /src/mapreduce/common_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "net/rpc" 6 | ) 7 | 8 | // What follows are RPC types and methods. 9 | // Field names must start with capital letters, otherwise RPC will break. 10 | 11 | // DoTaskArgs holds the arguments that are passed to a worker when a job is 12 | // scheduled on it. 
13 | type DoTaskArgs struct { 14 | JobName string 15 | File string // the file to process 16 | Phase jobPhase // are we in mapPhase or reducePhase? 17 | TaskNumber int // this task's index in the current phase 18 | 19 | // NumOtherPhase is the total number of tasks in other phase; mappers 20 | // need this to compute the number of output bins, and reducers needs 21 | // this to know how many input files to collect. 22 | NumOtherPhase int 23 | } 24 | 25 | // ShutdownReply is the response to a WorkerShutdown. 26 | // It holds the number of tasks this worker has processed since it was started. 27 | type ShutdownReply struct { 28 | Ntasks int 29 | } 30 | 31 | // RegisterArgs is the argument passed when a worker registers with the master. 32 | type RegisterArgs struct { 33 | Worker string 34 | } 35 | 36 | // call() sends an RPC to the rpcname handler on server srv 37 | // with arguments args, waits for the reply, and leaves the 38 | // reply in reply. the reply argument should be the address 39 | // of a reply structure. 40 | // 41 | // call() returns true if the server responded, and false 42 | // if call() was not able to contact the server. in particular, 43 | // reply's contents are valid if and only if call() returned true. 44 | // 45 | // you should assume that call() will time out and return an 46 | // error after a while if it doesn't get a reply from the server. 47 | // 48 | // please use call() to send all RPCs, in master.go, mapreduce.go, 49 | // and worker.go. please don't change this function. 50 | // 51 | func call(srv string, rpcname string, 52 | args interface{}, reply interface{}) bool { 53 | c, errx := rpc.Dial("unix", srv) 54 | if errx != nil { 55 | return false 56 | } 57 | defer c.Close() 58 | 59 | err := c.Call(rpcname, args, reply) 60 | if err == nil { 61 | return true 62 | } 63 | 64 | fmt.Println(err) 65 | return false 66 | } 67 | -------------------------------------------------------------------------------- /src/mapreduce/master.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "sync" 7 | ) 8 | 9 | // Master holds all the state that the master needs to keep track of. Of 10 | // particular importance is registerChannel, the channel that notifies the 11 | // master of workers that have gone idle and are in need of new work. 12 | type Master struct { 13 | sync.Mutex 14 | 15 | address string 16 | registerChannel chan string 17 | doneChannel chan bool 18 | workers []string // protected by the mutex 19 | 20 | // Per-task information 21 | jobName string // Name of currently executing job 22 | files []string // Input files 23 | nReduce int // Number of reduce partitions 24 | 25 | shutdown chan struct{} 26 | l net.Listener 27 | stats []int 28 | } 29 | 30 | // Register is an RPC method that is called by workers after they have started 31 | // up to report that they are ready to receive tasks. 
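// The send on mr.registerChannel below happens in a fresh goroutine so that
// Register returns immediately even if schedule() is not currently reading
// from the channel (for example between the map and reduce phases).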
32 | func (mr *Master) Register(args *RegisterArgs, _ *struct{}) error { 33 | mr.Lock() 34 | defer mr.Unlock() 35 | debug("Register: worker %s\n", args.Worker) 36 | mr.workers = append(mr.workers, args.Worker) 37 | go func() { 38 | mr.registerChannel <- args.Worker 39 | }() 40 | return nil 41 | } 42 | 43 | // newMaster initializes a new Map/Reduce Master 44 | func newMaster(master string) (mr *Master) { 45 | mr = new(Master) 46 | mr.address = master 47 | mr.shutdown = make(chan struct{}) 48 | mr.registerChannel = make(chan string) 49 | mr.doneChannel = make(chan bool) 50 | return 51 | } 52 | 53 | // Sequential runs map and reduce tasks sequentially, waiting for each task to 54 | // complete before scheduling the next. 55 | func Sequential(jobName string, files []string, nreduce int, 56 | mapF func(string, string) []KeyValue, 57 | reduceF func(string, []string) string, 58 | ) (mr *Master) { 59 | mr = newMaster("master") 60 | go mr.run(jobName, files, nreduce, func(phase jobPhase) { 61 | switch phase { 62 | case mapPhase: 63 | for i, f := range mr.files { 64 | doMap(mr.jobName, i, f, mr.nReduce, mapF) 65 | } 66 | case reducePhase: 67 | for i := 0; i < mr.nReduce; i++ { 68 | doReduce(mr.jobName, i, len(mr.files), reduceF) 69 | } 70 | } 71 | }, func() { 72 | mr.stats = []int{len(files) + nreduce} 73 | }) 74 | return 75 | } 76 | 77 | // Distributed schedules map and reduce tasks on workers that register with the 78 | // master over RPC. 79 | func Distributed(jobName string, files []string, nreduce int, master string) (mr *Master) { 80 | mr = newMaster(master) 81 | mr.startRPCServer() 82 | go mr.run(jobName, files, nreduce, mr.schedule, func() { 83 | mr.stats = mr.killWorkers() 84 | mr.stopRPCServer() 85 | }) 86 | return 87 | } 88 | 89 | // run executes a mapreduce job on the given number of mappers and reducers. 90 | // 91 | // First, it divides up the input file among the given number of mappers, and 92 | // schedules each task on workers as they become available. Each map task bins 93 | // its output in a number of bins equal to the given number of reduce tasks. 94 | // Once all the mappers have finished, workers are assigned reduce tasks. 95 | // 96 | // When all tasks have been completed, the reducer outputs are merged, 97 | // statistics are collected, and the master is shut down. 98 | // 99 | // Note that this implementation assumes a shared file system. 100 | func (mr *Master) run(jobName string, files []string, nreduce int, 101 | schedule func(phase jobPhase), 102 | finish func(), 103 | ) { 104 | mr.jobName = jobName 105 | mr.files = files 106 | mr.nReduce = nreduce 107 | 108 | fmt.Printf("%s: Starting Map/Reduce task %s\n", mr.address, mr.jobName) 109 | 110 | schedule(mapPhase) 111 | schedule(reducePhase) 112 | finish() 113 | mr.merge() 114 | 115 | fmt.Printf("%s: Map/Reduce task completed\n", mr.address) 116 | 117 | mr.doneChannel <- true 118 | } 119 | 120 | // Wait blocks until the currently scheduled work has completed. 121 | // This happens when all tasks have scheduled and completed, the final output 122 | // have been computed, and all workers have been shut down. 123 | func (mr *Master) Wait() { 124 | <-mr.doneChannel 125 | } 126 | 127 | // killWorkers cleans up all workers by sending each one a Shutdown RPC. 128 | // It also collects and returns the number of tasks each worker has performed. 
129 | func (mr *Master) killWorkers() []int { 130 | mr.Lock() 131 | defer mr.Unlock() 132 | ntasks := make([]int, 0, len(mr.workers)) 133 | for _, w := range mr.workers { 134 | debug("Master: shutdown worker %s\n", w) 135 | var reply ShutdownReply 136 | ok := call(w, "Worker.Shutdown", new(struct{}), &reply) 137 | if ok == false { 138 | fmt.Printf("Master: RPC %s shutdown error\n", w) 139 | } else { 140 | ntasks = append(ntasks, reply.Ntasks) 141 | } 142 | } 143 | return ntasks 144 | } 145 | -------------------------------------------------------------------------------- /src/mapreduce/master_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/rpc" 8 | "os" 9 | ) 10 | 11 | // Shutdown is an RPC method that shuts down the Master's RPC server. 12 | func (mr *Master) Shutdown(_, _ *struct{}) error { 13 | debug("Shutdown: registration server\n") 14 | close(mr.shutdown) 15 | mr.l.Close() // causes the Accept to fail 16 | return nil 17 | } 18 | 19 | // startRPCServer staarts the Master's RPC server. It continues accepting RPC 20 | // calls (Register in particular) for as long as the worker is alive. 21 | func (mr *Master) startRPCServer() { 22 | rpcs := rpc.NewServer() 23 | rpcs.Register(mr) 24 | os.Remove(mr.address) // only needed for "unix" 25 | l, e := net.Listen("unix", mr.address) 26 | if e != nil { 27 | log.Fatal("RegstrationServer", mr.address, " error: ", e) 28 | } 29 | mr.l = l 30 | 31 | // now that we are listening on the master address, can fork off 32 | // accepting connections to another thread. 33 | go func() { 34 | loop: 35 | for { 36 | select { 37 | case <-mr.shutdown: 38 | break loop 39 | default: 40 | } 41 | conn, err := mr.l.Accept() 42 | if err == nil { 43 | go func() { 44 | rpcs.ServeConn(conn) 45 | conn.Close() 46 | }() 47 | } else { 48 | debug("RegistrationServer: accept error", err) 49 | break 50 | } 51 | } 52 | debug("RegistrationServer: done\n") 53 | }() 54 | } 55 | 56 | // stopRPCServer stops the master RPC server. 57 | // This must be done through an RPC to avoid race conditions between the RPC 58 | // server thread and the current thread. 
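// Concretely, it sends a Master.Shutdown RPC to the master's own address; the
// Shutdown handler above closes mr.shutdown and the listener, which lets the
// accept loop started in startRPCServer() exit cleanly.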
59 | func (mr *Master) stopRPCServer() { 60 | var reply ShutdownReply 61 | ok := call(mr.address, "Master.Shutdown", new(struct{}), &reply) 62 | if ok == false { 63 | fmt.Printf("Cleanup: RPC %s error\n", mr.address) 64 | } 65 | debug("cleanupRegistration: done\n") 66 | } 67 | -------------------------------------------------------------------------------- /src/mapreduce/master_splitmerge.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "fmt" 7 | "log" 8 | "os" 9 | "sort" 10 | ) 11 | 12 | // merge combines the results of the many reduce jobs into a single output file 13 | // XXX use merge sort 14 | func (mr *Master) merge() { 15 | debug("Merge phase") 16 | kvs := make(map[string]string) 17 | for i := 0; i < mr.nReduce; i++ { 18 | p := mergeName(mr.jobName, i) 19 | fmt.Printf("Merge: read %s\n", p) 20 | file, err := os.Open(p) 21 | if err != nil { 22 | log.Fatal("Merge: ", err) 23 | } 24 | dec := json.NewDecoder(file) 25 | for { 26 | var kv KeyValue 27 | err = dec.Decode(&kv) 28 | if err != nil { 29 | break 30 | } 31 | kvs[kv.Key] = kv.Value 32 | } 33 | file.Close() 34 | } 35 | var keys []string 36 | for k := range kvs { 37 | keys = append(keys, k) 38 | } 39 | sort.Strings(keys) 40 | 41 | file, err := os.Create("mrtmp." + mr.jobName) 42 | if err != nil { 43 | log.Fatal("Merge: create ", err) 44 | } 45 | w := bufio.NewWriter(file) 46 | for _, k := range keys { 47 | fmt.Fprintf(w, "%s: %s\n", k, kvs[k]) 48 | } 49 | w.Flush() 50 | file.Close() 51 | } 52 | 53 | // removeFile is a simple wrapper around os.Remove that logs errors. 54 | func removeFile(n string) { 55 | err := os.Remove(n) 56 | if err != nil { 57 | log.Fatal("CleanupFiles ", err) 58 | } 59 | } 60 | 61 | // CleanupFiles removes all intermediate files produced by running mapreduce. 62 | func (mr *Master) CleanupFiles() { 63 | for i := range mr.files { 64 | for j := 0; j < mr.nReduce; j++ { 65 | removeFile(reduceName(mr.jobName, i, j)) 66 | } 67 | } 68 | for i := 0; i < mr.nReduce; i++ { 69 | removeFile(mergeName(mr.jobName, i)) 70 | } 71 | removeFile("mrtmp." + mr.jobName) 72 | } 73 | -------------------------------------------------------------------------------- /src/mapreduce/readme.go: -------------------------------------------------------------------------------- 1 | // Package mapreduce provides a simple mapreduce library with a sequential 2 | // implementation. Applications should normally call Distributed() [located in 3 | // master.go] to start a job, but may instead call Sequential() [also in 4 | // master.go] to get a sequential execution for debugging purposes. 5 | // 6 | // The flow of the mapreduce implementation is as follows: 7 | // 8 | // 1. The application provides a number of input files, a map function, a 9 | // reduce function, and the number of reduce tasks (nReduce). 10 | // 2. A master is created with this knowledge. It spins up an RPC server (see 11 | // master_rpc.go), and waits for workers to register (using the RPC call 12 | // Register() [defined in master.go]). As tasks become available (in steps 13 | // 4 and 5), schedule() [schedule.go] decides how to assign those tasks to 14 | // workers, and how to handle worker failures. 15 | // 3. The master considers each input file one map tasks, and makes a call to 16 | // doMap() [common_map.go] at least once for each task. It does so either 17 | // directly (when using Sequential()) or by issuing the DoJob RPC on a 18 | // worker [worker.go]. 
Each call to doMap() reads the appropriate file, 19 | // calls the map function on that file's contents, and produces nReduce 20 | // files for each map file. Thus, there will be #files x nReduce files 21 | // after all map tasks are done: 22 | // 23 | // f0-0, ..., f0-0, f0-<nReduce-1>, ..., 24 | // f<#files-1>-0, ... f<#files-1>-<nReduce-1>. 25 | // 26 | // 4. The master next makes a call to doReduce() [common_reduce.go] at least 27 | // once for each reduce task. As for doMap(), it does so either directly or 28 | // through a worker. doReduce() collects nReduce reduce files from each 29 | // map (f-*-<reduce>), and runs the reduce function on those files. This 30 | // produces nReduce result files. 31 | // 5. The master calls mr.merge() [master_splitmerge.go], which merges all 32 | // the nReduce files produced by the previous step into a single output. 33 | // 6. The master sends a Shutdown RPC to each of its workers, and then shuts 34 | // down its own RPC server. 35 | // 36 | // TODO: 37 | // You will have to write/modify doMap, doReduce, and schedule yourself. These 38 | // are located in common_map.go, common_reduce.go, and schedule.go 39 | // respectively. You will also have to write the map and reduce functions in 40 | // ../main/wc.go. 41 | // 42 | // You should not need to modify any other files, but reading them might be 43 | // useful in order to understand how the other methods fit into the overall 44 | // architecture of the system. 45 | package mapreduce 46 | -------------------------------------------------------------------------------- /src/mapreduce/schedule.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import "fmt" 4 | 5 | // schedule starts and waits for all tasks in the given phase (Map or Reduce).
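// The implementation below starts one goroutine per task; each goroutine pulls
// an idle worker from mr.registerChannel, issues the Worker.DoTask RPC, and, if
// the call fails, pulls another idle worker and retries until it succeeds, so a
// task handed to a crashed worker is re-executed elsewhere. The worker's name
// is pushed back onto registerChannel after signalling done, making it
// available for the next task.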
6 | func (mr *Master) schedule(phase jobPhase) { 7 | var ntasks int 8 | var nios int // number of inputs (for reduce) or outputs (for map) 9 | switch phase { 10 | case mapPhase: 11 | ntasks = len(mr.files) 12 | nios = mr.nReduce 13 | case reducePhase: 14 | ntasks = mr.nReduce 15 | nios = len(mr.files) 16 | } 17 | 18 | fmt.Printf("Schedule: %v %v tasks (%d I/Os)\n", ntasks, phase, nios) 19 | 20 | //use go routing,worker rpc executor task, 21 | done := make(chan bool) 22 | for i := 0; i < ntasks; i++ { 23 | go func(number int) { 24 | 25 | args := DoTaskArgs{mr.jobName, mr.files[number], phase, number, nios} 26 | var worker string 27 | reply := new(struct{}) 28 | ok := false 29 | for ok != true { 30 | worker = <- mr.registerChannel 31 | ok = call(worker, "Worker.DoTask", args, reply) 32 | } 33 | done <- true 34 | mr.registerChannel <- worker 35 | }(i) 36 | 37 | } 38 | 39 | //wait for all task is complate 40 | for i := 0; i< ntasks; i++ { 41 | <- done 42 | } 43 | fmt.Printf("Schedule: %v phase done\n", phase) 44 | } 45 | -------------------------------------------------------------------------------- /src/mapreduce/test_test.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | "bufio" 8 | "log" 9 | "os" 10 | "sort" 11 | "strconv" 12 | "strings" 13 | ) 14 | 15 | const ( 16 | nNumber = 100000 17 | nMap = 100 18 | nReduce = 50 19 | ) 20 | 21 | // Create input file with N numbers 22 | // Check if we have N numbers in output file 23 | 24 | // Split in words 25 | func MapFunc(file string, value string) (res []KeyValue) { 26 | debug("Map %v\n", value) 27 | words := strings.Fields(value) 28 | for _, w := range words { 29 | kv := KeyValue{w, ""} 30 | res = append(res, kv) 31 | } 32 | return 33 | } 34 | 35 | // Just return key 36 | func ReduceFunc(key string, values []string) string { 37 | for _, e := range values { 38 | debug("Reduce %s %v\n", key, e) 39 | } 40 | return "" 41 | } 42 | 43 | // Checks input file against output file: each input number should show up 44 | // in the output file in string sorted order 45 | func check(t *testing.T, files []string) { 46 | output, err := os.Open("mrtmp.test") 47 | if err != nil { 48 | log.Fatal("check: ", err) 49 | } 50 | defer output.Close() 51 | 52 | var lines []string 53 | for _, f := range files { 54 | input, err := os.Open(f) 55 | if err != nil { 56 | log.Fatal("check: ", err) 57 | } 58 | defer input.Close() 59 | inputScanner := bufio.NewScanner(input) 60 | for inputScanner.Scan() { 61 | lines = append(lines, inputScanner.Text()) 62 | } 63 | } 64 | 65 | sort.Strings(lines) 66 | 67 | outputScanner := bufio.NewScanner(output) 68 | i := 0 69 | for outputScanner.Scan() { 70 | var v1 int 71 | var v2 int 72 | text := outputScanner.Text() 73 | n, err := fmt.Sscanf(lines[i], "%d", &v1) 74 | if n == 1 && err == nil { 75 | n, err = fmt.Sscanf(text, "%d", &v2) 76 | } 77 | if err != nil || v1 != v2 { 78 | t.Fatalf("line %d: %d != %d err %v\n", i, v1, v2, err) 79 | } 80 | i++ 81 | } 82 | if i != nNumber { 83 | t.Fatalf("Expected %d lines in output\n", nNumber) 84 | } 85 | } 86 | 87 | // Workers report back how many RPCs they have processed in the Shutdown reply. 88 | // Check that they processed at least 1 RPC. 
89 | func checkWorker(t *testing.T, l []int) { 90 | for _, tasks := range l { 91 | if tasks == 0 { 92 | t.Fatalf("Some worker didn't do any work\n") 93 | } 94 | } 95 | } 96 | 97 | // Make input file 98 | func makeInputs(num int) []string { 99 | var names []string 100 | var i = 0 101 | for f := 0; f < num; f++ { 102 | names = append(names, fmt.Sprintf("824-mrinput-%d.txt", f)) 103 | file, err := os.Create(names[f]) 104 | if err != nil { 105 | log.Fatal("mkInput: ", err) 106 | } 107 | w := bufio.NewWriter(file) 108 | for i < (f+1)*(nNumber/num) { 109 | fmt.Fprintf(w, "%d\n", i) 110 | i++ 111 | } 112 | w.Flush() 113 | file.Close() 114 | } 115 | return names 116 | } 117 | 118 | 119 | 120 | 121 | // Cook up a unique-ish UNIX-domain socket name 122 | // in /var/tmp. can't use current directory since 123 | // AFS doesn't support UNIX-domain sockets. 124 | func port(suffix string) string { 125 | s := "/var/tmp/824-" 126 | s += strconv.Itoa(os.Getuid()) + "/" 127 | os.Mkdir(s, 0777) 128 | s += "mr" 129 | s += strconv.Itoa(os.Getpid()) + "-" 130 | s += suffix 131 | return s 132 | } 133 | 134 | func setup() *Master { 135 | files := makeInputs(nMap) 136 | master := port("master") 137 | mr := Distributed("test", files, nReduce, master) 138 | return mr 139 | } 140 | 141 | func cleanup(mr *Master) { 142 | mr.CleanupFiles() 143 | for _, f := range mr.files { 144 | removeFile(f) 145 | } 146 | } 147 | 148 | func TestSequentialSingle(t *testing.T) { 149 | mr := Sequential("test", makeInputs(1), 1, MapFunc, ReduceFunc) 150 | mr.Wait() 151 | check(t, mr.files) 152 | checkWorker(t, mr.stats) 153 | cleanup(mr) 154 | } 155 | 156 | func TestSequentialMany(t *testing.T) { 157 | mr := Sequential("test", makeInputs(5), 3, MapFunc, ReduceFunc) 158 | mr.Wait() 159 | check(t, mr.files) 160 | checkWorker(t, mr.stats) 161 | cleanup(mr) 162 | } 163 | 164 | func TestBasic(t *testing.T) { 165 | mr := setup() 166 | for i := 0; i < 2; i++ { 167 | go RunWorker(mr.address, port("worker"+strconv.Itoa(i)), 168 | MapFunc, ReduceFunc, -1) 169 | } 170 | mr.Wait() 171 | check(t, mr.files) 172 | checkWorker(t, mr.stats) 173 | cleanup(mr) 174 | } 175 | 176 | func TestOneFailure(t *testing.T) { 177 | mr := setup() 178 | // Start 2 workers that fail after 10 tasks 179 | go RunWorker(mr.address, port("worker"+strconv.Itoa(0)), 180 | MapFunc, ReduceFunc, 10) 181 | go RunWorker(mr.address, port("worker"+strconv.Itoa(1)), 182 | MapFunc, ReduceFunc, -1) 183 | mr.Wait() 184 | check(t, mr.files) 185 | checkWorker(t, mr.stats) 186 | cleanup(mr) 187 | } 188 | 189 | func TestManyFailures(t *testing.T) { 190 | mr := setup() 191 | i := 0 192 | done := false 193 | for !done { 194 | select { 195 | case done = <-mr.doneChannel: 196 | check(t, mr.files) 197 | cleanup(mr) 198 | break 199 | default: 200 | // Start 2 workers each sec. 
The workers fail after 10 tasks 201 | w := port("worker" + strconv.Itoa(i)) 202 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10) 203 | i++ 204 | w = port("worker" + strconv.Itoa(i)) 205 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10) 206 | i++ 207 | time.Sleep(1 * time.Second) 208 | } 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /src/mapreduce/worker.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/rpc" 8 | "os" 9 | "sync" 10 | ) 11 | 12 | // Worker holds the state for a server waiting for DoTask or Shutdown RPCs 13 | type Worker struct { 14 | sync.Mutex 15 | 16 | name string 17 | Map func(string, string) []KeyValue 18 | Reduce func(string, []string) string 19 | nRPC int // protected by mutex 20 | nTasks int // protected by mutex 21 | l net.Listener 22 | } 23 | 24 | // DoTask is called by the master when a new task is being scheduled on this 25 | // worker. 26 | func (wk *Worker) DoTask(arg *DoTaskArgs, _ *struct{}) error { 27 | fmt.Printf("%s: given %v task #%d on file %s (nios: %d)\n", 28 | wk.name, arg.Phase, arg.TaskNumber, arg.File, arg.NumOtherPhase) 29 | 30 | switch arg.Phase { 31 | case mapPhase: 32 | doMap(arg.JobName, arg.TaskNumber, arg.File, arg.NumOtherPhase, wk.Map) 33 | case reducePhase: 34 | doReduce(arg.JobName, arg.TaskNumber, arg.NumOtherPhase, wk.Reduce) 35 | } 36 | 37 | fmt.Printf("%s: %v task #%d done\n", wk.name, arg.Phase, arg.TaskNumber) 38 | return nil 39 | } 40 | 41 | // Shutdown is called by the master when all work has been completed. 42 | // We should respond with the number of tasks we have processed. 43 | func (wk *Worker) Shutdown(_ *struct{}, res *ShutdownReply) error { 44 | debug("Shutdown %s\n", wk.name) 45 | wk.Lock() 46 | defer wk.Unlock() 47 | res.Ntasks = wk.nTasks 48 | wk.nRPC = 1 49 | wk.nTasks-- // Don't count the shutdown RPC 50 | return nil 51 | } 52 | 53 | // Tell the master we exist and ready to work 54 | func (wk *Worker) register(master string) { 55 | args := new(RegisterArgs) 56 | args.Worker = wk.name 57 | ok := call(master, "Master.Register", args, new(struct{})) 58 | if ok == false { 59 | fmt.Printf("Register: RPC %s register error\n", master) 60 | } 61 | } 62 | 63 | // RunWorker sets up a connection with the master, registers its address, and 64 | // waits for tasks to be scheduled. 
65 | func RunWorker(MasterAddress string, me string,
66 | MapFunc func(string, string) []KeyValue,
67 | ReduceFunc func(string, []string) string,
68 | nRPC int,
69 | ) {
70 | debug("RunWorker %s\n", me)
71 | wk := new(Worker)
72 | wk.name = me
73 | wk.Map = MapFunc
74 | wk.Reduce = ReduceFunc
75 | wk.nRPC = nRPC
76 | rpcs := rpc.NewServer()
77 | rpcs.Register(wk)
78 | os.Remove(me) // only needed for "unix"
79 | l, e := net.Listen("unix", me)
80 | if e != nil {
81 | log.Fatal("RunWorker: worker ", me, " error: ", e)
82 | }
83 | wk.l = l
84 | wk.register(MasterAddress)
85 |
86 | // DON'T MODIFY CODE BELOW
87 | for {
88 | wk.Lock()
89 | if wk.nRPC == 0 {
90 | wk.Unlock()
91 | break
92 | }
93 | wk.Unlock()
94 | conn, err := wk.l.Accept()
95 | if err == nil {
96 | wk.Lock()
97 | wk.nRPC--
98 | wk.Unlock()
99 | go rpcs.ServeConn(conn)
100 | wk.Lock()
101 | wk.nTasks++
102 | wk.Unlock()
103 | } else {
104 | break
105 | }
106 | }
107 | wk.l.Close()
108 | debug("RunWorker %s exit\n", me)
109 | }
110 | -------------------------------------------------------------------------------- /src/paxos/README.md: --------------------------------------------------------------------------------
1 | # Implementing the Paxos Algorithm in Go
2 |
3 | In theoretical computer science, the CAP theorem (also known as Brewer's theorem) states that a distributed system cannot provide all three of the following guarantees at the same time:
4 |
5 | 1. Consistency: every node sees the same, most recent copy of the data;
6 | 2. Availability: every request receives a non-error response, though not necessarily the most recent data;
7 | 3. Partition tolerance: in practical terms, a partition is a time limit on communication. If the system cannot reach consistency within that limit, a partition has occurred, and the current operation has to choose between C and A.
8 |
9 | ## Getting on
10 |
11 | Today the topic is consistency. Nodes in a distributed system communicate through one of two models: shared memory and message passing.
12 |
13 | A distributed system built on message passing inevitably runs into the following failures: processes can be slow, killed, or restarted, and messages can be delayed, lost, or duplicated. Basic Paxos does not consider message tampering, i.e. Byzantine faults. The problem Paxos solves is how such a system can agree on a value so that none of these failures ever breaks the consistency of the decision. A typical scenario is a distributed database: if all nodes start from the same initial state and execute the same sequence of operations, they end up in the same final state. To guarantee that every node executes the same command sequence, a "consensus algorithm" has to run for every instruction so that every node sees the same instruction. A general-purpose consensus algorithm applies to many scenarios and is a core problem of distributed computing, which is why research on consensus has never stopped since the 1980s.
14 |
15 | ## Departure (the Paxos algorithm)
16 |
17 | Paxos reaches a decision in two phases:
18 |
19 | - Phase1: determine whose proposal number is highest; only the holder of the highest number has the right to submit a Proposal (a concrete value);
20 | - Phase2: the holder of the highest number submits its Proposal. If no other node has issued a higher-numbered Proposal in the meantime, the proposal passes; otherwise the whole process starts over.
21 |
22 | That is the conclusion; the derivation is not expanded here. One thing to note is that the first phase can livelock: your number is higher than mine, then mine is higher than yours, and so on forever, so the algorithm never terminates. A "Leader" can be used to solve this; the Leader is not deliberately elected but emerges naturally. That, too, is not discussed further here, since this article is mainly about the code. (A proposer-side sketch follows, before we look at each handler.)
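Before the handlers, here is a minimal proposer-side sketch of how one round could be driven. It is only an illustration, not this repository's actual propose code: the `PrepareArgs`, `PrepareReply`, `AcceptArgs`, `AcceptReply` and `DecideArgs` types, the `OK` constant and the `call` RPC helper are assumed to match the handlers quoted below, while `px.peers`, `px.me`, `px.completes` and `nextProposalNumber` are stand-ins invented for the example.

``` go
// Sketch only: drive one Paxos round for sequence seq with proposed value v.
func (px *Paxos) propose(seq int, v interface{}) {
	pnum := px.nextProposalNumber() // must be unique and increasing (assumed helper)
	value := v
	highestSeen := -1
	prepared := 0

	// Phase1: ask every acceptor for a promise; adopt the value of the
	// highest-numbered proposal any acceptor has already accepted.
	for _, peer := range px.peers {
		args := &PrepareArgs{Seq: seq, PNum: pnum}
		var reply PrepareReply
		if call(peer, "Paxos.Prepare", args, &reply) && reply.Err == OK {
			prepared++
			if reply.AcceptValue != nil && reply.AcceptPnum > highestSeen {
				highestSeen = reply.AcceptPnum
				value = reply.AcceptValue
			}
		}
	}
	if prepared <= len(px.peers)/2 {
		return // no majority promised; retry later with a higher number
	}

	// Phase2: ask every acceptor to accept the chosen value.
	accepted := 0
	for _, peer := range px.peers {
		args := &AcceptArgs{Seq: seq, PNum: pnum, Value: value}
		var reply AcceptReply
		if call(peer, "Paxos.Accept", args, &reply) && reply.Err == OK {
			accepted++
		}
	}
	if accepted > len(px.peers)/2 {
		// The value is chosen; let every peer learn it (see Decide further down).
		for _, peer := range px.peers {
			args := &DecideArgs{Seq: seq, PNum: pnum, Value: value,
				Me: px.me, Done: px.completes[px.me]}
			call(peer, "Paxos.Decide", args, &DecideReply{})
		}
	}
}
```

A real proposer would also handle its own instance locally instead of calling itself over RPC, and would retry with a larger number when it is rejected.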
23 |
24 | ### Phase1
25 |
26 | ``` go
27 | func (px *Paxos) Prepare(args *PrepareArgs, reply *PrepareReply) error {
28 | px.mu.Lock()
29 | defer px.mu.Unlock()
30 | round, exist := px.rounds[args.Seq]
31 | if !exist {
32 | // first time this seq is seen, so create a new instance
33 | px.rounds[args.Seq] = px.newInstance()
34 | round, _ = px.rounds[args.Seq]
35 | reply.Err = OK
36 | } else {
37 | if args.PNum > round.proposeNumber {
38 | reply.Err = OK
39 | } else {
40 | reply.Err = Reject
41 | }
42 | }
43 | if reply.Err == OK {
44 | reply.AcceptPnum = round.acceptorNumber
45 | reply.AcceptValue = round.acceptValue
46 | px.rounds[args.Seq].proposeNumber = args.PNum
47 | } else {
48 | // reject
49 | }
50 | return nil
51 | }
52 | ```
53 |
54 | In the Prepare phase the proposer asks every machine, over RPC, whether the current proposal may go through. The condition is that the submitted number must be greater than any number this acceptor has already promised in Prepare, which is the `if args.PNum > round.proposeNumber` check. There is one more case: if the acceptor has never seen this sequence number before, this is the first Prepare it receives for it, so it simply agrees. Code fragment:
55 |
56 | ``` go
57 | round, exist := px.rounds[args.Seq]
58 | if !exist {
59 | // first time this seq is seen, so create a new instance
60 | px.rounds[args.Seq] = px.newInstance()
61 | round, _ = px.rounds[args.Seq]
62 | reply.Err = OK
63 | }
64 | ```
65 |
66 | After that check, if the Prepare is accepted, the acceptor must also return to the proposer the proposal number it has already accepted and the value that was accepted, if any. Code fragment:
67 |
68 | ``` go
69 | if reply.Err == OK {
70 | reply.AcceptPnum = round.acceptorNumber
71 | reply.AcceptValue = round.acceptValue
72 | px.rounds[args.Seq].proposeNumber = args.PNum
73 | }
74 | ```
75 |
76 | ### Phase2
77 |
78 | ``` go
79 | func (px *Paxos) Accept(args *AcceptArgs, reply *AcceptReply) error { // pointer receiver, so the embedded mutex is not copied
80 | px.mu.Lock()
81 | defer px.mu.Unlock()
82 | round, exist := px.rounds[args.Seq]
83 | if !exist {
84 | px.rounds[args.Seq] = px.newInstance()
85 | reply.Err = OK
86 | } else {
87 | if args.PNum >= round.proposeNumber {
88 | reply.Err = OK
89 | } else {
90 | reply.Err = Reject
91 | }
92 | }
93 | if reply.Err == OK {
94 | px.rounds[args.Seq].acceptorNumber = args.PNum
95 | px.rounds[args.Seq].proposeNumber = args.PNum
96 | px.rounds[args.Seq].acceptValue = args.Value
97 | } else {
98 | // reject
99 | }
100 | return nil
101 | }
102 | ```
103 |
104 | The Accept phase is essentially the same as Prepare. First check whether a round for this sequence number already exists; if it does not, this is a new one, so reply OK right away:
105 |
106 | ``` go
107 | round, exist := px.rounds[args.Seq]
108 | if !exist {
109 | px.rounds[args.Seq] = px.newInstance()
110 | reply.Err = OK
111 | }
112 | ```
113 |
114 | Then, just as in Prepare, check whether the proposal number is greater than or equal to the round's current proposal number; if so reply OK, otherwise reject.
115 |
116 | ``` go
117 | if args.PNum >= round.proposeNumber {
118 | reply.Err = OK
119 | } else {
120 | reply.Err = Reject
121 | }
122 | ```
123 |
124 | Equally important: if the proposal is accepted, the round's proposal number and accepted value must be recorded.
125 |
126 | ``` go
127 | if reply.Err == OK {
128 | px.rounds[args.Seq].acceptorNumber = args.PNum
129 | px.rounds[args.Seq].proposeNumber = args.PNum
130 | px.rounds[args.Seq].acceptValue = args.Value
131 | }
132 | ```
133 |
134 | The implementation keeps some auxiliary state in a map and an array. The map stores the outcome of each round of voting: the key is the round's sequence number and the `Round` value stores what has been accepted. The `completes` array records, for each peer, the smallest sequence number that peer has already finished with.
135 |
136 | ``` go
137 | rounds map[int]*Round // result of each round; key is the seq, value holds the accepted proposal
138 | completes []int // for each peer, the smallest seq it has completed
139 |
140 | func (px *Paxos) Decide(args *DecideArgs, reply *DecideReply) error {
141 | px.mu.Lock()
142 | defer px.mu.Unlock()
143 | _, exist := px.rounds[args.Seq]
144 | if !exist {
145 | px.rounds[args.Seq] = px.newInstance()
146 | }
147 | px.rounds[args.Seq].acceptorNumber = args.PNum
148 | px.rounds[args.Seq].acceptValue = args.Value
149 | px.rounds[args.Seq].proposeNumber = args.PNum
150 | px.rounds[args.Seq].state = Decided
151 | px.completes[args.Me] = args.Done
152 | return nil
153 | }
154 | ```
155 |
156 | The `Decide` method is what the proposer uses to announce a chosen value, and this is what maps Paxos onto the replicated state machine.
157 |
158 | ![Log replication](../../img/m.png)
159 |
160 | Clients submit commands to the servers; the servers replicate them across multiple machines with the Paxos algorithm, every server executes the same commands in the same order, the state machine applies them, and in the end every machine reaches the same result.
161 |
162 | ![Log repair](../../img/select.png)
163 |
164 | ## Arrival
165 |
166 | In a distributed environment, network failures and crashes are normal. If a machine goes down and comes back some time later, how does it recover the commands it missed while it was down? Suppose it submits a jmp command. The entries at indexes 1 and 2 are already decided, so it can start directly at index 3. When it submits Propose(jmp) it receives cmp in the replies from s1 and s3, and because a later proposal must adopt the value accepted earlier, in Phase2 it sends an accept request whose value is cmp, so index 3 ends up as cmp. If no previously accepted value had come back at this stage, it could simply use the client's own value, and agreement is reached either way.
167 |
168 | Based on the MIT course, written up for my own study; [annotated source code](https://github.com/happyer/distributed-computing).
169 | -------------------------------------------------------------------------------- /src/raft/README.md: --------------------------------------------------------------------------------
1 | # Implementing the Raft Consensus Protocol in Go
2 |
3 | As large web sites face ever more high-concurrency traffic and ever larger volumes of data, achieving high availability, elasticity, scalability and security becomes more and more important.
4 |
5 | To address this, the architecture of large sites keeps evolving, and when it comes to high availability, distribution is unavoidable. No distributed system can satisfy Consistency, Availability and Partition tolerance all at once; at most two of the three can be met. Yet however a system balances CAP, it can never completely give up consistency: if it did, the data in the system could not be trusted, the data would be meaningless, and the system itself would have no value. So, whatever else happens, the consistency of a distributed system deserves close attention.
6 |
7 | Raft is a protocol for keeping a replicated log consistent; compared with Paxos it is easier to understand and to implement.
8 |
9 | ## Getting on
10 |
11 | Raft achieves consensus by electing a leader and giving it full responsibility for managing the replicated log. The leader accepts log entries from clients, replicates them to the other servers, and tells them when it is safe to apply entries to their state machines. Having a leader greatly simplifies log management: for example, the leader can decide where a new entry goes without consulting anyone, and data flows only from the leader to the other servers. A leader can crash or lose its connection to the others, in which case a new leader is elected.
12 |
13 | Raft divides time into terms of arbitrary length, numbered with consecutive integers. Each term begins with an election in which one or more candidates try to become leader. If a candidate wins, it serves as leader for the rest of the term. In some situations the votes split; that term then ends with no leader, and a new term (with a new election) begins shortly afterwards. Raft guarantees that a given term has at most one leader.
14 |
15 | To implement the Raft protocol, see the following figures:
16 |
17 | ![State kept on each server](../../img/state.png)
18 |
19 | ![RequestVote RPC](../../img/rpc_vote.png)
20 |
21 | ![AppendEntries RPC](../../img/appent_rpc.png)
22 |
23 | ![Rules for servers](../../img/rule.png)
24 |
25 | The Raft protocol splits the problem into three main parts:
26 |
27 | 1. Strong leadership: compared with other consensus algorithms, Raft uses a stronger form of leadership. For example, log entries flow only from the leader to the other servers. This simplifies the management of the replicated log and makes Raft easier to understand.
28 |
29 | 2. Leader election: Raft uses randomized timers to elect leaders. This adds only a small mechanism to the heartbeats that any consensus algorithm already needs, and it makes conflicts simpler and faster to resolve.
30 |
31 | 3. Membership changes: Raft handles changes to the cluster membership with a joint-consensus approach in which the majorities of the two different configurations overlap, so the cluster can keep working while its membership changes.
32 |
33 | The rest of this article walks through these three parts.
34 |
35 | ## Departure (leader election)
36 |
37 | Raft uses heartbeats to trigger leader election. When servers start up they are all followers. If a follower receives no messages for a period of time (the election timeout), it assumes there is no viable leader and starts an election to choose a new one. To begin an election, the follower increments its current term and switches to the candidate state, then sends RequestVote RPCs in parallel to the other servers, asking them to vote for it. It remains a candidate until one of three things happens: (a) it wins the election, (b) another server establishes itself as leader, or (c) some time passes with no winner. A candidate wins when it receives votes from a majority of the servers in the cluster for the same term. Each server casts at most one vote per term, on a first-come-first-served basis.
38 |
39 | Raft also uses the vote to prevent a candidate from winning unless its log contains every committed entry. To win, a candidate must contact a majority of the cluster, which means every committed entry is present on at least one of the servers it contacts. If the candidate's log is at least as up-to-date as a majority of the servers, it must hold all committed entries. The RequestVote RPC enforces this restriction: the RPC carries information about the candidate's log, and a voter refuses to vote for a candidate whose log is less up-to-date than its own.
40 |
41 | Raft decides which of two logs is more up-to-date by comparing the index and term of their last entries. If the last entries have different terms, the log with the higher term is more up-to-date; if the terms are the same, the longer log is more up-to-date. (A small sketch of these two mechanisms follows.)
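To make the election rules above concrete, here is a tiny sketch (it uses `math/rand` and `time`); the function names and the timeout range are illustrative and are not taken from `raft.go` in this repository.

``` go
// Randomized election timeout: a follower that hears nothing for this long
// becomes a candidate. The randomness (150 to 300 ms here, an assumed range)
// makes repeated split votes unlikely.
func electionTimeout() time.Duration {
	return time.Duration(150+rand.Intn(150)) * time.Millisecond
}

// Voting rule from the paragraph above: grant a vote only if the candidate's
// log is at least as up-to-date as ours. A higher last term wins; equal terms
// are broken in favour of the longer (higher last index) log.
func candidateUpToDate(candLastTerm, candLastIndex, myLastTerm, myLastIndex int) bool {
	if candLastTerm != myLastTerm {
		return candLastTerm > myLastTerm
	}
	return candLastIndex >= myLastIndex
}
```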
42 |
43 | ## Arrival (log replication)
44 |
45 | Once a leader has been elected it begins serving clients. Every client request contains a command to be executed by the replicated state machine. The leader appends the command to its log as a new entry, then issues AppendEntries RPCs in parallel to the other servers so that they replicate the entry. When the entry has been safely replicated, the leader applies it to its state machine and returns the result to the client. If followers crash, run slowly, or the network drops packets, the leader keeps retrying AppendEntries (even after it has already replied to the client) until every follower eventually stores all the entries.
46 |
47 | Raft's log mechanism maintains a high level of coherency between the logs of different servers. This not only simplifies the system's behaviour and makes it more predictable, it is also an important part of the safety guarantee. Raft maintains the following properties, which together make up the Log Matching Property:
48 |
49 | 1. If two entries in different logs have the same index and term, they store the same command.
50 | 2. If two entries in different logs have the same index and term, the logs are identical in all preceding entries.
51 |
52 | The first property follows from the fact that a leader creates at most one entry at a given log index within a given term, and entries never change their position in the log. The second is guaranteed by a simple consistency check in AppendEntries: when sending the RPC, the leader includes the index and term of the entry that immediately precedes the new ones, and if the follower cannot find an entry with that index and term in its own log, it rejects the new entries.
53 |
54 | The consistency check acts like an induction step: the initial empty logs trivially satisfy the Log Matching Property, and the check preserves the property whenever the logs grow. So whenever AppendEntries returns success, the leader knows the follower's log is identical to its own up through the new entries.
55 |
56 | ## Getting off
57 |
58 | Based on the MIT course, written up for my own study; [annotated source code](https://github.com/happyer/distributed-computing).
59 | -------------------------------------------------------------------------------- /src/raft/config.go: --------------------------------------------------------------------------------
1 | package raft
2 |
3 | //
4 | // support for Raft tester.
5 | //
6 | // we will use the original config.go to test your code for grading.
7 | // so, while you can modify this code to help you debug, please
8 | // test with the original before submitting.
9 | // 10 | 11 | import "labrpc" 12 | import "log" 13 | import "sync" 14 | import "testing" 15 | import "runtime" 16 | import crand "crypto/rand" 17 | import "encoding/base64" 18 | import "sync/atomic" 19 | import "time" 20 | import ( 21 | "fmt" 22 | ) 23 | 24 | func randstring(n int) string { 25 | b := make([]byte, 2 * n) 26 | crand.Read(b) 27 | s := base64.URLEncoding.EncodeToString(b) 28 | return s[0:n] 29 | } 30 | 31 | type config struct { 32 | mu sync.Mutex 33 | t *testing.T 34 | net *labrpc.Network 35 | n int 36 | done int32 // tell internal threads to die 37 | rafts []*Raft 38 | applyErr []string // from apply channel readers 39 | connected []bool // whether each server is on the net 40 | saved []*Persister 41 | endnames [][]string // the port file names each sends to 42 | logs []map[int]int // copy of each server's committed entries 43 | } 44 | 45 | func make_config(t *testing.T, n int, unreliable bool) *config { 46 | runtime.GOMAXPROCS(4) 47 | cfg := &config{} 48 | cfg.t = t 49 | cfg.net = labrpc.MakeNetwork() 50 | cfg.n = n 51 | cfg.applyErr = make([]string, cfg.n) 52 | cfg.rafts = make([]*Raft, cfg.n) 53 | cfg.connected = make([]bool, cfg.n) 54 | cfg.saved = make([]*Persister, cfg.n) 55 | cfg.endnames = make([][]string, cfg.n) 56 | cfg.logs = make([]map[int]int, cfg.n) 57 | 58 | cfg.setunreliable(unreliable) 59 | 60 | cfg.net.LongDelays(true) 61 | 62 | // create a full set of Rafts. 63 | for i := 0; i < cfg.n; i++ { 64 | cfg.logs[i] = map[int]int{} 65 | cfg.start1(i) 66 | } 67 | 68 | // connect everyone 69 | for i := 0; i < cfg.n; i++ { 70 | cfg.connect(i) 71 | } 72 | 73 | return cfg 74 | } 75 | 76 | // shut down a Raft server but save its persistent state. 77 | func (cfg *config) crash1(i int) { 78 | cfg.disconnect(i) 79 | cfg.net.DeleteServer(i) // disable client connections to the server. 80 | 81 | cfg.mu.Lock() 82 | defer cfg.mu.Unlock() 83 | 84 | // a fresh persister, in case old instance 85 | // continues to update the Persister. 86 | // but copy old persister's content so that we always 87 | // pass Make() the last persisted state. 88 | if cfg.saved[i] != nil { 89 | cfg.saved[i] = cfg.saved[i].Copy() 90 | } 91 | 92 | rf := cfg.rafts[i] 93 | if rf != nil { 94 | cfg.mu.Unlock() 95 | rf.Kill() 96 | cfg.mu.Lock() 97 | cfg.rafts[i] = nil 98 | } 99 | 100 | if cfg.saved[i] != nil { 101 | raftlog := cfg.saved[i].ReadRaftState() 102 | cfg.saved[i] = &Persister{} 103 | cfg.saved[i].SaveRaftState(raftlog) 104 | } 105 | } 106 | 107 | // 108 | // start or re-start a Raft. 109 | // if one already exists, "kill" it first. 110 | // allocate new outgoing port file names, and a new 111 | // state persister, to isolate previous instance of 112 | // this server. since we cannot really kill it. 113 | // 114 | func (cfg *config) start1(i int) { 115 | cfg.crash1(i) 116 | 117 | // a fresh set of outgoing ClientEnd names. 118 | // so that old crashed instance's ClientEnds can't send. 119 | cfg.endnames[i] = make([]string, cfg.n) 120 | for j := 0; j < cfg.n; j++ { 121 | cfg.endnames[i][j] = randstring(20) 122 | } 123 | 124 | // a fresh set of ClientEnds. 125 | ends := make([]*labrpc.ClientEnd, cfg.n) 126 | for j := 0; j < cfg.n; j++ { 127 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 128 | cfg.net.Connect(cfg.endnames[i][j], j) 129 | } 130 | 131 | cfg.mu.Lock() 132 | 133 | // a fresh persister, so old instance doesn't overwrite 134 | // new instance's persisted state. 135 | // but copy old persister's content so that we always 136 | // pass Make() the last persisted state. 
137 | if cfg.saved[i] != nil { 138 | cfg.saved[i] = cfg.saved[i].Copy() 139 | } else { 140 | cfg.saved[i] = MakePersister() 141 | } 142 | 143 | cfg.mu.Unlock() 144 | 145 | // listen to messages from Raft indicating newly committed messages. 146 | applyCh := make(chan ApplyMsg) 147 | go func() { 148 | for m := range applyCh { 149 | err_msg := "" 150 | if m.UseSnapshot { 151 | // ignore the snapshot 152 | } else if v, ok := (m.Command).(int); ok { 153 | cfg.mu.Lock() 154 | for j := 0; j < len(cfg.logs); j++ { 155 | if old, oldok := cfg.logs[j][m.Index]; oldok && old != v { 156 | // some server has already committed a different value for this entry! 157 | err_msg = fmt.Sprintf("commit index=%v server=%v %v != server=%v %v", 158 | m.Index, i, m.Command, j, old) 159 | } 160 | } 161 | _, prevok := cfg.logs[i][m.Index - 1] 162 | cfg.logs[i][m.Index] = v 163 | cfg.mu.Unlock() 164 | 165 | if m.Index > 1 && prevok == false { 166 | err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.Index) 167 | } 168 | } else { 169 | err_msg = fmt.Sprintf("committed command %v is not an int", m.Command) 170 | } 171 | 172 | if err_msg != "" { 173 | log.Fatalf("apply error: %v\n", err_msg) 174 | cfg.applyErr[i] = err_msg 175 | // keep reading after error so that Raft doesn't block 176 | // holding locks... 177 | } 178 | } 179 | }() 180 | 181 | rf := Make(ends, i, cfg.saved[i], applyCh) 182 | 183 | cfg.mu.Lock() 184 | cfg.rafts[i] = rf 185 | cfg.mu.Unlock() 186 | 187 | svc := labrpc.MakeService(rf) 188 | srv := labrpc.MakeServer() 189 | srv.AddService(svc) 190 | cfg.net.AddServer(i, srv) 191 | } 192 | 193 | func (cfg *config) cleanup() { 194 | for i := 0; i < len(cfg.rafts); i++ { 195 | if cfg.rafts[i] != nil { 196 | cfg.rafts[i].Kill() 197 | } 198 | } 199 | atomic.StoreInt32(&cfg.done, 1) 200 | } 201 | 202 | // attach server i to the net. 203 | func (cfg *config) connect(i int) { 204 | // fmt.Printf("connect(%d)\n", i) 205 | 206 | cfg.connected[i] = true 207 | 208 | // outgoing ClientEnds 209 | for j := 0; j < cfg.n; j++ { 210 | if cfg.connected[j] { 211 | endname := cfg.endnames[i][j] 212 | cfg.net.Enable(endname, true) 213 | } 214 | } 215 | 216 | // incoming ClientEnds 217 | for j := 0; j < cfg.n; j++ { 218 | if cfg.connected[j] { 219 | endname := cfg.endnames[j][i] 220 | cfg.net.Enable(endname, true) 221 | } 222 | } 223 | } 224 | 225 | // detach server i from the net. 226 | func (cfg *config) disconnect(i int) { 227 | // fmt.Printf("disconnect(%d)\n", i) 228 | 229 | cfg.connected[i] = false 230 | 231 | // outgoing ClientEnds 232 | for j := 0; j < cfg.n; j++ { 233 | if cfg.endnames[i] != nil { 234 | endname := cfg.endnames[i][j] 235 | cfg.net.Enable(endname, false) 236 | } 237 | } 238 | 239 | // incoming ClientEnds 240 | for j := 0; j < cfg.n; j++ { 241 | if cfg.endnames[j] != nil { 242 | endname := cfg.endnames[j][i] 243 | cfg.net.Enable(endname, false) 244 | } 245 | } 246 | } 247 | 248 | func (cfg *config) rpcCount(server int) int { 249 | return cfg.net.GetCount(server) 250 | } 251 | 252 | func (cfg *config) setunreliable(unrel bool) { 253 | cfg.net.Reliable(!unrel) 254 | } 255 | 256 | func (cfg *config) setlongreordering(longrel bool) { 257 | cfg.net.LongReordering(longrel) 258 | } 259 | 260 | // check that there's exactly one leader. 261 | // try a few times in case re-elections are needed. 
262 | func (cfg *config) checkOneLeader() int { 263 | for iters := 0; iters < 10; iters++ { 264 | time.Sleep(500 * time.Millisecond) 265 | leaders := make(map[int][]int) 266 | for i := 0; i < cfg.n; i++ { 267 | if cfg.connected[i] { 268 | if t, leader := cfg.rafts[i].GetState(); leader { 269 | leaders[t] = append(leaders[t], i) 270 | } 271 | } 272 | } 273 | 274 | lastTermWithLeader := -1 275 | for t, leaders := range leaders { 276 | if len(leaders) > 1 { 277 | cfg.t.Fatalf("term %d has %d (>1) leaders", t, len(leaders)) 278 | } 279 | if t > lastTermWithLeader { 280 | lastTermWithLeader = t 281 | } 282 | } 283 | 284 | if len(leaders) != 0 { 285 | return leaders[lastTermWithLeader][0] 286 | } 287 | } 288 | cfg.t.Fatalf("expected one leader, got none") 289 | return -1 290 | } 291 | 292 | // check that everyone agrees on the term. 293 | func (cfg *config) checkTerms() int { 294 | term := -1 295 | for i := 0; i < cfg.n; i++ { 296 | if cfg.connected[i] { 297 | xterm, _ := cfg.rafts[i].GetState() 298 | if term == -1 { 299 | term = xterm 300 | } else if term != xterm { 301 | cfg.t.Fatalf("servers disagree on term") 302 | } 303 | } 304 | } 305 | return term 306 | } 307 | 308 | // check that there's no leader 309 | func (cfg *config) checkNoLeader() { 310 | for i := 0; i < cfg.n; i++ { 311 | if cfg.connected[i] { 312 | _, is_leader := cfg.rafts[i].GetState() 313 | if is_leader { 314 | cfg.t.Fatalf("expected no leader, but %v claims to be leader", i) 315 | } 316 | } 317 | } 318 | } 319 | 320 | // how many servers think a log entry is committed? 321 | func (cfg *config) nCommitted(index int) (int, interface{}) { 322 | count := 0 323 | cmd := -1 324 | for i := 0; i < len(cfg.rafts); i++ { 325 | if cfg.applyErr[i] != "" { 326 | cfg.t.Fatal(cfg.applyErr[i]) 327 | } 328 | 329 | cfg.mu.Lock() 330 | cmd1, ok := cfg.logs[i][index] 331 | cfg.mu.Unlock() 332 | //DPrintf("i=",i,"index=",index,"cmd=",cmd1) 333 | if ok { 334 | if count > 0 && cmd != cmd1 { 335 | cfg.t.Fatalf("committed values do not match: index %v, %v, %v\n", 336 | index, cmd, cmd1) 337 | } 338 | count += 1 339 | cmd = cmd1 340 | } 341 | } 342 | return count, cmd 343 | } 344 | 345 | // wait for at least n servers to commit. 346 | // but don't wait forever. 347 | func (cfg *config) wait(index int, n int, startTerm int) interface{} { 348 | to := 10 * time.Millisecond 349 | for iters := 0; iters < 30; iters++ { 350 | nd, _ := cfg.nCommitted(index) 351 | if nd >= n { 352 | break 353 | } 354 | time.Sleep(to) 355 | if to < time.Second { 356 | to *= 2 357 | } 358 | if startTerm > -1 { 359 | for _, r := range cfg.rafts { 360 | if t, _ := r.GetState(); t > startTerm { 361 | // someone has moved on 362 | // can no longer guarantee that we'll "win" 363 | return -1 364 | } 365 | } 366 | } 367 | } 368 | nd, cmd := cfg.nCommitted(index) 369 | if nd < n { 370 | cfg.t.Fatalf("only %d decided for index %d; wanted %d\n", 371 | nd, index, n) 372 | } 373 | return cmd 374 | } 375 | 376 | // do a complete agreement. 377 | // it might choose the wrong leader initially, 378 | // and have to re-submit after giving up. 379 | // entirely gives up after about 10 seconds. 380 | // indirectly checks that the servers agree on the 381 | // same value, since nCommitted() checks this, 382 | // as do the threads that read from applyCh. 383 | // returns index. 
384 | func (cfg *config) one(cmd int, expectedServers int) int { 385 | 386 | for i:= 0;i< len(cfg.rafts);i++ { 387 | //DPrintf(cfg.rafts) 388 | } 389 | 390 | t0 := time.Now() 391 | starts := 0 392 | for time.Since(t0).Seconds() < 10 { 393 | // try all the servers, maybe one is the leader. 394 | index := -1 395 | for si := 0; si < cfg.n; si++ { 396 | starts = (starts + 1) % cfg.n 397 | var rf *Raft 398 | cfg.mu.Lock() 399 | if cfg.connected[starts] { 400 | rf = cfg.rafts[starts] 401 | 402 | } 403 | cfg.mu.Unlock() 404 | if rf != nil { 405 | index1, _, ok := rf.Start(cmd) 406 | if ok { 407 | index = index1 408 | break 409 | } 410 | } 411 | } 412 | 413 | if index != -1 { 414 | // somebody claimed to be the leader and to have 415 | // submitted our command; wait a while for agreement. 416 | t1 := time.Now() 417 | for time.Since(t1).Seconds() < 2 { 418 | nd, cmd1 := cfg.nCommitted(index) 419 | if nd > 0 && nd >= expectedServers { 420 | // committed 421 | if cmd2, ok := cmd1.(int); ok && cmd2 == cmd { 422 | // and it was the command we submitted. 423 | return index 424 | } 425 | } 426 | time.Sleep(20 * time.Millisecond) 427 | } 428 | } else { 429 | time.Sleep(50 * time.Millisecond) 430 | } 431 | } 432 | printLogs(cfg) 433 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) 434 | return -1 435 | } 436 | 437 | func printLogs(cfg *config) { 438 | for si := 0; si < cfg.n; si++ { 439 | //var rf *Raft = cfg.rafts[si] 440 | //fmt.Printf("%s",rf.role) 441 | //DPrintf(strconv.Itoa(rf.me),"data=",rf.logs) 442 | } 443 | } 444 | -------------------------------------------------------------------------------- /src/raft/persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft and kvraft to save persistent 5 | // Raft state (log &c) and k/v server snapshots. 6 | // 7 | // we will use the original persister.go to test your code for grading. 8 | // so, while you can modify this code to help you debug, please 9 | // test with the original before submitting. 10 | // 11 | 12 | import "sync" 13 | 14 | type Persister struct { 15 | mu sync.Mutex 16 | raftstate []byte 17 | snapshot []byte 18 | } 19 | 20 | func MakePersister() *Persister { 21 | return &Persister{} 22 | } 23 | 24 | func (ps *Persister) Copy() *Persister { 25 | ps.mu.Lock() 26 | defer ps.mu.Unlock() 27 | np := MakePersister() 28 | np.raftstate = ps.raftstate 29 | np.snapshot = ps.snapshot 30 | return np 31 | } 32 | 33 | func (ps *Persister) SaveRaftState(data []byte) { 34 | ps.mu.Lock() 35 | defer ps.mu.Unlock() 36 | ps.raftstate = data 37 | } 38 | 39 | func (ps *Persister) ReadRaftState() []byte { 40 | ps.mu.Lock() 41 | defer ps.mu.Unlock() 42 | return ps.raftstate 43 | } 44 | 45 | func (ps *Persister) RaftStateSize() int { 46 | ps.mu.Lock() 47 | defer ps.mu.Unlock() 48 | return len(ps.raftstate) 49 | } 50 | 51 | func (ps *Persister) SaveSnapshot(snapshot []byte) { 52 | ps.mu.Lock() 53 | defer ps.mu.Unlock() 54 | ps.snapshot = snapshot 55 | } 56 | 57 | func (ps *Persister) ReadSnapshot() []byte { 58 | ps.mu.Lock() 59 | defer ps.mu.Unlock() 60 | return ps.snapshot 61 | } 62 | -------------------------------------------------------------------------------- /src/raft/util.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "log" 4 | 5 | // Debugging 6 | const Debug = 1 7 | 8 | func DPrintf(format string, a ...interface{}) (n int, err error) { 9 | if Debug > 0 { 10 | log.Printf(format, a...) 
11 | } 12 | return 13 | } 14 | -------------------------------------------------------------------------------- /src/shardkv/README.md: -------------------------------------------------------------------------------- 1 | # 老司机带你用 Go 语言实现分布式数据库 2 | 3 | 该分布式数据使用了 Raft 协议作为一致性协议为基础,Raft 协议的实现在 [《老司机带你用 Go 语言实现 Raft 分布式一致性协议》](src/raft)。 4 | 5 | 一个良好的分布式数据实现包括以下几个方面: 6 | 7 | 1. 可用性,即在一些机器各种故障的情况下是否能够继续提供服务(A); 8 | 2. 在分布式环境中,机器之间通过网络通信,它们是否能容忍网络分区(P); 9 | 3. 在数据分布到各个机器之中,他们的数据是否能够保持一致(C); 10 | 4. 在之前的大数据环境之中,对于分布式是否支持水平扩展(Shard)也成为了一个必不可少的功能; 11 | 12 | 上述是 CAP 理论和分布式拓展的必要功能。在很多系统里面,一致性上面做了一些妥协,使用了弱一致性协议。当然像 Dynamo 这样提供 W + R > N 这种协议的,让用户选择一致性也是一种新的思路,对应的开源版本:Cassandra。 13 | 14 | ### Shard 15 | 16 | 分区作为分布式里面一个重要的的特性,它可以通过增加机器来达到增加吞吐量的目的。与此同时,也带来了一些对于每一个 Shard 的负载挑战。 17 | 所以通用的做法是,客户端通过提供一个 Partition 接口,让用户自己决定分为的规则。 18 | 19 | ### 架构 20 | 21 | ![分布式数据库架构](../../img/kv-fram.png) 22 | 23 | Sharemaster 主要负责根据 Client 提供的分区规则,将数据储存在不同的 Group 中,为了 Sharemaster 有多台机器组成,他们之间使用 Raft 协议来保证一致性。每一个 Group 由多台机器组成,他们之间也是通过 Raft 协议来保证一致性。 24 | 25 | 在实现分布式数据之中为了简单起见,将数据直接放入到一个 Map 之中,这里只是想告诉你整个流程是这样的。 26 | 持续更新中,后续会把 LSM 融入进来。 27 | 28 | [源码注释地址](https://github.com/happyer/distributed-computing)。 29 | 30 | 扫描二维码,关注我的个人微信公众号: 31 | 32 | ![公众号](../../img/qc.jpg) 33 | -------------------------------------------------------------------------------- /src/shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // client code to talk to a sharded key/value service. 5 | // 6 | // the client first talks to the shardmaster to find out 7 | // the assignment of shards (keys) to groups, and then 8 | // talks to the group that holds the key's shard. 9 | // 10 | 11 | import "labrpc" 12 | import "crypto/rand" 13 | import "math/big" 14 | import "shardmaster" 15 | import ( 16 | "time" 17 | "sync" 18 | ) 19 | 20 | // 21 | // which shard is a key in? 22 | // please use this function, 23 | // and please do not change it. 24 | // 25 | func key2shard(key string) int { 26 | shard := 0 27 | if len(key) > 0 { 28 | shard = int(key[0]) 29 | } 30 | shard %= shardmaster.NShards 31 | return shard 32 | } 33 | 34 | func nrand() int64 { 35 | max := big.NewInt(int64(1) << 62) 36 | bigx, _ := rand.Int(rand.Reader, max) 37 | x := bigx.Int64() 38 | return x 39 | } 40 | 41 | type Clerk struct { 42 | sm *shardmaster.Clerk 43 | config shardmaster.Config 44 | make_end func(string) *labrpc.ClientEnd 45 | // You will have to modify this struct. 46 | mu sync.Mutex 47 | clientId int64 48 | requestId int 49 | } 50 | 51 | // 52 | // the tester calls MakeClerk. 53 | // 54 | // masters[] is needed to call shardmaster.MakeClerk(). 55 | // 56 | // make_end(servername) turns a server name from a 57 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 58 | // send RPCs. 59 | // 60 | func MakeClerk(masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk { 61 | ck := new(Clerk) 62 | ck.sm = shardmaster.MakeClerk(masters) 63 | ck.make_end = make_end 64 | // You'll have to add code here. 65 | ck.clientId = nrand() 66 | ck.requestId = 0 67 | 68 | return ck 69 | } 70 | 71 | // 72 | // fetch the current value for a key. 73 | // returns "" if the key does not exist. 74 | // keeps trying forever in the face of all other errors. 75 | // You will have to modify this function. 
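The README above describes the routing path from a key to a shard and then to a replica group. Here is a minimal sketch of that path, using `key2shard` and `shardmaster.Config` exactly as they appear in this package; the helper name `routeKey` is invented for the example, and this is what `Clerk.Get` below does inside its retry loop.

``` go
// Sketch only: route a key once a configuration is known.
func routeKey(cfg shardmaster.Config, key string) (gid int, servers []string) {
	shard := key2shard(key)   // key -> shard index (0 .. NShards-1)
	gid = cfg.Shards[shard]   // shard -> replica group id
	servers = cfg.Groups[gid] // group id -> that group's server names
	return
}
```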
76 | // 77 | func (ck *Clerk) Get(key string) string { 78 | ck.mu.Lock() 79 | defer ck.mu.Unlock() 80 | 81 | args := &GetArgs{Key:key, ClientId:ck.clientId} 82 | ck.requestId++ 83 | args.RequestId = ck.requestId 84 | 85 | for { 86 | shard := key2shard(key) 87 | gid := ck.config.Shards[shard] 88 | if servers, ok := ck.config.Groups[gid]; ok { 89 | // try each server for the shard. 90 | for si := 0; si < len(servers); si++ { 91 | srv := ck.make_end(servers[si]) 92 | var reply GetReply 93 | ok := srv.Call("ShardKV.Get", args, &reply) 94 | if ok && reply.WrongLeader == false && (reply.Err == OK || reply.Err == ErrNoKey) { 95 | return reply.Value 96 | } 97 | if ok && (reply.Err == ErrWrongGroup) { 98 | break 99 | } 100 | } 101 | } 102 | time.Sleep(100 * time.Millisecond) 103 | // ask master for the latest configuration. 104 | ck.config = ck.sm.Query(-1) 105 | } 106 | 107 | return "" 108 | } 109 | 110 | // 111 | // shared by Put and Append. 112 | // You will have to modify this function. 113 | // 114 | func (ck *Clerk) PutAppend(key string, value string, op string) { 115 | ck.mu.Lock() 116 | defer ck.mu.Unlock() 117 | 118 | args := &PutAppendArgs{Key:key, Value:value, Op:op, ClientId: ck.clientId} 119 | ck.requestId++ 120 | args.RequestId = ck.requestId 121 | 122 | for { 123 | shard := key2shard(key) 124 | gid := ck.config.Shards[shard] 125 | if servers, ok := ck.config.Groups[gid]; ok { 126 | for si := 0; si < len(servers); si++ { 127 | srv := ck.make_end(servers[si]) 128 | var reply PutAppendReply 129 | ok := srv.Call("ShardKV.PutAppend", args, &reply) 130 | if ok && reply.WrongLeader == false && reply.Err == OK { 131 | return 132 | } 133 | if ok && reply.Err == ErrWrongGroup { 134 | break 135 | } 136 | } 137 | } 138 | time.Sleep(100 * time.Millisecond) 139 | // ask master for the latest configuration. 140 | ck.config = ck.sm.Query(-1) 141 | } 142 | } 143 | 144 | func (ck *Clerk) Put(key string, value string) { 145 | ck.PutAppend(key, value, "Put") 146 | } 147 | func (ck *Clerk) Append(key string, value string) { 148 | ck.PutAppend(key, value, "Append") 149 | } 150 | -------------------------------------------------------------------------------- /src/shardkv/common.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "shardmaster" 4 | 5 | // 6 | // Sharded key/value server. 7 | // Lots of replica groups, each running op-at-a-time paxos. 8 | // Shardmaster decides which group serves each shard. 9 | // Shardmaster may change shard assignment from time to time. 10 | // 11 | // You will have to modify these definitions. 12 | // 13 | 14 | const ( 15 | OK = "OK" 16 | ErrNoKey = "ErrNoKey" 17 | ErrWrongGroup = "ErrWrongGroup" 18 | ErrNotReady = "ErrNotReady" 19 | ErrWrongConfig= "ErrWrongCOnfig" 20 | ) 21 | 22 | const ( 23 | Get = "Get" 24 | Put = "Put" 25 | Append = "Append" 26 | PutAppend = "PutAppend" 27 | Reconfigure = "Configure" 28 | ) 29 | 30 | type Err string 31 | 32 | // Put or Append 33 | type PutAppendArgs struct { 34 | // You'll have to add definitions here. 35 | Key string 36 | Value string 37 | Op string // "Put" or "Append" 38 | // You'll have to add definitions here. 39 | // Field names must start with capital letters, 40 | // otherwise RPC will break. 41 | ClientId int64 42 | RequestId int 43 | } 44 | 45 | type PutAppendReply struct { 46 | WrongLeader bool 47 | Err Err 48 | } 49 | 50 | type GetArgs struct { 51 | Key string 52 | // You'll have to add definitions here. 
53 | ClientId int64 54 | RequestId int 55 | } 56 | 57 | type GetReply struct { 58 | WrongLeader bool 59 | Err Err 60 | Value string 61 | } 62 | 63 | /*--------------Add by Yang----------------------*/ 64 | // send to follower server in group 65 | type ReconfigureArgs struct { 66 | Cfg shardmaster.Config 67 | StoreShard [shardmaster.NShards]map[string]string 68 | Ack map[int64]int 69 | //Replies map[int64]Result //!!! be careful of gob 70 | } 71 | 72 | type ReconfigureReply struct { 73 | Err Err 74 | } 75 | 76 | // send to another group leader 77 | type TransferArgs struct { 78 | ConfigNum int 79 | Shards []int 80 | } 81 | 82 | type TransferReply struct { 83 | StoreShard [shardmaster.NShards]map[string]string 84 | Ack map[int64]int 85 | WrongLeader bool 86 | Err Err 87 | } -------------------------------------------------------------------------------- /src/shardkv/config.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "shardmaster" 4 | import "labrpc" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | import "raft" 15 | import "strconv" 16 | 17 | func randstring(n int) string { 18 | b := make([]byte, 2*n) 19 | crand.Read(b) 20 | s := base64.URLEncoding.EncodeToString(b) 21 | return s[0:n] 22 | } 23 | 24 | // Randomize server handles 25 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 26 | sa := make([]*labrpc.ClientEnd, len(kvh)) 27 | copy(sa, kvh) 28 | for i := range sa { 29 | j := rand.Intn(i + 1) 30 | sa[i], sa[j] = sa[j], sa[i] 31 | } 32 | return sa 33 | } 34 | 35 | type group struct { 36 | gid int 37 | servers []*ShardKV 38 | saved []*raft.Persister 39 | endnames [][]string 40 | mendnames [][]string 41 | } 42 | 43 | type config struct { 44 | mu sync.Mutex 45 | t *testing.T 46 | net *labrpc.Network 47 | 48 | nmasters int 49 | masterservers []*shardmaster.ShardMaster 50 | mck *shardmaster.Clerk 51 | 52 | ngroups int 53 | n int // servers per k/v group 54 | groups []*group 55 | 56 | clerks map[*Clerk][]string 57 | nextClientId int 58 | maxraftstate int 59 | } 60 | 61 | func (cfg *config) cleanup() { 62 | for gi := 0; gi < cfg.ngroups; gi++ { 63 | cfg.ShutdownGroup(gi) 64 | } 65 | } 66 | 67 | // check that no server's log is too big. 68 | func (cfg *config) checklogs() { 69 | for gi := 0; gi < cfg.ngroups; gi++ { 70 | for i := 0; i < cfg.n; i++ { 71 | raft := cfg.groups[gi].saved[i].RaftStateSize() 72 | snap := len(cfg.groups[gi].saved[i].ReadSnapshot()) 73 | if cfg.maxraftstate >= 0 && raft > 2*cfg.maxraftstate { 74 | cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v", 75 | raft, cfg.maxraftstate) 76 | } 77 | if cfg.maxraftstate < 0 && snap > 0 { 78 | cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!") 79 | } 80 | } 81 | } 82 | } 83 | 84 | // master server name for labrpc. 85 | func (cfg *config) mastername(i int) string { 86 | return "master" + strconv.Itoa(i) 87 | } 88 | 89 | // shard server name for labrpc. 90 | // i'th server of group gid. 91 | func (cfg *config) servername(gid int, i int) string { 92 | return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i) 93 | } 94 | 95 | func (cfg *config) makeClient() *Clerk { 96 | cfg.mu.Lock() 97 | defer cfg.mu.Unlock() 98 | 99 | // ClientEnds to talk to master service. 
100 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 101 | endnames := make([]string, cfg.n) 102 | for j := 0; j < cfg.nmasters; j++ { 103 | endnames[j] = randstring(20) 104 | ends[j] = cfg.net.MakeEnd(endnames[j]) 105 | cfg.net.Connect(endnames[j], cfg.mastername(j)) 106 | cfg.net.Enable(endnames[j], true) 107 | } 108 | 109 | ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd { 110 | name := randstring(20) 111 | end := cfg.net.MakeEnd(name) 112 | cfg.net.Connect(name, servername) 113 | cfg.net.Enable(name, true) 114 | return end 115 | }) 116 | cfg.clerks[ck] = endnames 117 | cfg.nextClientId++ 118 | return ck 119 | } 120 | 121 | func (cfg *config) deleteClient(ck *Clerk) { 122 | cfg.mu.Lock() 123 | defer cfg.mu.Unlock() 124 | 125 | v := cfg.clerks[ck] 126 | for i := 0; i < len(v); i++ { 127 | os.Remove(v[i]) 128 | } 129 | delete(cfg.clerks, ck) 130 | } 131 | 132 | // Shutdown i'th server of gi'th group, by isolating it 133 | func (cfg *config) ShutdownServer(gi int, i int) { 134 | cfg.mu.Lock() 135 | defer cfg.mu.Unlock() 136 | 137 | gg := cfg.groups[gi] 138 | 139 | // prevent this server from sending 140 | for j := 0; j < len(gg.servers); j++ { 141 | name := gg.endnames[i][j] 142 | cfg.net.Enable(name, false) 143 | } 144 | for j := 0; j < len(gg.mendnames[i]); j++ { 145 | name := gg.mendnames[i][j] 146 | cfg.net.Enable(name, false) 147 | } 148 | 149 | // disable client connections to the server. 150 | // it's important to do this before creating 151 | // the new Persister in saved[i], to avoid 152 | // the possibility of the server returning a 153 | // positive reply to an Append but persisting 154 | // the result in the superseded Persister. 155 | cfg.net.DeleteServer(cfg.servername(gg.gid, i)) 156 | 157 | // a fresh persister, in case old instance 158 | // continues to update the Persister. 159 | // but copy old persister's content so that we always 160 | // pass Make() the last persisted state. 161 | if gg.saved[i] != nil { 162 | gg.saved[i] = gg.saved[i].Copy() 163 | } 164 | 165 | kv := gg.servers[i] 166 | if kv != nil { 167 | cfg.mu.Unlock() 168 | kv.Kill() 169 | cfg.mu.Lock() 170 | gg.servers[i] = nil 171 | } 172 | } 173 | 174 | func (cfg *config) ShutdownGroup(gi int) { 175 | for i := 0; i < cfg.n; i++ { 176 | cfg.ShutdownServer(gi, i) 177 | } 178 | } 179 | 180 | // start i'th server in gi'th group 181 | func (cfg *config) StartServer(gi int, i int) { 182 | cfg.mu.Lock() 183 | 184 | gg := cfg.groups[gi] 185 | 186 | // a fresh set of outgoing ClientEnd names 187 | // to talk to other servers in this group. 188 | gg.endnames[i] = make([]string, cfg.n) 189 | for j := 0; j < cfg.n; j++ { 190 | gg.endnames[i][j] = randstring(20) 191 | } 192 | 193 | // and the connections to other servers in this group. 194 | ends := make([]*labrpc.ClientEnd, cfg.n) 195 | for j := 0; j < cfg.n; j++ { 196 | ends[j] = cfg.net.MakeEnd(gg.endnames[i][j]) 197 | cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j)) 198 | cfg.net.Enable(gg.endnames[i][j], true) 199 | } 200 | 201 | // ends to talk to shardmaster service 202 | mends := make([]*labrpc.ClientEnd, cfg.nmasters) 203 | gg.mendnames[i] = make([]string, cfg.nmasters) 204 | for j := 0; j < cfg.nmasters; j++ { 205 | gg.mendnames[i][j] = randstring(20) 206 | mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j]) 207 | cfg.net.Connect(gg.mendnames[i][j], cfg.mastername(j)) 208 | cfg.net.Enable(gg.mendnames[i][j], true) 209 | } 210 | 211 | // a fresh persister, so old instance doesn't overwrite 212 | // new instance's persisted state. 
213 | // give the fresh persister a copy of the old persister's 214 | // state, so that the spec is that we pass StartKVServer() 215 | // the last persisted state. 216 | if gg.saved[i] != nil { 217 | gg.saved[i] = gg.saved[i].Copy() 218 | } else { 219 | gg.saved[i] = raft.MakePersister() 220 | } 221 | cfg.mu.Unlock() 222 | 223 | gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate, 224 | gg.gid, mends, 225 | func(servername string) *labrpc.ClientEnd { 226 | name := randstring(20) 227 | end := cfg.net.MakeEnd(name) 228 | cfg.net.Connect(name, servername) 229 | cfg.net.Enable(name, true) 230 | return end 231 | }) 232 | 233 | kvsvc := labrpc.MakeService(gg.servers[i]) 234 | rfsvc := labrpc.MakeService(gg.servers[i].rf) 235 | srv := labrpc.MakeServer() 236 | srv.AddService(kvsvc) 237 | srv.AddService(rfsvc) 238 | cfg.net.AddServer(cfg.servername(gg.gid, i), srv) 239 | } 240 | 241 | func (cfg *config) StartGroup(gi int) { 242 | for i := 0; i < cfg.n; i++ { 243 | cfg.StartServer(gi, i) 244 | } 245 | } 246 | 247 | func (cfg *config) StartMasterServer(i int) { 248 | // ClientEnds to talk to other master replicas. 249 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 250 | for j := 0; j < cfg.nmasters; j++ { 251 | endname := randstring(20) 252 | ends[j] = cfg.net.MakeEnd(endname) 253 | cfg.net.Connect(endname, cfg.mastername(j)) 254 | cfg.net.Enable(endname, true) 255 | } 256 | 257 | p := raft.MakePersister() 258 | 259 | cfg.masterservers[i] = shardmaster.StartServer(ends, i, p) 260 | 261 | msvc := labrpc.MakeService(cfg.masterservers[i]) 262 | rfsvc := labrpc.MakeService(cfg.masterservers[i].Raft()) 263 | srv := labrpc.MakeServer() 264 | srv.AddService(msvc) 265 | srv.AddService(rfsvc) 266 | cfg.net.AddServer(cfg.mastername(i), srv) 267 | } 268 | 269 | func (cfg *config) shardclerk() *shardmaster.Clerk { 270 | // ClientEnds to talk to master service. 271 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 272 | for j := 0; j < cfg.nmasters; j++ { 273 | name := randstring(20) 274 | ends[j] = cfg.net.MakeEnd(name) 275 | cfg.net.Connect(name, cfg.mastername(j)) 276 | cfg.net.Enable(name, true) 277 | } 278 | 279 | return shardmaster.MakeClerk(ends) 280 | } 281 | 282 | // tell the shardmaster that a group is joining. 283 | func (cfg *config) join(gi int) { 284 | gid := cfg.groups[gi].gid 285 | servernames := make([]string, cfg.n) 286 | for i := 0; i < cfg.n; i++ { 287 | servernames[i] = cfg.servername(gid, i) 288 | } 289 | cfg.mck.Join(gid, servernames) 290 | } 291 | 292 | // tell the shardmaster that a group is leaving. 
293 | func (cfg *config) leave(gi int) { 294 | gid := cfg.groups[gi].gid 295 | servernames := make([]string, cfg.n) 296 | for i := 0; i < cfg.n; i++ { 297 | servernames[i] = cfg.servername(gid, i) 298 | } 299 | cfg.mck.Leave(gid) 300 | } 301 | 302 | func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config { 303 | runtime.GOMAXPROCS(4) 304 | cfg := &config{} 305 | cfg.t = t 306 | cfg.maxraftstate = maxraftstate 307 | cfg.net = labrpc.MakeNetwork() 308 | 309 | // master 310 | cfg.nmasters = 3 311 | cfg.masterservers = make([]*shardmaster.ShardMaster, cfg.nmasters) 312 | for i := 0; i < cfg.nmasters; i++ { 313 | cfg.StartMasterServer(i) 314 | } 315 | cfg.mck = cfg.shardclerk() 316 | 317 | cfg.ngroups = 3 318 | cfg.groups = make([]*group, cfg.ngroups) 319 | cfg.n = n 320 | for gi := 0; gi < cfg.ngroups; gi++ { 321 | gg := &group{} 322 | cfg.groups[gi] = gg 323 | gg.gid = 100 + gi 324 | gg.servers = make([]*ShardKV, cfg.n) 325 | gg.saved = make([]*raft.Persister, cfg.n) 326 | gg.endnames = make([][]string, cfg.n) 327 | gg.mendnames = make([][]string, cfg.nmasters) 328 | for i := 0; i < cfg.n; i++ { 329 | cfg.StartServer(gi, i) 330 | } 331 | } 332 | 333 | cfg.clerks = make(map[*Clerk][]string) 334 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 335 | 336 | cfg.net.Reliable(!unreliable) 337 | 338 | return cfg 339 | } 340 | -------------------------------------------------------------------------------- /src/shardmaster/client.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Shardmaster clerk. 5 | // 6 | 7 | import "labrpc" 8 | import "time" 9 | import "crypto/rand" 10 | import ( 11 | "math/big" 12 | "sync" 13 | ) 14 | 15 | type Clerk struct { 16 | servers []*labrpc.ClientEnd 17 | // Your data here. 18 | mu sync.Mutex 19 | clientId int64 20 | requestId int 21 | } 22 | 23 | func nrand() int64 { 24 | max := big.NewInt(int64(1) << 62) 25 | bigx, _ := rand.Int(rand.Reader, max) 26 | x := bigx.Int64() 27 | return x 28 | } 29 | 30 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 31 | ck := new(Clerk) 32 | ck.servers = servers 33 | // Your code here. 34 | ck.requestId = 0 35 | ck.clientId = nrand() 36 | return ck 37 | } 38 | 39 | func (ck *Clerk) Query(num int) Config { 40 | args := &QueryArgs{} 41 | args.Num = num 42 | // Your code here. 43 | ck.mu.Lock() 44 | ck.requestId++ 45 | args.ClientId = ck.clientId 46 | args.RequestId = ck.requestId 47 | ck.mu.Unlock() 48 | for { 49 | // try each known server. 50 | for _, srv := range ck.servers { 51 | var reply QueryReply 52 | ok := srv.Call("ShardMaster.Query", args, &reply) 53 | if ok && reply.WrongLeader == false { 54 | return reply.Config 55 | } 56 | } 57 | time.Sleep(100 * time.Millisecond) 58 | } 59 | } 60 | 61 | func (ck *Clerk) Join(gid int, servers []string) { 62 | args := &JoinArgs{} 63 | args.GID = gid 64 | args.Servers = servers 65 | // Your code here. 66 | ck.mu.Lock() 67 | ck.requestId++ 68 | args.ClientId = ck.clientId 69 | args.RequestId = ck.requestId 70 | ck.mu.Unlock() 71 | 72 | for { 73 | // try each known server. 74 | for _, srv := range ck.servers { 75 | var reply JoinReply 76 | ok := srv.Call("ShardMaster.Join", args, &reply) 77 | if ok && reply.WrongLeader == false { 78 | return 79 | } 80 | } 81 | time.Sleep(100 * time.Millisecond) 82 | } 83 | } 84 | 85 | func (ck *Clerk) Leave(gid int) { 86 | args := &LeaveArgs{} 87 | args.GID = gid 88 | // Your code here. 
89 | ck.mu.Lock() 90 | ck.requestId++ 91 | args.ClientId = ck.clientId 92 | args.RequestId = ck.requestId 93 | ck.mu.Unlock() 94 | 95 | for { 96 | // try each known server. 97 | for _, srv := range ck.servers { 98 | var reply LeaveReply 99 | ok := srv.Call("ShardMaster.Leave", args, &reply) 100 | if ok && reply.WrongLeader == false { 101 | return 102 | } 103 | } 104 | time.Sleep(100 * time.Millisecond) 105 | } 106 | } 107 | 108 | func (ck *Clerk) Move(shard int, gid int) { 109 | args := &MoveArgs{} 110 | args.Shard = shard 111 | args.GID = gid 112 | // Your code here. 113 | ck.mu.Lock() 114 | ck.requestId++ 115 | args.ClientId = ck.clientId 116 | args.RequestId = ck.requestId 117 | ck.mu.Unlock() 118 | 119 | for { 120 | // try each known server. 121 | for _, srv := range ck.servers { 122 | var reply MoveReply 123 | ok := srv.Call("ShardMaster.Move", args, &reply) 124 | if ok && reply.WrongLeader == false { 125 | return 126 | } 127 | } 128 | time.Sleep(100 * time.Millisecond) 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/shardmaster/common.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Master shard server: assigns shards to replication groups. 5 | // 6 | // RPC interface: 7 | // Join(gid, servers) -- replica group gid is joining, give it some shards. 8 | // Leave(gid) -- replica group gid is retiring, hand off all its shards. 9 | // Move(shard, gid) -- hand off one shard from current owner to gid. 10 | // Query(num) -> fetch Config # num, or latest config if num==-1. 11 | // 12 | // A Config (configuration) describes a set of replica groups, and the 13 | // replica group responsible for each shard. Configs are numbered. Config 14 | // #0 is the initial configuration, with no groups and all shards 15 | // assigned to group 0 (the invalid group). 16 | // 17 | // A GID is a replica group ID. GIDs must be unique and > 0. 18 | // Once a GID joins, and leaves, it should never join again. 19 | // 20 | // You will need to add fields to the RPC arguments. 21 | // 22 | 23 | // The number of shards. 24 | const NShards = 10 25 | 26 | // A configuration -- an assignment of shards to groups. 27 | // Please don't change this. 
28 | type Config struct { 29 | Num int // config number 30 | Shards [NShards]int // shard -> gid 31 | Groups map[int][]string // gid -> servers[] 32 | } 33 | 34 | const ( 35 | OK = "OK" 36 | ) 37 | 38 | const ( 39 | Join = "Join" 40 | Leave = "Leave" 41 | Move = "Move" 42 | Query = "Query" 43 | ) 44 | 45 | type Err string 46 | 47 | type JoinArgs struct { 48 | GID int // unique replica group ID 49 | Servers []string // group server ports 50 | ClientId int64 51 | RequestId int 52 | } 53 | 54 | type JoinReply struct { 55 | WrongLeader bool 56 | Err Err 57 | } 58 | 59 | type LeaveArgs struct { 60 | GID int 61 | ClientId int64 62 | RequestId int 63 | } 64 | 65 | type LeaveReply struct { 66 | WrongLeader bool 67 | Err Err 68 | } 69 | 70 | type MoveArgs struct { 71 | Shard int 72 | GID int 73 | ClientId int64 74 | RequestId int 75 | } 76 | 77 | type MoveReply struct { 78 | WrongLeader bool 79 | Err Err 80 | } 81 | 82 | type QueryArgs struct { 83 | Num int // desired config number 84 | ClientId int64 85 | RequestId int 86 | } 87 | 88 | type QueryReply struct { 89 | WrongLeader bool 90 | Err Err 91 | Config Config 92 | } 93 | -------------------------------------------------------------------------------- /src/shardmaster/config.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "labrpc" 4 | import "raft" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | 15 | func randstring(n int) string { 16 | b := make([]byte, 2*n) 17 | crand.Read(b) 18 | s := base64.URLEncoding.EncodeToString(b) 19 | return s[0:n] 20 | } 21 | 22 | // Randomize server handles 23 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 24 | sa := make([]*labrpc.ClientEnd, len(kvh)) 25 | copy(sa, kvh) 26 | for i := range sa { 27 | j := rand.Intn(i + 1) 28 | sa[i], sa[j] = sa[j], sa[i] 29 | } 30 | return sa 31 | } 32 | 33 | type config struct { 34 | mu sync.Mutex 35 | t *testing.T 36 | net *labrpc.Network 37 | n int 38 | servers []*ShardMaster 39 | saved []*raft.Persister 40 | endnames [][]string // names of each server's sending ClientEnds 41 | clerks map[*Clerk][]string 42 | nextClientId int 43 | } 44 | 45 | func (cfg *config) cleanup() { 46 | cfg.mu.Lock() 47 | defer cfg.mu.Unlock() 48 | for i := 0; i < len(cfg.servers); i++ { 49 | if cfg.servers[i] != nil { 50 | cfg.servers[i].Kill() 51 | } 52 | } 53 | } 54 | 55 | // Maximum log size across all servers 56 | func (cfg *config) LogSize() int { 57 | logsize := 0 58 | for i := 0; i < cfg.n; i++ { 59 | n := cfg.saved[i].RaftStateSize() 60 | if n > logsize { 61 | logsize = n 62 | } 63 | } 64 | return logsize 65 | } 66 | 67 | // attach server i to servers listed in to 68 | // caller must hold cfg.mu 69 | func (cfg *config) connectUnlocked(i int, to []int) { 70 | // log.Printf("connect peer %d to %v\n", i, to) 71 | 72 | // outgoing socket files 73 | for j := 0; j < len(to); j++ { 74 | endname := cfg.endnames[i][to[j]] 75 | cfg.net.Enable(endname, true) 76 | } 77 | 78 | // incoming socket files 79 | for j := 0; j < len(to); j++ { 80 | endname := cfg.endnames[to[j]][i] 81 | cfg.net.Enable(endname, true) 82 | } 83 | } 84 | 85 | func (cfg *config) connect(i int, to []int) { 86 | cfg.mu.Lock() 87 | defer cfg.mu.Unlock() 88 | cfg.connectUnlocked(i, to) 89 | } 90 | 91 | // detach server i from the servers listed in from 92 | // caller must hold cfg.mu 93 | func (cfg 
*config) disconnectUnlocked(i int, from []int) { 94 | // log.Printf("disconnect peer %d from %v\n", i, from) 95 | 96 | // outgoing socket files 97 | for j := 0; j < len(from); j++ { 98 | if cfg.endnames[i] != nil { 99 | endname := cfg.endnames[i][from[j]] 100 | cfg.net.Enable(endname, false) 101 | } 102 | } 103 | 104 | // incoming socket files 105 | for j := 0; j < len(from); j++ { 106 | if cfg.endnames[j] != nil { 107 | endname := cfg.endnames[from[j]][i] 108 | cfg.net.Enable(endname, false) 109 | } 110 | } 111 | } 112 | 113 | func (cfg *config) disconnect(i int, from []int) { 114 | cfg.mu.Lock() 115 | defer cfg.mu.Unlock() 116 | cfg.disconnectUnlocked(i, from) 117 | } 118 | 119 | func (cfg *config) All() []int { 120 | all := make([]int, cfg.n) 121 | for i := 0; i < cfg.n; i++ { 122 | all[i] = i 123 | } 124 | return all 125 | } 126 | 127 | func (cfg *config) ConnectAll() { 128 | cfg.mu.Lock() 129 | defer cfg.mu.Unlock() 130 | for i := 0; i < cfg.n; i++ { 131 | cfg.connectUnlocked(i, cfg.All()) 132 | } 133 | } 134 | 135 | // Sets up 2 partitions with connectivity between servers in each partition. 136 | func (cfg *config) partition(p1 []int, p2 []int) { 137 | cfg.mu.Lock() 138 | defer cfg.mu.Unlock() 139 | // log.Printf("partition servers into: %v %v\n", p1, p2) 140 | for i := 0; i < len(p1); i++ { 141 | cfg.disconnectUnlocked(p1[i], p2) 142 | cfg.connectUnlocked(p1[i], p1) 143 | } 144 | for i := 0; i < len(p2); i++ { 145 | cfg.disconnectUnlocked(p2[i], p1) 146 | cfg.connectUnlocked(p2[i], p2) 147 | } 148 | } 149 | 150 | // Create a clerk with clerk specific server names. 151 | // Give it connections to all of the servers, but for 152 | // now enable only connections to servers in to[]. 153 | func (cfg *config) makeClient(to []int) *Clerk { 154 | cfg.mu.Lock() 155 | defer cfg.mu.Unlock() 156 | 157 | // a fresh set of ClientEnds. 
158 | ends := make([]*labrpc.ClientEnd, cfg.n) 159 | endnames := make([]string, cfg.n) 160 | for j := 0; j < cfg.n; j++ { 161 | endnames[j] = randstring(20) 162 | ends[j] = cfg.net.MakeEnd(endnames[j]) 163 | cfg.net.Connect(endnames[j], j) 164 | } 165 | 166 | ck := MakeClerk(random_handles(ends)) 167 | cfg.clerks[ck] = endnames 168 | cfg.nextClientId++ 169 | cfg.ConnectClientUnlocked(ck, to) 170 | return ck 171 | } 172 | 173 | func (cfg *config) deleteClient(ck *Clerk) { 174 | cfg.mu.Lock() 175 | defer cfg.mu.Unlock() 176 | 177 | v := cfg.clerks[ck] 178 | for i := 0; i < len(v); i++ { 179 | os.Remove(v[i]) 180 | } 181 | delete(cfg.clerks, ck) 182 | } 183 | 184 | // caller should hold cfg.mu 185 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 186 | // log.Printf("ConnectClient %v to %v\n", ck, to) 187 | endnames := cfg.clerks[ck] 188 | for j := 0; j < len(to); j++ { 189 | s := endnames[to[j]] 190 | cfg.net.Enable(s, true) 191 | } 192 | } 193 | 194 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 195 | cfg.mu.Lock() 196 | defer cfg.mu.Unlock() 197 | cfg.ConnectClientUnlocked(ck, to) 198 | } 199 | 200 | // caller should hold cfg.mu 201 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 202 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 203 | endnames := cfg.clerks[ck] 204 | for j := 0; j < len(from); j++ { 205 | s := endnames[from[j]] 206 | cfg.net.Enable(s, false) 207 | } 208 | } 209 | 210 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 211 | cfg.mu.Lock() 212 | defer cfg.mu.Unlock() 213 | cfg.DisconnectClientUnlocked(ck, from) 214 | } 215 | 216 | // Shutdown a server by isolating it 217 | func (cfg *config) ShutdownServer(i int) { 218 | cfg.mu.Lock() 219 | defer cfg.mu.Unlock() 220 | 221 | cfg.disconnectUnlocked(i, cfg.All()) 222 | 223 | // disable client connections to the server. 224 | // it's important to do this before creating 225 | // the new Persister in saved[i], to avoid 226 | // the possibility of the server returning a 227 | // positive reply to an Append but persisting 228 | // the result in the superseded Persister. 229 | cfg.net.DeleteServer(i) 230 | 231 | // a fresh persister, in case old instance 232 | // continues to update the Persister. 233 | // but copy old persister's content so that we always 234 | // pass Make() the last persisted state. 235 | if cfg.saved[i] != nil { 236 | cfg.saved[i] = cfg.saved[i].Copy() 237 | } 238 | 239 | kv := cfg.servers[i] 240 | if kv != nil { 241 | cfg.mu.Unlock() 242 | kv.Kill() 243 | cfg.mu.Lock() 244 | cfg.servers[i] = nil 245 | } 246 | } 247 | 248 | // If restart servers, first call ShutdownServer 249 | func (cfg *config) StartServer(i int) { 250 | cfg.mu.Lock() 251 | 252 | // a fresh set of outgoing ClientEnd names. 253 | cfg.endnames[i] = make([]string, cfg.n) 254 | for j := 0; j < cfg.n; j++ { 255 | cfg.endnames[i][j] = randstring(20) 256 | } 257 | 258 | // a fresh set of ClientEnds. 259 | ends := make([]*labrpc.ClientEnd, cfg.n) 260 | for j := 0; j < cfg.n; j++ { 261 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 262 | cfg.net.Connect(cfg.endnames[i][j], j) 263 | } 264 | 265 | // a fresh persister, so old instance doesn't overwrite 266 | // new instance's persisted state. 267 | // give the fresh persister a copy of the old persister's 268 | // state, so that the spec is that we pass StartKVServer() 269 | // the last persisted state. 
270 | if cfg.saved[i] != nil { 271 | cfg.saved[i] = cfg.saved[i].Copy() 272 | } else { 273 | cfg.saved[i] = raft.MakePersister() 274 | } 275 | 276 | cfg.mu.Unlock() 277 | 278 | cfg.servers[i] = StartServer(ends, i, cfg.saved[i]) 279 | 280 | kvsvc := labrpc.MakeService(cfg.servers[i]) 281 | rfsvc := labrpc.MakeService(cfg.servers[i].rf) 282 | srv := labrpc.MakeServer() 283 | srv.AddService(kvsvc) 284 | srv.AddService(rfsvc) 285 | cfg.net.AddServer(i, srv) 286 | } 287 | 288 | func (cfg *config) Leader() (bool, int) { 289 | cfg.mu.Lock() 290 | defer cfg.mu.Unlock() 291 | 292 | for i := 0; i < cfg.n; i++ { 293 | _, is_leader := cfg.servers[i].rf.GetState() 294 | if is_leader { 295 | return true, i 296 | } 297 | } 298 | return false, 0 299 | } 300 | 301 | // Partition servers into 2 groups and put current leader in minority 302 | func (cfg *config) make_partition() ([]int, []int) { 303 | _, l := cfg.Leader() 304 | p1 := make([]int, cfg.n/2+1) 305 | p2 := make([]int, cfg.n/2) 306 | j := 0 307 | for i := 0; i < cfg.n; i++ { 308 | if i != l { 309 | if j < len(p1) { 310 | p1[j] = i 311 | } else { 312 | p2[j-len(p1)] = i 313 | } 314 | j++ 315 | } 316 | } 317 | p2[len(p2)-1] = l 318 | return p1, p2 319 | } 320 | 321 | func make_config(t *testing.T, n int, unreliable bool) *config { 322 | runtime.GOMAXPROCS(4) 323 | cfg := &config{} 324 | cfg.t = t 325 | cfg.net = labrpc.MakeNetwork() 326 | cfg.n = n 327 | cfg.servers = make([]*ShardMaster, cfg.n) 328 | cfg.saved = make([]*raft.Persister, cfg.n) 329 | cfg.endnames = make([][]string, cfg.n) 330 | cfg.clerks = make(map[*Clerk][]string) 331 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 332 | 333 | // create a full set of KV servers. 334 | for i := 0; i < cfg.n; i++ { 335 | cfg.StartServer(i) 336 | } 337 | 338 | cfg.ConnectAll() 339 | 340 | cfg.net.Reliable(!unreliable) 341 | 342 | return cfg 343 | } 344 | -------------------------------------------------------------------------------- /src/shardmaster/server.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | 4 | import "raft" 5 | import "labrpc" 6 | import "sync" 7 | import ( 8 | "encoding/gob" 9 | "log" 10 | "time" 11 | "runtime/debug" 12 | "os" 13 | ) 14 | 15 | const Debug = 0 16 | 17 | func DPrintf(format string, a ...interface{}) (n int, err error) { 18 | if Debug > 0 { 19 | log.Printf(format, a...) 20 | } 21 | return 22 | } 23 | 24 | func DPrintln(a ...interface{}) { 25 | if Debug > 0 { 26 | log.Println(a...) 27 | } 28 | return 29 | } 30 | 31 | type Op struct { 32 | // Your definitions here. 33 | // Field names must start with capital letters, 34 | // otherwise RPC will break. 35 | OpType string 36 | Args interface{} 37 | } 38 | 39 | type Result struct { 40 | opType string 41 | args interface{} 42 | reply interface{} 43 | } 44 | 45 | type ShardMaster struct { 46 | mu sync.Mutex 47 | me int 48 | rf *raft.Raft 49 | applyCh chan raft.ApplyMsg 50 | 51 | // Your data here. 52 | cfgNum int 53 | ack map[int64]int 54 | messages map[int]chan Result 55 | configs []Config // indexed by config num 56 | } 57 | 58 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) { 59 | // Your code here. 
60 | index, _, isLeader := sm.rf.Start(Op{OpType: Join, Args: *args}) 61 | if !isLeader { 62 | reply.WrongLeader = true 63 | return 64 | } 65 | 66 | sm.mu.Lock() 67 | if _, ok := sm.messages[index]; !ok { 68 | sm.messages[index] = make(chan Result, 1) 69 | 70 | } 71 | chanMsg := sm.messages[index] 72 | sm.mu.Unlock() 73 | 74 | select { 75 | case msg := <- chanMsg: 76 | if recArgs, ok := msg.args.(JoinArgs); !ok { 77 | reply.WrongLeader = true 78 | } else { 79 | if args.ClientId != recArgs.ClientId || args.RequestId != recArgs.RequestId { 80 | reply.WrongLeader = true 81 | } else { 82 | reply.Err = msg.reply.(JoinReply).Err 83 | reply.WrongLeader = false 84 | //DPrintf("[%d] Apply Join: [%d]", sm.me, args.RequestId) 85 | } 86 | } 87 | case <- time.After(time.Second * 1): 88 | reply.WrongLeader = true 89 | } 90 | } 91 | 92 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) { 93 | // Your code here. 94 | index, _, isLeader := sm.rf.Start(Op{OpType: Leave, Args: *args}) 95 | if !isLeader { 96 | reply.WrongLeader = true 97 | return 98 | } 99 | 100 | sm.mu.Lock() 101 | if _, ok := sm.messages[index]; !ok { 102 | sm.messages[index] = make(chan Result, 1) 103 | 104 | } 105 | chanMsg := sm.messages[index] 106 | sm.mu.Unlock() 107 | 108 | select { 109 | case msg := <- chanMsg: 110 | if recArgs, ok := msg.args.(LeaveArgs); !ok { 111 | reply.WrongLeader = true 112 | } else { 113 | if args.ClientId != recArgs.ClientId || args.RequestId != recArgs.RequestId { 114 | reply.WrongLeader = true 115 | } else { 116 | reply.Err = msg.reply.(LeaveReply).Err 117 | reply.WrongLeader = false 118 | //DPrintf("[%d] Apply Leave: [%d]", sm.me, args.RequestId) 119 | } 120 | } 121 | case <- time.After(time.Second * 1): 122 | reply.WrongLeader = true 123 | } 124 | } 125 | 126 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) { 127 | // Your code here. 128 | index, _, isLeader := sm.rf.Start(Op{OpType: Move, Args: *args}) 129 | if !isLeader { 130 | reply.WrongLeader = true 131 | return 132 | } 133 | 134 | sm.mu.Lock() 135 | if _, ok := sm.messages[index]; !ok { 136 | sm.messages[index] = make(chan Result, 1) 137 | 138 | } 139 | chanMsg := sm.messages[index] 140 | sm.mu.Unlock() 141 | 142 | select { 143 | case msg := <- chanMsg: 144 | if recArgs, ok := msg.args.(MoveArgs); !ok { 145 | reply.WrongLeader = true 146 | } else { 147 | if args.ClientId != recArgs.ClientId || args.RequestId != recArgs.RequestId { 148 | reply.WrongLeader = true 149 | } else { 150 | reply.Err = msg.reply.(MoveReply).Err 151 | reply.WrongLeader = false 152 | //DPrintf("[%d] Apply Move: [%d]", sm.me, args.RequestId) 153 | } 154 | } 155 | case <- time.After(time.Second * 1): 156 | reply.WrongLeader = true 157 | } 158 | } 159 | 160 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) { 161 | // Your code here. 162 | index, _, isLeader := sm.rf.Start(Op{OpType: Query, Args: *args}) 163 | if !isLeader { 164 | reply.WrongLeader = true 165 | return 166 | } 167 | 168 | sm.mu.Lock() 169 | if _, ok := sm.messages[index]; !ok { 170 | sm.messages[index] = make(chan Result, 1) 171 | 172 | } 173 | chanMsg := sm.messages[index] 174 | sm.mu.Unlock() 175 | 176 | select { 177 | case msg := <- chanMsg: 178 | if recArgs, ok := msg.args.(QueryArgs); !ok { 179 | reply.WrongLeader = true 180 | } else { 181 | if args.ClientId != recArgs.ClientId || args.RequestId != recArgs.RequestId { 182 | reply.WrongLeader = true 183 | } else { 184 | //!!! 
return the queried Config in the reply
185 | *reply = msg.reply.(QueryReply)
186 | reply.WrongLeader = false
187 | //DPrintf("[%d] Apply Query: [%d]", sm.me, args.RequestId)
188 | }
189 | }
190 | case <- time.After(time.Second * 1):
191 | reply.WrongLeader = true
192 | }
193 | }
194 | 
195 | 
196 | //
197 | // the tester calls Kill() when a ShardMaster instance won't
198 | // be needed again. you are not required to do anything
199 | // in Kill(), but it might be convenient to (for example)
200 | // turn off debug output from this instance.
201 | //
202 | func (sm *ShardMaster) Kill() {
203 | sm.rf.Kill()
204 | // Your code here, if desired.
205 | }
206 | 
207 | // needed by shardkv tester
208 | func (sm *ShardMaster) Raft() *raft.Raft {
209 | return sm.rf
210 | }
211 | 
212 | //
213 | // servers[] contains the ports of the set of
214 | // servers that will cooperate via Raft to
215 | // form the fault-tolerant shardmaster service.
216 | // me is the index of the current server in servers[].
217 | //
218 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardMaster {
219 | sm := new(ShardMaster)
220 | sm.me = me
221 | 
222 | sm.configs = make([]Config, 1)
223 | sm.configs[0].Groups = map[int][]string{}
224 | 
225 | gob.Register(Op{})
226 | sm.applyCh = make(chan raft.ApplyMsg)
227 | 
228 | // Your code here.
229 | gob.Register(JoinArgs{})
230 | gob.Register(LeaveArgs{})
231 | gob.Register(MoveArgs{})
232 | gob.Register(QueryArgs{})
233 | gob.Register(JoinReply{})
234 | gob.Register(LeaveReply{})
235 | gob.Register(MoveReply{})
236 | gob.Register(QueryReply{})
237 | sm.cfgNum = 0
238 | sm.ack = make(map[int64]int)
239 | sm.messages = make(map[int]chan Result, 1)
240 | sm.rf = raft.Make(servers, me, persister, sm.applyCh)
241 | 
242 | go sm.Update()
243 | 
244 | return sm
245 | }
246 | 
247 | // receive committed commands from Raft and apply them to the configuration state
248 | func (sm *ShardMaster) Update() {
249 | for {
250 | msg := <- sm.applyCh
251 | request := msg.Command.(Op)
252 | //!!! switch on OpType, then type-assert Args to pull out ClientId/RequestId
253 | var result Result
254 | var clientId int64
255 | var requestId int
256 | switch request.OpType {
257 | case Join:
258 | args := request.Args.(JoinArgs)
259 | clientId = args.ClientId
260 | requestId = args.RequestId
261 | result.args = args
262 | case Leave:
263 | args := request.Args.(LeaveArgs)
264 | clientId = args.ClientId
265 | requestId = args.RequestId
266 | result.args = args
267 | case Move:
268 | args := request.Args.(MoveArgs)
269 | clientId = args.ClientId
270 | requestId = args.RequestId
271 | result.args = args
272 | case Query:
273 | args := request.Args.(QueryArgs)
274 | clientId = args.ClientId
275 | requestId = args.RequestId
276 | result.args = args
277 | }
278 | 
279 | result.opType = request.OpType
280 | result.reply = sm.Apply(request, sm.IsDuplicated(clientId, requestId))
281 | sm.SendResult(msg.Index, result)
282 | sm.CheckValid()
283 | }
284 | }
285 | 
286 | //!!! sanity check: every assigned gid in the latest config must exist in Groups
287 | func (sm *ShardMaster) CheckValid() {
288 | c := sm.configs[sm.cfgNum]
289 | for _, v := range c.Shards {
290 | //!!!
init group is zero 291 | if len(c.Groups) == 0 && v == 0 { 292 | continue 293 | } 294 | if _, ok := c.Groups[v]; !ok { 295 | DPrintln("Check failed that", v, "group does not exit", c.Shards, c.Groups) 296 | debug.PrintStack() 297 | os.Exit(-1) 298 | } 299 | } 300 | } 301 | 302 | func (sm *ShardMaster) GetShardByGid(cfg Config, gid int) int { 303 | for i := range cfg.Shards { 304 | if cfg.Shards[i] == gid { 305 | return i 306 | } 307 | } 308 | return -1 309 | } 310 | func (sm *ShardMaster) Apply(request Op, isDuplicated bool) interface{} { 311 | sm.mu.Lock() 312 | defer sm.mu.Unlock() 313 | switch request.Args.(type) { 314 | case JoinArgs: 315 | var reply JoinReply 316 | if !isDuplicated { 317 | sm.ApplyJoin(request.Args.(JoinArgs)) 318 | DPrintln(sm.me, "apply Join", request.Args.(JoinArgs), "->", sm.configs[sm.cfgNum]) 319 | } 320 | reply.Err = OK 321 | return reply 322 | case LeaveArgs: 323 | var reply LeaveReply 324 | if !isDuplicated { 325 | sm.ApplyLeave(request.Args.(LeaveArgs)) 326 | DPrintln(sm.me, "apply Leave", request.Args.(LeaveArgs), "->", sm.configs[sm.cfgNum]) 327 | } 328 | reply.Err = OK 329 | return reply 330 | case MoveArgs: 331 | var reply MoveReply 332 | if !isDuplicated { 333 | sm.ApplyMove(request.Args.(MoveArgs)) 334 | } 335 | reply.Err = OK 336 | DPrintln(sm.me, "apply Move", request.Args.(MoveArgs), "->", sm.configs[sm.cfgNum]) 337 | return reply 338 | case QueryArgs: 339 | var reply QueryReply 340 | args := request.Args.(QueryArgs) 341 | if args.Num == -1 || args.Num > sm.cfgNum { 342 | reply.Config = sm.configs[sm.cfgNum] 343 | } else { 344 | reply.Config = sm.configs[args.Num] 345 | } 346 | reply.Err = OK 347 | DPrintln(sm.me, "apply Query", request.Args.(QueryArgs), "->", sm.configs[sm.cfgNum]) 348 | return reply 349 | } 350 | return nil 351 | } 352 | 353 | func (sm *ShardMaster) NextConfig() *Config { 354 | var c Config 355 | c.Num = sm.cfgNum + 1 356 | c.Shards = sm.configs[sm.cfgNum].Shards 357 | c.Groups = map[int][]string{} 358 | for k, v := range sm.configs[sm.cfgNum].Groups { 359 | c.Groups[k] = v 360 | } 361 | sm.cfgNum += 1 362 | sm.configs = append(sm.configs, c) 363 | //!!! 
return reference 364 | return &sm.configs[sm.cfgNum] 365 | } 366 | 367 | func (sm *ShardMaster) GetMaxGidByShards(shardsCount map[int][]int) int { 368 | max := -1 369 | var gid int 370 | for k, v := range shardsCount { 371 | if max < len(v) { 372 | max = len(v) 373 | gid = k 374 | } 375 | } 376 | return gid 377 | } 378 | 379 | func (sm *ShardMaster) GetMinGidByShards(shardsCount map[int][]int) int { 380 | min := -1 381 | var gid int 382 | for k, v := range shardsCount { 383 | if min == -1 || min > len(v) { 384 | min = len(v) 385 | gid = k 386 | } 387 | } 388 | return gid 389 | } 390 | 391 | func (sm *ShardMaster) CountShards(cfg *Config) map[int][]int { 392 | shardsCount := map[int][]int{} 393 | for k := range cfg.Groups { 394 | shardsCount[k] = []int{} 395 | } 396 | for k, v := range cfg.Shards { 397 | shardsCount[v] = append(shardsCount[v], k) 398 | } 399 | return shardsCount 400 | } 401 | 402 | func (sm *ShardMaster) ReBalanceShards(cfg *Config, request string, gid int) { 403 | shardsCount := sm.CountShards(cfg) // gid -> number of shards 404 | switch request { 405 | case Join: 406 | meanNum := NShards / len(cfg.Groups) 407 | for i := 0; i < meanNum; i++ { 408 | maxGid := sm.GetMaxGidByShards(shardsCount) 409 | if len(shardsCount[maxGid]) == 0 { 410 | DPrintf("ReBalanceShards: max gid does not have shards") 411 | debug.PrintStack() 412 | os.Exit(-1) 413 | } 414 | cfg.Shards[shardsCount[maxGid][0]] = gid 415 | shardsCount[maxGid] = shardsCount[maxGid][1:] 416 | } 417 | case Leave: 418 | shardsArray := shardsCount[gid] 419 | delete(shardsCount, gid) 420 | for _, v := range(shardsArray) { 421 | minGid := sm.GetMinGidByShards(shardsCount) 422 | cfg.Shards[v] = minGid 423 | shardsCount[minGid] = append(shardsCount[minGid], v) 424 | } 425 | } 426 | } 427 | 428 | func (sm *ShardMaster) ApplyJoin(args JoinArgs) { 429 | cfg := sm.NextConfig() 430 | //!!! consider whether gid is exist or not 431 | if _, exist := cfg.Groups[args.GID]; !exist { 432 | cfg.Groups[args.GID] = args.Servers 433 | sm.ReBalanceShards(cfg, Join, args.GID) 434 | } 435 | } 436 | 437 | func (sm *ShardMaster) ApplyLeave(args LeaveArgs) { 438 | cfg := sm.NextConfig() 439 | //!!! 
consider whether gid is exist or not 440 | if _, exist := cfg.Groups[args.GID]; exist { 441 | delete(cfg.Groups, args.GID) 442 | sm.ReBalanceShards(cfg, Leave, args.GID) 443 | } 444 | } 445 | 446 | func (sm *ShardMaster) ApplyMove(args MoveArgs) { 447 | cfg := sm.NextConfig() 448 | cfg.Shards[args.Shard] = args.GID 449 | } 450 | 451 | func (sm *ShardMaster) SendResult(index int, result Result) { 452 | sm.mu.Lock() 453 | defer sm.mu.Unlock() 454 | if _, ok := sm.messages[index]; !ok { 455 | sm.messages[index] = make(chan Result, 1) 456 | } else { 457 | select { 458 | case <- sm.messages[index]: 459 | default: 460 | } 461 | } 462 | sm.messages[index] <- result 463 | } 464 | 465 | func (sm *ShardMaster) IsDuplicated(clientId int64, requestId int) bool { 466 | sm.mu.Lock() 467 | defer sm.mu.Unlock() 468 | if value, ok := sm.ack[clientId]; ok && value >= requestId { 469 | return true 470 | } 471 | sm.ack[clientId] = requestId 472 | return false 473 | } -------------------------------------------------------------------------------- /src/shardmaster/test_test.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "testing" 4 | 5 | // import "time" 6 | import "fmt" 7 | 8 | func check(t *testing.T, groups []int, ck *Clerk) { 9 | c := ck.Query(-1) 10 | if len(c.Groups) != len(groups) { 11 | t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups)) 12 | } 13 | 14 | // are the groups as expected? 15 | for _, g := range groups { 16 | _, ok := c.Groups[g] 17 | if ok != true { 18 | t.Fatalf("missing group %v", g) 19 | } 20 | } 21 | 22 | // any un-allocated shards? 23 | if len(groups) > 0 { 24 | for s, g := range c.Shards { 25 | _, ok := c.Groups[g] 26 | if ok == false { 27 | t.Fatalf("shard %v -> invalid group %v", s, g) 28 | } 29 | } 30 | } 31 | 32 | // more or less balanced sharding? 
33 | counts := map[int]int{} 34 | for _, g := range c.Shards { 35 | counts[g] += 1 36 | } 37 | min := 257 38 | max := 0 39 | for g, _ := range c.Groups { 40 | if counts[g] > max { 41 | max = counts[g] 42 | } 43 | if counts[g] < min { 44 | min = counts[g] 45 | } 46 | } 47 | if max > min+1 { 48 | t.Fatalf("max %v too much larger than min %v", max, min) 49 | } 50 | } 51 | 52 | func check_same_config(t *testing.T, c1 Config, c2 Config) { 53 | if c1.Num != c2.Num { 54 | t.Fatalf("Num wrong") 55 | } 56 | if c1.Shards != c2.Shards { 57 | t.Fatalf("Shards wrong") 58 | } 59 | if len(c1.Groups) != len(c2.Groups) { 60 | t.Fatalf("number of Groups is wrong") 61 | } 62 | for gid, sa := range c1.Groups { 63 | sa1, ok := c2.Groups[gid] 64 | if ok == false || len(sa1) != len(sa) { 65 | t.Fatalf("len(Groups) wrong") 66 | } 67 | if ok && len(sa1) == len(sa) { 68 | for j := 0; j < len(sa); j++ { 69 | if sa[j] != sa1[j] { 70 | t.Fatalf("Groups wrong") 71 | } 72 | } 73 | } 74 | } 75 | } 76 | 77 | func TestBasic(t *testing.T) { 78 | const nservers = 3 79 | cfg := make_config(t, nservers, false) 80 | defer cfg.cleanup() 81 | 82 | ck := cfg.makeClient(cfg.All()) 83 | 84 | fmt.Printf("Test: Basic leave/join ...\n") 85 | 86 | cfa := make([]Config, 6) 87 | cfa[0] = ck.Query(-1) 88 | 89 | check(t, []int{}, ck) 90 | 91 | var gid1 int = 1 92 | ck.Join(gid1, []string{"x", "y", "z"}) 93 | check(t, []int{gid1}, ck) 94 | cfa[1] = ck.Query(-1) 95 | 96 | var gid2 int = 2 97 | ck.Join(gid2, []string{"a", "b", "c"}) 98 | check(t, []int{gid1, gid2}, ck) 99 | cfa[2] = ck.Query(-1) 100 | 101 | ck.Join(gid2, []string{"a", "b", "c"}) 102 | check(t, []int{gid1, gid2}, ck) 103 | cfa[3] = ck.Query(-1) 104 | 105 | cfx := ck.Query(-1) 106 | sa1 := cfx.Groups[gid1] 107 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 108 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 109 | } 110 | sa2 := cfx.Groups[gid2] 111 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 112 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 113 | } 114 | 115 | ck.Leave(gid1) 116 | check(t, []int{gid2}, ck) 117 | cfa[4] = ck.Query(-1) 118 | 119 | ck.Leave(gid1) 120 | check(t, []int{gid2}, ck) 121 | cfa[5] = ck.Query(-1) 122 | 123 | fmt.Printf(" ... Passed\n") 124 | 125 | fmt.Printf("Test: Historical queries ...\n") 126 | 127 | for s := 0; s < nservers; s++ { 128 | cfg.ShutdownServer(s) 129 | for i := 0; i < len(cfa); i++ { 130 | c := ck.Query(cfa[i].Num) 131 | check_same_config(t, c, cfa[i]) 132 | } 133 | cfg.StartServer(s) 134 | cfg.ConnectAll() 135 | } 136 | 137 | fmt.Printf(" ... 
Passed\n") 138 | 139 | fmt.Printf("Test: Move ...\n") 140 | { 141 | var gid3 int = 503 142 | ck.Join(gid3, []string{"3a", "3b", "3c"}) 143 | var gid4 int = 504 144 | ck.Join(gid4, []string{"4a", "4b", "4c"}) 145 | for i := 0; i < NShards; i++ { 146 | cf := ck.Query(-1) 147 | if i < NShards/2 { 148 | ck.Move(i, gid3) 149 | if cf.Shards[i] != gid3 { 150 | cf1 := ck.Query(-1) 151 | if cf1.Num <= cf.Num { 152 | t.Fatalf("Move should increase Config.Num") 153 | } 154 | } 155 | } else { 156 | ck.Move(i, gid4) 157 | if cf.Shards[i] != gid4 { 158 | cf1 := ck.Query(-1) 159 | if cf1.Num <= cf.Num { 160 | t.Fatalf("Move should increase Config.Num") 161 | } 162 | } 163 | } 164 | } 165 | cf2 := ck.Query(-1) 166 | for i := 0; i < NShards; i++ { 167 | if i < NShards/2 { 168 | if cf2.Shards[i] != gid3 { 169 | t.Fatalf("expected shard %v on gid %v actually %v", 170 | i, gid3, cf2.Shards[i]) 171 | } 172 | } else { 173 | if cf2.Shards[i] != gid4 { 174 | t.Fatalf("expected shard %v on gid %v actually %v", 175 | i, gid4, cf2.Shards[i]) 176 | } 177 | } 178 | } 179 | ck.Leave(gid3) 180 | ck.Leave(gid4) 181 | } 182 | fmt.Printf(" ... Passed\n") 183 | 184 | fmt.Printf("Test: Concurrent leave/join ...\n") 185 | 186 | const npara = 10 187 | var cka [npara]*Clerk 188 | for i := 0; i < len(cka); i++ { 189 | cka[i] = cfg.makeClient(cfg.All()) 190 | } 191 | gids := make([]int, npara) 192 | ch := make(chan bool) 193 | for xi := 0; xi < npara; xi++ { 194 | gids[xi] = int(xi + 1) 195 | go func(i int) { 196 | defer func() { ch <- true }() 197 | var gid int = gids[i] 198 | cka[i].Join(gid+1000, []string{"a", "b", "c"}) 199 | cka[i].Join(gid, []string{"a", "b", "c"}) 200 | cka[i].Leave(gid + 1000) 201 | }(xi) 202 | } 203 | for i := 0; i < npara; i++ { 204 | <-ch 205 | } 206 | check(t, gids, ck) 207 | 208 | fmt.Printf(" ... Passed\n") 209 | 210 | fmt.Printf("Test: Minimal transfers after joins ...\n") 211 | 212 | c1 := ck.Query(-1) 213 | for i := 0; i < 5; i++ { 214 | ck.Join(int(npara+1+i), []string{"a", "b", "c"}) 215 | } 216 | c2 := ck.Query(-1) 217 | for i := int(1); i <= npara; i++ { 218 | for j := 0; j < len(c1.Shards); j++ { 219 | if c2.Shards[j] == i { 220 | if c1.Shards[j] != i { 221 | t.Fatalf("non-minimal transfer after Join()s") 222 | } 223 | } 224 | } 225 | } 226 | 227 | fmt.Printf(" ... Passed\n") 228 | 229 | fmt.Printf("Test: Minimal transfers after leaves ...\n") 230 | 231 | for i := 0; i < 5; i++ { 232 | ck.Leave(int(npara + 1 + i)) 233 | } 234 | c3 := ck.Query(-1) 235 | for i := int(1); i <= npara; i++ { 236 | for j := 0; j < len(c1.Shards); j++ { 237 | if c2.Shards[j] == i { 238 | if c3.Shards[j] != i { 239 | t.Fatalf("non-minimal transfer after Leave()s") 240 | } 241 | } 242 | } 243 | } 244 | 245 | fmt.Printf(" ... Passed\n") 246 | } 247 | -------------------------------------------------------------------------------- /src/shardmaster2/client.go: -------------------------------------------------------------------------------- 1 | package shardmaster2 2 | 3 | // 4 | // Shardmaster clerk. 5 | // 6 | 7 | import "labrpc" 8 | import "time" 9 | import "crypto/rand" 10 | import "math/big" 11 | 12 | type Clerk struct { 13 | servers []*labrpc.ClientEnd 14 | // Your data here. 15 | } 16 | 17 | func nrand() int64 { 18 | max := big.NewInt(int64(1) << 62) 19 | bigx, _ := rand.Int(rand.Reader, max) 20 | x := bigx.Int64() 21 | return x 22 | } 23 | 24 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 25 | ck := new(Clerk) 26 | ck.servers = servers 27 | // Your code here. 
33 | return ck
34 | }
35 | 
36 | func (ck *Clerk) Query(num int) Config {
37 | args := &QueryArgs{}
38 | // Your code here.
39 | args.Num = num
40 | for {
41 | // try each known server.
42 | for _, srv := range ck.servers {
43 | var reply QueryReply
44 | ok := srv.Call("ShardMaster.Query", args, &reply)
45 | if ok && reply.WrongLeader == false {
46 | return reply.Config
47 | }
48 | }
49 | time.Sleep(100 * time.Millisecond)
50 | }
51 | }
52 | 
53 | func (ck *Clerk) Join(servers map[int][]string) {
54 | // Your code here.
55 | // Send one Join RPC per new group: JoinArgs carries the target GID
56 | // together with the full gid -> servers map (see common.go).
57 | for gid := range servers {
58 | args := &JoinArgs{GID: gid, Servers: servers}
59 | loop:
60 | for {
61 | // try each known server.
62 | for _, srv := range ck.servers {
63 | var reply JoinReply
64 | ok := srv.Call("ShardMaster.Join", args, &reply)
65 | if ok && reply.WrongLeader == false {
66 | break loop
67 | }
68 | }
69 | time.Sleep(100 * time.Millisecond)
70 | }
71 | }
72 | }
73 | 
74 | func (ck *Clerk) Leave(gids []int) {
75 | // Your code here.
76 | // Send one Leave RPC per departing gid: LeaveArgs carries a single GID.
77 | for _, gid := range gids {
78 | args := &LeaveArgs{GID: gid}
79 | loop:
80 | for {
81 | // try each known server.
82 | for _, srv := range ck.servers {
83 | var reply LeaveReply
84 | ok := srv.Call("ShardMaster.Leave", args, &reply)
85 | if ok && reply.WrongLeader == false {
86 | break loop
87 | }
88 | }
89 | time.Sleep(100 * time.Millisecond)
90 | }
91 | }
92 | }
93 | 
94 | func (ck *Clerk) Move(shard int, gid int) {
95 | args := &MoveArgs{}
96 | // Your code here.
97 | args.Shard = shard
98 | args.GID = gid
99 | 
100 | for {
101 | // try each known server.
102 | for _, srv := range ck.servers {
103 | var reply MoveReply
104 | ok := srv.Call("ShardMaster.Move", args, &reply)
105 | if ok && reply.WrongLeader == false {
106 | return
107 | }
108 | }
109 | time.Sleep(100 * time.Millisecond)
110 | }
111 | }
112 | 
-------------------------------------------------------------------------------- /src/shardmaster2/common.go: --------------------------------------------------------------------------------
1 | package shardmaster2
2 | 
3 | //
4 | // Master shard server: assigns shards to replication groups.
5 | //
6 | // RPC interface:
7 | // Join(servers) -- add a set of groups (gid -> server-list mapping).
8 | // Leave(gids) -- delete a set of groups.
9 | // Move(shard, gid) -- hand off one shard from current owner to gid.
10 | // Query(num) -> fetch Config # num, or latest config if num==-1.
11 | //
12 | // A Config (configuration) describes a set of replica groups, and the
13 | // replica group responsible for each shard. Configs are numbered. Config
14 | // #0 is the initial configuration, with no groups and all shards
15 | // assigned to group 0 (the invalid group).
16 | //
17 | // A GID is a replica group ID. GIDs must be unique and > 0.
18 | // Once a GID joins, and leaves, it should never join again.
19 | //
20 | // You will need to add fields to the RPC arguments.
21 | //
22 | 
23 | // The number of shards.
24 | const NShards = 10
25 | 
26 | // A configuration -- an assignment of shards to groups.
27 | // Please don't change this.
28 | type Config struct { 29 | Num int // config number 30 | Shards [NShards]int // shard -> gid 31 | Groups map[int][]string // gid -> servers[] 32 | } 33 | 34 | const ( 35 | OK = "OK" 36 | ) 37 | 38 | const ( 39 | Join = "Join" 40 | Leave = "Leave" 41 | Move = "Move" 42 | Query = "Query" 43 | ) 44 | 45 | 46 | 47 | type Err string 48 | 49 | type JoinArgs struct { 50 | Servers map[int][]string // new GID -> servers mappings 51 | ClientId int64 52 | RequestId int 53 | GID int // unique replica group ID 54 | } 55 | 56 | type JoinReply struct { 57 | WrongLeader bool 58 | Err Err 59 | } 60 | 61 | type LeaveArgs struct { 62 | GID int 63 | ClientId int64 64 | RequestId int 65 | } 66 | 67 | type LeaveReply struct { 68 | WrongLeader bool 69 | Err Err 70 | } 71 | 72 | type MoveArgs struct { 73 | Shard int 74 | GID int 75 | ClientId int64 76 | RequestId int 77 | } 78 | 79 | type MoveReply struct { 80 | WrongLeader bool 81 | Err Err 82 | } 83 | 84 | type QueryArgs struct { 85 | Num int // desired config number 86 | ClientId int64 87 | RequestId int 88 | } 89 | 90 | type QueryReply struct { 91 | WrongLeader bool 92 | Err Err 93 | Config Config 94 | } 95 | 96 | 97 | type Result struct { 98 | opType string 99 | args interface{} 100 | reply interface{} 101 | } 102 | -------------------------------------------------------------------------------- /src/shardmaster2/config.go: -------------------------------------------------------------------------------- 1 | package shardmaster2 2 | 3 | import "labrpc" 4 | import "raft" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | 15 | func randstring(n int) string { 16 | b := make([]byte, 2*n) 17 | crand.Read(b) 18 | s := base64.URLEncoding.EncodeToString(b) 19 | return s[0:n] 20 | } 21 | 22 | // Randomize server handles 23 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 24 | sa := make([]*labrpc.ClientEnd, len(kvh)) 25 | copy(sa, kvh) 26 | for i := range sa { 27 | j := rand.Intn(i + 1) 28 | sa[i], sa[j] = sa[j], sa[i] 29 | } 30 | return sa 31 | } 32 | 33 | type config struct { 34 | mu sync.Mutex 35 | t *testing.T 36 | net *labrpc.Network 37 | n int 38 | servers []*ShardMaster 39 | saved []*raft.Persister 40 | endnames [][]string // names of each server's sending ClientEnds 41 | clerks map[*Clerk][]string 42 | nextClientId int 43 | } 44 | 45 | func (cfg *config) cleanup() { 46 | cfg.mu.Lock() 47 | defer cfg.mu.Unlock() 48 | for i := 0; i < len(cfg.servers); i++ { 49 | if cfg.servers[i] != nil { 50 | cfg.servers[i].Kill() 51 | } 52 | } 53 | } 54 | 55 | // Maximum log size across all servers 56 | func (cfg *config) LogSize() int { 57 | logsize := 0 58 | for i := 0; i < cfg.n; i++ { 59 | n := cfg.saved[i].RaftStateSize() 60 | if n > logsize { 61 | logsize = n 62 | } 63 | } 64 | return logsize 65 | } 66 | 67 | // attach server i to servers listed in to 68 | // caller must hold cfg.mu 69 | func (cfg *config) connectUnlocked(i int, to []int) { 70 | // log.Printf("connect peer %d to %v\n", i, to) 71 | 72 | // outgoing socket files 73 | for j := 0; j < len(to); j++ { 74 | endname := cfg.endnames[i][to[j]] 75 | cfg.net.Enable(endname, true) 76 | } 77 | 78 | // incoming socket files 79 | for j := 0; j < len(to); j++ { 80 | endname := cfg.endnames[to[j]][i] 81 | cfg.net.Enable(endname, true) 82 | } 83 | } 84 | 85 | func (cfg *config) connect(i int, to []int) { 86 | cfg.mu.Lock() 87 | defer cfg.mu.Unlock() 88 | 
cfg.connectUnlocked(i, to) 89 | } 90 | 91 | // detach server i from the servers listed in from 92 | // caller must hold cfg.mu 93 | func (cfg *config) disconnectUnlocked(i int, from []int) { 94 | // log.Printf("disconnect peer %d from %v\n", i, from) 95 | 96 | // outgoing socket files 97 | for j := 0; j < len(from); j++ { 98 | if cfg.endnames[i] != nil { 99 | endname := cfg.endnames[i][from[j]] 100 | cfg.net.Enable(endname, false) 101 | } 102 | } 103 | 104 | // incoming socket files 105 | for j := 0; j < len(from); j++ { 106 | if cfg.endnames[j] != nil { 107 | endname := cfg.endnames[from[j]][i] 108 | cfg.net.Enable(endname, false) 109 | } 110 | } 111 | } 112 | 113 | func (cfg *config) disconnect(i int, from []int) { 114 | cfg.mu.Lock() 115 | defer cfg.mu.Unlock() 116 | cfg.disconnectUnlocked(i, from) 117 | } 118 | 119 | func (cfg *config) All() []int { 120 | all := make([]int, cfg.n) 121 | for i := 0; i < cfg.n; i++ { 122 | all[i] = i 123 | } 124 | return all 125 | } 126 | 127 | func (cfg *config) ConnectAll() { 128 | cfg.mu.Lock() 129 | defer cfg.mu.Unlock() 130 | for i := 0; i < cfg.n; i++ { 131 | cfg.connectUnlocked(i, cfg.All()) 132 | } 133 | } 134 | 135 | // Sets up 2 partitions with connectivity between servers in each partition. 136 | func (cfg *config) partition(p1 []int, p2 []int) { 137 | cfg.mu.Lock() 138 | defer cfg.mu.Unlock() 139 | // log.Printf("partition servers into: %v %v\n", p1, p2) 140 | for i := 0; i < len(p1); i++ { 141 | cfg.disconnectUnlocked(p1[i], p2) 142 | cfg.connectUnlocked(p1[i], p1) 143 | } 144 | for i := 0; i < len(p2); i++ { 145 | cfg.disconnectUnlocked(p2[i], p1) 146 | cfg.connectUnlocked(p2[i], p2) 147 | } 148 | } 149 | 150 | // Create a clerk with clerk specific server names. 151 | // Give it connections to all of the servers, but for 152 | // now enable only connections to servers in to[]. 153 | func (cfg *config) makeClient(to []int) *Clerk { 154 | cfg.mu.Lock() 155 | defer cfg.mu.Unlock() 156 | 157 | // a fresh set of ClientEnds. 
158 | ends := make([]*labrpc.ClientEnd, cfg.n) 159 | endnames := make([]string, cfg.n) 160 | for j := 0; j < cfg.n; j++ { 161 | endnames[j] = randstring(20) 162 | ends[j] = cfg.net.MakeEnd(endnames[j]) 163 | cfg.net.Connect(endnames[j], j) 164 | } 165 | 166 | ck := MakeClerk(random_handles(ends)) 167 | cfg.clerks[ck] = endnames 168 | cfg.nextClientId++ 169 | cfg.ConnectClientUnlocked(ck, to) 170 | return ck 171 | } 172 | 173 | func (cfg *config) deleteClient(ck *Clerk) { 174 | cfg.mu.Lock() 175 | defer cfg.mu.Unlock() 176 | 177 | v := cfg.clerks[ck] 178 | for i := 0; i < len(v); i++ { 179 | os.Remove(v[i]) 180 | } 181 | delete(cfg.clerks, ck) 182 | } 183 | 184 | // caller should hold cfg.mu 185 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 186 | // log.Printf("ConnectClient %v to %v\n", ck, to) 187 | endnames := cfg.clerks[ck] 188 | for j := 0; j < len(to); j++ { 189 | s := endnames[to[j]] 190 | cfg.net.Enable(s, true) 191 | } 192 | } 193 | 194 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 195 | cfg.mu.Lock() 196 | defer cfg.mu.Unlock() 197 | cfg.ConnectClientUnlocked(ck, to) 198 | } 199 | 200 | // caller should hold cfg.mu 201 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 202 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 203 | endnames := cfg.clerks[ck] 204 | for j := 0; j < len(from); j++ { 205 | s := endnames[from[j]] 206 | cfg.net.Enable(s, false) 207 | } 208 | } 209 | 210 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 211 | cfg.mu.Lock() 212 | defer cfg.mu.Unlock() 213 | cfg.DisconnectClientUnlocked(ck, from) 214 | } 215 | 216 | // Shutdown a server by isolating it 217 | func (cfg *config) ShutdownServer(i int) { 218 | cfg.mu.Lock() 219 | defer cfg.mu.Unlock() 220 | 221 | cfg.disconnectUnlocked(i, cfg.All()) 222 | 223 | // disable client connections to the server. 224 | // it's important to do this before creating 225 | // the new Persister in saved[i], to avoid 226 | // the possibility of the server returning a 227 | // positive reply to an Append but persisting 228 | // the result in the superseded Persister. 229 | cfg.net.DeleteServer(i) 230 | 231 | // a fresh persister, in case old instance 232 | // continues to update the Persister. 233 | // but copy old persister's content so that we always 234 | // pass Make() the last persisted state. 235 | if cfg.saved[i] != nil { 236 | cfg.saved[i] = cfg.saved[i].Copy() 237 | } 238 | 239 | kv := cfg.servers[i] 240 | if kv != nil { 241 | cfg.mu.Unlock() 242 | kv.Kill() 243 | cfg.mu.Lock() 244 | cfg.servers[i] = nil 245 | } 246 | } 247 | 248 | // If restart servers, first call ShutdownServer 249 | func (cfg *config) StartServer(i int) { 250 | cfg.mu.Lock() 251 | 252 | // a fresh set of outgoing ClientEnd names. 253 | cfg.endnames[i] = make([]string, cfg.n) 254 | for j := 0; j < cfg.n; j++ { 255 | cfg.endnames[i][j] = randstring(20) 256 | } 257 | 258 | // a fresh set of ClientEnds. 259 | ends := make([]*labrpc.ClientEnd, cfg.n) 260 | for j := 0; j < cfg.n; j++ { 261 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 262 | cfg.net.Connect(cfg.endnames[i][j], j) 263 | } 264 | 265 | // a fresh persister, so old instance doesn't overwrite 266 | // new instance's persisted state. 267 | // give the fresh persister a copy of the old persister's 268 | // state, so that the spec is that we pass StartKVServer() 269 | // the last persisted state. 
270 | if cfg.saved[i] != nil { 271 | cfg.saved[i] = cfg.saved[i].Copy() 272 | } else { 273 | cfg.saved[i] = raft.MakePersister() 274 | } 275 | 276 | cfg.mu.Unlock() 277 | 278 | cfg.servers[i] = StartServer(ends, i, cfg.saved[i]) 279 | 280 | kvsvc := labrpc.MakeService(cfg.servers[i]) 281 | rfsvc := labrpc.MakeService(cfg.servers[i].rf) 282 | srv := labrpc.MakeServer() 283 | srv.AddService(kvsvc) 284 | srv.AddService(rfsvc) 285 | cfg.net.AddServer(i, srv) 286 | } 287 | 288 | func (cfg *config) Leader() (bool, int) { 289 | cfg.mu.Lock() 290 | defer cfg.mu.Unlock() 291 | 292 | for i := 0; i < cfg.n; i++ { 293 | _, is_leader := cfg.servers[i].rf.GetState() 294 | if is_leader { 295 | return true, i 296 | } 297 | } 298 | return false, 0 299 | } 300 | 301 | // Partition servers into 2 groups and put current leader in minority 302 | func (cfg *config) make_partition() ([]int, []int) { 303 | _, l := cfg.Leader() 304 | p1 := make([]int, cfg.n/2+1) 305 | p2 := make([]int, cfg.n/2) 306 | j := 0 307 | for i := 0; i < cfg.n; i++ { 308 | if i != l { 309 | if j < len(p1) { 310 | p1[j] = i 311 | } else { 312 | p2[j-len(p1)] = i 313 | } 314 | j++ 315 | } 316 | } 317 | p2[len(p2)-1] = l 318 | return p1, p2 319 | } 320 | 321 | func make_config(t *testing.T, n int, unreliable bool) *config { 322 | runtime.GOMAXPROCS(4) 323 | cfg := &config{} 324 | cfg.t = t 325 | cfg.net = labrpc.MakeNetwork() 326 | cfg.n = n 327 | cfg.servers = make([]*ShardMaster, cfg.n) 328 | cfg.saved = make([]*raft.Persister, cfg.n) 329 | cfg.endnames = make([][]string, cfg.n) 330 | cfg.clerks = make(map[*Clerk][]string) 331 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 332 | 333 | // create a full set of KV servers. 334 | for i := 0; i < cfg.n; i++ { 335 | cfg.StartServer(i) 336 | } 337 | 338 | cfg.ConnectAll() 339 | 340 | cfg.net.Reliable(!unreliable) 341 | 342 | return cfg 343 | } 344 | -------------------------------------------------------------------------------- /src/shardmaster2/server.go: -------------------------------------------------------------------------------- 1 | package shardmaster2 2 | 3 | 4 | import "raft" 5 | import "labrpc" 6 | import "sync" 7 | import ( 8 | "encoding/gob" 9 | "time" 10 | "runtime/debug" 11 | "os" 12 | "log" 13 | ) 14 | 15 | 16 | type ShardMaster struct { 17 | mu sync.Mutex 18 | me int 19 | rf *raft.Raft 20 | applyCh chan raft.ApplyMsg 21 | 22 | // Your data here. 23 | cfgNum int 24 | ack map[int64]int 25 | messages map[int]chan Result 26 | 27 | configs []Config // indexed by config num 28 | } 29 | 30 | 31 | type Op struct { 32 | // Your data here. 33 | OpType string 34 | Args interface{} 35 | } 36 | 37 | 38 | 39 | 40 | 41 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) { 42 | // Your code here. 
43 | 
44 | index, _, isLeader := sm.rf.Start(Op{OpType: Join, Args: *args})
45 | if !isLeader {
46 | reply.WrongLeader = true
47 | return
48 | }
49 | sm.mu.Lock()
50 | if _, ok := sm.messages[index]; !ok {
51 | sm.messages[index] = make(chan Result, 1)
52 | }
53 | chanMsg := sm.messages[index]
54 | sm.mu.Unlock()
55 | 
56 | 
57 | select {
58 | case msg := <- chanMsg:
59 | if recArgs, ok := msg.args.(JoinArgs); !ok {
60 | reply.WrongLeader = true
61 | } else {
62 | if args.ClientId != recArgs.ClientId || args.RequestId != recArgs.RequestId {
63 | reply.WrongLeader = true
64 | } else {
65 | reply.Err = msg.reply.(JoinReply).Err
66 | reply.WrongLeader = false
67 | //DPrintf("[%d] Apply Join: [%d]", sm.me, args.RequestId)
68 | }
69 | }
70 | case <- time.After(time.Second * 1):
71 | reply.WrongLeader = true
72 | }
73 | 
74 | }
75 | 
76 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) {
77 | // Your code here.
78 | index, _, isLeader := sm.rf.Start(Op{OpType: Leave, Args: *args})
79 | if !isLeader {
80 | reply.WrongLeader = true
81 | return
82 | }
83 | 
84 | sm.mu.Lock()
85 | if _, ok := sm.messages[index]; !ok {
86 | sm.messages[index] = make(chan Result, 1)
87 | 
88 | }
89 | chanMsg := sm.messages[index]
90 | sm.mu.Unlock()
91 | 
92 | select {
93 | case msg := <- chanMsg:
94 | if recArgs, ok := msg.args.(LeaveArgs); !ok {
95 | reply.WrongLeader = true
96 | } else {
97 | if args.ClientId != recArgs.ClientId || args.RequestId != recArgs.RequestId {
98 | reply.WrongLeader = true
99 | } else {
100 | reply.Err = msg.reply.(LeaveReply).Err
101 | reply.WrongLeader = false
102 | //DPrintf("[%d] Apply Leave: [%d]", sm.me, args.RequestId)
103 | }
104 | }
105 | case <- time.After(time.Second * 1):
106 | reply.WrongLeader = true
107 | }
108 | }
109 | 
110 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) {
111 | // Your code here.
112 | index, _, isLeader := sm.rf.Start(Op{OpType: Move, Args: *args})
113 | if !isLeader {
114 | reply.WrongLeader = true
115 | return
116 | }
117 | 
118 | sm.mu.Lock()
119 | if _, ok := sm.messages[index]; !ok {
120 | sm.messages[index] = make(chan Result, 1)
121 | 
122 | }
123 | chanMsg := sm.messages[index]
124 | sm.mu.Unlock()
125 | 
126 | select {
127 | case msg := <- chanMsg:
128 | if recArgs, ok := msg.args.(MoveArgs); !ok {
129 | reply.WrongLeader = true
130 | } else {
131 | if args.ClientId != recArgs.ClientId || args.RequestId != recArgs.RequestId {
132 | reply.WrongLeader = true
133 | } else {
134 | reply.Err = msg.reply.(MoveReply).Err
135 | reply.WrongLeader = false
136 | //DPrintf("[%d] Apply Move: [%d]", sm.me, args.RequestId)
137 | }
138 | }
139 | case <- time.After(time.Second * 1):
140 | reply.WrongLeader = true
141 | }
142 | }
143 | 
144 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) {
145 | // Your code here.
146 | index, _, isLeader := sm.rf.Start(Op{OpType: Query, Args: *args})
147 | if !isLeader {
148 | reply.WrongLeader = true
149 | return
150 | }
151 | 
152 | sm.mu.Lock()
153 | if _, ok := sm.messages[index]; !ok {
154 | sm.messages[index] = make(chan Result, 1)
155 | 
156 | }
157 | chanMsg := sm.messages[index]
158 | sm.mu.Unlock()
159 | 
160 | select {
161 | case msg := <- chanMsg:
162 | if recArgs, ok := msg.args.(QueryArgs); !ok {
163 | reply.WrongLeader = true
164 | } else {
165 | if args.ClientId != recArgs.ClientId || args.RequestId != recArgs.RequestId {
166 | reply.WrongLeader = true
167 | } else {
168 | //!!!
return the queried Config in the reply
169 | *reply = msg.reply.(QueryReply)
170 | reply.WrongLeader = false
171 | //DPrintf("[%d] Apply Query: [%d]", sm.me, args.RequestId)
172 | }
173 | }
174 | case <- time.After(time.Second * 1):
175 | reply.WrongLeader = true
176 | }
177 | }
178 | 
179 | 
180 | //
181 | // the tester calls Kill() when a ShardMaster instance won't
182 | // be needed again. you are not required to do anything
183 | // in Kill(), but it might be convenient to (for example)
184 | // turn off debug output from this instance.
185 | //
186 | func (sm *ShardMaster) Kill() {
187 | sm.rf.Kill()
188 | // Your code here, if desired.
189 | }
190 | 
191 | // needed by shardkv tester
192 | func (sm *ShardMaster) Raft() *raft.Raft {
193 | return sm.rf
194 | }
195 | 
196 | //
197 | // servers[] contains the ports of the set of
198 | // servers that will cooperate via Raft to
199 | // form the fault-tolerant shardmaster service.
200 | // me is the index of the current server in servers[].
201 | //
202 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardMaster {
203 | sm := new(ShardMaster)
204 | sm.me = me
205 | 
206 | sm.configs = make([]Config, 1)
207 | sm.configs[0].Groups = map[int][]string{}
208 | 
209 | gob.Register(Op{})
210 | sm.applyCh = make(chan raft.ApplyMsg)
211 | 
212 | // Your code here.
213 | // Register every concrete argument/reply type that travels inside
214 | // Op.Args and Result so gob can encode them; the Raft instance is
215 | // created exactly once, below, after the server state has been
216 | // initialized.
217 | 
218 | 
219 | gob.Register(JoinArgs{})
220 | gob.Register(LeaveArgs{})
221 | gob.Register(MoveArgs{})
222 | gob.Register(QueryArgs{})
223 | gob.Register(JoinReply{})
224 | gob.Register(LeaveReply{})
225 | gob.Register(MoveReply{})
226 | gob.Register(QueryReply{})
227 | sm.cfgNum = 0
228 | sm.ack = make(map[int64]int)
229 | sm.messages = make(map[int]chan Result, 1)
230 | sm.rf = raft.Make(servers, me, persister, sm.applyCh)
231 | 
232 | go sm.Update()
233 | 
234 | return sm
235 | }
236 | // Rebalance policy: on Join, move NShards/len(Groups) shards, each taken
237 | // from whichever group currently holds the most, to the new gid; on Leave,
238 | // hand each of the departing gid's shards to the least-loaded remaining gid.
239 | func (sm *ShardMaster) ReBalanceShards(cfg *Config, request string, gid int) {
240 | shardsCount := sm.CountShards(cfg) // gid -> shards currently assigned to it
241 | switch request {
242 | case Join:
243 | meanNum := NShards / len(cfg.Groups)
244 | for i := 0; i < meanNum; i++ {
245 | maxGid := sm.GetMaxGidByShards(shardsCount)
246 | if len(shardsCount[maxGid]) == 0 {
247 | DPrintf("ReBalanceShards: max gid does not have shards")
248 | debug.PrintStack()
249 | os.Exit(-1)
250 | }
251 | cfg.Shards[shardsCount[maxGid][0]] = gid
252 | shardsCount[maxGid] = shardsCount[maxGid][1:]
253 | }
254 | case Leave:
255 | shardsArray := shardsCount[gid]
256 | delete(shardsCount, gid)
257 | for _, v := range(shardsArray) {
258 | minGid := sm.GetMinGidByShards(shardsCount)
259 | cfg.Shards[v] = minGid
260 | shardsCount[minGid] = append(shardsCount[minGid], v)
261 | }
262 | }
263 | }
264 | 
265 | func (sm *ShardMaster) GetMaxGidByShards(shardsCount map[int][]int) int {
266 | max := -1
267 | var gid int
268 | for k, v := range shardsCount {
269 | if max < len(v) {
270 | max = len(v)
271 | gid = k
272 | }
273 | }
274 | return gid
275 | }
276 | 
277 | func (sm *ShardMaster) GetMinGidByShards(shardsCount map[int][]int) int {
278 | min := -1
279 | var gid int
280 | for k, v := range shardsCount {
281 | if min == -1 || min > len(v) {
282 | min = len(v)
283 | gid = k
284 | }
285 | }
286 | return gid
287 | }
288 | 
289 | func (sm *ShardMaster) CountShards(cfg *Config) map[int][]int {
290 | shardsCount := map[int][]int{}
291 | for k := range cfg.Groups {
292 | 
shardsCount[k] = []int{} 293 | } 294 | for k, v := range cfg.Shards { 295 | shardsCount[v] = append(shardsCount[v], k) 296 | } 297 | return shardsCount 298 | } 299 | 300 | // receive command form raft to update database 301 | func (sm *ShardMaster) Update() { 302 | for true { 303 | msg := <- sm.applyCh 304 | request := msg.Command.(Op) 305 | //!!! value and type is a type of variable 306 | var result Result 307 | var clientId int64 308 | var requestId int 309 | switch request.OpType { 310 | case Join: 311 | args := request.Args.(JoinArgs) 312 | clientId = args.ClientId 313 | requestId = args.RequestId 314 | result.args = args 315 | case Leave: 316 | args := request.Args.(LeaveArgs) 317 | clientId = args.ClientId 318 | requestId = args.RequestId 319 | result.args = args 320 | case Move: 321 | args := request.Args.(MoveArgs) 322 | clientId = args.ClientId 323 | requestId = args.RequestId 324 | result.args = args 325 | case Query: 326 | args := request.Args.(QueryArgs) 327 | clientId = args.ClientId 328 | requestId = args.RequestId 329 | result.args = args 330 | } 331 | 332 | result.opType = request.OpType 333 | result.reply = sm.Apply(request, sm.IsDuplicated(clientId, requestId)) 334 | sm.SendResult(msg.Index, result) 335 | sm.CheckValid() 336 | } 337 | } 338 | 339 | func (sm *ShardMaster) Apply(request Op, isDuplicated bool) interface{} { 340 | sm.mu.Lock() 341 | defer sm.mu.Unlock() 342 | switch request.Args.(type) { 343 | case JoinArgs: 344 | var reply JoinReply 345 | if !isDuplicated { 346 | sm.ApplyJoin(request.Args.(JoinArgs)) 347 | DPrintln(sm.me, "apply Join", request.Args.(JoinArgs), "->", sm.configs[sm.cfgNum]) 348 | } 349 | reply.Err = OK 350 | return reply 351 | case LeaveArgs: 352 | var reply LeaveReply 353 | if !isDuplicated { 354 | sm.ApplyLeave(request.Args.(LeaveArgs)) 355 | DPrintln(sm.me, "apply Leave", request.Args.(LeaveArgs), "->", sm.configs[sm.cfgNum]) 356 | } 357 | reply.Err = OK 358 | return reply 359 | case MoveArgs: 360 | var reply MoveReply 361 | if !isDuplicated { 362 | sm.ApplyMove(request.Args.(MoveArgs)) 363 | } 364 | reply.Err = OK 365 | DPrintln(sm.me, "apply Move", request.Args.(MoveArgs), "->", sm.configs[sm.cfgNum]) 366 | return reply 367 | case QueryArgs: 368 | var reply QueryReply 369 | args := request.Args.(QueryArgs) 370 | if args.Num == -1 || args.Num > sm.cfgNum { 371 | reply.Config = sm.configs[sm.cfgNum] 372 | } else { 373 | reply.Config = sm.configs[args.Num] 374 | } 375 | reply.Err = OK 376 | DPrintln(sm.me, "apply Query", request.Args.(QueryArgs), "->", sm.configs[sm.cfgNum]) 377 | return reply 378 | } 379 | return nil 380 | } 381 | 382 | func (sm *ShardMaster) IsDuplicated(clientId int64, requestId int) bool { 383 | sm.mu.Lock() 384 | defer sm.mu.Unlock() 385 | if value, ok := sm.ack[clientId]; ok && value >= requestId { 386 | return true 387 | } 388 | sm.ack[clientId] = requestId 389 | return false 390 | } 391 | 392 | func (sm *ShardMaster) SendResult(index int, result Result) { 393 | sm.mu.Lock() 394 | defer sm.mu.Unlock() 395 | if _, ok := sm.messages[index]; !ok { 396 | sm.messages[index] = make(chan Result, 1) 397 | } else { 398 | select { 399 | case <- sm.messages[index]: 400 | default: 401 | } 402 | } 403 | sm.messages[index] <- result 404 | } 405 | 406 | //!!! Be careful of what the variable means 407 | func (sm *ShardMaster) CheckValid() { 408 | c := sm.configs[sm.cfgNum] 409 | for _, v := range c.Shards { 410 | //!!! 
shards may map to the invalid group 0 only while no groups have joined
411 | if len(c.Groups) == 0 && v == 0 {
412 | continue
413 | }
414 | if _, ok := c.Groups[v]; !ok {
415 | DPrintln("Check failed: group", v, "does not exist", c.Shards, c.Groups)
416 | debug.PrintStack()
417 | os.Exit(-1)
418 | }
419 | }
420 | }
421 | 
422 | 
423 | func (sm *ShardMaster) NextConfig() *Config {
424 | var c Config
425 | c.Num = sm.cfgNum + 1
426 | c.Shards = sm.configs[sm.cfgNum].Shards
427 | c.Groups = map[int][]string{}
428 | for k, v := range sm.configs[sm.cfgNum].Groups {
429 | c.Groups[k] = v
430 | }
431 | sm.cfgNum += 1
432 | sm.configs = append(sm.configs, c)
433 | //!!! return a pointer into sm.configs so callers mutate the stored config
434 | return &sm.configs[sm.cfgNum]
435 | }
436 | 
437 | func (sm *ShardMaster) ApplyJoin(args JoinArgs) {
438 | cfg := sm.NextConfig()
439 | //!!! only add the gid if it is not already present
440 | if _, exist := cfg.Groups[args.GID]; !exist {
441 | // args.Servers maps gid -> servers; add just the joining group
442 | cfg.Groups[args.GID] = args.Servers[args.GID]
443 | sm.ReBalanceShards(cfg, Join, args.GID)
444 | }
445 | }
446 | 
447 | func (sm *ShardMaster) ApplyLeave(args LeaveArgs) {
448 | cfg := sm.NextConfig()
449 | //!!! only remove the gid if it is actually present
450 | if _, exist := cfg.Groups[args.GID]; exist {
451 | delete(cfg.Groups, args.GID)
452 | sm.ReBalanceShards(cfg, Leave, args.GID)
453 | }
454 | }
455 | 
456 | func (sm *ShardMaster) ApplyMove(args MoveArgs) {
457 | cfg := sm.NextConfig()
458 | cfg.Shards[args.Shard] = args.GID
459 | }
460 | 
461 | const Debug = 0
462 | func DPrintf(format string, a ...interface{}) (n int, err error) {
463 | if Debug > 0 {
464 | log.Printf(format, a...)
465 | }
466 | return
467 | }
468 | 
469 | func DPrintln(a ...interface{}) {
470 | if Debug > 0 {
471 | log.Println(a...)
472 | }
473 | return
474 | }
475 | 
-------------------------------------------------------------------------------- /src/shardmaster2/test_test.go: --------------------------------------------------------------------------------
1 | package shardmaster2
2 | 
3 | import (
4 | "sync"
5 | "testing"
6 | )
7 | 
8 | // import "time"
9 | import "fmt"
10 | 
11 | func check(t *testing.T, groups []int, ck *Clerk) {
12 | c := ck.Query(-1)
13 | if len(c.Groups) != len(groups) {
14 | t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups))
15 | }
16 | 
17 | // are the groups as expected?
18 | for _, g := range groups {
19 | _, ok := c.Groups[g]
20 | if ok != true {
21 | t.Fatalf("missing group %v", g)
22 | }
23 | }
24 | 
25 | // any un-allocated shards?
26 | if len(groups) > 0 {
27 | for s, g := range c.Shards {
28 | _, ok := c.Groups[g]
29 | if ok == false {
30 | t.Fatalf("shard %v -> invalid group %v", s, g)
31 | }
32 | }
33 | }
34 | 
35 | // more or less balanced sharding?
36 | counts := map[int]int{} 37 | for _, g := range c.Shards { 38 | counts[g] += 1 39 | } 40 | min := 257 41 | max := 0 42 | for g, _ := range c.Groups { 43 | if counts[g] > max { 44 | max = counts[g] 45 | } 46 | if counts[g] < min { 47 | min = counts[g] 48 | } 49 | } 50 | if max > min+1 { 51 | t.Fatalf("max %v too much larger than min %v", max, min) 52 | } 53 | } 54 | 55 | func check_same_config(t *testing.T, c1 Config, c2 Config) { 56 | if c1.Num != c2.Num { 57 | t.Fatalf("Num wrong") 58 | } 59 | if c1.Shards != c2.Shards { 60 | t.Fatalf("Shards wrong") 61 | } 62 | if len(c1.Groups) != len(c2.Groups) { 63 | t.Fatalf("number of Groups is wrong") 64 | } 65 | for gid, sa := range c1.Groups { 66 | sa1, ok := c2.Groups[gid] 67 | if ok == false || len(sa1) != len(sa) { 68 | t.Fatalf("len(Groups) wrong") 69 | } 70 | if ok && len(sa1) == len(sa) { 71 | for j := 0; j < len(sa); j++ { 72 | if sa[j] != sa1[j] { 73 | t.Fatalf("Groups wrong") 74 | } 75 | } 76 | } 77 | } 78 | } 79 | 80 | func TestBasic(t *testing.T) { 81 | const nservers = 3 82 | cfg := make_config(t, nservers, false) 83 | defer cfg.cleanup() 84 | 85 | ck := cfg.makeClient(cfg.All()) 86 | 87 | fmt.Printf("Test: Basic leave/join ...\n") 88 | 89 | cfa := make([]Config, 6) 90 | cfa[0] = ck.Query(-1) 91 | 92 | check(t, []int{}, ck) 93 | 94 | var gid1 int = 1 95 | ck.Join(map[int][]string{gid1: []string{"x", "y", "z"}}) 96 | check(t, []int{gid1}, ck) 97 | cfa[1] = ck.Query(-1) 98 | 99 | var gid2 int = 2 100 | ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}}) 101 | check(t, []int{gid1, gid2}, ck) 102 | cfa[2] = ck.Query(-1) 103 | 104 | ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}}) 105 | check(t, []int{gid1, gid2}, ck) 106 | cfa[3] = ck.Query(-1) 107 | 108 | cfx := ck.Query(-1) 109 | sa1 := cfx.Groups[gid1] 110 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 111 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 112 | } 113 | sa2 := cfx.Groups[gid2] 114 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 115 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 116 | } 117 | 118 | ck.Leave([]int{gid1}) 119 | check(t, []int{gid2}, ck) 120 | cfa[4] = ck.Query(-1) 121 | 122 | ck.Leave([]int{gid1}) 123 | check(t, []int{gid2}, ck) 124 | cfa[5] = ck.Query(-1) 125 | 126 | fmt.Printf(" ... Passed\n") 127 | 128 | fmt.Printf("Test: Historical queries ...\n") 129 | 130 | for s := 0; s < nservers; s++ { 131 | cfg.ShutdownServer(s) 132 | for i := 0; i < len(cfa); i++ { 133 | c := ck.Query(cfa[i].Num) 134 | check_same_config(t, c, cfa[i]) 135 | } 136 | cfg.StartServer(s) 137 | cfg.ConnectAll() 138 | } 139 | 140 | fmt.Printf(" ... 
Passed\n") 141 | 142 | fmt.Printf("Test: Move ...\n") 143 | { 144 | var gid3 int = 503 145 | ck.Join(map[int][]string{gid3: []string{"3a", "3b", "3c"}}) 146 | var gid4 int = 504 147 | ck.Join(map[int][]string{gid4: []string{"4a", "4b", "4c"}}) 148 | for i := 0; i < NShards; i++ { 149 | cf := ck.Query(-1) 150 | if i < NShards/2 { 151 | ck.Move(i, gid3) 152 | if cf.Shards[i] != gid3 { 153 | cf1 := ck.Query(-1) 154 | if cf1.Num <= cf.Num { 155 | t.Fatalf("Move should increase Config.Num") 156 | } 157 | } 158 | } else { 159 | ck.Move(i, gid4) 160 | if cf.Shards[i] != gid4 { 161 | cf1 := ck.Query(-1) 162 | if cf1.Num <= cf.Num { 163 | t.Fatalf("Move should increase Config.Num") 164 | } 165 | } 166 | } 167 | } 168 | cf2 := ck.Query(-1) 169 | for i := 0; i < NShards; i++ { 170 | if i < NShards/2 { 171 | if cf2.Shards[i] != gid3 { 172 | t.Fatalf("expected shard %v on gid %v actually %v", 173 | i, gid3, cf2.Shards[i]) 174 | } 175 | } else { 176 | if cf2.Shards[i] != gid4 { 177 | t.Fatalf("expected shard %v on gid %v actually %v", 178 | i, gid4, cf2.Shards[i]) 179 | } 180 | } 181 | } 182 | ck.Leave([]int{gid3}) 183 | ck.Leave([]int{gid4}) 184 | } 185 | fmt.Printf(" ... Passed\n") 186 | 187 | fmt.Printf("Test: Concurrent leave/join ...\n") 188 | 189 | const npara = 10 190 | var cka [npara]*Clerk 191 | for i := 0; i < len(cka); i++ { 192 | cka[i] = cfg.makeClient(cfg.All()) 193 | } 194 | gids := make([]int, npara) 195 | ch := make(chan bool) 196 | for xi := 0; xi < npara; xi++ { 197 | gids[xi] = int(xi + 1) 198 | go func(i int) { 199 | defer func() { ch <- true }() 200 | var gid int = gids[i] 201 | cka[i].Join(map[int][]string{gid + 1000: []string{"a", "b", "c"}}) 202 | cka[i].Join(map[int][]string{gid: []string{"a", "b", "c"}}) 203 | cka[i].Leave([]int{gid + 1000}) 204 | }(xi) 205 | } 206 | for i := 0; i < npara; i++ { 207 | <-ch 208 | } 209 | check(t, gids, ck) 210 | 211 | fmt.Printf(" ... Passed\n") 212 | 213 | fmt.Printf("Test: Minimal transfers after joins ...\n") 214 | 215 | c1 := ck.Query(-1) 216 | for i := 0; i < 5; i++ { 217 | ck.Join(map[int][]string{int(npara + 1 + i): []string{"a", "b", "c"}}) 218 | } 219 | c2 := ck.Query(-1) 220 | for i := int(1); i <= npara; i++ { 221 | for j := 0; j < len(c1.Shards); j++ { 222 | if c2.Shards[j] == i { 223 | if c1.Shards[j] != i { 224 | t.Fatalf("non-minimal transfer after Join()s") 225 | } 226 | } 227 | } 228 | } 229 | 230 | fmt.Printf(" ... Passed\n") 231 | 232 | fmt.Printf("Test: Minimal transfers after leaves ...\n") 233 | 234 | for i := 0; i < 5; i++ { 235 | ck.Leave([]int{int(npara + 1 + i)}) 236 | } 237 | c3 := ck.Query(-1) 238 | for i := int(1); i <= npara; i++ { 239 | for j := 0; j < len(c1.Shards); j++ { 240 | if c2.Shards[j] == i { 241 | if c3.Shards[j] != i { 242 | t.Fatalf("non-minimal transfer after Leave()s") 243 | } 244 | } 245 | } 246 | } 247 | 248 | fmt.Printf(" ... 
Passed\n") 249 | } 250 | 251 | func TestMulti(t *testing.T) { 252 | const nservers = 3 253 | cfg := make_config(t, nservers, false) 254 | defer cfg.cleanup() 255 | 256 | ck := cfg.makeClient(cfg.All()) 257 | 258 | fmt.Printf("Test: Multi-group join/leave ...\n") 259 | 260 | cfa := make([]Config, 6) 261 | cfa[0] = ck.Query(-1) 262 | 263 | check(t, []int{}, ck) 264 | 265 | var gid1 int = 1 266 | var gid2 int = 2 267 | ck.Join(map[int][]string{ 268 | gid1: []string{"x", "y", "z"}, 269 | gid2: []string{"a", "b", "c"}, 270 | }) 271 | check(t, []int{gid1, gid2}, ck) 272 | cfa[1] = ck.Query(-1) 273 | 274 | var gid3 int = 3 275 | ck.Join(map[int][]string{gid3: []string{"j", "k", "l"}}) 276 | check(t, []int{gid1, gid2, gid3}, ck) 277 | cfa[2] = ck.Query(-1) 278 | 279 | ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}}) 280 | check(t, []int{gid1, gid2, gid3}, ck) 281 | cfa[3] = ck.Query(-1) 282 | 283 | cfx := ck.Query(-1) 284 | sa1 := cfx.Groups[gid1] 285 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 286 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 287 | } 288 | sa2 := cfx.Groups[gid2] 289 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 290 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 291 | } 292 | sa3 := cfx.Groups[gid3] 293 | if len(sa3) != 3 || sa3[0] != "j" || sa3[1] != "k" || sa3[2] != "l" { 294 | t.Fatalf("wrong servers for gid %v: %v\n", gid3, sa3) 295 | } 296 | 297 | ck.Leave([]int{gid1, gid3}) 298 | check(t, []int{gid2}, ck) 299 | cfa[4] = ck.Query(-1) 300 | 301 | cfx = ck.Query(-1) 302 | sa2 = cfx.Groups[gid2] 303 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 304 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 305 | } 306 | 307 | fmt.Printf(" ... Passed\n") 308 | 309 | fmt.Printf("Test: Concurrent multi leave/join ...\n") 310 | 311 | const npara = 10 312 | var cka [npara]*Clerk 313 | for i := 0; i < len(cka); i++ { 314 | cka[i] = cfg.makeClient(cfg.All()) 315 | } 316 | gids := make([]int, npara) 317 | var wg sync.WaitGroup 318 | for xi := 0; xi < npara; xi++ { 319 | wg.Add(1) 320 | gids[xi] = int(xi + 1) 321 | go func(i int) { 322 | defer wg.Done() 323 | var gid int = gids[i] 324 | cka[i].Join(map[int][]string{ 325 | gid: []string{"a", "b", "c"}, 326 | gid + 1000: []string{"a", "b", "c"}, 327 | gid + 2000: []string{"a", "b", "c"}, 328 | }) 329 | cka[i].Leave([]int{gid + 1000, gid + 2000}) 330 | }(xi) 331 | } 332 | wg.Wait() 333 | check(t, gids, ck) 334 | 335 | fmt.Printf(" ... Passed\n") 336 | 337 | fmt.Printf("Test: Minimal transfers after multijoins ...\n") 338 | 339 | c1 := ck.Query(-1) 340 | m := make(map[int][]string) 341 | for i := 0; i < 5; i++ { 342 | m[npara+1+i] = []string{"a", "b", "c"} 343 | } 344 | ck.Join(m) 345 | c2 := ck.Query(-1) 346 | for i := int(1); i <= npara; i++ { 347 | for j := 0; j < len(c1.Shards); j++ { 348 | if c2.Shards[j] == i { 349 | if c1.Shards[j] != i { 350 | t.Fatalf("non-minimal transfer after Join()s") 351 | } 352 | } 353 | } 354 | } 355 | 356 | fmt.Printf(" ... Passed\n") 357 | 358 | fmt.Printf("Test: Minimal transfers after multileaves ...\n") 359 | 360 | var l []int 361 | for i := 0; i < 5; i++ { 362 | l = append(l, npara+1+i) 363 | } 364 | ck.Leave(l) 365 | c3 := ck.Query(-1) 366 | for i := int(1); i <= npara; i++ { 367 | for j := 0; j < len(c1.Shards); j++ { 368 | if c2.Shards[j] == i { 369 | if c3.Shards[j] != i { 370 | t.Fatalf("non-minimal transfer after Leave()s") 371 | } 372 | } 373 | } 374 | } 375 | 376 | fmt.Printf(" ... 
Passed\n") 377 | } 378 | --------------------------------------------------------------------------------